2021-07-12 04:12:15 +00:00
|
|
|
use std::collections::HashMap;
|
2021-07-15 16:29:55 +00:00
|
|
|
use std::sync::atomic::Ordering;
|
2021-07-12 04:12:15 +00:00
|
|
|
use std::sync::Arc;
|
|
|
|
use std::time::Duration;
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2022-01-02 20:34:00 +00:00
|
|
|
use actix_web::http::header::{HeaderMap, HeaderName, HeaderValue};
|
2021-07-12 04:12:15 +00:00
|
|
|
use actix_web::web::Data;
|
2021-07-10 22:53:28 +00:00
|
|
|
use bytes::Bytes;
|
|
|
|
use once_cell::sync::Lazy;
|
|
|
|
use parking_lot::RwLock;
|
2021-07-12 04:12:15 +00:00
|
|
|
use reqwest::header::{
|
|
|
|
ACCESS_CONTROL_ALLOW_ORIGIN, ACCESS_CONTROL_EXPOSE_HEADERS, CACHE_CONTROL, CONTENT_LENGTH,
|
|
|
|
CONTENT_TYPE, LAST_MODIFIED, X_CONTENT_TYPE_OPTIONS,
|
2021-07-10 22:53:28 +00:00
|
|
|
};
|
2021-07-15 16:29:55 +00:00
|
|
|
use reqwest::{Client, Proxy, StatusCode};
|
2021-07-12 04:12:15 +00:00
|
|
|
use tokio::sync::watch::{channel, Receiver};
|
|
|
|
use tokio::sync::Notify;
|
2021-07-15 16:29:55 +00:00
|
|
|
use tracing::{debug, error, info, warn};
|
2021-07-10 22:53:28 +00:00
|
|
|
|
|
|
|
use crate::cache::{Cache, CacheKey, ImageMetadata};
|
2021-07-15 16:29:55 +00:00
|
|
|
use crate::config::{DISABLE_CERT_VALIDATION, USE_PROXY};
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-15 16:29:55 +00:00
|
|
|
pub static HTTP_CLIENT: Lazy<CachingClient> = Lazy::new(|| {
|
|
|
|
let mut inner = Client::builder()
|
2021-07-10 22:53:28 +00:00
|
|
|
.pool_idle_timeout(Duration::from_secs(180))
|
|
|
|
.https_only(true)
|
2021-07-15 16:29:55 +00:00
|
|
|
.http2_prior_knowledge();
|
|
|
|
|
|
|
|
if let Some(socket_addr) = USE_PROXY.get() {
|
|
|
|
info!(
|
|
|
|
"Using {} as a proxy for upstream requests.",
|
|
|
|
socket_addr.as_str()
|
|
|
|
);
|
|
|
|
inner = inner.proxy(Proxy::all(socket_addr.as_str()).unwrap());
|
|
|
|
}
|
|
|
|
|
|
|
|
if DISABLE_CERT_VALIDATION.load(Ordering::Acquire) {
|
|
|
|
inner = inner.danger_accept_invalid_certs(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
let inner = inner.build().expect("Client initialization to work");
|
|
|
|
CachingClient {
|
|
|
|
inner,
|
|
|
|
locks: RwLock::new(HashMap::new()),
|
|
|
|
}
|
2021-07-10 22:53:28 +00:00
|
|
|
});
|
|
|
|
|
2021-07-20 20:47:04 +00:00
|
|
|
#[cfg(not(tarpaulin_include))]
|
2021-07-11 18:23:15 +00:00
|
|
|
pub static DEFAULT_HEADERS: Lazy<HeaderMap> = Lazy::new(|| {
|
2021-07-10 23:04:27 +00:00
|
|
|
let mut headers = HeaderMap::with_capacity(8);
|
|
|
|
headers.insert(X_CONTENT_TYPE_OPTIONS, HeaderValue::from_static("nosniff"));
|
|
|
|
headers.insert(
|
|
|
|
ACCESS_CONTROL_ALLOW_ORIGIN,
|
|
|
|
HeaderValue::from_static("https://mangadex.org"),
|
|
|
|
);
|
|
|
|
headers.insert(ACCESS_CONTROL_EXPOSE_HEADERS, HeaderValue::from_static("*"));
|
|
|
|
headers.insert(
|
|
|
|
CACHE_CONTROL,
|
|
|
|
HeaderValue::from_static("public, max-age=1209600"),
|
|
|
|
);
|
|
|
|
headers.insert(
|
|
|
|
HeaderName::from_static("timing-allow-origin"),
|
|
|
|
HeaderValue::from_static("https://mangadex.org"),
|
|
|
|
);
|
|
|
|
headers
|
|
|
|
});
|
|
|
|
|
2021-07-10 22:53:28 +00:00
|
|
|
pub struct CachingClient {
|
|
|
|
inner: Client,
|
|
|
|
locks: RwLock<HashMap<String, Receiver<FetchResult>>>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug)]
|
|
|
|
pub enum FetchResult {
|
|
|
|
ServiceUnavailable,
|
|
|
|
InternalServerError,
|
|
|
|
Data(StatusCode, HeaderMap, Bytes),
|
|
|
|
Processing,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl CachingClient {
|
|
|
|
pub async fn fetch_and_cache(
|
|
|
|
&'static self,
|
|
|
|
url: String,
|
|
|
|
key: CacheKey,
|
|
|
|
cache: Data<dyn Cache>,
|
|
|
|
) -> FetchResult {
|
2021-07-17 17:32:43 +00:00
|
|
|
let maybe_receiver = {
|
|
|
|
let lock = self.locks.read();
|
|
|
|
lock.get(&url).map(Clone::clone)
|
|
|
|
};
|
|
|
|
if let Some(mut recv) = maybe_receiver {
|
2021-07-10 22:53:28 +00:00
|
|
|
loop {
|
|
|
|
if !matches!(*recv.borrow(), FetchResult::Processing) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if recv.changed().await.is_err() {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return recv.borrow().clone();
|
|
|
|
}
|
|
|
|
|
|
|
|
let notify = Arc::new(Notify::new());
|
2021-07-17 17:32:43 +00:00
|
|
|
tokio::spawn(self.fetch_and_cache_impl(cache, url.clone(), key, Arc::clone(¬ify)));
|
|
|
|
notify.notified().await;
|
|
|
|
|
|
|
|
let mut recv = self
|
|
|
|
.locks
|
|
|
|
.read()
|
|
|
|
.get(&url)
|
|
|
|
.expect("receiver to exist since we just made one")
|
|
|
|
.clone();
|
|
|
|
loop {
|
|
|
|
if !matches!(*recv.borrow(), FetchResult::Processing) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if recv.changed().await.is_err() {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let resp = recv.borrow().clone();
|
|
|
|
resp
|
|
|
|
}
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
async fn fetch_and_cache_impl(
|
|
|
|
&self,
|
|
|
|
cache: Data<dyn Cache>,
|
|
|
|
url: String,
|
|
|
|
key: CacheKey,
|
|
|
|
notify: Arc<Notify>,
|
|
|
|
) {
|
|
|
|
let (tx, rx) = channel(FetchResult::Processing);
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
self.locks.write().insert(url.clone(), rx);
|
|
|
|
notify.notify_one();
|
|
|
|
let resp = self.inner.get(&url).send().await;
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let resp = match resp {
|
|
|
|
Ok(mut resp) => {
|
|
|
|
let content_type = resp.headers().get(CONTENT_TYPE);
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let is_image = content_type
|
|
|
|
.map(|v| String::from_utf8_lossy(v.as_ref()).contains("image/"))
|
|
|
|
.unwrap_or_default();
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
if resp.status() != StatusCode::OK || !is_image {
|
|
|
|
warn!("Got non-OK or non-image response code from upstream, proxying and not caching result.");
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let mut headers = DEFAULT_HEADERS.clone();
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
if let Some(content_type) = content_type {
|
|
|
|
headers.insert(CONTENT_TYPE, content_type.clone());
|
|
|
|
}
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
FetchResult::Data(
|
|
|
|
resp.status(),
|
|
|
|
headers,
|
|
|
|
resp.bytes().await.unwrap_or_default(),
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
let (content_type, length, last_mod) = {
|
|
|
|
let headers = resp.headers_mut();
|
|
|
|
(
|
|
|
|
headers.remove(CONTENT_TYPE),
|
|
|
|
headers.remove(CONTENT_LENGTH),
|
|
|
|
headers.remove(LAST_MODIFIED),
|
2021-07-10 22:53:28 +00:00
|
|
|
)
|
2021-07-17 17:32:43 +00:00
|
|
|
};
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let body = resp.bytes().await.unwrap();
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
debug!("Inserting into cache");
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let metadata =
|
|
|
|
ImageMetadata::new(content_type.clone(), length.clone(), last_mod.clone())
|
|
|
|
.unwrap();
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
match cache.put(key, body.clone(), metadata).await {
|
|
|
|
Ok(()) => {
|
|
|
|
debug!("Done putting into cache");
|
2021-07-10 22:53:28 +00:00
|
|
|
|
2021-07-17 17:32:43 +00:00
|
|
|
let mut headers = DEFAULT_HEADERS.clone();
|
|
|
|
if let Some(content_type) = content_type {
|
|
|
|
headers.insert(CONTENT_TYPE, content_type);
|
2021-07-10 22:53:28 +00:00
|
|
|
}
|
2021-07-17 17:32:43 +00:00
|
|
|
|
|
|
|
if let Some(content_length) = length {
|
|
|
|
headers.insert(CONTENT_LENGTH, content_length);
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(last_modified) = last_mod {
|
|
|
|
headers.insert(LAST_MODIFIED, last_modified);
|
2021-07-10 22:53:28 +00:00
|
|
|
}
|
2021-07-17 17:32:43 +00:00
|
|
|
|
|
|
|
FetchResult::Data(StatusCode::OK, headers, body)
|
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
warn!("Failed to insert into cache: {}", e);
|
|
|
|
FetchResult::InternalServerError
|
2021-07-10 22:53:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-17 17:32:43 +00:00
|
|
|
Err(e) => {
|
|
|
|
error!("Failed to fetch image from server: {}", e);
|
|
|
|
FetchResult::ServiceUnavailable
|
2021-07-10 22:53:28 +00:00
|
|
|
}
|
2021-07-17 17:32:43 +00:00
|
|
|
};
|
|
|
|
// This shouldn't happen
|
|
|
|
tx.send(resp).unwrap();
|
|
|
|
self.locks.write().remove(&url);
|
2021-07-10 22:53:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub const fn inner(&self) -> &Client {
|
|
|
|
&self.inner
|
|
|
|
}
|
|
|
|
}
|