Compare commits

..

No commits in common. "de17c738d27ab8ebeeee74a689252ebff93ce5a2" and "453cad1b76e673be9608498231c1abcdd7ad3645" have entirely different histories.

7 changed files with 118 additions and 224 deletions

56
src/cache/fs.rs vendored
View file

@ -1,21 +1,20 @@
use actix_web::HttpResponse;
use bytes::BytesMut;
use futures::{Future, Stream, StreamExt};
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::sync::atomic::{AtomicU8, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::Duration;
use std::{collections::HashMap, fmt::Display};
use bytes::{Bytes, BytesMut};
use futures::{Future, Stream, StreamExt};
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use reqwest::Error;
use tokio::fs::{remove_file, File};
use tokio::io::{AsyncRead, AsyncWriteExt, ReadBuf};
use tokio::sync::RwLock;
use tokio::time::Sleep;
use super::{BoxedImageStream, CacheStreamItem};
/// Keeps track of files that are currently being written to.
///
/// Why is this necessary? Consider the following situation:
@ -36,15 +35,14 @@ static WRITING_STATUS: Lazy<RwLock<HashMap<PathBuf, Arc<CacheStatus>>>> =
Lazy::new(|| RwLock::new(HashMap::new()));
/// Tries to read from the file, returning a byte stream if it exists
pub async fn read_file(path: &Path) -> Option<Result<FsStream, std::io::Error>> {
pub async fn read_file(path: &Path) -> Option<Result<FromFsStream, std::io::Error>> {
if path.exists() {
let status = WRITING_STATUS
.read()
.await
.get(path)
.map_or_else(|| Arc::new(CacheStatus::done()), Arc::clone);
Some(FsStream::new(path, status).await)
Some(FromFsStream::new(path, status).await)
} else {
None
}
@ -52,14 +50,14 @@ pub async fn read_file(path: &Path) -> Option<Result<FsStream, std::io::Error>>
/// Maps the input byte stream into one that writes to disk instead, returning
/// a stream that reads from disk instead.
pub async fn write_file(
pub async fn transparent_file_stream(
path: &Path,
mut byte_stream: BoxedImageStream,
) -> Result<FsStream, std::io::Error> {
mut byte_stream: impl Stream<Item = Result<Bytes, Error>> + Unpin + Send + 'static,
) -> Result<FromFsStream, std::io::Error> {
let done_writing_flag = Arc::new(CacheStatus::new());
let mut file = {
let mut write_lock = WRITING_STATUS.write().await;
let mut write_lock = WRITING_STATUS.write();
let file = File::create(path).await?; // we need to make sure the file exists and is truncated.
write_lock.insert(path.to_path_buf(), Arc::clone(&done_writing_flag));
file
@ -89,7 +87,7 @@ pub async fn write_file(
file.sync_all().await?; // we need metadata
}
let mut write_lock = WRITING_STATUS.write().await;
let mut write_lock = WRITING_STATUS.write();
// This needs to be written atomically with the write lock, else
// it's possible we have an inconsistent state
if errored {
@ -103,16 +101,16 @@ pub async fn write_file(
Ok::<_, std::io::Error>(())
});
Ok(FsStream::new(path, done_writing_flag).await?)
Ok(FromFsStream::new(path, done_writing_flag).await?)
}
pub struct FsStream {
pub struct FromFsStream {
file: Pin<Box<File>>,
sleep: Pin<Box<Sleep>>,
is_file_done_writing: Arc<CacheStatus>,
}
impl FsStream {
impl FromFsStream {
async fn new(path: &Path, is_done: Arc<CacheStatus>) -> Result<Self, std::io::Error> {
Ok(Self {
file: Box::pin(File::open(path).await?),
@ -124,19 +122,10 @@ impl FsStream {
}
/// Marker error signalling that something went wrong upstream.
///
/// The type carries no payload; its [`Display`] output is always the fixed
/// message `An upstream error occurred`.
#[derive(Debug)]
pub struct UpstreamError;

impl Display for UpstreamError {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // There is no per-instance detail to report, so emit the constant text.
        formatter.write_str("An upstream error occurred")
    }
}

impl std::error::Error for UpstreamError {}
impl Stream for FsStream {
type Item = CacheStreamItem;
impl Stream for FromFsStream {
type Item = Result<Bytes, UpstreamError>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let status = self.is_file_done_writing.load();
@ -158,13 +147,6 @@ impl Stream for FsStream {
}
}
/// Lets an [`UpstreamError`] be used wherever an `actix_web::Error` is
/// expected.
///
/// The incoming error value is ignored; the result is always produced from an
/// `HttpResponse::BadGateway()` builder finalized with `.finish()`.
impl From<UpstreamError> for actix_web::Error {
#[inline]
fn from(_: UpstreamError) -> Self {
HttpResponse::BadGateway().finish().into()
}
}
struct CacheStatus(AtomicU8);
impl CacheStatus {

View file

@ -2,15 +2,12 @@ use std::path::PathBuf;
use async_trait::async_trait;
use bytes::Bytes;
use futures::{stream::StreamExt, TryStreamExt};
use log::{debug, warn};
use lru::LruCache;
use tokio::fs::{remove_file, File};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use super::{
BoxedImageStream, Cache, CacheError, CacheKey, CacheStream, CachedImage, ImageMetadata,
};
use super::{Cache, CacheKey, CachedImage, ImageMetadata};
pub struct GenerationalCache {
in_memory: LruCache<CacheKey, (CachedImage, ImageMetadata)>,
@ -135,16 +132,9 @@ impl GenerationalCache {
#[async_trait]
impl Cache for GenerationalCache {
async fn get(
&mut self,
key: &CacheKey,
) -> Option<Result<(CacheStream, &ImageMetadata), CacheError>> {
async fn get(&mut self, key: &CacheKey) -> Option<&(CachedImage, ImageMetadata)> {
if self.in_memory.contains(key) {
return self
.in_memory
.get(key)
// TODO: get rid of clone?
.map(|(image, metadata)| Ok((CacheStream::from(image.clone()), metadata)));
return self.in_memory.get(key);
}
if let Some(metadata) = self.on_disk.pop(key) {
@ -159,7 +149,7 @@ impl Cache for GenerationalCache {
let mut buffer = metadata
.content_length
.map_or_else(Vec::new, |v| Vec::with_capacity(v as usize));
.map_or_else(Vec::new, Vec::with_capacity);
match file {
Ok(mut file) => {
@ -183,30 +173,20 @@ impl Cache for GenerationalCache {
buffer.shrink_to_fit();
self.disk_cur_size -= buffer.len() as u64;
let image = CacheStream::from(CachedImage(Bytes::from(buffer))).map_err(|e| e.into());
let image = CachedImage(Bytes::from(buffer));
return Some(self.put(key.clone(), Box::new(image), metadata).await);
// Since we just put it in the in-memory cache it should be there
// when we retrieve it
self.put(key.clone(), image, metadata).await;
return self.get(key).await;
}
None
}
async fn put(
&mut self,
key: CacheKey,
mut image: BoxedImageStream,
metadata: ImageMetadata,
) -> Result<(CacheStream, &ImageMetadata), CacheError> {
#[inline]
async fn put(&mut self, key: CacheKey, image: CachedImage, metadata: ImageMetadata) {
let mut hot_evicted = vec![];
let image = {
let mut resolved = vec![];
while let Some(bytes) = image.next().await {
resolved.extend(bytes?);
}
CachedImage(Bytes::from(resolved))
};
let new_img_size = image.0.len() as u64;
if self.memory_max_size >= new_img_size {
@ -224,19 +204,17 @@ impl Cache for GenerationalCache {
}
}
self.in_memory.put(key.clone(), (image, metadata));
self.in_memory.put(key, (image, metadata));
self.memory_cur_size += new_img_size;
} else {
// Image was larger than memory capacity, push directly into cold
// storage.
self.push_into_cold(key.clone(), image, metadata).await;
self.push_into_cold(key, image, metadata).await;
};
// Push evicted hot entries into cold storage.
for (key, image, metadata) in hot_evicted {
self.push_into_cold(key, image, metadata).await;
}
self.get(&key).await.unwrap()
}
}

39
src/cache/low_mem.rs vendored
View file

@ -1,14 +1,16 @@
//! Low memory caching stuff
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use async_trait::async_trait;
use bytes::Bytes;
use futures::Stream;
use lru::LruCache;
use super::{BoxedImageStream, Cache, CacheError, CacheKey, CacheStream, ImageMetadata};
use super::{fs::FromFsStream, ByteStream, Cache, CacheKey};
pub struct LowMemCache {
on_disk: LruCache<CacheKey, ImageMetadata>,
on_disk: LruCache<CacheKey, ()>,
disk_path: PathBuf,
disk_max_size: u64,
disk_cur_size: u64,
@ -25,37 +27,18 @@ impl LowMemCache {
}
}
// todo: schedule eviction
#[async_trait]
impl Cache for LowMemCache {
async fn get(
&mut self,
key: &CacheKey,
) -> Option<Result<(CacheStream, &ImageMetadata), CacheError>> {
if let Some(metadata) = self.on_disk.get(key) {
let path = self.disk_path.clone().join(PathBuf::from(key.clone()));
super::fs::read_file(&path).await.map(|res| {
res.map(|stream| (CacheStream::Fs(stream), metadata))
.map_err(Into::into)
})
/// Looks `key` up in the on-disk LRU index and, when it is present, tries to
/// open the matching file as a [`FromFsStream`].
///
/// Returns `None` when the key is not in `on_disk`; otherwise returns whatever
/// `fs::read_file` yields for the path derived from the key.
///
/// NOTE(review): the path is built from `key.to_string()` alone and
/// `self.disk_path` is not consulted here — confirm that this is intended.
async fn get_stream(&mut self, key: &CacheKey) -> Option<Result<FromFsStream, std::io::Error>> {
    // Bail out early on a miss; the lookup itself also goes through the LRU's
    // `get`, exactly as before.
    self.on_disk.get(key)?;

    let file_name = key.to_string();
    super::fs::read_file(Path::new(&file_name)).await
}
async fn put(
&mut self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
) -> Result<(CacheStream, &ImageMetadata), CacheError> {
let path = self.disk_path.clone().join(PathBuf::from(key.clone()));
self.on_disk.put(key.clone(), metadata);
super::fs::write_file(&path, image)
.await
.map(CacheStream::Fs)
.map(move |stream| (stream, self.on_disk.get(&key).unwrap()))
.map_err(Into::into)
/// Passes `image` to `fs::transparent_file_stream`, using the path obtained by
/// converting `key` into a `PathBuf`, and discards the returned value.
///
/// NOTE(review): `transparent_file_stream` is declared as an `async fn`, so
/// the value discarded below is a future that has never been polled. Rust
/// futures do no work until they are awaited, so the file creation and write
/// presumably never run from here — confirm, and `.await` (or spawn) the call
/// if the write is meant to happen.
/// NOTE(review): this method does not touch `self.on_disk` or
/// `self.disk_path`, so nothing here records the key for a later
/// `get_stream` — verify that this is intended.
async fn put_stream(&mut self, key: CacheKey, image: ByteStream) {
// Original note: this call has a side effect and the returned future is for reading
let _ = super::fs::transparent_file_stream(&PathBuf::from(key), image);
}
}

111
src/cache/mod.rs vendored
View file

@ -1,21 +1,17 @@
use std::fmt::Display;
use std::path::PathBuf;
use std::pin::Pin;
use std::str::FromStr;
use std::task::{Context, Poll};
use std::{fmt::Display, str::FromStr};
use actix_web::http::HeaderValue;
use async_trait::async_trait;
use bytes::Bytes;
use chrono::{DateTime, FixedOffset};
use fs::FsStream;
use futures::{Stream, StreamExt};
use thiserror::Error;
use futures::Stream;
pub use fs::UpstreamError;
pub use generational::GenerationalCache;
pub use low_mem::LowMemCache;
use self::fs::FromFsStream;
mod fs;
mod generational;
mod low_mem;
@ -40,23 +36,23 @@ impl From<CacheKey> for PathBuf {
}
}
#[derive(Clone)]
pub struct CachedImage(pub Bytes);
#[derive(Copy, Clone)]
pub struct ImageMetadata {
pub content_type: Option<ImageContentType>,
// If we can guarantee a non-zero u32 here we can save 4 bytes
pub content_length: Option<u32>,
pub content_length: Option<usize>,
pub last_modified: Option<DateTime<FixedOffset>>,
}
// Confirmed by Ply to be these types: https://link.eddie.sh/ZXfk0
// Note to self: If these are wrong blame Triscuit 9
/// Image content types known to the cache; each variant corresponds to one
/// MIME type string handled by the `FromStr` and `AsRef<str>` impls.
#[derive(Copy, Clone)]
pub enum ImageContentType {
/// `image/png`
Png,
/// `image/jpeg`
Jpeg,
/// `image/gif`
Gif,
/// `image/bmp`
Bmp,
/// `image/tif`
Tif,
}
pub struct InvalidContentType;
@ -70,6 +66,8 @@ impl FromStr for ImageContentType {
"image/png" => Ok(Self::Png),
"image/jpeg" => Ok(Self::Jpeg),
"image/gif" => Ok(Self::Gif),
"image/bmp" => Ok(Self::Bmp),
"image/tif" => Ok(Self::Tif),
_ => Err(InvalidContentType),
}
}
@ -82,6 +80,8 @@ impl AsRef<str> for ImageContentType {
Self::Png => "image/png",
Self::Jpeg => "image/jpeg",
Self::Gif => "image/gif",
Self::Bmp => "image/bmp",
Self::Tif => "image/tif",
}
}
}
@ -130,78 +130,37 @@ impl ImageMetadata {
}
}
/// Boxed, type-erased stream of image data: every item is either a chunk of
/// [`Bytes`] or a [`CacheError`]. The underlying stream must be `Unpin` and
/// `Send`.
type BoxedImageStream = Box<dyn Stream<Item = Result<Bytes, CacheError>> + Unpin + Send>;
/// Error type used as the `Err` item of `BoxedImageStream` and in the results
/// returned by the `Cache` trait's `get` and `put`.
///
/// Every variant is marked `#[error(transparent)]` and `#[from]`: the wrapped
/// error supplies the message, and a `From` conversion is generated for each
/// source type so `?` can be used directly.
#[derive(Error, Debug)]
pub enum CacheError {
/// Wraps a [`std::io::Error`].
#[error(transparent)]
Io(#[from] std::io::Error),
/// Wraps a [`reqwest::Error`].
#[error(transparent)]
Reqwest(#[from] reqwest::Error),
/// Wraps an [`UpstreamError`].
#[error(transparent)]
Upstream(#[from] UpstreamError),
}
#[async_trait]
pub trait Cache: Send + Sync {
async fn get(
async fn get(&mut self, _key: &CacheKey) -> Option<&(CachedImage, ImageMetadata)> {
unimplemented!()
}
async fn put(&mut self, _key: CacheKey, _image: CachedImage, _metadata: ImageMetadata) {
unimplemented!()
}
async fn get_stream(
&mut self,
key: &CacheKey,
) -> Option<Result<(CacheStream, &ImageMetadata), CacheError>>;
async fn put(
&mut self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
) -> Result<(CacheStream, &ImageMetadata), CacheError>;
}
_key: &CacheKey,
) -> Option<Result<FromFsStream, std::io::Error>> {
unimplemented!()
}
/// A stream of cached image bytes that comes from one of two sources.
pub enum CacheStream {
/// Stream backed by an [`FsStream`] from the `fs` module.
Fs(FsStream),
/// Stream backed by a [`MemStream`], which wraps a `Bytes` buffer directly.
Memory(MemStream),
}
impl From<CachedImage> for CacheStream {
fn from(image: CachedImage) -> Self {
Self::Memory(MemStream(image.0))
async fn put_stream(&mut self, _key: CacheKey, _image: ByteStream) {
unimplemented!()
}
}
type CacheStreamItem = Result<Bytes, UpstreamError>;
pub enum ByteStream {}
impl Stream for CacheStream {
type Item = CacheStreamItem;
impl Stream for ByteStream {
type Item = Result<Bytes, reqwest::Error>;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match self.get_mut() {
Self::Fs(stream) => stream.poll_next_unpin(cx),
Self::Memory(stream) => stream.poll_next_unpin(cx),
}
}
}
/// One-shot stream over a single in-memory [`Bytes`] buffer.
pub struct MemStream(Bytes);

impl Stream for MemStream {
    type Item = CacheStreamItem;

    /// Hands out the entire buffer the first time it is polled, leaving an
    /// empty buffer behind, and reports end-of-stream whenever the buffer is
    /// empty. This stream is always immediately ready.
    fn poll_next(mut self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // Move the payload out; `self.0` is left holding an empty `Bytes`.
        let payload = std::mem::take(&mut self.0);

        Poll::Ready(if payload.is_empty() {
            None
        } else {
            Some(Ok(payload))
        })
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn metadata_size() {
assert_eq!(std::mem::size_of::<ImageMetadata>(), 32);
fn poll_next(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Option<Self::Item>> {
todo!()
}
}

View file

@ -2,7 +2,7 @@ use std::num::{NonZeroU16, NonZeroU64};
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
use clap::{crate_authors, crate_description, crate_version, Clap};
use clap::Clap;
// Validate tokens is an atomic because it's faster than locking on rwlock.
pub static VALIDATE_TOKENS: AtomicBool = AtomicBool::new(false);
@ -11,7 +11,6 @@ pub static VALIDATE_TOKENS: AtomicBool = AtomicBool::new(false);
pub static SEND_SERVER_VERSION: AtomicBool = AtomicBool::new(false);
#[derive(Clap, Clone)]
#[clap(version = crate_version!(), author = crate_authors!(), about = crate_description!())]
pub struct CliArgs {
/// The port to listen on.
#[clap(short, long, default_value = "42069", env = "PORT")]
@ -35,8 +34,6 @@ pub struct CliArgs {
/// reasons.
#[clap(long, env = "ENABLE_SERVER_STRING", takes_value = false)]
pub enable_server_string: bool,
#[clap(short, long, conflicts_with("memory-quota"), env = "LOW_MEMORY_MODE")]
#[clap(short, long, conflicts_with("memory_quota"), env = "LOW_MEMORY_MODE")]
pub low_memory: bool,
#[clap(short, long, parse(from_occurrences))]
pub verbose: usize,
}

View file

@ -1,6 +1,6 @@
#![warn(clippy::pedantic, clippy::nursery)]
// We're end users, so these are ok
#![allow(clippy::module_name_repetitions)]
#![allow(clippy::future_not_send, clippy::module_name_repetitions)]
use std::env::{self, VarError};
use std::process;
@ -53,9 +53,7 @@ async fn main() -> Result<(), std::io::Error> {
println!(concat!(
env!("CARGO_PKG_NAME"),
" Copyright (C) 2021 ",
env!("CARGO_PKG_AUTHORS"),
"\n\n",
" Copyright (C) 2021 Edward Shen\n\n",
env!("CARGO_PKG_NAME"),
" is free software: you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
@ -78,11 +76,8 @@ async fn main() -> Result<(), std::io::Error> {
let cache_path = cli_args.cache_path.clone();
let low_mem_mode = cli_args.low_memory;
match cli_args.verbose {
0 => SimpleLogger::new().with_level(LevelFilter::Info),
1 => SimpleLogger::new().with_level(LevelFilter::Debug),
_ => SimpleLogger::new().with_level(LevelFilter::Trace),
}
SimpleLogger::new()
.with_level(LevelFilter::Info)
.init()
.unwrap();

View file

@ -1,3 +1,4 @@
use std::convert::Infallible;
use std::sync::atomic::Ordering;
use actix_web::dev::HttpResponseBuilder;
@ -10,14 +11,14 @@ use actix_web::{get, web::Data, HttpRequest, HttpResponse, Responder};
use base64::DecodeError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::{Stream, TryStreamExt};
use futures::stream;
use log::{error, info, warn};
use parking_lot::Mutex;
use serde::Deserialize;
use sodiumoxide::crypto::box_::{open_precomputed, Nonce, PrecomputedKey, NONCEBYTES};
use thiserror::Error;
use crate::cache::{Cache, CacheKey, ImageMetadata, UpstreamError};
use crate::cache::{Cache, CacheKey, CachedImage, ImageMetadata};
use crate::client_api_version;
use crate::config::{SEND_SERVER_VERSION, VALIDATE_TOKENS};
use crate::state::RwLockServerState;
@ -181,10 +182,8 @@ async fn fetch_image(
) -> ServerResponse {
let key = CacheKey(chapter_hash, file_name, is_data_saver);
match cache.lock().get(&key).await {
Some(Ok((image, metadata))) => return construct_response(image, metadata),
Some(Err(_)) => return ServerResponse::HttpResponse(HttpResponse::BadGateway().finish()),
_ => (),
if let Some((image, metadata)) = cache.lock().get(&key).await {
return construct_response(image, metadata);
}
// It's important to not get a write lock before this request, else we're
@ -239,22 +238,22 @@ async fn fetch_image(
headers.remove(LAST_MODIFIED),
)
};
let body = resp.bytes_stream().map_err(|e| e.into());
let metadata = ImageMetadata::new(content_type, length, last_mod).unwrap();
let (stream, metadata) = {
match cache.lock().put(key, Box::new(body), metadata).await {
Ok((stream, metadata)) => (stream, *metadata),
let body = resp.bytes().await;
match body {
Ok(bytes) => {
let cached = ImageMetadata::new(content_type, length, last_mod).unwrap();
let image = CachedImage(bytes);
let resp = construct_response(&image, &cached);
cache.lock().put(key, image, cached).await;
return resp;
}
Err(e) => {
warn!("Failed to insert into cache: {}", e);
return ServerResponse::HttpResponse(
HttpResponse::InternalServerError().finish(),
);
warn!("Got payload error from image server: {}", e);
ServerResponse::HttpResponse(
push_headers(&mut HttpResponse::ServiceUnavailable()).finish(),
)
}
}
};
return construct_response(stream, &metadata);
}
Err(e) => {
error!("Failed to fetch image from server: {}", e);
@ -265,22 +264,23 @@ async fn fetch_image(
}
}
fn construct_response(
data: impl Stream<Item = Result<Bytes, UpstreamError>> + Unpin + 'static,
metadata: &ImageMetadata,
) -> ServerResponse {
fn construct_response(cached: &CachedImage, metadata: &ImageMetadata) -> ServerResponse {
let data: Vec<Result<Bytes, Infallible>> = cached
.0
.to_vec()
.chunks(1460) // TCP MSS default size
.map(|v| Ok(Bytes::from(v.to_vec())))
.collect();
let mut resp = HttpResponse::Ok();
if let Some(content_type) = metadata.content_type {
if let Some(content_type) = &metadata.content_type {
resp.append_header((CONTENT_TYPE, content_type.as_ref()));
}
if let Some(content_length) = metadata.content_length {
resp.append_header((CONTENT_LENGTH, content_length));
if let Some(content_length) = &metadata.content_length {
resp.append_header((CONTENT_LENGTH, content_length.to_string()));
}
if let Some(last_modified) = metadata.last_modified {
if let Some(last_modified) = &metadata.last_modified {
resp.append_header((LAST_MODIFIED, last_modified.to_rfc2822()));
}
ServerResponse::HttpResponse(push_headers(&mut resp).streaming(data))
ServerResponse::HttpResponse(push_headers(&mut resp).streaming(stream::iter(data)))
}