Add LFU cache

Edward Shen 2021-04-24 23:56:56 -04:00
parent 0db06fcabd
commit 0857ffadc7
Signed by: edward
GPG key ID: 19182661E818369F
9 changed files with 182 additions and 10 deletions

Cargo.lock (generated)

@@ -947,6 +947,12 @@ dependencies = [
 "static_assertions",
]

+[[package]]
+name = "lfu_cache"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33bfa6580d3aa7abe1f17d413dc9952d726eb588a0a8082821444cb89ffdabdf"
+
[[package]]
name = "libc"
version = "0.2.93"
@@ -1035,6 +1041,7 @@ dependencies = [
 "ctrlc",
 "dotenv",
 "futures",
+ "lfu_cache",
 "log",
 "lru",
 "once_cell",

Cargo.toml

@@ -27,6 +27,7 @@ dotenv = "0.15"
futures = "0.3"
once_cell = "1"
log = "0.4"
+lfu_cache = "1"
lru = "0.6"
parking_lot = "0.11"
reqwest = { version = "0.11", default_features = false, features = [ "json", "stream", "rustls-tls" ] }

src/cache/fs.rs

@@ -21,6 +21,7 @@ use log::debug;
use once_cell::sync::Lazy;
use serde::Deserialize;
use std::collections::HashMap;
+use std::error::Error;
use std::fmt::Display;
use std::io::SeekFrom;
use std::num::NonZeroU32;
@@ -196,9 +197,15 @@ where
}

pub struct ConcurrentFsStream {
+    /// The File to read from
    file: Pin<Box<BufReader<File>>>,
+    /// The channel to get updates from. The writer must send its status, else
+    /// this reader will never complete.
    receiver: Pin<Box<WatchStream<WritingStatus>>>,
+    /// The number of bytes the reader has read
    bytes_read: u32,
+    /// The number of bytes that the writer has reported it has written. If the
+    /// writer has not reported yet, this value is None.
    bytes_total: Option<NonZeroU32>,
}
@@ -227,7 +234,7 @@ impl ConcurrentFsStream {
#[derive(Debug)]
pub struct UpstreamError;

-impl std::error::Error for UpstreamError {}
+impl Error for UpstreamError {}

impl Display for UpstreamError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

src/cache/mem_lfu.rs (new file)

@@ -0,0 +1,145 @@
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;

use crate::cache::DiskCache;

use super::{BoxedImageStream, Cache, CacheKey, CacheStream, ImageMetadata, MemStream};
use async_trait::async_trait;
use bytes::Bytes;
use futures::FutureExt;
use lfu_cache::LfuCache;
use tokio::sync::mpsc::{channel, Sender};
use tokio::sync::Mutex;

/// Memory accelerated disk cache. Uses an LFU in memory to speed up reads.
pub struct MemoryLfuCache {
    inner: Arc<Box<dyn Cache>>,
    cur_mem_size: AtomicU64,
    mem_cache: Mutex<LfuCache<CacheKey, (Bytes, ImageMetadata, usize)>>,
    master_sender: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
}

impl MemoryLfuCache {
    #[allow(clippy::new_ret_no_self)]
    pub async fn new(
        disk_max_size: u64,
        disk_path: PathBuf,
        max_mem_size: u64,
    ) -> Arc<Box<dyn Cache>> {
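        // Completed writes are reported over this bounded channel so the
        // background task below can copy the bytes into the in-memory cache.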
        let (tx, mut rx) = channel(100);
        let new_self = Arc::new(Box::new(Self {
            inner: DiskCache::new(disk_max_size, disk_path).await,
            cur_mem_size: AtomicU64::new(0),
            mem_cache: Mutex::new(LfuCache::unbounded()),
            master_sender: tx,
        }) as Box<dyn Cache>);

        let new_self_0 = Arc::clone(&new_self);
        tokio::spawn(async move {
            let new_self = new_self_0;
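            // Use 95% (19/20) of the configured quota as the effective cap,
            // presumably to leave a little headroom before eviction kicks in.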
            let max_mem_size = max_mem_size / 20 * 19;
            while let Some((key, bytes, metadata, size)) = rx.recv().await {
                new_self.increase_usage(size as u32);
                new_self.put_internal(key, bytes, metadata, size).await;
                while new_self.mem_size() >= max_mem_size {
                    if let Some((_, _, _, size)) = new_self.pop_memory().await {
                        new_self.decrease_usage(size as u64);
                    } else {
                        break;
                    }
                }
            }
        });

        new_self
    }
}

#[async_trait]
impl Cache for MemoryLfuCache {
    #[inline]
    async fn get(
        &self,
        key: &CacheKey,
    ) -> Option<Result<(CacheStream, ImageMetadata), super::CacheError>> {
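        // Poll the cache mutex exactly once; if it is currently contended,
        // fall back to the disk cache rather than blocking this request.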
        match self.mem_cache.lock().now_or_never() {
            Some(mut mem_cache) => match mem_cache.get(key).map(|(bytes, metadata, _)| {
                Ok((CacheStream::Memory(MemStream(bytes.clone())), *metadata))
            }) {
                Some(v) => Some(v),
                None => self.inner.get(key).await,
            },
            None => self.inner.get(key).await,
        }
    }

    #[inline]
    async fn put(
        &self,
        key: CacheKey,
        image: BoxedImageStream,
        metadata: ImageMetadata,
    ) -> Result<CacheStream, super::CacheError> {
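        // Write through to the disk cache; on completion, the callback hands
        // the bytes to master_sender and the background task caches them in
        // memory.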
        self.inner
            .put_with_on_completed_callback(key, image, metadata, self.master_sender.clone())
            .await
    }

    #[inline]
    fn increase_usage(&self, amt: u32) {
        self.cur_mem_size
            .fetch_add(u64::from(amt), Ordering::Release);
    }

    #[inline]
    fn decrease_usage(&self, amt: u64) {
        self.cur_mem_size.fetch_sub(amt, Ordering::Release);
    }

    #[inline]
    fn on_disk_size(&self) -> u64 {
        self.inner.on_disk_size()
    }

    #[inline]
    fn mem_size(&self) -> u64 {
        self.cur_mem_size.load(Ordering::Acquire)
    }

    #[inline]
    async fn put_with_on_completed_callback(
        &self,
        key: CacheKey,
        image: BoxedImageStream,
        metadata: ImageMetadata,
        on_complete: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
    ) -> Result<CacheStream, super::CacheError> {
        self.inner
            .put_with_on_completed_callback(key, image, metadata, on_complete)
            .await
    }

    #[inline]
    async fn put_internal(
        &self,
        key: CacheKey,
        image: Bytes,
        metadata: ImageMetadata,
        size: usize,
    ) {
        self.mem_cache
            .lock()
            .await
            .insert(key, (image, metadata, size));
    }

    #[inline]
    async fn pop_memory(&self) -> Option<(CacheKey, Bytes, ImageMetadata, usize)> {
        self.mem_cache
            .lock()
            .await
            .pop_lfu_key_value()
            .map(|(key, (bytes, metadata, size))| (key, bytes, metadata, size))
    }
}
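
For reference, the new cache only relies on a handful of lfu_cache methods: LfuCache::unbounded, insert, get, and pop_lfu_key_value. The following is a minimal sketch of how they interact; the string keys and integer values are illustrative, not from the commit:

use lfu_cache::LfuCache;

fn main() {
    // An unbounded cache performs no automatic eviction; the caller drives
    // eviction via pop_lfu_key_value, as the background task above does.
    let mut cache = LfuCache::unbounded();
    cache.insert("frequent", 1);
    cache.insert("rare", 2);

    // Each get bumps the entry's use count.
    cache.get(&"frequent");

    // The least frequently used entry is popped first.
    assert_eq!(cache.pop_lfu_key_value(), Some(("rare", 2)));
}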

src/cache/mod.rs

@@ -17,13 +17,15 @@ use tokio::io::BufReader;
use tokio::sync::mpsc::Sender;
use tokio_util::codec::{BytesCodec, FramedRead};

-pub use disk_cache::DiskCache;
+pub use disk::DiskCache;
pub use fs::UpstreamError;
-pub use mem_cache::MemoryLruCache;
+pub use mem_lfu::MemoryLfuCache;
+pub use mem_lru::MemoryLruCache;

-mod disk_cache;
+mod disk;
mod fs;
-mod mem_cache;
+mod mem_lfu;
+mod mem_lru;

#[derive(PartialEq, Eq, Hash, Clone)]
pub struct CacheKey(pub String, pub String, pub bool);

src/config.rs

@@ -36,16 +36,17 @@ pub struct CliArgs {
    /// reasons.
    #[clap(long, env = "ENABLE_SERVER_STRING", takes_value = false)]
    pub enable_server_string: bool,
+    /// Changes the caching behavior to avoid buffering images in memory, and
+    /// instead use the filesystem as the buffer backing. This is useful for
+    /// clients in low (< 1GB) RAM environments.
    #[clap(
        short,
        long,
        conflicts_with("memory-quota"),
+        conflicts_with("use-lfu"),
        env = "LOW_MEMORY_MODE",
        takes_value = false
    )]
-    /// Changes the caching behavior to avoid buffering images in memory, and
-    /// instead use the filesystem as the buffer backing. This is useful for
-    /// clients in low (< 1GB) RAM environments.
    pub low_memory: bool,
    /// Changes verbosity. Default verbosity is INFO, while increasing counts of
    /// verbose flags increases the verbosity to DEBUG and TRACE, respectively.
@@ -64,4 +65,8 @@ pub struct CliArgs {
    /// ramifications of this command.
    #[clap(long)]
    pub disable_token_validation: bool,
+    /// Use an LFU implementation for the in-memory cache instead of the default
+    /// LRU implementation.
+    #[clap(short = 'F', long)]
+    pub use_lfu: bool,
}

src/main.rs

@@ -24,7 +24,7 @@ use state::{RwLockServerState, ServerState};
use stop::send_stop;
use thiserror::Error;

-use crate::cache::MemoryLruCache;
+use crate::cache::{MemoryLfuCache, MemoryLruCache};
use crate::state::DynamicServerCert;

mod cache;
@@ -60,6 +60,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let disk_quota = cli_args.disk_quota;
    let cache_path = cli_args.cache_path.clone();
    let low_mem_mode = cli_args.low_memory;
+    let use_lfu = cli_args.use_lfu;

    let log_level = match (cli_args.quiet, cli_args.verbose) {
        (n, _) if n > 2 => LevelFilter::Off,
@@ -130,7 +131,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cache: Arc<Box<dyn Cache>> = if low_mem_mode {
        DiskCache::new(disk_quota, cache_path.clone()).await
    } else {
-        MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        if use_lfu {
+            MemoryLfuCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        } else {
+            MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        }
    };

    let cache_0 = Arc::clone(&cache);