Add LFU cache

This commit is contained in:
Edward Shen 2021-04-24 23:56:56 -04:00
parent 0db06fcabd
commit 0857ffadc7
Signed by: edward
GPG key ID: 19182661E818369F
9 changed files with 182 additions and 10 deletions

7
Cargo.lock generated
View file

@ -947,6 +947,12 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "lfu_cache"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33bfa6580d3aa7abe1f17d413dc9952d726eb588a0a8082821444cb89ffdabdf"
[[package]]
name = "libc"
version = "0.2.93"
@ -1035,6 +1041,7 @@ dependencies = [
"ctrlc",
"dotenv",
"futures",
"lfu_cache",
"log",
"lru",
"once_cell",

View file

@ -27,6 +27,7 @@ dotenv = "0.15"
futures = "0.3"
once_cell = "1"
log = "0.4"
lfu_cache = "1"
lru = "0.6"
parking_lot = "0.11"
reqwest = { version = "0.11", default_features = false, features = [ "json", "stream", "rustls-tls" ] }

9
src/cache/fs.rs vendored
View file

@ -21,6 +21,7 @@ use log::debug;
use once_cell::sync::Lazy;
use serde::Deserialize;
use std::collections::HashMap;
use std::error::Error;
use std::fmt::Display;
use std::io::SeekFrom;
use std::num::NonZeroU32;
@ -196,9 +197,15 @@ where
}
/// A reader over a file that may still be concurrently written by another
/// task.
///
/// NOTE(review): the writer reports progress via `receiver`; presumably the
/// stream impl compares `bytes_read` against `bytes_total` to decide when the
/// stream is complete — confirm against the `Stream` impl, which is outside
/// this view.
pub struct ConcurrentFsStream {
    /// The File to read from
    file: Pin<Box<BufReader<File>>>,
    /// The channel to get updates from. The writer must send its status, else
    /// this reader will never complete.
    receiver: Pin<Box<WatchStream<WritingStatus>>>,
    /// The number of bytes the reader has read
    bytes_read: u32,
    /// The number of bytes that the writer has reported it has written. If the
    /// writer has not reported yet, this value is None.
    bytes_total: Option<NonZeroU32>,
}
@ -227,7 +234,7 @@ impl ConcurrentFsStream {
#[derive(Debug)]
pub struct UpstreamError;
impl std::error::Error for UpstreamError {}
impl Error for UpstreamError {}
impl Display for UpstreamError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

145
src/cache/mem_lfu.rs vendored Normal file
View file

@ -0,0 +1,145 @@
use std::convert::TryFrom;
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use crate::cache::DiskCache;
use super::{BoxedImageStream, Cache, CacheKey, CacheStream, ImageMetadata, MemStream};
use async_trait::async_trait;
use bytes::Bytes;
use futures::FutureExt;
use lfu_cache::LfuCache;
use tokio::sync::mpsc::{channel, Sender};
use tokio::sync::Mutex;
/// Memory accelerated disk cache. Uses an LFU in memory to speed up reads.
pub struct MemoryLfuCache {
    /// The disk cache that persists all entries; reads fall back to it on a
    /// memory miss.
    inner: Arc<Box<dyn Cache>>,
    /// Running total, in bytes, of the entries currently held in `mem_cache`.
    cur_mem_size: AtomicU64,
    /// In-memory LFU of (image bytes, metadata, byte size), keyed by cache key.
    mem_cache: Mutex<LfuCache<CacheKey, (Bytes, ImageMetadata, usize)>>,
    /// Sender handed to the disk cache on `put` so completed writes are
    /// reported back to the background task that populates `mem_cache`.
    master_sender: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
}
impl MemoryLfuCache {
#[allow(clippy::new_ret_no_self)]
pub async fn new(
disk_max_size: u64,
disk_path: PathBuf,
max_mem_size: u64,
) -> Arc<Box<dyn Cache>> {
let (tx, mut rx) = channel(100);
let new_self = Arc::new(Box::new(Self {
inner: DiskCache::new(disk_max_size, disk_path).await,
cur_mem_size: AtomicU64::new(0),
mem_cache: Mutex::new(LfuCache::unbounded()),
master_sender: tx,
}) as Box<dyn Cache>);
let new_self_0 = Arc::clone(&new_self);
tokio::spawn(async move {
let new_self = new_self_0;
let max_mem_size = max_mem_size / 20 * 19;
while let Some((key, bytes, metadata, size)) = rx.recv().await {
new_self.increase_usage(size as u32);
new_self.put_internal(key, bytes, metadata, size).await;
while new_self.mem_size() >= max_mem_size {
if let Some((_, _, _, size)) = new_self.pop_memory().await {
new_self.decrease_usage(size as u64);
} else {
break;
}
}
}
});
new_self
}
}
#[async_trait]
impl Cache for MemoryLfuCache {
    /// Fetches an entry, preferring the in-memory LFU and falling back to the
    /// disk cache on a miss or when the memory lock is contended.
    #[inline]
    async fn get(
        &self,
        key: &CacheKey,
    ) -> Option<Result<(CacheStream, ImageMetadata), super::CacheError>> {
        // Only take the lock if it is immediately available; otherwise serve
        // from disk rather than stalling the request behind the writer task.
        if let Some(mut mem_cache) = self.mem_cache.lock().now_or_never() {
            // LfuCache::get also bumps the entry's access frequency.
            if let Some((bytes, metadata, _)) = mem_cache.get(key) {
                return Some(Ok((CacheStream::Memory(MemStream(bytes.clone())), *metadata)));
            }
            // Release the lock *before* hitting the disk so a slow disk read
            // cannot block writers to the memory cache. (Previously the guard
            // stayed bound through the match arm, across this await.)
            drop(mem_cache);
        }
        self.inner.get(key).await
    }

    /// Writes to the disk cache; on completion the disk layer reports the
    /// written bytes back via `master_sender`, which the background task
    /// uses to populate the memory layer.
    #[inline]
    async fn put(
        &self,
        key: CacheKey,
        image: BoxedImageStream,
        metadata: ImageMetadata,
    ) -> Result<CacheStream, super::CacheError> {
        self.inner
            .put_with_on_completed_callback(key, image, metadata, self.master_sender.clone())
            .await
    }

    /// Adds `amt` bytes to the tracked memory usage.
    #[inline]
    fn increase_usage(&self, amt: u32) {
        self.cur_mem_size
            .fetch_add(u64::from(amt), Ordering::Release);
    }

    /// Subtracts `amt` bytes from the tracked memory usage.
    #[inline]
    fn decrease_usage(&self, amt: u64) {
        self.cur_mem_size.fetch_sub(amt, Ordering::Release);
    }

    /// Bytes used by the underlying disk cache.
    #[inline]
    fn on_disk_size(&self) -> u64 {
        self.inner.on_disk_size()
    }

    /// Bytes currently tracked as resident in the memory layer.
    #[inline]
    fn mem_size(&self) -> u64 {
        self.cur_mem_size.load(Ordering::Acquire)
    }

    /// Delegates to the disk cache with a caller-supplied completion channel,
    /// bypassing this layer's `master_sender`.
    #[inline]
    async fn put_with_on_completed_callback(
        &self,
        key: CacheKey,
        image: BoxedImageStream,
        metadata: ImageMetadata,
        on_complete: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
    ) -> Result<CacheStream, super::CacheError> {
        self.inner
            .put_with_on_completed_callback(key, image, metadata, on_complete)
            .await
    }

    /// Inserts an entry directly into the in-memory LFU. Does not update
    /// `cur_mem_size`; the caller is responsible for usage accounting.
    #[inline]
    async fn put_internal(
        &self,
        key: CacheKey,
        image: Bytes,
        metadata: ImageMetadata,
        size: usize,
    ) {
        self.mem_cache
            .lock()
            .await
            .insert(key, (image, metadata, size));
    }

    /// Evicts and returns the least-frequently-used in-memory entry, if any.
    /// Does not update `cur_mem_size`; the caller adjusts usage.
    #[inline]
    async fn pop_memory(&self) -> Option<(CacheKey, Bytes, ImageMetadata, usize)> {
        self.mem_cache
            .lock()
            .await
            .pop_lfu_key_value()
            .map(|(key, (bytes, metadata, size))| (key, bytes, metadata, size))
    }
}

10
src/cache/mod.rs vendored
View file

@ -17,13 +17,15 @@ use tokio::io::BufReader;
use tokio::sync::mpsc::Sender;
use tokio_util::codec::{BytesCodec, FramedRead};
pub use disk_cache::DiskCache;
pub use disk::DiskCache;
pub use fs::UpstreamError;
pub use mem_cache::MemoryLruCache;
pub use mem_lfu::MemoryLfuCache;
pub use mem_lru::MemoryLruCache;
mod disk_cache;
mod disk;
mod fs;
mod mem_cache;
mod mem_lfu;
mod mem_lru;
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct CacheKey(pub String, pub String, pub bool);

View file

@ -36,16 +36,17 @@ pub struct CliArgs {
/// reasons.
#[clap(long, env = "ENABLE_SERVER_STRING", takes_value = false)]
pub enable_server_string: bool,
/// Changes the caching behavior to avoid buffering images in memory, and
/// instead use the filesystem as the buffer backing. This is useful for
/// clients in low (< 1GB) RAM environments.
#[clap(
short,
long,
conflicts_with("memory-quota"),
conflicts_with("use-lfu"),
env = "LOW_MEMORY_MODE",
takes_value = false
)]
/// Changes the caching behavior to avoid buffering images in memory, and
/// instead use the filesystem as the buffer backing. This is useful for
/// clients in low (< 1GB) RAM environments.
pub low_memory: bool,
/// Changes verbosity. Default verbosity is INFO, while increasing counts of
/// verbose flags increases the verbosity to DEBUG and TRACE, respectively.
@ -64,4 +65,8 @@ pub struct CliArgs {
/// ramifications of this command.
#[clap(long)]
pub disable_token_validation: bool,
/// Use an LFU implementation for the in-memory cache instead of the default
/// LRU implementation.
#[clap(short = 'F', long)]
pub use_lfu: bool,
}

View file

@ -24,7 +24,7 @@ use state::{RwLockServerState, ServerState};
use stop::send_stop;
use thiserror::Error;
use crate::cache::MemoryLruCache;
use crate::cache::{MemoryLfuCache, MemoryLruCache};
use crate::state::DynamicServerCert;
mod cache;
@ -60,6 +60,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let disk_quota = cli_args.disk_quota;
let cache_path = cli_args.cache_path.clone();
let low_mem_mode = cli_args.low_memory;
let use_lfu = cli_args.use_lfu;
let log_level = match (cli_args.quiet, cli_args.verbose) {
(n, _) if n > 2 => LevelFilter::Off,
@ -130,7 +131,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let cache: Arc<Box<dyn Cache>> = if low_mem_mode {
DiskCache::new(disk_quota, cache_path.clone()).await
} else {
MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
if use_lfu {
MemoryLfuCache::new(disk_quota, cache_path.clone(), memory_max_size).await
} else {
MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
}
};
let cache_0 = Arc::clone(&cache);