Add LFU cache

2021-04-24 23:56:56 -04:00 · 2021-04-24 23:56:56 -04:00 · 0857ffadc7
parent 0db06fcabd
commit 0857ffadc7
9 changed files with 182 additions and 10 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -947,6 +947,12 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "lfu_cache"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33bfa6580d3aa7abe1f17d413dc9952d726eb588a0a8082821444cb89ffdabdf"
+
 [[package]]
 name = "libc"
 version = "0.2.93"
@ -1035,6 +1041,7 @@ dependencies = [
 "ctrlc",
 "dotenv",
 "futures",
+ "lfu_cache",
 "log",
 "lru",
 "once_cell",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -27,6 +27,7 @@ dotenv = "0.15"
 futures = "0.3"
 once_cell = "1"
 log = "0.4"
+lfu_cache = "1"
 lru = "0.6"
 parking_lot = "0.11"
 reqwest = { version = "0.11", default_features = false, features = [ "json", "stream", "rustls-tls" ] }
--- a/src/cache/disk_cache.rs
+++ b/src/cache/disk_cache.rs
--- a/src/cache/fs.rs
+++ b/src/cache/fs.rs
@ -21,6 +21,7 @@ use log::debug;
 use once_cell::sync::Lazy;
 use serde::Deserialize;
 use std::collections::HashMap;
+use std::error::Error;
 use std::fmt::Display;
 use std::io::SeekFrom;
 use std::num::NonZeroU32;
@ -196,9 +197,15 @@ where
 }

 pub struct ConcurrentFsStream {
+    /// The File to read from
    file: Pin<Box<BufReader<File>>>,
+    /// The channel to get updates from. The writer must send its status, else
+    /// this reader will never complete.
    receiver: Pin<Box<WatchStream<WritingStatus>>>,
+    /// The number of bytes the reader has read
    bytes_read: u32,
+    /// The number of bytes that the writer has reported it has written. If the
+    /// writer has not reported yet, this value is None.
    bytes_total: Option<NonZeroU32>,
 }

@ -227,7 +234,7 @@ impl ConcurrentFsStream {
 #[derive(Debug)]
 pub struct UpstreamError;

-impl std::error::Error for UpstreamError {}
+impl Error for UpstreamError {}

 impl Display for UpstreamError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
--- a/src/cache/mem_lfu.rs
+++ b/src/cache/mem_lfu.rs
@ -0,0 +1,145 @@
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Arc;
+
+use crate::cache::DiskCache;
+
+use super::{BoxedImageStream, Cache, CacheKey, CacheStream, ImageMetadata, MemStream};
+use async_trait::async_trait;
+use bytes::Bytes;
+use futures::FutureExt;
+use lfu_cache::LfuCache;
+use tokio::sync::mpsc::{channel, Sender};
+use tokio::sync::Mutex;
+
+/// Memory accelerated disk cache. Uses an LRU in memory to speed up reads.
+pub struct MemoryLfuCache {
+    inner: Arc<Box<dyn Cache>>,
+    cur_mem_size: AtomicU64,
+    mem_cache: Mutex<LfuCache<CacheKey, (Bytes, ImageMetadata, usize)>>,
+    master_sender: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
+}
+
+impl MemoryLfuCache {
+    #[allow(clippy::new_ret_no_self)]
+    pub async fn new(
+        disk_max_size: u64,
+        disk_path: PathBuf,
+        max_mem_size: u64,
+    ) -> Arc<Box<dyn Cache>> {
+        let (tx, mut rx) = channel(100);
+        let new_self = Arc::new(Box::new(Self {
+            inner: DiskCache::new(disk_max_size, disk_path).await,
+            cur_mem_size: AtomicU64::new(0),
+            mem_cache: Mutex::new(LfuCache::unbounded()),
+            master_sender: tx,
+        }) as Box<dyn Cache>);
+
+        let new_self_0 = Arc::clone(&new_self);
+        tokio::spawn(async move {
+            let new_self = new_self_0;
+            let max_mem_size = max_mem_size / 20 * 19;
+            while let Some((key, bytes, metadata, size)) = rx.recv().await {
+                new_self.increase_usage(size as u32);
+                new_self.put_internal(key, bytes, metadata, size).await;
+                while new_self.mem_size() >= max_mem_size {
+                    if let Some((_, _, _, size)) = new_self.pop_memory().await {
+                        new_self.decrease_usage(size as u64);
+                    } else {
+                        break;
+                    }
+                }
+            }
+        });
+
+        new_self
+    }
+}
+
+#[async_trait]
+impl Cache for MemoryLfuCache {
+    #[inline]
+    async fn get(
+        &self,
+        key: &CacheKey,
+    ) -> Option<Result<(CacheStream, ImageMetadata), super::CacheError>> {
+        match self.mem_cache.lock().now_or_never() {
+            Some(mut mem_cache) => match mem_cache.get(key).map(|(bytes, metadata, _)| {
+                Ok((CacheStream::Memory(MemStream(bytes.clone())), *metadata))
+            }) {
+                Some(v) => Some(v),
+                None => self.inner.get(key).await,
+            },
+            None => self.inner.get(key).await,
+        }
+    }
+
+    #[inline]
+    async fn put(
+        &self,
+        key: CacheKey,
+        image: BoxedImageStream,
+        metadata: ImageMetadata,
+    ) -> Result<CacheStream, super::CacheError> {
+        self.inner
+            .put_with_on_completed_callback(key, image, metadata, self.master_sender.clone())
+            .await
+    }
+
+    #[inline]
+    fn increase_usage(&self, amt: u32) {
+        self.cur_mem_size
+            .fetch_add(u64::from(amt), Ordering::Release);
+    }
+
+    #[inline]
+    fn decrease_usage(&self, amt: u64) {
+        self.cur_mem_size.fetch_sub(amt, Ordering::Release);
+    }
+
+    #[inline]
+    fn on_disk_size(&self) -> u64 {
+        self.inner.on_disk_size()
+    }
+
+    #[inline]
+    fn mem_size(&self) -> u64 {
+        self.cur_mem_size.load(Ordering::Acquire)
+    }
+
+    #[inline]
+    async fn put_with_on_completed_callback(
+        &self,
+        key: CacheKey,
+        image: BoxedImageStream,
+        metadata: ImageMetadata,
+        on_complete: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
+    ) -> Result<CacheStream, super::CacheError> {
+        self.inner
+            .put_with_on_completed_callback(key, image, metadata, on_complete)
+            .await
+    }
+
+    #[inline]
+    async fn put_internal(
+        &self,
+        key: CacheKey,
+        image: Bytes,
+        metadata: ImageMetadata,
+        size: usize,
+    ) {
+        self.mem_cache
+            .lock()
+            .await
+            .insert(key, (image, metadata, size));
+    }
+
+    #[inline]
+    async fn pop_memory(&self) -> Option<(CacheKey, Bytes, ImageMetadata, usize)> {
+        self.mem_cache
+            .lock()
+            .await
+            .pop_lfu_key_value()
+            .map(|(key, (bytes, metadata, size))| (key, bytes, metadata, size))
+    }
+}
--- a/src/cache/mem_cache.rs
+++ b/src/cache/mem_cache.rs
--- a/src/cache/mod.rs
+++ b/src/cache/mod.rs
@ -17,13 +17,15 @@ use tokio::io::BufReader;
 use tokio::sync::mpsc::Sender;
 use tokio_util::codec::{BytesCodec, FramedRead};

-pub use disk_cache::DiskCache;
+pub use disk::DiskCache;
 pub use fs::UpstreamError;
-pub use mem_cache::MemoryLruCache;
+pub use mem_lfu::MemoryLfuCache;
+pub use mem_lru::MemoryLruCache;

-mod disk_cache;
+mod disk;
 mod fs;
-mod mem_cache;
+mod mem_lfu;
+mod mem_lru;

 #[derive(PartialEq, Eq, Hash, Clone)]
 pub struct CacheKey(pub String, pub String, pub bool);
--- a/src/config.rs
+++ b/src/config.rs
@ -36,16 +36,17 @@ pub struct CliArgs {
    /// reasons.
    #[clap(long, env = "ENABLE_SERVER_STRING", takes_value = false)]
    pub enable_server_string: bool,
+    /// Changes the caching behavior to avoid buffering images in memory, and
+    /// instead use the filesystem as the buffer backing. This is useful for
+    /// clients in low (< 1GB) RAM environments.
    #[clap(
        short,
        long,
        conflicts_with("memory-quota"),
+        conflicts_with("use-lfu"),
        env = "LOW_MEMORY_MODE",
        takes_value = false
    )]
-    /// Changes the caching behavior to avoid buffering images in memory, and
-    /// instead use the filesystem as the buffer backing. This is useful for
-    /// clients in low (< 1GB) RAM environments.
    pub low_memory: bool,
    /// Changes verbosity. Default verbosity is INFO, while increasing counts of
    /// verbose flags increases the verbosity to DEBUG and TRACE, respectively.
@ -64,4 +65,8 @@ pub struct CliArgs {
    /// ramifications of this command.
    #[clap(long)]
    pub disable_token_validation: bool,
+    /// Use an LFU implementation for the in-memory cache instead of the default
+    /// LRU implementation.
+    #[clap(short = 'F', long)]
+    pub use_lfu: bool,
 }
--- a/src/main.rs
+++ b/src/main.rs
@ -24,7 +24,7 @@ use state::{RwLockServerState, ServerState};
 use stop::send_stop;
 use thiserror::Error;

-use crate::cache::MemoryLruCache;
+use crate::cache::{MemoryLfuCache, MemoryLruCache};
 use crate::state::DynamicServerCert;

 mod cache;
@ -60,6 +60,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let disk_quota = cli_args.disk_quota;
    let cache_path = cli_args.cache_path.clone();
    let low_mem_mode = cli_args.low_memory;
+    let use_lfu = cli_args.use_lfu;

    let log_level = match (cli_args.quiet, cli_args.verbose) {
        (n, _) if n > 2 => LevelFilter::Off,
@ -130,7 +131,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cache: Arc<Box<dyn Cache>> = if low_mem_mode {
        DiskCache::new(disk_quota, cache_path.clone()).await
    } else {
-        MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        if use_lfu {
+            MemoryLfuCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        } else {
+            MemoryLruCache::new(disk_quota, cache_path.clone(), memory_max_size).await
+        }
    };

    let cache_0 = Arc::clone(&cache);