add missing files

This commit is contained in:
Edward Shen 2021-04-23 00:13:36 -04:00
parent 8e94bd5033
commit ef5ed89626
Signed by: edward
GPG key ID: 19182661E818369F
3 changed files with 373 additions and 1 deletions

2
.gitignore vendored
View file

@ -1,6 +1,6 @@
/target
.env
cache
/cache
flamegraph*.svg
perf.data*
dhat.out.*

227
src/cache/disk_cache.rs vendored Normal file
View file

@ -0,0 +1,227 @@
//! Low memory caching stuff
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use bytes::Bytes;
use futures::StreamExt;
use log::{warn, LevelFilter};
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{ConnectOptions, SqlitePool};
use tokio::fs::remove_file;
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio_stream::wrappers::ReceiverStream;
use super::{BoxedImageStream, Cache, CacheError, CacheKey, CacheStream, ImageMetadata};
pub struct LowMemCache {
disk_path: PathBuf,
disk_cur_size: AtomicU64,
db_update_channel_sender: Sender<DbMessage>,
}
enum DbMessage {
Get(Arc<PathBuf>),
Put(Arc<PathBuf>, u32),
}
impl LowMemCache {
/// Constructs a new low memory cache at the provided path and capaci ty.
/// This internally spawns a task that will wait for filesystem
/// notifications when a file has been written.
#[allow(clippy::new_ret_no_self)]
pub async fn new(disk_max_size: u64, disk_path: PathBuf) -> Arc<Box<dyn Cache>> {
let (db_tx, db_rx) = channel(128);
let db_pool = {
let db_url = format!("sqlite:{}/metadata.sqlite", disk_path.to_str().unwrap());
let mut options = SqliteConnectOptions::from_str(&db_url)
.unwrap()
.create_if_missing(true);
options.log_statements(LevelFilter::Trace);
let db = SqlitePool::connect_with(options).await.unwrap();
// Run db init
sqlx::query_file!("./db_queries/init.sql")
.execute(&mut db.acquire().await.unwrap())
.await
.unwrap();
db
};
let new_self: Arc<Box<dyn Cache>> = Arc::new(Box::new(Self {
disk_path,
disk_cur_size: AtomicU64::new(0),
db_update_channel_sender: db_tx,
}));
tokio::spawn(db_listener(
Arc::clone(&new_self),
db_rx,
db_pool,
disk_max_size / 20 * 19,
));
new_self
}
}
/// Spawn a new task that will listen for updates to the db, pruning if the size
/// becomes too large.
async fn db_listener(
cache: Arc<Box<dyn Cache>>,
db_rx: Receiver<DbMessage>,
db_pool: SqlitePool,
max_on_disk_size: u64,
) {
let mut recv_stream = ReceiverStream::new(db_rx).ready_chunks(128);
while let Some(messages) = recv_stream.next().await {
let now = chrono::Utc::now();
let mut transaction = db_pool.begin().await.unwrap();
for message in messages {
match message {
DbMessage::Get(entry) => {
let key = entry.as_os_str().to_str();
let query =
sqlx::query!("update Images set accessed = ? where id = ?", now, key)
.execute(&mut transaction)
.await;
if let Err(e) = query {
warn!("Failed to update timestamp in db for {:?}: {}", key, e);
}
}
DbMessage::Put(entry, size) => {
let key = entry.as_os_str().to_str();
let query = sqlx::query!(
"insert into Images (id, size, accessed) values (?, ?, ?) on conflict do nothing",
key,
size,
now,
)
.execute(&mut transaction)
.await;
if let Err(e) = query {
warn!("Failed to add {:?} to db: {}", key, e);
}
cache.increase_usage(size);
}
}
}
transaction.commit().await.unwrap();
if cache.on_disk_size() >= max_on_disk_size {
let mut conn = db_pool.acquire().await.unwrap();
let items =
sqlx::query!("select id, size from Images order by accessed asc limit 1000")
.fetch_all(&mut conn)
.await
.unwrap();
let mut size_freed = 0;
for item in items {
// Can't be helped, SQLite doesn't support unsigned integers
#[allow(clippy::cast_sign_loss)]
{
size_freed += item.size as u64;
}
tokio::spawn(remove_file(item.id));
}
cache.decrease_usage(size_freed);
}
}
}
#[async_trait]
impl Cache for LowMemCache {
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), CacheError>> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(key)));
let path_0 = Arc::clone(&path);
tokio::spawn(async move { channel.send(DbMessage::Get(path_0)).await });
super::fs::read_file(&path)
.await
.map(|res| res.map_err(Into::into))
}
async fn put(
&self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
) -> Result<CacheStream, CacheError> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(&key)));
let path_0 = Arc::clone(&path);
let db_callback = |size: u32| async move {
let _ = channel.send(DbMessage::Put(path_0, size)).await;
};
super::fs::write_file(&path, key, image, metadata, db_callback, None)
.await
.map_err(Into::into)
}
#[inline]
fn increase_usage(&self, amt: u32) {
self.disk_cur_size
.fetch_add(u64::from(amt), Ordering::Release);
}
#[inline]
fn decrease_usage(&self, amt: u64) {
self.disk_cur_size.fetch_sub(amt, Ordering::Release);
}
#[inline]
fn on_disk_size(&self) -> u64 {
(self.disk_cur_size.load(Ordering::Acquire) + 4095) / 4096 * 4096
}
fn mem_size(&self) -> u64 {
0
}
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
on_complete: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
) -> Result<CacheStream, CacheError> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(&key)));
let path_0 = Arc::clone(&path);
let db_callback = |size: u32| async move {
let _ = channel.send(DbMessage::Put(path_0, size)).await;
};
super::fs::write_file(&path, key, image, metadata, db_callback, Some(on_complete))
.await
.map_err(Into::into)
}
#[inline]
async fn put_internal(&self, _: CacheKey, _: Bytes, _: ImageMetadata, _: usize) {
unimplemented!()
}
#[inline]
async fn pop_memory(&self) -> Option<(CacheKey, Bytes, ImageMetadata, usize)> {
None
}
}

145
src/cache/mem_cache.rs vendored Normal file
View file

@ -0,0 +1,145 @@
use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use crate::cache::LowMemCache;
use super::{BoxedImageStream, Cache, CacheKey, CacheStream, ImageMetadata, MemStream};
use async_trait::async_trait;
use bytes::Bytes;
use futures::FutureExt;
use lru::LruCache;
use tokio::sync::mpsc::{channel, Sender};
use tokio::sync::Mutex;
/// Memory accelerated disk cache. Uses an LRU in memory to speed up reads.
pub struct MemoryLruCache {
inner: Arc<Box<dyn Cache>>,
cur_mem_size: AtomicU64,
mem_cache: Mutex<LruCache<CacheKey, (Bytes, ImageMetadata, usize)>>,
master_sender: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
}
impl MemoryLruCache {
#[allow(clippy::new_ret_no_self)]
pub async fn new(
disk_max_size: u64,
disk_path: PathBuf,
max_mem_size: u64,
) -> Arc<Box<dyn Cache>> {
let (tx, mut rx) = channel(100);
let new_self = Arc::new(Box::new(Self {
inner: LowMemCache::new(disk_max_size, disk_path).await,
cur_mem_size: AtomicU64::new(0),
mem_cache: Mutex::new(LruCache::unbounded()),
master_sender: tx,
}) as Box<dyn Cache>);
let new_self_0 = Arc::clone(&new_self);
tokio::spawn(async move {
let new_self = new_self_0;
let max_mem_size = max_mem_size / 20 * 19;
while let Some((key, bytes, metadata, size)) = rx.recv().await {
new_self.increase_usage(size as u32);
new_self.put_internal(key, bytes, metadata, size).await;
while new_self.mem_size() >= max_mem_size {
if let Some((_, _, _, size)) = new_self.pop_memory().await {
new_self.decrease_usage(size as u64);
} else {
break;
}
}
}
});
new_self
}
}
#[async_trait]
impl Cache for MemoryLruCache {
#[inline]
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), super::CacheError>> {
match self.mem_cache.lock().now_or_never() {
Some(mut mem_cache) => match mem_cache.get(key).map(|(bytes, metadata, _)| {
Ok((CacheStream::Memory(MemStream(bytes.clone())), *metadata))
}) {
Some(v) => Some(v),
None => self.inner.get(key).await,
},
None => self.inner.get(key).await,
}
}
#[inline]
async fn put(
&self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
) -> Result<CacheStream, super::CacheError> {
self.inner
.put_with_on_completed_callback(key, image, metadata, self.master_sender.clone())
.await
}
#[inline]
fn increase_usage(&self, amt: u32) {
self.cur_mem_size
.fetch_add(u64::from(amt), Ordering::Release);
}
#[inline]
fn decrease_usage(&self, amt: u64) {
self.cur_mem_size.fetch_sub(amt, Ordering::Release);
}
#[inline]
fn on_disk_size(&self) -> u64 {
self.inner.on_disk_size()
}
#[inline]
fn mem_size(&self) -> u64 {
self.cur_mem_size.load(Ordering::Acquire)
}
#[inline]
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
image: BoxedImageStream,
metadata: ImageMetadata,
on_complete: Sender<(CacheKey, Bytes, ImageMetadata, usize)>,
) -> Result<CacheStream, super::CacheError> {
self.inner
.put_with_on_completed_callback(key, image, metadata, on_complete)
.await
}
#[inline]
async fn put_internal(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
size: usize,
) {
self.mem_cache
.lock()
.await
.put(key, (image, metadata, size));
}
#[inline]
async fn pop_memory(&self) -> Option<(CacheKey, Bytes, ImageMetadata, usize)> {
self.mem_cache
.lock()
.await
.pop_lru()
.map(|(key, (bytes, metadata, size))| (key, bytes, metadata, size))
}
}