From 8f3430fb77d9094d5f6864020901b77d7107022b Mon Sep 17 00:00:00 2001
From: Edward Shen
Date: Sun, 11 Jul 2021 23:33:22 -0400
Subject: [PATCH] Add support for reading old db image ids

---
 Cargo.lock        |  12 ++++++
 Cargo.toml        |   1 +
 src/cache/disk.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 967b2c0..470f931 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1184,6 +1184,7 @@ dependencies = [
  "lfu_cache",
  "log",
  "lru",
+ "md-5",
  "once_cell",
  "parking_lot",
  "prometheus",
@@ -1217,6 +1218,17 @@ version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
 
+[[package]]
+name = "md-5"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15"
+dependencies = [
+ "block-buffer",
+ "digest",
+ "opaque-debug",
+]
+
 [[package]]
 name = "memchr"
 version = "2.4.0"
diff --git a/Cargo.toml b/Cargo.toml
index f5f2c6e..0537695 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ once_cell = "1"
 log = { version = "0.4", features = [ "serde" ] }
 lfu_cache = "1"
 lru = "0.6"
+md-5 = "0.9"
 parking_lot = "0.11"
 prometheus = { version = "0.12", features = [ "process" ] }
 reqwest = { version = "0.11", default_features = false, features = [ "json", "stream", "rustls-tls" ] }
diff --git a/src/cache/disk.rs b/src/cache/disk.rs
index 6384b03..f69ab1c 100644
--- a/src/cache/disk.rs
+++ b/src/cache/disk.rs
@@ -1,6 +1,7 @@
 //! Low memory caching stuff
 
-use std::path::PathBuf;
+use std::os::unix::prelude::OsStrExt;
+use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::Arc;
@@ -8,6 +9,8 @@ use std::sync::Arc;
 use async_trait::async_trait;
 use futures::StreamExt;
 use log::{debug, error, warn, LevelFilter};
+use md5::digest::generic_array::GenericArray;
+use md5::{Digest, Md5};
 use sqlx::sqlite::SqliteConnectOptions;
 use sqlx::{ConnectOptions, SqlitePool};
 use tokio::fs::remove_file;
@@ -120,11 +123,18 @@ async fn db_listener(
         for message in messages {
             match message {
                 DbMessage::Get(entry) => {
+                    let hash = Md5Hash::from(entry.as_path());
+                    let hash_str = hash.to_hex_string();
                     let key = entry.as_os_str().to_str();
-                    let query =
-                        sqlx::query!("update Images set accessed = ? where id = ?", now, key)
-                            .execute(&mut transaction)
-                            .await;
+                    // Old database rows are keyed by the hash instead, so match both ids.
+                    let query = sqlx::query!(
+                        "update Images set accessed = ? where id = ? or id = ?",
+                        now,
+                        key,
+                        hash_str
+                    )
+                    .execute(&mut transaction)
+                    .await;
                     if let Err(e) = query {
                         warn!("Failed to update timestamp in db for {:?}: {}", key, e);
                     }
@@ -195,7 +205,32 @@ async fn db_listener(
             for item in items {
                 debug!("deleting file due to exceeding cache size");
                 size_freed += item.size as u64;
-                tokio::spawn(remove_file(item.id));
+                tokio::spawn(async move {
+                    let key = item.id;
+                    if let Err(e) = remove_file(&key).await {
+                        // A missing file may be a legacy entry, whose id is the MD5 hex
+                        // digest rather than its on-disk path.
+                        let legacy_hash = match e.kind() {
+                            std::io::ErrorKind::NotFound => key.parse::<Md5Hash>().ok(),
+                            _ => None,
+                        };
+                        match legacy_hash {
+                            Some(hash) => {
+                                let path: PathBuf = hash.into();
+                                if let Err(e) = remove_file(&path).await {
+                                    warn!(
+                                        "Failed to delete file `{}` from cache: {}",
+                                        path.to_string_lossy(),
+                                        e
+                                    );
+                                }
+                            }
+                            None => {
+                                warn!("Failed to delete file `{}` from cache: {}", &key, e);
+                            }
+                        }
+                    }
+                });
             }
 
             cache.disk_cur_size.fetch_sub(size_freed, Ordering::Release);
@@ -203,6 +238,79 @@ async fn db_listener(
     }
 }
 
+/// Represents a Md5 hash that can be converted to and from a path. This is used
+/// for compatibility with the official client, where the image id and on-disk
+/// path is determined by file path.
+#[derive(Clone, Copy)]
+struct Md5Hash(GenericArray<u8, <Md5 as Digest>::OutputSize>);
+
+impl Md5Hash {
+    /// Renders the digest as a lowercase hex string.
+    fn to_hex_string(self) -> String {
+        format!("{:x}", self.0)
+    }
+}
+
+impl FromStr for Md5Hash {
+    type Err = ();
+
+    /// Parses the hex form produced by `to_hex_string`. Fails unless `s` is
+    /// exactly two hex digits per digest byte.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mut hash = Self(GenericArray::default());
+        if s.len() != hash.0.len() * 2 || !s.bytes().all(|b| b.is_ascii_hexdigit()) {
+            return Err(());
+        }
+        for (byte, pair) in hash.0.iter_mut().zip(s.as_bytes().chunks_exact(2)) {
+            // Infallible after the check above, but avoid panicking regardless.
+            let pair = std::str::from_utf8(pair).map_err(|_| ())?;
+            *byte = u8::from_str_radix(pair, 16).map_err(|_| ())?;
+        }
+        Ok(hash)
+    }
+}
+
+impl From<&Path> for Md5Hash {
+    /// Digests `<chapter hash>.<file name>`, taken from the last two components
+    /// of `path`, prefixed with `saver` if the component before them is `saver`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `path` has fewer than three components.
+    fn from(path: &Path) -> Self {
+        let mut iter = path.iter();
+        let file_name = iter.next_back().unwrap();
+        let chapter_hash = iter.next_back().unwrap();
+        let is_data_saver = iter.next_back().unwrap() == "saver";
+        let mut hasher = Md5::new();
+        if is_data_saver {
+            hasher.update("saver");
+        }
+        hasher.update(chapter_hash.as_bytes());
+        hasher.update(".");
+        hasher.update(file_name.as_bytes());
+        Self(hasher.finalize())
+    }
+}
+
+// Lint is overly aggressive here, as Md5Hash guarantees there to be at least 3
+// bytes.
+#[allow(clippy::fallible_impl_from)]
+impl From<Md5Hash> for PathBuf {
+    /// Builds the relative path `c/b/a/<hex>`, where `abc` are the first three
+    /// hex digits of the digest and `<hex>` is the full hex string.
+    fn from(hash: Md5Hash) -> Self {
+        let hex_value = hash.to_hex_string();
+        hex_value[0..3]
+            .chars()
+            .rev()
+            .map(|char| Self::from(char.to_string()))
+            .reduce(|first, second| first.join(second))
+            .unwrap() // literally not possible
+            .join(hex_value)
+    }
+}
+
 #[async_trait]
 impl Cache for DiskCache {
     async fn get(