From 5f4be9809a5f9a3e55d62a61450a554927aed327 Mon Sep 17 00:00:00 2001 From: Edward Shen Date: Sun, 11 Jul 2021 02:33:51 -0400 Subject: [PATCH] Add support for legacy files --- Cargo.lock | 24 ++++++++++ Cargo.toml | 3 ++ src/cache/compat.rs | 87 +++++++++++++++++++++++++++++++++++ src/cache/disk.rs | 2 +- src/cache/fs.rs | 110 +++++++++++++++++++++++++++++++++++++++++--- src/cache/mod.rs | 17 ++++++- 6 files changed, 233 insertions(+), 10 deletions(-) create mode 100644 src/cache/compat.rs diff --git a/Cargo.lock b/Cargo.lock index 2b5b97a..967b2c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1196,6 +1196,7 @@ dependencies = [ "simple_logger", "sodiumoxide", "sqlx", + "tempfile", "thiserror", "tokio", "tokio-stream", @@ -1594,6 +1595,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "reqwest" version = "0.11.4" @@ -2139,6 +2149,20 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.2" diff --git a/Cargo.toml b/Cargo.toml index 6a2c8e5..f5f2c6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,3 +48,6 @@ url = { version = "2", features = [ "serde" ] } [build-dependencies] vergen = "5" + +[dev-dependencies] +tempfile = "3" \ No newline at end of file 
diff --git a/src/cache/compat.rs b/src/cache/compat.rs new file mode 100644 index 0000000..93d516f --- /dev/null +++ b/src/cache/compat.rs @@ -0,0 +1,87 @@ +use std::str::FromStr; + +use chrono::{DateTime, FixedOffset}; +use serde::{ + de::{Unexpected, Visitor}, + Deserialize, Serialize, +}; + +use super::ImageContentType; + +#[derive(Copy, Clone, Serialize, Deserialize)] +pub(crate) struct LegacyImageMetadata { + pub(crate) content_type: Option, + pub(crate) size: Option, + pub(crate) last_modified: Option, +} + +#[derive(Copy, Clone, Serialize)] +pub(crate) struct LegacyDateTime(pub DateTime); + +impl<'de> Deserialize<'de> for LegacyDateTime { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct LegacyDateTimeVisitor; + + impl<'de> Visitor<'de> for LegacyDateTimeVisitor { + type Value = LegacyDateTime; + + fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "an RFC 2822 date-time string") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + DateTime::parse_from_rfc2822(v) + .map(LegacyDateTime) + .map_err(|_| E::invalid_value(Unexpected::Str(v), &"an RFC 2822 date-time string")) + } + } + + deserializer.deserialize_str(LegacyDateTimeVisitor) + } +} + +#[derive(Copy, Clone)] +pub(crate) struct LegacyImageContentType(pub ImageContentType); + +impl Serialize for LegacyImageContentType { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(self.0.as_ref()) + } +} + +impl<'de> Deserialize<'de> for LegacyImageContentType { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct LegacyImageContentTypeVisitor; + + impl<'de> Visitor<'de> for LegacyImageContentTypeVisitor { + type Value = LegacyImageContentType; + + fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "a valid image type") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, 
+ { + ImageContentType::from_str(v) + .map(LegacyImageContentType) + .map_err(|_| E::invalid_value(Unexpected::Str(v), &"a valid image type")) + } + } + + deserializer.deserialize_str(LegacyImageContentTypeVisitor) + } +} diff --git a/src/cache/disk.rs b/src/cache/disk.rs index 75b0dc0..406b9e9 100644 --- a/src/cache/disk.rs +++ b/src/cache/disk.rs @@ -197,7 +197,7 @@ impl Cache for DiskCache { tokio::spawn(async move { channel.send(DbMessage::Get(path_0)).await }); - super::fs::read_file(&path).await.map(|res| { + super::fs::read_file_from_path(&path).await.map(|res| { let (inner, maybe_header, metadata) = res?; CacheStream::new(inner, maybe_header) .map(|stream| (stream, metadata)) diff --git a/src/cache/fs.rs b/src/cache/fs.rs index c06cfd4..4a40a83 100644 --- a/src/cache/fs.rs +++ b/src/cache/fs.rs @@ -16,6 +16,7 @@ use std::error::Error; use std::fmt::Display; +use std::io::{Seek, SeekFrom}; use std::path::Path; use std::pin::Pin; use std::task::{Context, Poll}; @@ -33,6 +34,7 @@ use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, ReadBuf}; use tokio::sync::mpsc::Sender; use tokio_util::codec::{BytesCodec, FramedRead}; +use super::compat::LegacyImageMetadata; use super::{CacheKey, ImageMetadata, InnerStream, ENCRYPTION_KEY}; #[derive(Serialize, Deserialize)] @@ -44,14 +46,30 @@ pub enum OnDiskMetadata { /// Attempts to lookup the file on disk, returning a byte stream if it exists. /// Note that this could return two types of streams, depending on if the file /// is in progress of being written to. 
-pub(super) async fn read_file( +#[inline] +pub(super) async fn read_file_from_path( path: &Path, ) -> Option, ImageMetadata), std::io::Error>> { - let file = std::fs::File::open(path).ok()?; - let file_0 = file.try_clone().unwrap(); + read_file(std::fs::File::open(path).ok()?).await +} + +async fn read_file( + file: std::fs::File, +) -> Option, ImageMetadata), std::io::Error>> { + let mut file_0 = file.try_clone().unwrap(); + let file_1 = file.try_clone().unwrap(); + // Try reading decrypted header first... let mut deserializer = serde_json::Deserializer::from_reader(file); - let maybe_metadata = ImageMetadata::deserialize(&mut deserializer); + let mut maybe_metadata = ImageMetadata::deserialize(&mut deserializer); + + // Failed to parse normally, see if we have a legacy file format + if maybe_metadata.is_err() { + file_0.seek(SeekFrom::Start(2)).ok()?; + let mut deserializer = serde_json::Deserializer::from_reader(file_0); + maybe_metadata = + LegacyImageMetadata::deserialize(&mut deserializer).map(LegacyImageMetadata::into); + } let parsed_metadata; let mut maybe_header = None; @@ -65,11 +83,11 @@ pub(super) async fn read_file( return None; } - reader = Some(Box::pin(File::from_std(file_0))); + reader = Some(Box::pin(File::from_std(file_1))); parsed_metadata = Some(metadata); debug!("Found not encrypted file"); } else { - let mut file = File::from_std(file_0); + let mut file = File::from_std(file_1); let file_0 = file.try_clone().await.unwrap(); // image is encrypted or corrupt @@ -343,7 +361,7 @@ impl AsyncWrite for EncryptedDiskWriter { } /// Represents some upstream error. 
-#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub struct UpstreamError; impl Error for UpstreamError {} @@ -360,3 +378,81 @@ impl From for actix_web::Error { PayloadError::Incomplete(None).into() } } + +#[cfg(test)] +mod read_file { + use crate::cache::{ImageContentType, ImageMetadata}; + + use super::read_file; + use bytes::Bytes; + use chrono::DateTime; + use futures::StreamExt; + use std::io::{Seek, SeekFrom, Write}; + use tempfile::tempfile; + + #[tokio::test] + async fn can_read() { + let mut temp_file = tempfile().unwrap(); + temp_file + .write_all( + br#"{"content_type":0,"content_length":708370,"last_modified":"2021-04-13T04:37:41+00:00"}abc"#, + ) + .unwrap(); + temp_file.seek(SeekFrom::Start(0)).unwrap(); + + let (inner_stream, maybe_header, metadata) = read_file(temp_file).await.unwrap().unwrap(); + + let foo: Vec<_> = inner_stream.collect().await; + assert_eq!(foo, vec![Ok(Bytes::from("abc"))]); + assert!(maybe_header.is_none()); + assert_eq!( + metadata, + ImageMetadata { + content_length: Some(708370), + content_type: Some(ImageContentType::Png), + last_modified: Some( + DateTime::parse_from_rfc3339("2021-04-13T04:37:41+00:00").unwrap() + ) + } + ); + } +} + +#[cfg(test)] +mod read_file_compat { + use crate::cache::{ImageContentType, ImageMetadata}; + + use super::read_file; + use bytes::Bytes; + use chrono::DateTime; + use futures::StreamExt; + use std::io::{Seek, SeekFrom, Write}; + use tempfile::tempfile; + + #[tokio::test] + async fn can_read_legacy() { + let mut temp_file = tempfile().unwrap(); + temp_file + .write_all( + b"\x00\x5b{\"content_type\":\"image/jpeg\",\"last_modified\":\"Sat, 10 Apr 2021 10:55:22 GMT\",\"size\":117888}abc", + ) + .unwrap(); + temp_file.seek(SeekFrom::Start(0)).unwrap(); + + let (inner_stream, maybe_header, metadata) = read_file(temp_file).await.unwrap().unwrap(); + + let foo: Vec<_> = inner_stream.collect().await; + assert_eq!(foo, vec![Ok(Bytes::from("abc"))]); + assert!(maybe_header.is_none()); + assert_eq!( + 
metadata, + ImageMetadata { + content_length: Some(117888), + content_type: Some(ImageContentType::Jpeg), + last_modified: Some( + DateTime::parse_from_rfc2822("Sat, 10 Apr 2021 10:55:22 GMT").unwrap() + ) + } + ); + } +} diff --git a/src/cache/mod.rs b/src/cache/mod.rs index baf1604..fdc4443 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -23,8 +23,11 @@ pub use disk::DiskCache; pub use fs::UpstreamError; pub use mem::MemoryCache; +use self::compat::LegacyImageMetadata; + pub static ENCRYPTION_KEY: OnceCell = OnceCell::new(); +mod compat; mod disk; mod fs; pub mod mem; @@ -59,7 +62,7 @@ impl From<&CacheKey> for PathBuf { #[derive(Clone)] pub struct CachedImage(pub Bytes); -#[derive(Copy, Clone, Serialize, Deserialize)] +#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq)] pub struct ImageMetadata { pub content_type: Option, pub content_length: Option, @@ -67,7 +70,7 @@ pub struct ImageMetadata { } // Confirmed by Ply to be these types: https://link.eddie.sh/ZXfk0 -#[derive(Copy, Clone, Serialize_repr, Deserialize_repr)] +#[derive(Copy, Clone, Serialize_repr, Deserialize_repr, Debug, PartialEq, Eq)] #[repr(u8)] pub enum ImageContentType { Png = 0, @@ -102,6 +105,16 @@ impl AsRef for ImageContentType { } } +impl From for ImageMetadata { + fn from(legacy: LegacyImageMetadata) -> Self { + Self { + content_type: legacy.content_type.map(|v| v.0), + content_length: legacy.size, + last_modified: legacy.last_modified.map(|v| v.0), + } + } +} + #[allow(clippy::pub_enum_variant_names)] #[derive(Debug)] pub enum ImageRequestError {