omegaupload/server/src/main.rs

380 lines
12 KiB
Rust
Raw Normal View History

2021-10-16 09:50:11 -07:00
#![warn(clippy::nursery, clippy::pedantic)]
2021-10-31 14:01:27 -07:00
// OmegaUpload Zero Knowledge File Hosting
// Copyright (C) 2021 Edward Shen
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
2021-10-26 23:51:05 -07:00
use std::convert::Infallible;
2021-10-16 09:50:11 -07:00
use std::sync::Arc;
2021-10-24 18:07:48 -07:00
use std::time::Duration;
2021-10-16 09:50:11 -07:00
use anyhow::Result;
use axum::body::Bytes;
2021-11-14 14:02:55 -08:00
use axum::error_handling::HandleErrorExt;
2021-10-16 09:50:11 -07:00
use axum::extract::{Extension, Path, TypedHeader};
2021-10-19 02:18:33 -07:00
use axum::http::header::EXPIRES;
use axum::http::StatusCode;
2021-10-27 01:49:06 -07:00
use axum::response::Html;
2021-11-14 14:02:55 -08:00
use axum::routing::{get, post, service_method_routing};
use axum::{AddExtensionLayer, Router};
2021-10-21 18:35:54 -07:00
use chrono::Utc;
use futures::stream::StreamExt;
2021-10-19 02:18:33 -07:00
use headers::HeaderMap;
2021-10-27 19:16:43 -07:00
use lazy_static::lazy_static;
2021-10-31 12:34:26 -07:00
use omegaupload_common::crypto::get_csrng;
2021-10-27 01:49:06 -07:00
use omegaupload_common::{Expiration, API_ENDPOINT};
2021-10-19 02:18:33 -07:00
use rand::Rng;
2021-10-24 18:07:48 -07:00
use rocksdb::{ColumnFamilyDescriptor, IteratorMode};
2021-10-19 02:18:33 -07:00
use rocksdb::{Options, DB};
use signal_hook::consts::SIGUSR1;
use signal_hook_tokio::Signals;
2021-11-14 13:52:40 -08:00
use tokio::task::{self, JoinHandle};
2021-10-26 23:51:05 -07:00
use tower_http::services::ServeDir;
2021-10-23 10:10:55 -07:00
use tracing::{error, instrument, trace};
2021-10-21 18:35:54 -07:00
use tracing::{info, warn};
2021-10-16 09:50:11 -07:00
2021-10-19 02:18:33 -07:00
use crate::short_code::ShortCode;
2021-10-16 09:50:11 -07:00
mod short_code;
2021-10-24 18:07:48 -07:00
// Column family holding the raw paste blobs (server never sees plaintext).
const BLOB_CF_NAME: &str = "blob";
// Column family holding each paste's serialized `Expiration` metadata,
// keyed by the same short code as the blob.
const META_CF_NAME: &str = "meta";
2021-10-27 19:16:43 -07:00
lazy_static! {
    /// Longest lifetime any paste may request; also used as the fallback
    /// deadline for burn-after-reading pastes that lack an explicit one.
    static ref MAX_PASTE_AGE: chrono::Duration = chrono::Duration::days(1);
}
2021-10-16 09:50:11 -07:00
#[tokio::main]
async fn main() -> Result<()> {
    // The frontend entry point is baked into the binary at compile time, so
    // the running server has no dependency on the frontend build directory.
    const INDEX_PAGE: Html<&'static str> = Html(include_str!("../../dist/index.html"));
    // On-disk location of the RocksDB paste store.
    const PASTE_DB_PATH: &str = "database";
    // Length in bytes of generated paste short codes.
    const SHORT_CODE_SIZE: usize = 12;

    tracing_subscriber::fmt::init();

    // Open (or create) the database with separate column families for paste
    // blobs and their expiration metadata.
    let mut db_options = Options::default();
    db_options.create_if_missing(true);
    db_options.create_missing_column_families(true);
    db_options.set_compression_type(rocksdb::DBCompressionType::Zstd);
    let db = Arc::new(DB::open_cf_descriptors(
        &db_options,
        PASTE_DB_PATH,
        [
            ColumnFamilyDescriptor::new(BLOB_CF_NAME, Options::default()),
            ColumnFamilyDescriptor::new(META_CF_NAME, Options::default()),
        ],
    )?);

    // Re-arm expiration timers for pastes that survived a restart.
    set_up_expirations::<SHORT_CODE_SIZE>(&db);

    // SIGUSR1 makes the server log its active paste count (see handle_signals).
    let signals = Signals::new(&[SIGUSR1])?;
    let signals_handle = signals.handle();
    let signals_task = tokio::spawn(handle_signals(signals, Arc::clone(&db)));

    // Static assets report 404 on error rather than surfacing an internal error.
    let root_service = service_method_routing::get(ServeDir::new("static"))
        .handle_error(|_| Ok::<_, Infallible>(StatusCode::NOT_FOUND));

    axum::Server::bind(&"0.0.0.0:8080".parse()?)
        .serve({
            info!("Now serving on 0.0.0.0:8080");
            Router::new()
                .route(
                    // GET serves the SPA; POST uploads a new paste.
                    "/",
                    post(upload::<SHORT_CODE_SIZE>).get(|| async { INDEX_PAGE }),
                )
                // Short-code URLs also serve the SPA; the page then fetches
                // the paste itself through the API route below.
                .route("/:code", get(|| async { INDEX_PAGE }))
                .nest("/static", root_service)
                .route(
                    &format!("{API_ENDPOINT}/:code"),
                    get(paste::<SHORT_CODE_SIZE>).delete(delete::<SHORT_CODE_SIZE>),
                )
                .layer(AddExtensionLayer::new(db))
                .into_make_service()
        })
        .await?;

    // Must be called for correct shutdown
    // NOTE(review): DB::destroy deletes the on-disk database. Since
    // set_up_expirations assumes pastes persist across restarts, confirm this
    // wipe-on-shutdown is intentional and not a development leftover.
    DB::destroy(&Options::default(), PASTE_DB_PATH)?;
    signals_handle.close();
    signals_task.await?;

    Ok(())
}
2021-10-31 00:57:52 -07:00
// See https://link.eddie.sh/5JHlD
#[allow(clippy::cognitive_complexity)]
2021-11-14 13:52:40 -08:00
fn set_up_expirations<const N: usize>(db: &Arc<DB>) {
2021-10-21 18:35:54 -07:00
let mut corrupted = 0;
let mut expired = 0;
let mut pending = 0;
info!("Setting up cleanup timers, please wait...");
2021-10-24 18:07:48 -07:00
let meta_cf = db.cf_handle(META_CF_NAME).unwrap();
let db_ref = Arc::clone(db);
2021-10-24 18:07:48 -07:00
for (key, value) in db.iterator_cf(meta_cf, IteratorMode::Start) {
2021-11-14 13:52:40 -08:00
let key: [u8; N] = (*key).try_into().unwrap();
let expiration = if let Ok(value) = bincode::deserialize::<Expiration>(&value) {
2021-10-21 18:35:54 -07:00
value
} else {
corrupted += 1;
2021-11-14 13:52:40 -08:00
delete_entry(Arc::clone(&db_ref), key);
2021-10-21 18:35:54 -07:00
continue;
};
let expiration_time = match expiration {
2021-10-24 18:07:48 -07:00
Expiration::BurnAfterReading => {
2021-10-27 19:16:43 -07:00
warn!("Found unbounded burn after reading. Defaulting to max age");
Utc::now() + *MAX_PASTE_AGE
2021-10-21 18:35:54 -07:00
}
2021-10-27 19:16:43 -07:00
Expiration::BurnAfterReadingWithDeadline(deadline) => deadline,
2021-10-24 18:07:48 -07:00
Expiration::UnixTime(time) => time,
};
let sleep_duration = (expiration_time - Utc::now()).to_std().unwrap_or_default();
if sleep_duration == Duration::default() {
expired += 1;
2021-11-14 13:52:40 -08:00
delete_entry(Arc::clone(&db_ref), key);
} else {
2021-10-24 18:07:48 -07:00
pending += 1;
2021-11-14 13:52:40 -08:00
let db = Arc::clone(&db_ref);
task::spawn(async move {
2021-10-24 18:07:48 -07:00
tokio::time::sleep(sleep_duration).await;
2021-11-14 13:52:40 -08:00
delete_entry(db, key);
2021-10-24 18:07:48 -07:00
});
2021-10-21 18:35:54 -07:00
}
}
if corrupted == 0 {
info!("No corrupted pastes found.");
} else {
2022-01-16 00:49:42 -08:00
warn!("Found {corrupted} corrupted pastes.");
2021-10-21 18:35:54 -07:00
}
2022-01-16 00:49:42 -08:00
info!("Found {expired} expired pastes.");
info!("Found {pending} active pastes.");
2021-10-21 18:35:54 -07:00
info!("Cleanup timers have been initialized.");
}
async fn handle_signals(mut signals: Signals, db: Arc<DB>) {
while let Some(signal) = signals.next().await {
2021-10-31 00:57:52 -07:00
if signal == SIGUSR1 {
let meta_cf = db.cf_handle(META_CF_NAME).unwrap();
info!(
"Active paste count: {}",
db.iterator_cf(meta_cf, IteratorMode::Start).count()
);
}
}
}
2021-10-23 10:10:55 -07:00
#[instrument(skip(db, body), err)]
2021-10-16 09:50:11 -07:00
async fn upload<const N: usize>(
Extension(db): Extension<Arc<DB>>,
maybe_expires: Option<TypedHeader<Expiration>>,
body: Bytes,
) -> Result<Vec<u8>, StatusCode> {
if body.is_empty() {
return Err(StatusCode::BAD_REQUEST);
}
2021-10-27 19:16:43 -07:00
if let Some(header) = maybe_expires {
if let Expiration::UnixTime(time) = header.0 {
if (time - Utc::now()) > *MAX_PASTE_AGE {
2022-01-16 00:49:42 -08:00
warn!("{time} exceeds allowed paste lifetime");
2021-10-27 19:16:43 -07:00
return Err(StatusCode::BAD_REQUEST);
}
}
}
2021-10-16 09:50:11 -07:00
// 3GB max; this is a soft-limit of RocksDb
if body.len() >= 3_221_225_472 {
return Err(StatusCode::PAYLOAD_TOO_LARGE);
}
let mut new_key = None;
2021-10-23 10:10:55 -07:00
trace!("Generating short code...");
2021-10-16 09:50:11 -07:00
// Try finding a code; give up after 1000 attempts
// Statistics show that this is very unlikely to happen
2021-10-23 10:10:55 -07:00
for i in 0..1000 {
2021-10-31 12:34:26 -07:00
let code: ShortCode<N> = get_csrng().sample(short_code::Generator);
2021-10-16 09:50:11 -07:00
let db = Arc::clone(&db);
let key = code.as_bytes();
2021-10-24 18:07:48 -07:00
let query = task::spawn_blocking(move || {
db.key_may_exist_cf(db.cf_handle(META_CF_NAME).unwrap(), key)
})
.await;
2021-10-16 09:50:11 -07:00
if matches!(query, Ok(false)) {
new_key = Some(key);
2022-01-16 00:49:42 -08:00
trace!("Found new key after {i} attempts.");
2021-10-23 10:10:55 -07:00
break;
2021-10-16 09:50:11 -07:00
}
}
let key = if let Some(key) = new_key {
key
} else {
2021-10-23 10:10:55 -07:00
error!("Failed to generate a valid short code!");
2021-10-16 09:50:11 -07:00
return Err(StatusCode::INTERNAL_SERVER_ERROR);
};
2021-10-21 18:35:54 -07:00
let db_ref = Arc::clone(&db);
2021-10-24 18:07:48 -07:00
match task::spawn_blocking(move || {
let blob_cf = db_ref.cf_handle(BLOB_CF_NAME).unwrap();
let meta_cf = db_ref.cf_handle(META_CF_NAME).unwrap();
let data = bincode::serialize(&body).expect("bincode to serialize");
db_ref.put_cf(blob_cf, key, data)?;
let expires = maybe_expires.map(|v| v.0).unwrap_or_default();
2021-10-27 19:16:43 -07:00
let expires = if let Expiration::BurnAfterReading = expires {
Expiration::BurnAfterReadingWithDeadline(Utc::now() + *MAX_PASTE_AGE)
} else {
expires
};
2021-10-24 18:07:48 -07:00
let meta = bincode::serialize(&expires).expect("bincode to serialize");
if db_ref.put_cf(meta_cf, key, meta).is_err() {
// try and roll back on metadata write failure
db_ref.delete_cf(blob_cf, key)?;
}
Result::<_, anyhow::Error>::Ok(())
})
.await
{
2021-10-21 18:35:54 -07:00
Ok(Ok(_)) => {
if let Some(expires) = maybe_expires {
2021-10-27 19:16:43 -07:00
if let Expiration::UnixTime(expiration_time)
| Expiration::BurnAfterReadingWithDeadline(expiration_time) = expires.0
{
2021-10-24 18:07:48 -07:00
let sleep_duration =
(expiration_time - Utc::now()).to_std().unwrap_or_default();
2021-11-14 13:52:40 -08:00
task::spawn(async move {
2021-10-24 18:07:48 -07:00
tokio::time::sleep(sleep_duration).await;
2021-11-14 13:52:40 -08:00
delete_entry(db, key);
2021-10-24 18:07:48 -07:00
});
2021-10-21 18:35:54 -07:00
}
}
}
2021-10-16 09:50:11 -07:00
e => {
2022-01-16 00:49:42 -08:00
error!("Failed to insert paste into db: {e:?}");
2021-10-16 09:50:11 -07:00
return Err(StatusCode::INTERNAL_SERVER_ERROR);
}
}
Ok(Vec::from(key))
}
#[instrument(skip(db), err)]
async fn paste<const N: usize>(
Extension(db): Extension<Arc<DB>>,
Path(url): Path<ShortCode<N>>,
2021-10-19 02:18:33 -07:00
) -> Result<(HeaderMap, Bytes), StatusCode> {
2021-10-16 09:50:11 -07:00
let key = url.as_bytes();
2021-10-24 18:07:48 -07:00
let metadata: Expiration = {
let meta_cf = db.cf_handle(META_CF_NAME).unwrap();
let query_result = db.get_cf(meta_cf, key).map_err(|e| {
2022-01-16 00:49:42 -08:00
error!("Failed to fetch initial query: {e}");
2021-10-16 09:50:11 -07:00
StatusCode::INTERNAL_SERVER_ERROR
})?;
let data = match query_result {
Some(data) => data,
None => return Err(StatusCode::NOT_FOUND),
};
bincode::deserialize(&data).map_err(|_| {
error!("Failed to deserialize data?!");
StatusCode::INTERNAL_SERVER_ERROR
})?
};
2021-10-24 18:07:48 -07:00
// Check if paste has expired.
if let Expiration::UnixTime(expires) = metadata {
if expires < Utc::now() {
2021-11-14 13:52:40 -08:00
delete_entry(db, url.as_bytes()).await.map_err(|e| {
2022-01-16 00:49:42 -08:00
error!("Failed to join handle: {e}");
2021-10-16 09:50:11 -07:00
StatusCode::INTERNAL_SERVER_ERROR
2021-11-14 13:52:40 -08:00
})??;
2021-10-24 18:07:48 -07:00
return Err(StatusCode::NOT_FOUND);
}
}
let paste: Bytes = {
// not sure if perf of get_pinned is better than spawn_blocking
let blob_cf = db.cf_handle(BLOB_CF_NAME).unwrap();
let query_result = db.get_pinned_cf(blob_cf, key).map_err(|e| {
2022-01-16 00:49:42 -08:00
error!("Failed to fetch initial query: {e}");
2021-10-16 09:50:11 -07:00
StatusCode::INTERNAL_SERVER_ERROR
})?;
2021-10-24 18:07:48 -07:00
let data = match query_result {
Some(data) => data,
None => return Err(StatusCode::NOT_FOUND),
};
2021-10-16 09:50:11 -07:00
2021-10-24 18:07:48 -07:00
bincode::deserialize(&data).map_err(|_| {
error!("Failed to deserialize data?!");
StatusCode::INTERNAL_SERVER_ERROR
})?
};
// Check if we need to burn after read
2021-10-27 19:16:43 -07:00
if matches!(
metadata,
Expiration::BurnAfterReading | Expiration::BurnAfterReadingWithDeadline(_)
) {
2021-11-14 13:52:40 -08:00
delete_entry(db, key).await.map_err(|e| {
2022-01-16 00:49:42 -08:00
error!("Failed to join handle: {e}");
2021-10-27 19:16:43 -07:00
StatusCode::INTERNAL_SERVER_ERROR
2021-11-14 13:52:40 -08:00
})??;
2021-10-16 09:50:11 -07:00
}
2021-10-19 02:18:33 -07:00
let mut map = HeaderMap::new();
2021-10-24 18:07:48 -07:00
map.insert(EXPIRES, metadata.into());
Ok((map, paste))
2021-10-16 09:50:11 -07:00
}
#[instrument(skip(db))]
async fn delete<const N: usize>(
Extension(db): Extension<Arc<DB>>,
Path(url): Path<ShortCode<N>>,
) -> StatusCode {
2021-11-14 13:52:40 -08:00
match delete_entry(db, url.as_bytes()).await {
Ok(_) => StatusCode::OK,
_ => StatusCode::INTERNAL_SERVER_ERROR,
}
}
fn delete_entry<const N: usize>(db: Arc<DB>, key: [u8; N]) -> JoinHandle<Result<(), StatusCode>> {
task::spawn_blocking(move || {
2021-10-24 18:07:48 -07:00
let blob_cf = db.cf_handle(BLOB_CF_NAME).unwrap();
let meta_cf = db.cf_handle(META_CF_NAME).unwrap();
2021-11-14 13:52:40 -08:00
if let Err(e) = db.delete_cf(blob_cf, &key) {
2022-01-16 00:49:42 -08:00
warn!("{e}");
2021-11-14 13:52:40 -08:00
return Err(StatusCode::INTERNAL_SERVER_ERROR);
2021-10-24 18:07:48 -07:00
}
2021-11-14 13:52:40 -08:00
if let Err(e) = db.delete_cf(meta_cf, &key) {
2022-01-16 00:49:42 -08:00
warn!("{e}");
2021-11-14 13:52:40 -08:00
return Err(StatusCode::INTERNAL_SERVER_ERROR);
2021-10-24 18:07:48 -07:00
}
Ok(())
})
2021-10-16 09:50:11 -07:00
}