Compare commits


241 commits

SHA1  Message  Date
1152f775b9  lint tests  2022-03-26 16:30:50 -07:00
e404d144a2  clippy  2022-03-26 16:28:58 -07:00
bc6ed7d07e  Update actix  2022-03-26 16:21:27 -07:00
ff0944f58c  Update dependencies  2022-03-26 16:20:00 -07:00
81604a7e94  Clippy  2022-01-02 13:25:00 -08:00
f6a9caf653  Update Cargo.lock  2022-01-02 13:18:08 -08:00
5c6f02b9a5  Update minor version  2022-01-02 12:48:03 -08:00
557f141ed2  Update lru  2022-01-02 12:42:22 -08:00
55f6279dce  Update to actix beta 18  2022-01-02 12:34:00 -08:00
0bf76eab6b  Update deps  2022-01-02 12:12:35 -08:00
a838a94ce9  Update to modern code  2022-01-02 12:12:23 -08:00
63eba4dc37  add db listener test  2021-07-23 20:27:52 -04:00
4544061845  clarify iso code impl  2021-07-23 17:41:17 -04:00
42cbd81375  convert some unwraps to expects  2021-07-22 13:46:40 -04:00
07bd39e69d  Add unit tests for token validation  2021-07-22 13:37:43 -04:00
8f5799211c  Try reserve port during startup  2021-07-22 13:37:32 -04:00
0300135a6a  Add exclusions for code coverage  2021-07-20 16:47:04 -04:00
7bbbf44328  Hololive-ify sample config file  2021-07-18 23:57:38 -04:00
2878ddf0dc  Update dependencies  2021-07-18 23:45:24 -04:00
e4af231829  Remove tracing-futures  2021-07-18 22:05:36 -04:00
b04fac9b01  Added docker-compose file  2021-07-18 21:36:54 -04:00
5aa72e9821  Add parser directive to Dockerfile  2021-07-18 21:33:56 -04:00
6d6bf7371b  Add dockerfile  2021-07-18 21:27:29 -04:00
6b1c913b5d  Remove unneeded imports  2021-07-18 18:35:22 -04:00
acd37297fd  Remove legacy token validation field  2021-07-18 18:32:19 -04:00
bd306455bc  Use ReaderStream  2021-07-18 11:37:39 -04:00
afa2cf55fa  Fix some future not send lints  2021-07-17 13:32:43 -04:00
e95afd3e32  Bump to 0.5.3  2021-07-17 13:05:13 -04:00
fbcf9566c1  Update readme  2021-07-17 13:04:39 -04:00
d42b80d7e1  Remove encryption option warning  2021-07-17 12:52:41 -04:00
931da0c3ff  Add redis support  2021-07-17 12:52:02 -04:00
5fdcfa5071  Add newline to Cargo.toml  2021-07-16 21:15:47 -04:00
93ff76aa89  create folder if not found  2021-07-16 20:03:59 -04:00
f8f4098fae  Finish mem tests  2021-07-16 16:40:07 -04:00
bfcf131b33  Add stop url test  2021-07-16 15:26:18 -04:00
5da486d43d  Add put test for mem cache  2021-07-16 15:18:10 -04:00
51546eb387  Add memory cache get tests  2021-07-16 14:06:28 -04:00
712257429a  Remove Compression wrapper  2021-07-16 12:32:24 -04:00
5e7a82a610  Add partial test for mem cache  2021-07-16 01:13:51 -04:00
afb02db7b7  Remove unnecessary async keyword  2021-07-16 01:13:31 -04:00
b41ae8cb79  Add sync restriction on CacheStream  2021-07-16 01:13:01 -04:00
54c8fe1cb3  Turn DB messages into struct from tuple  2021-07-15 21:49:19 -04:00
8556f37904  Extract internal cache listener to function  2021-07-15 21:49:19 -04:00
fc930285f0  Bump to 0.5.2  2021-07-15 19:14:54 -04:00
041760f9e9  clippy  2021-07-15 19:13:31 -04:00
87271c85a7  Fix deleting legacy names  2021-07-15 19:03:39 -04:00
3e4260f6e1  rename /metric endpoint to /prometheus  2021-07-15 15:54:26 -04:00
dc99437aec  Fix legacy path lookup  2021-07-15 13:47:55 -04:00
3dbf2f8bb0  Read legacy path on disk  2021-07-15 13:25:46 -04:00
f7b037e8e1  Update readme  2021-07-15 12:58:40 -04:00
a552523a3a  increment version to 0.5.1  2021-07-15 12:39:19 -04:00
5935e4220b  Fix stop url  2021-07-15 12:37:55 -04:00
fa9ab93c77  Add proxy support  2021-07-15 12:29:55 -04:00
833a0c0468  Add automatic migration to new db location  2021-07-15 11:17:54 -04:00
b71253d8dc  Remove debug statement  2021-07-15 10:48:25 -04:00
84941e2cb4  Fix sending bytes instead of mebibytes  2021-07-15 03:01:15 -04:00
940af6508c  Default metadata path is metadata.db now  2021-07-15 02:53:00 -04:00
d3434e8408  Fix includes for publishing  2021-07-15 02:45:05 -04:00
3786827f20  Add sqlx json  2021-07-15 02:16:33 -04:00
261427a735  Bump version to 0.5.0  2021-07-15 02:14:41 -04:00
c4fa53fa40  Add geo ip logging support  2021-07-15 02:14:04 -04:00
1c00c993bf  Rename metrics to conventions  2021-07-15 02:13:31 -04:00
b2650da556  Documented more CLI options  2021-07-15 02:12:20 -04:00
6b3c6ce03a  Added geo ip dependencies  2021-07-15 02:10:30 -04:00
032db4e1dd  Add special thanks to readme  2021-07-15 02:09:57 -04:00
355fd936ab  Add debug message to outgoing ping request  2021-07-15 01:19:21 -04:00
6415c3dee6  Respect external ip config  2021-07-15 00:54:31 -04:00
acf6dc1cb1  Add geoip config reading  2021-07-14 22:32:05 -04:00
d4d22ec674  Reduce tokio features  2021-07-14 21:56:46 -04:00
353ee72713  Add unit tests  2021-07-14 21:56:29 -04:00
b1797dafd2  Fix double write bug  2021-07-14 19:11:46 -04:00
9209b822a9  Simplify DiskWriter poll_flush  2021-07-14 14:20:31 -04:00
5338ff81a5  Move impl block to correct location  2021-07-14 14:00:02 -04:00
53015e116f  Renamed EncryptedDiskReader to EncryptedReader  2021-07-14 13:32:26 -04:00
7ce974b4f9  Make EncryptedDiskReader generic over R  2021-07-14 13:32:00 -04:00
973ece3604  MetadataFuture tests, fix UB  2021-07-14 13:28:09 -04:00
0c78b379f1  Remove comments w.r.t. potential db optimizations  2021-07-14 11:21:28 -04:00
    DB queries already are performed on their own thread, so spawning a
    thread to do db work is unncessary.
94375b185f  More debugging  2021-07-13 23:12:29 -04:00
6ac8582183  Encrypted files work in debug mode  2021-07-13 20:38:01 -04:00
656543b539  more partial work into encryption  2021-07-13 16:39:32 -04:00
2ace8d3d66  Partial rewrite of encrypted writer  2021-07-13 13:16:44 -04:00
160f369a72  Migrate to tracing crate  2021-07-12 23:23:51 -04:00
f8ee49ffd7  Add extended options for sample config  2021-07-12 22:35:06 -04:00
9f76a7a1b3  Add compression middleware  2021-07-12 16:39:06 -04:00
2f271a220a  remove tarpaulin-report  2021-07-12 16:35:17 -04:00
868278991a  Add disk tests  2021-07-12 15:59:52 -04:00
e8bea39100  Remove serialize impl from legacy structs  2021-07-12 13:43:47 -04:00
20e349fd79  Add sqlx envs  2021-07-12 01:44:01 -04:00
80eeacd884  try fix ci  2021-07-12 01:35:32 -04:00
580d05d00c  add sqlx check  2021-07-12 01:34:37 -04:00
acd8f234ab  ignore cfg tarpaulin for now  2021-07-12 01:08:19 -04:00
4c135cd72d  Add coverage action  2021-07-12 01:05:18 -04:00
acc3ab2186  use hex instead of u8s in test  2021-07-12 00:49:06 -04:00
8daa6bdc27  add tests for Md5Hash conversions  2021-07-12 00:48:00 -04:00
69587b9ade  Clippy lints  2021-07-12 00:12:15 -04:00
8f3430fb77  Add support for reading old db image ids  2021-07-11 23:33:22 -04:00
ec9473fa78  clippy lint  2021-07-11 23:25:17 -04:00
3764af0ed5  Optimize header creation  2021-07-11 14:23:15 -04:00
099c795cca  Add potential perf gains in the future  2021-07-11 14:22:59 -04:00
92a66e60dc  Seek file from beginning on encrypted header  2021-07-11 13:25:02 -04:00
5143dff888  Add logging  2021-07-11 13:21:57 -04:00
7546948196  nightly clippy lints  2021-07-11 13:19:37 -04:00
5f4be9809a  Add support for legacy files  2021-07-11 02:33:51 -04:00
8040c49a1e  Add warning for encryption  2021-07-11 00:15:43 -04:00
9871fc3774  bump to 0.4  2021-07-10 19:07:55 -04:00
f64d03493e  use static default headers  2021-07-10 19:04:27 -04:00
93249397f1  Simply codebase  2021-07-10 18:53:28 -04:00
154679967b  writing optimizations  2021-07-10 14:22:29 -04:00
b90edd72a6  Add clippy to CI  2021-07-09 21:25:08 -04:00
3ec4d1c125  remove anchor from github workflows  2021-07-09 20:59:23 -04:00
52ca595029  fix workflow ignores  2021-07-09 20:58:07 -04:00
e0bd29751a  Update paths to ignore  2021-07-09 20:56:35 -04:00
7bd9189ebd  Update readme  2021-07-09 20:51:45 -04:00
c98a6d59f4  Have build script create .env if not found  2021-07-09 20:43:33 -04:00
60bec5592a  Have build script modify .env file  2021-07-09 20:37:21 -04:00
de5816a44a  add env value to CI  2021-07-09 20:22:58 -04:00
6fa301a4a5  rename workflow  2021-07-09 20:12:58 -04:00
7b5738da8d  add standard CI  2021-07-09 20:11:43 -04:00
5ab04a9e9c  add security CI  2021-07-09 20:02:22 -04:00
e65a7ba9ef  Update dependencies  2021-07-09 19:53:58 -04:00
3a855a7e4a  add debug for client config  2021-07-09 19:51:48 -04:00
5300afa205  Clean up ClientSecret usage  2021-07-09 19:48:25 -04:00
c5383639f5  ignore default config value  2021-07-09 19:18:09 -04:00
88561f7c2c  Fix short arg conflict  2021-07-09 19:17:56 -04:00
ce03ce0baf  add support for yaml files  2021-07-09 19:14:53 -04:00
e78315025d  Use build script  2021-07-09 17:32:00 -04:00
a8e5d09ff0  clippy lints  2021-07-09 17:20:15 -04:00
b4f27c5f8c  migrate config to Config struct  2021-07-09 17:18:43 -04:00
5b67431778  initial work  2021-07-09 14:36:04 -04:00
e33c0c73e1  Update dependencies  2021-07-07 14:59:58 -04:00
fe967a2272  clippy  2021-06-24 10:41:42 -04:00
98ab7ab9e8  Blackhole non-token routes  2021-06-24 10:39:12 -04:00
51173fa1a3  update index.html  2021-06-06 20:36:51 -04:00
2501bb8da0  Upstream client to always use HTTP/2 and HTTPS  2021-06-06 18:51:05 -04:00
97ea5eb448  Use short SHA in start message  2021-06-06 18:17:42 -04:00
75752eb13e  remove unnecessary negation  2021-06-06 18:03:47 -04:00
1839b64807  cargo update  2021-06-06 17:49:07 -04:00
85c644ffed  Add git commit version  2021-06-06 17:48:48 -04:00
9e9c955f5a  Update client spec version  2021-05-28 15:30:25 -04:00
1c8f3da8d0  More logging  2021-05-27 19:24:54 -04:00
5536f9638a  Add index page  2021-05-27 17:05:50 -04:00
00739ec645  More logging  2021-05-27 16:47:35 -04:00
491d0c9fda  Fix reading from encrypted files  2021-05-27 16:22:32 -04:00
f038452263  Fix logging messages  2021-05-27 16:22:15 -04:00
c32e534300  Revamp reading potentially encrypted files  2021-05-27 16:19:26 -04:00
    Previously, we'd assume that if the header was successfully parsed, then
    we'd have a valid encrypted header. This isn't the case, and now instead
    we try to read metadata, first unencrypted, then encrypted, to make sure
    we're reading with the correct abstraction.
34c716bfce  Update dependencies  2021-05-27 16:18:50 -04:00
a732019f60  use split_at for token parsing  2021-05-24 15:21:45 -04:00
b8c12b463f  Use macro for metrics generation  2021-05-23 19:05:57 -04:00
579d316945  clippy  2021-05-22 23:10:34 -04:00
5fd8e86d97  Add support for offline mode  2021-05-22 23:06:05 -04:00
79f73ed68e  Add prometheus endpoint  2021-05-22 22:10:03 -04:00
96a6b76baa  better combinator  2021-05-22 20:19:17 -04:00
7acc126de2  optimize file opening  2021-05-20 14:23:16 -04:00
ea8e031a3d  Remove extraneous conversion  2021-05-20 13:49:37 -04:00
7f16e697e6  Fix lints  2021-05-20 13:42:08 -04:00
a3f3b5e3ab  Gracefully handle secretstream errors  2021-05-20 13:35:57 -04:00
a70f4bfdc3  Support loading disk size from db  2021-05-20 00:38:28 -04:00
6daadefaf1  Resolve clippy lints  2021-05-19 23:52:15 -04:00
0fcde518c5  Use qualified instead of absolute  2021-05-19 23:29:18 -04:00
c488eccc8d  Clean up traits  2021-05-19 23:27:56 -04:00
4f55380d23  Remove layer of indirection  2021-05-19 22:42:44 -04:00
b66cccc832  implement at-rest encryption  2021-05-19 21:42:55 -04:00
8772df2231  Add additional handling for ping errors  2021-05-19 20:33:39 -04:00
fe28f19f07  initial work into encryption at rest  2021-05-11 21:01:01 -04:00
eb605b9c27  merge memory cache types  2021-05-11 18:56:00 -04:00
739137c4be  Fixed erronous log statement  2021-05-11 16:56:28 -04:00
6da2cba78a  Use unstable options  2021-04-25 12:55:31 -04:00
316e69851e  Fix typo in readme  2021-04-25 00:02:06 -04:00
d4c0859115  Update build instructions  2021-04-25 00:01:20 -04:00
0857ffadc7  Add LFU cache  2021-04-24 23:56:56 -04:00
0db06fcabd  explicitly mem drop lazy init failure  2021-04-24 12:57:32 -04:00
4a36541570  reorder imports  2021-04-24 12:47:33 -04:00
151d0c5a83  Add docs  2021-04-24 12:46:18 -04:00
9abeec89bc  use arc_swap for tls  2021-04-24 00:56:58 -04:00
70015a7c38  comment why tls is slow  2021-04-23 23:28:47 -04:00
654178dd5c  Use static HTTP client instead  2021-04-23 22:23:24 -04:00
424dc09ae3  remove more unwraps  2021-04-23 18:25:59 -04:00
eb8a334a05  Remove some unwraps  2021-04-23 18:03:53 -04:00
141b5b257c  rename lowmem cache to disk cache  2021-04-23 17:22:29 -04:00
0918b210ea  don't fill logs with compromised response  2021-04-23 00:34:23 -04:00
3268321711  add db_queries to package  2021-04-23 00:15:34 -04:00
ef5ed89626  add missing files  2021-04-23 00:13:36 -04:00
8e94bd5033  remove debug in release mode  2021-04-23 00:12:55 -04:00
548ce87631  increment minor version  2021-04-23 00:12:15 -04:00
d9cebcbb4b  Add memory accelerated disk cache  2021-04-23 00:11:30 -04:00
77cd416849  Move task to separate function  2021-04-22 21:55:26 -04:00
a86cd3edf5  Don't use arc for cache keys  2021-04-22 21:46:34 -04:00
8d95fe3f07  remove unnecessary channel  2021-04-22 21:34:31 -04:00
758b0ec78d  Fix updating db timestamp  2021-04-22 21:29:26 -04:00
288872e84b  Add db queries  2021-04-22 21:21:32 -04:00
b01fa618b4  Add buffered reader again  2021-04-22 20:01:11 -04:00
84ea4bea89  update gitignore  2021-04-22 19:44:19 -04:00
2193d74e24  Fix not skipping over metadata  2021-04-22 19:22:34 -04:00
8cc21f4803  Revert "use bufreaders"  2021-04-22 18:09:48 -04:00
    This reverts commit f335b99024.
f335b99024  use bufreaders  2021-04-22 17:52:16 -04:00
3c1388fced  false positive  2021-04-22 13:40:19 -04:00
7e4c54867e  Downgrade sql statements to trace  2021-04-22 13:20:45 -04:00
6fda24186b  use debug logging level for sql statements  2021-04-22 13:18:50 -04:00
5099666322  temporary remove generational  2021-04-22 13:11:08 -04:00
eab0449a02  use Arc for cache keys  2021-04-22 13:03:33 -04:00
efd6b5c60c  Remove cache from hot path  2021-04-22 12:44:02 -04:00
53c0cb664a  fix truncated bytes  2021-04-20 17:48:04 -04:00
8f03aa0236  use cleaner concurrentfs implementation  2021-04-20 17:28:02 -04:00
e3f6ff8e71  add verbosity options  2021-04-20 14:12:20 -04:00
db8473d5bf  clippy  2021-04-19 22:14:57 -04:00
9904ba2cfc  use callback based pruning  2021-04-19 22:01:32 -04:00
74966678bd  fix deadlock  2021-04-19 00:16:13 -04:00
1ac7b619cf  debug streaming  2021-04-18 23:06:18 -04:00
6be1f80e8a  have reqwest use rustls  2021-04-18 19:14:36 -04:00
327fc48f5b  add version number  2021-04-18 18:39:12 -04:00
ae216b2410  add pruning  2021-04-18 17:38:33 -04:00
525aef91bf  Use question mark in get  2021-04-18 17:25:28 -04:00
2650a96d16  fix memstream impl  2021-04-18 17:17:31 -04:00
de17c738d2  remove todos  2021-04-18 17:11:30 -04:00
6717fbe20b  fix caching  2021-04-18 17:06:40 -04:00
63a2e0beb1  initial work into lowmem  2021-04-17 23:19:27 -04:00
6181486827  fix conflicts_with for low mem mode  2021-04-17 22:13:36 -04:00
453cad1b76  switch to GPLv3 or later  2021-04-17 22:12:02 -04:00
c25b8be45b  cache trait accepts a bytestream instead  2021-04-14 23:44:13 -04:00
8949e41bee  Run clippy  2021-04-14 22:52:54 -04:00
c75b5063af  remove cacahe, use disk cache instead  2021-04-14 22:11:00 -04:00
a679055f3d  more comments  2021-04-09 23:15:44 -04:00
de75ff52e2  pedantic clippy  2021-04-09 22:02:08 -04:00
109a0850f9  fixed imports  2021-04-09 22:00:59 -04:00
c776f09026  make UpstreamError pub  2021-04-09 22:00:09 -04:00
70b7e61745  Add transparent caching  2021-04-09 21:59:29 -04:00
630187ecb2  remove double removal logic  2021-03-27 14:20:10 -04:00
49114d4c61  Custom implementation of Tls  2021-03-26 00:07:32 -04:00
ee830fc152  more perf  2021-03-25 22:58:07 -04:00
f775ad72d3  Support CLI args  2021-03-25 21:06:54 -04:00
cf82aedb12  use TCP MSS as chunk size  2021-03-23 15:31:59 -04:00
46eec93773  inline  2021-03-23 12:59:49 -04:00
91f420330b  better logging  2021-03-22 23:19:56 -04:00
2e02944958  Remove no token paths  2021-03-22 23:04:54 -04:00
c828c76128  organize deps  2021-03-22 22:52:03 -04:00
afb4a1b47f  fix lint  2021-03-22 20:00:21 -04:00
48dc68e680  More cargo fixes  2021-03-22 19:55:15 -04:00
7b39d5c730  update deps  2021-03-22 19:51:51 -04:00
839ba47a8c  rename crate  2021-03-22 19:51:25 -04:00
e1f26b102d  update readme, add license  2021-03-22 18:01:12 -04:00
95e41f42c0  finish minimum version  2021-03-22 17:47:56 -04:00
33 changed files with 7644 additions and 761 deletions

.dockerignore Normal file (+10)

@@ -0,0 +1,10 @@
# Ignore everything
*
# Only include necessary paths (This should be synchronized with `Cargo.toml`)
!db_queries/
!src/
!settings.sample.yaml
!sqlx-data.json
!Cargo.toml
!Cargo.lock

.github/workflows/build_and_test.yml vendored Normal file (+53)

@@ -0,0 +1,53 @@
name: Build and test

on:
  push:
    branches: [ master ]
    paths-ignore:
      - "docs/**"
      - "settings.sample.yaml"
      - "README.md"
      - "LICENSE"
  pull_request:
    branches: [ master ]
    paths-ignore:
      - "docs/**"
      - "settings.sample.yaml"
      - "README.md"
      - "LICENSE"

env:
  CARGO_TERM_COLOR: always
  DATABASE_URL: sqlite:./cache/metadata.sqlite
  SQLX_OFFLINE: true

jobs:
  clippy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - run: rustup component add clippy
      - uses: actions-rs/clippy-check@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          args: --all-features
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose
  sqlx-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Install sqlx-cli
        run: cargo install sqlx-cli
      - name: Initialize database
        run: mkdir -p cache && sqlite3 cache/metadata.sqlite < db_queries/init.sql
      - name: Check sqlx statements
        run: cargo sqlx prepare --check

.github/workflows/coverage.yml vendored Normal file (+22)

@@ -0,0 +1,22 @@
name: coverage

on: [push]

jobs:
  test:
    name: coverage
    runs-on: ubuntu-latest
    container:
      image: xd009642/tarpaulin:develop-nightly
      options: --security-opt seccomp=unconfined
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Generate code coverage
        run: |
          cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --avoid-cfg-tarpaulin --out Xml
      - name: Upload to codecov.io
        uses: codecov/codecov-action@v1
        with:
          fail_ci_if_error: true

.github/workflows/security_audit.yml vendored Normal file (+14)

@@ -0,0 +1,14 @@
name: Security audit

on:
  push:
    paths:
      - '**/Cargo.toml'
      - '**/Cargo.lock'

jobs:
  security_audit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - uses: actions-rs/audit-check@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

.gitignore vendored (+8)

@@ -1 +1,9 @@
 /target
+.env
+/cache
+flamegraph*.svg
+perf.data*
+dhat.out.*
+settings.yaml
+tarpaulin-report.html
+GeoLite2-Country.mmdb

Cargo.lock generated (2243 changes)

File diff suppressed because it is too large

Cargo.toml

@@ -1,25 +1,69 @@
 [package]
-name = "mangadex-home-rs"
+name = "mangadex-home"
-version = "0.1.0"
+version = "0.5.4"
+license = "GPL-3.0-or-later"
 authors = ["Edward Shen <code@eddie.sh>"]
 edition = "2018"
+include = [
+    "src/**/*",
+    "db_queries",
+    "LICENSE",
+    "README.md",
+    "sqlx-data.json",
+    "settings.sample.yaml"
+]
+description = "A MangaDex@Home implementation in Rust."
+repository = "https://github.com/edward-shen/mangadex-home-rs"
+
+[profile.release]
+lto = true
+codegen-units = 1
+debug = 1

 [dependencies]
+# Pin because we're using unstable versions
-actix-web = { version = "4.0.0-beta.4", features = [ "rustls" ] }
+actix-web = { version = "4", features = [ "rustls" ] }
-awc = "3.0.0-beta.3"
+arc-swap = "1"
+async-trait = "0.1"
 base64 = "0.13"
+bincode = "1"
-bytes = "1"
+bytes = { version = "1", features = [ "serde" ] }
+chacha20 = "0.7"
 chrono = { version = "0.4", features = [ "serde" ] }
+clap = { version = "3", features = [ "wrap_help", "derive", "cargo" ] }
+ctrlc = "3"
 dotenv = "0.15"
+flate2 = { version = "1", features = [ "tokio" ] }
 futures = "0.3"
+once_cell = "1"
-log = "0.4"
+log = { version = "0.4", features = [ "serde" ] }
+lfu_cache = "1"
-lru = "0.6"
+lru = "0.7"
+maxminddb = "0.20"
+md-5 = "0.9"
 parking_lot = "0.11"
+prometheus = { version = "0.12", features = [ "process" ] }
+redis = "0.21"
+reqwest = { version = "0.11", default_features = false, features = [ "json", "stream", "rustls-tls" ] }
-rustls = "0.19"
+rustls = "0.20"
+rustls-pemfile = "0.2"
 serde = "1"
 serde_json = "1"
+serde_repr = "0.1"
+serde_yaml = "0.8"
-simple_logger = "1"
 sodiumoxide = "0.2"
+sqlx = { version = "0.5", features = [ "runtime-actix-rustls", "sqlite", "time", "chrono", "macros", "offline" ] }
+tar = "0.4"
 thiserror = "1"
+tokio = { version = "1", features = [ "rt-multi-thread", "macros", "fs", "time", "sync", "parking_lot" ] }
+tokio-stream = { version = "0.1", features = [ "sync" ] }
+tokio-util = { version = "0.6", features = [ "codec" ] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.2", features = [ "parking_lot" ] }
 url = { version = "2", features = [ "serde" ] }
+
+[build-dependencies]
+vergen = "5"
+
+[dev-dependencies]
+tempfile = "3"
Dockerfile Normal file (+11)

@@ -0,0 +1,11 @@
# syntax=docker/dockerfile:1

FROM rust:alpine as builder
COPY . .
RUN apk add --no-cache file make musl-dev \
    && cargo install --path . \
    && strip /usr/local/cargo/bin/mangadex-home

FROM alpine:latest
COPY --from=builder /usr/local/cargo/bin/mangadex-home /usr/local/bin/mangadex-home
CMD ["mangadex-home"]

LICENSE Normal file (+674)

@@ -0,0 +1,674 @@
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.

README.md Normal file (+87)

@@ -0,0 +1,87 @@
A Rust implementation of a MangaDex@Home client.

This client contains the following features:

- Easy migration from the official client
- Fully compliant with MangaDex@Home specifications
- Multi-threaded, high performance, and low overhead client
- HTTP/2 support for API users, HTTP/2 only for upstream connections
- Secure and privacy oriented features:
  - Only supports TLS 1.2 or newer; HTTP is not enabled by default
  - Options for no logging and no metrics
  - Support for on-disk XChaCha20 encryption with ephemeral key generation
- Supports an internal LFU, LRU, or a redis instance for in-memory caching

## Building

```sh
cargo build
cargo test
```
You may need to set a client secret; see the Configuration section for more
information.

# Migration

Migration from the official client was made to be as painless as possible.
There are a few caveats, though:
- If you ever want to return to using the official client, you will need to
clear your cache.
- As this is an unofficial client implementation, the only support you can
probably get is from me.
Otherwise, the migration steps are easy:
1. Place the binary in the same folder as your `images` folder and
`settings.yaml`.
2. Rename `images` to `cache`.
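
For example, assuming the official client's default layout, the whole migration amounts to:

```sh
# run from the folder containing settings.yaml and the old images folder
mv images cache
./mangadex-home
```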
# Client implementation
This client follows a secure-first approach. As such, your statistics may
report an _ever-so-slightly_ higher-than-average failure rate. Specifically,
this client chooses to:
- Not support TLS 1.1 or 1.0, which would be a primary source of
incompatibility.
- Not provide a server identification string in the header of served requests.
- HTTPS is enabled by default; HTTP is provided (but unsupported).
That being said, this client should be backwards compatible with the official
client's data and config. That means you should be able to replace the binary
and preserve all your settings and cache.
## Installation
Either build it from source or run `cargo install mangadex-home`.
## Configuration
Most configuration options can either be provided on the command line or sourced
from a file named `settings.yaml` in the directory you ran the command from,
which will be created on first run.
Note that the client secret (`CLIENT_SECRET`) is the only configuration option
that can only be provided from the environment, an `.env` file, or the
`settings.yaml` file. In other words, you _cannot_ provide this value from the
command line.
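For example, a sketch of two of the non-command-line channels (the secret below
is a placeholder):
```sh
# Via the environment:
export CLIENT_SECRET='your-secret-here'
# ...or via an `.env` file:
echo "CLIENT_SECRET=your-secret-here" >> .env
```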
## Special thanks
This project could not have been completed without the assistance of the
following:
#### Development Assistance (Alphabetical Order)
- carbotaniuman#6974
- LFlair#1337
- Plykiya#1738
- Tristan 9#6752
- The Rust Discord community
#### Beta testers
- NigelVH#7162
---
If using the geo IP logging feature, then this product includes GeoLite2 data
created by MaxMind, available from https://www.maxmind.com.

12
build.rs Normal file
View file

@ -0,0 +1,12 @@
use std::error::Error;
use vergen::{vergen, Config, ShaKind};
fn main() -> Result<(), Box<dyn Error>> {
// Initialize vergen stuff
let mut config = Config::default();
*config.git_mut().sha_kind_mut() = ShaKind::Short;
vergen(config)?;
Ok(())
}

6
db_queries/init.sql Normal file
View file

@ -0,0 +1,6 @@
create table if not exists Images(
id varchar primary key not null,
size integer not null,
accessed timestamp not null default CURRENT_TIMESTAMP
);
create index if not exists Images_accessed on Images(accessed);

1
db_queries/insert_image.sql Normal file
View file

@ -0,0 +1 @@
insert into Images (id, size, accessed) values (?, ?, ?) on conflict do nothing

9
docker-compose.yml Normal file
View file

@ -0,0 +1,9 @@
version: "3.9"
services:
mangadex-home:
build: .
ports:
- "443:443"
volumes:
- ./cache:/cache
- ./settings.yaml:/settings.yaml

14
docs/ciphers.md Normal file
View file

@ -0,0 +1,14 @@
# Ciphers
This client relies on rustls, which only supports a subset of TLS ciphers.
Specifically, only TLS 1.2 ECDSA GCM ciphers as well as all TLS 1.3 ciphers are
supported. This means that clients that only support older, more insecure
ciphers may not be able to connect to this client.
In practice, this means this client's failure rate may be higher than expected.
This is okay, and within specifications.
## Why even bother?
Well, Australia has officially banned hentai... so I gotta make sure my mates
over there won't get in trouble if I'm connecting to them.

14
docs/unstable_options.md Normal file
View file

@ -0,0 +1,14 @@
# Unstable Options
Unstable options are options that are either experimental, dangerous, for
development only, or a mix of the three. The following table describes each
option. Generally speaking, you should never need to enable these unless you
know what you're doing.
| Option | Experimental? | Dangerous? | For development? |
| -------------------------- | ------------- | ---------- | ---------------- |
| `override-upstream` | | | Yes |
| `use-lfu` | Yes | | |
| `disable-token-validation` | | Yes | Yes |
| `offline-mode` | | | Yes |
| `disable-tls` | | Yes | Yes |

114
settings.sample.yaml Normal file
View file

@ -0,0 +1,114 @@
---
# ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢦⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢰⠀⠀⠀⠀⠀⠀⠀⣀⡠⠤⢼⣈⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⠀⠀⠀⠀⠀⣀⣀⣀⣀⡀⠀⠀⠀⢸⡧⣀⠀⣄⡠⠒⠉⠀⠀⠀⢀⡈⢑⢦⠀⠀⠀⠀⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡠⣴⡗⠒⠉⠉⠉⠉⠉⠀⠀⠀⠀⠈⠉⠉⠉⠑⠣⡀⠉⠚⡷⡆⠀⣀⣀⣀⠤⡺⠈⠫⠗⠢⡄⠀⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣠⠔⠊⠁⢰⠃⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢰⠢⣧⠘⣎⠐⠠⠟⠋⠀⠀⠀⠀⢄⢸⠀⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⠴⠋⠀⠀⠀⠀⡜⠀⢱⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⡆⡏⠀⠸⡄⢀⠀⠀⠪⠴⠒⠒⠚⠘⢤⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⠔⠁⠀⠀⠀⠀⠀⠀⡇⠀⠀⢣⡀⢀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠀⢱⠃⠀⠀⡇⡇⠉⠖⡤⠤⠤⠤⠴⢒⠺⠀⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣠⠋⠀⠀⠀⠀⠀⠀⠀⠀⢸⠀⠀⠀⠱⡼⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠘⣆⡎⠀⠀⢀⡇⠙⠦⣌⣹⣏⠉⠉⠁⣀⠠⣂⠀⠀⠀
#⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡴⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢧⠀⠀⠀⣇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢰⠀⢻⣿⣿⣿⡟⠓⠤⣀⡀⠉⠓⠭⠭⠔⠒⠉⠈⡆⠀⠀
#⠀⠀⠀⠀⠀⠀⢀⣀⣀⣀⣀⣀⡸⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠈⢣⣠⠴⢻⠀⠀⡄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣧⢸⡈⠉⠉⢣⠀⠀⠀⠉⠑⠢⢄⣀⣀⡤⠖⠋⠳⡀⠀
#⠀⠀⠀⠀⠀⡔⠁⠀⢇⠀⠀⠀⠈⠳⡀⠀⠀⢀⠂⠀⠀⠀⠀⢀⠃⢠⠏⠀⠀⠈⡆⢠⢷⠀⠀⠀⠀⠀⠀⠀⠀⢰⠀⣿⡎⡇⠀⡠⠈⡇⠀⠀⠀⠀⠀⠀⠈⣦⠃⠀⠀⠈⢦⠀
#⠀⠀⠀⠀⡰⠃⠀⠀⠘⡄⠀⠀⠀⠀⢱⠀⠀⡜⠀⠀⠀⠀⠀⢸⠐⡮⢀⠀⠀⠀⢱⡸⡄⢧⠀⠀⠀⡀⠀⠀⠀⢸⣇⡿⢳⡧⠊⠀⠀⡇⡇⠀⠆⠀⠀⠀⠀⢱⡀⡠⠂⠀⠈⡇
#⠀⠀⠀⢰⠁⠀⠀⡀⠀⠘⡄⠀⠀⠀⢸⠀⢠⠃⠀⠀⢀⠀⠀⣼⢠⠃⠀⠁⠢⢄⠀⠳⣇⠀⠣⡀⠀⢣⡀⠀⠀⢸⢹⡧⢺⠀⠀⠀⠀⡷⢹⠀⢠⠀⠀⠀⠀⠈⡏⠳⡄⠀⠀⢳
#⠀⠀⠀⢸⠀⠀⠀⠈⠢⢄⠈⣢⠔⠒⠙⠒⢼⠀⠀⠀⢸⠀⢀⠿⣸⠀⠀⠀⠀⠀⠉⠢⢌⠀⠀⠈⠉⠒⠯⠉⠒⠚⠚⣠⠾⠶⠿⠷⢶⣧⡈⢆⢸⠀⠀⠀⠀⠀⢣⠀⢱⠀⠀⡎
#⠀⠀⠀⢸⠀⠀⠸⠀⠀⠀⢹⠁⠀⠀⠀⡀⡞⠀⠀⠀⢸⠀⢸⠀⢿⠀⣠⣴⠾⠛⠛⠓⠢⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠁⡀⠘⣿⣶⣄⠈⢻⣆⢻⠀⡆⠀⠀⠀⢸⠠⣸⠂⢠⠃
#⠀⠀⠀⠘⡄⠀⠀⢡⠀⠀⡼⠀⡠⠐⠁⠀⡇⠀⠀⠀⠈⡆⢸⠀⢨⡾⠋⠀⠀⢻⣿⣿⣷⣦⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⡿⢻⣿⣿⠻⣇⠀⢻⣾⠀⡇⠀⠀⠀⠈⡞⠁⣠⠋⠀
#⠀⠀⠀⠀⠱⡀⠀⠀⠑⢄⡑⣅⠀⠀⠀⠀⡇⠀⠀⠀⠀⠘⣼⠀⣿⠁⠀⢠⡷⢾⣿⣿⡟⠛⡇⠀⠀⠀⠀⠀⠀⠀⠀⠸⡄⠈⠛⠁⠀⢸⠀⠈⢹⡸⠀⠀⠀⠀⠀⡧⠚⡇⠀⠀
#⠀⠀⠀⠀⠀⠈⠢⢄⣀⠀⠈⠉⢑⣶⠴⠒⡇⠀⠀⠀⠀⠀⡟⠧⡇⠀⠀⠸⡁⠀⠙⠋⠀⠀⡞⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⠦⣤⣤⡴⠃⠀⠀⠼⠣⡀⠀⡇⠀⠀⡷⢄⣇⠀⠀
#⠀⠀⠀⠀⠀⠀⢀⠞⠀⡏⠉⢉⣵⣳⠀⠀⡇⠀⠀⠀⠀⠀⢱⠀⠁⠀⠀⠀⠑⠤⠤⡠⠤⠊⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠠⠡⢁⠀⠀⢱⠀⡇⠀⢠⡇⠀⢻⡀⠀
#⠀⠀⠀⠀⠀⢠⠎⠀⢸⣇⡔⠉⠀⢹⡀⠀⡇⠀⠀⠀⠀⠀⢸⠀⠀⠀⠀⠐⡀⢀⠀⠄⡀⠀⠀⠀⠀⠀⠀⢀⣀⣀⣀⣀⣀⣀⣤⠈⠠⠡⠁⠂⠌⠀⢸⠀⡗⠀⢸⠇⠀⢀⡇⠀
#⠀⠀⠀⠀⢠⠃⠀⠀⡎⡏⠀⠀⠀⠀⡇⠀⡇⠀⡆⠀⠀⠀⠘⡄⠀⠀⠈⠌⠠⠂⠌⠐⠀⢀⠎⠉⠒⠉⠉⠉⠉⠙⠛⠧⢸⣿⣿⠀⠀⠀⠀⠀⠀⠀⡼⢀⠇⠀⢸⣀⠴⠋⢱⠀
#⠀⠀⠀⢠⠃⠀⠀⢰⠙⢣⡀⠀⠀⣇⡇⠀⢧⠀⡇⠀⠀⠀⠀⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⢸⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⡿⠀⠀⠀⠀⠀⢀⡼⠃⡜⠀⠀⡏⢱⠀⠐⠈⡇
#⠀⠀⢠⢃⠀⠀⢠⠇⠀⢸⡉⠓⠶⢿⠃⠀⢸⠀⡇⠀⠀⠀⡄⢹⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⡄⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⡴⠁⠀⠀⠀⢀⣴⡟⠁⡰⠁⠀⢰⢧⠈⡆⠀⠇⢇
#⠀⠀⡜⡄⠀⢀⡎⠀⠀⠀⡇⠀⠀⢸⠀⠀⠈⡇⣿⠀⠀⠀⢧⠈⡗⠢⢤⣀⠀⠀⠀⠀⠀⠀⠙⢄⡀⠀⠀⠀⠀⠀⣀⡤⠚⠁⢀⣠⡤⠞⠋⢀⠇⡴⠁⠀⠀⠾⣼⠀⢱⠀⢸⢸
#⠀⠀⡇⡇⠀⡜⠀⠀⠀⠀⡇⠀⠀⣾⠀⠀⠀⢹⡏⡆⠀⠀⢸⢆⠸⡄⠀⠀⠉⢑⣦⣤⡀⠀⠀⠀⠉⠑⠒⣒⣋⣉⣡⣤⠒⠊⠉⡇⠀⠀⠀⣾⣊⠀⠀⠀⠈⢠⢻⠀⢸⣀⣿⡜
#⠀⠀⣷⢇⢸⠁⠀⠀⠀⠀⡇⠀⢰⢹⠀⠀⠀⠀⢿⠹⡀⠀⠸⡀⠳⣵⡀⡠⠚⠉⠙⢿⣿⣷⣦⣀⠀⠀⠀⣱⣿⣿⠀⠈⠉⠲⣄⢧⣠⠒⢌⡇⡠⣃⣀⡠⠔⠁⠀⡇⢸⡟⢸⠇
#⠀⠀⢻⠘⣼⠀⠀⠀⠀⢰⠁⣠⠃⢸⠀⠀⠀⠀⠘⠀⠳⡀⠀⡇⠀⢀⠟⠦⠀⡀⠀⢸⣛⣻⣿⣿⣿⣶⣭⣿⣿⣻⡆⠀⠀⠀⠈⢦⠸⣽⢝⠿⡫⡁⢸⡇⠀⠀⠀⢣⠘⠁⠘⠀
#⠀⠀⠘⠆⠸⠄⠀⠀⢠⠏⡰⠁⠀⡞⠀⠀⠀⠀⠀⠀⠀⠙⢄⣸⣶⣷⣶⣶⣶⣤⣤⣼⣿⣽⣯⣿⣿⣿⣷⣾⣿⣿⣿⣾⣤⣴⣶⣾⣷⣇⠺⠤⠕⠈⢉⠇⠀⠀⠀⠘⡄
#
# MangaDex@Home configuration file
#
# Thanks for contributing to MangaDex@Home, friend!
# Beat up a pineapple, and don't forget your AsaCoco!
#
# Default values are commented out.
# The size in mebibytes of the cache. You can use megabytes instead in a pinch,
# but just know the two are **NOT** the same.
max_cache_size_in_mebibytes: 0
server_settings:
# The client secret. Keep this secret at all costs :P
secret: suichan wa kyou mo kawaii!
# The port for the webserver to listen on. 443 is recommended for max appeal.
# port: 443
# This controls the value the server receives for your upload speed.
external_max_kilobits_per_second: 1
#
# Advanced settings
#
# The external hostname to listen on. Keep this at 0.0.0.0 unless you know
# what you're doing.
# hostname: 0.0.0.0
# The external port to broadcast to the backend. Keep this at 0 unless you
# know what you're doing. 0 means broadcast the same value as `port`.
# external_port: 0
# How long to wait at most for the graceful shutdown (Ctrl-C or SIGINT).
# graceful_shutdown_wait_seconds: 60
# The external ip to broadcast to the webserver. The default of null (~) means
# the backend will infer it from where it was sent from, which may fail in the
# presence of multiple IPs.
# external_ip: ~
# Settings for geo IP analytics
metric_settings:
# Whether to enable geo IP analytics
# enable_geoip: false
# geoip_license_key: none
# These settings are unique to the Rust client, and may be ignored or behave
# differently from the official client.
extended_options:
# Which cache type to use. By default, this is `on_disk`, but one can select
# `lfu`, `lru`, or `redis` to use an LFU, LRU, or redis instance in addition
# to the on-disk cache to improve lookup times. Generally speaking, using one
# is almost always better, but by how much depends on how much memory you let
# the node use, how large your node is, and which caching implementation you
# use.
# cache_type: on_disk
# The redis url to connect with. Does nothing if the cache type isn't `redis`.
# redis_url: "redis://127.0.0.1/"
# The amount of memory the client should use when using an in-memory cache.
# This does nothing if only the on-disk cache is used.
# memory_quota: 0
# Whether or not to expose a prometheus endpoint at /metrics. This is a
# completely open endpoint, so best practice is to make sure this is only
# readable from the internal network.
# enable_metrics: false
# If you'd like to specify a different path location for the cache, you can do
# so here.
# cache_path: "./cache"
# What logging level to use. Valid options are "error", "warn", "info",
# "debug", "trace", and "off", which disables logging.
# logging_level: info
# Enables disk encryption where the key is stored in memory. In other words,
# when the MD@H program is stopped, all cached files are irrecoverable.
# Practically speaking, this isn't all too useful (and definitely hurts
# performance), but it may offer some peace of mind.
# ephemeral_disk_encryption: false

75
sqlx-data.json Normal file
View file

@ -0,0 +1,75 @@
{
"db": "SQLite",
"24b536161a0ed44d0595052ad069c023631ffcdeadb15a01ee294717f87cdd42": {
"query": "update Images set accessed = ? where id = ?",
"describe": {
"columns": [],
"parameters": {
"Right": 2
},
"nullable": []
}
},
"2a8aa6dd2c59241a451cd73f23547d0e94930e35654692839b5d11bb8b87703e": {
"query": "insert into Images (id, size, accessed) values (?, ?, ?) on conflict do nothing",
"describe": {
"columns": [],
"parameters": {
"Right": 3
},
"nullable": []
}
},
"311721fec7824c2fc3ecf53f714949a49245c11a6b622efdb04fdac24be41ba3": {
"query": "SELECT IFNULL(SUM(size), 0) AS size FROM Images",
"describe": {
"columns": [
{
"name": "size",
"ordinal": 0,
"type_info": "Int"
}
],
"parameters": {
"Right": 0
},
"nullable": [
true
]
}
},
"44234188e873a467ecf2c60dfb4731011e0b7afc4472339ed2ae33aee8b0c9dd": {
"query": "select id, size from Images order by accessed asc limit 1000",
"describe": {
"columns": [
{
"name": "id",
"ordinal": 0,
"type_info": "Text"
},
{
"name": "size",
"ordinal": 1,
"type_info": "Int64"
}
],
"parameters": {
"Right": 0
},
"nullable": [
false,
false
]
}
},
"a60501a30fd75b2a2a59f089e850343af075436a5c543a267ecb4fa841593ce9": {
"query": "create table if not exists Images(\n id varchar primary key not null,\n size integer not null,\n accessed timestamp not null default CURRENT_TIMESTAMP\n);\ncreate index if not exists Images_accessed on Images(accessed);",
"describe": {
"columns": [],
"parameters": {
"Right": 0
},
"nullable": []
}
}
}

152
src/cache/compat.rs vendored Normal file
View file

@ -0,0 +1,152 @@
//! These structs have alternative deserialize and serializations
//! implementations to assist reading from the official client file format.
use std::str::FromStr;
use chrono::{DateTime, FixedOffset};
use serde::de::{Unexpected, Visitor};
use serde::{Deserialize, Serialize};
use super::ImageContentType;
#[derive(Copy, Clone, Deserialize)]
pub struct LegacyImageMetadata {
pub(crate) content_type: Option<LegacyImageContentType>,
pub(crate) size: Option<u32>,
pub(crate) last_modified: Option<LegacyDateTime>,
}
#[derive(Copy, Clone, Serialize)]
pub struct LegacyDateTime(pub DateTime<FixedOffset>);
impl<'de> Deserialize<'de> for LegacyDateTime {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct LegacyDateTimeVisitor;
impl<'de> Visitor<'de> for LegacyDateTimeVisitor {
type Value = LegacyDateTime;
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "a valid image type")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
DateTime::parse_from_rfc2822(v)
.map(LegacyDateTime)
.map_err(|_| E::invalid_value(Unexpected::Str(v), &"a valid date time"))
}
}
deserializer.deserialize_str(LegacyDateTimeVisitor)
}
}
#[derive(Copy, Clone)]
pub struct LegacyImageContentType(pub ImageContentType);
impl<'de> Deserialize<'de> for LegacyImageContentType {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct LegacyImageContentTypeVisitor;
impl<'de> Visitor<'de> for LegacyImageContentTypeVisitor {
type Value = LegacyImageContentType;
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "a valid image type")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
ImageContentType::from_str(v)
.map(LegacyImageContentType)
.map_err(|_| E::invalid_value(Unexpected::Str(v), &"a valid image type"))
}
}
deserializer.deserialize_str(LegacyImageContentTypeVisitor)
}
}
#[cfg(test)]
mod parse {
use std::error::Error;
use chrono::DateTime;
use crate::cache::ImageContentType;
use super::LegacyImageMetadata;
#[test]
fn from_valid_legacy_format() -> Result<(), Box<dyn Error>> {
let legacy_header = r#"{"content_type":"image/jpeg","last_modified":"Sat, 10 Apr 2021 10:55:22 GMT","size":117888}"#;
let metadata: LegacyImageMetadata = serde_json::from_str(legacy_header)?;
assert_eq!(
metadata.content_type.map(|v| v.0),
Some(ImageContentType::Jpeg)
);
assert_eq!(metadata.size, Some(117_888));
assert_eq!(
metadata.last_modified.map(|v| v.0),
Some(DateTime::parse_from_rfc2822(
"Sat, 10 Apr 2021 10:55:22 GMT"
)?)
);
Ok(())
}
#[test]
fn empty_metadata() -> Result<(), Box<dyn Error>> {
let legacy_header = "{}";
let metadata: LegacyImageMetadata = serde_json::from_str(legacy_header)?;
assert!(metadata.content_type.is_none());
assert!(metadata.size.is_none());
assert!(metadata.last_modified.is_none());
Ok(())
}
#[test]
fn invalid_image_mime_value() {
let legacy_header = r#"{"content_type":"image/not-a-real-image"}"#;
assert!(serde_json::from_str::<LegacyImageMetadata>(legacy_header).is_err());
}
#[test]
fn invalid_date_time() {
let legacy_header = r#"{"last_modified":"idk last tuesday?"}"#;
assert!(serde_json::from_str::<LegacyImageMetadata>(legacy_header).is_err());
}
#[test]
fn invalid_size() {
let legacy_header = r#"{"size":-1}"#;
assert!(serde_json::from_str::<LegacyImageMetadata>(legacy_header).is_err());
}
#[test]
fn wrong_image_type() {
let legacy_header = r#"{"content_type":25}"#;
assert!(serde_json::from_str::<LegacyImageMetadata>(legacy_header).is_err());
}
#[test]
fn wrong_date_time_type() {
let legacy_header = r#"{"last_modified":false}"#;
assert!(serde_json::from_str::<LegacyImageMetadata>(legacy_header).is_err());
}
}

692
src/cache/disk.rs vendored Normal file
View file

@ -0,0 +1,692 @@
//! Low memory caching stuff
use std::convert::TryFrom;
use std::hint::unreachable_unchecked;
use std::os::unix::prelude::OsStrExt;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use futures::StreamExt;
use log::LevelFilter;
use md5::digest::generic_array::GenericArray;
use md5::{Digest, Md5};
use sodiumoxide::hex;
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{ConnectOptions, Sqlite, SqlitePool, Transaction};
use tokio::fs::{create_dir_all, remove_file, rename, File};
use tokio::join;
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, instrument, warn};
use crate::units::Bytes;
use super::{Cache, CacheEntry, CacheError, CacheKey, CacheStream, CallbackCache, ImageMetadata};
#[derive(Debug)]
pub struct DiskCache {
disk_path: PathBuf,
disk_cur_size: AtomicU64,
db_update_channel_sender: Sender<DbMessage>,
}
#[derive(Debug)]
enum DbMessage {
Get(Arc<PathBuf>),
Put(Arc<PathBuf>, u64),
}
impl DiskCache {
/// Constructs a new low memory cache at the provided path and capacity.
/// This internally spawns a task that will wait for filesystem
/// notifications when a file has been written.
pub async fn new(disk_max_size: Bytes, disk_path: PathBuf) -> Arc<Self> {
if let Err(e) = create_dir_all(&disk_path).await {
error!("Failed to create cache folder: {}", e);
}
let cache_path = disk_path.to_string_lossy();
// Migrate old to new path
if rename(
format!("{}/metadata.sqlite", cache_path),
format!("{}/metadata.db", cache_path),
)
.await
.is_ok()
{
info!("Found old metadata file, migrating to new location.");
}
let db_pool = {
let db_url = format!("sqlite:{}/metadata.db", cache_path);
let mut options = SqliteConnectOptions::from_str(&db_url)
.unwrap()
.create_if_missing(true);
options.log_statements(LevelFilter::Trace);
SqlitePool::connect_with(options).await.unwrap()
};
Self::from_db_pool(db_pool, disk_max_size, disk_path).await
}
async fn from_db_pool(pool: SqlitePool, disk_max_size: Bytes, disk_path: PathBuf) -> Arc<Self> {
let (db_tx, db_rx) = channel(128);
// Run db init
sqlx::query_file!("./db_queries/init.sql")
.execute(&mut pool.acquire().await.unwrap())
.await
.unwrap();
// This is intentional.
#[allow(clippy::cast_sign_loss)]
let disk_cur_size = {
let mut conn = pool.acquire().await.unwrap();
sqlx::query!("SELECT IFNULL(SUM(size), 0) AS size FROM Images")
.fetch_one(&mut conn)
.await
.map(|record| record.size)
.unwrap_or_default()
.unwrap_or_default() as u64
};
let new_self = Arc::new(Self {
disk_path,
disk_cur_size: AtomicU64::new(disk_cur_size),
db_update_channel_sender: db_tx,
});
tokio::spawn(db_listener(
Arc::clone(&new_self),
db_rx,
pool,
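// Prune threshold: 19/20 (95%) of the configured max size.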
disk_max_size.get() as u64 / 20 * 19,
));
new_self
}
#[cfg(test)]
fn in_memory() -> (Self, Receiver<DbMessage>) {
let (db_tx, db_rx) = channel(128);
(
Self {
disk_path: PathBuf::new(),
disk_cur_size: AtomicU64::new(0),
db_update_channel_sender: db_tx,
},
db_rx,
)
}
}
/// Spawn a new task that will listen for updates to the db, pruning if the size
/// becomes too large.
async fn db_listener(
cache: Arc<DiskCache>,
db_rx: Receiver<DbMessage>,
db_pool: SqlitePool,
max_on_disk_size: u64,
) {
// This is in a receiver stream to process up to 128 simultaneous db updates
// in one transaction
let mut recv_stream = ReceiverStream::new(db_rx).ready_chunks(128);
while let Some(messages) = recv_stream.next().await {
let mut transaction = match db_pool.begin().await {
Ok(transaction) => transaction,
Err(e) => {
error!("Failed to start a transaction to DB, cannot update DB. Disk cache may be losing track of files! {}", e);
continue;
}
};
for message in messages {
match message {
DbMessage::Get(entry) => handle_db_get(&entry, &mut transaction).await,
DbMessage::Put(entry, size) => {
handle_db_put(&entry, size, &cache, &mut transaction).await;
}
}
}
if let Err(e) = transaction.commit().await {
error!(
"Failed to commit transaction to DB. Disk cache may be losing track of files! {}",
e
);
}
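// Round the tracked size up to the next 4096-byte block to approximate actual disk usage.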
let on_disk_size = (cache.disk_cur_size.load(Ordering::Acquire) + 4095) / 4096 * 4096;
if on_disk_size >= max_on_disk_size {
let items = {
let request =
sqlx::query!("select id, size from Images order by accessed asc limit 1000")
.fetch_all(&db_pool)
.await;
match request {
Ok(items) => items,
Err(e) => {
error!(
"Failed to fetch oldest images and cannot prune disk cache: {}",
e
);
continue;
}
}
};
let mut size_freed = 0;
#[allow(clippy::cast_sign_loss)]
for item in items {
debug!("deleting file due to exceeding cache size");
size_freed += item.size as u64;
tokio::spawn(remove_file_handler(item.id));
}
cache.disk_cur_size.fetch_sub(size_freed, Ordering::Release);
}
}
}
/// Returns whether the file was successfully deleted.
async fn remove_file_handler(key: String) -> bool {
let error = if let Err(e) = remove_file(&key).await {
e
} else {
return true;
};
if error.kind() != std::io::ErrorKind::NotFound {
warn!("Failed to delete file `{}` from cache: {}", &key, error);
return false;
}
if let Ok(bytes) = hex::decode(&key) {
if bytes.len() != 16 {
warn!("Failed to delete file `{}`; invalid hash size.", &key);
return false;
}
let hash = Md5Hash(*GenericArray::from_slice(&bytes));
let path: PathBuf = hash.into();
if let Err(e) = remove_file(&path).await {
warn!(
"Failed to delete file `{}` from cache: {}",
path.to_string_lossy(),
e
);
false
} else {
true
}
} else {
warn!("Failed to delete file `{}`; not a md5hash.", &key);
false
}
}
#[instrument(level = "debug", skip(transaction))]
async fn handle_db_get(entry: &Path, transaction: &mut Transaction<'_, Sqlite>) {
let key = entry.as_os_str().to_str();
let now = chrono::Utc::now();
let query = sqlx::query!("update Images set accessed = ? where id = ?", now, key)
.execute(transaction)
.await;
if let Err(e) = query {
warn!("Failed to update timestamp in db for {:?}: {}", key, e);
}
}
#[instrument(level = "debug", skip(transaction, cache))]
async fn handle_db_put(
entry: &Path,
size: u64,
cache: &DiskCache,
transaction: &mut Transaction<'_, Sqlite>,
) {
let key = entry.as_os_str().to_str();
let now = chrono::Utc::now();
// This is intentional.
#[allow(clippy::cast_possible_wrap)]
let casted_size = size as i64;
let query = sqlx::query_file!("./db_queries/insert_image.sql", key, casted_size, now)
.execute(transaction)
.await;
if let Err(e) = query {
warn!("Failed to add to db: {}", e);
}
cache.disk_cur_size.fetch_add(size, Ordering::Release);
}
/// Represents a Md5 hash that can be converted to and from a path. This is used
/// for compatibility with the official client, where the image id and on-disk
/// path is determined by file path.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Md5Hash(GenericArray<u8, <Md5 as md5::Digest>::OutputSize>);
impl Md5Hash {
fn to_hex_string(self) -> String {
format!("{:x}", self.0)
}
}
impl TryFrom<&Path> for Md5Hash {
type Error = ();
fn try_from(path: &Path) -> Result<Self, Self::Error> {
let mut iter = path.iter();
let file_name = iter.next_back().ok_or(())?;
let chapter_hash = iter.next_back().ok_or(())?;
let is_data_saver = iter.next_back().ok_or(())? == "saver";
let mut hasher = Md5::new();
if is_data_saver {
hasher.update("saver");
}
hasher.update(chapter_hash.as_bytes());
hasher.update(".");
hasher.update(file_name.as_bytes());
Ok(Self(hasher.finalize()))
}
}
impl From<Md5Hash> for PathBuf {
fn from(hash: Md5Hash) -> Self {
let hex_value = hash.to_hex_string();
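// Nest the first three hex chars in reverse order, e.g. "abc..." lives at "c/b/a/abc...".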
let path = hex_value[0..3]
.chars()
.rev()
.map(|char| Self::from(char.to_string()))
.reduce(|first, second| first.join(second));
match path {
Some(p) => p.join(hex_value),
None => unsafe { unreachable_unchecked() }, // literally not possible
}
}
}
#[async_trait]
impl Cache for DiskCache {
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), CacheError>> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(key)));
let path_0 = Arc::clone(&path);
let legacy_path = Md5Hash::try_from(path_0.as_path())
.map(PathBuf::from)
.map(|path| self.disk_path.clone().join(path))
.map(Arc::new);
// Get file and path of first existing location path
let (file, path) = if let Ok(legacy_path) = legacy_path {
let maybe_files = join!(
File::open(legacy_path.as_path()),
File::open(path.as_path()),
);
match maybe_files {
(Ok(f), _) => Some((f, legacy_path)),
(_, Ok(f)) => Some((f, path)),
_ => return None,
}
} else {
File::open(path.as_path())
.await
.ok()
.map(|file| (file, path))
}?;
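// Fire-and-forget: update the access time in the DB without blocking this request.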
tokio::spawn(async move { channel.send(DbMessage::Get(path)).await });
super::fs::read_file(file).await.map(|res| {
res.map(|(stream, _, metadata)| (stream, metadata))
.map_err(|_| CacheError::DecryptionFailure)
})
}
async fn put(
&self,
key: CacheKey,
image: bytes::Bytes,
metadata: ImageMetadata,
) -> Result<(), CacheError> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(&key)));
let path_0 = Arc::clone(&path);
let db_callback = |size: u64| async move {
std::mem::drop(channel.send(DbMessage::Put(path_0, size)).await);
};
super::fs::write_file(&path, key, image, metadata, db_callback, None)
.await
.map_err(CacheError::from)
}
}
#[async_trait]
impl CallbackCache for DiskCache {
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
image: bytes::Bytes,
metadata: ImageMetadata,
on_complete: Sender<CacheEntry>,
) -> Result<(), CacheError> {
let channel = self.db_update_channel_sender.clone();
let path = Arc::new(self.disk_path.clone().join(PathBuf::from(&key)));
let path_0 = Arc::clone(&path);
let db_callback = |size: u64| async move {
// We don't care about the result of the send
std::mem::drop(channel.send(DbMessage::Put(path_0, size)).await);
};
super::fs::write_file(&path, key, image, metadata, db_callback, Some(on_complete))
.await
.map_err(CacheError::from)
}
}
#[cfg(test)]
mod db_listener {
use super::{db_listener, DbMessage};
use crate::DiskCache;
use futures::TryStreamExt;
use sqlx::{Row, SqlitePool};
use std::error::Error;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use tokio::sync::mpsc::channel;
#[tokio::test]
async fn can_handle_multiple_events() -> Result<(), Box<dyn Error>> {
let (mut cache, rx) = DiskCache::in_memory();
let (mut tx, _) = channel(1);
// Swap the tx with the new one, else the receiver will never end
std::mem::swap(&mut cache.db_update_channel_sender, &mut tx);
assert_eq!(tx.capacity(), 128);
let cache = Arc::new(cache);
let db = SqlitePool::connect("sqlite::memory:").await?;
sqlx::query_file!("./db_queries/init.sql")
.execute(&db)
.await?;
// Populate the queue with messages
for c in 'a'..='z' {
tx.send(DbMessage::Put(Arc::new(PathBuf::from(c.to_string())), 10))
.await?;
tx.send(DbMessage::Get(Arc::new(PathBuf::from(c.to_string()))))
.await?;
}
// Explicitly close the channel so that the listener terminates
std::mem::drop(tx);
db_listener(cache, rx, db.clone(), u64::MAX).await;
let count = Arc::new(AtomicUsize::new(0));
sqlx::query("select * from Images")
.fetch(&db)
.try_for_each_concurrent(None, |row| {
let count = Arc::clone(&count);
async move {
assert_eq!(row.get::<i32, _>("size"), 10);
count.fetch_add(1, Ordering::Release);
Ok(())
}
})
.await?;
assert_eq!(count.load(Ordering::Acquire), 26);
Ok(())
}
}
#[cfg(test)]
mod remove_file_handler {
use std::error::Error;
use tempfile::tempdir;
use tokio::fs::{create_dir_all, remove_dir_all};
use super::{remove_file_handler, File};
#[tokio::test]
async fn should_not_panic_on_invalid_path() {
assert!(!remove_file_handler("/this/is/a/non-existent/path/".to_string()).await);
}
#[tokio::test]
async fn should_not_panic_on_invalid_hash() {
assert!(!remove_file_handler("68b329da9893e34099c7d8ad5cb9c940".to_string()).await);
}
#[tokio::test]
async fn should_not_panic_on_malicious_hashes() {
assert!(!remove_file_handler("68b329da9893e34".to_string()).await);
assert!(
!remove_file_handler("68b329da9893e34099c7d8ad5cb9c940aaaaaaaaaaaaaaaaaa".to_string())
.await
);
}
#[tokio::test]
async fn should_delete_existing_file() -> Result<(), Box<dyn Error>> {
let temp_dir = tempdir()?;
let mut dir_path = temp_dir.path().to_path_buf();
dir_path.push("abc123.png");
// create a file, it can be empty
File::create(&dir_path).await?;
assert!(remove_file_handler(dir_path.to_string_lossy().into_owned()).await);
Ok(())
}
#[tokio::test]
async fn should_delete_existing_hash() -> Result<(), Box<dyn Error>> {
create_dir_all("b/8/6").await?;
File::create("b/8/6/68b329da9893e34099c7d8ad5cb9c900").await?;
assert!(remove_file_handler("68b329da9893e34099c7d8ad5cb9c900".to_string()).await);
remove_dir_all("b").await?;
Ok(())
}
}
#[cfg(test)]
mod disk_cache {
use std::error::Error;
use std::path::PathBuf;
use std::sync::atomic::Ordering;
use chrono::Utc;
use sqlx::SqlitePool;
use crate::units::Bytes;
use super::DiskCache;
#[tokio::test]
async fn db_is_initialized() -> Result<(), Box<dyn Error>> {
let conn = SqlitePool::connect("sqlite::memory:").await?;
let _cache = DiskCache::from_db_pool(conn.clone(), Bytes(1000), PathBuf::new()).await;
let res = sqlx::query("select * from Images").execute(&conn).await;
assert!(res.is_ok());
Ok(())
}
#[tokio::test]
async fn db_initializes_empty() -> Result<(), Box<dyn Error>> {
let conn = SqlitePool::connect("sqlite::memory:").await?;
let cache = DiskCache::from_db_pool(conn.clone(), Bytes(1000), PathBuf::new()).await;
assert_eq!(cache.disk_cur_size.load(Ordering::SeqCst), 0);
Ok(())
}
#[tokio::test]
async fn db_can_load_from_existing() -> Result<(), Box<dyn Error>> {
let conn = SqlitePool::connect("sqlite::memory:").await?;
sqlx::query_file!("./db_queries/init.sql")
.execute(&conn)
.await?;
let now = Utc::now();
sqlx::query_file!("./db_queries/insert_image.sql", "a", 4, now)
.execute(&conn)
.await?;
let now = Utc::now();
sqlx::query_file!("./db_queries/insert_image.sql", "b", 15, now)
.execute(&conn)
.await?;
let cache = DiskCache::from_db_pool(conn.clone(), Bytes(1000), PathBuf::new()).await;
assert_eq!(cache.disk_cur_size.load(Ordering::SeqCst), 19);
Ok(())
}
}
#[cfg(test)]
mod db {
use chrono::{DateTime, Utc};
use sqlx::{Connection, Row, SqliteConnection};
use std::error::Error;
use super::{handle_db_get, handle_db_put, DiskCache, FromStr, Ordering, PathBuf, StreamExt};
#[tokio::test]
#[cfg_attr(miri, ignore)]
async fn get() -> Result<(), Box<dyn Error>> {
let (cache, _) = DiskCache::in_memory();
let path = PathBuf::from_str("a/b/c")?;
let mut conn = SqliteConnection::connect("sqlite::memory:").await?;
sqlx::query_file!("./db_queries/init.sql")
.execute(&mut conn)
.await?;
// Add an entry
let mut transaction = conn.begin().await?;
handle_db_put(&path, 10, &cache, &mut transaction).await;
transaction.commit().await?;
let time_fence = Utc::now();
let mut transaction = conn.begin().await?;
handle_db_get(&path, &mut transaction).await;
transaction.commit().await?;
let mut rows: Vec<_> = sqlx::query("select * from Images")
.fetch(&mut conn)
.collect()
.await;
assert_eq!(rows.len(), 1);
let entry = rows.pop().unwrap()?;
assert!(time_fence < entry.get::<'_, DateTime<Utc>, _>("accessed"));
Ok(())
}
#[tokio::test]
#[cfg_attr(miri, ignore)]
async fn put() -> Result<(), Box<dyn Error>> {
let (cache, _) = DiskCache::in_memory();
let path = PathBuf::from_str("a/b/c")?;
let mut conn = SqliteConnection::connect("sqlite::memory:").await?;
sqlx::query_file!("./db_queries/init.sql")
.execute(&mut conn)
.await?;
let mut transaction = conn.begin().await?;
let transaction_time = Utc::now();
handle_db_put(&path, 10, &cache, &mut transaction).await;
transaction.commit().await?;
let mut rows: Vec<_> = sqlx::query("select * from Images")
.fetch(&mut conn)
.collect()
.await;
assert_eq!(rows.len(), 1);
let entry = rows.pop().unwrap()?;
assert_eq!(entry.get::<'_, &str, _>("id"), "a/b/c");
assert_eq!(entry.get::<'_, i64, _>("size"), 10);
let accessed: DateTime<Utc> = entry.get("accessed");
assert!(transaction_time < accessed);
assert!(accessed < Utc::now());
assert_eq!(cache.disk_cur_size.load(Ordering::SeqCst), 10);
Ok(())
}
}
#[cfg(test)]
mod md5_hash {
use super::{Digest, GenericArray, Md5, Md5Hash, Path, PathBuf, TryFrom};
#[test]
fn to_cache_path() {
let hash = Md5Hash(
*GenericArray::<_, <Md5 as md5::Digest>::OutputSize>::from_slice(&[
0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd,
0xef, 0xab,
]),
);
assert_eq!(
PathBuf::from(hash).to_str(),
Some("c/b/a/abcdefabcdefabcdefabcdefabcdefab")
)
}
#[test]
fn from_data_path() {
let mut expected_hasher = Md5::new();
expected_hasher.update("foo.bar.png");
assert_eq!(
Md5Hash::try_from(Path::new("data/foo/bar.png")),
Ok(Md5Hash(expected_hasher.finalize()))
);
}
#[test]
fn from_data_saver_path() {
let mut expected_hasher = Md5::new();
expected_hasher.update("saverfoo.bar.png");
assert_eq!(
Md5Hash::try_from(Path::new("saver/foo/bar.png")),
Ok(Md5Hash(expected_hasher.finalize()))
);
}
#[test]
fn can_handle_long_paths() {
assert_eq!(
Md5Hash::try_from(Path::new("a/b/c/d/e/f/g/saver/foo/bar.png")),
Md5Hash::try_from(Path::new("saver/foo/bar.png")),
);
}
#[test]
fn from_invalid_paths() {
assert!(Md5Hash::try_from(Path::new("foo/bar.png")).is_err());
assert!(Md5Hash::try_from(Path::new("bar.png")).is_err());
assert!(Md5Hash::try_from(Path::new("")).is_err());
}
}

667
src/cache/fs.rs vendored Normal file
View file

@ -0,0 +1,667 @@
//! This module contains two functions whose sole purpose is to allow a single
//! producer multiple consumer (SPMC) system using the filesystem as an
//! intermediate.
//!
//! Consider the scenario where two clients, A and B, request the same uncached
//! file, one after the other. In a typical caching system, both requests would
//! result in a cache miss, and both requests would then be proxied from
//! upstream. But, we can do better. We know that by the time one request
//! begins, there should be a file on disk for us to read from. Why require
//! subsequent requests to read from upstream, when we can simply fetch one and
//! read from the filesystem that we know will have the exact same data?
//! Instead, we can just read from the filesystem and inform all readers
//! when the file is done. This is beneficial to both downstream and upstream, as
//! upstream no longer needs to process duplicate requests and sequential cache
//! misses are treated more like cache hits.
use std::error::Error;
use std::fmt::Display;
use std::io::SeekFrom;
use std::path::Path;
use std::pin::Pin;
use std::task::{Context, Poll};
use actix_web::error::PayloadError;
use async_trait::async_trait;
use bytes::Bytes;
use chacha20::cipher::{NewCipher, StreamCipher};
use chacha20::{Key, XChaCha20, XNonce};
use futures::Future;
use serde::Deserialize;
use sodiumoxide::crypto::stream::xchacha20::{gen_nonce, NONCEBYTES};
use tokio::fs::{create_dir_all, remove_file, File};
use tokio::io::{
AsyncBufRead, AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, BufReader,
ReadBuf,
};
use tokio::sync::mpsc::Sender;
use tokio_util::io::ReaderStream;
use tracing::{debug, instrument, warn};
use super::compat::LegacyImageMetadata;
use super::{CacheEntry, CacheKey, CacheStream, ImageMetadata, ENCRYPTION_KEY};
/// Attempts to lookup the file on disk, returning a byte stream if it exists.
/// Note that this could return two types of streams, depending on if the file
/// is in progress of being written to.
#[instrument(level = "debug")]
pub(super) async fn read_file(
file: File,
) -> Option<Result<(CacheStream, Option<XNonce>, ImageMetadata), std::io::Error>> {
let mut file_0 = file.try_clone().await.ok()?;
let file_1 = file.try_clone().await.ok()?;
// Try reading decrypted header first...
let mut deserializer = serde_json::Deserializer::from_reader(file.into_std().await);
let mut maybe_metadata = ImageMetadata::deserialize(&mut deserializer);
// Failed to parse normally, see if we have a legacy file format
if maybe_metadata.is_err() {
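// Legacy files prefix the JSON header with a two-byte length, so skip past it.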
file_0.seek(SeekFrom::Start(2)).await.ok()?;
let mut deserializer = serde_json::Deserializer::from_reader(file_0.into_std().await);
maybe_metadata =
LegacyImageMetadata::deserialize(&mut deserializer).map(LegacyImageMetadata::into);
}
let parsed_metadata;
let mut maybe_header = None;
let mut reader: Option<Pin<Box<dyn MetadataFetch + Send + Sync>>> = None;
if let Ok(metadata) = maybe_metadata {
// image is decrypted
if ENCRYPTION_KEY.get().is_some() {
// invalidate the cache, since we're running with at-rest encryption and
// the file wasn't encrypted.
warn!("Found file but it was not encrypted!");
return None;
}
reader = Some(Box::pin(BufReader::new(file_1)));
parsed_metadata = Some(metadata);
debug!("Found not encrypted file");
} else {
debug!("metadata read failed, trying to see if it's encrypted");
let mut file = file_1;
file.seek(SeekFrom::Start(0)).await.ok()?;
// image is encrypted or corrupt
// If the encryption key was set, use the encrypted disk reader instead;
// else, just directly read from file.
if let Some(key) = ENCRYPTION_KEY.get() {
let mut nonce_bytes = [0; NONCEBYTES];
if let Err(e) = file.read_exact(&mut nonce_bytes).await {
warn!("Found file but failed reading header: {}", e);
return None;
}
debug!("header bytes: {:x?}", nonce_bytes);
maybe_header = Some(*XNonce::from_slice(&nonce_bytes));
reader = Some(Box::pin(BufReader::new(EncryptedReader::new(
file,
XNonce::from_slice(&nonce_bytes),
key,
))));
}
parsed_metadata = if let Some(reader) = reader.as_mut() {
if let Ok(metadata) = reader.as_mut().metadata().await {
debug!("Successfully parsed encrypted metadata");
Some(metadata)
} else {
debug!("Failed to parse encrypted metadata");
None
}
} else {
debug!("Failed to read encrypted data");
None
};
}
// parsed_metadata is either set or unset here. If it's set then we
// successfully decoded the data; otherwise the file is garbage.
reader.map_or_else(
|| {
debug!("Reader was invalid, file is corrupt");
None
},
|reader| {
let stream = CacheStream::Completed(ReaderStream::new(reader));
parsed_metadata.map(|metadata| Ok((stream, maybe_header, metadata)))
},
)
}
struct EncryptedReader<R> {
file: Pin<Box<R>>,
keystream: XChaCha20,
}
impl<R> EncryptedReader<R> {
fn new(file: R, nonce: &XNonce, key: &Key) -> Self {
Self {
file: Box::pin(file),
keystream: XChaCha20::new(key, nonce),
}
}
}
impl<R: AsyncRead> AsyncRead for EncryptedReader<R> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
let mut pinned = self.as_mut();
let previously_read = buf.filled().len();
let res = pinned.file.as_mut().poll_read(cx, buf);
let bytes_modified = buf.filled().len() - previously_read;
pinned.keystream.apply_keystream(
&mut buf.filled_mut()[previously_read..previously_read + bytes_modified],
);
res
}
}
#[async_trait]
pub trait MetadataFetch: AsyncBufRead {
async fn metadata(mut self: Pin<&mut Self>) -> Result<ImageMetadata, ()>;
}
#[async_trait]
impl<R: AsyncBufRead + Send> MetadataFetch for R {
#[inline]
async fn metadata(mut self: Pin<&mut Self>) -> Result<ImageMetadata, ()> {
MetadataFuture(self).await
}
}
struct MetadataFuture<'a, R>(Pin<&'a mut R>);
impl<'a, R: AsyncBufRead> Future for MetadataFuture<'a, R> {
type Output = Result<ImageMetadata, ()>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let mut filled = 0;
let mut pinned = self.0.as_mut();
loop {
let buf = match pinned.as_mut().poll_fill_buf(cx) {
Poll::Ready(Ok(buffer)) => buffer,
Poll::Ready(Err(_)) => return Poll::Ready(Err(())),
Poll::Pending => return Poll::Pending,
};
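// No new bytes since the last poll means EOF before a complete header; bail.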
if filled == buf.len() {
return Poll::Ready(Err(()));
}
filled = buf.len();
let mut reader = serde_json::Deserializer::from_slice(buf).into_iter();
let (res, bytes_consumed) = match reader.next() {
Some(Ok(metadata)) => (Poll::Ready(Ok(metadata)), reader.byte_offset()),
Some(Err(e)) if e.is_eof() => continue,
Some(Err(_)) | None => return Poll::Ready(Err(())),
};
assert_ne!(bytes_consumed, 0);
// This needs to be outside the loop because we need to drop the
// reader ref, since that depends on a mut self.
pinned.as_mut().consume(bytes_consumed);
return res;
}
}
}
/// Writes the metadata and input stream (in that order) to a file, returning a
/// stream that reads from that file. Accepts a db callback function that is
/// provided the number of bytes written, and an optional on-complete callback
/// that is called with a completed cache entry.
pub(super) async fn write_file<Fut, DbCallback>(
path: &Path,
key: CacheKey,
data: Bytes,
metadata: ImageMetadata,
db_callback: DbCallback,
on_complete: Option<Sender<CacheEntry>>,
) -> Result<(), std::io::Error>
where
Fut: 'static + Send + Sync + Future<Output = ()>,
DbCallback: 'static + Send + Sync + FnOnce(u64) -> Fut,
{
let mut file = {
let parent = path.parent().expect("The path to have a parent");
create_dir_all(parent).await?;
// We need to make sure the file exists and is truncated.
File::create(path).await?
};
let mut writer: Pin<Box<dyn AsyncWrite + Send>> = if let Some(key) = ENCRYPTION_KEY.get() {
let nonce = gen_nonce();
file.write_all(nonce.as_ref()).await?;
Box::pin(EncryptedDiskWriter::new(
file,
XNonce::from_slice(nonce.as_ref()),
key,
))
} else {
Box::pin(file)
};
let metadata_string = serde_json::to_string(&metadata).expect("serialization to work");
let metadata_size = metadata_string.len();
let mut error = writer.write_all(metadata_string.as_bytes()).await.err();
if error.is_none() {
debug!("decrypted write {:x?}", &data[..40]);
error = writer.write_all(&data).await.err();
}
if let Some(e) = error {
// It's ok if deleting the file fails, since we truncate on
// create anyways, but it should be best effort.
//
// We don't care about the result of the call.
std::mem::drop(remove_file(path).await);
return Err(e);
}
writer.flush().await?;
debug!("writing to file done");
let on_disk_size = (metadata_size + data.len()) as u64;
tokio::spawn(db_callback(on_disk_size));
if let Some(sender) = on_complete {
tokio::spawn(async move {
sender
.send(CacheEntry {
key,
data,
metadata,
on_disk_size,
})
.await
});
}
Ok(())
}
struct EncryptedDiskWriter {
file: Pin<Box<File>>,
keystream: XChaCha20,
buffer: Vec<u8>,
}
impl EncryptedDiskWriter {
fn new(file: File, nonce: &XNonce, key: &Key) -> Self {
Self {
file: Box::pin(file),
keystream: XChaCha20::new(key, nonce),
buffer: vec![],
}
}
}
impl AsyncWrite for EncryptedDiskWriter {
#[inline]
fn poll_write(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, std::io::Error>> {
let pinned = Pin::into_inner(self);
let old_buffer_size = pinned.buffer.len();
pinned.buffer.extend_from_slice(buf);
pinned
.keystream
.apply_keystream(&mut pinned.buffer[old_buffer_size..]);
match pinned.file.as_mut().poll_write(cx, &pinned.buffer) {
Poll::Ready(Ok(n)) => {
pinned.buffer.drain(..n);
Poll::Ready(Ok(buf.len()))
}
Poll::Ready(Err(e)) => Poll::Ready(Err(e)),
// We have written the data to our buffer, even if we couldn't
// write it to disk yet.
Poll::Pending => Poll::Ready(Ok(buf.len())),
}
}
#[inline]
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), std::io::Error>> {
let pinned = Pin::into_inner(self);
while !pinned.buffer.is_empty() {
match pinned.file.as_mut().poll_write(cx, &pinned.buffer) {
Poll::Ready(Ok(n)) => {
pinned.buffer.drain(..n);
}
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
Poll::Pending => return Poll::Pending,
}
}
pinned.file.as_mut().poll_flush(cx)
}
#[inline]
fn poll_shutdown(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
match self.as_mut().poll_flush(cx) {
Poll::Ready(Ok(())) => self.as_mut().file.as_mut().poll_shutdown(cx),
poll => poll,
}
}
}
/// Represents some upstream error.
#[derive(Debug, PartialEq, Eq)]
pub struct UpstreamError;
impl Error for UpstreamError {}
impl Display for UpstreamError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "An upstream error occurred")
}
}
impl From<UpstreamError> for actix_web::Error {
#[inline]
fn from(_: UpstreamError) -> Self {
PayloadError::Incomplete(None).into()
}
}
#[cfg(test)]
mod read_file {
use crate::cache::{ImageContentType, ImageMetadata};
use super::read_file;
use bytes::Bytes;
use chrono::DateTime;
use futures::StreamExt;
use std::io::{Seek, SeekFrom, Write};
use tempfile::tempfile;
use tokio::fs::File;
#[tokio::test]
#[cfg_attr(miri, ignore)]
async fn can_read() {
let mut temp_file = tempfile().unwrap();
temp_file
.write_all(
br#"{"content_type":0,"content_length":708370,"last_modified":"2021-04-13T04:37:41+00:00"}abc"#,
)
.unwrap();
temp_file.seek(SeekFrom::Start(0)).unwrap();
let temp_file = File::from_std(temp_file);
let (inner_stream, maybe_header, metadata) = read_file(temp_file).await.unwrap().unwrap();
let foo: Vec<_> = inner_stream.collect().await;
assert_eq!(foo, vec![Ok(Bytes::from("abc"))]);
assert!(maybe_header.is_none());
assert_eq!(
metadata,
ImageMetadata {
content_length: Some(708_370),
content_type: Some(ImageContentType::Png),
last_modified: Some(
DateTime::parse_from_rfc3339("2021-04-13T04:37:41+00:00").unwrap()
)
}
);
}
}
#[cfg(test)]
mod read_file_compat {
use crate::cache::{ImageContentType, ImageMetadata};
use super::read_file;
use bytes::Bytes;
use chrono::DateTime;
use futures::StreamExt;
use std::io::{Seek, SeekFrom, Write};
use tempfile::tempfile;
use tokio::fs::File;
#[tokio::test]
#[cfg_attr(miri, ignore)]
async fn can_read_legacy() {
let mut temp_file = tempfile().unwrap();
temp_file
.write_all(
b"\x00\x5b{\"content_type\":\"image/jpeg\",\"last_modified\":\"Sat, 10 Apr 2021 10:55:22 GMT\",\"size\":117888}abc",
)
.unwrap();
temp_file.seek(SeekFrom::Start(0)).unwrap();
let temp_file = File::from_std(temp_file);
let (inner_stream, maybe_header, metadata) = read_file(temp_file).await.unwrap().unwrap();
let foo: Vec<_> = inner_stream.collect().await;
assert_eq!(foo, vec![Ok(Bytes::from("abc"))]);
assert!(maybe_header.is_none());
assert_eq!(
metadata,
ImageMetadata {
content_length: Some(117_888),
content_type: Some(ImageContentType::Jpeg),
last_modified: Some(
DateTime::parse_from_rfc2822("Sat, 10 Apr 2021 10:55:22 GMT").unwrap()
)
}
);
}
}
#[cfg(test)]
mod metadata_future {
use std::{collections::VecDeque, io::ErrorKind};
use super::{
AsyncBufRead, AsyncRead, AsyncReadExt, BufReader, Context, Error, MetadataFuture, Pin,
Poll, ReadBuf,
};
use crate::cache::ImageContentType;
use chrono::DateTime;
#[derive(Default)]
struct TestReader {
fill_buf_events: VecDeque<Poll<std::io::Result<&'static [u8]>>>,
consume_events: VecDeque<usize>,
buffer: Vec<u8>,
}
impl TestReader {
fn new() -> Self {
Self::default()
}
fn push_fill_buf_event(&mut self, event: Poll<std::io::Result<&'static [u8]>>) {
self.fill_buf_events.push_back(event);
}
fn push_consume_event(&mut self, event: usize) {
self.consume_events.push_back(event);
}
}
impl AsyncRead for TestReader {
fn poll_read(
mut self: Pin<&mut Self>,
_: &mut Context<'_>,
buf: &mut ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
assert!(self.consume_events.is_empty());
assert!(self
.fill_buf_events
.iter()
.all(|event| matches!(event, Poll::Pending)));
buf.put_slice(&self.as_mut().buffer.drain(..).collect::<Vec<_>>());
Poll::Ready(Ok(()))
}
}
impl AsyncBufRead for TestReader {
fn poll_fill_buf(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<std::io::Result<&[u8]>> {
let pinned = Pin::into_inner(self);
match pinned.fill_buf_events.pop_front() {
Some(Poll::Ready(Ok(bytes))) => {
pinned.buffer.extend_from_slice(bytes);
return Poll::Ready(Ok(pinned.buffer.as_ref()));
}
Some(res @ Poll::Ready(_)) => res,
Some(Poll::Pending) => {
cx.waker().wake_by_ref();
Poll::Pending
}
None => panic!("poll_fill_buf was called but no events are left"),
}
}
fn consume(mut self: Pin<&mut Self>, amt: usize) {
assert_eq!(self.as_mut().consume_events.pop_front(), Some(amt));
self.as_mut().buffer.drain(..amt);
}
}
// We don't use the tokio executor here because it relies on epoll, which
// isn't supported by miri
#[test]
fn full_data_is_available() -> Result<(), Box<dyn Error>> {
let content = br#"{"content_type":0,"content_length":708370,"last_modified":"2021-04-13T04:37:41+00:00"}abc"#;
let mut reader = Box::pin(BufReader::new(&content[..]));
let metadata = futures::executor::block_on(async {
MetadataFuture(reader.as_mut())
.await
.map_err(|_| "metadata future returned error")
})?;
assert_eq!(metadata.content_type, Some(ImageContentType::Png));
assert_eq!(metadata.content_length, Some(708_370));
assert_eq!(
metadata.last_modified,
Some(DateTime::parse_from_rfc3339("2021-04-13T04:37:41+00:00")?)
);
let mut buf = vec![];
futures::executor::block_on(reader.read_to_end(&mut buf))?;
assert_eq!(&buf, b"abc");
Ok(())
}
#[test]
fn data_is_immediately_available_in_chunks() -> Result<(), Box<dyn Error>> {
let mut test_reader = TestReader::new();
let msg_0 = br#"{"content_type":0,"content_length":708370,"last_"#;
let msg_1 = br#"modified":"2021-04-13T04:37:41+00:00"}abc"#;
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_0)));
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_1)));
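// The concatenated metadata JSON is exactly 86 bytes long.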
test_reader.push_consume_event(86);
let mut reader = Box::pin(test_reader);
let metadata = futures::executor::block_on(async {
MetadataFuture(reader.as_mut())
.await
.map_err(|_| "metadata future returned error")
})?;
assert_eq!(metadata.content_type, Some(ImageContentType::Png));
assert_eq!(metadata.content_length, Some(708_370));
assert_eq!(
metadata.last_modified,
Some(DateTime::parse_from_rfc3339("2021-04-13T04:37:41+00:00")?)
);
let mut buf = vec![];
futures::executor::block_on(reader.read_to_end(&mut buf))?;
assert_eq!(&buf, b"abc");
Ok(())
}
#[test]
fn data_is_available_in_chunks() -> Result<(), Box<dyn Error>> {
let mut test_reader = TestReader::new();
let msg_0 = br#"{"content_type":0,"content_length":708370,"last_"#;
let msg_1 = br#"modified":"2021-04-13T04:37:41+00:00"}abc"#;
test_reader.push_fill_buf_event(Poll::Pending);
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_0)));
test_reader.push_fill_buf_event(Poll::Pending);
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_1)));
test_reader.push_fill_buf_event(Poll::Pending);
test_reader.push_consume_event(86);
let mut reader = Box::pin(test_reader);
let metadata = futures::executor::block_on(async {
MetadataFuture(reader.as_mut())
.await
.map_err(|_| "metadata future returned error")
})?;
assert_eq!(metadata.content_type, Some(ImageContentType::Png));
assert_eq!(metadata.content_length, Some(708_370));
assert_eq!(
metadata.last_modified,
Some(DateTime::parse_from_rfc3339("2021-04-13T04:37:41+00:00")?)
);
let mut buf = vec![];
futures::executor::block_on(reader.read_to_end(&mut buf))?;
assert_eq!(&buf, b"abc");
Ok(())
}
#[test]
fn underlying_reader_reports_err() -> Result<(), Box<dyn Error>> {
let mut test_reader = TestReader::new();
let msg_0 = br#"{"content_type":0,"content_length":708370,"last_"#;
test_reader.push_fill_buf_event(Poll::Pending);
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_0)));
test_reader.push_fill_buf_event(Poll::Pending);
test_reader.push_fill_buf_event(Poll::Ready(Err(std::io::Error::new(
ErrorKind::Other,
"sup",
))));
let mut reader = Box::pin(test_reader);
let metadata = futures::executor::block_on(MetadataFuture(reader.as_mut()));
assert!(metadata.is_err());
Ok(())
}
#[test]
fn underlying_reader_reports_early_eof() -> Result<(), Box<dyn Error>> {
let mut test_reader = TestReader::new();
test_reader.push_fill_buf_event(Poll::Ready(Ok(&[])));
let mut reader = Box::pin(test_reader);
let metadata = futures::executor::block_on(MetadataFuture(reader.as_mut()));
assert!(metadata.is_err());
Ok(())
}
#[test]
fn invalid_metadata() -> Result<(), Box<dyn Error>> {
let mut test_reader = TestReader::new();
// content type is incorrect, should be a number
let msg_0 = br#"{"content_type":"foo","content_length":708370,"last_modified":"2021-04-13T04:37:41+00:00"}"#;
test_reader.push_fill_buf_event(Poll::Ready(Ok(msg_0)));
let mut reader = Box::pin(test_reader);
let metadata = futures::executor::block_on(MetadataFuture(reader.as_mut()));
assert!(metadata.is_err());
Ok(())
}
}

722
src/cache/mem.rs vendored Normal file
View file

@ -0,0 +1,722 @@
use std::borrow::Cow;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use super::{Cache, CacheEntry, CacheKey, CacheStream, CallbackCache, ImageMetadata, MemStream};
use async_trait::async_trait;
use bytes::Bytes;
use futures::FutureExt;
use lfu_cache::LfuCache;
use lru::LruCache;
use redis::{
Client as RedisClient, Commands, FromRedisValue, RedisError, RedisResult, ToRedisArgs,
};
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::{channel, Receiver, Sender};
use tokio::sync::Mutex;
use tracing::warn;
#[derive(Clone, Serialize, Deserialize)]
pub struct CacheValue {
data: Bytes,
metadata: ImageMetadata,
on_disk_size: u64,
}
impl CacheValue {
#[inline]
fn new(data: Bytes, metadata: ImageMetadata, on_disk_size: u64) -> Self {
Self {
data,
metadata,
on_disk_size,
}
}
}
impl FromRedisValue for CacheValue {
fn from_redis_value(v: &redis::Value) -> RedisResult<Self> {
use bincode::ErrorKind;
if let redis::Value::Data(data) = v {
bincode::deserialize(data).map_err(|err| match *err {
ErrorKind::Io(e) => RedisError::from(e),
ErrorKind::Custom(e) => RedisError::from((
redis::ErrorKind::ResponseError,
"bincode deserialize failed",
e,
)),
e => RedisError::from((
redis::ErrorKind::ResponseError,
"bincode deserialized failed",
e.to_string(),
)),
})
} else {
Err(RedisError::from((
redis::ErrorKind::TypeError,
"Got non data type from redis db",
)))
}
}
}
impl ToRedisArgs for CacheValue {
fn write_redis_args<W>(&self, out: &mut W)
where
W: ?Sized + redis::RedisWrite,
{
out.write_arg(&bincode::serialize(self).expect("serialization to work"));
}
}
/// Use LRU as the eviction strategy
pub type Lru = LruCache<CacheKey, CacheValue>;
/// Use LFU as the eviction strategy
pub type Lfu = LfuCache<CacheKey, CacheValue>;
/// Adapter trait for memory cache backends
pub trait InternalMemoryCacheInitializer: InternalMemoryCache {
fn new() -> Self;
}
pub trait InternalMemoryCache: Sync + Send {
fn get(&mut self, key: &CacheKey) -> Option<Cow<CacheValue>>;
fn push(&mut self, key: CacheKey, data: CacheValue);
fn pop(&mut self) -> Option<(CacheKey, CacheValue)>;
}
#[cfg(not(tarpaulin_include))]
impl InternalMemoryCacheInitializer for Lfu {
#[inline]
fn new() -> Self {
Self::unbounded()
}
}
#[cfg(not(tarpaulin_include))]
impl InternalMemoryCache for Lfu {
#[inline]
fn get(&mut self, key: &CacheKey) -> Option<Cow<CacheValue>> {
self.get(key).map(Cow::Borrowed)
}
#[inline]
fn push(&mut self, key: CacheKey, data: CacheValue) {
self.insert(key, data);
}
#[inline]
fn pop(&mut self) -> Option<(CacheKey, CacheValue)> {
self.pop_lfu_key_value()
}
}
#[cfg(not(tarpaulin_include))]
impl InternalMemoryCacheInitializer for Lru {
#[inline]
fn new() -> Self {
Self::unbounded()
}
}
#[cfg(not(tarpaulin_include))]
impl InternalMemoryCache for Lru {
#[inline]
fn get(&mut self, key: &CacheKey) -> Option<Cow<CacheValue>> {
self.get(key).map(Cow::Borrowed)
}
#[inline]
fn push(&mut self, key: CacheKey, data: CacheValue) {
self.put(key, data);
}
#[inline]
fn pop(&mut self) -> Option<(CacheKey, CacheValue)> {
self.pop_lru()
}
}
#[cfg(not(tarpaulin_include))]
impl InternalMemoryCache for RedisClient {
fn get(&mut self, key: &CacheKey) -> Option<Cow<CacheValue>> {
Commands::get(self, key).ok().map(Cow::Owned)
}
fn push(&mut self, key: CacheKey, data: CacheValue) {
if let Err(e) = Commands::set::<_, _, ()>(self, key, data) {
warn!("Failed to push to redis: {}", e);
}
}
fn pop(&mut self) -> Option<(CacheKey, CacheValue)> {
unimplemented!("redis should handle its own memory")
}
}
/// Memory accelerated disk cache. Uses the internal cache implementation in
/// memory to speed up reads.
pub struct MemoryCache<MemoryCacheImpl, ColdCache> {
inner: ColdCache,
cur_mem_size: AtomicU64,
mem_cache: Mutex<MemoryCacheImpl>,
master_sender: Sender<CacheEntry>,
}
impl<MemoryCacheImpl, ColdCache> MemoryCache<MemoryCacheImpl, ColdCache>
where
MemoryCacheImpl: 'static + InternalMemoryCacheInitializer,
ColdCache: 'static + Cache,
{
pub fn new(inner: ColdCache, max_mem_size: crate::units::Bytes) -> Arc<Self> {
let (tx, rx) = channel(100);
let new_self = Arc::new(Self {
inner,
cur_mem_size: AtomicU64::new(0),
mem_cache: Mutex::new(MemoryCacheImpl::new()),
master_sender: tx,
});
tokio::spawn(internal_cache_listener(
Arc::clone(&new_self),
max_mem_size,
rx,
));
new_self
}
/// Returns an instance of the cache with the receiver for callback events
/// Really only useful for inspecting the receiver, e.g. for testing
#[cfg(test)]
pub fn new_with_receiver(
inner: ColdCache,
_: crate::units::Bytes,
) -> (Self, Receiver<CacheEntry>) {
let (tx, rx) = channel(100);
(
Self {
inner,
cur_mem_size: AtomicU64::new(0),
mem_cache: Mutex::new(MemoryCacheImpl::new()),
master_sender: tx,
},
rx,
)
}
}
impl<MemoryCacheImpl, ColdCache> MemoryCache<MemoryCacheImpl, ColdCache>
where
MemoryCacheImpl: 'static + InternalMemoryCache,
ColdCache: 'static + Cache,
{
pub fn new_with_cache(inner: ColdCache, init_mem_cache: MemoryCacheImpl) -> Self {
Self {
inner,
cur_mem_size: AtomicU64::new(0),
mem_cache: Mutex::new(init_mem_cache),
master_sender: channel(1).0,
}
}
}
async fn internal_cache_listener<MemoryCacheImpl, ColdCache>(
cache: Arc<MemoryCache<MemoryCacheImpl, ColdCache>>,
max_mem_size: crate::units::Bytes,
mut rx: Receiver<CacheEntry>,
) where
MemoryCacheImpl: InternalMemoryCache,
ColdCache: Cache,
{
let max_mem_size = mem_threshold(&max_mem_size);
while let Some(CacheEntry {
key,
data,
metadata,
on_disk_size,
}) = rx.recv().await
{
// Add to memory cache
// We can add first because we constrain our memory usage to 95%
cache
.cur_mem_size
.fetch_add(on_disk_size as u64, Ordering::Release);
cache
.mem_cache
.lock()
.await
.push(key, CacheValue::new(data, metadata, on_disk_size));
// Pop if too large
while cache.cur_mem_size.load(Ordering::Acquire) >= max_mem_size as u64 {
let popped = cache.mem_cache.lock().await.pop().map(
|(
key,
CacheValue {
data,
metadata,
on_disk_size,
},
)| (key, data, metadata, on_disk_size),
);
if let Some((_, _, _, size)) = popped {
cache.cur_mem_size.fetch_sub(size as u64, Ordering::Release);
} else {
break;
}
}
}
}
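/// 19/20 (95%) of the configured maximum, leaving headroom before eviction.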
const fn mem_threshold(bytes: &crate::units::Bytes) -> usize {
bytes.get() / 20 * 19
}
#[async_trait]
impl<MemoryCacheImpl, ColdCache> Cache for MemoryCache<MemoryCacheImpl, ColdCache>
where
MemoryCacheImpl: InternalMemoryCache,
ColdCache: CallbackCache,
{
#[inline]
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), super::CacheError>> {
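// Try the memory cache only if its lock is immediately available; otherwise fall back to the cold cache.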
match self.mem_cache.lock().now_or_never() {
Some(mut mem_cache) => {
match mem_cache.get(key).map(Cow::into_owned).map(
|CacheValue { data, metadata, .. }| {
Ok((CacheStream::Memory(MemStream(data)), metadata))
},
) {
Some(v) => Some(v),
None => self.inner.get(key).await,
}
}
None => self.inner.get(key).await,
}
}
#[inline]
async fn put(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
) -> Result<(), super::CacheError> {
self.inner
.put_with_on_completed_callback(key, image, metadata, self.master_sender.clone())
.await
}
}
#[cfg(test)]
mod test_util {
use std::borrow::Cow;
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use super::{CacheValue, InternalMemoryCache, InternalMemoryCacheInitializer};
use crate::cache::{
Cache, CacheEntry, CacheError, CacheKey, CacheStream, CallbackCache, ImageMetadata,
};
use async_trait::async_trait;
use parking_lot::Mutex;
use tokio::io::BufReader;
use tokio::sync::mpsc::Sender;
use tokio_util::io::ReaderStream;
#[derive(Default)]
pub struct TestDiskCache(
pub Mutex<RefCell<HashMap<CacheKey, Result<(CacheStream, ImageMetadata), CacheError>>>>,
);
#[async_trait]
impl Cache for TestDiskCache {
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), CacheError>> {
self.0.lock().get_mut().remove(key)
}
async fn put(
&self,
key: CacheKey,
image: bytes::Bytes,
metadata: ImageMetadata,
) -> Result<(), CacheError> {
let reader = Box::pin(BufReader::new(tokio_util::io::StreamReader::new(
tokio_stream::once(Ok::<_, std::io::Error>(image)),
)));
let stream = CacheStream::Completed(ReaderStream::new(reader));
self.0.lock().get_mut().insert(key, Ok((stream, metadata)));
Ok(())
}
}
#[async_trait]
impl CallbackCache for TestDiskCache {
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
data: bytes::Bytes,
metadata: ImageMetadata,
on_complete: Sender<CacheEntry>,
) -> Result<(), CacheError> {
self.put(key.clone(), data.clone(), metadata)
.await?;
let on_disk_size = data.len() as u64;
let _ = on_complete
.send(CacheEntry {
key,
data,
metadata,
on_disk_size,
})
.await;
Ok(())
}
}
#[derive(Default)]
pub struct TestMemoryCache(pub BTreeMap<CacheKey, CacheValue>);
impl InternalMemoryCacheInitializer for TestMemoryCache {
fn new() -> Self {
Self::default()
}
}
impl InternalMemoryCache for TestMemoryCache {
fn get(&mut self, key: &CacheKey) -> Option<Cow<CacheValue>> {
self.0.get(key).map(Cow::Borrowed)
}
fn push(&mut self, key: CacheKey, data: CacheValue) {
self.0.insert(key, data);
}
fn pop(&mut self) -> Option<(CacheKey, CacheValue)> {
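// Emulate BTreeMap::pop_first by swapping the map out, taking the first
// entry, and collecting the remainder back.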
let mut cache = BTreeMap::new();
std::mem::swap(&mut cache, &mut self.0);
let mut iter = cache.into_iter();
let ret = iter.next();
self.0 = iter.collect();
ret
}
}
}
#[cfg(test)]
mod cache_ops {
use std::error::Error;
use bytes::Bytes;
use futures::{FutureExt, StreamExt};
use crate::cache::mem::{CacheValue, InternalMemoryCache};
use crate::cache::{Cache, CacheEntry, CacheKey, CacheStream, ImageMetadata, MemStream};
use super::test_util::{TestDiskCache, TestMemoryCache};
use super::MemoryCache;
#[tokio::test]
async fn get_mem_cached() -> Result<(), Box<dyn Error>> {
let (cache, mut rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(10),
);
let key = CacheKey("a".to_string(), "b".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcd");
let value = CacheValue::new(bytes.clone(), metadata, 34);
// Populate the cache. The lock must be dropped before calling `get`,
// otherwise the cache sees it as contended and skips the memory layer.
{
let mem_cache = &mut cache.mem_cache.lock().await;
mem_cache.push(key.clone(), value.clone());
}
let (stream, ret_metadata) = cache.get(&key).await.unwrap()?;
assert_eq!(metadata, ret_metadata);
if let CacheStream::Memory(MemStream(ret_stream)) = stream {
assert_eq!(bytes, ret_stream);
} else {
panic!("wrong stream type");
}
assert!(rx.recv().now_or_never().is_none());
Ok(())
}
#[tokio::test]
async fn get_disk_cached() -> Result<(), Box<dyn Error>> {
let (mut cache, mut rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(10),
);
let key = CacheKey("a".to_string(), "b".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcd");
{
let cache = &mut cache.inner;
cache
.put(key.clone(), bytes.clone(), metadata)
.await?;
}
let (mut stream, ret_metadata) = cache.get(&key).await.unwrap()?;
assert_eq!(metadata, ret_metadata);
assert!(matches!(stream, CacheStream::Completed(_)));
assert_eq!(stream.next().await, Some(Ok(bytes.clone())));
assert!(rx.recv().now_or_never().is_none());
Ok(())
}
// Identical to the get_disk_cached test but we hold a lock on the mem_cache
#[tokio::test]
async fn get_mem_locked() -> Result<(), Box<dyn Error>> {
let (mut cache, mut rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(10),
);
let key = CacheKey("a".to_string(), "b".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcd");
{
let cache = &mut cache.inner;
cache
.put(key.clone(), bytes.clone(), metadata)
.await?;
}
// intentionally not dropped
let _mem_cache = &mut cache.mem_cache.lock().await;
let (mut stream, ret_metadata) = cache.get(&key).await.unwrap()?;
assert_eq!(metadata, ret_metadata);
assert!(matches!(stream, CacheStream::Completed(_)));
assert_eq!(stream.next().await, Some(Ok(bytes.clone())));
assert!(rx.recv().now_or_never().is_none());
Ok(())
}
#[tokio::test]
async fn get_miss() {
let (cache, mut rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(10),
);
let key = CacheKey("a".to_string(), "b".to_string(), false);
assert!(cache.get(&key).await.is_none());
assert!(rx.recv().now_or_never().is_none());
}
#[tokio::test]
async fn put_puts_into_disk_and_hears_from_rx() -> Result<(), Box<dyn Error>> {
let (cache, mut rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(10),
);
let key = CacheKey("a".to_string(), "b".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcd");
let bytes_len = bytes.len() as u64;
cache
.put(key.clone(), bytes.clone(), metadata)
.await?;
// The callback is what lets the memory cache insert the entry, so a
// successful fetch here shows that the disk layer stored it correctly.
let (mut stream, ret_metadata) = cache.get(&key).await.unwrap()?;
assert_eq!(metadata, ret_metadata);
assert!(matches!(stream, CacheStream::Completed(_)));
assert_eq!(stream.next().await, Some(Ok(bytes.clone())));
// Check that we heard back
let cache_entry = rx
.recv()
.now_or_never()
.flatten()
.ok_or("failed to hear back from cache")?;
assert_eq!(
cache_entry,
CacheEntry {
key,
data: bytes,
metadata,
on_disk_size: bytes_len,
}
);
Ok(())
}
}
#[cfg(test)]
mod db_listener {
use std::error::Error;
use std::iter::FromIterator;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use bytes::Bytes;
use tokio::task;
use crate::cache::{Cache, CacheKey, ImageMetadata};
use super::test_util::{TestDiskCache, TestMemoryCache};
use super::{internal_cache_listener, MemoryCache};
#[tokio::test]
async fn put_into_memory() -> Result<(), Box<dyn Error>> {
let (cache, rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(0),
);
let cache = Arc::new(cache);
tokio::spawn(internal_cache_listener(
Arc::clone(&cache),
crate::units::Bytes(20),
rx,
));
// put small image into memory
let key = CacheKey("a".to_string(), "b".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcd");
cache.put(key.clone(), bytes.clone(), metadata).await?;
// let the listener run first
for _ in 0..10 {
task::yield_now().await;
}
assert_eq!(
cache.cur_mem_size.load(Ordering::SeqCst),
bytes.len() as u64
);
// We never pushed into the memory cache ourselves, so a successful get
// implies the listener picked the entry up from the callback channel and
// inserted it.
assert!(cache.get(&key).await.is_some());
Ok(())
}
#[tokio::test]
async fn pops_items() -> Result<(), Box<dyn Error>> {
let (cache, rx) = MemoryCache::<TestMemoryCache, _>::new_with_receiver(
TestDiskCache::default(),
crate::units::Bytes(0),
);
let cache = Arc::new(cache);
tokio::spawn(internal_cache_listener(
Arc::clone(&cache),
crate::units::Bytes(20),
rx,
));
// put small image into memory
let key_0 = CacheKey("a".to_string(), "b".to_string(), false);
let key_1 = CacheKey("c".to_string(), "d".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_static(b"abcde");
cache.put(key_0, bytes.clone(), metadata).await?;
cache.put(key_1, bytes.clone(), metadata).await?;
// let the listener run first
task::yield_now().await;
for _ in 0..10 {
task::yield_now().await;
}
// Items should be in cache now
assert_eq!(
cache.cur_mem_size.load(Ordering::SeqCst),
(bytes.len() * 2) as u64
);
let key_3 = CacheKey("e".to_string(), "f".to_string(), false);
let metadata = ImageMetadata {
content_type: None,
content_length: Some(1),
last_modified: None,
};
let bytes = Bytes::from_iter(b"0".repeat(16).into_iter());
let bytes_len = bytes.len();
cache.put(key_3, bytes, metadata).await?;
// let the listener run first
task::yield_now().await;
for _ in 0..10 {
task::yield_now().await;
}
// Items should have been evicted, only 16 bytes should be there now
assert_eq!(cache.cur_mem_size.load(Ordering::SeqCst), bytes_len as u64);
Ok(())
}
}
#[cfg(test)]
mod mem_threshold {
use crate::units::Bytes;
use super::mem_threshold;
#[test]
fn small_amount_works() {
assert_eq!(mem_threshold(&Bytes(100)), 95);
}
#[test]
fn large_amount_cannot_overflow() {
assert_eq!(mem_threshold(&Bytes(usize::MAX)), 17_524_406_870_024_074_020);
}
}

src/cache/mod.rs vendored Normal file

@@ -0,0 +1,302 @@
use std::fmt::Display;
use std::path::PathBuf;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::task::{Context, Poll};
use actix_web::http::header::HeaderValue;
use async_trait::async_trait;
use bytes::Bytes;
use chacha20::Key;
use chrono::{DateTime, FixedOffset};
use futures::{Stream, StreamExt};
use once_cell::sync::OnceCell;
use redis::ToRedisArgs;
use serde::{Deserialize, Serialize};
use serde_repr::{Deserialize_repr, Serialize_repr};
use thiserror::Error;
use tokio::sync::mpsc::Sender;
use tokio_util::io::ReaderStream;
pub use disk::DiskCache;
pub use fs::UpstreamError;
pub use mem::MemoryCache;
use self::compat::LegacyImageMetadata;
use self::fs::MetadataFetch;
pub static ENCRYPTION_KEY: OnceCell<Key> = OnceCell::new();
mod compat;
mod disk;
mod fs;
pub mod mem;
#[derive(PartialEq, Eq, Hash, Clone, Debug, PartialOrd, Ord)]
pub struct CacheKey(pub String, pub String, pub bool);
impl ToRedisArgs for CacheKey {
fn write_redis_args<W>(&self, out: &mut W)
where
W: ?Sized + redis::RedisWrite,
{
out.write_arg_fmt(self);
}
}
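// The formatted key doubles as the image's on-disk path; the bool flag
// selects the data-saver variant (see the PathBuf conversions below).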
impl Display for CacheKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.2 {
write!(f, "saver/{}/{}", self.0, self.1)
} else {
write!(f, "data/{}/{}", self.0, self.1)
}
}
}
impl From<CacheKey> for PathBuf {
#[inline]
fn from(key: CacheKey) -> Self {
key.to_string().into()
}
}
impl From<&CacheKey> for PathBuf {
#[inline]
fn from(key: &CacheKey) -> Self {
key.to_string().into()
}
}
#[derive(Clone)]
pub struct CachedImage(pub Bytes);
#[derive(Copy, Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct ImageMetadata {
pub content_type: Option<ImageContentType>,
pub content_length: Option<u32>,
pub last_modified: Option<DateTime<FixedOffset>>,
}
// Confirmed by Ply to be these types: https://link.eddie.sh/ZXfk0
#[derive(Copy, Clone, Serialize_repr, Deserialize_repr, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum ImageContentType {
Png = 0,
Jpeg,
Gif,
}
pub struct InvalidContentType;
impl FromStr for ImageContentType {
type Err = InvalidContentType;
#[inline]
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"image/png" => Ok(Self::Png),
"image/jpeg" => Ok(Self::Jpeg),
"image/gif" => Ok(Self::Gif),
_ => Err(InvalidContentType),
}
}
}
impl AsRef<str> for ImageContentType {
#[inline]
fn as_ref(&self) -> &str {
match self {
Self::Png => "image/png",
Self::Jpeg => "image/jpeg",
Self::Gif => "image/gif",
}
}
}
impl From<LegacyImageMetadata> for ImageMetadata {
fn from(legacy: LegacyImageMetadata) -> Self {
Self {
content_type: legacy.content_type.map(|v| v.0),
content_length: legacy.size,
last_modified: legacy.last_modified.map(|v| v.0),
}
}
}
#[derive(Debug)]
pub enum ImageRequestError {
ContentType,
ContentLength,
LastModified,
}
impl ImageMetadata {
pub fn new(
content_type: Option<HeaderValue>,
content_length: Option<HeaderValue>,
last_modified: Option<HeaderValue>,
) -> Result<Self, ImageRequestError> {
Ok(Self {
content_type: content_type
.map(|v| match v.to_str() {
Ok(v) => ImageContentType::from_str(v),
Err(_) => Err(InvalidContentType),
})
.transpose()
.map_err(|_| ImageRequestError::ContentType)?,
content_length: content_length
.map(|header_val| {
header_val
.to_str()
.map_err(|_| ImageRequestError::ContentLength)?
.parse()
.map_err(|_| ImageRequestError::ContentLength)
})
.transpose()?,
last_modified: last_modified
.map(|header_val| {
DateTime::parse_from_rfc2822(
header_val
.to_str()
.map_err(|_| ImageRequestError::LastModified)?,
)
.map_err(|_| ImageRequestError::LastModified)
})
.transpose()?,
})
}
}
#[derive(Error, Debug)]
pub enum CacheError {
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
Reqwest(#[from] reqwest::Error),
#[error(transparent)]
Upstream(#[from] UpstreamError),
#[error("An error occurred while reading the decryption header")]
DecryptionFailure,
}
#[async_trait]
pub trait Cache: Send + Sync {
async fn get(&self, key: &CacheKey)
-> Option<Result<(CacheStream, ImageMetadata), CacheError>>;
async fn put(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
) -> Result<(), CacheError>;
}
#[async_trait]
impl<T: Cache> Cache for Arc<T> {
#[inline]
async fn get(
&self,
key: &CacheKey,
) -> Option<Result<(CacheStream, ImageMetadata), CacheError>> {
self.as_ref().get(key).await
}
#[inline]
async fn put(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
) -> Result<(), CacheError> {
self.as_ref().put(key, image, metadata).await
}
}
#[async_trait]
pub trait CallbackCache: Cache {
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
on_complete: Sender<CacheEntry>,
) -> Result<(), CacheError>;
}
#[async_trait]
impl<T: CallbackCache> CallbackCache for Arc<T> {
#[inline]
#[cfg(not(tarpaulin_include))]
async fn put_with_on_completed_callback(
&self,
key: CacheKey,
image: Bytes,
metadata: ImageMetadata,
on_complete: Sender<CacheEntry>,
) -> Result<(), CacheError> {
self.as_ref()
.put_with_on_completed_callback(key, image, metadata, on_complete)
.await
}
}
#[derive(PartialEq, Eq, Debug)]
pub struct CacheEntry {
key: CacheKey,
data: Bytes,
metadata: ImageMetadata,
on_disk_size: u64,
}
pub enum CacheStream {
Memory(MemStream),
Completed(ReaderStream<Pin<Box<dyn MetadataFetch + Send + Sync>>>),
}
impl From<CachedImage> for CacheStream {
fn from(image: CachedImage) -> Self {
Self::Memory(MemStream(image.0))
}
}
type CacheStreamItem = Result<Bytes, UpstreamError>;
impl Stream for CacheStream {
type Item = CacheStreamItem;
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match self.get_mut() {
Self::Memory(stream) => stream.poll_next_unpin(cx),
Self::Completed(stream) => stream.poll_next_unpin(cx).map_err(|_| UpstreamError),
}
}
}
pub struct MemStream(pub Bytes);
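// Yields the entire in-memory buffer as a single chunk, then reports the
// stream as finished once the buffer has been taken.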
impl Stream for MemStream {
type Item = CacheStreamItem;
fn poll_next(mut self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let mut new_bytes = Bytes::new();
std::mem::swap(&mut self.0, &mut new_bytes);
if new_bytes.is_empty() {
Poll::Ready(None)
} else {
Poll::Ready(Some(Ok(new_bytes)))
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn metadata_size() {
assert_eq!(std::mem::size_of::<ImageMetadata>(), 32);
}
}

src/client.rs Normal file

@@ -0,0 +1,220 @@
use std::collections::HashMap;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use actix_web::http::header::{HeaderMap, HeaderName, HeaderValue};
use actix_web::web::Data;
use bytes::Bytes;
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use reqwest::header::{
ACCESS_CONTROL_ALLOW_ORIGIN, ACCESS_CONTROL_EXPOSE_HEADERS, CACHE_CONTROL, CONTENT_LENGTH,
CONTENT_TYPE, LAST_MODIFIED, X_CONTENT_TYPE_OPTIONS,
};
use reqwest::{Client, Proxy, StatusCode};
use tokio::sync::watch::{channel, Receiver};
use tokio::sync::Notify;
use tracing::{debug, error, info, warn};
use crate::cache::{Cache, CacheKey, ImageMetadata};
use crate::config::{DISABLE_CERT_VALIDATION, USE_PROXY};
pub static HTTP_CLIENT: Lazy<CachingClient> = Lazy::new(|| {
let mut inner = Client::builder()
.pool_idle_timeout(Duration::from_secs(180))
.https_only(true)
.http2_prior_knowledge();
if let Some(socket_addr) = USE_PROXY.get() {
info!(
"Using {} as a proxy for upstream requests.",
socket_addr.as_str()
);
inner = inner.proxy(Proxy::all(socket_addr.as_str()).unwrap());
}
if DISABLE_CERT_VALIDATION.load(Ordering::Acquire) {
inner = inner.danger_accept_invalid_certs(true);
}
let inner = inner.build().expect("Client initialization to work");
CachingClient {
inner,
locks: RwLock::new(HashMap::new()),
}
});
#[cfg(not(tarpaulin_include))]
pub static DEFAULT_HEADERS: Lazy<HeaderMap> = Lazy::new(|| {
let mut headers = HeaderMap::with_capacity(8);
headers.insert(X_CONTENT_TYPE_OPTIONS, HeaderValue::from_static("nosniff"));
headers.insert(
ACCESS_CONTROL_ALLOW_ORIGIN,
HeaderValue::from_static("https://mangadex.org"),
);
headers.insert(ACCESS_CONTROL_EXPOSE_HEADERS, HeaderValue::from_static("*"));
headers.insert(
CACHE_CONTROL,
HeaderValue::from_static("public, max-age=1209600"),
);
headers.insert(
HeaderName::from_static("timing-allow-origin"),
HeaderValue::from_static("https://mangadex.org"),
);
headers
});
pub struct CachingClient {
inner: Client,
locks: RwLock<HashMap<String, Receiver<FetchResult>>>,
}
#[derive(Clone, Debug)]
pub enum FetchResult {
ServiceUnavailable,
InternalServerError,
Data(StatusCode, HeaderMap, Bytes),
Processing,
}
impl CachingClient {
pub async fn fetch_and_cache(
&'static self,
url: String,
key: CacheKey,
cache: Data<dyn Cache>,
) -> FetchResult {
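// If another task is already fetching this URL, wait on its watch
// channel for the result instead of issuing a duplicate upstream request.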
let maybe_receiver = {
let lock = self.locks.read();
lock.get(&url).map(Clone::clone)
};
if let Some(mut recv) = maybe_receiver {
loop {
if !matches!(*recv.borrow(), FetchResult::Processing) {
break;
}
if recv.changed().await.is_err() {
break;
}
}
return recv.borrow().clone();
}
let notify = Arc::new(Notify::new());
tokio::spawn(self.fetch_and_cache_impl(cache, url.clone(), key, Arc::clone(&notify)));
notify.notified().await;
let mut recv = self
.locks
.read()
.get(&url)
.expect("receiver to exist since we just made one")
.clone();
loop {
if !matches!(*recv.borrow(), FetchResult::Processing) {
break;
}
if recv.changed().await.is_err() {
break;
}
}
let resp = recv.borrow().clone();
resp
}
async fn fetch_and_cache_impl(
&self,
cache: Data<dyn Cache>,
url: String,
key: CacheKey,
notify: Arc<Notify>,
) {
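// Register the in-flight fetch before waking the caller, so concurrent
// requests for the same URL can find the watch channel.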
let (tx, rx) = channel(FetchResult::Processing);
self.locks.write().insert(url.clone(), rx);
notify.notify_one();
let resp = self.inner.get(&url).send().await;
let resp = match resp {
Ok(mut resp) => {
let content_type = resp.headers().get(CONTENT_TYPE);
let is_image = content_type
.map(|v| String::from_utf8_lossy(v.as_ref()).contains("image/"))
.unwrap_or_default();
if resp.status() != StatusCode::OK || !is_image {
warn!("Got non-OK or non-image response code from upstream, proxying and not caching result.");
let mut headers = DEFAULT_HEADERS.clone();
if let Some(content_type) = content_type {
headers.insert(CONTENT_TYPE, content_type.clone());
}
FetchResult::Data(
resp.status(),
headers,
resp.bytes().await.unwrap_or_default(),
)
} else {
let (content_type, length, last_mod) = {
let headers = resp.headers_mut();
(
headers.remove(CONTENT_TYPE),
headers.remove(CONTENT_LENGTH),
headers.remove(LAST_MODIFIED),
)
};
let body = resp.bytes().await.unwrap();
debug!("Inserting into cache");
let metadata =
ImageMetadata::new(content_type.clone(), length.clone(), last_mod.clone())
.unwrap();
match cache.put(key, body.clone(), metadata).await {
Ok(()) => {
debug!("Done putting into cache");
let mut headers = DEFAULT_HEADERS.clone();
if let Some(content_type) = content_type {
headers.insert(CONTENT_TYPE, content_type);
}
if let Some(content_length) = length {
headers.insert(CONTENT_LENGTH, content_length);
}
if let Some(last_modified) = last_mod {
headers.insert(LAST_MODIFIED, last_modified);
}
FetchResult::Data(StatusCode::OK, headers, body)
}
Err(e) => {
warn!("Failed to insert into cache: {}", e);
FetchResult::InternalServerError
}
}
}
}
Err(e) => {
error!("Failed to fetch image from server: {}", e);
FetchResult::ServiceUnavailable
}
};
// Sending fails only if every receiver is gone, which shouldn't happen
// while the URL is still registered in the lock map.
tx.send(resp).unwrap();
self.locks.write().remove(&url);
}
#[inline]
pub const fn inner(&self) -> &Client {
&self.inner
}
}

src/config.rs Normal file

@@ -0,0 +1,479 @@
use std::fmt::{Display, Formatter};
use std::fs::{File, OpenOptions};
use std::hint::unreachable_unchecked;
use std::io::{ErrorKind, Write};
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::num::NonZeroU16;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use clap::{crate_authors, crate_description, crate_version, Parser};
use log::LevelFilter;
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::level_filters::LevelFilter as TracingLevelFilter;
use url::Url;
use crate::units::{KilobitsPerSecond, Mebibytes, Port};
// VALIDATE_TOKENS is an atomic because it's faster than locking an RwLock.
pub static VALIDATE_TOKENS: AtomicBool = AtomicBool::new(false);
pub static OFFLINE_MODE: AtomicBool = AtomicBool::new(false);
pub static USE_PROXY: OnceCell<Url> = OnceCell::new();
pub static DISABLE_CERT_VALIDATION: AtomicBool = AtomicBool::new(false);
#[derive(Error, Debug)]
pub enum ConfigError {
#[error("No config found. One has been created for you to modify.")]
NotInitialized,
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
Parse(#[from] serde_yaml::Error),
}
pub fn load_config() -> Result<Config, ConfigError> {
// Load cli args first
let cli_args: CliArgs = CliArgs::parse();
// Load yaml file next
let config_file: Result<YamlArgs, _> = {
let config_path = cli_args
.config_path
.as_deref()
.unwrap_or_else(|| Path::new("./settings.yaml"));
match File::open(config_path) {
Ok(file) => serde_yaml::from_reader(file),
Err(e) if e.kind() == ErrorKind::NotFound => {
let mut file = OpenOptions::new()
.write(true)
.create_new(true)
.open(config_path)
.unwrap();
let default_config = include_str!("../settings.sample.yaml");
file.write_all(default_config.as_bytes()).unwrap();
return Err(ConfigError::NotInitialized);
}
Err(e) => return Err(e.into()),
}
};
// generate config
let config = Config::from_cli_and_file(cli_args, config_file?);
// initialize globals
OFFLINE_MODE.store(
config
.unstable_options
.contains(&UnstableOptions::OfflineMode),
Ordering::Release,
);
if let Some(socket) = config.proxy.clone() {
USE_PROXY
.set(socket)
.expect("USE_PROXY to be set only by this function");
}
DISABLE_CERT_VALIDATION.store(
config
.unstable_options
.contains(&UnstableOptions::DisableCertValidation),
Ordering::Release,
);
Ok(config)
}
#[derive(Debug)]
/// Represents a fully parsed config, from a variety of sources.
pub struct Config {
pub cache_type: CacheType,
pub cache_path: PathBuf,
pub shutdown_timeout: NonZeroU16,
pub log_level: TracingLevelFilter,
pub client_secret: ClientSecret,
pub port: Port,
pub bind_address: SocketAddr,
pub external_address: Option<SocketAddr>,
pub ephemeral_disk_encryption: bool,
pub network_speed: KilobitsPerSecond,
pub disk_quota: Mebibytes,
pub memory_quota: Mebibytes,
pub unstable_options: Vec<UnstableOptions>,
pub override_upstream: Option<Url>,
pub enable_metrics: bool,
pub geoip_license_key: Option<ClientSecret>,
pub proxy: Option<Url>,
pub redis_url: Option<Url>,
}
impl Config {
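/// Merges CLI arguments over file settings; wherever both provide a
/// value, the CLI wins (see the `cli_has_priority` test below).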
fn from_cli_and_file(cli_args: CliArgs, file_args: YamlArgs) -> Self {
let file_extended_options = file_args.extended_options.unwrap_or_default();
let log_level = match (cli_args.quiet, cli_args.verbose) {
(n, _) if n > 2 => TracingLevelFilter::OFF,
(2, _) => TracingLevelFilter::ERROR,
(1, _) => TracingLevelFilter::WARN,
// Use log level from file if no flags were provided to CLI
(0, 0) => {
file_extended_options
.logging_level
.map_or(TracingLevelFilter::INFO, |filter| match filter {
LevelFilter::Off => TracingLevelFilter::OFF,
LevelFilter::Error => TracingLevelFilter::ERROR,
LevelFilter::Warn => TracingLevelFilter::WARN,
LevelFilter::Info => TracingLevelFilter::INFO,
LevelFilter::Debug => TracingLevelFilter::DEBUG,
LevelFilter::Trace => TracingLevelFilter::TRACE,
})
}
(_, 1) => TracingLevelFilter::DEBUG,
(_, n) if n > 1 => TracingLevelFilter::TRACE,
// The compiler can't prove the arms above are already exhaustive.
_ => unsafe { unreachable_unchecked() },
};
let bind_port = cli_args
.port
.unwrap_or(file_args.server_settings.port)
.get();
// This needs to be outside because rust isn't smart enough yet to
// realize a disjoint borrow of a moved value is ok. This will be
// fixed in Rust 2021.
let external_port = file_args
.server_settings
.external_port
.map_or(bind_port, Port::get);
Self {
cache_type: cli_args
.cache_type
.or(file_extended_options.cache_type)
.unwrap_or_default(),
cache_path: cli_args
.cache_path
.or(file_extended_options.cache_path)
.unwrap_or_else(|| PathBuf::from_str("./cache").unwrap()),
shutdown_timeout: file_args
.server_settings
.graceful_shutdown_wait_seconds
.unwrap_or(unsafe { NonZeroU16::new_unchecked(60) }),
log_level,
// secret should never be in CLI
client_secret: if let Ok(v) = std::env::var("CLIENT_SECRET") {
ClientSecret(v)
} else {
file_args.server_settings.secret
},
port: cli_args.port.unwrap_or(file_args.server_settings.port),
bind_address: SocketAddr::new(
file_args
.server_settings
.hostname
.unwrap_or_else(|| IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0))),
bind_port,
),
external_address: file_args
.server_settings
.external_ip
.map(|ip_addr| SocketAddr::new(ip_addr, external_port)),
ephemeral_disk_encryption: cli_args.ephemeral_disk_encryption
|| file_extended_options
.ephemeral_disk_encryption
.unwrap_or_default(),
network_speed: cli_args
.network_speed
.unwrap_or(file_args.server_settings.external_max_kilobits_per_second),
disk_quota: cli_args
.disk_quota
.unwrap_or(file_args.max_cache_size_in_mebibytes),
memory_quota: cli_args
.memory_quota
.or(file_extended_options.memory_quota)
.unwrap_or_default(),
enable_metrics: file_extended_options.enable_metrics.unwrap_or_default(),
// Unstable options (and related) should never be in yaml config
unstable_options: cli_args.unstable_options,
override_upstream: cli_args.override_upstream,
geoip_license_key: file_args.metric_settings.and_then(|args| {
if args.enable_geoip.unwrap_or_default() {
args.geoip_license_key
} else {
None
}
}),
proxy: cli_args.proxy,
redis_url: file_extended_options.redis_url,
}
}
}
// this intentionally does not implement display
#[derive(Deserialize, Serialize, Clone)]
pub struct ClientSecret(String);
impl ClientSecret {
pub fn as_str(&self) -> &str {
self.0.as_ref()
}
}
impl std::fmt::Debug for ClientSecret {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "[client secret]")
}
}
#[derive(Deserialize, Copy, Clone, Debug, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum CacheType {
OnDisk,
Lru,
Lfu,
Redis,
}
impl FromStr for CacheType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"on_disk" => Ok(Self::OnDisk),
"lru" => Ok(Self::Lru),
"lfu" => Ok(Self::Lfu),
"redis" => Ok(Self::Redis),
_ => Err(format!("Unknown option: {}", s)),
}
}
}
impl Default for CacheType {
fn default() -> Self {
Self::OnDisk
}
}
#[derive(Deserialize)]
struct YamlArgs {
// Naming is legacy
max_cache_size_in_mebibytes: Mebibytes,
server_settings: YamlServerSettings,
metric_settings: Option<YamlMetricSettings>,
// This implementation's custom options
extended_options: Option<YamlExtendedOptions>,
}
// Naming is legacy
#[derive(Deserialize)]
struct YamlServerSettings {
secret: ClientSecret,
#[serde(default)]
port: Port,
external_max_kilobits_per_second: KilobitsPerSecond,
external_port: Option<Port>,
graceful_shutdown_wait_seconds: Option<NonZeroU16>,
hostname: Option<IpAddr>,
external_ip: Option<IpAddr>,
}
#[derive(Deserialize)]
struct YamlMetricSettings {
enable_geoip: Option<bool>,
geoip_license_key: Option<ClientSecret>,
}
#[derive(Deserialize, Default)]
struct YamlExtendedOptions {
memory_quota: Option<Mebibytes>,
cache_type: Option<CacheType>,
ephemeral_disk_encryption: Option<bool>,
enable_metrics: Option<bool>,
logging_level: Option<LevelFilter>,
cache_path: Option<PathBuf>,
redis_url: Option<Url>,
}
#[derive(Parser, Clone)]
#[clap(version = crate_version!(), author = crate_authors!(), about = crate_description!())]
struct CliArgs {
/// The port to listen on.
#[clap(short, long)]
pub port: Option<Port>,
/// How large, in mebibytes, the in-memory cache should be. Note that this
/// does not include runtime memory usage.
#[clap(long)]
pub memory_quota: Option<Mebibytes>,
/// How large, in mebibytes, the on-disk cache should be. Note that actual
/// values may be larger for metadata information.
#[clap(long)]
pub disk_quota: Option<Mebibytes>,
/// Sets the location of the disk cache.
#[clap(long)]
pub cache_path: Option<PathBuf>,
/// The network speed to advertise to Mangadex@Home control server.
#[clap(long)]
pub network_speed: Option<KilobitsPerSecond>,
/// Changes verbosity. Default verbosity is INFO, while increasing counts of
/// verbose flags increases the verbosity to DEBUG and TRACE, respectively.
#[clap(short, long, parse(from_occurrences), conflicts_with = "quiet")]
pub verbose: usize,
/// Changes verbosity. Default verbosity is INFO, while increasing counts of
/// quiet flags decreases the verbosity to WARN, ERROR, and no logs,
/// respectively.
#[clap(short, long, parse(from_occurrences), conflicts_with = "verbose")]
pub quiet: usize,
/// Unstable options. Intentionally not documented.
#[clap(short = 'Z', long)]
pub unstable_options: Vec<UnstableOptions>,
/// Override the image server with the one provided. Do not set this unless
/// you know what you're doing.
#[clap(long)]
pub override_upstream: Option<Url>,
/// Enables ephemeral disk encryption. Items written to disk are first
/// encrypted with a key generated at runtime. There are implications to
/// performance, privacy, and usability with this flag enabled.
#[clap(short, long)]
pub ephemeral_disk_encryption: bool,
/// The path to the config file. Default value is `./settings.yaml`.
#[clap(short, long)]
pub config_path: Option<PathBuf>,
/// Whether to use an in-memory cache in addition to the disk cache. Default
/// value is "on_disk", other options are "lfu", "lru", and "redis".
#[clap(short = 't', long)]
pub cache_type: Option<CacheType>,
/// Whether or not to use a proxy for upstream requests. This affects all
/// requests except for the shutdown request.
#[clap(short = 'P', long)]
pub proxy: Option<Url>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnstableOptions {
/// Overrides the upstream URL to fetch images from. Don't use this unless
/// you know what you're dealing with.
OverrideUpstream,
/// Disables token validation. Don't use this unless you know the
/// ramifications of this command.
DisableTokenValidation,
/// Tries to run without communication to MangaDex.
OfflineMode,
/// Serves HTTP in plaintext
DisableTls,
/// Disable certificate validation. Only useful for debugging with a MITM
/// proxy
DisableCertValidation,
}
impl FromStr for UnstableOptions {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"override-upstream" => Ok(Self::OverrideUpstream),
"disable-token-validation" => Ok(Self::DisableTokenValidation),
"offline-mode" => Ok(Self::OfflineMode),
"disable-tls" => Ok(Self::DisableTls),
"disable-cert-validation" => Ok(Self::DisableCertValidation),
_ => Err(format!("Unknown unstable option '{}'", s)),
}
}
}
impl Display for UnstableOptions {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::OverrideUpstream => write!(f, "override-upstream"),
Self::DisableTokenValidation => write!(f, "disable-token-validation"),
Self::OfflineMode => write!(f, "offline-mode"),
Self::DisableTls => write!(f, "disable-tls"),
Self::DisableCertValidation => write!(f, "disable-cert-validation"),
}
}
}
#[cfg(test)]
mod sample_yaml {
use crate::config::YamlArgs;
#[test]
fn parses() {
assert!(serde_yaml::from_str::<YamlArgs>(include_str!("../settings.sample.yaml")).is_ok());
}
}
#[cfg(test)]
mod config {
use std::path::PathBuf;
use log::LevelFilter;
use tracing::level_filters::LevelFilter as TracingLevelFilter;
use crate::config::{CacheType, ClientSecret, Config, YamlExtendedOptions, YamlServerSettings};
use crate::units::{KilobitsPerSecond, Mebibytes, Port};
use super::{CliArgs, YamlArgs};
#[test]
fn cli_has_priority() {
let cli_config = CliArgs {
port: Port::new(1234),
memory_quota: Some(Mebibytes::new(10)),
disk_quota: Some(Mebibytes::new(10)),
cache_path: Some(PathBuf::from("a")),
network_speed: KilobitsPerSecond::new(10),
verbose: 1,
quiet: 0,
unstable_options: vec![],
override_upstream: None,
ephemeral_disk_encryption: true,
config_path: None,
cache_type: Some(CacheType::Lfu),
proxy: None,
};
let yaml_args = YamlArgs {
max_cache_size_in_mebibytes: Mebibytes::new(50),
server_settings: YamlServerSettings {
secret: ClientSecret(String::new()),
port: Port::new(4321).expect("to work?"),
external_max_kilobits_per_second: KilobitsPerSecond::new(50).expect("to work?"),
external_port: None,
graceful_shutdown_wait_seconds: None,
hostname: None,
external_ip: None,
},
metric_settings: None,
extended_options: Some(YamlExtendedOptions {
memory_quota: Some(Mebibytes::new(50)),
cache_type: Some(CacheType::Lru),
ephemeral_disk_encryption: Some(false),
enable_metrics: None,
logging_level: Some(LevelFilter::Error),
cache_path: Some(PathBuf::from("b")),
redis_url: None,
}),
};
let config = Config::from_cli_and_file(cli_config, yaml_args);
assert_eq!(Some(config.port), Port::new(1234));
assert_eq!(config.memory_quota, Mebibytes::new(10));
assert_eq!(config.disk_quota, Mebibytes::new(10));
assert_eq!(config.cache_path, PathBuf::from("a"));
assert_eq!(Some(config.network_speed), KilobitsPerSecond::new(10));
assert_eq!(config.log_level, TracingLevelFilter::DEBUG);
assert_eq!(config.ephemeral_disk_encryption, true);
assert_eq!(config.cache_type, CacheType::Lfu);
}
}

src/index.html Normal file

@@ -0,0 +1,43 @@
<!DOCTYPE html>
<html lang=en>
<head>
<meta charset=utf-8 />
<meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width" />
<meta name="robots" content="none" />
<link rel="preconnect" href="https://fonts.gstatic.com" />
<link href="https://fonts.googleapis.com/css2?family=Poppins&family=Spartan&display=swap" rel="stylesheet" />
<title>MangaDex@Home</title>
<style>
html {
height: 100%;
display: grid;
}
body {
margin: auto;
}
h3 {
font-family: 'Spartan', sans-serif;
}
p {
font-family: 'Poppins', sans-serif;
}
</style>
</head>
<body>
<img
src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHgAAAB4CAYAAAA5ZDbSAAANnklEQVR4nO2dfXRU5Z3H725tt/Z49nTb1XM8e1qdQCSLbcVCdytrVVg5VVdbrQLW2hURmiycxlZ3WyukohUEtAhxXhKSBhJCIAMxgWAUDC9DIAkhAUwIRhKCIS/3Ps+9N/P+kiGZ7/4hYkJmkrnz9kzi8znn+1dmnvnd3yfPvXfuvXOvIHA4HA6Hw+FwOBwOh8PhcDgcDofD4XCSG0D4O/epLRk2y7pDjqpM6ip70ueoM61mXRcnCtSGgu/YLevfc1Sm2915s+DV676IKQ3Og1lW1jVyNAKYv9JfsynXUbHU6cmZPlKqPgX+qgwMXtgP+F1wnavYybpeTpjglPlGx6HVx93b5gZGSv0sA/uWYEhuxecMXjwcYF0zJwxAD9/gOLT6uPtv/z5Kqlevgzf3dgy2lWEEgUHYGv5mYV07ZxxsR7PzPEX3Bp2xXr0Ovm33IWDtxLUMdh2F/dz2VNb1c0KgNuyY7ahY4gwl1qvXwbd1NgKO7lFyAWCgQe9nvQycEPTXbMp15/8opFivXgdf/g8RsHUFlQu/G47qVRdYLwfnGgDz9c4PXujz6lPGlOvV6zDYvi+4XABDPbWwH88uZr08nGE4GnenuXYtHBhPrFevg//A70LKBYDLTSZY6/MXs14mzhV8zdvvD/XVZ1SMqQg4+8YU7K9Kh+OM+W7Wy8URrsgdYy951Oyt/r8x5QLAQOkjwCnzjayX7UuP86OiuVrkevU6DNGz4wr2Fd0D27FN+Thd/k3Wy/il5TO592mS69vyYwCBcQV7N38fXr0OnpzpcFcsctstb5WivuofWS/zlwacMt/o2fHwoBa5Xr0O/g9fHFfuEGmG1zBl1Hs9+bNg3/9yh60x94esl3/SY9+3rF+rXK9eh8GW4tBiu45goPypccfw5NwO+/6XLno/KtKx7kNCcTqdN6mqOk9RlKdVVf2dqqqvKIryR0VRfqsoytNOp/NeAN+I9nNsR9cfiUSuV6/DUOeBUWID1gsYqHxW81juvFmwHdO/G4veJRUArrNarY9SSgtlWT4vSZJTkqShvr4+hBNJki6LothPCDkhy/J6Sumd4X62UpvzJ48pLSK5Xr0OQ2LTsCl7GZfr1sNrnBrxeF59CuzvZbr62/b8IJ49jzsAvqUoyl9kWT6nRWa4IYT4KaVn+vv7lwH4+6A10MM3uIvmDUUuQ4eA0vbFrC19JAqx18zmgtlwNm17JdFeooZSOp8Q0ipJUiDWUseY4QOU0jJFUf5leC3OD1d8HK2Iod4TGLpUA+/mH8RM7hfb5umw1Rp2sXKlCVVVswgh9kRJDSE6QAhpUVV1nq2+8InRV15oj78qI8pV8jiSjalwHNtYw9pfSBRF+a0kSS6WYoOlvz4vqlVzYpMCtfO0mbXLEciyPJMQ0sVaZMjZfP5kEogLL+6C2ejr64OiKGtZexUEQRBkWc5N5DY20rgK5zCXF05sVS98vokZcjgc/8xMrNvtvplS2s1aXNir6YOvM5cXTujZA1drVhTldSZy+/v7/0sUxQHW0rRE6vwIXmMqc4FjxVV4H/r6eq/WLMvyoYTLVVV1aTy+yyYi9j3pzCWOFaUub0S9lNLuhMqVZXk9a0lRzeL2RniMtzEXGXT2br0HfT3dI+oVRXEo1IEbLjdEbAdWMJcZLPKpsqD1KorydNzlUkpXshYTq4jdF+Haeg9zoV69Dl7DZwdN7JXLQ9ZLKT0db7kLJ8LXIC0hH1sQzUmHSOPO/ze0ljyPN/6wGD+9fy6m3T4Dj//8IbyStRJ19SeC1ipJUuDaw64xw+l0TieEDLIWEo8oDUVBT8zHK85t8zB79mxMu31GyDz48GPB/yEJORhzuQCukyRJYS0inlFr3kE410DHIvbtD+OOGTPx3wt/htbtz0PZvRhdbSfR0nIWJTtKcddP5mD2Pf+J3t7e4LWq6gMxFUwp/YC1gIRIrsuL6wmE4fHkz4LHmApP3p0g5w5rqlOSJBeAr8dErtVq/Tnrxicy8pm98OTdmRDJru0PQuo4HVGdlNLGqOUCuI4Q4mTd9ERH6myGvezX8ZNrTIW1ehXEnq7o/hnPdjZEJViW5TzWzWYZpckMV9Hc2G5/dz8N0nYs6tpo7Vk4Vu5Cf8WJtojkOp3OmybrXrOm9PZAbtwJh3l+xHvanpzpsFUuB2k9FLO6rMb9cKwww/7au0AtrtcsWFGUvcybm2QRO1ugHjfBvjcDrq0/CblD5smZDlfxA7BVvQi5cSfESxdiXott0/twrDDDscIMm6XlOU1yrVbrN0VRnJAnERKanm5IHadBPq4BOXcY5PwJiJ3NGH4W6ERDA3aXlcOUm4c1a9dj1Wurr2bDxmxsKy7B/gMforW1VdNnK3vqrwq2HzqzQZNgSmkh8+ZN0Jxvb0epeRcMplxs2JgNgykXJTtKsbdyH6oPHsIRy1FUvf8BKvdVoWRHKfTGHLz6l9XIWJaJZ5ekY3nm72Ew5qKxqWnMzxF7eqFut8CRtQu26tNrNAlOxmupJkI6OzvR3NIS1Rg9PT2oratHwZZCZOuNKNlROubsFi/1AJ19t2iZvQtZN4pnZFpazuLCheDbcUmSPFpXz3WsF2iypaHhJDbnF+ClFVl4ZvFS/Oyx+Xjk0Sfw+IKn8PiCp/DrRc8hY1kmMn//v/jjy1l4Y92bMJpyUVZegfb2jjHHVhRF2+0hCCF+1g2ZDDl8xIKVf34VGcsysSJrFbYVl8BytAZtbZ+EPcbFTz9FY2MTzre3B/07IcSv6WI8SulDrBszkdPb24uKPZXI1htRsacSFz/9NG6fJYoiVFV9VtPslWV5B+smTdR0dXWhoeEkLl68GNF7j9YcQ3HJTmx6x4D1b21AzuZ8lL1bjppjx9HVNfqQJqV0pSa5giAIyXzB+mRKe3sHzLvK8NKKLDy+4Cksei4da9a+ifKKvThz5qOQpwn7+j67RppSOl+z3CuC+fY3Tunu7kZZeQUylmXisflP4o11b6L64CF09/RoGkdRlGcikut2u29m3YTJmO7ubmzZWoQnFv4Kr69Zh9q6+ojHIoSIEckVBEFQFOUZ1s2YbKncV4Ul6ctRsKUw6HZUa1RVXRqx4MlyKWwypL29A2/9dSNKdpSOuT3VEkVR6iOWKwiCQCktY92YyZDmlhYUbCmMaG86WERR9KqqmhOV3CuCj8agmCFKaZOiKH+ilD5ECJlis9l0X5ZUV1dPf/vtt++K1XgAYnc/LVmWT0YjlxBSzfQnjpyxkSTpbCRiJUkKyLL8Iuv6OeNAKf0kEsGqqr46fBzkzvyqN2+aDsbvpvBEkeKpsb3dYSSCCSGtw8ewGb/7Tz6Dri0Rl5xO9vgMOvflHN08poKvvfGYRz91AevGTKoYdbF7zpJWwYQQ9doxkDt1ik+v8zBvzCSJ36hbzkywLMuHg43jy0lJ9Rp0z/mNU37DE3m8pmlzYyY3EsGU0sKYFsCJLxHM4DzWNXM0EMEMDvqoNq8p
7Vavacoi1qu4iR6Padp/MBUsSZI8Sm7eNJ1Pr3Ox3jmZLPGbbk1nJrivrw9Wq3XG8DE8hlsXsm7KpIpBVxozwYSQ5ggOU4440IHCtG/7DLoLzBszCeIz6Lw+4y0PxkwwpdQSyaFKSunLIyRXTf0HZN92h9s4ZSbPlJl+063pYQoNDH8fCtO+HTO5giAIsizvjESwJEmB/v7+2H0hn2R4DbfMCVdwXAuxWq2LIxE87MjWAQDfimuRE5CkEQzghmgEX5E8SAg5QQj5g9PpvDceJ9XdbvfNcW1EjEkawYIgCIQQMVrJiYokSUOEENuVJ66sisVjduJBUglWFGUta3HRCKeUntLyaJ1EkFSCAXxNkqQJde/nYCGE1Fmt1qR4GGRSCRYEQVBV9U3WgmIRURR9Ef/EI4aMFpxC/fqU//HrU/7qM6T4Ey5YEASBECKxFhSx2EsXoNbmwGGeD8/mGXAc28j0rNfngn36lMs+vS7XkXvb1QsTfTkpqQNGnTnhgh0OR5okSRPrFko9l6BaNsCz+Y6Rqz9jKuzHs7X9SDqGeA23zPEZUqoH8v/1e6Feczn7tvt976S0JLIuwWq1PjpRbh0sdTTBVfJwyO2bJ2c6rPW5WQlt4BWQO/OrsXxdTJEk6ZfJ/kwG2lYDT/6scXdiPIV3B2zN21MS3sRkx263/zhZ71cpdZyCJ39m2Afwne89P+oUJ0cQBABfl2V5X1KtsnsuwVX8gLazNMZUOE4ULGHdz6SFUppKKbUkwz0s+4+sj+g0nKMy3c66j0kPgOsURcmUJOmIJElUFEWfJElDkiQFEhGx6zw8uZE91tVjSoNcv3Um6x5yxsD1Sc0dNsvag+5tcwKRSLZZ1sb+uQec2OPsKLrJWf6MU/Nqel+GjXXtnDABzNe7Sx+9rGk1XXh34o4acaLHWmt6Tdve9FQA5q+xrpujAXfBXZpW0/aG4h+xrpmjAVfZkz5NO1r1eb9hXTNHA449Sx1aBLuatv2Kdc0cDdiq/9wc9k7W5hkAzNofZMFhh/OU+d5wD344q16I/I5xHHbYLGuawvmK5Gjcnca6Vk6EOC3rmkM+7qb4wSFrQ9EvWNfIiRJrQ9EvnO+/2OPetWDAXfzQkKt8kdtuWXcA9PANrGvjcDgcDofD4XA4HA6Hw+FwOBwOZzLw/96dp0jvNLV7AAAAAElFTkSuQmCC"
alt="MangaDex logo" />
<h3>Content Delivery Node</h3>
<p>
This <a href="https://mangadex.org/md_at_home" target="_blank">MangaDex@Home</a>
node is part of the <a href="https://mangadex.network" target="_blank">MangaDex Network</a>
content delivery and caching network.</p>
<p>Please report DMCA inquiries only to our <a href="mailto:support@mangadex.com">dedicated agent</a>.</p>
</body>
</html>

src/main.rs

@@ -1,135 +1,53 @@
#![warn(clippy::pedantic, clippy::nursery)]
#![allow(clippy::future_not_send)] // We're end users, so this is ok
// We're end users, so these are ok
#![allow(clippy::module_name_repetitions)]
use std::env::{self, VarError};
use std::env::VarError;
use std::error::Error;
use std::fmt::Display;
use std::net::SocketAddr;
use std::num::ParseIntError;
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use std::{num::ParseIntError, sync::Arc};
use crate::ping::Tls;
use actix_web::dev::Service;
use actix_web::rt::{spawn, time};
use actix_web::rt::{spawn, time, System};
use actix_web::web::Data;
use actix_web::web::{self, Data};
use actix_web::{App, HttpServer};
use actix_web::{App, HttpResponse, HttpServer};
use awc::{error::SendRequestError, Client};
use cache::{Cache, DiskCache};
use log::{debug, error, info, warn};
use chacha20::Key;
use lru::LruCache;
use config::Config;
use maxminddb::geoip2;
use parking_lot::RwLock;
use ping::{Request, Response};
use redis::Client as RedisClient;
use rustls::sign::{CertifiedKey, RSASigningKey};
use rustls::PrivateKey;
use rustls::{Certificate, NoClientAuth, ResolvesServerCert, ServerConfig};
use simple_logger::SimpleLogger;
use sodiumoxide::crypto::box_::PrecomputedKey;
use thiserror::Error;
use url::Url;
use rustls::server::NoClientAuth;
use rustls::ServerConfig;
use sodiumoxide::crypto::stream::xchacha20::gen_key;
use state::{RwLockServerState, ServerState};
use stop::send_stop;
use thiserror::Error;
use tracing::{debug, error, info, warn};
use crate::cache::mem::{Lfu, Lru};
use crate::cache::{MemoryCache, ENCRYPTION_KEY};
use crate::config::{CacheType, UnstableOptions, OFFLINE_MODE};
use crate::metrics::{record_country_visit, GEOIP_DATABASE};
use crate::state::DynamicServerCert;
mod cache;
mod client;
mod config;
mod metrics;
mod ping;
mod routes;
mod state;
mod stop;
mod units;
const CONTROL_CENTER_PING_URL: &str = "https://api.mangadex.network/ping";
const CLIENT_API_VERSION: usize = 31;
#[macro_export]
macro_rules! client_api_version {
() => {
"30"
};
}
struct ServerState {
precomputed_key: PrecomputedKey,
image_server: Url,
tls_config: Tls,
disabled_tokens: bool,
url: String,
cache: LruCache<(String, String, bool), CachedImage>,
}
struct CachedImage {
data: Vec<u8>,
content_type: Option<Vec<u8>>,
content_length: Option<Vec<u8>>,
last_modified: Option<Vec<u8>>,
}
impl ServerState {
async fn init(config: &Config) -> Result<Self, ()> {
let resp = Client::new()
.post(CONTROL_CENTER_PING_URL)
.send_json(&Request::from(config))
.await;
match resp {
Ok(mut resp) => match resp.json::<Response>().await {
Ok(resp) => {
let key = resp
.token_key
.and_then(|key| {
if let Some(key) = PrecomputedKey::from_slice(key.as_bytes()) {
Some(key)
} else {
error!("Failed to parse token key: got {}", key);
None
}
})
.unwrap();
if resp.compromised {
warn!("Got compromised response from control center!");
}
if resp.paused {
debug!("Got paused response from control center.");
}
info!("This client's URL has been set to {}", resp.url);
if resp.disabled_tokens {
info!("This client will not validated tokens");
}
Ok(Self {
precomputed_key: key,
image_server: resp.image_server,
tls_config: resp.tls.unwrap(),
disabled_tokens: resp.disabled_tokens,
url: resp.url,
cache: LruCache::new(1000),
})
}
Err(e) => {
warn!("Got malformed response: {}", e);
Err(())
}
},
Err(e) => match e {
SendRequestError::Timeout => {
error!("Response timed out to control server. Is MangaDex down?");
Err(())
}
e => {
warn!("Failed to send request: {}", e);
Err(())
}
},
}
}
}
pub struct RwLockServerState(RwLock<ServerState>);
impl ResolvesServerCert for RwLockServerState {
fn resolve(&self, _: rustls::ClientHello) -> Option<CertifiedKey> {
let read_guard = self.0.read();
Some(CertifiedKey {
cert: vec![Certificate(read_guard.tls_config.certificate.clone())],
key: Arc::new(Box::new(
RSASigningKey::new(&PrivateKey(read_guard.tls_config.private_key.clone())).unwrap(),
)),
ocsp: None,
sct_list: None,
})
}
}
#[derive(Error, Debug)]
enum ServerError {
@@ -140,60 +58,292 @@
}
#[actix_web::main]
async fn main() -> Result<(), std::io::Error> {
async fn main() -> Result<(), Box<dyn Error>> {
sodiumoxide::init().expect("Failed to initialize crypto");
// It's ok to fail early here, it would imply we have an invalid config.
dotenv::dotenv().ok();
SimpleLogger::new().init().unwrap();
let config = Config::new().unwrap();
let port = config.port;
let server = ServerState::init(&config).await.unwrap();
//
// Config loading
//
let config = match config::load_config() {
Ok(c) => c,
Err(e) => {
eprintln!("{}", e);
return Err(Box::new(e) as Box<_>);
}
};
let memory_quota = config.memory_quota;
let disk_quota = config.disk_quota;
let cache_type = config.cache_type;
let cache_path = config.cache_path.clone();
let disable_tls = config
.unstable_options
.contains(&UnstableOptions::DisableTls);
let bind_address = config.bind_address;
let redis_url = config.redis_url.clone();
//
// Logging and warnings
//
tracing_subscriber::fmt()
.with_max_level(config.log_level)
.init();
if let Err(e) = print_preamble_and_warnings(&config) {
error!("{}", e);
return Err(e);
}
debug!("{:?}", &config);
let client_secret = config.client_secret.clone();
let client_secret_1 = config.client_secret.clone();
if config.ephemeral_disk_encryption {
info!("Running with at-rest encryption!");
ENCRYPTION_KEY
.set(*Key::from_slice(gen_key().as_ref()))
.unwrap();
}
if config.enable_metrics {
metrics::init();
}
if let Some(key) = config.geoip_license_key.clone() {
if let Err(e) = metrics::load_geo_ip_data(key).await {
error!("Failed to initialize geo ip db: {}", e);
}
}
// HTTP Server init
// Try bind to provided port first
let port_reservation = std::net::TcpListener::bind(bind_address);
if let Err(e) = port_reservation {
error!("Failed to bind to port!");
return Err(e.into());
};
let server = if OFFLINE_MODE.load(Ordering::Acquire) {
ServerState::init_offline()
} else {
ServerState::init(&client_secret, &config).await?
};
let data_0 = Arc::new(RwLockServerState(RwLock::new(server)));
let data_1 = Arc::clone(&data_0);
let data_2 = Arc::clone(&data_0);
spawn(async move {
let mut interval = time::interval(Duration::from_secs(90));
let mut data = Arc::clone(&data_0);
loop {
interval.tick().await;
ping::update_server_state(&config, &mut data).await;
}
});
let mut tls_config = ServerConfig::new(NoClientAuth::new());
tls_config.cert_resolver = data_2;
HttpServer::new(move || {
App::new()
.service(routes::token_data)
.app_data(Data::from(Arc::clone(&data_1)))
})
.shutdown_timeout(60)
.bind_rustls(format!("0.0.0.0:{}", port), tls_config)?
.run()
.await
//
// At this point, the server is ready to start, and starts the necessary
// threads.
//
// Set ctrl+c to send a stop message
let running = Arc::new(AtomicBool::new(true));
let running_1 = running.clone();
let system = System::current();
ctrlc::set_handler(move || {
let system = &system;
let client_secret = client_secret.clone();
let running_2 = Arc::clone(&running_1);
if !OFFLINE_MODE.load(Ordering::Acquire) {
System::new().block_on(async move {
if running_2.load(Ordering::SeqCst) {
send_stop(&client_secret).await;
} else {
warn!("Got second Ctrl-C, forcefully exiting");
system.stop();
}
});
}
running_1.store(false, Ordering::SeqCst);
})
.expect("Error setting Ctrl-C handler");
// Spawn ping task
if !OFFLINE_MODE.load(Ordering::Acquire) {
spawn(async move {
let mut interval = time::interval(Duration::from_secs(90));
let mut data = Arc::clone(&data_0);
loop {
interval.tick().await;
debug!("Sending ping!");
ping::update_server_state(&client_secret_1, &config, &mut data).await;
}
});
}
let memory_max_size = memory_quota.into();
let cache = DiskCache::new(disk_quota.into(), cache_path.clone()).await;
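// Layer the selected in-memory implementation over the disk cache; plain
// on-disk mode serves directly from disk.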
let cache: Arc<dyn Cache> = match cache_type {
CacheType::OnDisk => cache,
CacheType::Lru => MemoryCache::<Lru, _>::new(cache, memory_max_size),
CacheType::Lfu => MemoryCache::<Lfu, _>::new(cache, memory_max_size),
CacheType::Redis => {
let url = redis_url.unwrap_or_else(|| {
url::Url::parse("redis://127.0.0.1/").expect("default redis url to be parsable")
});
info!("Trying to connect to redis instance at {}", url);
let mem_cache = RedisClient::open(url)?;
Arc::new(MemoryCache::new_with_cache(cache, mem_cache))
}
};
let cache_0 = Arc::clone(&cache);
// Start HTTPS server
let server = HttpServer::new(move || {
App::new()
.wrap_fn(|req, srv| {
if let Some(reader) = GEOIP_DATABASE.get() {
let maybe_country = req
.connection_info()
.realip_remote_addr()
.map(SocketAddr::from_str)
.and_then(Result::ok)
.as_ref()
.map(SocketAddr::ip)
.map(|ip| reader.lookup::<geoip2::Country>(ip))
.and_then(Result::ok);
record_country_visit(maybe_country);
}
srv.call(req)
})
.service(routes::index)
.service(routes::token_data)
.service(routes::token_data_saver)
.service(routes::metrics)
.route(
"/data/{tail:.*}",
web::get().to(HttpResponse::UnavailableForLegalReasons),
)
.route(
"/data-saver/{tail:.*}",
web::get().to(HttpResponse::UnavailableForLegalReasons),
)
.route("{tail:.*}", web::get().to(routes::default))
.app_data(Data::from(Arc::clone(&data_1)))
.app_data(Data::from(Arc::clone(&cache_0)))
})
.shutdown_timeout(60);
// drop port reservation, might have a TOCTOU but it's not a big deal; this
// is just a best effort.
std::mem::drop(port_reservation);
if disable_tls {
server.bind(bind_address)?.run().await?;
} else {
// Rustls only supports TLS 1.2 and 1.3.
let tls_config = ServerConfig::builder()
.with_safe_defaults()
.with_client_cert_verifier(NoClientAuth::new())
.with_cert_resolver(Arc::new(DynamicServerCert));
server.bind_rustls(bind_address, tls_config)?.run().await?;
}
// Wait for the Ctrl-C handler to finish sending the stop message
while running.load(Ordering::SeqCst) {
tokio::time::sleep(Duration::from_millis(250)).await;
}
Ok(())
}
pub struct Config {
secret: String,
port: u16,
disk_quota: usize,
network_speed: usize,
}
impl Config {
fn new() -> Result<Self, ServerError> {
let secret = env::var("CLIENT_SECRET")?;
let port = env::var("PORT")?.parse::<u16>()?;
let disk_quota = env::var("MAX_STORAGE_BYTES")?.parse::<usize>()?;
let network_speed = env::var("MAX_NETWORK_SPEED")?.parse::<usize>()?;
Ok(Self {
secret,
port,
disk_quota,
network_speed,
})
}
}
#[derive(Debug)]
enum InvalidCombination {
MissingUnstableOption(&'static str, UnstableOptions),
}
#[cfg(not(tarpaulin_include))]
impl Display for InvalidCombination {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InvalidCombination::MissingUnstableOption(opt, arg) => {
write!(
f,
"The option '{}' requires the unstable option '-Z {}'",
opt, arg
)
}
}
}
}
impl Error for InvalidCombination {}
#[cfg(not(tarpaulin_include))]
#[allow(clippy::cognitive_complexity)]
fn print_preamble_and_warnings(args: &Config) -> Result<(), Box<dyn Error>> {
let build_string = option_env!("VERGEN_GIT_SHA_SHORT")
.map(|git_sha| format!(" ({})", git_sha))
.unwrap_or_default();
println!(
concat!(
env!("CARGO_PKG_NAME"),
" ",
env!("CARGO_PKG_VERSION"),
"{} Copyright (C) 2021 ",
env!("CARGO_PKG_AUTHORS"),
"\n\n",
env!("CARGO_PKG_NAME"),
" is free software: you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
the Free Software Foundation, either version 3 of the License, or\n\
(at your option) any later version.\n\n",
env!("CARGO_PKG_NAME"),
" is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
GNU General Public License for more details.\n\n\
You should have received a copy of the GNU General Public License\n\
along with ",
env!("CARGO_PKG_NAME"),
". If not, see <https://www.gnu.org/licenses/>.\n"
),
build_string
);
if !args.unstable_options.is_empty() {
warn!("Unstable options are enabled. These options should not be used in production!");
}
if args
.unstable_options
.contains(&UnstableOptions::OfflineMode)
{
warn!("Running in offline mode. No communication to MangaDex will be made!");
}
if args.unstable_options.contains(&UnstableOptions::DisableTls) {
warn!("Serving insecure traffic! You better be running this for development only.");
}
if args
.unstable_options
.contains(&UnstableOptions::DisableCertValidation)
{
error!("Cert validation disabled! You REALLY only better be debugging.");
}
if args.override_upstream.is_some()
&& !args
.unstable_options
.contains(&UnstableOptions::OverrideUpstream)
{
Err(Box::new(InvalidCombination::MissingUnstableOption(
"override-upstream",
UnstableOptions::OverrideUpstream,
)))
} else {
Ok(())
}
}

src/metrics.rs Normal file

@@ -0,0 +1,185 @@
#![cfg(not(tarpaulin_include))]
use std::fs::metadata;
use std::hint::unreachable_unchecked;
use std::time::SystemTime;
use chrono::Duration;
use flate2::read::GzDecoder;
use maxminddb::geoip2::Country;
use once_cell::sync::{Lazy, OnceCell};
use prometheus::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use tar::Archive;
use thiserror::Error;
use tracing::{debug, info, warn};
use crate::client::HTTP_CLIENT;
use crate::config::ClientSecret;
pub static GEOIP_DATABASE: OnceCell<maxminddb::Reader<Vec<u8>>> = OnceCell::new();
static COUNTRY_VISIT_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"country_visits_total",
"The number of visits from a country",
&["country"]
)
.unwrap()
});
macro_rules! init_counters {
($(($counter:ident, $ty:ty, $name:literal, $desc:literal),)*) => {
$(
pub static $counter: Lazy<$ty> = Lazy::new(|| {
register_int_counter!($name, $desc).unwrap()
});
)*
#[allow(clippy::shadow_unrelated)]
pub fn init() {
// These need to be called at least once, otherwise the Lazy initializer
// never runs and the metrics never get registered.
$(let _a = $counter.get();)*
init_other();
}
};
}
init_counters!(
(
CACHE_HIT_COUNTER,
IntCounter,
"cache_hit_total",
"The number of cache hits."
),
(
CACHE_MISS_COUNTER,
IntCounter,
"cache_miss_total",
"The number of cache misses."
),
(
REQUESTS_TOTAL_COUNTER,
IntCounter,
"requests_total",
"The total number of requests served."
),
(
REQUESTS_DATA_COUNTER,
IntCounter,
"requests_data_total",
"The number of requests served from the /data endpoint."
),
(
REQUESTS_DATA_SAVER_COUNTER,
IntCounter,
"requests_data_saver_total",
"The number of requests served from the /data-saver endpoint."
),
(
REQUESTS_OTHER_COUNTER,
IntCounter,
"requests_other_total",
"The total number of request not served by primary endpoints."
),
);
// initialization for any other counters that aren't simple int counters
fn init_other() {
let _a = COUNTRY_VISIT_COUNTER.local();
}
#[derive(Error, Debug)]
pub enum DbLoadError {
#[error(transparent)]
Reqwest(#[from] reqwest::Error),
#[error(transparent)]
Io(#[from] std::io::Error),
#[error(transparent)]
MaxMindDb(#[from] maxminddb::MaxMindDBError),
}
pub async fn load_geo_ip_data(license_key: ClientSecret) -> Result<(), DbLoadError> {
const DB_PATH: &str = "./GeoLite2-Country.mmdb";
// Check date of db
let db_date_created = metadata(DB_PATH)
.ok()
.and_then(|metadata| {
if let Ok(time) = metadata.created() {
Some(time)
} else {
debug("fs didn't report birth time, fall back to last modified instead");
metadata.modified().ok()
}
})
.unwrap_or(SystemTime::UNIX_EPOCH);
let duration = if let Ok(time) = SystemTime::now().duration_since(db_date_created) {
Duration::from_std(time).expect("duration to fit")
} else {
warn!("Clock may have gone backwards?");
Duration::max_value()
};
// DB expired, fetch a new one
if duration > Duration::weeks(1) {
fetch_db(license_key).await?;
} else {
info!("Geo IP database isn't old enough, not updating.");
}
// Result literally cannot panic here, buuuuuut if it does we'll panic
GEOIP_DATABASE
.set(maxminddb::Reader::open_readfile(DB_PATH)?)
.map_err(|_| ()) // Need to map err here or can't expect
.expect("to set the geo ip db singleton");
Ok(())
}
async fn fetch_db(license_key: ClientSecret) -> Result<(), DbLoadError> {
let resp = HTTP_CLIENT
.inner()
.get(format!("https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-Country&license_key={}&suffix=tar.gz", license_key.as_str()))
.send()
.await?
.bytes()
.await?;
let mut decoder = Archive::new(GzDecoder::new(resp.as_ref()));
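// The MaxMind tarball typically nests the .mmdb inside a dated directory;
// scan the entries and unpack only the database file into the working
// directory.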
let mut decoded_paths: Vec<_> = decoder
.entries()?
.filter_map(Result::ok)
.filter_map(|mut entry| {
let path = entry.path().ok()?.to_path_buf();
let file_name = path.file_name()?;
if file_name != "GeoLite2-Country.mmdb" {
return None;
}
entry.unpack(file_name).ok()?;
Some(path)
})
.collect();
assert_eq!(decoded_paths.len(), 1);
let path = match decoded_paths.pop() {
Some(path) => path,
None => unsafe { unreachable_unchecked() },
};
debug!("Extracted {}", path.as_path().to_string_lossy());
Ok(())
}
pub fn record_country_visit(country: Option<Country>) {
let iso_code = country
.and_then(|country| country.country.and_then(|c| c.iso_code))
.unwrap_or("unknown");
COUNTRY_VISIT_COUNTER
.get_metric_with_label_values(&[iso_code])
.unwrap()
.inc();
}
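// End-to-end usage sketch (assumes `addr` holds the visitor's IpAddr and
// `license_key` comes from the client config; both names are illustrative):
//
//     load_geo_ip_data(license_key).await?;
//     let country = GEOIP_DATABASE
//         .get()
//         .and_then(|reader| reader.lookup::<Country>(addr).ok());
//     record_country_visit(country);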


@ -1,111 +1,263 @@
use std::net::{IpAddr, SocketAddr};
use std::sync::atomic::Ordering;
use std::{io::BufReader, sync::Arc};

use rustls::sign::{CertifiedKey, RsaSigningKey, SigningKey};
use rustls::{Certificate, PrivateKey};
use rustls_pemfile::{certs, rsa_private_keys};
use serde::de::{MapAccess, Visitor};
use serde::{Deserialize, Serialize};
use serde_repr::Deserialize_repr;
use sodiumoxide::crypto::box_::PrecomputedKey;
use tracing::{debug, error, info, warn};
use url::Url;

use crate::client::HTTP_CLIENT;
use crate::config::{ClientSecret, Config};
use crate::state::{
    RwLockServerState, CERTIFIED_KEY, PREVIOUSLY_COMPROMISED, PREVIOUSLY_PAUSED,
    TLS_PREVIOUSLY_CREATED,
};
use crate::units::{Bytes, BytesPerSecond, Port};
use crate::CLIENT_API_VERSION;

pub const CONTROL_CENTER_PING_URL: &str = "https://api.mangadex.network/ping";

#[derive(Serialize, Debug)]
pub struct Request<'a> {
    secret: &'a ClientSecret,
    port: Port,
    disk_space: Bytes,
    network_speed: BytesPerSecond,
    build_version: usize,
    tls_created_at: Option<String>,
    ip_address: Option<IpAddr>,
}

impl<'a> Request<'a> {
    fn from_config_and_state(secret: &'a ClientSecret, config: &Config) -> Self {
        Self {
            secret,
            port: config
                .external_address
                .and_then(|v| Port::new(v.port()))
                .unwrap_or(config.port),
            disk_space: config.disk_quota.into(),
            network_speed: config.network_speed.into(),
            build_version: CLIENT_API_VERSION,
            tls_created_at: TLS_PREVIOUSLY_CREATED
                .get()
                .map(|v| v.load().as_ref().clone()),
            ip_address: config.external_address.as_ref().map(SocketAddr::ip),
        }
    }
}

impl<'a> From<(&'a ClientSecret, &Config)> for Request<'a> {
    fn from((secret, config): (&'a ClientSecret, &Config)) -> Self {
        Self {
            secret,
            port: config
                .external_address
                .and_then(|v| Port::new(v.port()))
                .unwrap_or(config.port),
            disk_space: config.disk_quota.into(),
            network_speed: config.network_speed.into(),
            build_version: CLIENT_API_VERSION,
            tls_created_at: None,
            ip_address: config.external_address.as_ref().map(SocketAddr::ip),
        }
    }
}

#[derive(Deserialize, Debug)]
#[serde(untagged)]
pub enum Response {
    Ok(Box<OkResponse>),
    Error(ErrorResponse),
}

#[derive(Deserialize, Debug)]
pub struct OkResponse {
    pub image_server: Url,
    pub latest_build: usize,
    pub url: Url,
    pub token_key: Option<String>,
    pub compromised: bool,
    pub paused: bool,
    pub tls: Option<Tls>,
}

#[derive(Deserialize, Debug)]
pub struct ErrorResponse {
    pub error: String,
    pub status: ErrorCode,
}

#[derive(Deserialize_repr, Debug, Copy, Clone)]
#[repr(u16)]
pub enum ErrorCode {
    MalformedJson = 400,
    InvalidSecret = 401,
    InvalidContentType = 415,
}

pub struct Tls {
    pub created_at: String,
    pub priv_key: Arc<RsaSigningKey>,
    pub certs: Vec<Certificate>,
}

impl<'de> Deserialize<'de> for Tls {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct TlsVisitor;

        impl<'de> Visitor<'de> for TlsVisitor {
            type Value = Tls;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter.write_str("a tls struct")
            }

            fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
            where
                A: MapAccess<'de>,
            {
                let mut created_at = None;
                let mut priv_key = None;
                let mut certificates = None;

                while let Some((key, value)) = map.next_entry::<&str, String>()? {
                    match key {
                        "created_at" => created_at = Some(value.to_string()),
                        "private_key" => {
                            priv_key = rsa_private_keys(&mut BufReader::new(value.as_bytes()))
                                .ok()
                                .and_then(|mut v| {
                                    v.pop()
                                        .and_then(|key| RsaSigningKey::new(&PrivateKey(key)).ok())
                                });
                        }
                        "certificate" => {
                            certificates = certs(&mut BufReader::new(value.as_bytes())).ok();
                        }
                        _ => (), // Ignore extra fields
                    }
                }

                match (created_at, priv_key, certificates) {
                    (Some(created_at), Some(priv_key), Some(certificates)) => Ok(Tls {
                        created_at,
                        priv_key: Arc::new(priv_key),
                        certs: certificates.into_iter().map(Certificate).collect(),
                    }),
                    _ => Err(serde::de::Error::custom("Could not deserialize tls info")),
                }
            }
        }

        deserializer.deserialize_map(TlsVisitor)
    }
}

#[cfg(not(tarpaulin_include))]
impl std::fmt::Debug for Tls {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Tls")
            .field("created_at", &self.created_at)
            .finish()
    }
}

pub async fn update_server_state(
    secret: &ClientSecret,
    cli: &Config,
    data: &mut Arc<RwLockServerState>,
) {
    let req = Request::from_config_and_state(secret, cli);
    debug!("Sending ping request: {:?}", req);
    let resp = HTTP_CLIENT
        .inner()
        .post(CONTROL_CENTER_PING_URL)
        .json(&req)
        .send()
        .await;
    match resp {
        Ok(resp) => match resp.json::<Response>().await {
            Ok(Response::Ok(resp)) => {
                debug!("got write guard for server state");
                let mut write_guard = data.0.write();

                let image_server_changed = write_guard.image_server != resp.image_server;
                if !write_guard.url_overridden && image_server_changed {
                    write_guard.image_server = resp.image_server;
                } else if image_server_changed {
                    warn!("Ignoring new upstream url!");
                }

                if let Some(key) = resp.token_key {
                    base64::decode(&key)
                        .ok()
                        .and_then(|k| PrecomputedKey::from_slice(&k))
                        .map_or_else(
                            || error!("Failed to parse token key: got {}", key),
                            |key| write_guard.precomputed_key = key,
                        );
                }

                if let Some(tls) = resp.tls {
                    TLS_PREVIOUSLY_CREATED
                        .get()
                        .unwrap()
                        .swap(Arc::new(tls.created_at));
                    CERTIFIED_KEY.store(Some(Arc::new(CertifiedKey {
                        cert: tls.certs.clone(),
                        key: Arc::clone(&tls.priv_key) as Arc<dyn SigningKey>,
                        ocsp: None,
                        sct_list: None,
                    })));
                }

                let previously_compromised = PREVIOUSLY_COMPROMISED.load(Ordering::Acquire);
                if resp.compromised != previously_compromised {
                    PREVIOUSLY_COMPROMISED.store(resp.compromised, Ordering::Release);
                    if resp.compromised {
                        error!("Got compromised response from control center!");
                    } else if previously_compromised {
                        info!("No longer compromised!");
                    }
                }

                let previously_paused = PREVIOUSLY_PAUSED.load(Ordering::Acquire);
                if resp.paused != previously_paused {
                    PREVIOUSLY_PAUSED.store(resp.paused, Ordering::Release);
                    if resp.paused {
                        warn!("Control center has paused this node.");
                    } else {
                        info!("Control center is no longer pausing this node.");
                    }
                }

                if resp.url != write_guard.url {
                    info!("This client's URL has been updated to {}", resp.url);
                }

                debug!("dropping write guard for server state");
            }
            Ok(Response::Error(resp)) => {
                error!(
                    "Got an {} error from upstream: {}",
                    resp.status as u16, resp.error
                );
            }
            Err(e) => warn!("Got malformed response: {}", e),
        },
        Err(e) => match e {
            e if e.is_timeout() => {
                error!("Response timed out to control server. Is MangaDex down?");
            }
            e => warn!("Failed to send request: {}", e),
        },
    }
}
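Because Response is marked #[serde(untagged)], serde first tries to decode the body as an OkResponse and only falls back to ErrorResponse if that fails. A minimal sketch of both decode paths, with hypothetical field values:

    // Decodes as Response::Ok(..)
    let ok: Response = serde_json::from_str(
        r#"{"image_server":"https://upstream.example","latest_build":31,
            "url":"https://node.example","token_key":null,
            "compromised":false,"paused":false,"tls":null}"#,
    ).unwrap();
    // Falls through to Response::Error(..); status 401 maps to ErrorCode::InvalidSecret
    let err: Response = serde_json::from_str(r#"{"error":"Invalid secret","status":401}"#).unwrap();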


@ -1,104 +1,147 @@
use std::hint::unreachable_unchecked;
use std::sync::atomic::Ordering;

use actix_web::body::BoxBody;
use actix_web::error::ErrorNotFound;
use actix_web::http::header::{HeaderValue, CONTENT_LENGTH, CONTENT_TYPE, LAST_MODIFIED};
use actix_web::web::{Data, Path};
use actix_web::HttpResponseBuilder;
use actix_web::{get, HttpRequest, HttpResponse, Responder};
use base64::DecodeError;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::Stream;
use prometheus::{Encoder, TextEncoder};
use serde::Deserialize;
use sodiumoxide::crypto::box_::{open_precomputed, Nonce, PrecomputedKey, NONCEBYTES};
use thiserror::Error;
use tracing::{debug, error, info, trace};

use crate::cache::{Cache, CacheKey, ImageMetadata, UpstreamError};
use crate::client::{FetchResult, DEFAULT_HEADERS, HTTP_CLIENT};
use crate::config::{OFFLINE_MODE, VALIDATE_TOKENS};
use crate::metrics::{
    CACHE_HIT_COUNTER, CACHE_MISS_COUNTER, REQUESTS_DATA_COUNTER, REQUESTS_DATA_SAVER_COUNTER,
    REQUESTS_OTHER_COUNTER, REQUESTS_TOTAL_COUNTER,
};
use crate::state::RwLockServerState;

const BASE64_CONFIG: base64::Config = base64::Config::new(base64::CharacterSet::UrlSafe, false);

pub enum ServerResponse {
    TokenValidationError(TokenValidationError),
    HttpResponse(HttpResponse),
}

impl Responder for ServerResponse {
    type Body = BoxBody;

    #[inline]
    fn respond_to(self, req: &HttpRequest) -> HttpResponse {
        match self {
            Self::TokenValidationError(e) => e.respond_to(req),
            Self::HttpResponse(resp) => {
                REQUESTS_TOTAL_COUNTER.inc();
                resp.respond_to(req)
            }
        }
    }
}

#[allow(clippy::unused_async)]
#[get("/")]
async fn index() -> impl Responder {
    HttpResponse::Ok().body(include_str!("index.html"))
}

#[get("/{token}/data/{chapter_hash}/{file_name}")]
async fn token_data(
    state: Data<RwLockServerState>,
    cache: Data<dyn Cache>,
    path: Path<(String, String, String)>,
) -> impl Responder {
    REQUESTS_DATA_COUNTER.inc();
    let (token, chapter_hash, file_name) = path.into_inner();
    if VALIDATE_TOKENS.load(Ordering::Acquire) {
        if let Err(e) = validate_token(&state.0.read().precomputed_key, token, &chapter_hash) {
            return ServerResponse::TokenValidationError(e);
        }
    }
    fetch_image(state, cache, chapter_hash, file_name, false).await
}

#[get("/{token}/data-saver/{chapter_hash}/{file_name}")]
async fn token_data_saver(
    state: Data<RwLockServerState>,
    cache: Data<dyn Cache>,
    path: Path<(String, String, String)>,
) -> impl Responder {
    REQUESTS_DATA_SAVER_COUNTER.inc();
    let (token, chapter_hash, file_name) = path.into_inner();
    if VALIDATE_TOKENS.load(Ordering::Acquire) {
        if let Err(e) = validate_token(&state.0.read().precomputed_key, token, &chapter_hash) {
            return ServerResponse::TokenValidationError(e);
        }
    }
    fetch_image(state, cache, chapter_hash, file_name, true).await
}

#[allow(clippy::future_not_send)]
pub async fn default(state: Data<RwLockServerState>, req: HttpRequest) -> impl Responder {
    REQUESTS_OTHER_COUNTER.inc();
    let path = &format!(
        "{}{}",
        state.0.read().image_server,
        req.path().chars().skip(1).collect::<String>()
    );
    if OFFLINE_MODE.load(Ordering::Acquire) {
        info!("Got unknown path in offline mode, returning 404: {}", path);
        return ServerResponse::HttpResponse(
            ErrorNotFound("Path is not valid in offline mode").into(),
        );
    }
    info!("Got unknown path, just proxying: {}", path);
    let mut resp = match HTTP_CLIENT.inner().get(path).send().await {
        Ok(resp) => resp,
        Err(e) => {
            error!("{}", e);
            return ServerResponse::HttpResponse(HttpResponse::BadGateway().finish());
        }
    };
    let content_type = resp.headers_mut().remove(CONTENT_TYPE);
    let mut resp_builder = HttpResponseBuilder::new(resp.status());
    let mut headers = DEFAULT_HEADERS.clone();
    if let Some(content_type) = content_type {
        headers.insert(CONTENT_TYPE, content_type);
    }
    // push_headers(&mut resp_builder);
    let mut resp = resp_builder.body(resp.bytes().await.unwrap_or_default());
    *resp.headers_mut() = headers;
    ServerResponse::HttpResponse(resp)
}

#[allow(clippy::unused_async)]
#[get("/prometheus")]
pub async fn metrics() -> impl Responder {
    let metric_families = prometheus::gather();
    let mut buffer = Vec::new();
    TextEncoder::new()
        .encode(&metric_families, &mut buffer)
        .expect("Should never have an io error writing to a vec");
    String::from_utf8(buffer).expect("Text encoder should render valid utf-8")
}

#[derive(Error, Debug, PartialEq, Eq)]
pub enum TokenValidationError {
    #[error("Failed to decode base64 token.")]
    DecodeError(#[from] DecodeError),
    #[error("Nonce was too short.")]
    IncompleteNonce,
    #[error("Decryption failed")]
    DecryptionFailure,
    #[error("The token format was invalid.")]
@ -110,8 +153,13 @@ enum TokenValidationError {
}

impl Responder for TokenValidationError {
    type Body = BoxBody;

    #[inline]
    fn respond_to(self, _: &HttpRequest) -> HttpResponse {
        let mut resp = HttpResponse::Forbidden().finish();
        *resp.headers_mut() = DEFAULT_HEADERS.clone();
        resp
    }
}
@ -131,8 +179,14 @@ fn validate_token(
        return Err(TokenValidationError::IncompleteNonce);
    }

    let (nonce, encrypted) = data.split_at(NONCEBYTES);

    let nonce = match Nonce::from_slice(nonce) {
        Some(nonce) => nonce,
        // We split at NONCEBYTES, so this should never happen.
        None => unsafe { unreachable_unchecked() },
    };

    let decrypted = open_precomputed(encrypted, &nonce, precomputed_key)
        .map_err(|_| TokenValidationError::DecryptionFailure)?;

    let parsed_token: Token =
@ -146,99 +200,252 @@ fn validate_token(
        return Err(TokenValidationError::InvalidChapterHash);
    }

    debug!("Token validated!");

    Ok(())
}

#[allow(clippy::future_not_send)]
async fn fetch_image(
    state: Data<RwLockServerState>,
    cache: Data<dyn Cache>,
    chapter_hash: String,
    file_name: String,
    is_data_saver: bool,
) -> ServerResponse {
    let key = CacheKey(chapter_hash, file_name, is_data_saver);

    match cache.get(&key).await {
        Some(Ok((image, metadata))) => {
            CACHE_HIT_COUNTER.inc();
            return construct_response(image, &metadata);
        }
        Some(Err(_)) => {
            return ServerResponse::HttpResponse(HttpResponse::BadGateway().finish());
        }
        None => (),
    }

    CACHE_MISS_COUNTER.inc();

    // If in offline mode, return early since there's nothing else we can do
    if OFFLINE_MODE.load(Ordering::Acquire) {
        return ServerResponse::HttpResponse(
            ErrorNotFound("Offline mode enabled and image not in cache").into(),
        );
    }

    let url = if is_data_saver {
        format!(
            "{}/data-saver/{}/{}",
            state.0.read().image_server,
            &key.0,
            &key.1,
        )
    } else {
        format!("{}/data/{}/{}", state.0.read().image_server, &key.0, &key.1)
    };

    match HTTP_CLIENT.fetch_and_cache(url, key, cache).await {
        FetchResult::ServiceUnavailable => {
            ServerResponse::HttpResponse(HttpResponse::ServiceUnavailable().finish())
        }
        FetchResult::InternalServerError => {
            ServerResponse::HttpResponse(HttpResponse::InternalServerError().finish())
        }
        FetchResult::Data(status, headers, data) => {
            let mut resp = HttpResponseBuilder::new(status);
            let mut resp = resp.body(data);
            *resp.headers_mut() = headers;
            ServerResponse::HttpResponse(resp)
        }
        FetchResult::Processing => panic!("Race condition found with fetch result"),
    }
}

#[inline]
pub fn construct_response(
    data: impl Stream<Item = Result<Bytes, UpstreamError>> + Unpin + 'static,
    metadata: &ImageMetadata,
) -> ServerResponse {
    trace!("Constructing response");

    let mut resp = HttpResponse::Ok();

    let mut headers = DEFAULT_HEADERS.clone();
    if let Some(content_type) = metadata.content_type {
        headers.insert(
            CONTENT_TYPE,
            HeaderValue::from_str(content_type.as_ref()).unwrap(),
        );
    }
    if let Some(content_length) = metadata.content_length {
        headers.insert(CONTENT_LENGTH, HeaderValue::from(content_length));
    }
    if let Some(last_modified) = metadata.last_modified {
        headers.insert(
            LAST_MODIFIED,
            HeaderValue::from_str(&last_modified.to_rfc2822()).unwrap(),
        );
    }

    let mut ret = resp.streaming(data);
    *ret.headers_mut() = headers;
    ServerResponse::HttpResponse(ret)
}
#[cfg(test)]
mod token_validation {
use super::{BASE64_CONFIG, DecodeError, PrecomputedKey, TokenValidationError, Utc, validate_token};
use sodiumoxide::crypto::box_::precompute;
use sodiumoxide::crypto::box_::seal_precomputed;
use sodiumoxide::crypto::box_::{gen_keypair, gen_nonce, PRECOMPUTEDKEYBYTES};
#[test]
fn invalid_base64() {
let res = validate_token(
&PrecomputedKey::from_slice(&b"1".repeat(PRECOMPUTEDKEYBYTES))
.expect("valid test token"),
"a".to_string(),
"b",
);
assert_eq!(
res,
Err(TokenValidationError::DecodeError(
DecodeError::InvalidLength
))
);
}
#[test]
fn not_long_enough_for_nonce() {
let res = validate_token(
&PrecomputedKey::from_slice(&b"1".repeat(PRECOMPUTEDKEYBYTES))
.expect("valid test token"),
"aGVsbG8gaW50ZXJuZXR-Cg==".to_string(),
"b",
);
assert_eq!(res, Err(TokenValidationError::IncompleteNonce));
}
#[test]
fn invalid_precomputed_key() {
let precomputed_1 = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let precomputed_2 = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let nonce = gen_nonce();
// Seal with precomputed_2, open with precomputed_1
let data = seal_precomputed(b"hello world", &nonce, &precomputed_2);
let data: Vec<u8> = nonce.as_ref().iter().copied().chain(data).collect();
let data = base64::encode_config(data, BASE64_CONFIG);
let res = validate_token(&precomputed_1, data, "b");
assert_eq!(res, Err(TokenValidationError::DecryptionFailure));
}
#[test]
fn invalid_token_data() {
let precomputed = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let nonce = gen_nonce();
let data = seal_precomputed(b"hello world", &nonce, &precomputed);
let data: Vec<u8> = nonce.as_ref().iter().copied().chain(data).collect();
let data = base64::encode_config(data, BASE64_CONFIG);
let res = validate_token(&precomputed, data, "b");
assert_eq!(res, Err(TokenValidationError::InvalidToken));
}
#[test]
fn token_must_have_valid_expiration() {
let precomputed = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let nonce = gen_nonce();
let time = Utc::now() - chrono::Duration::weeks(1);
let data = seal_precomputed(
serde_json::json!({
"expires": time.to_rfc3339(),
"hash": "b",
})
.to_string()
.as_bytes(),
&nonce,
&precomputed,
);
let data: Vec<u8> = nonce.as_ref().iter().copied().chain(data).collect();
let data = base64::encode_config(data, BASE64_CONFIG);
let res = validate_token(&precomputed, data, "b");
assert_eq!(res, Err(TokenValidationError::TokenExpired));
}
#[test]
fn token_must_have_valid_chapter_hash() {
let precomputed = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let nonce = gen_nonce();
let time = Utc::now() + chrono::Duration::weeks(1);
let data = seal_precomputed(
serde_json::json!({
"expires": time.to_rfc3339(),
"hash": "b",
})
.to_string()
.as_bytes(),
&nonce,
&precomputed,
);
let data: Vec<u8> = nonce.as_ref().iter().copied().chain(data).collect();
let data = base64::encode_config(data, BASE64_CONFIG);
let res = validate_token(&precomputed, data, "");
assert_eq!(res, Err(TokenValidationError::InvalidChapterHash));
}
#[test]
fn valid_token_returns_ok() {
let precomputed = {
let (pk, sk) = gen_keypair();
precompute(&pk, &sk)
};
let nonce = gen_nonce();
let time = Utc::now() + chrono::Duration::weeks(1);
let data = seal_precomputed(
serde_json::json!({
"expires": time.to_rfc3339(),
"hash": "b",
})
.to_string()
.as_bytes(),
&nonce,
&precomputed,
);
let data: Vec<u8> = nonce.as_ref().iter().copied().chain(data).collect();
let data = base64::encode_config(data, BASE64_CONFIG);
let res = validate_token(&precomputed, data, "b");
assert!(res.is_ok());
}
}
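The tests above also document the wire format: a token is the URL-safe base64 encoding of a 24-byte nonce followed by the sealed JSON payload. Minting one looks roughly like this (mirrors the test helpers; `expiry`, `chapter_hash`, and `precomputed` are illustrative names):

    let nonce = gen_nonce();
    let payload = serde_json::json!({ "expires": expiry.to_rfc3339(), "hash": chapter_hash });
    let sealed = seal_precomputed(payload.to_string().as_bytes(), &nonce, &precomputed);
    let token = base64::encode_config(
        nonce.as_ref().iter().copied().chain(sealed).collect::<Vec<u8>>(),
        BASE64_CONFIG,
    );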

src/state.rs (new file)

@ -0,0 +1,165 @@
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use crate::client::HTTP_CLIENT;
use crate::config::{ClientSecret, Config, OFFLINE_MODE};
use crate::ping::{Request, Response, CONTROL_CENTER_PING_URL};
use arc_swap::{ArcSwap, ArcSwapOption};
use once_cell::sync::OnceCell;
use parking_lot::RwLock;
use rustls::server::{ClientHello, ResolvesServerCert};
use rustls::sign::{CertifiedKey, RsaSigningKey, SigningKey};
use rustls::Certificate;
use sodiumoxide::crypto::box_::{PrecomputedKey, PRECOMPUTEDKEYBYTES};
use thiserror::Error;
use tracing::{error, info, warn};
use url::Url;
pub struct ServerState {
pub precomputed_key: PrecomputedKey,
pub image_server: Url,
pub url: Url,
pub url_overridden: bool,
}
pub static PREVIOUSLY_PAUSED: AtomicBool = AtomicBool::new(false);
pub static PREVIOUSLY_COMPROMISED: AtomicBool = AtomicBool::new(false);
pub static TLS_PREVIOUSLY_CREATED: OnceCell<ArcSwap<String>> = OnceCell::new();
static TLS_SIGNING_KEY: OnceCell<ArcSwap<RsaSigningKey>> = OnceCell::new();
static TLS_CERTS: OnceCell<ArcSwap<Vec<Certificate>>> = OnceCell::new();
pub static CERTIFIED_KEY: ArcSwapOption<CertifiedKey> = ArcSwapOption::const_empty();
#[derive(Error, Debug)]
pub enum ServerInitError {
#[error(transparent)]
MalformedResponse(reqwest::Error),
#[error(transparent)]
Timeout(reqwest::Error),
#[error(transparent)]
SendFailure(reqwest::Error),
#[error("Failed to parse token key")]
KeyParseError(String),
#[error("Token key was not provided in initial request")]
MissingTokenKey,
#[error("Got error response from control center")]
ErrorResponse,
}
impl ServerState {
pub async fn init(secret: &ClientSecret, config: &Config) -> Result<Self, ServerInitError> {
let resp = HTTP_CLIENT
.inner()
.post(CONTROL_CENTER_PING_URL)
.json(&Request::from((secret, config)))
.send()
.await;
match resp {
Ok(resp) => match resp.json::<Response>().await {
Ok(Response::Ok(mut resp)) => {
let key = resp
.token_key
.ok_or(ServerInitError::MissingTokenKey)
.and_then(|key| {
base64::decode(&key)
.ok()
.and_then(|k| PrecomputedKey::from_slice(&k))
.map_or_else(
|| {
error!("Failed to parse token key: got {}", key);
Err(ServerInitError::KeyParseError(key))
},
Ok,
)
})?;
PREVIOUSLY_COMPROMISED.store(resp.compromised, Ordering::Release);
if resp.compromised {
error!("Got compromised response from control center!");
}
PREVIOUSLY_PAUSED.store(resp.paused, Ordering::Release);
if resp.paused {
warn!("Control center has paused this node!");
}
if let Some(ref override_url) = config.override_upstream {
resp.image_server = override_url.clone();
warn!("Upstream URL overridden to: {}", resp.image_server);
}
info!("This client's URL has been set to {}", resp.url);
let tls = resp.tls.expect("TLS data must be provided on the initial ping");
CERTIFIED_KEY.store(Some(Arc::new(CertifiedKey {
cert: tls.certs.clone(),
key: Arc::clone(&tls.priv_key) as Arc<dyn SigningKey>,
ocsp: None,
sct_list: None,
})));
std::mem::drop(
TLS_PREVIOUSLY_CREATED.set(ArcSwap::from_pointee(tls.created_at)),
);
std::mem::drop(TLS_SIGNING_KEY.set(ArcSwap::new(tls.priv_key)));
std::mem::drop(TLS_CERTS.set(ArcSwap::from_pointee(tls.certs)));
Ok(Self {
precomputed_key: key,
image_server: resp.image_server,
url: resp.url,
url_overridden: config.override_upstream.is_some(),
})
}
Ok(Response::Error(resp)) => {
error!(
"Got an {} error from upstream: {}",
resp.status as u16, resp.error
);
Err(ServerInitError::ErrorResponse)
}
Err(e) => {
error!("Got malformed response: {}. Is MangaDex@Home down?", e);
Err(ServerInitError::MalformedResponse(e))
}
},
Err(e) => match e {
e if e.is_timeout() => {
error!("Response timed out to control server. Is MangaDex@Home down?");
Err(ServerInitError::Timeout(e))
}
e => {
error!("Failed to send request: {}", e);
Err(ServerInitError::SendFailure(e))
}
},
}
}
pub fn init_offline() -> Self {
assert!(OFFLINE_MODE.load(Ordering::Acquire));
Self {
precomputed_key: PrecomputedKey::from_slice(&[41; PRECOMPUTEDKEYBYTES])
.expect("expect offline config to work"),
image_server: Url::from_file_path("/dev/null").expect("expect offline config to work"),
url: Url::from_str("http://localhost").expect("expect offline config to work"),
url_overridden: false,
}
}
}
pub struct RwLockServerState(pub RwLock<ServerState>);
pub struct DynamicServerCert;
impl ResolvesServerCert for DynamicServerCert {
fn resolve(&self, _: ClientHello) -> Option<Arc<CertifiedKey>> {
// TODO: wait for actix-web to use a new version of rustls so we can
// remove cloning the certs all the time
CERTIFIED_KEY.load_full()
}
}
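DynamicServerCert is what lets the node hot-swap its certificate whenever the control center rotates TLS material. A sketch of plugging it into a rustls 0.20-style server config (the builder chain here is an assumption about the caller, not code from this file):

    let tls_config = rustls::ServerConfig::builder()
        .with_safe_defaults()
        .with_no_client_auth()
        .with_cert_resolver(Arc::new(DynamicServerCert));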


@ -1,3 +1,48 @@
#![cfg(not(tarpaulin_include))]

use reqwest::StatusCode;
use serde::Serialize;
use tracing::{info, warn};

use crate::client::HTTP_CLIENT;
use crate::config::ClientSecret;

const CONTROL_CENTER_STOP_URL: &str = "https://api.mangadex.network/stop";

#[derive(Serialize)]
struct StopRequest<'a> {
    secret: &'a ClientSecret,
}
pub async fn send_stop(secret: &ClientSecret) {
match HTTP_CLIENT
.inner()
.post(CONTROL_CENTER_STOP_URL)
.json(&StopRequest { secret })
.send()
.await
{
Ok(resp) => {
if resp.status() == StatusCode::OK {
info!("Successfully sent stop message to control center.");
} else {
warn!("Got weird response from server: {:?}", resp.headers());
}
}
Err(e) => warn!("Got error while sending stop message: {}", e),
}
}
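// Typically invoked once during graceful shutdown, e.g. from a ctrl-c handler
// (a sketch; where the ClientSecret is kept depends on the caller):
//
//     send_stop(&secret).await;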
#[cfg(test)]
mod stop {
use super::CONTROL_CENTER_STOP_URL;
#[test]
fn stop_url_does_not_have_ping_in_url() {
        // This looks like a dumb test, yes, but it ensures that clients don't
        // get marked as compromised: the ping endpoint happily accepts a JSON
        // object containing only the secret, so accidentally hitting it
        // instead of /stop messes up non-trivial client configs.
assert!(!CONTROL_CENTER_STOP_URL.contains("ping"))
}
}

src/units.rs (new file)

@ -0,0 +1,99 @@
use std::fmt::Display;
use std::num::{NonZeroU16, NonZeroU64, ParseIntError};
use std::str::FromStr;
use serde::{Deserialize, Serialize};
/// Wrapper type for a port number.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Port(NonZeroU16);
impl Port {
pub const fn get(self) -> u16 {
self.0.get()
}
pub fn new(amt: u16) -> Option<Self> {
NonZeroU16::new(amt).map(Self)
}
}
impl Default for Port {
fn default() -> Self {
Self(unsafe { NonZeroU16::new_unchecked(443) })
}
}
impl FromStr for Port {
type Err = <NonZeroU16 as FromStr>::Err;
fn from_str(s: &str) -> Result<Self, Self::Err> {
NonZeroU16::from_str(s).map(Self)
}
}
impl Display for Port {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
#[derive(Copy, Clone, Deserialize, Default, Debug, Hash, Eq, PartialEq)]
pub struct Mebibytes(usize);
impl Mebibytes {
#[cfg(test)]
pub fn new(size: usize) -> Self {
Self(size)
}
}
impl FromStr for Mebibytes {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
s.parse::<usize>().map(Self)
}
}
#[derive(Serialize, Debug)]
pub struct Bytes(pub usize);
impl Bytes {
pub const fn get(&self) -> usize {
self.0
}
}
impl From<Mebibytes> for Bytes {
fn from(mib: Mebibytes) -> Self {
Self(mib.0 << 20)
}
}
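// Worked example: a 300 MiB disk quota becomes Bytes(314_572_800),
// since 300 << 20 == 300 * 1024 * 1024.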
#[derive(Copy, Clone, Deserialize, Debug, Hash, Eq, PartialEq)]
pub struct KilobitsPerSecond(NonZeroU64);
impl KilobitsPerSecond {
#[cfg(test)]
pub fn new(size: u64) -> Option<Self> {
NonZeroU64::new(size).map(Self)
}
}
impl FromStr for KilobitsPerSecond {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
s.parse::<NonZeroU64>().map(Self)
}
}
#[derive(Copy, Clone, Serialize, Debug, Hash, Eq, PartialEq)]
pub struct BytesPerSecond(NonZeroU64);
impl From<KilobitsPerSecond> for BytesPerSecond {
fn from(kbps: KilobitsPerSecond) -> Self {
Self(unsafe { NonZeroU64::new_unchecked(kbps.0.get() * 125) })
}
}
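// Worked example: 1 kilobit == 125 bytes, so a configured 100_000 kbps
// (100 Mbps) serializes as BytesPerSecond(12_500_000) in the ping payload.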