diff --git a/Cargo.lock b/Cargo.lock index 9ae95c604..4ae50c55d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -235,6 +235,27 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.11" @@ -1016,6 +1037,26 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bcrypt-pbkdf" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aeac2e1fe888769f34f05ac343bbef98b14d1ffb292ab69d4608b3abc86f2a2" +dependencies = [ + "blowfish", + "pbkdf2", + "sha2", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bindgen" version = "0.69.4" @@ -1025,7 +1066,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -1051,6 +1092,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -1060,6 +1110,25 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + +[[package]] +name = "blowfish" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e412e2cd0f2b2d93e02543ceae7917b3c70331573df19ee046bcbc35e45e87d7" +dependencies = [ + "byteorder", + "cipher", +] + [[package]] name = "brotli" version = "6.0.0" @@ -1164,6 +1233,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.0.97" @@ -1190,6 +1268,30 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.38" @@ -1230,6 +1332,17 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + [[package]] name = "clang-sys" version = "1.7.0" @@ -1375,7 +1488,7 @@ dependencies = [ "criterion-plot", "futures", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -1397,7 +1510,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -1440,6 +1553,35 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypt4gh" +version = "0.4.1" +source = "git+https://github.com/EGA-archive/crypt4gh-rust#2d41a1770067003bc67ab499841e0def186ed218" +dependencies = [ + "aes", + "base64 0.21.7", + "bcrypt-pbkdf", + "bincode", + "cbc", + "chacha20poly1305", + "clap", + "crypto_kx", + "ctr", + "curve25519-dalek", + "ed25519_to_curve25519", + "itertools 0.11.0", + "lazy_static", + "log", + "pretty_env_logger", + "rand", + "rand_chacha", + "regex", + "rpassword", + "scrypt", + "serde", + "thiserror", +] + [[package]] name = "crypto-bigint" version = "0.4.9" @@ -1469,9 +1611,56 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", + "rand_core", "typenum", ] +[[package]] +name = "crypto_kx" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704722d1d929489c8528bb1882805700f1ba20f54325704973e786352320b1ed" +dependencies = [ + "blake2", + "curve25519-dalek", + "rand_core", +] + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures", + "curve25519-dalek-derive", + "fiat-crypto", + "rustc_version", + "subtle", + "zeroize", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.61", +] + [[package]] name = "darling" version = "0.20.8" @@ -1581,6 +1770,12 @@ dependencies = [ "signature", ] +[[package]] +name = "ed25519_to_curve25519" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a976025474add79730a8df2913b114afd39bc53ce5633e045100aceb6d06bb6" + [[package]] name = "either" version = "1.11.0" @@ -1616,6 +1811,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "env_logger" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -1660,6 +1868,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + [[package]] name = "figment" version = "0.10.18" @@ -2009,6 +2223,7 @@ version = "0.10.1" dependencies = [ "async-trait", "clap", + "crypt4gh", "figment", "http 1.1.0", "http-serde", @@ -2099,8 +2314,10 @@ dependencies = [ "aws-sdk-s3", "axum", "base64 0.22.1", + "bincode", "bytes", "criterion", + "crypt4gh", "data-url", "futures", "futures-util", @@ -2127,6 +2344,7 @@ dependencies = [ "aws-credential-types", "aws-sdk-s3", "base64 0.22.1", + "crypt4gh", "futures", "htsget-config", "http 1.1.0", @@ -2228,6 +2446,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "0.14.28" @@ -2399,6 +2623,16 @@ version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "block-padding", + "generic-array", +] + [[package]] name = "ipnet" version = "2.9.0" @@ -2431,6 +2665,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -3073,6 +3316,12 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssl-probe" version = "0.1.5" @@ -3125,6 +3374,17 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "password-hash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" +dependencies = [ + "base64ct", + "rand_core", + "subtle", +] + [[package]] name = "paste" version = "1.0.15" @@ -3149,6 +3409,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "pear" version = "0.2.9" @@ -3264,6 +3534,17 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -3286,6 +3567,16 @@ dependencies = [ "yansi 0.5.1", ] +[[package]] +name = "pretty_env_logger" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" +dependencies = [ + "env_logger", + "log", +] + [[package]] name = "prettyplease" version = "0.2.20" @@ -3573,6 +3864,27 @@ dependencies = [ "xmlparser", ] +[[package]] +name = "rpassword" +version = "7.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80472be3c897911d0137b2d2b9055faf6eeac5b14e324073d83bc17b191d7e3f" +dependencies = [ + "libc", + "rtoolbox", + "windows-sys 0.48.0", +] + +[[package]] +name = "rtoolbox" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c247d24e63230cdb56463ae328478bd5eac8b8faa8c69461a77e8e323afac90e" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3812,6 +4124,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "same-file" version = "1.0.6" @@ -3836,6 +4157,18 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "password-hash", + "pbkdf2", + "salsa20", + "sha2", +] + [[package]] name = "sct" version = "0.7.1" @@ -4165,6 +4498,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.60" @@ -4583,6 +4925,16 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + [[package]] name = "untrusted" version = "0.9.0" diff --git a/data/c4gh/README.md b/data/c4gh/README.md new file mode 100644 index 000000000..0d33c7e15 --- /dev/null +++ b/data/c4gh/README.md @@ -0,0 +1,34 @@ +# Crypt4GH example file + +This is just a customised summary for htsget-rs. Please refer to the official [`crypt4gh-rust` documentation](https://ega-archive.github.io/crypt4gh-rust) for further information. 
+

## Keygen

```sh
cargo install crypt4gh
crypt4gh keygen --sk keys/alice.sec --pk keys/alice.pub
crypt4gh keygen --sk keys/bob.sec --pk keys/bob.pub
```

## Encrypt

```sh
crypt4gh encrypt --sk keys/alice.sec --recipient_pk keys/bob.pub < htsnexus_test_NA12878.bam > htsnexus_test_NA12878.bam.c4gh
```

## Decrypt

```sh
crypt4gh decryptor --range 0-65535 --sk data/c4gh/keys/bob.sec \
    --sender-pk data/c4gh/keys/alice.pub \
    < data/c4gh/htsnexus_test_NA12878.bam.c4gh \
    > out.bam

samtools view out.bam
(...)
SRR098401.61822403 83 11 5009470 60 76M = 5009376 -169 TCTTCTTGCCCTGGTGTTTCGCCGTTCCAGTGCCCCCTGCTGCAGACCATAAAGGATGGGACTTTGTTGAGGTAGG ?B6BDCD@I?JFI?FHHFEAIIAHHDIJHHFIIIIIJEIIFIJGHCIJDDEEHHHDEHHHCIGGEGFDGFGFBEDC X0:i:1 X1:i:0 MD:Z:76 RG:Z:SRR098401 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@B

samtools view: error reading file "out.bam"
samtools view: error closing "out.bam": -1
```

The samtools view errors above suggest that the returned bytes do not include the BAM end-of-file (EOF) terminator. diff --git a/data/c4gh/htsnexus_test_NA12878.bam.bai b/data/c4gh/htsnexus_test_NA12878.bam.bai new file mode 100644 index 000000000..6e723eb69 Binary files /dev/null and b/data/c4gh/htsnexus_test_NA12878.bam.bai differ diff --git a/data/c4gh/htsnexus_test_NA12878.bam.c4gh b/data/c4gh/htsnexus_test_NA12878.bam.c4gh new file mode 100644 index 000000000..35c959faa Binary files /dev/null and b/data/c4gh/htsnexus_test_NA12878.bam.c4gh differ diff --git a/data/c4gh/htsnexus_test_NA12878.cram.c4gh b/data/c4gh/htsnexus_test_NA12878.cram.c4gh new file mode 100644 index 000000000..72dd83e43 Binary files /dev/null and b/data/c4gh/htsnexus_test_NA12878.cram.c4gh differ diff --git a/data/c4gh/htsnexus_test_NA12878.cram.crai b/data/c4gh/htsnexus_test_NA12878.cram.crai new file mode 100644 index 000000000..61ea847e0 Binary files /dev/null and b/data/c4gh/htsnexus_test_NA12878.cram.crai differ diff --git a/data/c4gh/keys/alice.pub b/data/c4gh/keys/alice.pub new file mode 100644 index 000000000..686226ca8 --- /dev/null +++ b/data/c4gh/keys/alice.pub @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PUBLIC KEY----- +ToQrpj4UfuLgxZRe1wSGIZtXC19fOEHUHe3RQy63qwM= +-----END CRYPT4GH PUBLIC KEY----- diff --git a/data/c4gh/keys/alice.sec b/data/c4gh/keys/alice.sec new file mode 100644 index 000000000..ecc3b8916 --- /dev/null +++ b/data/c4gh/keys/alice.sec @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PRIVATE KEY----- +YzRnaC12MQAEbm9uZQAEbm9uZQAgxi4tNmUO++HAApv9ryZB9S8QfqrWKKe5CunJuChH5vU= +-----END CRYPT4GH PRIVATE KEY----- diff --git a/data/c4gh/keys/bob.pub b/data/c4gh/keys/bob.pub new file mode 100644 index 000000000..990643c83 --- /dev/null +++ b/data/c4gh/keys/bob.pub @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PUBLIC KEY----- +TyKEXZPnfon6dj1kRXl6HumfZDzo/h60RIc8Wd0Ig2s= +-----END CRYPT4GH PUBLIC KEY----- diff --git a/data/c4gh/keys/bob.sec b/data/c4gh/keys/bob.sec new file mode 100644 index 000000000..0bc62269f --- /dev/null +++ b/data/c4gh/keys/bob.sec @@ -0,0 +1,3 @@ +-----BEGIN CRYPT4GH PRIVATE KEY----- +YzRnaC12MQAEbm9uZQAEbm9uZQAg6uLXNqcXAi6FRKzRBk2KBKF4BnmueySZv5MGzKjIPcI= +-----END CRYPT4GH PRIVATE KEY----- diff --git a/data/c4gh/sample1-bcbio-cancer.bcf.c4gh b/data/c4gh/sample1-bcbio-cancer.bcf.c4gh new file mode 100644 index 000000000..758b98f9a Binary files /dev/null and b/data/c4gh/sample1-bcbio-cancer.bcf.c4gh differ diff --git a/data/c4gh/sample1-bcbio-cancer.bcf.csi b/data/c4gh/sample1-bcbio-cancer.bcf.csi new file mode 100644 
index 000000000..045f7dec4 Binary files /dev/null and b/data/c4gh/sample1-bcbio-cancer.bcf.csi differ diff --git a/data/c4gh/spec-v4.3.vcf.gz.c4gh b/data/c4gh/spec-v4.3.vcf.gz.c4gh new file mode 100644 index 000000000..4e31cc24b Binary files /dev/null and b/data/c4gh/spec-v4.3.vcf.gz.c4gh differ diff --git a/data/c4gh/spec-v4.3.vcf.gz.tbi b/data/c4gh/spec-v4.3.vcf.gz.tbi new file mode 100644 index 000000000..d47baa55c Binary files /dev/null and b/data/c4gh/spec-v4.3.vcf.gz.tbi differ diff --git a/htsget-actix/Cargo.toml b/htsget-actix/Cargo.toml index 55eb82dd6..7d7337ecd 100644 --- a/htsget-actix/Cargo.toml +++ b/htsget-actix/Cargo.toml @@ -13,6 +13,13 @@ repository = "https://github.com/umccr/htsget-rs" [features] s3-storage = ["htsget-config/s3-storage", "htsget-search/s3-storage", "htsget-http/s3-storage", "htsget-axum/s3-storage", "htsget-test/s3-storage"] url-storage = ["htsget-config/url-storage", "htsget-search/url-storage", "htsget-http/url-storage", "htsget-axum/url-storage", "htsget-test/url-storage"] +c4gh-experimental = [ + "htsget-config/c4gh-experimental", + "htsget-search/c4gh-experimental", + "htsget-http/c4gh-experimental", + "htsget-axum/c4gh-experimental", + "htsget-test/c4gh-experimental" +] default = [] [dependencies] diff --git a/htsget-actix/README.md b/htsget-actix/README.md index a13e16b5f..6ac5d66ad 100644 --- a/htsget-actix/README.md +++ b/htsget-actix/README.md @@ -50,6 +50,7 @@ are exposed in the public API. This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. ## Benchmarks Benchmarks for this crate written using [Criterion.rs][criterion-rs], and aim to compare the performance of this crate with the diff --git a/htsget-actix/benches/request_benchmarks.rs b/htsget-actix/benches/request_benchmarks.rs index f3762335c..1dd6b5d95 100644 --- a/htsget-actix/benches/request_benchmarks.rs +++ b/htsget-actix/benches/request_benchmarks.rs @@ -12,7 +12,8 @@ use serde::{Deserialize, Serialize}; use htsget_config::types::{Headers, JsonResponse}; use htsget_http::{PostRequest, Region}; -use htsget_test::http::{default_config_fixed_port, default_dir, default_dir_data}; +use htsget_test::http::default_config_fixed_port; +use htsget_test::util::{default_dir, default_dir_data}; const REFSERVER_DOCKER_IMAGE: &str = "ga4gh/htsget-refserver:1.5.0"; const BENCHMARK_DURATION_SECONDS: u64 = 30; diff --git a/htsget-actix/src/lib.rs b/htsget-actix/src/lib.rs index 98ea64c20..85c805284 100644 --- a/htsget-actix/src/lib.rs +++ b/htsget-actix/src/lib.rs @@ -10,16 +10,12 @@ use tracing_actix_web::TracingLogger; use htsget_config::config::cors::CorsConfig; pub use htsget_config::config::{Config, DataServerConfig, ServiceInfo, TicketServerConfig, USAGE}; pub use htsget_config::storage::Storage; -use htsget_search::from_storage::HtsGetFromStorage; use htsget_search::HtsGet; -use htsget_search::LocalStorage; use crate::handlers::{get, post, reads_service_info, variants_service_info, HttpVersionCompat}; pub mod handlers; -pub type HtsGetStorage = HtsGetFromStorage>; - /// Represents the actix app state. 
pub struct AppState { pub htsget: Arc, diff --git a/htsget-axum/Cargo.toml b/htsget-axum/Cargo.toml index 202380fd0..a5ad8e44c 100644 --- a/htsget-axum/Cargo.toml +++ b/htsget-axum/Cargo.toml @@ -15,12 +15,20 @@ s3-storage = [ "htsget-config/s3-storage", "htsget-search/s3-storage", "htsget-test/s3-storage", - "htsget-test/aws-mocks" + "htsget-test/aws-mocks", + "htsget-http/s3-storage" ] url-storage = [ "htsget-config/url-storage", "htsget-search/url-storage", - "htsget-test/url-storage" + "htsget-test/url-storage", + "htsget-http/url-storage" +] +c4gh-experimental = [ + "htsget-config/c4gh-experimental", + "htsget-search/c4gh-experimental", + "htsget-test/c4gh-experimental", + "htsget-http/c4gh-experimental" ] default = [] diff --git a/htsget-axum/README.md b/htsget-axum/README.md index a962cf608..a28d4ceaf 100644 --- a/htsget-axum/README.md +++ b/htsget-axum/README.md @@ -110,6 +110,7 @@ htsget-rs. It also contains the data block server which fetches data from a `Loc This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. ## License diff --git a/htsget-config/Cargo.toml b/htsget-config/Cargo.toml index 87a0e26e4..ce9b78129 100644 --- a/htsget-config/Cargo.toml +++ b/htsget-config/Cargo.toml @@ -13,6 +13,7 @@ repository = "https://github.com/umccr/htsget-rs" [features] s3-storage = [] url-storage = ["dep:reqwest"] +c4gh-experimental = ["dep:crypt4gh"] default = [] [dependencies] @@ -37,6 +38,9 @@ rustls-pki-types = "1" # url-storage reqwest = { version = "0.12", features = ["rustls-tls"], default-features = false, optional = true } +# Crypt4GH +crypt4gh = { version = "0.4", git = "https://github.com/EGA-archive/crypt4gh-rust", optional = true } + [dev-dependencies] serde_json = "1" figment = { version = "0.10", features = ["test"] } diff --git a/htsget-config/README.md b/htsget-config/README.md index 06370da04..43ac82321 100644 --- a/htsget-config/README.md +++ b/htsget-config/README.md @@ -488,6 +488,40 @@ addressing by setting the `MINIO_DOMAIN` environment variable. [Path][path-addre See the MinIO deployment [example][minio-deployment] for more information on how to configure htsget-rs and MinIO. +### Crypt4GH + +There is experimental support for serving [Crypt4GH][c4gh] encrypted files. This can be enabled by compiling with the +`c4gh-experimental` feature flag. + +This allows htsget-rs to read Crypt4GH files and serve them encrypted, directly to the client. In the process of +serving the data, htsget-rs will decrypt the headers of the Crypt4GH files and reencrypt them so that the client can read +them. When the client receives byte ranges from htsget-rs and concatenates them, the output bytes will be Crypt4GH encrypted, +and will need to be decrypted before they can be read. All file formats (BAM, CRAM, VCF, and BCF) are supported using Crypt4GH. + +To use this feature, an additional config option called `object_type` under `resolvers.storage` is required, +which allows specifying the private and public keys: + +| Option | Description | Type | Default | +|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| +| `private_key` | The path to private key which htsget-rs uses to decrypt Crypt4GH data. 
| Filesystem path | Not Set | +| `recipient_public_key` | The path to the public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. | Filesystem path | Not Set | + +For example: + +```toml +[[resolvers]] +regex = ".*" +substitution_string = "$0" + +[resolvers.storage] +object_type = { private_key = "data/c4gh/keys/bob.sec", recipient_public_key = "data/c4gh/keys/alice.pub" } # pragma: allowlist secret +``` + +The htsget-rs server expects the Crypt4GH file to end with `.c4gh`, and the index file to be unencrypted. See the [`data/c4gh`][data-c4gh] for examples of file structure. + +> [!NOTE] +> This option is currently only supported for `LocalStorage`. The `object_type` will not have an effect if using `S3Storage` or `UrlStorage`. + ### As a library This crate reads config files and environment variables using [figment], and accepts command-line arguments using clap. The main function for this is `from_config`, @@ -501,6 +535,7 @@ regex, and changing it by using a substitution string. This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. ## License @@ -511,4 +546,6 @@ This project is licensed under the [MIT license][license]. [virtual-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access [minio-deployment]: ../deploy/examples/minio/README.md [license]: LICENSE -[minio]: https://min.io/ \ No newline at end of file +[minio]: https://min.io/ +[c4gh]: https://samtools.github.io/hts-specs/crypt4gh.pdf +[data-c4gh]: ../data/c4gh \ No newline at end of file diff --git a/htsget-config/examples/config-files/c4gh.toml b/htsget-config/examples/config-files/c4gh.toml new file mode 100644 index 000000000..22c983a8b --- /dev/null +++ b/htsget-config/examples/config-files/c4gh.toml @@ -0,0 +1,12 @@ +# An example of running htsget-rs with Crypt4GH enabled. 
+# Run with `cargo run -p htsget-axum --features c4gh-experimental -- --config htsget-config/examples/config-files/c4gh.toml` + +ticket_server_addr = "127.0.0.1:8080" +data_server_addr = "127.0.0.1:8081" + +[[resolvers]] +regex = ".*" +substitution_string = "$0" + +[resolvers.storage] +object_type = { private_key = "data/c4gh/keys/bob.sec", recipient_public_key = "data/c4gh/keys/alice.pub" } # pragma: allowlist secret diff --git a/htsget-config/src/resolver.rs b/htsget-config/src/resolver.rs index b68a91b1e..1844feda9 100644 --- a/htsget-config/src/resolver.rs +++ b/htsget-config/src/resolver.rs @@ -484,6 +484,7 @@ mod tests { Authority::from_static("127.0.0.1:8080"), "data".to_string(), "/data".to_string(), + Default::default(), ); let resolver = Resolver::new( Storage::Local { local_storage }, diff --git a/htsget-config/src/storage/local.rs b/htsget-config/src/storage/local.rs index f721b7041..d35ceac3d 100644 --- a/htsget-config/src/storage/local.rs +++ b/htsget-config/src/storage/local.rs @@ -4,6 +4,7 @@ use http::uri::Authority; use serde::{Deserialize, Serialize}; use crate::config::{default_localstorage_addr, default_path, DataServerConfig}; +use crate::storage::object::ObjectType; use crate::tls::KeyPairScheme; use crate::types::Scheme; @@ -23,6 +24,7 @@ pub struct LocalStorage { authority: Authority, local_path: String, path_prefix: String, + object_type: ObjectType, } impl LocalStorage { @@ -32,12 +34,14 @@ impl LocalStorage { authority: Authority, local_path: String, path_prefix: String, + object_type: ObjectType, ) -> Self { Self { scheme, authority, local_path, path_prefix, + object_type, } } @@ -60,6 +64,11 @@ impl LocalStorage { pub fn path_prefix(&self) -> &str { &self.path_prefix } + + /// Get the object type. + pub fn object_type(&self) -> &ObjectType { + &self.object_type + } } impl Default for LocalStorage { @@ -69,6 +78,7 @@ impl Default for LocalStorage { authority: default_authority(), local_path: default_local_path(), path_prefix: Default::default(), + object_type: Default::default(), } } } @@ -80,6 +90,7 @@ impl From<&DataServerConfig> for Option { Authority::from_str(&config.addr().to_string()).ok()?, config.local_path().to_str()?.to_string(), config.serve_at().to_string(), + Default::default(), )) } } @@ -134,6 +145,7 @@ mod tests { Authority::from_static("127.0.0.1:8080"), "data".to_string(), "/data".to_string(), + Default::default(), ); assert_eq!(result.unwrap(), expected); diff --git a/htsget-config/src/storage/mod.rs b/htsget-config/src/storage/mod.rs index c36e6c676..d0c57da4a 100644 --- a/htsget-config/src/storage/mod.rs +++ b/htsget-config/src/storage/mod.rs @@ -9,6 +9,7 @@ use crate::storage::url::UrlStorageClient; use crate::types::{Query, Response, Result}; pub mod local; +pub mod object; #[cfg(feature = "s3-storage")] pub mod s3; #[cfg(feature = "url-storage")] diff --git a/htsget-config/src/storage/object/c4gh.rs b/htsget-config/src/storage/object/c4gh.rs new file mode 100644 index 000000000..a5b20a855 --- /dev/null +++ b/htsget-config/src/storage/object/c4gh.rs @@ -0,0 +1,62 @@ +//! Crypt4GH key parsing. +//! + +use crate::error::Error::ParseError; +use crate::error::{Error, Result}; +use crypt4gh::error::Crypt4GHError; +use crypt4gh::keys::{get_private_key, get_public_key}; +use crypt4gh::Keys; +use serde::Deserialize; +use std::path::PathBuf; + +/// Config for Crypt4GH keys. +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(try_from = "C4GHPath")] +pub struct C4GHKeys { + keys: Vec, +} + +impl C4GHKeys { + /// Get the inner value. 
+ pub fn into_inner(self) -> Vec { + self.keys + } +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct C4GHPath { + private_key: PathBuf, + recipient_public_key: PathBuf, +} + +impl C4GHPath { + pub fn new(private_key: PathBuf, recipient_public_key: PathBuf) -> Self { + Self { + private_key, + recipient_public_key, + } + } +} + +impl TryFrom for C4GHKeys { + type Error = Error; + + fn try_from(path: C4GHPath) -> Result { + let private_key = get_private_key(path.private_key, Ok("".to_string()))?; + let recipient_public_key = get_public_key(path.recipient_public_key)?; + + Ok(C4GHKeys { + keys: vec![Keys { + method: 0, + privkey: private_key, + recipient_pubkey: recipient_public_key, + }], + }) + } +} + +impl From for Error { + fn from(err: Crypt4GHError) -> Self { + ParseError(err.to_string()) + } +} diff --git a/htsget-config/src/storage/object/mod.rs b/htsget-config/src/storage/object/mod.rs new file mode 100644 index 000000000..4a222ebb3 --- /dev/null +++ b/htsget-config/src/storage/object/mod.rs @@ -0,0 +1,23 @@ +//! Defines the type of object used by storage. +//! + +#[cfg(feature = "c4gh-experimental")] +pub mod c4gh; + +#[cfg(feature = "c4gh-experimental")] +use crate::storage::object::c4gh::C4GHKeys; +use serde::{Deserialize, Serialize}; + +/// An object type, can be regular or Crypt4GH encrypted. +#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq)] +#[serde(untagged, deny_unknown_fields)] +#[non_exhaustive] +pub enum ObjectType { + #[default] + Regular, + #[cfg(feature = "c4gh-experimental")] + C4GH { + #[serde(flatten, skip_serializing)] + keys: C4GHKeys, + }, +} diff --git a/htsget-config/src/types.rs b/htsget-config/src/types.rs index 08c13284e..8e6a637cf 100644 --- a/htsget-config/src/types.rs +++ b/htsget-config/src/types.rs @@ -72,6 +72,15 @@ impl Format { pub fn fmt_gzi(&self, id: &str) -> io::Result { Ok(format!("{id}{}", self.gzi_index_file_ending()?)) } + + /// Check if the id points at an index file. + pub fn is_index(id: &str) -> bool { + id.ends_with(".bai") + || id.ends_with(".crai") + || id.ends_with(".tbi") + || id.ends_with(".csi") + || id.ends_with(".gzi") + } } impl From for String { @@ -521,6 +530,11 @@ impl Headers { pub fn as_ref_inner(&self) -> &HashMap { &self.0 } + + /// Get a mutable reference to the inner HashMap. + pub fn as_mut_inner(&mut self) -> &mut HashMap { + &mut self.0 + } } impl TryFrom<&HeaderMap> for Headers { diff --git a/htsget-http/Cargo.toml b/htsget-http/Cargo.toml index e715d32ab..7e984b368 100644 --- a/htsget-http/Cargo.toml +++ b/htsget-http/Cargo.toml @@ -13,6 +13,7 @@ repository = "https://github.com/umccr/htsget-rs" [features] s3-storage = ["htsget-config/s3-storage", "htsget-search/s3-storage", "htsget-test/s3-storage"] url-storage = ["htsget-config/url-storage", "htsget-search/url-storage", "htsget-test/url-storage"] +c4gh-experimental = ["htsget-config/c4gh-experimental", "htsget-search/c4gh-experimental", "htsget-test/c4gh-experimental"] default = [] [dependencies] diff --git a/htsget-http/README.md b/htsget-http/README.md index 391ce8d22..c0dee5b4d 100644 --- a/htsget-http/README.md +++ b/htsget-http/README.md @@ -38,6 +38,7 @@ These functions take query and endpoint information, and process it using [htsge This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. 
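The flag is purely additive; a minimal sketch of enabling it when building or testing this crate directly, assuming a checkout of the workspace:

```sh
# Build and test htsget-http with Crypt4GH support compiled in.
cargo build -p htsget-http --features c4gh-experimental
cargo test -p htsget-http --features c4gh-experimental
```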
[warp]: https://github.com/seanmonstar/warp [htsget-search]: ../htsget-search diff --git a/htsget-http/src/lib.rs b/htsget-http/src/lib.rs index 84dadfd7d..96eead2ae 100644 --- a/htsget-http/src/lib.rs +++ b/htsget-http/src/lib.rs @@ -92,7 +92,7 @@ mod tests { use htsget_search::from_storage::HtsGetFromStorage; use htsget_search::HtsGet; use htsget_search::LocalStorage; - use htsget_test::util::expected_bgzf_eof_data_url; + use htsget_search::Storage; use super::*; @@ -125,7 +125,7 @@ mod tests { let request = HashMap::new(); let mut expected_response_headers = Headers::default(); - expected_response_headers.insert("Range".to_string(), "bytes=0-2596770".to_string()); + expected_response_headers.insert("Range".to_string(), "bytes=0-2596798".to_string()); let request = Request::new( "bam/htsnexus_test_NA12878".to_string(), @@ -164,7 +164,7 @@ mod tests { request.insert("end".to_string(), "200".to_string()); let mut expected_response_headers = Headers::default(); - expected_response_headers.insert("Range".to_string(), "bytes=0-3465".to_string()); + expected_response_headers.insert("Range".to_string(), "bytes=0-3493".to_string()); let request = Request::new( "vcf/sample1-bcbio-cancer".to_string(), @@ -191,7 +191,7 @@ mod tests { }; let mut expected_response_headers = Headers::default(); - expected_response_headers.insert("Range".to_string(), "bytes=0-2596770".to_string()); + expected_response_headers.insert("Range".to_string(), "bytes=0-2596798".to_string()); assert_eq!( post(get_searcher(), body, request, Endpoint::Reads).await, @@ -234,7 +234,7 @@ mod tests { }; let mut expected_response_headers = Headers::default(); - expected_response_headers.insert("Range".to_string(), "bytes=0-3465".to_string()); + expected_response_headers.insert("Range".to_string(), "bytes=0-3493".to_string()); assert_eq!( post(get_searcher(), body, request, Endpoint::Variants).await, @@ -248,7 +248,6 @@ mod tests { vec![ Url::new("http://127.0.0.1:8081/data/vcf/sample1-bcbio-cancer.vcf.gz".to_string()) .with_headers(headers), - Url::new(expected_bgzf_eof_data_url()), ], )) } @@ -259,7 +258,6 @@ mod tests { vec![ Url::new("http://127.0.0.1:8081/data/bam/htsnexus_test_NA12878.bam".to_string()) .with_headers(headers), - Url::new(expected_bgzf_eof_data_url()), ], )) } @@ -273,7 +271,7 @@ mod tests { } fn get_searcher() -> Arc { - Arc::new(HtsGetFromStorage::new( + Arc::new(HtsGetFromStorage::new(Storage::new( LocalStorage::new( get_base_path(), ConfigLocalStorage::new( @@ -281,9 +279,10 @@ mod tests { Authority::from_static("127.0.0.1:8081"), "data".to_string(), "/data".to_string(), + Default::default(), ), ) .unwrap(), - )) + ))) } } diff --git a/htsget-lambda/Cargo.toml b/htsget-lambda/Cargo.toml index d5cc5f450..82daff643 100644 --- a/htsget-lambda/Cargo.toml +++ b/htsget-lambda/Cargo.toml @@ -13,6 +13,13 @@ repository = "https://github.com/umccr/htsget-rs" [features] s3-storage = ["htsget-axum/s3-storage", "htsget-config/s3-storage", "htsget-search/s3-storage", "htsget-http/s3-storage", "htsget-test/s3-storage"] url-storage = ["htsget-axum/url-storage", "htsget-config/url-storage", "htsget-search/url-storage", "htsget-http/url-storage", "htsget-test/url-storage"] +c4gh-experimental = [ + "htsget-axum/c4gh-experimental", + "htsget-config/c4gh-experimental", + "htsget-search/c4gh-experimental", + "htsget-http/c4gh-experimental", + "htsget-test/c4gh-experimental" +] default = [] [dependencies] diff --git a/htsget-lambda/README.md b/htsget-lambda/README.md index 881ab236c..949393d23 100644 --- a/htsget-lambda/README.md 
+++ b/htsget-lambda/README.md @@ -46,6 +46,7 @@ library code, and it instead uses `htsget-axum`. Please use that crate for funct This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. ## License diff --git a/htsget-search/Cargo.toml b/htsget-search/Cargo.toml index 52eccf363..95e2ed6e7 100644 --- a/htsget-search/Cargo.toml +++ b/htsget-search/Cargo.toml @@ -22,6 +22,11 @@ url-storage = [ "htsget-config/url-storage", "htsget-test/url-storage" ] +c4gh-experimental = [ + "htsget-storage/c4gh-experimental", + "htsget-config/c4gh-experimental", + "htsget-test/c4gh-experimental" +] default = [] [dependencies] diff --git a/htsget-search/README.md b/htsget-search/README.md index d8c77c165..400807ee4 100644 --- a/htsget-search/README.md +++ b/htsget-search/README.md @@ -59,6 +59,7 @@ used to process requests. This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. ## Minimising Byte Ranges diff --git a/htsget-search/benches/search_benchmarks.rs b/htsget-search/benches/search_benchmarks.rs index d86dc78d1..0b4e71181 100644 --- a/htsget-search/benches/search_benchmarks.rs +++ b/htsget-search/benches/search_benchmarks.rs @@ -16,12 +16,13 @@ const BENCHMARK_DURATION_SECONDS: u64 = 30; const NUMBER_OF_SAMPLES: usize = 50; async fn perform_query(query: Query) -> Result<(), HtsGetError> { - HtsGetFromStorage::<()>::from_local( + HtsGetFromStorage::from_local( &ConfigLocalStorage::new( Scheme::Http, Authority::from_static("127.0.0.1:8081"), "../data".to_string(), "/data".to_string(), + Default::default(), ), &query, ) diff --git a/htsget-search/src/bam_search.rs b/htsget-search/src/bam_search.rs index 850bb17fc..4ccccf48d 100644 --- a/htsget-search/src/bam_search.rs +++ b/htsget-search/src/bam_search.rs @@ -21,22 +21,17 @@ use crate::search::{BgzfSearch, Search, SearchAll, SearchReads}; use crate::Class::Body; use crate::HtsGetError; use crate::{Format, Query, Result}; -use htsget_storage::{BytesPosition, Storage}; +use htsget_storage::{BytesPosition, Storage, Streamable}; -type AsyncReader = bam::AsyncReader>; +type AsyncReader = bam::AsyncReader>; /// Allows searching through bam files. 
-pub struct BamSearch { - storage: Arc, +pub struct BamSearch { + storage: Arc, } #[async_trait] -impl BgzfSearch, Header> - for BamSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl BgzfSearch for BamSearch { #[instrument(level = "trace", skip(self, index))] async fn get_byte_ranges_for_unmapped( &self, @@ -62,28 +57,22 @@ where .with_class(Body)]) } - async fn read_bytes(reader: &mut AsyncReader) -> Option { + async fn read_bytes(reader: &mut AsyncReader) -> Option { reader.read_record(&mut Default::default()).await.ok() } - fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { + fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { reader.get_ref().virtual_position() } } #[async_trait] -impl - Search, Index, AsyncReader, Header> - for BamSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - fn init_reader(inner: ReaderType) -> AsyncReader { +impl Search, Index, AsyncReader, Header> for BamSearch { + fn init_reader(inner: Streamable) -> AsyncReader { AsyncReader::new(inner) } - async fn read_header(reader: &mut AsyncReader) -> io::Result
{ + async fn read_header(reader: &mut AsyncReader) -> io::Result
{ reader.read_header().await } @@ -106,7 +95,7 @@ where .await } - fn get_storage(&self) -> Arc { + fn get_storage(&self) -> Arc { Arc::clone(&self.storage) } @@ -116,13 +105,7 @@ where } #[async_trait] -impl - SearchReads, Index, AsyncReader, Header> - for BamSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl SearchReads, Index, AsyncReader, Header> for BamSearch { async fn get_reference_sequence_from_name<'a>( &self, header: &'a Header, @@ -151,32 +134,30 @@ where } } -impl BamSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl BamSearch { /// Create the bam search. - pub fn new(storage: Arc) -> Self { + pub fn new(storage: Arc) -> Self { Self { storage } } } #[cfg(test)] pub(crate) mod tests { - use std::future::Future; - use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; use htsget_test::http::concat::ConcatResponse; - use htsget_test::util::expected_bgzf_eof_data_url; + use std::future::Future; + use super::*; #[cfg(feature = "s3-storage")] use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::{Class::Body, Class::Header, Headers, HtsGetError::NotFound, Response, Url}; use htsget_storage::local::LocalStorage; - - use super::*; + #[cfg(feature = "c4gh-experimental")] + use { + crate::from_storage::tests::with_local_storage_c4gh, + htsget_storage::c4gh::storage::C4GHStorage, htsget_test::c4gh::get_decryption_keys, + }; const DATA_LOCATION: &str = "data/bam"; const INDEX_FILE_LOCATION: &str = "htsnexus_test_NA12878.bam.bai"; @@ -185,18 +166,15 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_reads() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; println!("{response:#?}"); let expected_response = Ok(Response::new( Format::Bam, - vec![ - Url::new(expected_url()) - .with_headers(Headers::default().with_header("Range", "bytes=0-2596770")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(expected_url()) + .with_headers(Headers::default().with_header("Range", "bytes=0-2596798"))], )); assert_eq!(response, expected_response); @@ -208,7 +186,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_unmapped_reads() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("*"); let response = search.search(query).await; @@ -221,9 +199,8 @@ pub(crate) mod tests { .with_headers(Headers::default().with_header("Range", "bytes=0-4667")) .with_class(Header), Url::new(expected_url()) - .with_headers(Headers::default().with_header("Range", "bytes=2060795-2596770")) + .with_headers(Headers::default().with_header("Range", "bytes=2060795-2596798")) .with_class(Body), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), ], )); assert_eq!(response, expected_response); @@ -236,7 +213,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr11() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = 
BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11"); let response = search.search(query).await; @@ -247,7 +224,7 @@ pub(crate) mod tests { vec![ Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=0-996014")), - Url::new(expected_bgzf_eof_data_url()), + expected_eof_url().set_class(None), ], )); assert_eq!(response, expected_response); @@ -260,7 +237,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr20() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -275,7 +252,7 @@ pub(crate) mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=977196-2128165")) .with_class(Body), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -288,7 +265,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_with_seq_range() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000) @@ -311,7 +288,7 @@ pub(crate) mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=977196-996014")) .with_class(Body), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -324,7 +301,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000); @@ -340,7 +317,7 @@ pub(crate) mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=256721-996014")) .with_class(Body), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -353,7 +330,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_many_response_urls() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(4999976) @@ -374,7 +351,7 @@ pub(crate) mod tests { .with_headers(Headers::default().with_header("Range", "bytes=824361-842100")), Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=977196-996014")), - Url::new(expected_bgzf_eof_data_url()), + expected_eof_url().set_class(None), ], )); assert_eq!(response, expected_response); @@ -388,7 +365,7 @@ pub(crate) mod tests { async fn search_no_gzi() { with_local_storage_fn( |storage| async move { - 
let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("11") .with_start(5015000) @@ -405,7 +382,7 @@ pub(crate) mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=256721-1065951")) .with_class(Body), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -421,7 +398,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -446,7 +423,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_no_mapped_reads() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("22"); let response = search.search(query).await; @@ -458,7 +435,7 @@ pub(crate) mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=0-4667")) .with_class(Header), - Url::new(expected_bgzf_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -471,7 +448,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("25"); let response = search.search(query).await; @@ -488,7 +465,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -505,7 +482,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -523,7 +500,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -541,7 +518,7 @@ pub(crate) mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| 
async move { - let search = BamSearch::new(storage.clone()); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); @@ -563,7 +540,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let search = BamSearch::new(storage); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = search.search(query).await; assert!(response.is_err()); @@ -581,7 +558,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let search = BamSearch::new(storage); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) .with_reference_name("20"); let response = search.search(query).await; @@ -600,7 +577,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let search = BamSearch::new(storage); + let search = BamSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam).with_class(Header); let response = search.search(query).await; @@ -614,6 +591,47 @@ pub(crate) mod tests { .await } + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = BamSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(( + "htsnexus_test_NA12878.bam.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_range_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = BamSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam) + .with_reference_name("11") + .with_start(5015000) + .with_end(5050000); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(( + "htsnexus_test_NA12878.bam.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + pub(crate) async fn with_local_storage(test: F) where F: FnOnce(Arc>) -> Fut, @@ -625,4 +643,10 @@ pub(crate) mod tests { pub(crate) fn expected_url() -> String { "http://127.0.0.1:8081/data/htsnexus_test_NA12878.bam".to_string() } + + pub(crate) fn expected_eof_url() -> Url { + Url::new(expected_url()) + .with_headers(Headers::default().with_header("Range", "bytes=2596771-2596798")) + .with_class(Body) + } } diff --git a/htsget-search/src/bcf_search.rs b/htsget-search/src/bcf_search.rs index ef9e7f8e9..6ebcde947 100644 --- a/htsget-search/src/bcf_search.rs +++ b/htsget-search/src/bcf_search.rs @@ -18,44 +18,33 @@ use tracing::{instrument, trace}; use crate::search::{find_first, BgzfSearch, Search}; use crate::{Format, Query, Result}; -use 
htsget_storage::{BytesPosition, Storage}; +use htsget_storage::{BytesPosition, Storage, Streamable}; -type AsyncReader = bcf::AsyncReader>; +type AsyncReader = bcf::AsyncReader>; /// Allows searching through bcf files. -pub struct BcfSearch { - storage: Arc, +pub struct BcfSearch { + storage: Arc, } #[async_trait] -impl BgzfSearch, Header> - for BcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - async fn read_bytes(reader: &mut AsyncReader) -> Option { +impl BgzfSearch for BcfSearch { + async fn read_bytes(reader: &mut AsyncReader) -> Option { reader.read_record(&mut Default::default()).await.ok() } - fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { + fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { reader.get_ref().virtual_position() } } #[async_trait] -impl - Search, Index, AsyncReader, Header> - for BcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - fn init_reader(inner: ReaderType) -> AsyncReader { +impl Search, Index, AsyncReader, Header> for BcfSearch { + fn init_reader(inner: Streamable) -> AsyncReader { AsyncReader::new(inner) } - async fn read_header(reader: &mut AsyncReader) -> io::Result
{ + async fn read_header(reader: &mut AsyncReader) -> io::Result
{ reader.read_header().await } @@ -98,7 +87,7 @@ where Ok(byte_ranges) } - fn get_storage(&self) -> Arc { + fn get_storage(&self) -> Arc { self.storage.clone() } @@ -107,13 +96,9 @@ where } } -impl BcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl BcfSearch { /// Create the bcf search. - pub fn new(storage: Arc) -> Self { + pub fn new(storage: Arc) -> Self { Self { storage } } } @@ -125,16 +110,19 @@ mod tests { use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; use htsget_config::types::Class::Body; use htsget_test::http::concat::ConcatResponse; - use htsget_test::util::expected_bgzf_eof_data_url; + use super::*; #[cfg(feature = "s3-storage")] use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::search::SearchAll; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; use htsget_storage::local::LocalStorage; - - use super::*; + #[cfg(feature = "c4gh-experimental")] + use { + crate::from_storage::tests::with_local_storage_c4gh, + htsget_storage::c4gh::storage::C4GHStorage, htsget_test::c4gh::get_decryption_keys, + }; const DATA_LOCATION: &str = "data/bcf"; const INDEX_FILE_LOCATION: &str = "vcf-spec-v4.3.bcf.csi"; @@ -144,7 +132,7 @@ mod tests { #[tokio::test] async fn search_all_variants() { with_local_storage(|storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf); let response = search.search(query).await; @@ -164,7 +152,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range() { with_local_storage(|storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "vcf-spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_reference_name("20"); let response = search.search(query).await; @@ -172,11 +160,8 @@ mod tests { let expected_response = Ok(Response::new( Format::Bcf, - vec![ - Url::new(expected_url(filename)) - .with_headers(Headers::default().with_header("Range", "bytes=0-949")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(expected_url(filename)) + .with_headers(Headers::default().with_header("Range", "bytes=0-977"))], )); assert_eq!(response, expected_response); @@ -199,7 +184,7 @@ mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf) .with_reference_name("chrM") @@ -231,7 +216,7 @@ mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "vcf-spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -257,7 +242,7 @@ mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let search = 
BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -274,7 +259,7 @@ mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -292,7 +277,7 @@ mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -309,7 +294,7 @@ mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chr1"); let response = search.search(query).await; @@ -326,7 +311,7 @@ mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| async move { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); @@ -348,7 +333,7 @@ mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let search = BcfSearch::new(storage); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf); let response = search.search(query).await; assert!(response.is_err()); @@ -366,7 +351,7 @@ mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let search = BcfSearch::new(storage); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -385,7 +370,7 @@ mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let search = BcfSearch::new(storage); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("vcf-spec-v4.3", Format::Bcf).with_class(Header); let response = search.search(query).await; @@ -399,10 +384,51 @@ mod tests { .await } + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = BcfSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("sample1-bcbio-cancer", Format::Bcf); + let response = search.search(query).await.unwrap(); + + 
println!("{:#?}", response); + + Some(( + "sample1-bcbio-cancer.bcf.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_range_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = BcfSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("sample1-bcbio-cancer", Format::Bcf) + .with_reference_name("chrM") + .with_start(150) + .with_end(153); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(( + "sample1-bcbio-cancer.bcf.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + async fn test_reference_sequence_with_seq_range( storage: Arc>, ) -> Option<(String, ConcatResponse)> { - let search = BcfSearch::new(storage.clone()); + let search = BcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Bcf) .with_reference_name("chrM") @@ -423,11 +449,8 @@ mod tests { fn expected_bcf_response(filename: &str) -> Response { Response::new( Format::Bcf, - vec![ - Url::new(expected_url(filename)) - .with_headers(Headers::default().with_header("Range", "bytes=0-3529")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(expected_url(filename)) + .with_headers(Headers::default().with_header("Range", "bytes=0-3557"))], ) } diff --git a/htsget-search/src/cram_search.rs b/htsget-search/src/cram_search.rs index 0613e7d65..b128d2f89 100644 --- a/htsget-search/src/cram_search.rs +++ b/htsget-search/src/cram_search.rs @@ -23,7 +23,7 @@ use crate::search::{Search, SearchAll, SearchReads}; use crate::Class::Body; use crate::{ConcurrencyError, ParsedHeader}; use crate::{Format, HtsGetError, Query, Result}; -use htsget_storage::{BytesPosition, DataBlock, Storage}; +use htsget_storage::{BytesPosition, DataBlock, Storage, Streamable}; // § 9 End of file container . static CRAM_EOF: &[u8] = &[ @@ -32,21 +32,15 @@ static CRAM_EOF: &[u8] = &[ 0x01, 0x00, 0xee, 0x63, 0x01, 0x4b, ]; -type AsyncReader = cram::AsyncReader>; +type AsyncReader = cram::AsyncReader>; /// Allows searching through cram files. -pub struct CramSearch { - storage: Arc, +pub struct CramSearch { + storage: Arc, } #[async_trait] -impl - SearchAll, Index, AsyncReader, Header> - for CramSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl SearchAll, Index, AsyncReader, Header> for CramSearch { #[instrument(level = "trace", skip_all, ret)] async fn get_byte_ranges_for_all(&self, query: &Query) -> Result> { Ok(vec![ @@ -72,7 +66,7 @@ where async fn get_byte_ranges_for_header( &self, index: &Index, - _reader: &mut AsyncReader, + _reader: &mut AsyncReader, _query: &Query, ) -> Result { Ok( @@ -95,13 +89,7 @@ where } #[async_trait] -impl - SearchReads, Index, AsyncReader, Header> - for CramSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl SearchReads, Index, AsyncReader, Header> for CramSearch { async fn get_reference_sequence_from_name<'a>( &self, header: &'a Header, @@ -142,17 +130,12 @@ where /// PhantomData is used because of a lack of reference sequence data for CRAM. 
#[async_trait] -impl Search, Index, AsyncReader, Header> - for CramSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - fn init_reader(inner: ReaderType) -> AsyncReader { +impl Search, Index, AsyncReader, Header> for CramSearch { + fn init_reader(inner: Streamable) -> AsyncReader { AsyncReader::new(BufReader::new(inner)) } - async fn read_header(reader: &mut AsyncReader) -> io::Result
{ + async fn read_header(reader: &mut AsyncReader) -> io::Result<Header>
{ reader.read_file_definition().await?; Ok( @@ -180,7 +163,7 @@ where .await } - fn get_storage(&self) -> Arc { + fn get_storage(&self) -> Arc { self.storage.clone() } @@ -189,13 +172,9 @@ where } } -impl CramSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl CramSearch { /// Create the cram search. - pub fn new(storage: Arc) -> Self { + pub fn new(storage: Arc) -> Self { Self { storage } } @@ -292,15 +271,18 @@ mod tests { use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; use htsget_test::http::concat::ConcatResponse; - use htsget_test::util::expected_cram_eof_data_url; + use super::*; #[cfg(feature = "s3-storage")] use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; use htsget_storage::local::LocalStorage; - - use super::*; + #[cfg(feature = "c4gh-experimental")] + use { + crate::from_storage::tests::with_local_storage_c4gh, + htsget_storage::c4gh::storage::C4GHStorage, htsget_test::c4gh::get_decryption_keys, + }; const DATA_LOCATION: &str = "data/cram"; const INDEX_FILE_LOCATION: &str = "htsnexus_test_NA12878.cram.crai"; @@ -309,18 +291,15 @@ mod tests { #[tokio::test] async fn search_all_reads() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; println!("{response:#?}"); let expected_response = Ok(Response::new( Format::Cram, - vec![ - Url::new(expected_url()) - .with_headers(Headers::default().with_header("Range", "bytes=0-1672409")), - Url::new(expected_cram_eof_data_url()), - ], + vec![Url::new(expected_url()) + .with_headers(Headers::default().with_header("Range", "bytes=0-1672447"))], )); assert_eq!(response, expected_response); @@ -332,7 +311,7 @@ mod tests { #[tokio::test] async fn search_unmapped_reads() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("*"); let response = search.search(query).await; @@ -345,9 +324,8 @@ mod tests { .with_headers(Headers::default().with_header("Range", "bytes=0-6133")) .with_class(Header), Url::new(expected_url()) - .with_headers(Headers::default().with_header("Range", "bytes=1324614-1672409")) + .with_headers(Headers::default().with_header("Range", "bytes=1324614-1672447")) .with_class(Body), - Url::new(expected_cram_eof_data_url()).with_class(Body), ], )); assert_eq!(response, expected_response); @@ -360,7 +338,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr11() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11"); let response = search.search(query).await; @@ -371,7 +349,7 @@ mod tests { vec![ Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=0-625727")), - Url::new(expected_cram_eof_data_url()), + expected_eof_url().set_class(None), ], 
)); assert_eq!(response, expected_response); @@ -384,7 +362,7 @@ mod tests { #[tokio::test] async fn search_reference_name_without_seq_range_chr20() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -399,7 +377,7 @@ mod tests { Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=625728-1324613")) .with_class(Body), - Url::new(expected_cram_eof_data_url()).with_class(Body), + expected_eof_url(), ], )); assert_eq!(response, expected_response); @@ -412,7 +390,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_seq_range_no_overlap() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000) @@ -425,7 +403,7 @@ mod tests { vec![ Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=0-480537")), - Url::new(expected_cram_eof_data_url()), + expected_eof_url().set_class(None), ], )); assert_eq!(response, expected_response); @@ -438,7 +416,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_seq_range_overlap() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000) @@ -457,7 +435,7 @@ mod tests { #[tokio::test] async fn search_reference_name_with_no_end_position() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("11") .with_start(5000000); @@ -478,7 +456,7 @@ mod tests { vec![ Url::new(expected_url()) .with_headers(Headers::default().with_header("Range", "bytes=0-625727")), - Url::new(expected_cram_eof_data_url()), + expected_eof_url().set_class(None), ], ) } @@ -486,7 +464,7 @@ mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -512,7 +490,7 @@ mod tests { async fn search_non_existent_id_reference_name() { with_local_storage_fn( |storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -529,7 +507,7 @@ mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let search = CramSearch::new(storage.clone()); + let search = 
CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -547,7 +525,7 @@ mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let search = CramSearch::new(storage.clone()); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -566,7 +544,7 @@ mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let search = CramSearch::new(storage); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); let response = search.search(query).await; assert!(response.is_err()); @@ -584,7 +562,7 @@ mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let search = CramSearch::new(storage); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) .with_reference_name("20"); let response = search.search(query).await; @@ -603,7 +581,7 @@ mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let search = CramSearch::new(storage); + let search = CramSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram).with_class(Header); let response = search.search(query).await; @@ -617,6 +595,47 @@ mod tests { .await } + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = CramSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(( + "htsnexus_test_NA12878.cram.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_range_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = CramSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Cram) + .with_reference_name("11") + .with_start(5000000) + .with_end(5050000); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(( + "htsnexus_test_NA12878.cram.c4gh".to_string(), + (response, Body).into(), + )) + }) + .await; + } + async fn with_local_storage(test: F) where F: FnOnce(Arc>) -> Fut, @@ -628,4 +647,10 @@ mod tests { fn expected_url() -> String { "http://127.0.0.1:8081/data/htsnexus_test_NA12878.cram".to_string() } + + pub(crate) fn expected_eof_url() -> Url { + Url::new(expected_url()) + .with_headers(Headers::default().with_header("Range", "bytes=1672410-1672447")) + .with_class(Body) + } } diff --git 
a/htsget-search/src/from_storage.rs b/htsget-search/src/from_storage.rs index 4e6cea59a..04d6cc0d2 100644 --- a/htsget-search/src/from_storage.rs +++ b/htsget-search/src/from_storage.rs @@ -1,22 +1,18 @@ -//! Module providing an implementation of the [HtsGet] trait using a [Storage]. +//! Module providing an implementation of the [HtsGet] trait using a [StorageTrait]. //! use std::sync::Arc; use async_trait::async_trait; -use tokio::io::AsyncRead; use tracing::debug; use tracing::instrument; use htsget_config::resolver::{ResolveResponse, StorageResolver}; use htsget_config::storage::local::LocalStorage as LocalStorageConfig; #[cfg(feature = "s3-storage")] -use {htsget_config::storage::s3::S3Storage as S3StorageConfig, htsget_storage::s3::S3Storage}; +use htsget_config::storage::s3::S3Storage as S3StorageConfig; #[cfg(feature = "url-storage")] -use { - htsget_config::storage::url::UrlStorageClient as UrlStorageConfig, - htsget_storage::url::UrlStorage, -}; +use htsget_config::storage::url::UrlStorageClient as UrlStorageConfig; use crate::search::Search; use crate::Resolver; @@ -28,13 +24,11 @@ use crate::{ {HtsGet, Query, Response, Result}, }; use crate::{Format, HtsGetError}; -use htsget_storage::local::LocalStorage; use htsget_storage::Storage; -/// Implementation of the [HtsGet] trait using a [Storage]. -#[derive(Debug, Clone)] -pub struct HtsGetFromStorage { - storage_ref: Arc, +/// Implementation of the [HtsGet] trait using a [StorageTrait]. +pub struct HtsGetFromStorage { + storage_ref: Arc, } #[async_trait] @@ -48,18 +42,14 @@ impl HtsGet for Vec { impl HtsGet for &[Resolver] { async fn search(&self, mut query: Query) -> Result { self - .resolve_request::>(&mut query) + .resolve_request::(&mut query) .await .ok_or_else(|| HtsGetError::not_found("failed to match query with storage"))? 
} } #[async_trait] -impl HtsGet for HtsGetFromStorage -where - R: AsyncRead + Send + Sync + Unpin, - S: Storage + Sync + Send + 'static, -{ +impl HtsGet for HtsGetFromStorage { #[instrument(level = "debug", skip(self))] async fn search(&self, query: Query) -> Result { debug!(format = ?query.format(), ?query, "searching {:?}, with query {:?}", query.format(), query); @@ -73,51 +63,39 @@ where } #[async_trait] -impl ResolveResponse for HtsGetFromStorage { +impl ResolveResponse for HtsGetFromStorage { async fn from_local( local_storage_config: &LocalStorageConfig, query: &Query, ) -> Result { - let local_storage = local_storage_config.clone(); - let path = local_storage.local_path().to_string(); - let searcher = HtsGetFromStorage::new(LocalStorage::new(path, local_storage)?); + let storage = Storage::from_local(local_storage_config).await?; + let searcher = HtsGetFromStorage::new(storage); searcher.search(query.clone()).await } #[cfg(feature = "s3-storage")] async fn from_s3(s3_storage: &S3StorageConfig, query: &Query) -> Result { - let searcher = HtsGetFromStorage::new( - S3Storage::new_with_default_config( - s3_storage.bucket().to_string(), - s3_storage.clone().endpoint(), - s3_storage.clone().path_style(), - ) - .await, - ); + let storage = Storage::from_s3(s3_storage).await; + let searcher = HtsGetFromStorage::new(storage); searcher.search(query.clone()).await } #[cfg(feature = "url-storage")] async fn from_url(url_storage_config: &UrlStorageConfig, query: &Query) -> Result { - let searcher = HtsGetFromStorage::new(UrlStorage::new( - url_storage_config.client_cloned(), - url_storage_config.url().clone(), - url_storage_config.response_url().clone(), - url_storage_config.forward_headers(), - url_storage_config.header_blacklist().to_vec(), - )); + let storage = Storage::from_url(url_storage_config).await; + let searcher = HtsGetFromStorage::new(storage); searcher.search(query.clone()).await } } -impl HtsGetFromStorage { - pub fn new(storage: S) -> Self { +impl HtsGetFromStorage { + pub fn new(storage: Storage) -> Self { Self { storage_ref: Arc::new(storage), } } - pub fn storage(&self) -> Arc { + pub fn storage(&self) -> Arc { Arc::clone(&self.storage_ref) } } @@ -128,7 +106,9 @@ pub(crate) mod tests { use std::future::Future; use std::path::{Path, PathBuf}; #[cfg(feature = "s3-storage")] - use {htsget_test::aws_mocks::with_s3_test_server, std::fs::create_dir}; + use { + htsget_storage::s3::S3Storage, htsget_test::aws_mocks::with_s3_test_server, std::fs::create_dir, + }; use http::uri::Authority; use tempfile::TempDir; @@ -136,8 +116,10 @@ pub(crate) mod tests { use htsget_config::storage; use htsget_config::types::Class::Body; use htsget_config::types::Scheme::Http; + use htsget_storage::local::LocalStorage; + #[cfg(feature = "c4gh-experimental")] + use htsget_test::c4gh::decrypt_data; use htsget_test::http::concat::ConcatResponse; - use htsget_test::util::expected_bgzf_eof_data_url; use crate::bam_search::tests::{ expected_url as bam_expected_url, with_local_storage as with_bam_local_storage, BAM_FILE_NAME, @@ -153,18 +135,16 @@ pub(crate) mod tests { #[tokio::test] async fn search_bam() { with_bam_local_storage(|storage| async move { - let htsget = HtsGetFromStorage::new(Arc::try_unwrap(storage).unwrap()); + let storage = Arc::try_unwrap(storage).unwrap(); + let htsget = HtsGetFromStorage::new(Storage::new(storage)); let query = Query::new_with_default_request("htsnexus_test_NA12878", Format::Bam); let response = htsget.search(query).await; println!("{response:#?}"); let expected_response 
= Ok(Response::new( Format::Bam, - vec![ - Url::new(bam_expected_url()) - .with_headers(Headers::default().with_header("Range", "bytes=0-2596770")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(bam_expected_url()) + .with_headers(Headers::default().with_header("Range", "bytes=0-2596798"))], )); assert_eq!(response, expected_response); @@ -176,7 +156,8 @@ pub(crate) mod tests { #[tokio::test] async fn search_vcf() { with_vcf_local_storage(|storage| async move { - let htsget = HtsGetFromStorage::new(Arc::try_unwrap(storage).unwrap()); + let storage = Arc::try_unwrap(storage).unwrap(); + let htsget = HtsGetFromStorage::new(Storage::new(storage)); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf); let response = htsget.search(query).await; @@ -198,7 +179,7 @@ pub(crate) mod tests { |_, local_storage| async move { let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf); - let response = HtsGetFromStorage::<()>::from_local(&local_storage, &query).await; + let response = HtsGetFromStorage::from_local(&local_storage, &query).await; assert_eq!(response, expected_vcf_response(filename)); @@ -245,11 +226,8 @@ pub(crate) mod tests { fn expected_vcf_response(filename: &str) -> Result { Ok(Response::new( Format::Vcf, - vec![ - Url::new(vcf_expected_url(filename)) - .with_headers(Headers::default().with_header("Range", "bytes=0-822")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(vcf_expected_url(filename)) + .with_headers(Headers::default().with_header("Range", "bytes=0-850"))], )) } @@ -270,10 +248,15 @@ pub(crate) mod tests { base_path } - async fn with_config_local_storage(test: F, path: &str, copy_files: &[&str]) - where + async fn with_config_local_storage_map( + test: F, + path: &str, + copy_files: &[&str], + map: M, + ) where F: FnOnce(PathBuf, LocalStorageConfig) -> Fut, Fut: Future>, + M: FnOnce(&[u8]) -> Vec, { let tmp_dir = TempDir::new().unwrap(); let base_path = copy_files_from(path, tmp_dir.path(), copy_files).await; @@ -286,22 +269,35 @@ pub(crate) mod tests { Authority::from_static("127.0.0.1:8081"), base_path.to_str().unwrap().to_string(), "/data".to_string(), + Default::default(), ), ) .await; - read_records(response, &base_path).await; + read_records(response, &base_path, map).await; + } + + async fn with_config_local_storage(test: F, path: &str, copy_files: &[&str]) + where + F: FnOnce(PathBuf, LocalStorageConfig) -> Fut, + Fut: Future>, + { + with_config_local_storage_map(test, path, copy_files, |b| b.to_vec()).await; } - async fn read_records(response: Option<(String, ConcatResponse)>, base_path: &Path) { + async fn read_records(response: Option<(String, ConcatResponse)>, base_path: &Path, map: F) + where + F: FnOnce(&[u8]) -> Vec, + { if let Some((target_file, response)) = response { - response + let records = response .concat_from_file_path(&base_path.join(target_file)) .await - .unwrap() - .read_records() - .await .unwrap(); + + let bytes = map(records.merged_bytes()); + + records.set_bytes(bytes).read_records().await.unwrap(); } } @@ -323,6 +319,26 @@ pub(crate) mod tests { .await; } + #[cfg(feature = "c4gh-experimental")] + pub(crate) async fn with_local_storage_c4gh(test: F) + where + F: FnOnce(Arc>) -> Fut, + Fut: Future>, + { + with_config_local_storage_map( + |base_path, local_storage| async { + test(Arc::new( + LocalStorage::new(base_path, local_storage).unwrap(), + )) + .await + }, + "data/c4gh", + &[], + decrypt_data, + ) + .await; + } + #[cfg(feature 
= "s3-storage")] pub(crate) async fn with_aws_storage_fn(test: F, path: &str, copy_files: &[&str]) where @@ -338,7 +354,7 @@ pub(crate) mod tests { with_aws_s3_storage_fn( |storage| async { let response = test(storage).await; - read_records(response, &base_path).await; + read_records(response, &base_path, |b| b.to_vec()).await; }, "folder".to_string(), base_path.parent().unwrap(), diff --git a/htsget-search/src/lib.rs b/htsget-search/src/lib.rs index 981e7bbf0..27522a5a5 100644 --- a/htsget-search/src/lib.rs +++ b/htsget-search/src/lib.rs @@ -7,10 +7,11 @@ pub use htsget_config::config::{Config, DataServerConfig, ServiceInfo, TicketSer pub use htsget_config::resolver::{ IdResolver, QueryAllowed, ResolveResponse, Resolver, StorageResolver, }; -pub use htsget_config::storage::Storage; +pub use htsget_config::storage::Storage as ConfigStorage; pub use htsget_config::types::{ Class, Format, Headers, HtsGetError, JsonResponse, Query, Response, Result, Url, }; +pub use htsget_storage::Storage; pub use htsget_storage::local::LocalStorage; diff --git a/htsget-search/src/search.rs b/htsget-search/src/search.rs index 15767b3b2..664775cff 100644 --- a/htsget-search/src/search.rs +++ b/htsget-search/src/search.rs @@ -27,7 +27,10 @@ use htsget_config::types::Class::Header; use crate::ConcurrencyError; use crate::{Class, Class::Body, Format, HtsGetError, Query, Response, Result}; -use htsget_storage::{BytesPosition, HeadOptions, RangeUrlOptions, Storage}; +use htsget_storage::{ + BytesPosition, BytesPositionOptions, HeadOptions, RangeUrlOptions, Storage, StorageTrait, + Streamable, +}; use htsget_storage::{DataBlock, GetOptions}; // § 4.1.2 End-of-file marker . @@ -68,7 +71,7 @@ pub(crate) async fn find_first( /// [Reader] is the format's reader type. /// [Header] is the format's header type. #[async_trait] -pub trait SearchAll +pub trait SearchAll where Index: Send + Sync, { @@ -91,6 +94,28 @@ where /// Get the eof data block for this format. fn get_eof_data_block(&self) -> Option; + + /// Get the eof bytes positions converting from a data block. + fn get_eof_byte_positions(&self, file_size: u64) -> Option> { + if let Some(DataBlock::Data(data, class)) = self.get_eof_data_block() { + let data_len = + u64::try_from(data.len()).map_err(|err| HtsGetError::InvalidInput(err.to_string())); + + return match data_len { + Ok(data_len) => { + let bytes_position = BytesPosition::default() + .with_start(file_size - data_len) + .with_end(file_size); + let bytes_position = bytes_position.set_class(class); + + Some(Ok(bytes_position)) + } + Err(err) => Some(Err(err)), + }; + } + + None + } } /// [SearchReads] represents searching bytes ranges for the reads endpoint. @@ -102,11 +127,9 @@ where /// [Reader] is the format's reader type. /// [Header] is the format's header type. #[async_trait] -pub trait SearchReads: - Search +pub trait SearchReads: + Search where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, Reader: Send, Header: Send + Sync, Index: Send + Sync, @@ -170,17 +193,15 @@ where /// [Reader] is the format's reader type. /// [Header] is the format's header type. #[async_trait] -pub trait Search: - SearchAll +pub trait Search: + SearchAll where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, Index: Send + Sync, Header: Send + Sync, Reader: Send, Self: Sync + Send, { - fn init_reader(inner: ReaderType) -> Reader; + fn init_reader(inner: Streamable) -> Reader; async fn read_header(reader: &mut Reader) -> io::Result
; async fn read_index_inner(inner: T) -> io::Result; @@ -194,7 +215,7 @@ where ) -> Result>; /// Get the storage of this format. - fn get_storage(&self) -> Arc; + fn get_storage(&self) -> Arc; /// Get the format of this format. fn get_format(&self) -> Format; @@ -205,7 +226,7 @@ where let file_size = self .get_storage() .head( - query.format().fmt_file(query.id()), + &query.format().fmt_file(query.id()), HeadOptions::new(query.request().headers()), ) .await?; @@ -223,7 +244,7 @@ where let storage = self .get_storage() .get( - query.format().fmt_index(query.id()), + &query.format().fmt_index(query.id()), GetOptions::new_with_default_range(query.request().headers()), ) .await?; @@ -245,7 +266,7 @@ where ))); } - let byte_ranges = match query.reference_name().as_ref() { + let mut byte_ranges = match query.reference_name().as_ref() { None => self.get_byte_ranges_for_all(&query).await?, Some(reference_name) => { let index = self.read_index(&query).await?; @@ -272,11 +293,25 @@ where } }; - let mut blocks = DataBlock::from_bytes_positions(byte_ranges); - if let Some(eof) = self.get_eof_data_block() { - blocks.push(eof); + let file_size = self + .get_storage() + .head( + &query.format().fmt_file(query.id()), + HeadOptions::new(query.request().headers()), + ) + .await?; + if let Some(eof) = self.get_eof_byte_positions(file_size) { + byte_ranges.push(eof?); } + let blocks = self + .get_storage() + .update_byte_positions( + &query.format().fmt_file(query.id()), + BytesPositionOptions::new(byte_ranges, query.request().headers()), + ) + .await?; + self.build_response(&query, blocks).await } Class::Header => { @@ -284,7 +319,7 @@ where self .get_storage() .head( - query.format().fmt_file(query.id()), + &query.format().fmt_file(query.id()), HeadOptions::new(query.request().headers()), ) .await?; @@ -298,12 +333,15 @@ where .get_byte_ranges_for_header(&index, &mut reader, &query) .await?; - self - .build_response( - &query, - DataBlock::from_bytes_positions(vec![header_byte_ranges]), + let blocks = self + .get_storage() + .update_byte_positions( + &query.format().fmt_file(query.id()), + BytesPositionOptions::new(vec![header_byte_ranges], query.request().headers()), ) - .await + .await?; + + self.build_response(&query, blocks).await } } } @@ -323,14 +361,15 @@ where storage_futures.push_back(tokio::spawn(async move { storage .range_url( - query_owned.format().fmt_file(query_owned.id()), + &query_owned.format().fmt_file(query_owned.id()), RangeUrlOptions::new(range, query_owned.request().headers()), ) .await })); } DataBlock::Data(data, class) => { - storage_futures.push_back(tokio::spawn(async move { Ok(S::data_url(data, class)) })); + let data_url = self.get_storage().data_url(data, class); + storage_futures.push_back(tokio::spawn(async move { Ok(data_url) })); } } } @@ -343,7 +382,7 @@ where } } - return Ok(Response::new(query.format(), urls)); + Ok(Response::new(query.format(), urls)) } /// Get the header from the file specified by the id and format. @@ -357,7 +396,7 @@ where let reader_type = self .get_storage() - .get(query.format().fmt_file(query.id()), get_options) + .get(&query.format().fmt_file(query.id()), get_options) .await?; let mut reader = Self::init_reader(reader_type); @@ -381,12 +420,10 @@ where /// [Reader] is the format's reader type. /// [Header] is the format's header type. 
#[async_trait] -pub trait BgzfSearch: - Search, Index, Reader, Header> +pub trait BgzfSearch: + Search, Index, Reader, Header> where - S: Storage + Send + Sync + 'static, I: reference_sequence::Index + Send + Sync, - ReaderType: AsyncRead + Unpin + Send + Sync, Reader: Send + Sync, Header: Send + Sync, { @@ -444,7 +481,7 @@ where let gzi_data = self .get_storage() .get( - query.format().fmt_gzi(query.id())?, + &query.format().fmt_gzi(query.id())?, GetOptions::new_with_default_range(query.request().headers()), ) .await; @@ -554,15 +591,12 @@ where } #[async_trait] -impl - SearchAll, Index, Reader, Header> for T +impl SearchAll, Index, Reader, Header> for T where - S: Storage + Send + Sync + 'static, I: reference_sequence::Index + Send + Sync, - ReaderType: AsyncRead + Unpin + Send + Sync, Reader: Send + Sync, Header: Send + Sync, - T: BgzfSearch + Send + Sync, + T: BgzfSearch + Send + Sync, { #[instrument(level = "debug", skip(self), ret)] async fn get_byte_ranges_for_all(&self, query: &Query) -> Result> { diff --git a/htsget-search/src/vcf_search.rs b/htsget-search/src/vcf_search.rs index f104a37e9..d07a56955 100644 --- a/htsget-search/src/vcf_search.rs +++ b/htsget-search/src/vcf_search.rs @@ -22,44 +22,33 @@ use htsget_config::types::HtsGetError; use crate::search::{find_first, BgzfSearch, Search}; use crate::{Format, Query, Result}; -use htsget_storage::{BytesPosition, Storage}; +use htsget_storage::{BytesPosition, Storage, Streamable}; -type AsyncReader = vcf::AsyncReader>; +type AsyncReader = vcf::AsyncReader>; /// Allows searching through vcf files. -pub struct VcfSearch { - storage: Arc, +pub struct VcfSearch { + storage: Arc, } #[async_trait] -impl BgzfSearch, Header> - for VcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - async fn read_bytes(reader: &mut AsyncReader) -> Option { +impl BgzfSearch for VcfSearch { + async fn read_bytes(reader: &mut AsyncReader) -> Option { reader.read_record(&mut Default::default()).await.ok() } - fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { + fn virtual_position(&self, reader: &AsyncReader) -> VirtualPosition { reader.get_ref().virtual_position() } } #[async_trait] -impl - Search, Index, AsyncReader, Header> - for VcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ - fn init_reader(inner: ReaderType) -> AsyncReader { +impl Search, Index, AsyncReader, Header> for VcfSearch { + fn init_reader(inner: Streamable) -> AsyncReader { AsyncReader::new(bgzf::AsyncReader::new(inner)) } - async fn read_header(reader: &mut AsyncReader) -> io::Result
{ + async fn read_header(reader: &mut AsyncReader) -> io::Result<Header>
{ reader.read_header().await } @@ -109,7 +98,7 @@ where Ok(byte_ranges) } - fn get_storage(&self) -> Arc { + fn get_storage(&self) -> Arc { self.storage.clone() } @@ -118,13 +107,9 @@ where } } -impl VcfSearch -where - S: Storage + Send + Sync + 'static, - ReaderType: AsyncRead + Unpin + Send + Sync, -{ +impl VcfSearch { /// Create the vcf search. - pub fn new(storage: Arc) -> Self { + pub fn new(storage: Arc) -> Self { Self { storage } } } @@ -136,16 +121,19 @@ pub(crate) mod tests { use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; use htsget_config::types::Class::Body; use htsget_test::http::concat::ConcatResponse; - use htsget_test::util::expected_bgzf_eof_data_url; + use super::*; #[cfg(feature = "s3-storage")] use crate::from_storage::tests::with_aws_storage_fn; use crate::from_storage::tests::with_local_storage_fn; use crate::search::SearchAll; use crate::{Class::Header, Headers, HtsGetError::NotFound, Response, Url}; use htsget_storage::local::LocalStorage; - - use super::*; + #[cfg(feature = "c4gh-experimental")] + use { + crate::from_storage::tests::with_local_storage_c4gh, + htsget_storage::c4gh::storage::C4GHStorage, htsget_test::c4gh::get_decryption_keys, + }; const VCF_LOCATION: &str = "data/vcf"; const INDEX_FILE_LOCATION: &str = "spec-v4.3.vcf.gz.tbi"; @@ -155,7 +143,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_all_variants() { with_local_storage(|storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf); let response = search.search(query).await; @@ -175,7 +163,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_without_seq_range() { with_local_storage(|storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf).with_reference_name("20"); let response = search.search(query).await; @@ -183,11 +171,8 @@ pub(crate) mod tests { let expected_response = Ok(Response::new( Format::Vcf, - vec![ - Url::new(expected_url(filename)) - .with_headers(Headers::default().with_header("Range", "bytes=0-822")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(expected_url(filename)) + .with_headers(Headers::default().with_header("Range", "bytes=0-850"))], )); assert_eq!(response, expected_response); @@ -208,7 +193,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_reference_name_no_end_position() { with_local_storage(|storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf) .with_reference_name("chrM") @@ -244,7 +229,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header() { with_local_storage(|storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "spec-v4.3"; let query = Query::new_with_default_request(filename, Format::Vcf).with_class(Header); let response = search.search(query).await; @@ -270,7 +255,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name() { 
with_local_storage_fn( |storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -287,7 +272,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads() { with_local_storage_fn( |storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -305,7 +290,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header() { with_local_storage_fn( |storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let response = search.search(query).await; assert!(matches!(response, Err(NotFound(_)))); @@ -321,7 +306,7 @@ pub(crate) mod tests { #[tokio::test] async fn search_header_with_non_existent_reference_name() { with_local_storage(|storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chr1"); let response = search.search(query).await; @@ -338,7 +323,7 @@ pub(crate) mod tests { async fn get_header_end_offset() { with_local_storage_fn( |storage| async move { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let index = search.read_index(&query).await.unwrap(); @@ -359,7 +344,7 @@ pub(crate) mod tests { async fn search_non_existent_id_reference_name_aws() { with_aws_storage_fn( |storage| async move { - let search = VcfSearch::new(storage); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); let response = search.search(query).await; assert!(response.is_err()); @@ -377,7 +362,7 @@ pub(crate) mod tests { async fn search_non_existent_id_all_reads_aws() { with_aws_storage_fn( |storage| async move { - let search = VcfSearch::new(storage); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_reference_name("chrM"); let response = search.search(query).await; @@ -396,7 +381,7 @@ pub(crate) mod tests { async fn search_non_existent_id_header_aws() { with_aws_storage_fn( |storage| async move { - let search = VcfSearch::new(storage); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let query = Query::new_with_default_request("spec-v4.3", Format::Vcf).with_class(Header); let response = search.search(query).await; assert!(response.is_err()); @@ -409,10 +394,45 @@ pub(crate) mod tests { .await } + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), 
Arc::try_unwrap(storage).unwrap()); + let search = VcfSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("spec-v4.3", Format::Vcf); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(("spec-v4.3.vcf.gz.c4gh".to_string(), (response, Body).into())) + }) + .await; + } + + #[cfg(feature = "c4gh-experimental")] + #[tokio::test] + async fn search_all_range_c4gh() { + with_local_storage_c4gh(|storage| async move { + let storage = C4GHStorage::new(get_decryption_keys(), Arc::try_unwrap(storage).unwrap()); + let search = VcfSearch::new(Arc::new(Storage::new(storage))); + let query = Query::new_with_default_request("spec-v4.3", Format::Vcf) + .with_reference_name("20") + .with_start(150) + .with_end(153); + let response = search.search(query).await.unwrap(); + + println!("{:#?}", response); + + Some(("spec-v4.3.vcf.gz.c4gh".to_string(), (response, Body).into())) + }) + .await; + } + async fn test_reference_name_with_seq_range( storage: Arc>, ) -> Option<(String, ConcatResponse)> { - let search = VcfSearch::new(storage.clone()); + let search = VcfSearch::new(Arc::new(Storage::new(Arc::try_unwrap(storage).unwrap()))); let filename = "sample1-bcbio-cancer"; let query = Query::new_with_default_request(filename, Format::Vcf) .with_reference_name("chrM") @@ -433,11 +453,8 @@ pub(crate) mod tests { fn expected_vcf_response(filename: &str) -> Response { Response::new( Format::Vcf, - vec![ - Url::new(expected_url(filename)) - .with_headers(Headers::default().with_header("Range", "bytes=0-3465")), - Url::new(expected_bgzf_eof_data_url()), - ], + vec![Url::new(expected_url(filename)) + .with_headers(Headers::default().with_header("Range", "bytes=0-3493"))], ) } diff --git a/htsget-storage/Cargo.toml b/htsget-storage/Cargo.toml index 1d48f0e33..92ac04591 100644 --- a/htsget-storage/Cargo.toml +++ b/htsget-storage/Cargo.toml @@ -15,7 +15,6 @@ s3-storage = [ "dep:bytes", "dep:aws-sdk-s3", "dep:aws-config", - "dep:pin-project-lite", "htsget-config/s3-storage", "htsget-test/s3-storage", "htsget-test/aws-mocks" @@ -23,10 +22,10 @@ s3-storage = [ url-storage = [ "dep:bytes", "dep:reqwest", - "dep:pin-project-lite", "htsget-config/url-storage", "htsget-test/url-storage" ] +c4gh-experimental = ["dep:crypt4gh", "dep:bincode", "htsget-config/c4gh-experimental", "htsget-test/c4gh-experimental"] default = [] [dependencies] @@ -39,6 +38,7 @@ tokio-util = { version = "0.7", features = ["io", "compat"] } futures = { version = "0.3" } futures-util = "0.3" async-trait = "0.1" +pin-project-lite = { version = "0.2" } # Amazon S3 bytes = { version = "1", optional = true } @@ -47,7 +47,10 @@ aws-config = { version = "1", optional = true } # Url storage reqwest = { version = "0.12", features = ["rustls-tls", "stream"], default-features = false, optional = true } -pin-project-lite = { version = "0.2", optional = true } + +# Crypt4GH +crypt4gh = { version = "0.4", git = "https://github.com/EGA-archive/crypt4gh-rust", optional = true } +bincode = { version = "1", optional = true } # Error control, tracing, config thiserror = "1" diff --git a/htsget-storage/README.md b/htsget-storage/README.md index afe8f2c43..7eb64f2dd 100644 --- a/htsget-storage/README.md +++ b/htsget-storage/README.md @@ -49,6 +49,7 @@ and [url] modules implement the `Storage` functionality. This crate has the following features: * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. 
+* `c4gh-experimental`: used to enable `C4GHStorage` functionality. [local]: src/local.rs [s3]: src/s3.rs diff --git a/htsget-storage/src/c4gh/edit.rs b/htsget-storage/src/c4gh/edit.rs new file mode 100644 index 000000000..f4660dad4 --- /dev/null +++ b/htsget-storage/src/c4gh/edit.rs @@ -0,0 +1,268 @@ +//! Edit list functionality. +//! + +use crate::c4gh::DeserializedHeader; +use crate::error::{Result, StorageError}; +use crypt4gh::error::Crypt4GHError; +use crypt4gh::error::Crypt4GHError::InvalidPacketType; +use crypt4gh::header::{encrypt, make_packet_data_edit_list, make_packet_data_enc}; +use crypt4gh::Keys; +use std::collections::HashSet; +use tokio::io; + +/// Unencrypted byte range positions. Contains inclusive start values and exclusive end values. +#[derive(Debug, Clone)] +pub struct UnencryptedPosition { + start: u64, + end: u64, +} + +impl UnencryptedPosition { + /// Create new positions. + pub fn new(start: u64, end: u64) -> Self { + Self { start, end } + } +} + +/// Encrypted byte range positions. Contains inclusive start values and exclusive end values. +#[derive(Debug, Clone)] +pub struct ClampedPosition { + start: u64, + end: u64, +} + +impl ClampedPosition { + /// Create new positions. + pub fn new(start: u64, end: u64) -> Self { + Self { start, end } + } +} + +/// Bytes representing a header packet with an edit list. +#[derive(Debug, Clone)] +pub struct Header { + header_info: Vec, + data_enc_packets: Vec, + edit_list_packet: Vec, +} + +impl Header { + /// Create a new header. + pub fn new(header_info: Vec, data_enc_packets: Vec, edit_list_packet: Vec) -> Self { + Self { + header_info, + data_enc_packets, + edit_list_packet, + } + } + + /// Get the inner values. + pub fn into_inner(self) -> (Vec, Vec, Vec) { + ( + self.header_info, + self.data_enc_packets, + self.edit_list_packet, + ) + } +} + +impl From<(Vec, Vec, Vec)> for Header { + fn from((header_info, data_enc_packets, edit_list_packet): (Vec, Vec, Vec)) -> Self { + Self::new(header_info, data_enc_packets, edit_list_packet) + } +} + +/// The edit header struct creates and updates C4GH headers with edit lists. +pub struct EditHeader<'a> { + unencrypted_positions: Vec, + clamped_positions: Vec, + keys: &'a [Keys], + current_header: &'a mut DeserializedHeader, +} + +impl<'a> EditHeader<'a> { + /// Create a new edit header. + pub fn new( + unencrypted_positions: Vec, + clamped_positions: Vec, + keys: &'a [Keys], + current_header: &'a mut DeserializedHeader, + ) -> Self { + Self { + unencrypted_positions, + clamped_positions, + keys, + current_header, + } + } + + /// Encrypt the header packet. + pub fn encrypt_header_packet(&self, header_packet: Vec) -> Result> { + Ok( + encrypt(&header_packet, &HashSet::from_iter(self.keys.to_vec()))? + .into_iter() + .last() + .ok_or_else(|| { + Crypt4GHError::UnableToEncryptPacket("could not encrypt header packet".to_string()) + })?, + ) + } + + /// Create the edit lists from the unencrypted byte positions. + pub fn create_edit_list(&self) -> Vec { + let mut unencrypted_positions: Vec = self + .unencrypted_positions + .iter() + .flat_map(|pos| [pos.start, pos.end]) + .collect(); + + // Collect the clamped and unencrypted positions into separate edit list groups. + let (mut edit_list, last_discard) = + self + .clamped_positions + .iter() + .fold((vec![], 0), |(mut edit_list, previous_discard), pos| { + // Get the correct number of unencrypted positions that fit within this clamped position. 
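+          // These, together with the clamped start and end, become alternating skip/keep byte
+          // lengths (a Crypt4GH edit list starts with a skip); any trailing odd length is carried
+          // into the next clamped range via `previous_discard`/`next_discard`.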
+ let partition = + unencrypted_positions.partition_point(|unencrypted_pos| unencrypted_pos <= &pos.end); + let mut positions: Vec = unencrypted_positions.drain(..partition).collect(); + + // Merge all positions. + positions.insert(0, pos.start); + positions.push(pos.end); + + // Find the difference between consecutive positions to get the edits. + let mut positions: Vec = positions + .iter() + .zip(positions.iter().skip(1)) + .map(|(start, end)| end - start) + .collect(); + + // Add the previous discard to the first edit. + if let Some(first) = positions.first_mut() { + *first += previous_discard; + } + + // If the last edit is a discard, then carry this over into the next iteration. + let next_discard = if positions.len() % 2 == 0 { + 0 + } else { + positions.pop().unwrap_or(0) + }; + + // Add edits to the accumulating edit list. + edit_list.extend(positions); + (edit_list, next_discard) + }); + + // If there is a final discard, then add this to the edit list. + if last_discard != 0 { + edit_list.push(last_discard); + } + + edit_list + } + + /// Add edit lists and return a header packet. + pub fn reencrypt_header(self) -> Result
{ + if self.current_header.contains_edit_list { + return Err(StorageError::IoError( + "edit lists already exist".to_string(), + io::Error::other(Crypt4GHError::TooManyEditListPackets), + )); + } + + let edit_list = self.create_edit_list(); + let edit_list_packet = + make_packet_data_edit_list(edit_list.into_iter().map(|edit| edit as usize).collect()); + + let edit_list_bytes = self.encrypt_header_packet(edit_list_packet)?; + let edit_list_bytes = [ + ((edit_list_bytes.len() + 4) as u32).to_le_bytes().to_vec(), + edit_list_bytes, + ] + .concat(); + + let mut header_packets = vec![]; + for session_key in self.current_header.session_keys.as_slice() { + let data_enc_packet = make_packet_data_enc( + 0, + session_key + .as_slice() + .try_into() + .map_err(|_| Crypt4GHError::NoValidHeaderPacket)?, + ); + let header_packet = self.encrypt_header_packet(data_enc_packet)?; + header_packets.push( + [ + ((header_packet.len() + 4) as u32).to_le_bytes().to_vec(), + header_packet, + ] + .concat(), + ) + } + + self.current_header.header_info.packets_count += 1 + header_packets.len() as u32; + let header_info_bytes = + bincode::serialize(&self.current_header.header_info).map_err(|_| InvalidPacketType)?; + + Ok( + ( + header_info_bytes, + header_packets.into_iter().flatten().collect(), + edit_list_bytes, + ) + .into(), + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use htsget_test::c4gh::get_decryption_keys; + use htsget_test::util::default_dir; + use std::fs::File; + use std::io::{BufReader, Cursor, Read}; + + #[tokio::test] + async fn test_create_edit_list() { + let mut src = + File::open(default_dir().join("data/c4gh/htsnexus_test_NA12878.bam.c4gh")).unwrap(); + let mut buf = vec![]; + src.read_to_end(&mut buf).unwrap(); + + let mut buf = BufReader::new(Cursor::new(buf)); + let keys = get_decryption_keys(); + + let edit = EditHeader::new( + test_unencrypted_positions(), + test_clamped_positions(), + &keys, + &mut DeserializedHeader::from_buffer(&mut buf, &keys).unwrap(), + ) + .create_edit_list(); + + assert_eq!(edit, expected_edit_list()); + } + + fn test_unencrypted_positions() -> Vec { + vec![ + UnencryptedPosition::new(0, 7853), + UnencryptedPosition::new(145110, 453039), + UnencryptedPosition::new(5485074, 5485112), + ] + } + + fn test_clamped_positions() -> Vec { + vec![ + ClampedPosition::new(0, 65536), + ClampedPosition::new(131072, 458752), + ClampedPosition::new(5439488, 5485112), + ] + } + + fn expected_edit_list() -> Vec { + vec![0, 7853, 71721, 307929, 51299, 38] + } +} diff --git a/htsget-storage/src/c4gh/mod.rs b/htsget-storage/src/c4gh/mod.rs new file mode 100644 index 000000000..0d2e476fe --- /dev/null +++ b/htsget-storage/src/c4gh/mod.rs @@ -0,0 +1,268 @@ +//! This module contains `Storage` implementations for accessing Crypt4GH encrypted data. +//! These serve as wrappers around other `Storage` implementations. +//! + +use crypt4gh::error::Crypt4GHError; +use crypt4gh::header::{DecryptedHeaderPackets, HeaderInfo}; +use crypt4gh::{header, Keys}; +use std::cmp::min; +use std::io::Read; + +mod edit; +pub mod storage; + +pub const ENCRYPTED_BLOCK_SIZE: u64 = 65536; +pub const NONCE_SIZE: u64 = 12; // ChaCha20 IETF Nonce size +pub const MAC_SIZE: u64 = 16; + +const DATA_BLOCK_SIZE: u64 = NONCE_SIZE + ENCRYPTED_BLOCK_SIZE + MAC_SIZE; + +/// Represents a C4GH which is deserialized into relevant information relevant to `C4GHStorage`. 
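+/// It holds the parsed header info, the decrypted session keys, the total header size in bytes,
+/// and whether the header already contains an edit list packet.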
+#[derive(Debug)] +pub struct DeserializedHeader { + pub(crate) header_info: HeaderInfo, + pub(crate) session_keys: Vec>, + pub(crate) header_size: u64, + pub(crate) contains_edit_list: bool, +} + +impl DeserializedHeader { + /// Create a new deserialized header. + pub fn new( + header_info: HeaderInfo, + session_keys: Vec>, + header_size: u64, + contains_edit_list: bool, + ) -> Self { + Self { + header_info, + session_keys, + header_size, + contains_edit_list, + } + } + + /// Grab all the required information from the header. + /// This is more or less directly copied from https://github.com/EGA-archive/crypt4gh-rust/blob/2d41a1770067003bc67ab499841e0def186ed218/src/lib.rs#L283-L314 + pub fn from_buffer( + read_buffer: &mut R, + keys: &[Keys], + ) -> Result { + // Get header info + let mut temp_buf = [0_u8; 16]; // Size of the header + read_buffer + .read_exact(&mut temp_buf) + .map_err(|e| Crypt4GHError::ReadHeaderError(e.into()))?; + let header_info: header::HeaderInfo = header::deconstruct_header_info(&temp_buf)?; + + let mut bytes = vec![]; + let mut header_lengths = 0; + // Calculate header packets + let encrypted_packets = (0..header_info.packets_count) + .map(|_| { + // Get length + let mut length_buffer = [0_u8; 4]; + read_buffer + .read_exact(&mut length_buffer) + .map_err(|e| Crypt4GHError::ReadHeaderPacketLengthError(e.into()))?; + + bytes.extend(length_buffer); + + let length = bincode::deserialize::(&length_buffer) + .map_err(|e| Crypt4GHError::ParseHeaderPacketLengthError(e))?; + + header_lengths += length; + + let length = length - 4; + + // Get data + let mut encrypted_data = vec![0_u8; length as usize]; + read_buffer + .read_exact(&mut encrypted_data) + .map_err(|e| Crypt4GHError::ReadHeaderPacketDataError(e.into()))?; + + bytes.extend(encrypted_data.clone()); + + Ok(encrypted_data) + }) + .collect::>, Crypt4GHError>>()?; + + let DecryptedHeaderPackets { + data_enc_packets: session_keys, + edit_list_packet, + } = header::deconstruct_header_body(encrypted_packets, keys, &None)?; + + let header_size = 16 + header_lengths; + let contains_header = edit_list_packet.is_some(); + + Ok(DeserializedHeader::new( + header_info, + session_keys, + header_size as u64, + contains_header, + )) + } +} + +/// Convert an encrypted file position to an unencrypted position if the header length is known. +pub fn to_unencrypted(encrypted_position: u64, header_length: u64) -> u64 { + let number_data_blocks = encrypted_position / DATA_BLOCK_SIZE; + let mut additional_bytes = number_data_blocks * (NONCE_SIZE + MAC_SIZE); + + let remainder = encrypted_position % DATA_BLOCK_SIZE; + if remainder != 0 { + additional_bytes += NONCE_SIZE; + } + + encrypted_position - header_length - additional_bytes +} + +/// Convert an encrypted file size to an unencrypted file size if the header length is known. +pub fn to_unencrypted_file_size(encrypted_file_size: u64, header_length: u64) -> u64 { + to_unencrypted(encrypted_file_size, header_length) - MAC_SIZE +} + +fn to_current_data_block(pos: u64, header_len: u64) -> u64 { + header_len + (pos / ENCRYPTED_BLOCK_SIZE) * DATA_BLOCK_SIZE +} + +/// Convert an unencrypted position to an encrypted position as shown in +/// https://samtools.github.io/hts-specs/crypt4gh.pdf chapter 4.1. +pub fn unencrypted_to_data_block(pos: u64, header_len: u64, encrypted_file_size: u64) -> u64 { + min(encrypted_file_size, to_current_data_block(pos, header_len)) +} + +/// Get the next data block position from the unencrypted position. 
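+/// As an illustration (mirroring the tests below): with a 120-byte header, an unencrypted
+/// position of 100000 falls within the second 65536-byte block, so the next boundary in the
+/// encrypted file is 120 + 2 * (12 + 65536 + 16), clamped to the encrypted file size.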
+pub fn unencrypted_to_next_data_block(pos: u64, header_len: u64, encrypted_file_size: u64) -> u64 { + min( + encrypted_file_size, + to_current_data_block(pos, header_len) + DATA_BLOCK_SIZE, + ) +} + +fn unencrypted_clamped_position(pos: u64, encrypted_file_size: u64) -> u64 { + let data_block_positions = unencrypted_to_data_block(pos, 0, encrypted_file_size); + let data_block_count = data_block_positions / DATA_BLOCK_SIZE; + + data_block_positions - ((NONCE_SIZE + MAC_SIZE) * data_block_count) +} + +/// Convert an unencrypted position to the additional bytes prior to the position that must be +/// included when encrypting data blocks. +pub fn unencrypted_clamp(pos: u64, header_length: u64, encrypted_file_size: u64) -> u64 { + min( + to_unencrypted_file_size(encrypted_file_size, header_length), + unencrypted_clamped_position(pos, encrypted_file_size), + ) +} + +/// Convert an unencrypted position to the additional bytes after to the position that must be +/// included when encrypting data blocks. +pub fn unencrypted_clamp_next(pos: u64, header_length: u64, encrypted_file_size: u64) -> u64 { + min( + to_unencrypted_file_size(encrypted_file_size, header_length), + unencrypted_clamped_position(pos, encrypted_file_size) + ENCRYPTED_BLOCK_SIZE, + ) +} + +/// Convert an unencrypted file size to an encrypted file size if the header length is known. +pub fn to_encrypted_file_size(file_size: u64, header_length: u64) -> u64 { + to_encrypted(file_size, header_length) + MAC_SIZE +} + +/// Convert an unencrypted file position to an encrypted position if the header length is known. +pub fn to_encrypted(position: u64, header_length: u64) -> u64 { + let number_data_blocks = position / ENCRYPTED_BLOCK_SIZE; + // Additional bytes include the full data block size. + let mut additional_bytes = number_data_blocks * (NONCE_SIZE + MAC_SIZE); + + // If there is left over data, then there are more nonce bytes. + let remainder = position % ENCRYPTED_BLOCK_SIZE; + if remainder != 0 { + additional_bytes += NONCE_SIZE; + } + + // Then add the extra bytes to the current position. 
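+  // For example (illustrative values): to_encrypted(65536, 124) = 124 + 65536 + 12 + 16 = 65688, since one full block adds a 12-byte nonce and a 16-byte MAC.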
+ header_length + position + additional_bytes +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_to_encrypted() { + let pos = 80000; + let expected = 120 + 65536 + 12 + 16; + let result = unencrypted_to_data_block(pos, 120, to_encrypted_file_size(100000, 120)); + assert_eq!(result, expected); + } + + #[test] + fn test_to_encrypted_file_size() { + let pos = 110000; + let expected = 60148; + let result = unencrypted_to_data_block(pos, 120, to_encrypted_file_size(60000, 120)); + assert_eq!(result, expected); + } + + #[test] + fn test_to_encrypted_pos_greater_than_file_size() { + let pos = 110000; + let expected = 120 + 65536 + 12 + 16; + let result = unencrypted_to_data_block(pos, 120, to_encrypted_file_size(100000, 120)); + assert_eq!(result, expected); + } + + #[test] + fn test_next_data_block() { + let pos = 100000; + let expected = 120 + (65536 + 12 + 16) * 2; + let result = unencrypted_to_next_data_block(pos, 120, to_encrypted_file_size(150000, 120)); + assert_eq!(result, expected); + } + + #[test] + fn test_next_data_block_file_size() { + let pos = 110000; + let expected = 100176; + let result = unencrypted_to_next_data_block(pos, 120, to_encrypted_file_size(100000, 120)); + assert_eq!(result, expected); + } + + #[test] + fn test_unencrypted_clamp() { + let pos = 0; + let expected = 0; + let result = unencrypted_clamp(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + + let pos = 145110; + let expected = 131072; + let result = unencrypted_clamp(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + + let pos = 5485074; + let expected = 5439488; + let result = unencrypted_clamp(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + } + + #[test] + fn test_unencrypted_clamp_next() { + let pos = 7853; + let expected = 65536; + let result = unencrypted_clamp_next(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + + let pos = 453039; + let expected = 458752; + let result = unencrypted_clamp_next(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + + let pos = 5485112; + let expected = 5485112; + let result = unencrypted_clamp_next(pos, 0, to_encrypted_file_size(5485112, 0)); + assert_eq!(result, expected); + } +} diff --git a/htsget-storage/src/c4gh/storage.rs b/htsget-storage/src/c4gh/storage.rs new file mode 100644 index 000000000..34df01165 --- /dev/null +++ b/htsget-storage/src/c4gh/storage.rs @@ -0,0 +1,267 @@ +//! Local Crypt4GH storage access. +//! + +use crate::c4gh::edit::{ClampedPosition, EditHeader, UnencryptedPosition}; +use crate::c4gh::{ + to_unencrypted_file_size, unencrypted_clamp, unencrypted_clamp_next, unencrypted_to_data_block, + unencrypted_to_next_data_block, DeserializedHeader, +}; +use crate::error::StorageError::{InternalError, IoError}; +use crate::error::{Result, StorageError}; +use crate::{ + BytesPosition, BytesPositionOptions, DataBlock, GetOptions, HeadOptions, RangeUrlOptions, + StorageTrait, Streamable, +}; +use async_trait::async_trait; +use crypt4gh::error::Crypt4GHError; +use crypt4gh::{decrypt, Keys}; +use htsget_config::types::{Class, Format, Url}; +use std::collections::HashMap; +use std::fmt::{Debug, Formatter}; +use std::io; +use std::io::{BufReader, BufWriter, Cursor}; +use std::sync::{Arc, Mutex, PoisonError}; +use tokio::io::AsyncReadExt; + +/// Max C4GH header size in bytes. Supports 50 regular sized encrypted packets. 16 + (108 * 50). 
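+/// Only this many bytes are fetched before deserializing a header, so a larger header would fail to deserialize here.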
+const MAX_C4GH_HEADER_SIZE: u64 = 5416; + +/// This represents the state that the C4GHStorage needs to save, like the file sizes and header +/// sizes. +#[derive(Debug)] +pub struct C4GHState { + encrypted_file_size: u64, + unencrypted_file_size: u64, + deserialized_header: DeserializedHeader, +} + +/// Implementation for the [StorageTrait] trait using the local file system for accessing Crypt4GH +/// encrypted files. [T] is the type of the server struct, which is used for formatting urls. +pub struct C4GHStorage { + keys: Vec, + inner: Box, + // Need to have a Mutex so that we can alter the state from a &self reference. + // This is a bit lazy, the proper solution would be to pass around mutable state as a parameter + // or make `StorageTrait` mutable, and synchronise somewhere else. + state: Arc>>, +} + +impl Debug for C4GHStorage { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "C4GHStorage") + } +} + +impl C4GHStorage { + /// Create a new storage from a storage trait. + pub fn new(keys: Vec, inner: impl StorageTrait + Send + Sync + 'static) -> Self { + Self { + keys, + inner: Box::new(inner), + state: Default::default(), + } + } + + /// Format a C4GH key. + pub fn format_key(key: &str) -> String { + format!("{}.c4gh", key) + } + + /// Get a C4GH object and decrypt it if it is not an index. + pub async fn get_object(&self, key: &str, options: GetOptions<'_>) -> Result { + if Format::is_index(key) { + return self.inner.get(key, options).await; + } + + let mut buf = vec![]; + self + .inner + .get(&Self::format_key(key), options) + .await? + .read_to_end(&mut buf) + .await?; + + let mut reader = BufReader::new(Cursor::new(buf)); + let mut writer = BufWriter::new(Cursor::new(vec![])); + + decrypt(&self.keys, &mut reader, &mut writer, 0, None, &None) + .map_err(|err| IoError("Crypt4GH".to_string(), io::Error::other(err)))?; + + let data = writer + .into_inner() + .map_err(|err| IoError("Writer".to_string(), io::Error::other(err)))? + .into_inner(); + Ok(Streamable::from_async_read(Cursor::new(data))) + } + + /// Get the size of the unencrypted object and update state. + pub async fn head_object_with_state(&self, key: &str, options: HeadOptions<'_>) -> Result { + // Get the file size. + let encrypted_file_size = self + .inner + .head(&Self::format_key(key), options.clone()) + .await?; + + // Also need to determine the header size. + let mut buf = vec![]; + self + .inner + .get( + &Self::format_key(key), + GetOptions::new( + BytesPosition::default().with_end(MAX_C4GH_HEADER_SIZE), + options.request_headers(), + ), + ) + .await? + .read_to_end(&mut buf) + .await?; + + let mut reader = BufReader::new(Cursor::new(buf)); + + let deserialized_header = DeserializedHeader::from_buffer(&mut reader, &self.keys)?; + let unencrypted_file_size = + to_unencrypted_file_size(encrypted_file_size, deserialized_header.header_size); + + let state = C4GHState { + encrypted_file_size, + unencrypted_file_size, + deserialized_header, + }; + let mut header_sizes = self.state.lock()?; + header_sizes.insert(key.to_string(), state); + + Ok(unencrypted_file_size) + } + + /// Compute the data blocks including edit lists, additional data encryption packets, and encrypted bytes. 
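+  /// Each requested range is considered in three reference frames: the original unencrypted positions, positions clamped to encrypted block boundaries (used to create the edit list), and the enclosing encrypted data block ranges returned alongside the re-encrypted header.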
+ pub async fn compute_data_blocks( + &self, + key: &str, + options: BytesPositionOptions<'_>, + ) -> Result> { + let mut state = self.state.lock()?; + let state = state + .get_mut(key) + .ok_or_else(|| InternalError("missing key from state".to_string()))?; + + let default_start = |pos: &BytesPosition| pos.start.unwrap_or_default(); + let default_end = |pos: &BytesPosition| pos.end.unwrap_or(state.unencrypted_file_size); + + let header_size = state.deserialized_header.header_size; + let encrypted_file_size = state.encrypted_file_size; + + // Original positions. + let mut unencrypted_positions = vec![]; + // Positions from the reference frame of creating an edit list with discards/keep bytes. + let mut clamped_positions = vec![]; + // Positions from the reference frame of someone merging bytes from htsget. + let mut encrypted_positions = vec![]; + for mut pos in options.positions { + let start = default_start(&pos); + let end = default_end(&pos); + + pos.start = Some(start); + pos.end = Some(end); + unencrypted_positions.push(pos.clone()); + + pos.start = Some(unencrypted_clamp(start, header_size, encrypted_file_size)); + pos.end = Some(unencrypted_clamp_next( + end, + header_size, + encrypted_file_size, + )); + clamped_positions.push(pos.clone()); + + pos.start = Some(unencrypted_to_data_block( + start, + header_size, + encrypted_file_size, + )); + pos.end = Some(unencrypted_to_next_data_block( + end, + header_size, + encrypted_file_size, + )); + encrypted_positions.push(pos); + } + + let unencrypted_positions = BytesPosition::merge_all(unencrypted_positions) + .into_iter() + .map(|pos| UnencryptedPosition::new(default_start(&pos), default_end(&pos))) + .collect::>(); + let clamped_positions = BytesPosition::merge_all(clamped_positions) + .into_iter() + .map(|pos| ClampedPosition::new(default_start(&pos), default_end(&pos))) + .collect::>(); + + let (header_info, reencrypted_bytes, edit_list_packet) = EditHeader::new( + unencrypted_positions, + clamped_positions, + &self.keys, + &mut state.deserialized_header, + ) + .reencrypt_header()? + .into_inner(); + + let header_info_size = header_info.len() as u64; + let current_header_size = state.deserialized_header.header_size; + let mut blocks = vec![ + DataBlock::Data(header_info, Some(Class::Header)), + DataBlock::Range( + BytesPosition::default() + .with_start(header_info_size) + .with_end(current_header_size), + ), + DataBlock::Data( + [edit_list_packet, reencrypted_bytes].concat(), + Some(Class::Header), + ), + ]; + + blocks.extend(DataBlock::from_bytes_positions(BytesPosition::merge_all( + encrypted_positions, + ))); + + Ok(blocks) + } +} + +#[async_trait] +impl StorageTrait for C4GHStorage { + /// Get the Crypt4GH file at the location of the key. + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result { + self.get_object(key, options).await + } + + /// Get a url for the file at key. This refers to the underlying `StorageTrait`. + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result { + self.inner.range_url(&Self::format_key(key), options).await + } + + /// Get the size of the underlying file and the encrypted file, updating any state. + async fn head(&self, key: &str, options: HeadOptions<'_>) -> Result { + self.head_object_with_state(key, options).await + } + + /// Update encrypted positions. 
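+  /// Delegates to `compute_data_blocks`, converting unencrypted byte ranges into re-encrypted header packets and encrypted data block ranges.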
+ async fn update_byte_positions( + &self, + key: &str, + positions_options: BytesPositionOptions<'_>, + ) -> Result> { + self.compute_data_blocks(key, positions_options).await + } +} + +impl From for StorageError { + fn from(err: Crypt4GHError) -> Self { + IoError("Crypt4GH".to_string(), io::Error::other(err)) + } +} + +impl From> for StorageError { + fn from(err: PoisonError) -> Self { + InternalError(err.to_string()) + } +} diff --git a/htsget-storage/src/error.rs b/htsget-storage/src/error.rs new file mode 100644 index 000000000..900dbc7fc --- /dev/null +++ b/htsget-storage/src/error.rs @@ -0,0 +1,83 @@ +//! Error and result types for htsget-storage. +//! + +use htsget_config::types::HtsGetError; +use std::io; +use std::io::ErrorKind; +use std::net::AddrParseError; +use thiserror::Error; + +/// The result type for storage. +pub type Result = core::result::Result; + +/// Storage error type. +#[derive(Error, Debug)] +pub enum StorageError { + #[error("wrong key derived from ID: `{0}`")] + InvalidKey(String), + + #[error("key not found in storage: `{0}`")] + KeyNotFound(String), + + #[error("{0}: {1}")] + IoError(String, io::Error), + + #[error("server error: {0}")] + ServerError(String), + + #[error("invalid input: {0}")] + InvalidInput(String), + + #[error("invalid uri: {0}")] + InvalidUri(String), + + #[error("invalid address: {0}")] + InvalidAddress(AddrParseError), + + #[error("internal error: {0}")] + InternalError(String), + + #[error("response error: {0}")] + ResponseError(String), + + #[cfg(feature = "s3-storage")] + #[error("aws error: {0}, with key: `{1}`")] + AwsS3Error(String, String), + + #[error("parsing url: {0}")] + UrlParseError(String), +} + +impl From for HtsGetError { + fn from(err: StorageError) -> Self { + match err { + err @ StorageError::InvalidInput(_) => Self::InvalidInput(err.to_string()), + err @ (StorageError::KeyNotFound(_) + | StorageError::InvalidKey(_) + | StorageError::ResponseError(_)) => Self::NotFound(err.to_string()), + err @ StorageError::IoError(_, _) => Self::IoError(err.to_string()), + err @ (StorageError::ServerError(_) + | StorageError::InvalidUri(_) + | StorageError::InvalidAddress(_) + | StorageError::InternalError(_)) => Self::InternalError(err.to_string()), + #[cfg(feature = "s3-storage")] + err @ StorageError::AwsS3Error(_, _) => Self::IoError(err.to_string()), + err @ StorageError::UrlParseError(_) => Self::ParseError(err.to_string()), + } + } +} + +impl From for io::Error { + fn from(err: StorageError) -> Self { + match err { + StorageError::IoError(_, ref io_error) => Self::new(io_error.kind(), err), + err => Self::new(ErrorKind::Other, err), + } + } +} + +impl From for StorageError { + fn from(error: io::Error) -> Self { + Self::IoError("io error".to_string(), error) + } +} diff --git a/htsget-storage/src/lib.rs b/htsget-storage/src/lib.rs index f9ec16617..2414a8e55 100644 --- a/htsget-storage/src/lib.rs +++ b/htsget-storage/src/lib.rs @@ -9,71 +9,197 @@ pub use htsget_config::types::{ Class, Format, Headers, HtsGetError, JsonResponse, Query, Response, Url, }; -use std::cmp::Ordering; -use std::fmt::{Debug, Display, Formatter}; -use std::io; -use std::io::ErrorKind; -use std::net::AddrParseError; - use async_trait::async_trait; use base64::engine::general_purpose; use base64::Engine; +use htsget_config::storage::local::LocalStorage as LocalStorageConfig; +#[cfg(feature = "s3-storage")] +use htsget_config::storage::s3::S3Storage as S3StorageConfig; +#[cfg(feature = "url-storage")] +use htsget_config::storage::url::UrlStorageClient as 
UrlStorageConfig; use http::{uri, HeaderMap}; -use thiserror::Error; -use tokio::io::AsyncRead; +use pin_project_lite::pin_project; +use std::cmp::Ordering; +use std::fmt; +use std::fmt::{Debug, Display, Formatter}; +use std::num::ParseIntError; +use std::pin::Pin; +use std::str::FromStr; +use std::task::{Context, Poll}; +use tokio::io::{AsyncRead, ReadBuf}; use tracing::instrument; -use htsget_config::storage::local::LocalStorage; +#[cfg(feature = "c4gh-experimental")] +use crate::c4gh::storage::C4GHStorage; +use crate::error::Result; +use crate::error::StorageError; +use crate::local::LocalStorage; +#[cfg(feature = "s3-storage")] +use crate::s3::S3Storage; +#[cfg(feature = "url-storage")] +use crate::url::UrlStorage; +use htsget_config::storage::object::ObjectType; use htsget_config::types::Scheme; +#[cfg(feature = "c4gh-experimental")] +pub mod c4gh; +pub mod error; pub mod local; #[cfg(feature = "s3-storage")] pub mod s3; #[cfg(feature = "url-storage")] pub mod url; -type Result = core::result::Result; +pin_project! { + /// A Streamable type represents any AsyncRead data used by `StorageTrait`. + pub struct Streamable { + #[pin] + inner: Box, + } +} + +impl Streamable { + /// Create a new Streamable from an AsyncRead. + pub fn from_async_read(inner: impl AsyncRead + Send + Sync + Unpin + 'static) -> Self { + Self { + inner: Box::new(inner), + } + } +} + +impl AsyncRead for Streamable { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + self.project().inner.poll_read(cx, buf) + } +} + +/// The top-level storage type is created from any `StorageTrait`. +pub struct Storage { + inner: Box, +} + +impl Debug for Storage { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "Storage") + } +} + +#[async_trait] +impl StorageTrait for Storage { + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result { + self.inner.get(key, options).await + } + + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result { + self.inner.range_url(key, options).await + } + + async fn head(&self, key: &str, options: HeadOptions<'_>) -> Result { + self.inner.head(key, options).await + } + + fn data_url(&self, data: Vec, class: Option) -> Url { + self.inner.data_url(data, class) + } + + async fn update_byte_positions( + &self, + key: &str, + positions_options: BytesPositionOptions<'_>, + ) -> Result> { + self + .inner + .update_byte_positions(key, positions_options) + .await + } +} + +impl Storage { + /// Create from local storage config. + pub async fn from_local(config: &LocalStorageConfig) -> Result { + let storage = LocalStorage::new(config.local_path(), config.clone())?; + match config.object_type() { + ObjectType::Regular => Ok(Storage::new(storage)), + #[cfg(feature = "c4gh-experimental")] + ObjectType::C4GH { keys } => Ok(Storage::new(C4GHStorage::new( + keys.clone().into_inner(), + storage, + ))), + _ => Err(StorageError::InternalError( + "invalid object type".to_string(), + )), + } + } + + /// Create from s3 config. + #[cfg(feature = "s3-storage")] + pub async fn from_s3(s3_storage: &S3StorageConfig) -> Storage { + Storage::new( + S3Storage::new_with_default_config( + s3_storage.bucket().to_string(), + s3_storage.clone().endpoint(), + s3_storage.clone().path_style(), + ) + .await, + ) + } + + /// Create from url config. 
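+  /// The returned storage wraps `UrlStorage` with the configured client, url, response url, forwarded headers and header blacklist.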
+ #[cfg(feature = "url-storage")] + pub async fn from_url(url_storage_config: &UrlStorageConfig) -> Storage { + Storage::new(UrlStorage::new( + url_storage_config.client_cloned(), + url_storage_config.url().clone(), + url_storage_config.response_url().clone(), + url_storage_config.forward_headers(), + url_storage_config.header_blacklist().to_vec(), + )) + } + + pub fn new(inner: impl StorageTrait + Send + Sync + 'static) -> Self { + Self { + inner: Box::new(inner), + } + } +} /// A Storage represents some kind of object based storage (either locally or in the cloud) /// that can be used to retrieve files for alignments, variants or its respective indexes. #[async_trait] -pub trait Storage { - type Streamable: AsyncRead + Unpin + Send + Sync; - +pub trait StorageTrait { /// Get the object using the key. - async fn get + Send + Debug>( - &self, - key: K, - options: GetOptions<'_>, - ) -> Result; + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result; /// Get the url of the object represented by the key using a bytes range. It is not required for /// this function to check for the existent of the key, so this should be ensured beforehand. - async fn range_url + Send + Debug>( - &self, - key: K, - options: RangeUrlOptions<'_>, - ) -> Result; + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result; /// Get the size of the object represented by the key. - async fn head + Send + Debug>( - &self, - key: K, - options: HeadOptions<'_>, - ) -> Result; + async fn head(&self, key: &str, options: HeadOptions<'_>) -> Result; /// Get the url of the object using an inline data uri. - #[instrument(level = "trace", ret)] - fn data_url(data: Vec, class: Option) -> Url - where - Self: Sized, - { + fn data_url(&self, data: Vec, class: Option) -> Url { Url::new(format!( "data:;base64,{}", general_purpose::STANDARD.encode(data) )) .set_class(class) } + + /// Optionally update byte positions before they are passed to the other functions. + async fn update_byte_positions( + &self, + _key: &str, + positions_options: BytesPositionOptions<'_>, + ) -> Result> { + Ok(DataBlock::from_bytes_positions( + positions_options.merge_all().into_inner(), + )) + } } /// Formats a url for use with storage. 
@@ -82,63 +208,7 @@ pub trait UrlFormatter { fn format_url>(&self, key: K) -> Result; } -#[derive(Error, Debug)] -pub enum StorageError { - #[error("wrong key derived from ID: `{0}`")] - InvalidKey(String), - - #[error("key not found in storage: `{0}`")] - KeyNotFound(String), - - #[error("{0}: {1}")] - IoError(String, io::Error), - - #[error("server error: {0}")] - ServerError(String), - - #[error("invalid input: {0}")] - InvalidInput(String), - - #[error("invalid uri: {0}")] - InvalidUri(String), - - #[error("invalid address: {0}")] - InvalidAddress(AddrParseError), - - #[error("internal error: {0}")] - InternalError(String), - - #[error("response error: {0}")] - ResponseError(String), - - #[cfg(feature = "s3-storage")] - #[error("aws error: {0}, with key: `{1}`")] - AwsS3Error(String, String), - - #[error("parsing url: {0}")] - UrlParseError(String), -} - -impl From for HtsGetError { - fn from(err: StorageError) -> Self { - match err { - err @ StorageError::InvalidInput(_) => Self::InvalidInput(err.to_string()), - err @ (StorageError::KeyNotFound(_) - | StorageError::InvalidKey(_) - | StorageError::ResponseError(_)) => Self::NotFound(err.to_string()), - err @ StorageError::IoError(_, _) => Self::IoError(err.to_string()), - err @ (StorageError::ServerError(_) - | StorageError::InvalidUri(_) - | StorageError::InvalidAddress(_) - | StorageError::InternalError(_)) => Self::InternalError(err.to_string()), - #[cfg(feature = "s3-storage")] - err @ StorageError::AwsS3Error(_, _) => Self::IoError(err.to_string()), - err @ StorageError::UrlParseError(_) => Self::ParseError(err.to_string()), - } - } -} - -impl UrlFormatter for LocalStorage { +impl UrlFormatter for htsget_config::storage::local::LocalStorage { fn format_url>(&self, key: K) -> Result { uri::Builder::new() .scheme(match self.scheme() { @@ -153,21 +223,6 @@ impl UrlFormatter for LocalStorage { } } -impl From for io::Error { - fn from(err: StorageError) -> Self { - match err { - StorageError::IoError(_, ref io_error) => Self::new(io_error.kind(), err), - err => Self::new(ErrorKind::Other, err), - } - } -} - -impl From for StorageError { - fn from(error: io::Error) -> Self { - Self::IoError("io error".to_string(), error) - } -} - /// A DataBlock is either a range of bytes, or a data blob that gets transformed into a data uri. #[derive(Debug, PartialEq, Eq)] pub enum DataBlock { @@ -243,6 +298,38 @@ impl Display for BytesRange { } } +/// Convert from a http range to a bytes position. 
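+/// For example, "bytes=0-1023" parses to a start of 0 and an exclusive end of 1024, while "bytes=1024-" leaves the end unset.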
+impl FromStr for BytesPosition { + type Err = StorageError; + + fn from_str(range: &str) -> Result { + let range = range.replacen("bytes=", "", 1); + + let split: Vec<&str> = range.splitn(2, '-').collect(); + if split.len() > 2 { + return Err(StorageError::InternalError( + "failed to split range".to_string(), + )); + } + + let parse_range = |range: Option<&str>| { + let range = range.unwrap_or_default(); + if range.is_empty() { + Ok::<_, Self::Err>(None) + } else { + Ok(Some(range.parse().map_err(|err: ParseIntError| { + StorageError::InternalError(err.to_string()) + })?)) + } + }; + + let start = parse_range(split.first().copied())?; + let end = parse_range(split.last().copied())?.map(|value| value + 1); + + Ok(Self::new(start, end, None)) + } +} + impl From<&BytesPosition> for BytesRange { fn from(pos: &BytesPosition) -> Self { Self::new(pos.start, pos.end.map(|value| value - 1)) @@ -400,6 +487,38 @@ impl<'a> GetOptions<'a> { } } +#[derive(Debug, Clone)] +pub struct BytesPositionOptions<'a> { + positions: Vec, + headers: &'a HeaderMap, +} + +impl<'a> BytesPositionOptions<'a> { + pub fn new(positions: Vec, headers: &'a HeaderMap) -> Self { + Self { positions, headers } + } + + /// Get the response headers. + pub fn headers(&self) -> &'a HeaderMap { + self.headers + } + + pub fn positions(&self) -> &Vec { + &self.positions + } + + /// Get the inner value. + pub fn into_inner(self) -> Vec { + self.positions + } + + /// Merge all bytes positions + pub fn merge_all(mut self) -> Self { + self.positions = BytesPosition::merge_all(self.positions); + self + } +} + #[derive(Debug)] pub struct RangeUrlOptions<'a> { range: BytesPosition, @@ -447,7 +566,7 @@ impl<'a> RangeUrlOptions<'a> { } /// A struct to represent options passed to a `Storage` head call. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct HeadOptions<'a> { request_headers: &'a HeaderMap, } @@ -470,9 +589,9 @@ mod tests { use http::uri::Authority; - use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; - use crate::local::LocalStorage; + use htsget_config::storage::local::LocalStorage as ConfigLocalStorage; + use htsget_test::util::default_dir; use super::*; @@ -811,8 +930,12 @@ mod tests { #[test] fn data_url() { - let result = - LocalStorage::::data_url(b"Hello World!".to_vec(), Some(Class::Header)); + let result = LocalStorage::::new( + default_dir().join("data"), + ConfigLocalStorage::default(), + ) + .unwrap() + .data_url(b"Hello World!".to_vec(), Some(Class::Header)); let url = data_url::DataUrl::process(&result.url); let (result, _) = url.unwrap().decode_to_vec().unwrap(); assert_eq!(result, b"Hello World!"); @@ -949,6 +1072,7 @@ mod tests { Authority::from_static("127.0.0.1:8080"), "data".to_string(), "/data".to_string(), + Default::default(), ); test_formatter_authority(formatter, "http"); } @@ -960,6 +1084,7 @@ mod tests { Authority::from_static("127.0.0.1:8080"), "data".to_string(), "/data".to_string(), + Default::default(), ); test_formatter_authority(formatter, "https"); } diff --git a/htsget-storage/src/local.rs b/htsget-storage/src/local.rs index 098ae7247..7f5dd3a15 100644 --- a/htsget-storage/src/local.rs +++ b/htsget-storage/src/local.rs @@ -1,10 +1,12 @@ -//! Module providing an implementation for the [Storage] trait using the local file system. +//! Module providing an implementation for the [StorageTrait] trait using the local file system. //! 
use std::fmt::Debug; use std::io::ErrorKind; use std::path::{Path, PathBuf}; +use crate::{HeadOptions, StorageTrait, UrlFormatter}; +use crate::{Streamable, Url as HtsGetUrl}; use async_trait::async_trait; use tokio::fs; use tokio::fs::File; @@ -12,12 +14,9 @@ use tracing::debug; use tracing::instrument; use url::Url; -use crate::Url as HtsGetUrl; -use crate::{HeadOptions, Storage, UrlFormatter}; - use super::{GetOptions, RangeUrlOptions, Result, StorageError}; -/// Implementation for the [Storage] trait using the local file system. [T] is the type of the +/// Implementation for the [StorageTrait] trait using the local file system. [T] is the type of the /// server struct, which is used for formatting urls. #[derive(Debug, Clone)] pub struct LocalStorage { @@ -79,28 +78,18 @@ impl LocalStorage { } #[async_trait] -impl Storage for LocalStorage { - type Streamable = File; - +impl StorageTrait for LocalStorage { /// Get the file at the location of the key. #[instrument(level = "debug", skip(self))] - async fn get + Send + Debug>( - &self, - key: K, - _options: GetOptions<'_>, - ) -> Result { - debug!(calling_from = ?self, key = key.as_ref(), "getting file with key {:?}", key.as_ref()); - self.get(key).await + async fn get(&self, key: &str, _options: GetOptions<'_>) -> Result { + debug!(calling_from = ?self, key = key, "getting file with key {:?}", key); + Ok(Streamable::from_async_read(self.get(key).await?)) } /// Get a url for the file at key. #[instrument(level = "debug", skip(self))] - async fn range_url + Send + Debug>( - &self, - key: K, - options: RangeUrlOptions<'_>, - ) -> Result { - let path = self.get_path_from_key(&key)?; + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result { + let path = self.get_path_from_key(key)?; let base_url = Url::from_file_path(&self.base_path) .map_err(|_| StorageError::UrlParseError("failed to parse base path as url".to_string()))?; @@ -119,25 +108,21 @@ impl Storage for LocalStorage { let url = HtsGetUrl::new(self.url_formatter.format_url(path)?); let url = options.apply(url); - debug!(calling_from = ?self, key = key.as_ref(), ?url, "getting url with key {:?}", key.as_ref()); + debug!(calling_from = ?self, key = key, ?url, "getting url with key {:?}", key); Ok(url) } /// Get the size of the file. #[instrument(level = "debug", skip(self))] - async fn head + Send + Debug>( - &self, - key: K, - _options: HeadOptions<'_>, - ) -> Result { - let path = self.get_path_from_key(&key)?; + async fn head(&self, key: &str, _options: HeadOptions<'_>) -> Result { + let path = self.get_path_from_key(key)?; let len = fs::metadata(path) .await .map_err(|err| StorageError::KeyNotFound(err.to_string()))? 
.len(); - debug!(calling_from = ?self, key = key.as_ref(), len, "size of key {:?} is {}", key.as_ref(), len); + debug!(calling_from = ?self, key = key, len, "size of key {:?} is {}", key, len); Ok(len) } } @@ -172,7 +157,7 @@ pub(crate) mod tests { #[tokio::test] async fn get_folder() { with_local_storage(|storage| async move { - let result = Storage::get( + let result = StorageTrait::get( &storage, "folder", GetOptions::new_with_default_range(&Default::default()), @@ -186,7 +171,7 @@ pub(crate) mod tests { #[tokio::test] async fn get_forbidden_path() { with_local_storage(|storage| async move { - let result = Storage::get( + let result = StorageTrait::get( &storage, "folder/../../passwords", GetOptions::new_with_default_range(&Default::default()), @@ -202,7 +187,7 @@ pub(crate) mod tests { #[tokio::test] async fn get_existing_key() { with_local_storage(|storage| async move { - let result = Storage::get( + let result = StorageTrait::get( &storage, "folder/../key1", GetOptions::new_with_default_range(&Default::default()), @@ -216,7 +201,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_of_non_existing_key() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "non-existing-key", RangeUrlOptions::new_with_default_range(&Default::default()), @@ -230,7 +215,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_of_folder() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "folder", RangeUrlOptions::new_with_default_range(&Default::default()), @@ -244,7 +229,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_with_forbidden_path() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "folder/../../passwords", RangeUrlOptions::new_with_default_range(&Default::default()), @@ -260,7 +245,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_of_existing_key() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "folder/../key1", RangeUrlOptions::new_with_default_range(&Default::default()), @@ -275,7 +260,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_of_existing_key_with_specified_range() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "folder/../key1", RangeUrlOptions::new( @@ -294,7 +279,7 @@ pub(crate) mod tests { #[tokio::test] async fn url_of_existing_key_with_specified_open_ended_range() { with_local_storage(|storage| async move { - let result = Storage::range_url( + let result = StorageTrait::range_url( &storage, "folder/../key1", RangeUrlOptions::new(BytesPosition::new(Some(7), None, None), &Default::default()), @@ -310,7 +295,7 @@ pub(crate) mod tests { #[tokio::test] async fn file_size() { with_local_storage(|storage| async move { - let result = Storage::head( + let result = StorageTrait::head( &storage, "folder/../key1", HeadOptions::new(&Default::default()), @@ -349,24 +334,26 @@ pub(crate) mod tests { (folder_name.to_string(), base_path) } - async fn with_local_storage(test: F) + pub(crate) fn test_local_storage(base_path: &Path) -> LocalStorage { + LocalStorage::new( + base_path, + ConfigLocalStorage::new( + Scheme::Http, + Authority::from_static("127.0.0.1:8081"), + "data".to_string(), + "/data".to_string(), + Default::default(), + ), + ) + .unwrap() + } + + pub(crate) async fn 
with_local_storage(test: F) where F: FnOnce(LocalStorage) -> Fut, Fut: Future, { let (_, base_path) = create_local_test_files().await; - test( - LocalStorage::new( - base_path.path(), - ConfigLocalStorage::new( - Scheme::Http, - Authority::from_static("127.0.0.1:8081"), - "data".to_string(), - "/data".to_string(), - ), - ) - .unwrap(), - ) - .await + test(test_local_storage(base_path.path())).await } } diff --git a/htsget-storage/src/s3.rs b/htsget-storage/src/s3.rs index fe6d1edb2..ec59bc149 100644 --- a/htsget-storage/src/s3.rs +++ b/htsget-storage/src/s3.rs @@ -1,4 +1,4 @@ -//! Module providing an implementation for the [Storage] trait using Amazon's S3 object storage service. +//! Module providing an implementation for the [StorageTrait] trait using Amazon's S3 object storage service. //! use std::fmt::Debug; @@ -27,9 +27,9 @@ use tracing::{debug, warn}; use crate::s3::Retrieval::{Delayed, Immediate}; use crate::StorageError::{AwsS3Error, IoError, KeyNotFound}; -use crate::Url; use crate::{BytesPosition, HeadOptions, StorageError}; -use crate::{BytesRange, Storage}; +use crate::{BytesRange, StorageTrait}; +use crate::{Streamable, Url}; use super::{GetOptions, RangeUrlOptions, Result}; @@ -42,7 +42,7 @@ pub enum Retrieval { Delayed(StorageClass), } -/// Implementation for the [Storage] trait utilising data from an S3 bucket. +/// Implementation for the [StorageTrait] trait utilising data from an S3 bucket. #[derive(Debug, Clone)] pub struct S3Storage { client: Client, @@ -243,31 +243,21 @@ impl Stream for S3Stream { } #[async_trait] -impl Storage for S3Storage { - type Streamable = StreamReader; - +impl StorageTrait for S3Storage { /// Gets the actual s3 object as a buffered reader. #[instrument(level = "trace", skip(self))] - async fn get + Send + Debug>( - &self, - key: K, - options: GetOptions<'_>, - ) -> Result { - let key = key.as_ref(); + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result { debug!(calling_from = ?self, key, "getting file with key {:?}", key); - self.create_stream_reader(key, options).await + Ok(Streamable::from_async_read( + self.create_stream_reader(key, options).await?, + )) } /// Return an S3 pre-signed htsget URL. This function does not check that the key exists, so this /// should be checked before calling it. #[instrument(level = "trace", skip(self))] - async fn range_url + Send + Debug>( - &self, - key: K, - options: RangeUrlOptions<'_>, - ) -> Result { - let key = key.as_ref(); + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result { let presigned_url = self.s3_presign_url(key, options.range()).await?; let url = options.apply(Url::new(presigned_url)); @@ -277,13 +267,7 @@ impl Storage for S3Storage { /// Returns the size of the S3 object in bytes. 
#[instrument(level = "trace", skip(self))] - async fn head + Send + Debug>( - &self, - key: K, - _options: HeadOptions<'_>, - ) -> Result { - let key = key.as_ref(); - + async fn head(&self, key: &str, _options: HeadOptions<'_>) -> Result { let head = self.s3_head(key).await?; let content_length = head @@ -313,7 +297,7 @@ pub(crate) mod tests { use crate::local::tests::create_local_test_files; use crate::s3::S3Storage; use crate::Headers; - use crate::{BytesPosition, GetOptions, RangeUrlOptions, Storage}; + use crate::{BytesPosition, GetOptions, RangeUrlOptions, StorageTrait}; use crate::{HeadOptions, StorageError}; pub(crate) async fn with_aws_s3_storage_fn(test: F, folder_name: String, base_path: &Path) diff --git a/htsget-storage/src/url.rs b/htsget-storage/src/url.rs index 8302db424..e6efc86e2 100644 --- a/htsget-storage/src/url.rs +++ b/htsget-storage/src/url.rs @@ -16,8 +16,8 @@ use tracing::{debug, instrument}; use htsget_config::error; use crate::StorageError::{InternalError, KeyNotFound, ResponseError, UrlParseError}; -use crate::Url as HtsGetUrl; -use crate::{GetOptions, HeadOptions, RangeUrlOptions, Result, Storage, StorageError}; +use crate::{GetOptions, HeadOptions, RangeUrlOptions, Result, StorageError, StorageTrait}; +use crate::{Streamable, Url as HtsGetUrl}; /// A storage struct which derives data from HTTP URLs. #[derive(Debug, Clone)] @@ -192,35 +192,23 @@ impl Stream for UrlStream { } #[async_trait] -impl Storage for UrlStorage { - type Streamable = StreamReader; - +impl StorageTrait for UrlStorage { #[instrument(level = "trace", skip(self))] - async fn get + Send + Debug>( - &self, - key: K, - options: GetOptions<'_>, - ) -> Result { - let key = key.as_ref().to_string(); + async fn get(&self, key: &str, options: GetOptions<'_>) -> Result { debug!(calling_from = ?self, key, "getting file with key {:?}", key); let request_headers = self.remove_blacklisted_headers(options.request_headers().clone()); let response = self.get_key(key.to_string(), &request_headers).await?; - Ok(StreamReader::new(UrlStream::new(Box::new( - response - .bytes_stream() - .map_err(|err| ResponseError(format!("reading body from response: {}", err))), - )))) + Ok(Streamable::from_async_read(StreamReader::new( + UrlStream::new(Box::new(response.bytes_stream().map_err(|err| { + ResponseError(format!("reading body from response: {}", err)) + }))), + ))) } #[instrument(level = "trace", skip(self))] - async fn range_url + Send + Debug>( - &self, - key: K, - options: RangeUrlOptions<'_>, - ) -> Result { - let key = key.as_ref(); + async fn range_url(&self, key: &str, options: RangeUrlOptions<'_>) -> Result { debug!(calling_from = ?self, key, "getting url with key {:?}", key); let response_headers = self.remove_blacklisted_headers(options.response_headers().clone()); @@ -230,13 +218,7 @@ impl Storage for UrlStorage { } #[instrument(level = "trace", skip(self))] - async fn head + Send + Debug>( - &self, - key: K, - options: HeadOptions<'_>, - ) -> Result { - let key = key.as_ref(); - + async fn head(&self, key: &str, options: HeadOptions<'_>) -> Result { let request_headers = self.remove_blacklisted_headers(options.request_headers().clone()); let head = self.head_key(key, &request_headers).await?; diff --git a/htsget-test/Cargo.toml b/htsget-test/Cargo.toml index 00c64c34d..fbe5fe4c2 100644 --- a/htsget-test/Cargo.toml +++ b/htsget-test/Cargo.toml @@ -36,6 +36,7 @@ aws-mocks = [ ] s3-storage = ["htsget-config?/s3-storage"] url-storage = ["htsget-config?/url-storage"] +c4gh-experimental = ["dep:crypt4gh", 
"dep:htsget-config", "htsget-config/c4gh-experimental"] default = [] [dependencies] @@ -62,6 +63,9 @@ s3s = { version = "0.10", optional = true } s3s-fs = { version = "0.10", optional = true } s3s-aws = { version = "0.10", optional = true } +# Crypt4GH +crypt4gh = { version = "0.4", git = "https://github.com/EGA-archive/crypt4gh-rust", optional = true } + # Default dependencies rcgen = "0.13" thiserror = "1" diff --git a/htsget-test/README.md b/htsget-test/README.md index e2d6a2eee..eb25ab53e 100644 --- a/htsget-test/README.md +++ b/htsget-test/README.md @@ -36,11 +36,11 @@ This library is intended to be used as a [development dependency][dev-dependenci #### Feature flags This crate has the following features: -* `http-tests`: used to enable common functionality for HTTP tests. -* `cors-tests`: used to enable CORS tests. -* `server-tests`: used to enable server tests. +* `http`: used to enable common functionality for HTTP tests. +* `aws-mocks`: used to enable AWS mocking for tests. * `s3-storage`: used to enable `S3Storage` functionality. * `url-storage`: used to enable `UrlStorage` functionality. +* `c4gh-experimental`: used to enable `C4GHStorage` functionality. [dev-dependencies]: https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#development-dependencies diff --git a/htsget-test/src/c4gh.rs b/htsget-test/src/c4gh.rs new file mode 100644 index 000000000..5092c40eb --- /dev/null +++ b/htsget-test/src/c4gh.rs @@ -0,0 +1,40 @@ +use crate::util::default_dir; +use crypt4gh::keys::get_private_key; +use crypt4gh::{decrypt, Keys}; +use htsget_config::storage::object::c4gh::{C4GHKeys, C4GHPath}; +use std::io::{BufReader, BufWriter, Cursor}; + +pub fn decrypt_data(data: &[u8]) -> Vec { + let keys = get_private_key( + default_dir().join("data/c4gh/keys/alice.sec"), + Ok("".to_string()), + ) + .unwrap(); + + let mut reader = BufReader::new(Cursor::new(data)); + let mut writer = BufWriter::new(Cursor::new(vec![])); + + decrypt( + &[Keys { + method: 0, + privkey: keys, + recipient_pubkey: vec![], + }], + &mut reader, + &mut writer, + 0, + None, + &None, + ) + .unwrap(); + + writer.into_inner().unwrap().into_inner() +} + +pub fn get_decryption_keys() -> Vec { + let private_key = default_dir().join("data/c4gh/keys/bob.sec"); + let public_key = default_dir().join("data/c4gh/keys/alice.pub"); + let keys = C4GHKeys::try_from(C4GHPath::new(private_key, public_key)).unwrap(); + + keys.into_inner() +} diff --git a/htsget-test/src/http/concat.rs b/htsget-test/src/http/concat.rs index 1235d5ea7..694525d94 100644 --- a/htsget-test/src/http/concat.rs +++ b/htsget-test/src/http/concat.rs @@ -182,6 +182,12 @@ impl ReadRecords { self.merged_bytes.as_slice() } + /// Set the merged byte data. + pub fn set_bytes(mut self, merged_bytes: Vec) -> Self { + self.merged_bytes = merged_bytes; + self + } + /// Read records to confirm they are valid. pub async fn read_records(self) -> Result<()> { match self.format { diff --git a/htsget-test/src/http/mod.rs b/htsget-test/src/http/mod.rs index c4e7f068b..18abd7eaa 100644 --- a/htsget-test/src/http/mod.rs +++ b/htsget-test/src/http/mod.rs @@ -24,7 +24,7 @@ use htsget_config::tls::{ }; use htsget_config::types::{Scheme, TaggedTypeAll}; -use crate::util::generate_test_certificates; +use crate::util::{default_dir, default_dir_data, generate_test_certificates}; use crate::Config; /// Represents a http header. @@ -93,19 +93,6 @@ pub trait TestServer { async fn test_server(&self, request: T, expected_path: String) -> Response; } -/// Get the default directory. 
-pub fn default_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf() -} - -/// Get the default directory where data is present.. -pub fn default_dir_data() -> PathBuf { - default_dir().join("data") -} - /// Get the default test storage. pub fn default_test_resolver(addr: SocketAddr, scheme: Scheme) -> Vec { let local_storage = LocalStorage::new( @@ -113,6 +100,7 @@ pub fn default_test_resolver(addr: SocketAddr, scheme: Scheme) -> Vec Authority::from_str(&addr.to_string()).unwrap(), default_dir_data().to_str().unwrap().to_string(), "/data".to_string(), + Default::default(), ); vec![ Resolver::new( diff --git a/htsget-test/src/http/server.rs b/htsget-test/src/http/server.rs index 0b183650e..87b98931c 100644 --- a/htsget-test/src/http/server.rs +++ b/htsget-test/src/http/server.rs @@ -11,7 +11,6 @@ use htsget_config::types::Class; use htsget_config::types::Format; use crate::http::{Header, Response, TestRequest, TestServer}; -use crate::util::expected_bgzf_eof_data_url; use crate::Config; /// Test response with with class. @@ -341,23 +340,20 @@ where /// An example VCF search response. pub fn expected_response(class: Class, url_path: String) -> Value { let url = format!("{url_path}/data/vcf/sample1-bcbio-cancer.vcf.gz"); - let headers = ["Range", "bytes=0-3465"]; let urls = match class { Class::Header => json!([{ "url": url, "headers": { - headers[0]: headers[1] + "Range": "bytes=0-3465" }, "class": "header" }]), Class::Body => json!([{ "url": url, "headers": { - headers[0]: headers[1] + "Range": "bytes=0-3493" }, - }, { - "url": expected_bgzf_eof_data_url() }]), }; diff --git a/htsget-test/src/lib.rs b/htsget-test/src/lib.rs index ca18fbd9c..9fa40f2be 100644 --- a/htsget-test/src/lib.rs +++ b/htsget-test/src/lib.rs @@ -6,6 +6,8 @@ pub use htsget_config::{ #[cfg(feature = "aws-mocks")] pub mod aws_mocks; +#[cfg(feature = "c4gh-experimental")] +pub mod c4gh; pub mod error; #[cfg(feature = "http")] pub mod http; diff --git a/htsget-test/src/util.rs b/htsget-test/src/util.rs index 50ffd5ac8..aaeeae99e 100644 --- a/htsget-test/src/util.rs +++ b/htsget-test/src/util.rs @@ -25,3 +25,16 @@ pub fn expected_bgzf_eof_data_url() -> String { pub fn expected_cram_eof_data_url() -> String { "data:;base64,DwAAAP////8P4EVPRgAAAAABAAW92U8AAQAGBgEAAQABAO5jAUs=".to_string() } + +/// Get the default directory. +pub fn default_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .to_path_buf() +} + +/// Get the default directory where data is present.. +pub fn default_dir_data() -> PathBuf { + default_dir().join("data") +}