diff --git a/Cargo.lock b/Cargo.lock index 25bde667ebf..526222fce58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,6 +36,20 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if 1.0.0", + "const-random", + "getrandom 0.3.1", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -51,6 +65,279 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "alloy" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e2a5d689ccd182f1d138a61f081841b905034e0089f5278f6c200f2bcdab00a" +dependencies = [ + "alloy-consensus", + "alloy-core", + "alloy-eips", + "alloy-serde", + "alloy-trie", +] + +[[package]] +name = "alloy-consensus" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d213580c17d239ae83c0d897ac3315db7cda83d2d4936a9823cc3517552f2e24" +dependencies = [ + "alloy-eips", + "alloy-primitives", + "alloy-rlp", + "alloy-serde", + "alloy-trie", + "alloy-tx-macros", + "auto_impl", + "c-kzg", + "derive_more 2.0.1", + "either", + "k256", + "once_cell", + "rand 0.8.5", + "secp256k1 0.30.0", + "serde", + "thiserror 2.0.12", +] + +[[package]] +name = "alloy-core" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe6c56d58fbfa9f0f6299376e8ce33091fc6494239466814c3f54b55743cb09" +dependencies = [ + "alloy-json-abi", + "alloy-primitives", + "alloy-sol-types", +] + +[[package]] +name = "alloy-eip2124" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "741bdd7499908b3aa0b159bba11e71c8cddd009a2c2eb7a06e825f1ec87900a5" +dependencies = [ + "alloy-primitives", + "alloy-rlp", + "crc", + "serde", + "thiserror 2.0.12", +] + +[[package]] +name = "alloy-eip2930" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b82752a889170df67bbb36d42ca63c531eb16274f0d7299ae2a680facba17bd" +dependencies = [ + "alloy-primitives", + "alloy-rlp", + "serde", +] + +[[package]] +name = "alloy-eip7702" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d4769c6ffddca380b0070d71c8b7f30bed375543fe76bb2f74ec0acf4b7cd16" +dependencies = [ + "alloy-primitives", + "alloy-rlp", + "serde", + "thiserror 2.0.12", +] + +[[package]] +name = "alloy-eips" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a15b4b0f6bab47aae017d52bb5a739bda381553c09fb9918b7172721ef5f5de" +dependencies = [ + "alloy-eip2124", + "alloy-eip2930", + "alloy-eip7702", + "alloy-primitives", + "alloy-rlp", + "alloy-serde", + "auto_impl", + "c-kzg", + "derive_more 2.0.1", + "either", + "serde", + "serde_with", + "sha2", + "thiserror 2.0.12", +] + +[[package]] +name = "alloy-json-abi" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "125a1c373261b252e53e04d6e92c37d881833afc1315fceab53fd46045695640" +dependencies = [ + "alloy-primitives", + "alloy-sol-type-parser", + "serde", + "serde_json", +] + 
+[[package]] +name = "alloy-primitives" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc9485c56de23438127a731a6b4c87803d49faf1a7068dcd1d8768aca3a9edb9" +dependencies = [ + "alloy-rlp", + "bytes", + "cfg-if 1.0.0", + "const-hex", + "derive_more 2.0.1", + "hashbrown 0.15.2", + "indexmap 2.12.0", + "itoa", + "paste", + "rand 0.9.1", + "ruint", + "serde", + "tiny-keccak 2.0.2", +] + +[[package]] +name = "alloy-rlp" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f70d83b765fdc080dbcd4f4db70d8d23fe4761f2f02ebfa9146b833900634b4" +dependencies = [ + "alloy-rlp-derive", + "arrayvec 0.7.4", + "bytes", +] + +[[package]] +name = "alloy-rlp-derive" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b728d511962dda67c1bc7ea7c03736ec275ed2cf4c35d9585298ac9ccf3b73" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "alloy-serde" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1b3b1078b8775077525bc9fe9f6577e815ceaecd6c412a4f3b4d8aa2836e8f6" +dependencies = [ + "alloy-primitives", + "serde", + "serde_json", +] + +[[package]] +name = "alloy-sol-macro" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d20d867dcf42019d4779519a1ceb55eba8d7f3d0e4f0a89bcba82b8f9eb01e48" +dependencies = [ + "alloy-sol-macro-expander", + "alloy-sol-macro-input", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "alloy-sol-macro-expander" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b74e91b0b553c115d14bd0ed41898309356dc85d0e3d4b9014c4e7715e48c8ad" +dependencies = [ + "alloy-json-abi", + "alloy-sol-macro-input", + "const-hex", + "heck 0.5.0", + "indexmap 2.12.0", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.101", + "syn-solidity", + "tiny-keccak 2.0.2", +] + +[[package]] +name = "alloy-sol-macro-input" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84194d31220803f5f62d0a00f583fd3a062b36382e2bea446f1af96727754565" +dependencies = [ + "alloy-json-abi", + "const-hex", + "dunce", + "heck 0.5.0", + "macro-string", + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.101", + "syn-solidity", +] + +[[package]] +name = "alloy-sol-type-parser" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe8c27b3cf6b2bb8361904732f955bc7c05e00be5f469cec7e2280b6167f3ff0" +dependencies = [ + "serde", + "winnow 0.7.13", +] + +[[package]] +name = "alloy-sol-types" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5383d34ea00079e6dd89c652bcbdb764db160cef84e6250926961a0b2295d04" +dependencies = [ + "alloy-json-abi", + "alloy-primitives", + "alloy-sol-macro", +] + +[[package]] +name = "alloy-trie" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3412d52bb97c6c6cc27ccc28d4e6e8cf605469101193b50b0bd5813b1f990b5" +dependencies = [ + "alloy-primitives", + "alloy-rlp", + "arrayvec 0.7.4", + "derive_more 2.0.1", + "nybbles", + "serde", + "smallvec", + "tracing", +] + +[[package]] +name = "alloy-tx-macros" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b5becb9c269a7d05a2f28d549f86df5a5dbc923e2667eff84fdecac8cda534c" 
+dependencies = [ + "alloy-primitives", + "darling 0.21.3", + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -150,6 +437,243 @@ name = "arrayvec" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +dependencies = [ + "serde", +] + +[[package]] +name = "arrow" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30feb679425110209ae35c3fbf82404a39a4c0436bb3ec36164d8bffed2a4ce4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", +] + +[[package]] +name = "arrow-array" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70732f04d285d49054a48b72c54f791bb3424abae92d27aafdf776c98af161c8" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.15.2", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "169b1d5d6cb390dd92ce582b06b23815c7953e9dfaaea75556e89d890d19993d" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4f12eccc3e1c05a766cafb31f6a60a46c2f8efec9b74c6e0648766d30686af8" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "012c9fef3f4a11573b2c74aec53712ff9fdae4a95f4ce452d1bbf088ee00f06b" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de1ce212d803199684b658fc4ba55fb2d7e87b213de5af415308d2fee3619c2" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-flight" +version = "55.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e0fad280f41a918d53ba48288a246ff04202d463b3b380fbc0edecdcb52cfd" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", + "base64 0.22.1", + "bytes", + "futures 0.3.31", + "once_cell", + "paste", + "prost", + "prost-types", + "tonic", +] + +[[package]] +name = "arrow-ipc" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9ea5967e8b2af39aff5d9de2197df16e305f47f404781d3230b2dc672da5d92" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", +] 
+ +[[package]] +name = "arrow-json" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5709d974c4ea5be96d900c01576c7c0b99705f4a3eec343648cb1ca863988a9c" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.12.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-ord" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6506e3a059e3be23023f587f79c82ef0bcf6d293587e3272d20f2d30b969b5a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", +] + +[[package]] +name = "arrow-row" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52bf7393166beaf79b4bed9bfdf19e97472af32ce5b6b48169d321518a08cae2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", +] + +[[package]] +name = "arrow-schema" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7686986a3bf2254c9fb130c623cdcb2f8e1f15763e7c71c310f0834da3d292" + +[[package]] +name = "arrow-select" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd2b45757d6a2373faa3352d02ff5b54b098f5e21dccebc45a21806bc34501e5" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "55.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0377d532850babb4d927a06294314b316e23311503ed580ec6ce6a0158f49d40" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] [[package]] name = "ascii" @@ -193,7 +717,7 @@ dependencies = [ "futures-util", "handlebars", "http 1.3.1", - "indexmap 2.9.0", + "indexmap 2.12.0", "mime", "multer", "num-traits", @@ -220,7 +744,7 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -232,7 +756,7 @@ checksum = "d8e5d0c6697def2f79ccbd972fb106b633173a6066e430b480e1ff9376a7561a" dependencies = [ "Inflector", "async-graphql-parser", - "darling", + "darling 0.20.10", "proc-macro-crate", "proc-macro2", "quote", @@ -260,7 +784,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "741110dda927420a28fbc1c310543d3416f789a6ba96859c2c265843a0a96887" dependencies = [ "bytes", - "indexmap 2.9.0", + "indexmap 2.12.0", "serde", "serde_json", ] @@ -309,6 +833,15 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -332,6 +865,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "auto_impl" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "autocfg" version = "1.3.0" @@ -472,6 +1016,12 @@ version = "0.2.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base64" version = "0.13.1" @@ -490,6 +1040,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + [[package]] name = "beef" version = "0.5.2" @@ -522,6 +1078,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "bitcoin-io" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b47c4ab7a93edb0c7198c5535ed9b52b63095f4e9b45279c6736cec4b856baf" + +[[package]] +name = "bitcoin_hashes" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb18c03d0db0247e147a21a6faafd5a7eb851c743db062de72018b6b7e8e4d16" +dependencies = [ + "bitcoin-io", + "hex-conservative", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -530,9 +1102,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "bitvec" @@ -592,6 +1164,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blst" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080" +dependencies = [ + "cc", + "glob", + "threadpool", + "zeroize", +] + [[package]] name = "bs58" version = "0.4.0" @@ -647,6 +1231,21 @@ dependencies = [ "serde", ] +[[package]] +name = "c-kzg" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7318cfa722931cb5fe0838b98d3ce5621e75f6a6408abc21721d80de9223f2e4" +dependencies = [ + "blst", + "cc", + "glob", + "hex", + "libc", + "once_cell", + "serde", +] + [[package]] name = "cc" version = "1.2.16" @@ -774,20 +1373,59 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", +] + +[[package]] +name = "const-hex" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dccd746bf9b1038c0507b7cec21eb2b11222db96a2902c96e8c185d6d20fb9c4" +dependencies = [ + "cfg-if 1.0.0", + "cpufeatures", + "hex", + "proptest", + "serde", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-random" +version = "0.1.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", ] [[package]] -name = "console" -version = "0.15.11" +name = "const-random-macro" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "encode_unicode", - "libc", + "getrandom 0.2.15", "once_cell", - "unicode-width 0.2.0", - "windows-sys 0.59.0", + "tiny-keccak 2.0.2", ] [[package]] @@ -1006,6 +1644,21 @@ version = "0.120.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85256fac1519a7d25a040c1d850fba67478f3f021ad5fdf738ba4425ee862dbf" +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -1077,6 +1730,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -1124,8 +1789,18 @@ version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.10", + "darling_macro 0.20.10", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", ] [[package]] @@ -1142,13 +1817,39 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "serde", + "strsim", + "syn 2.0.101", +] + [[package]] name = "darling_macro" version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ - "darling_core", + "darling_core 0.20.10", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", "quote", "syn 2.0.101", ] @@ -1212,6 +1913,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8" +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "deranged" version = "0.3.11" @@ -1275,7 +1986,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04001f23ba8843dc315804fa324000376084dfb1c30794ff68dd279e6e5696d5" dependencies = [ "bigdecimal 0.3.1", - "bitflags 2.6.0", + "bitflags 2.9.4", "byteorder", "chrono", "diesel_derives", @@ -1370,6 +2081,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", + "const-oid", "crypto-common", "subtle", ] @@ -1443,7 +2155,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0892a17df262a24294c382f0d5997571006e7a4348b4327557c4ff1cd4a8bccc" dependencies = [ - "darling", + "darling 0.20.10", "either", "heck 0.5.0", "proc-macro2", @@ -1451,11 +2163,54 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "serdect", + "signature", +] + [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + +[[package]] +name = "elliptic-curve" +version = "0.13.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "serdect", + "subtle", + "zeroize", +] [[package]] name = "embedded-io" @@ -1614,6 +2369,16 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "firestorm" version = "0.4.6" @@ -1644,6 +2409,16 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +dependencies = [ + "bitflags 2.9.4", + "rustc_version", +] + [[package]] name = "flate2" version = "1.0.30" @@ -1813,7 +2588,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "debugid", "fxhash", "serde", @@ -1828,6 +2603,7 @@ checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", + "zeroize", ] [[package]] @@ -1866,7 +2642,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" dependencies = [ "fallible-iterator 0.3.0", - "indexmap 2.9.0", + "indexmap 2.12.0", "stable_deref_trait", ] @@ -1892,6 +2668,12 @@ dependencies = [ "time", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "globset" version = "0.4.14" @@ -1910,7 +2692,11 @@ name = "graph" version = "0.36.0" dependencies = [ "Inflector", + "ahash", + "alloy", "anyhow", + "arrow", + "arrow-flight", "async-stream", "async-trait", "atomic_refcell", @@ -1933,6 +2719,7 @@ dependencies = [ "futures 0.3.31", "graph_derive", "graphql-parser", + "half", "hex", "hex-literal 0.4.1", "http 0.2.12", @@ -1942,6 +2729,7 @@ dependencies = [ "hyper 1.6.0", "hyper-util", "itertools", + "lazy-regex", "lazy_static", "lru_time_cache", "maplit", @@ -1971,7 +2759,8 @@ dependencies = [ "slog-async", "slog-envlogger", "slog-term", - "sqlparser", + "sqlparser 0.46.0", + "sqlparser 0.57.0", "stable-hash 0.3.4", "stable-hash 0.4.4", "strum_macros 0.27.1", @@ -1980,6 +2769,7 @@ dependencies = [ "tokio", "tokio-retry", "tokio-stream", + "tokio-util 0.7.16", "toml", "tonic", "tonic-build", @@ -2059,18 +2849,29 @@ dependencies = [ name = "graph-core" version = "0.36.0" dependencies = [ + "alloy", "anyhow", + "arrow", "async-trait", "atomic_refcell", "bytes", + "chrono", "cid", + "futures 0.3.31", "graph", "graph-chain-ethereum", "graph-chain-near", "graph-chain-substreams", "graph-runtime-wasm", + "indoc", + "itertools", + "parking_lot", + "prometheus", "serde_yaml", + "slog", "thiserror 2.0.12", + "tokio", + "tokio-util 0.7.16", "tower 0.5.2 (git+https://github.com/tower-rs/tower.git)", "tower-test", "wiremock", @@ -2120,6 +2921,7 @@ dependencies = [ "serde", "shellexpand", "termcolor", + "tokio-util 0.7.16", "url", ] @@ -2257,7 +3059,7 @@ dependencies = [ "graph-runtime-wasm", "graph-server-index-node", "graph-store-postgres", - "secp256k1", + "secp256k1 0.21.3", "serde", "serde_yaml", "slog", @@ -2348,6 +3150,17 @@ dependencies = [ "serde_with", ] +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "h2" version = "0.3.26" @@ -2360,10 +3173,10 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.9.0", + "indexmap 2.12.0", "slab", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tracing", ] @@ -2379,13 +3192,25 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.9.0", + "indexmap 2.12.0", "slab", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tracing", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", 
+ "num-traits", + "zerocopy", +] + [[package]] name = "handlebars" version = "5.1.2" @@ -2418,6 +3243,12 @@ dependencies = [ "serde", ] +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2484,6 +3315,18 @@ name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +dependencies = [ + "serde", +] + +[[package]] +name = "hex-conservative" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5313b072ce3c597065a808dbf612c4c8e8590bdbf8b579508bf7a762c5eae6cd" +dependencies = [ + "arrayvec 0.7.4", +] [[package]] name = "hex-literal" @@ -2948,13 +3791,23 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.16.0", "serde", + "serde_core", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", ] [[package]] @@ -3132,6 +3985,19 @@ dependencies = [ "tracing", ] +[[package]] +name = "k256" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6e3919bbaa2945715f0bb6d3934a173d1e9a59ac23767fbaaef277265a7411b" +dependencies = [ + "cfg-if 1.0.0", + "ecdsa", + "elliptic-curve", + "serdect", + "sha2", +] + [[package]] name = "keccak" version = "0.1.5" @@ -3141,6 +4007,29 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "lazy-regex" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60c7310b93682b36b98fa7ea4de998d3463ccbebd94d935d6b48ba5b6ffa7126" +dependencies = [ + "lazy-regex-proc_macros", + "once_cell", + "regex", +] + +[[package]] +name = "lazy-regex-proc_macros" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba01db5ef81e17eb10a5e0f2109d1b3a3e29bac3070fdbd7d156bf7dbd206a1" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.101", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -3159,6 +4048,70 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.171" @@ -3177,7 +4130,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "libc", ] @@ -3201,9 +4154,9 @@ checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ "autocfg", "scopeguard", @@ -3230,6 +4183,17 @@ dependencies = [ "libc", ] +[[package]] +name = "macro-string" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b27834086c65ec3f9387b096d66e99f221cf081c2b738042aa252bcd41204e3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "maplit" version = "1.0.2" @@ -3409,6 +4373,20 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint 0.4.6", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.2.6" @@ -3431,6 +4409,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -3438,11 +4425,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] -name = "num-integer" -version = "0.1.46" +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ + "num-bigint 0.4.6", + "num-integer", "num-traits", ] @@ -3453,6 +4462,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -3465,6 +4475,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nybbles" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0418987d1aaed324d95b4beffc93635e19be965ed5d63ec07a35980fe3b71a4" +dependencies = [ + "cfg-if 1.0.0", + "ruint", + "serde", + "smallvec", +] + [[package]] name = "object" version = "0.36.7" @@ -3473,7 +4495,7 @@ checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "crc32fast", "hashbrown 0.15.2", - "indexmap 2.9.0", + "indexmap 2.12.0", "memchr", ] @@ -3513,9 +4535,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "opaque-debug" @@ -3529,7 +4551,7 @@ version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "cfg-if 1.0.0", "foreign-types", "libc", @@ -3616,9 +4638,9 @@ checksum = "e1ad0aff30c1da14b1254fcb2af73e1fa9a28670e584a626f53a369d0e157304" [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ "lock_api", "parking_lot_core", @@ -3626,9 +4648,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if 1.0.0", "libc", @@ -3637,6 +4659,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -3695,7 +4723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.9.0", + "indexmap 2.12.0", ] [[package]] @@ -3706,7 +4734,7 @@ checksum = "7a98c6720655620a521dcc722d0ad66cd8afd5d86e34a89ef691c50b7b24de06" dependencies = [ "fixedbitset", "hashbrown 0.15.2", - "indexmap 
2.9.0", + "indexmap 2.12.0", "serde", ] @@ -3760,6 +4788,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.30" @@ -3895,16 +4933,38 @@ checksum = "ef08705fa1589a1a59aa924ad77d14722cb0cd97b67dd5004ed5f4a4873fce8d" dependencies = [ "autocfg", "equivalent", - "indexmap 2.9.0", + "indexmap 2.12.0", ] [[package]] name = "proc-macro-crate" -version = "3.1.0" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit 0.23.7", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" dependencies = [ - "toml_edit 0.21.1", + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.101", ] [[package]] @@ -3944,6 +5004,20 @@ dependencies = [ "thiserror 1.0.61", ] +[[package]] +name = "proptest" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" +dependencies = [ + "bitflags 2.9.4", + "num-traits", + "rand 0.9.1", + "rand_chacha 0.9.0", + "rand_xorshift", + "unarray", +] + [[package]] name = "prost" version = "0.13.5" @@ -4020,7 +5094,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973" dependencies = [ "anyhow", - "indexmap 2.9.0", + "indexmap 2.12.0", "log", "protobuf 3.7.2", "protobuf-support", @@ -4150,6 +5224,7 @@ dependencies = [ "libc", "rand_chacha 0.3.1", "rand_core 0.6.4", + "serde", ] [[package]] @@ -4160,6 +5235,7 @@ checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", + "serde", ] [[package]] @@ -4198,6 +5274,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom 0.3.1", + "serde", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core 0.9.3", ] [[package]] @@ -4220,6 +5306,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.101", +] + [[package]] name = "redis" version = "0.31.0" @@ -4241,7 +5347,7 @@ dependencies = [ "sha1_smol", "socket2", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "url", ] @@ -4260,7 +5366,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", ] [[package]] @@ -4290,9 +5396,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" dependencies = [ "aho-corasick", "memchr", @@ -4302,9 +5408,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" dependencies = [ "aho-corasick", "memchr", @@ -4313,9 +5419,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "reqwest" @@ -4359,7 +5465,7 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tower 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "url", @@ -4370,6 +5476,16 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + [[package]] name = "ring" version = "0.17.13" @@ -4394,6 +5510,28 @@ dependencies = [ "rustc-hex", ] +[[package]] +name = "ruint" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecb38f82477f20c5c3d62ef52d7c4e536e38ea9b73fb570a20c5cae0e14bcf6" +dependencies = [ + "alloy-rlp", + "proptest", + "rand 0.8.5", + "rand 0.9.1", + "ruint-macro", + "serde", + "valuable", + "zeroize", +] + +[[package]] +name = "ruint-macro" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48fd7bd8a6377e15ad9d42a8ec25371b94ddc67abe7c8b9127bec79bebaaae18" + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -4420,9 +5558,9 @@ checksum = "3e75f6a532d0fd9f7f13144f392b6ad56a32696bfcd9c78f797f16bbb6f072d6" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] @@ -4433,7 +5571,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.4.14", @@ -4446,7 +5584,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys 0.9.4", @@ -4565,13 +5703,40 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "serdect", + "subtle", + "zeroize", +] + [[package]] name = "secp256k1" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c42e6f1735c5f00f51e43e28d6634141f2bcad10931b2609ddd74a86d751260" dependencies = [ - "secp256k1-sys", + "secp256k1-sys 0.4.2", +] + +[[package]] +name = "secp256k1" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b50c5943d326858130af85e049f2661ba3c78b26589b8ab98e65e80ae44a1252" +dependencies = [ + "bitcoin_hashes", + "rand 0.8.5", + "secp256k1-sys 0.10.1", + "serde", ] [[package]] @@ -4583,13 +5748,22 @@ dependencies = [ "cc", ] +[[package]] +name = "secp256k1-sys" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4387882333d3aa8cb20530a17c69a3752e97837832f34f6dccc760e715001d9" +dependencies = [ + "cc", +] + [[package]] name = "security-framework" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -4602,7 +5776,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "core-foundation 0.10.0", "core-foundation-sys", "libc", @@ -4630,18 +5804,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -4719,7 +5903,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.9.0", + "indexmap 2.12.0", "serde", "serde_derive", "serde_json", @@ -4733,7 +5917,7 @@ version = "3.12.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d00caa5193a3c8362ac2b73be6b9e768aa5a4b2f721d8f4b339600c3cb51f8e" dependencies = [ - "darling", + "darling 0.20.10", "proc-macro2", "quote", "syn 2.0.101", @@ -4745,13 +5929,23 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.9.0", + "indexmap 2.12.0", "itoa", "ryu", "serde", "unsafe-libyaml", ] +[[package]] +name = "serdect" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177" +dependencies = [ + "base16ct", + "serde", +] + [[package]] name = "sha-1" version = "0.9.8" @@ -4827,6 +6021,22 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core 0.6.4", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + [[package]] name = "siphasher" version = "0.3.11" @@ -4950,6 +6160,16 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sptr" version = "0.3.2" @@ -4965,6 +6185,28 @@ dependencies = [ "log", ] +[[package]] +name = "sqlparser" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07c5f081b292a3d19637f0b32a79e28ff14a9fd23ef47bd7fce08ff5de221eca" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "stable-hash" version = "0.3.4" @@ -4999,6 +6241,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +dependencies = [ + "cc", + "cfg-if 1.0.0", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -5169,6 +6424,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn-solidity" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0b198d366dbec045acfcd97295eb653a7a2b40e4dc764ef1e79aafcad439d3c" +dependencies = [ + "paste", + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "sync_wrapper" version = "0.1.2" @@ -5201,7 +6468,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -5343,6 +6610,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + [[package]] name = "time" version = "0.3.36" @@ -5490,7 +6766,7 @@ dependencies = [ "rand 0.9.1", "socket2", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "whoami", ] @@ -5525,7 +6801,7 @@ dependencies = [ "futures-core", "pin-project-lite", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", ] [[package]] @@ -5570,9 +6846,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" dependencies = [ "bytes", "futures-core", @@ -5590,7 +6866,7 @@ checksum = "ac2caab0bf757388c6c0ae23b3293fdb463fee59434529014f85e3263b995c28" dependencies = [ "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.6", "toml_edit 0.22.16", ] @@ -5604,14 +6880,12 @@ dependencies = [ ] [[package]] -name = "toml_edit" -version = "0.21.1" +name = "toml_datetime" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ - "indexmap 2.9.0", - "toml_datetime", - "winnow 0.5.40", + "serde_core", ] [[package]] @@ -5620,13 +6894,34 @@ version = "0.22.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "278f3d518e152219c994ce877758516bca5e118eaed6996192a774fb9fbf0788" dependencies = [ - "indexmap 2.9.0", + "indexmap 2.12.0", "serde", "serde_spanned", - "toml_datetime", + "toml_datetime 0.6.6", "winnow 0.6.13", ] +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap 2.12.0", + "toml_datetime 0.7.3", + "toml_parser", + "winnow 0.7.13", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" +dependencies = [ + "winnow 0.7.13", +] + [[package]] name = "tonic" version = "0.12.3" @@ -5689,7 +6984,7 @@ dependencies = [ "rand 0.8.5", "slab", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tower-layer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tower-service 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", @@ -5719,12 +7014,12 @@ dependencies = [ "futures-core", "futures-util", "hdrhistogram", - "indexmap 2.9.0", + "indexmap 2.12.0", "pin-project-lite", "slab", "sync_wrapper 1.0.1", "tokio", - "tokio-util 0.7.11", + "tokio-util 0.7.16", "tower-layer 0.3.3 (git+https://github.com/tower-rs/tower.git)", "tower-service 0.3.3 (git+https://github.com/tower-rs/tower.git)", "tracing", @@ -5736,7 +7031,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "bytes", "http 1.3.1", "http-body 1.0.0", @@ -5900,6 +7195,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicase" version = "2.7.0" @@ -5917,9 +7218,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" @@ -6034,6 +7335,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -6210,7 +7517,7 @@ version = "0.118.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f1154f1ab868e2a01d9834a805faca7bf8b50d041b4ca714d005d0dab1c50c" dependencies = [ - "indexmap 2.9.0", + "indexmap 2.12.0", "semver", ] @@ -6220,9 +7527,9 @@ version = "0.229.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc3b1f053f5d41aa55640a1fa9b6d1b8a9e4418d118ce308d20e24ff3575a8c" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", "hashbrown 0.15.2", - "indexmap 2.9.0", + "indexmap 2.12.0", "semver", "serde", ] @@ -6233,8 +7540,8 @@ version = "0.233.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b51cb03afce7964bbfce46602d6cb358726f36430b6ba084ac6020d8ce5bc102" dependencies = [ - "bitflags 2.6.0", - "indexmap 2.9.0", + "bitflags 2.9.4", + "indexmap 2.12.0", "semver", ] @@ -6258,7 +7565,7 @@ dependencies = [ "addr2line 0.24.2", "anyhow", "async-trait", - "bitflags 2.6.0", + "bitflags 2.9.4", "bumpalo", "cc", "cfg-if 1.0.0", @@ -6266,7 +7573,7 @@ dependencies = [ "fxprof-processed-profile", "gimli 0.31.1", "hashbrown 0.15.2", - "indexmap 2.9.0", + "indexmap 2.12.0", "ittapi", "libc", "log", @@ -6393,7 +7700,7 @@ dependencies = [ "cranelift-bitset", "cranelift-entity", "gimli 0.31.1", - "indexmap 2.9.0", + "indexmap 2.12.0", "log", "object", "postcard", @@ -6499,7 +7806,7 @@ checksum = "6f8d793a398e2974d562e65c8d366f39a942fe1ce7970244d9d6e5f96f29b534" dependencies = [ "anyhow", "heck 0.5.0", - "indexmap 2.9.0", + "indexmap 2.12.0", "wit-parser", ] @@ -6558,7 +7865,7 @@ dependencies = [ "pin-project", "reqwest", "rlp", - "secp256k1", + "secp256k1 0.21.3", "serde", "serde_json", "soketto", @@ -6913,18 +8220,18 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.5.40" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" dependencies = [ "memchr", ] [[package]] name = "winnow" -version = "0.6.13" +version = "0.7.13" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" dependencies = [ "memchr", ] @@ -6959,7 +8266,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.4", ] [[package]] @@ -6970,7 +8277,7 @@ checksum = "459c6ba62bf511d6b5f2a845a2a736822e38059c1cfa0b644b467bbbfae4efa6" dependencies = [ "anyhow", "id-arena", - "indexmap 2.9.0", + "indexmap 2.12.0", "log", "semver", "serde", @@ -7037,6 +8344,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "zerofrom" version = "0.1.6" @@ -7063,6 +8390,20 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] [[package]] name = "zerovec" diff --git a/Cargo.toml b/Cargo.toml index 2ec70e254ee..b221fee9af0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,21 @@ substreams-entity-change = "2" substreams-near-core = "=0.10.2" rand = { version = "0.9.1", features = ["os_rng"] } +prometheus = "0.13.4" + +# Dependencies related to Amp subgraphs +ahash = "0.8.11" +alloy = { version = "1.0.12", default-features = false, features = ["json-abi", "serde"] } +arrow = { version = "=55.0.0" } +arrow-flight = { version = "=55.0.0", features = ["flight-sql-experimental"] } +futures = "0.3.31" +half = "2.7.1" +indoc = "2.0.7" +lazy-regex = "3.4.1" +parking_lot = "0.12.4" +sqlparser-latest = { version = "0.57.0", package = "sqlparser", features = ["visitor"] } +tokio-util = "0.7.15" + # Incremental compilation on Rust 1.58 causes an ICE on build. As soon as graph node builds again, these can be removed. 
[profile.test] incremental = false diff --git a/chain/ethereum/src/runtime/runtime_adapter.rs b/chain/ethereum/src/runtime/runtime_adapter.rs index 951958d786b..098c2e30501 100644 --- a/chain/ethereum/src/runtime/runtime_adapter.rs +++ b/chain/ethereum/src/runtime/runtime_adapter.rs @@ -164,6 +164,7 @@ impl blockchain::RuntimeAdapter for RuntimeAdapter { create_host_fns(abis, archive, call_cache, eth_adapters, eth_call_gas) } data_source::DataSource::Offchain(_) => vec![], + data_source::DataSource::Amp(_) => vec![], }; Ok(host_fns) diff --git a/core/Cargo.toml b/core/Cargo.toml index 0156ed17096..c6e5ca9fa55 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,6 +19,19 @@ thiserror = { workspace = true } cid = "0.11.1" anyhow = "1.0" +# Dependencies related to Amp subgraphs +alloy.workspace = true +arrow.workspace = true +chrono.workspace = true +futures.workspace = true +indoc.workspace = true +itertools.workspace = true +parking_lot.workspace = true +prometheus.workspace = true +slog.workspace = true +tokio-util.workspace = true +tokio.workspace = true + [dev-dependencies] tower-test = { git = "https://github.com/tower-rs/tower.git" } wiremock = "0.6.3" diff --git a/core/src/amp_subgraph/manager.rs b/core/src/amp_subgraph/manager.rs new file mode 100644 index 00000000000..4eb3ed2aafe --- /dev/null +++ b/core/src/amp_subgraph/manager.rs @@ -0,0 +1,166 @@ +use std::sync::Arc; + +use anyhow::Context; +use async_trait::async_trait; +use graph::{ + amp, + components::{ + link_resolver::LinkResolver, + metrics::MetricsRegistry, + store::{DeploymentLocator, SubgraphStore}, + subgraph::SubgraphInstanceManager, + }, + env::EnvVars, + log::factory::LoggerFactory, + prelude::CheapClone, +}; +use slog::{debug, error}; +use tokio_util::sync::CancellationToken; + +use super::{runner, Metrics, Monitor}; + +/// Manages Amp subgraph runner futures. +/// +/// Creates and schedules Amp subgraph runner futures for execution on demand. +/// Also handles stopping previously started Amp subgraph runners. +pub struct Manager { + logger_factory: LoggerFactory, + metrics_registry: Arc, + env_vars: Arc, + monitor: Monitor, + subgraph_store: Arc, + link_resolver: Arc, + amp_client: Arc, +} + +impl Manager +where + SS: SubgraphStore, + LR: LinkResolver, + AC: amp::Client, +{ + /// Creates a new Amp subgraph manager. 
+ pub fn new( + logger_factory: &LoggerFactory, + metrics_registry: Arc, + env_vars: Arc, + cancel_token: &CancellationToken, + subgraph_store: Arc, + link_resolver: Arc, + amp_client: Arc, + ) -> Self { + let logger = logger_factory.component_logger("AmpSubgraphManager", None); + let logger_factory = logger_factory.with_parent(logger); + + let monitor = Monitor::new(&logger_factory, cancel_token); + + Self { + logger_factory, + metrics_registry, + env_vars, + monitor, + subgraph_store, + link_resolver, + amp_client, + } + } +} + +#[async_trait] +impl SubgraphInstanceManager for Manager +where + SS: SubgraphStore, + LR: LinkResolver, + AC: amp::Client + Send + Sync + 'static, +{ + fn can_manage( + &self, + deployment: &DeploymentLocator, + raw_manifest: &serde_yaml::Mapping, + ) -> bool { + let logger = self.logger_factory.subgraph_logger(deployment); + let can_manage = amp::manifest::is_amp_manifest(raw_manifest); + + if can_manage { + debug!(logger, "Subgraph assigned to Amp instance manager"); + } + + can_manage + } + + async fn start_subgraph( + self: Arc, + deployment: DeploymentLocator, + raw_manifest: serde_yaml::Mapping, + _stop_block: Option, + ) { + let manager = self.cheap_clone(); + + self.monitor.start( + deployment.cheap_clone(), + Box::new(move |cancel_token| { + Box::pin(async move { + let logger = manager.logger_factory.subgraph_logger(&deployment); + + let store = manager + .subgraph_store + .cheap_clone() + .writable(logger.cheap_clone(), deployment.id, Vec::new().into()) + .await + .context("failed to create writable store")?; + + let metrics = Metrics::new( + &logger, + manager.metrics_registry.cheap_clone(), + store.cheap_clone(), + deployment.hash.cheap_clone(), + ); + + let manifest = amp::Manifest::resolve::( + &logger, + manager.link_resolver.cheap_clone(), + manager.amp_client.cheap_clone(), + manager.env_vars.max_spec_version.cheap_clone(), + deployment.hash.cheap_clone(), + raw_manifest, + ) + .await?; + + store + .start_subgraph_deployment(&logger) + .await + .context("failed to start subgraph deployment")?; + + let runner_context = runner::Context::new( + &logger, + &manager.env_vars.amp, + manager.amp_client.cheap_clone(), + store, + deployment.hash.cheap_clone(), + manifest, + metrics, + ); + + let runner_result = runner::new_runner(runner_context)(cancel_token).await; + + match manager.subgraph_store.stop_subgraph(&deployment).await { + Ok(()) => { + debug!(logger, "Subgraph writer stopped"); + } + Err(e) => { + error!(logger, "Failed to stop subgraph writer"; + "e" => ?e + ); + } + } + + runner_result + }) + }), + ); + } + + async fn stop_subgraph(&self, deployment: DeploymentLocator) { + self.monitor.stop(deployment); + } +} diff --git a/core/src/amp_subgraph/metrics.rs b/core/src/amp_subgraph/metrics.rs new file mode 100644 index 00000000000..1e74a4bcb9a --- /dev/null +++ b/core/src/amp_subgraph/metrics.rs @@ -0,0 +1,260 @@ +use std::{sync::Arc, time::Duration}; + +use alloy::primitives::BlockNumber; +use graph::{ + cheap_clone::CheapClone, + components::{ + metrics::{stopwatch::StopwatchMetrics, MetricsRegistry}, + store::WritableStore, + }, + prelude::DeploymentHash, +}; +use indoc::indoc; +use prometheus::{IntCounter, IntGauge}; +use slog::Logger; + +/// Contains metrics specific to a deployment. 
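Every metric defined in this new file follows the same prometheus pattern: an `IntGauge` or `IntCounter` is registered once with a constant `deployment` label and then updated from the runner. For orientation, a minimal standalone sketch of that pattern using the `prometheus` crate directly; the registry, metric value, and deployment hash below are illustrative stand-ins, not graph-node's `MetricsRegistry` wrapper.

use prometheus::{IntGauge, Opts, Registry, TextEncoder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let registry = Registry::new();

    // One gauge per deployment, distinguished by a constant label,
    // mirroring the `amp_deployment_status` gauge defined below.
    let status = IntGauge::with_opts(
        Opts::new("amp_deployment_status", "Current indexing status of a deployment")
            .const_label("deployment", "QmExampleDeploymentHash"),
    )?;
    registry.register(Box::new(status.clone()))?;

    // 2 corresponds to "deployment is being indexed" in the encoding used below.
    status.set(2);

    // Render everything registered so far in the Prometheus text exposition format.
    let rendered = TextEncoder::new().encode_to_string(&registry.gather())?;
    print!("{rendered}");
    Ok(())
}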
+pub(super) struct Metrics { + pub(super) deployment_status: DeploymentStatus, + pub(super) deployment_head: DeploymentHead, + pub(super) deployment_target: DeploymentTarget, + pub(super) deployment_synced: DeploymentSynced, + pub(super) indexing_duration: IndexingDuration, + pub(super) blocks_processed: BlocksProcessed, + pub(super) stopwatch: StopwatchMetrics, +} + +impl Metrics { + /// Creates new deployment specific metrics. + pub(super) fn new( + logger: &Logger, + metrics_registry: Arc, + store: Arc, + deployment: DeploymentHash, + ) -> Self { + let stopwatch = StopwatchMetrics::new( + logger.cheap_clone(), + deployment.cheap_clone(), + "amp-process", + metrics_registry.cheap_clone(), + store.shard().to_string(), + ); + + let const_labels = [("deployment", &deployment)]; + + Self { + deployment_status: DeploymentStatus::new(&metrics_registry, const_labels.clone()), + deployment_head: DeploymentHead::new(&metrics_registry, const_labels.clone()), + deployment_target: DeploymentTarget::new(&metrics_registry, const_labels.clone()), + deployment_synced: DeploymentSynced::new(&metrics_registry, const_labels.clone()), + indexing_duration: IndexingDuration::new(&metrics_registry, const_labels.clone()), + blocks_processed: BlocksProcessed::new(&metrics_registry, const_labels.clone()), + stopwatch, + } + } +} + +/// Reports the current indexing status of a deployment. +pub(super) struct DeploymentStatus(IntGauge); + +impl DeploymentStatus { + const STATUS_STARTING: i64 = 1; + const STATUS_RUNNING: i64 = 2; + const STATUS_STOPPED: i64 = 3; + const STATUS_FAILED: i64 = 4; + + fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_gauge = metrics_registry + .new_int_gauge( + "amp_deployment_status", + indoc!( + " + Indicates the current indexing status of a deployment. + Possible values: + 1 - graph-node is preparing to start indexing; + 2 - deployment is being indexed; + 3 - indexing is stopped by request; + 4 - indexing failed; + " + ), + const_labels, + ) + .expect("failed to register `amp_deployment_status` gauge"); + + Self(int_gauge) + } + + /// Records that the graph-node is preparing to start indexing. + pub fn starting(&self) { + self.0.set(Self::STATUS_STARTING); + } + + /// Records that the deployment is being indexed. + pub fn running(&self) { + self.0.set(Self::STATUS_RUNNING); + } + + /// Records that the indexing stopped by request. + pub fn stopped(&self) { + self.0.set(Self::STATUS_STOPPED); + } + + /// Records that the indexing failed. + pub fn failed(&self) { + self.0.set(Self::STATUS_FAILED); + } +} + +/// Tracks the most recent block number processed by a deployment. +pub(super) struct DeploymentHead(IntGauge); + +impl DeploymentHead { + fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_gauge = metrics_registry + .new_int_gauge( + "amp_deployment_head", + "Tracks the most recent block number processed by a deployment", + const_labels, + ) + .expect("failed to register `amp_deployment_head` gauge"); + + Self(int_gauge) + } + + /// Updates the most recent block number processed by this deployment. + pub(super) fn update(&self, new_most_recent_block_number: BlockNumber) { + self.0.set( + i64::try_from(new_most_recent_block_number) + .expect("new most recent block number does not fit into `i64`"), + ); + } +} + +/// Tracks the target block number of a deployment. 
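The `StopwatchMetrics` handle created in `Metrics::new` above is used throughout the runner as `let _section = cx.metrics.stopwatch.start_section("...")`. That is the usual RAII timing idiom: the returned guard is assumed to record the section's elapsed time when it goes out of scope. A small self-contained analogue of the idiom, built on plain `std::time` rather than the real `StopwatchMetrics` type:

use std::time::{Duration, Instant};

/// Adds the elapsed time of a named section to a running total when dropped.
struct SectionGuard<'a> {
    name: &'static str,
    started: Instant,
    total: &'a mut Duration,
}

impl Drop for SectionGuard<'_> {
    fn drop(&mut self) {
        let elapsed = self.started.elapsed();
        *self.total += elapsed;
        println!("section `{}` took {elapsed:?}", self.name);
    }
}

fn start_section<'a>(name: &'static str, total: &'a mut Duration) -> SectionGuard<'a> {
    SectionGuard { name, started: Instant::now(), total }
}

fn main() {
    let mut total = Duration::ZERO;
    {
        // The binding is named `_section` only to keep the guard alive until
        // the end of the scope, just like the runner code further down.
        let _section = start_section("process_record_batch_group", &mut total);
        std::thread::sleep(Duration::from_millis(5));
    }
    println!("total time in sections: {total:?}");
}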
+pub(super) struct DeploymentTarget(IntGauge); + +impl DeploymentTarget { + fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_gauge = metrics_registry + .new_int_gauge( + "amp_deployment_target", + "Tracks the target block number of a deployment", + const_labels, + ) + .expect("failed to register `amp_deployment_target` gauge"); + + Self(int_gauge) + } + + /// Updates the target block number of this deployment. + pub(super) fn update(&self, new_target_block_number: BlockNumber) { + self.0.set( + i64::try_from(new_target_block_number) + .expect("new target block number does not fit into `i64`"), + ); + } +} + +/// Indicates whether a deployment has reached the chain head or the end block since it was deployed. +pub(super) struct DeploymentSynced(IntGauge); + +impl DeploymentSynced { + const NOT_SYNCED: i64 = 0; + const SYNCED: i64 = 1; + + pub fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_gauge = metrics_registry + .new_int_gauge( + "amp_deployment_synced", + indoc!( + " + Indicates whether a deployment has reached the chain head or the end block since it was deployed. + Possible values: + 0 - deployment is not synced; + 1 - deployment is synced; + " + ), + const_labels, + ) + .expect("failed to register `amp_deployment_synced` gauge"); + + Self(int_gauge) + } + + /// Records the current sync status of this deployment. + pub fn record(&self, synced: bool) { + self.0.set(if synced { + Self::SYNCED + } else { + Self::NOT_SYNCED + }); + } +} + +/// Tracks the total duration in seconds of deployment indexing. +#[derive(Clone)] +pub(super) struct IndexingDuration(IntCounter); + +impl IndexingDuration { + fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_counter = metrics_registry + .new_int_counter( + "amp_deployment_indexing_duration_seconds", + "Tracks the total duration in seconds of deployment indexing", + const_labels, + ) + .expect("failed to register `amp_deployment_indexing_duration_seconds` counter"); + + Self(int_counter) + } + + /// Records a new indexing duration of this deployment. + pub(super) fn record(&self, duration: Duration) { + self.0.inc_by(duration.as_secs()) + } +} + +/// Tracks the total number of blocks processed by a deployment. +pub(super) struct BlocksProcessed(IntCounter); + +impl BlocksProcessed { + fn new( + metrics_registry: &MetricsRegistry, + const_labels: impl IntoIterator, + ) -> Self { + let int_counter = metrics_registry + .new_int_counter( + "amp_deployment_blocks_processed_count", + "Tracks the total number of blocks processed by a deployment", + const_labels, + ) + .expect("failed to register `amp_deployment_blocks_processed_count` counter"); + + Self(int_counter) + } + + /// Records a new processed block. + pub(super) fn record_one(&self) { + self.record(1); + } + + /// Records the new processed blocks. 
+ pub(super) fn record(&self, number_of_blocks_processed: usize) { + self.0.inc_by(number_of_blocks_processed as u64); + } +} diff --git a/core/src/amp_subgraph/mod.rs b/core/src/amp_subgraph/mod.rs new file mode 100644 index 00000000000..3d3846742aa --- /dev/null +++ b/core/src/amp_subgraph/mod.rs @@ -0,0 +1,8 @@ +mod manager; +mod metrics; +mod monitor; +mod runner; + +use self::{metrics::Metrics, monitor::Monitor}; + +pub use self::manager::Manager; diff --git a/core/src/amp_subgraph/monitor.rs b/core/src/amp_subgraph/monitor.rs new file mode 100644 index 00000000000..cfa1de2942d --- /dev/null +++ b/core/src/amp_subgraph/monitor.rs @@ -0,0 +1,573 @@ +//! This module is responsible for executing subgraph runner futures. +//! +//! # Terminology used in this module +//! +//! `active subgraph` - A subgraph that was started and is still tracked. +//! `running subgraph` - A subgraph that has an instance that is making progress or stopping. +//! `subgraph instance` - A background process that executes the subgraph runner future. + +use std::{ + collections::{hash_map::Entry, HashMap}, + fmt, + sync::{ + atomic::{AtomicU32, Ordering::SeqCst}, + Arc, + }, + time::Duration, +}; + +use anyhow::Result; +use futures::future::BoxFuture; +use graph::{ + cheap_clone::CheapClone, components::store::DeploymentLocator, log::factory::LoggerFactory, +}; +use slog::{debug, error, info, warn, Logger}; +use tokio::{sync::mpsc, task::JoinHandle, time::timeout}; +use tokio_util::sync::CancellationToken; + +/// Represents the maximum amount of time a subgraph instance is allowed to run +/// after it receives a cancel signal. +/// +/// If a subgraph instance does not complete its execution in this amount of time +/// it is considered unresponsive and is aborted. +const SUBGRAPH_INSTANCE_GRACE_PERIOD: Duration = { + if cfg!(test) { + Duration::from_millis(300) + } else if cfg!(debug_assertions) { + Duration::from_secs(30) + } else { + Duration::from_secs(300) + } +}; + +/// Represents the subgraph runner future. +/// +/// This is the future that performs the subgraph indexing. +/// It is expected to return only on deterministic failures or when indexing is completed. +/// All retry functionality must be handled internally by this future. +pub(super) type BoxRunner = + Box BoxFuture<'static, Result<()>> + Send + 'static>; + +/// Manages the lifecycle of subgraph runners. +/// +/// Ensures that there is at most one subgraph instance running +/// for any subgraph deployment at any point in time. +/// Handles starting, stopping and restarting subgraphs. +pub(super) struct Monitor { + logger_factory: Arc, + + /// Every subgraph instance is assigned a cancel token derived from this token. + /// + /// This means that the `Monitor` can send cancel signals to all subgraph instances at once, + /// and to each subgraph instance individually. + cancel_token: CancellationToken, + + /// The channel that is used to send subgraph commands. + /// + /// Every subgraph start and stop request results in a command that is sent to the + /// background process that manages the subgraph instances. + command_tx: mpsc::UnboundedSender, + + /// When a subgraph starts it is assigned a sequential ID. + /// The ID is then kept in memory in the list of active subgraphs. + /// + /// When the subgraph completes execution it should be removed from the + /// list of active subgraphs, so that it can be restarted. + /// + /// This ID is required to be able to check if the active subgraph + /// is the same subgraph instance that was stopped. 
+ /// + /// If the IDs do not match, it means that the subgraph was force restarted, + /// ignoring the state of the previous subgraph instance, or that the subgraph + /// was restarted after the previous subgraph instance completed its execution + /// but before the remove request was processed. + subgraph_instance_id: Arc, +} + +impl Monitor { + /// Creates a new subgraph monitor. + /// + /// Spawns a background process that manages the subgraph start and stop requests. + /// + /// A new cancel token is derived from the `cancel_token` and only the derived token is used by the + /// subgraph monitor and its background process. + pub(super) fn new(logger_factory: &LoggerFactory, cancel_token: &CancellationToken) -> Self { + let logger = logger_factory.component_logger("AmpSubgraphMonitor", None); + let logger_factory = Arc::new(logger_factory.with_parent(logger)); + + // A derived token makes sure it is not possible to accidentally cancel the parent token + let cancel_token = cancel_token.child_token(); + + // It is safe to use an unbounded channel here, because it's pretty much unrealistic that the + // command processor will fall behind so much that the channel buffer will take up all the memory. + // The command processor is non-blocking and delegates long-running processes to detached tasks. + let (command_tx, command_rx) = mpsc::unbounded_channel::(); + + tokio::spawn(Self::command_processor( + logger_factory.cheap_clone(), + cancel_token.cheap_clone(), + command_tx.clone(), + command_rx, + )); + + Self { + logger_factory, + cancel_token, + command_tx, + subgraph_instance_id: Arc::new(AtomicU32::new(0)), + } + } + + /// Starts a subgraph. + /// + /// Sends a subgraph start request to this subgraph monitor that + /// eventually starts the subgraph. + /// + /// # Behaviour + /// + /// - If the subgraph is not active, it starts when the request is processed + /// - If the subgraph is active, it stops, and then restarts + /// - Ensures that there is only one subgraph instance for this subgraph deployment + /// - Multiple consecutive calls in a short time period force restart the subgraph, + /// aborting the active subgraph instance + pub(super) fn start(&self, deployment: DeploymentLocator, runner: BoxRunner) { + let logger = self + .logger_factory + .subgraph_logger(&deployment) + .new(slog::o!("method" => "start")); + + info!(logger, "Starting subgraph"); + handle_send_result( + &logger, + self.command_tx.send(Command::Start { + id: self.subgraph_instance_id.fetch_add(1, SeqCst), + deployment, + runner, + }), + ); + } + + /// Stops the subgraph. + /// + /// Sends a subgraph stop request to this subgraph monitor that + /// eventually stops the subgraph. + /// + /// # Behaviour + /// + /// - If the subgraph is not active does nothing + /// - If the subgraph is active, sends a cancel signal that gracefully stops the subgraph + /// - If the subgraph fails to stop after an extended period of time it aborts + pub(super) fn stop(&self, deployment: DeploymentLocator) { + let logger = self + .logger_factory + .subgraph_logger(&deployment) + .new(slog::o!("method" => "stop")); + + info!(logger, "Stopping subgraph"); + handle_send_result(&logger, self.command_tx.send(Command::Stop { deployment })); + } + + /// Processes commands sent through the command channel. + /// + /// Tracks active subgraphs and keeps a list of pending start commands. + /// Pending start commands are start commands that execute after the related subgraph stops. 
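Both this command processor and the per-instance supervisor further down rely on the same two building blocks: a `tokio::select!` that races the command channel against a `CancellationToken`, and a bounded grace period after which an unresponsive task is aborted. A condensed standalone sketch of that combination, using the `tokio` and `tokio-util` crates this patch already depends on; the command type and the 300 ms grace period are illustrative stand-ins, not the real `Command` enum or `SUBGRAPH_INSTANCE_GRACE_PERIOD`:

use std::time::Duration;
use tokio::{sync::mpsc, time::timeout};
use tokio_util::sync::CancellationToken;

#[derive(Debug)]
enum Cmd {
    Start(&'static str),
    Stop,
}

#[tokio::main]
async fn main() {
    let cancel = CancellationToken::new();
    let (tx, mut rx) = mpsc::unbounded_channel::<Cmd>();

    // Command processor: handle commands until the parent token is cancelled.
    let mut processor = tokio::spawn({
        let cancel = cancel.child_token();
        async move {
            loop {
                tokio::select! {
                    Some(cmd) = rx.recv() => println!("processing {cmd:?}"),
                    _ = cancel.cancelled() => return,
                }
            }
        }
    });

    tx.send(Cmd::Start("deployment-1")).unwrap();
    tx.send(Cmd::Stop).unwrap();

    // Graceful shutdown: signal cancellation, wait up to the grace period, then abort.
    cancel.cancel();
    if timeout(Duration::from_millis(300), &mut processor).await.is_err() {
        // Mirrors the supervisor below: an unresponsive task is aborted.
        processor.abort();
        let _ = processor.await;
    }
}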
+ async fn command_processor( + logger_factory: Arc, + cancel_token: CancellationToken, + command_tx: mpsc::UnboundedSender, + mut command_rx: mpsc::UnboundedReceiver, + ) { + let logger = logger_factory.component_logger("CommandProcessor", None); + let mut subgraph_instances: HashMap = HashMap::new(); + let mut pending_start_commands: HashMap = HashMap::new(); + + loop { + tokio::select! { + Some(command) = command_rx.recv() => { + debug!(logger, "Processing a new command"; + "command" => ?command + ); + + match &command { + Command::Start { .. } => { + Self::process_start_command( + &logger_factory, + &cancel_token, + &mut subgraph_instances, + &mut pending_start_commands, + &command_tx, + command + ); + }, + Command::Stop { .. } => { + Self::process_stop_command( + &logger_factory, + &mut subgraph_instances, + &mut pending_start_commands, + command + ); + }, + Command::Clear { .. } => { + Self::process_clear_command( + &logger_factory, + &mut subgraph_instances, + &mut pending_start_commands, + &command_tx, + command + ); + }, + } + }, + _ = cancel_token.cancelled() => { + debug!(logger, "Stopping command processor"); + + // All active subgraphs will shutdown gracefully + // because their cancel tokens are derived from this cancelled token. + return; + } + } + } + } + + /// Starts a subgraph. + /// + /// # Behaviour + /// + /// - If the subgraph is not active, it starts right away + /// - If the subgraph is active, a cancel signal is sent to the active subgraph instance + /// and this start request is stored in the list of pending start commands + /// - If the subgraph is active and there is already a pending start command, + /// the active subgraph instance aborts, and the subgraph force restarts right away + /// - If the subgraph is active, but its instance is not actually running, + /// the subgraph starts right away + fn process_start_command( + logger_factory: &LoggerFactory, + cancel_token: &CancellationToken, + subgraph_instances: &mut HashMap, + pending_start_commands: &mut HashMap, + command_tx: &mpsc::UnboundedSender, + command: Command, + ) { + let Command::Start { + id, + deployment, + runner, + } = command + else { + unreachable!(); + }; + + let logger = logger_factory.subgraph_logger(&deployment); + let command_logger = logger.new(slog::o!("command" => "start")); + + let cancel_token = cancel_token.child_token(); + let pending_start_command = pending_start_commands.remove(&deployment); + + match subgraph_instances.entry(deployment.cheap_clone()) { + Entry::Vacant(entry) => { + debug!(command_logger, "Subgraph is not active, starting"); + + let subgraph_instance = Self::start_subgraph( + logger, + cancel_token, + id, + deployment, + runner, + command_tx.clone(), + ); + + entry.insert(subgraph_instance); + } + Entry::Occupied(mut entry) => { + let subgraph_instance = entry.get_mut(); + subgraph_instance.cancel_token.cancel(); + + if pending_start_command.is_some() { + debug!(command_logger, "Subgraph is active, force restarting"); + + subgraph_instance.handle.abort(); + + *subgraph_instance = Self::start_subgraph( + logger, + cancel_token, + id, + deployment, + runner, + command_tx.clone(), + ); + + return; + } + + if subgraph_instance.handle.is_finished() { + debug!(command_logger, "Subgraph is not running, starting"); + + *subgraph_instance = Self::start_subgraph( + logger, + cancel_token, + id, + deployment, + runner, + command_tx.clone(), + ); + + return; + } + + debug!(command_logger, "Gracefully restarting subgraph"); + + pending_start_commands.insert( + 
deployment.cheap_clone(), + Command::Start { + id, + deployment, + runner, + }, + ); + } + } + } + + /// Stops a subgraph. + /// + /// # Behaviour + /// + /// - If the subgraph is not active, does nothing + /// - If the subgraph is active, sends a cancel signal to the active subgraph instance + fn process_stop_command( + logger_factory: &LoggerFactory, + subgraph_instances: &mut HashMap, + pending_start_commands: &mut HashMap, + command: Command, + ) { + let Command::Stop { deployment } = command else { + unreachable!(); + }; + + let logger = logger_factory + .subgraph_logger(&deployment) + .new(slog::o!("command" => "stop")); + + if let Some(subgraph_instance) = subgraph_instances.get(&deployment) { + debug!(logger, "Sending cancel signal"); + subgraph_instance.cancel_token.cancel(); + } else { + debug!(logger, "Subgraph is not active"); + } + + pending_start_commands.remove(&deployment); + } + + /// Removes a subgraph from the list of active subgraphs allowing the subgraph to be restarted. + fn process_clear_command( + logger_factory: &LoggerFactory, + subgraph_instances: &mut HashMap, + pending_start_commands: &mut HashMap, + command_tx: &mpsc::UnboundedSender, + command: Command, + ) { + let Command::Clear { id, deployment } = command else { + unreachable!(); + }; + + let logger = logger_factory + .subgraph_logger(&deployment) + .new(slog::o!("command" => "clear")); + + match subgraph_instances.get(&deployment) { + Some(subgraph_instance) if subgraph_instance.id == id => { + debug!(logger, "Removing active subgraph"); + subgraph_instances.remove(&deployment); + } + Some(_subgraph_instance) => { + debug!(logger, "Active subgraph does not need to be removed"); + return; + } + None => { + debug!(logger, "Subgraph is not active"); + } + } + + if let Some(pending_start_command) = pending_start_commands.remove(&deployment) { + debug!(logger, "Resending a pending start command"); + handle_send_result(&logger, command_tx.send(pending_start_command)); + } + } + + /// Spawns a background process that executes the subgraph runner future. + /// + /// An additional background process is spawned to handle the graceful shutdown of the subgraph runner, + /// and to ensure correct behaviour even if the subgraph runner panics. + fn start_subgraph( + logger: Logger, + cancel_token: CancellationToken, + id: u32, + deployment: DeploymentLocator, + runner: BoxRunner, + command_tx: mpsc::UnboundedSender, + ) -> SubgraphInstance { + let mut runner_handle = tokio::spawn({ + let logger = logger.new(slog::o!("process" => "subgraph_runner")); + let cancel_token = cancel_token.cheap_clone(); + + async move { + info!(logger, "Subgraph started"); + + match runner(cancel_token).await { + Ok(()) => { + info!(logger, "Subgraph stopped"); + } + Err(e) => { + error!(logger, "Subgraph failed"; + "error" => ?e + ); + } + } + } + }); + + let supervisor_handle = tokio::spawn({ + let logger = logger.new(slog::o!("process" => "subgraph_supervisor")); + let cancel_token = cancel_token.cheap_clone(); + + fn handle_runner_result(logger: &Logger, result: Result<(), tokio::task::JoinError>) { + match result { + Ok(()) => { + debug!(logger, "Subgraph completed execution"); + } + Err(e) if e.is_panic() => { + error!(logger, "Subgraph panicked"; + "error" => ?e + ); + + // TODO: Maybe abort the entire process on panic and require a full graph-node restart. + // Q: Should a bug that is triggered in a specific subgraph affect everything? + // Q: How to make this failure loud enough so it is not missed? 
+ // + // println!("Subgraph panicked"); + // std::process::abort(); + } + Err(e) => { + error!(logger, "Subgraph failed"; + "error" => ?e + ); + } + } + } + + async move { + debug!(logger, "Subgraph supervisor started"); + + tokio::select! { + _ = cancel_token.cancelled() => { + debug!(logger, "Received cancel signal, waiting for subgraph to stop"); + + match timeout(SUBGRAPH_INSTANCE_GRACE_PERIOD, &mut runner_handle).await { + Ok(result) => { + handle_runner_result(&logger, result); + }, + Err(_) => { + warn!(logger, "Subgraph did not stop after grace period, aborting"); + + runner_handle.abort(); + let _ = runner_handle.await; + + warn!(logger, "Subgraph aborted"); + } + } + }, + result = &mut runner_handle => { + handle_runner_result(&logger, result); + cancel_token.cancel(); + } + } + + debug!(logger, "Sending clear command"); + handle_send_result(&logger, command_tx.send(Command::Clear { id, deployment })); + } + }); + + SubgraphInstance { + id, + handle: supervisor_handle, + cancel_token, + } + } +} + +impl Drop for Monitor { + fn drop(&mut self) { + // Send cancel signals to all active subgraphs so that they don't remain without an associated monitor + self.cancel_token.cancel(); + } +} + +/// Represents a background process that executes the subgraph runner future. +struct SubgraphInstance { + id: u32, + handle: JoinHandle<()>, + cancel_token: CancellationToken, +} + +/// Every command used by the subgraph monitor. +enum Command { + /// A request to start executing the subgraph runner future. + Start { + id: u32, + deployment: DeploymentLocator, + runner: BoxRunner, + }, + + /// A request to stop executing the subgraph runner future. + Stop { deployment: DeploymentLocator }, + + /// A request to remove the subgraph from the list of active subgraphs. 
+ Clear { + id: u32, + deployment: DeploymentLocator, + }, +} + +impl fmt::Debug for Command { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Start { + id, + deployment, + runner: _, + } => f + .debug_struct("Start") + .field("id", id) + .field("deployment", deployment) + .finish_non_exhaustive(), + Self::Stop { deployment } => f + .debug_struct("Stop") + .field("deployment", deployment) + .finish(), + Self::Clear { id, deployment } => f + .debug_struct("Clear") + .field("id", id) + .field("deployment", deployment) + .finish(), + } + } +} + +fn handle_send_result( + logger: &Logger, + result: Result<(), tokio::sync::mpsc::error::SendError>, +) { + match result { + Ok(()) => { + debug!(logger, "Command was sent successfully"); + } + + // This should only happen if the parent cancel token of the subgraph monitor was cancelled + Err(e) => { + error!(logger, "Failed to send command"; + "command" => ?e.0, + "error" => ?e + ); + } + } +} diff --git a/core/src/amp_subgraph/runner/compat.rs b/core/src/amp_subgraph/runner/compat.rs new file mode 100644 index 00000000000..c695238416c --- /dev/null +++ b/core/src/amp_subgraph/runner/compat.rs @@ -0,0 +1,50 @@ +use alloy::primitives::{BlockHash, BlockNumber}; +use chrono::{DateTime, Utc}; + +mod legacy { + pub(super) use graph::{ + blockchain::{BlockHash, BlockPtr, BlockTime}, + components::store::BlockNumber, + data::store::scalar::Timestamp, + }; +} + +pub(super) trait Compat { + fn compat(&self) -> T; +} + +impl Compat for BlockNumber { + fn compat(&self) -> legacy::BlockNumber { + (*self).try_into().unwrap() + } +} + +impl Compat for legacy::BlockNumber { + fn compat(&self) -> BlockNumber { + (*self).try_into().unwrap() + } +} + +impl Compat for BlockHash { + fn compat(&self) -> legacy::BlockHash { + legacy::BlockHash(self.0.into()) + } +} + +impl Compat for legacy::BlockHash { + fn compat(&self) -> BlockHash { + BlockHash::from_slice(&self.0) + } +} + +impl Compat for DateTime { + fn compat(&self) -> legacy::BlockTime { + legacy::Timestamp(*self).into() + } +} + +impl Compat for (BlockNumber, BlockHash) { + fn compat(&self) -> legacy::BlockPtr { + legacy::BlockPtr::new(self.1.compat(), self.0.compat()) + } +} diff --git a/core/src/amp_subgraph/runner/context.rs b/core/src/amp_subgraph/runner/context.rs new file mode 100644 index 00000000000..32e96148acf --- /dev/null +++ b/core/src/amp_subgraph/runner/context.rs @@ -0,0 +1,105 @@ +use std::sync::Arc; + +use alloy::primitives::{BlockHash, BlockNumber}; +use graph::{ + amp::{log::Logger as _, Codec, Manifest}, + cheap_clone::CheapClone, + components::store::WritableStore, + data::subgraph::DeploymentHash, + env::AmpEnv, + util::backoff::ExponentialBackoff, +}; +use slog::Logger; + +use super::Compat; +use crate::amp_subgraph::Metrics; + +pub(in super::super) struct Context { + pub(super) logger: Logger, + pub(super) client: Arc, + pub(super) store: Arc, + pub(super) max_buffer_size: usize, + pub(super) max_block_range: usize, + pub(super) backoff: ExponentialBackoff, + pub(super) deployment: DeploymentHash, + pub(super) manifest: Manifest, + pub(super) metrics: Metrics, + pub(super) codec: Codec, +} + +impl Context { + pub(in super::super) fn new( + logger: &Logger, + env: &AmpEnv, + client: Arc, + store: Arc, + deployment: DeploymentHash, + manifest: Manifest, + metrics: Metrics, + ) -> Self { + let logger = logger.component("AmpSubgraphRunner"); + let backoff = ExponentialBackoff::new(env.query_retry_min_delay, env.query_retry_max_delay); + let codec = 
Codec::new(manifest.schema.cheap_clone()); + + Self { + logger, + client, + store, + max_buffer_size: env.max_buffer_size, + max_block_range: env.max_block_range, + backoff, + deployment, + manifest, + metrics, + codec, + } + } + + pub(super) fn indexing_completed(&self) -> bool { + let Some(last_synced_block) = self.latest_synced_block() else { + return false; + }; + + self.manifest + .data_sources + .iter() + .all(|data_source| last_synced_block >= data_source.source.end_block) + } + + pub(super) fn latest_synced_block(&self) -> Option { + self.latest_synced_block_ptr() + .map(|(block_number, _)| block_number) + } + + pub(super) fn latest_synced_block_ptr(&self) -> Option<(BlockNumber, BlockHash)> { + self.store + .block_ptr() + .map(|block_ptr| (block_ptr.number.compat(), block_ptr.hash.compat())) + } + + pub(super) fn total_queries(&self) -> usize { + self.manifest + .data_sources + .iter() + .map(|data_source| data_source.transformer.tables.len()) + .sum() + } + + pub(super) fn min_start_block(&self) -> BlockNumber { + self.manifest + .data_sources + .iter() + .map(|data_source| data_source.source.start_block) + .min() + .unwrap() + } + + pub(super) fn max_end_block(&self) -> BlockNumber { + self.manifest + .data_sources + .iter() + .map(|data_source| data_source.source.end_block) + .max() + .unwrap() + } +} diff --git a/core/src/amp_subgraph/runner/data_processing.rs b/core/src/amp_subgraph/runner/data_processing.rs new file mode 100644 index 00000000000..2801abc66aa --- /dev/null +++ b/core/src/amp_subgraph/runner/data_processing.rs @@ -0,0 +1,252 @@ +use std::sync::Arc; + +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::anyhow; +use arrow::array::RecordBatch; +use chrono::{DateTime, Utc}; +use graph::{ + amp::{ + codec::{utils::auto_block_timestamp_decoder, DecodeOutput, DecodedEntity, Decoder}, + stream_aggregator::{RecordBatchGroup, RecordBatchGroups, StreamRecordBatch}, + }, + blockchain::block_stream::FirehoseCursor, + cheap_clone::CheapClone, + components::store::{EntityCache, ModificationsAndCache}, +}; +use slog::{debug, trace}; + +use super::{data_stream::TablePtr, Compat, Context, Error}; + +pub(super) async fn process_record_batch_groups( + cx: &mut Context, + mut entity_cache: EntityCache, + record_batch_groups: RecordBatchGroups, + stream_table_ptr: Arc<[TablePtr]>, + latest_block: BlockNumber, +) -> Result { + if record_batch_groups.is_empty() { + debug!(cx.logger, "Received no record batch groups"); + return Ok(entity_cache); + } + + let from_block = record_batch_groups + .first_key_value() + .map(|((block, _), _)| *block) + .unwrap(); + + let to_block = record_batch_groups + .last_key_value() + .map(|((block, _), _)| *block) + .unwrap(); + + debug!(cx.logger, "Processing record batch groups"; + "from_block" => from_block, + "to_block" => to_block + ); + + for ((block_number, block_hash), record_batch_group) in record_batch_groups { + trace!(cx.logger, "Processing record batch group"; + "block" => block_number, + "record_batches_count" => record_batch_group.record_batches.len() + ); + + entity_cache = process_record_batch_group( + cx, + entity_cache, + block_number, + block_hash, + record_batch_group, + &stream_table_ptr, + latest_block, + ) + .await + .map_err(|e| { + e.context(format!( + "failed to process record batch group at block '{block_number}'" + )) + })?; + + cx.metrics.deployment_head.update(block_number); + cx.metrics.blocks_processed.record_one(); + + trace!(cx.logger, "Completed processing record batch group"; + "block" => block_number + ); 
+ } + + debug!(cx.logger, "Completed processing record batch groups"; + "from_block" => from_block, + "to_block" => to_block + ); + + Ok(entity_cache) +} + +async fn process_record_batch_group( + cx: &mut Context, + mut entity_cache: EntityCache, + block_number: BlockNumber, + block_hash: BlockHash, + record_batch_group: RecordBatchGroup, + stream_table_ptr: &[TablePtr], + latest_block: BlockNumber, +) -> Result { + let _section = cx + .metrics + .stopwatch + .start_section("process_record_batch_group"); + + let RecordBatchGroup { record_batches } = record_batch_group; + + if record_batches.is_empty() { + debug!(cx.logger, "Record batch group is empty"); + return Ok(entity_cache); + } + + let block_timestamp = decode_block_timestamp(&record_batches) + .map_err(|e| e.context("failed to decode block timestamp"))?; + + for record_batch in record_batches { + let StreamRecordBatch { + stream_index, + record_batch, + } = record_batch; + + process_record_batch( + cx, + &mut entity_cache, + block_number, + record_batch, + stream_table_ptr[stream_index], + ) + .await + .map_err(|e| { + e.context(format!( + "failed to process record batch for stream '{stream_index}'" + )) + })?; + } + + let ModificationsAndCache { + modifications, + entity_lfu_cache, + evict_stats: _, + } = entity_cache + .as_modifications(block_number.compat()) + .map_err(Error::from) + .map_err(|e| e.context("failed to extract entity modifications from the state"))?; + + let is_close_to_chain_head = latest_block.saturating_sub(block_number) <= 100; + + cx.store + .transact_block_operations( + (block_number, block_hash).compat(), + block_timestamp.compat(), + FirehoseCursor::None, + modifications, + &cx.metrics.stopwatch, + Vec::new(), + Vec::new(), + Vec::new(), + false, + is_close_to_chain_head, + ) + .await + .map_err(Error::from) + .map_err(|e| e.context("failed to transact block operations"))?; + + if is_close_to_chain_head { + cx.metrics.deployment_synced.record(true); + } + + Ok(EntityCache::with_current( + cx.store.cheap_clone(), + entity_lfu_cache, + )) +} + +async fn process_record_batch( + cx: &mut Context, + entity_cache: &mut EntityCache, + block_number: BlockNumber, + record_batch: RecordBatch, + (i, j): TablePtr, +) -> Result<(), Error> { + let _section = cx.metrics.stopwatch.start_section("process_record_batch"); + + let table = &cx.manifest.data_sources[i].transformer.tables[j]; + let entity_name = &table.name; + + let DecodeOutput { + entity_type, + id_type, + decoded_entities, + } = cx + .codec + .decode(record_batch, entity_name.as_str()) + .map_err(|e| { + Error::Deterministic( + e.context(format!("failed to decode entities of type '{entity_name}'")), + ) + })?; + + for decoded_entity in decoded_entities { + let DecodedEntity { + key, + mut entity_data, + } = decoded_entity; + + let key = match key { + Some(key) => key, + None => { + let entity_id = entity_cache + .generate_id(id_type, block_number.compat()) + .map_err(|e| { + Error::Deterministic(e.context(format!( + "failed to generate a new id for an entity of type '{entity_name}'" + ))) + })?; + + entity_data.push(("id".into(), entity_id.clone().into())); + entity_type.key(entity_id) + } + }; + + let entity_id = key.entity_id.clone(); + let entity = cx.manifest.schema.make_entity(entity_data).map_err(|e| { + Error::Deterministic(anyhow!(e).context(format!( + "failed to create a new entity of type '{entity_name}' with id '{entity_id}'" + ))) + })?; + + entity_cache + .set(key, entity, block_number.compat(), None) + .map_err(|e| { + 
Error::Deterministic(e.context(format!( + "failed to store a new entity of type '{entity_name}' with id '{entity_id}'" + ))) + })?; + } + + Ok(()) +} + +fn decode_block_timestamp(record_batches: &[StreamRecordBatch]) -> Result, Error> { + let mut last_error: Option = None; + + for record_batch in record_batches { + match auto_block_timestamp_decoder(&record_batch.record_batch) { + Ok((_, decoder)) => { + return decoder + .decode(0) + .map_err(|e| Error::Deterministic(e))? + .ok_or_else(|| Error::Deterministic(anyhow!("block timestamp is empty"))); + } + Err(e) => { + last_error = Some(Error::Deterministic(e)); + } + } + } + + Err(last_error.unwrap()) +} diff --git a/core/src/amp_subgraph/runner/data_stream.rs b/core/src/amp_subgraph/runner/data_stream.rs new file mode 100644 index 00000000000..7f3636a5af9 --- /dev/null +++ b/core/src/amp_subgraph/runner/data_stream.rs @@ -0,0 +1,206 @@ +use std::{collections::HashMap, ops::RangeInclusive, sync::Arc}; + +use alloy::primitives::BlockNumber; +use anyhow::anyhow; +use futures::{ + stream::{empty, BoxStream}, + StreamExt, TryStreamExt, +}; +use graph::{ + amp::{ + manifest::DataSource, + stream_aggregator::{RecordBatchGroups, StreamAggregator}, + Client, + }, + cheap_clone::CheapClone, +}; +use slog::{debug, warn}; + +use super::{Context, Error}; + +pub(super) type TablePtr = (usize, usize); + +pub(super) fn new_data_stream( + cx: &Context, + latest_block: BlockNumber, +) -> BoxStream<'static, Result<(RecordBatchGroups, Arc<[TablePtr]>), Error>> +where + AC: Client, +{ + let logger = cx.logger.new(slog::o!("process" => "new_data_stream")); + + let total_queries = cx.total_queries(); + let mut total_queries_to_execute = 0; + let mut data_streams = Vec::new(); + let mut latest_queried_block = cx.latest_synced_block(); + let mut max_end_block = BlockNumber::MIN; + + debug!(logger, "Creating data stream"; + "from_block" => latest_queried_block.unwrap_or(BlockNumber::MIN), + "to_block" => latest_block, + "min_start_block" => cx.min_start_block(), + "max_block_range" => cx.max_block_range, + ); + + loop { + let next_block_ranges = next_block_ranges(&cx, latest_queried_block, latest_block); + + if next_block_ranges.is_empty() { + if data_streams.is_empty() { + warn!(logger, "There are no unprocessed block ranges"); + } + break; + } + + let mut query_streams = Vec::with_capacity(total_queries); + let mut query_streams_table_ptr = Vec::with_capacity(total_queries); + let mut min_start_block = BlockNumber::MAX; + + for (i, data_source) in cx.manifest.data_sources.iter().enumerate() { + let Some(block_range) = next_block_ranges.get(&i) else { + continue; + }; + + if *block_range.start() < min_start_block { + min_start_block = *block_range.start(); + } + + if *block_range.end() > max_end_block { + max_end_block = *block_range.end(); + } + + for (j, table) in data_source.transformer.tables.iter().enumerate() { + let query = table.query.build_with_block_range(block_range); + let stream = cx.client.query(&cx.logger, query, None); + let stream_name = format!("{}.{}", data_source.name, table.name); + + query_streams.push((stream_name, stream)); + query_streams_table_ptr.push((i, j)); + } + } + + let query_streams_table_ptr: Arc<[TablePtr]> = query_streams_table_ptr.into(); + total_queries_to_execute += query_streams.len(); + + let mut min_start_block_checked = false; + let mut load_first_record_batch_group_section = Some( + cx.metrics + .stopwatch + .start_section("load_first_record_batch_group"), + ); + + data_streams.push( + StreamAggregator::new(&cx.logger, 
query_streams, cx.max_buffer_size) + .map_ok(move |response| (response, query_streams_table_ptr.cheap_clone())) + .map_err(Error::from) + .map(move |result| { + if load_first_record_batch_group_section.is_some() { + let _section = load_first_record_batch_group_section.take(); + } + + match result { + Ok(response) => { + if !min_start_block_checked { + if let Some(((first_block, _), _)) = response.0.first_key_value() { + if *first_block < min_start_block { + return Err(Error::NonDeterministic(anyhow!( + "chain reorg" + ))); + } + } + + min_start_block_checked = true; + } + + Ok(response) + } + Err(e) => Err(e), + } + }) + .boxed(), + ); + + if max_end_block >= latest_block { + break; + } + + latest_queried_block = Some(max_end_block); + } + + debug!(logger, "Created aggregated data streams"; + "total_data_streams" => data_streams.len(), + "total_queries_to_execute" => total_queries_to_execute + ); + + let mut iter = data_streams.into_iter(); + let mut merged_data_stream = iter.next().unwrap_or_else(|| empty().boxed()); + + for data_stream in iter { + merged_data_stream = merged_data_stream.chain(data_stream).boxed(); + } + + merged_data_stream +} + +fn next_block_ranges( + cx: &Context, + latest_queried_block: Option, + latest_block: BlockNumber, +) -> HashMap> { + let block_ranges = cx + .manifest + .data_sources + .iter() + .enumerate() + .filter_map(|(i, data_source)| { + next_block_range(cx, data_source, latest_queried_block, latest_block) + .map(|block_range| (i, block_range)) + }) + .collect::>(); + + let Some(min_block_range) = block_ranges + .iter() + .min_by_key(|(_, block_range)| *block_range.start()) + .map(|(_, min_block_range)| min_block_range.clone()) + else { + return HashMap::new(); + }; + + block_ranges + .into_iter() + .filter(|(_, block_range)| block_range.start() <= min_block_range.end()) + .collect() +} + +fn next_block_range( + cx: &Context, + data_source: &DataSource, + latest_queried_block: Option, + latest_block: BlockNumber, +) -> Option> { + let start_block = match latest_queried_block { + Some(latest_queried_block) => { + if latest_queried_block >= data_source.source.end_block { + return None; + } + + latest_queried_block + 1 + } + None => data_source.source.start_block, + }; + + let end_block = [ + start_block.saturating_add(cx.max_block_range as BlockNumber), + data_source.source.end_block, + latest_block, + ] + .into_iter() + .min() + .unwrap(); + + if start_block > end_block { + return None; + } + + Some(start_block..=end_block) +} diff --git a/core/src/amp_subgraph/runner/error.rs b/core/src/amp_subgraph/runner/error.rs new file mode 100644 index 00000000000..8c7077e1c68 --- /dev/null +++ b/core/src/amp_subgraph/runner/error.rs @@ -0,0 +1,43 @@ +use graph::amp::error::IsDeterministic; +use thiserror::Error; + +#[derive(Debug, Error)] +pub(super) enum Error { + #[error("runner failed with a non-deterministic error: {0:#}")] + NonDeterministic(#[source] anyhow::Error), + + #[error("runner failed with a deterministic error: {0:#}")] + Deterministic(#[source] anyhow::Error), +} + +impl Error { + pub(super) fn context(self, context: C) -> Self + where + C: std::fmt::Display + Send + Sync + 'static, + { + match self { + Self::NonDeterministic(e) => Self::NonDeterministic(e.context(context)), + Self::Deterministic(e) => Self::Deterministic(e.context(context)), + } + } + + pub(super) fn is_deterministic(&self) -> bool { + match self { + Self::Deterministic(_) => true, + Self::NonDeterministic(_) => false, + } + } +} + +impl From for Error +where + T: 
std::error::Error + IsDeterministic + Send + Sync + 'static, +{ + fn from(e: T) -> Self { + if e.is_deterministic() { + Self::Deterministic(e.into()) + } else { + Self::NonDeterministic(e.into()) + } + } +} diff --git a/core/src/amp_subgraph/runner/latest_blocks.rs b/core/src/amp_subgraph/runner/latest_blocks.rs new file mode 100644 index 00000000000..559aef963cd --- /dev/null +++ b/core/src/amp_subgraph/runner/latest_blocks.rs @@ -0,0 +1,179 @@ +use alloy::primitives::BlockNumber; +use anyhow::anyhow; +use arrow::array::RecordBatch; +use futures::{future::try_join_all, stream::BoxStream, StreamExt, TryFutureExt}; +use graph::amp::{ + client::ResponseBatch, + codec::{utils::block_number_decoder, Decoder}, + error::IsDeterministic, + manifest::DataSource, + Client, +}; +use itertools::Itertools; +use slog::debug; + +use super::{Context, Error}; + +pub(super) type TablePtr = (usize, usize); + +pub(super) struct LatestBlocks(Vec<(TablePtr, BlockNumber)>); + +impl LatestBlocks { + pub(super) async fn load(cx: &Context) -> Result + where + AC: Client, + { + debug!(cx.logger, "Loading latest blocks"); + let _section = cx.metrics.stopwatch.start_section("load_latest_blocks"); + + let latest_block_futs = cx + .manifest + .data_sources + .iter() + .enumerate() + .map(|(i, data_source)| { + data_source + .source + .tables + .iter() + .enumerate() + .map(move |(j, table)| ((i, j), &data_source.source.dataset, table)) + }) + .flatten() + .unique_by(|(_, dataset, table)| (dataset.to_string(), table.to_string())) + .map(|(table_ptr, dataset, table)| { + latest_block(&cx, dataset, table) + .map_ok(move |latest_block| (table_ptr, latest_block)) + .map_err(move |e| { + e.context(format!( + "failed to load latest block for '{dataset}.{table}'" + )) + }) + }); + + try_join_all(latest_block_futs).await.map(Self) + } + + pub(super) fn filter_completed(self, cx: &Context) -> Self + where + AC: Client, + { + let latest_synced_block = cx.latest_synced_block(); + + Self( + self.0 + .into_iter() + .filter(|((i, _), _)| { + !indexing_completed(&cx.manifest.data_sources[*i], &latest_synced_block) + }) + .collect(), + ) + } + + pub(super) fn min(&self) -> BlockNumber { + self.0 + .iter() + .min_by_key(|(_, latest_block)| *latest_block) + .map(|(_, latest_block)| *latest_block) + .unwrap() + } + + pub(super) async fn changed(self, cx: &Context) -> Result<(), Error> + where + AC: Client, + { + debug!(cx.logger, "Waiting for new blocks"); + let _section = cx.metrics.stopwatch.start_section("latest_blocks_changed"); + + let min_latest_block = self.min(); + let latest_synced_block = cx.latest_synced_block(); + + let latest_block_changed_futs = self + .0 + .into_iter() + .filter(|(_, latest_block)| *latest_block == min_latest_block) + .filter(|((i, _), _)| { + !indexing_completed(&cx.manifest.data_sources[*i], &latest_synced_block) + }) + .map(|((i, j), latest_block)| { + let source = &cx.manifest.data_sources[i].source; + let dataset = &source.dataset; + let table = &source.tables[j]; + + latest_block_changed(&cx, dataset, table, latest_block).map_err(move |e| { + e.context(format!( + "failed to check if the latest block changed in '{dataset}.{table}'" + )) + }) + }); + + let _response = try_join_all(latest_block_changed_futs).await?; + + Ok(()) + } + + pub(super) fn iter(&self) -> impl Iterator { + self.0.iter() + } +} + +fn indexing_completed(data_source: &DataSource, latest_synced_block: &Option) -> bool { + latest_synced_block + .as_ref() + .is_some_and(|latest_synced_block| *latest_synced_block >= 
data_source.source.end_block) +} + +async fn latest_block( + cx: &Context, + dataset: &str, + table: &str, +) -> Result +where + AC: Client, +{ + let query = format!("SELECT MAX(_block_num) FROM {dataset}.{table}"); + let stream = cx.client.query(&cx.logger, query, None); + let record_batch = read_once(stream).await?; + + let latest_block = block_number_decoder(&record_batch, 0) + .map_err(|e| Error::Deterministic(e))? + .decode(0) + .map_err(|e| Error::Deterministic(e))? + .ok_or_else(|| Error::NonDeterministic(anyhow!("table is empty")))?; + + Ok(latest_block) +} + +async fn latest_block_changed( + cx: &Context, + dataset: &str, + table: &str, + latest_block: BlockNumber, +) -> Result<(), Error> +where + AC: Client, +{ + let query = format!("SELECT _block_num FROM {dataset}.{table} WHERE _block_num > {latest_block} SETTINGS stream = true"); + let stream = cx.client.query(&cx.logger, query, None); + let _record_batch = read_once(stream).await?; + + Ok(()) +} + +async fn read_once( + mut stream: BoxStream<'static, Result>, +) -> Result +where + E: std::error::Error + IsDeterministic + Send + Sync + 'static, +{ + let response = stream + .next() + .await + .ok_or_else(|| Error::NonDeterministic(anyhow!("stream is empty")))? + .map_err(Error::from)?; + + match response { + ResponseBatch::Batch { data } => Ok(data), + _ => Err(Error::NonDeterministic(anyhow!("response is empty"))), + } +} diff --git a/core/src/amp_subgraph/runner/mod.rs b/core/src/amp_subgraph/runner/mod.rs new file mode 100644 index 00000000000..8fee0e9fda4 --- /dev/null +++ b/core/src/amp_subgraph/runner/mod.rs @@ -0,0 +1,181 @@ +mod compat; +mod context; +mod data_processing; +mod data_stream; +mod error; +mod latest_blocks; +mod reorg_handler; + +use std::time::{Duration, Instant}; + +use anyhow::Result; +use futures::{future::BoxFuture, StreamExt}; +use graph::{ + amp::Client, cheap_clone::CheapClone, components::store::EntityCache, + data::subgraph::schema::SubgraphError, +}; +use slog::{debug, error, warn}; +use tokio_util::sync::CancellationToken; + +use self::{ + compat::Compat, data_processing::process_record_batch_groups, data_stream::new_data_stream, + error::Error, latest_blocks::LatestBlocks, reorg_handler::check_and_handle_reorg, +}; + +pub(super) use self::context::Context; + +pub(super) fn new_runner( + mut cx: Context, +) -> Box BoxFuture<'static, Result<()>> + Send + 'static> +where + AC: Client + Send + Sync + 'static, +{ + Box::new(move |cancel_token| { + Box::pin(async move { + let indexing_duration_handle = tokio::spawn({ + let mut instant = Instant::now(); + let indexing_duration = cx.metrics.indexing_duration.clone(); + + async move { + loop { + tokio::time::sleep(Duration::from_secs(1)).await; + + let prev_instant = std::mem::replace(&mut instant, Instant::now()); + indexing_duration.record(prev_instant.elapsed()); + } + } + }); + + let result = cancel_token + .run_until_cancelled(run_indexing_with_retries(&mut cx)) + .await; + + indexing_duration_handle.abort(); + + match result { + Some(result) => result?, + None => { + debug!(cx.logger, "Processed cancel signal"); + } + } + + cx.metrics.deployment_status.stopped(); + + debug!(cx.logger, "Waiting for the store to finish processing"); + cx.store.flush().await?; + Ok(()) + }) + }) +} + +async fn run_indexing(cx: &mut Context) -> Result<(), Error> +where + AC: Client, +{ + cx.metrics.deployment_status.starting(); + + if let Some(latest_synced_block) = cx.latest_synced_block() { + cx.metrics.deployment_head.update(latest_synced_block); + } + + 
cx.metrics + .deployment_synced + .record(cx.store.is_deployment_synced()); + + loop { + cx.metrics.deployment_status.running(); + + debug!(cx.logger, "Running indexing"; + "latest_synced_block_ptr" => ?cx.latest_synced_block_ptr() + ); + + let mut latest_blocks = LatestBlocks::load(cx).await?; + check_and_handle_reorg(cx, &latest_blocks).await?; + + if cx.indexing_completed() { + cx.metrics.deployment_synced.record(true); + + debug!(cx.logger, "Indexing completed"); + return Ok(()); + } + + latest_blocks = latest_blocks.filter_completed(cx); + let latest_block = latest_blocks.min(); + + cx.metrics + .deployment_target + .update(latest_block.min(cx.max_end_block())); + + let mut deployment_is_failed = cx.store.health().await?.is_failed(); + let mut entity_cache = EntityCache::new(cx.store.cheap_clone()); + let mut stream = new_data_stream(cx, latest_block); + + while let Some(result) = stream.next().await { + let (record_batch_groups, stream_table_ptr) = result?; + + entity_cache = process_record_batch_groups( + cx, + entity_cache, + record_batch_groups, + stream_table_ptr, + latest_block, + ) + .await?; + + if deployment_is_failed { + if let Some(block_ptr) = cx.store.block_ptr() { + cx.store.unfail_non_deterministic_error(&block_ptr)?; + deployment_is_failed = false; + } + } + } + + debug!(cx.logger, "Completed indexing iteration"; + "latest_synced_block_ptr" => ?cx.latest_synced_block_ptr() + ); + + latest_blocks.changed(cx).await?; + cx.backoff.reset(); + } +} + +async fn run_indexing_with_retries(cx: &mut Context) -> Result<()> +where + AC: Client, +{ + loop { + match run_indexing(cx).await { + Ok(()) => return Ok(()), + Err(e) => { + cx.metrics.deployment_status.failed(); + + let deterministic = e.is_deterministic(); + + cx.store + .fail_subgraph(SubgraphError { + subgraph_id: cx.deployment.cheap_clone(), + message: format!("{e:#}"), + block_ptr: None, // TODO: Find a way to propagate the block ptr here + handler: None, + deterministic, + }) + .await?; + + if deterministic { + error!(cx.logger, "Subgraph failed with a deterministic error"; + "e" => ?e + ); + return Err(e.into()); + } + + warn!(cx.logger, "Subgraph failed with a non-deterministic error"; + "e" => ?e, + "retry_delay_seconds" => cx.backoff.delay().as_secs() + ); + + cx.backoff.sleep_async().await; + debug!(cx.logger, "Restarting indexing"); + } + } + } +} diff --git a/core/src/amp_subgraph/runner/reorg_handler.rs b/core/src/amp_subgraph/runner/reorg_handler.rs new file mode 100644 index 00000000000..911c4ebf818 --- /dev/null +++ b/core/src/amp_subgraph/runner/reorg_handler.rs @@ -0,0 +1,163 @@ +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::anyhow; +use futures::{future::try_join_all, StreamExt, TryFutureExt}; +use graph::{ + amp::{ + client::{LatestBlockBeforeReorg, RequestMetadata, ResponseBatch, ResumeStreamingQuery}, + Client, + }, + blockchain::block_stream::FirehoseCursor, +}; +use itertools::Itertools; +use slog::debug; + +use super::{Compat, Context, Error, LatestBlocks}; + +pub(super) async fn check_and_handle_reorg( + cx: &Context, + latest_blocks: &LatestBlocks, +) -> Result<(), Error> +where + AC: Client, +{ + let logger = cx + .logger + .new(slog::o!("process" => "check_and_handle_reorg")); + + let Some((latest_synced_block_number, latest_synced_block_hash)) = cx.latest_synced_block_ptr() + else { + debug!(logger, "There are no synced blocks; Skipping reorg check"); + return Ok(()); + }; + + debug!(logger, "Running reorg check"); + + let Some(latest_block_before_reorg) = detect_deepest_reorg( 
+ cx, + latest_blocks, + latest_synced_block_number, + latest_synced_block_hash, + ) + .await? + else { + debug!(logger, "Successfully checked for reorg: No reorg detected"; + "latest_synced_block" => latest_synced_block_number + ); + return Ok(()); + }; + + let _section = cx.metrics.stopwatch.start_section("handle_reorg"); + + debug!(logger, "Handling reorg"; + "latest_synced_block" => latest_synced_block_number, + "latest_block_before_reorg" => ?latest_block_before_reorg.block_number + ); + + let (block_number, block_hash) = match ( + latest_block_before_reorg.block_number, + latest_block_before_reorg.block_hash, + ) { + (Some(block_number), Some(block_hash)) => (block_number, block_hash), + (_, _) => { + // TODO: Handle reorgs to the genesis block + return Err(Error::Deterministic(anyhow!( + "invalid reorg: rewind to the genesis block not supported" + ))); + } + }; + + if block_number > latest_synced_block_number { + return Err(Error::Deterministic(anyhow!( + "invalid reorg: latest block before reorg cannot be higher than the invalidated block" + ))); + } else if block_number == latest_synced_block_number && block_hash == latest_synced_block_hash { + return Err(Error::Deterministic(anyhow!( + "invalid reorg: latest block before reorg cannot be equal to the invalidated block" + ))); + } + + cx.store + .revert_block_operations((block_number, block_hash).compat(), FirehoseCursor::None) + .await + .map_err(Error::from)?; + + Ok(()) +} + +async fn detect_deepest_reorg( + cx: &Context, + latest_blocks: &LatestBlocks, + latest_synced_block_number: BlockNumber, + latest_synced_block_hash: BlockHash, +) -> Result, Error> +where + AC: Client, +{ + let detect_reorg_futs = latest_blocks + .iter() + .filter(|(_, latest_block)| *latest_block >= latest_synced_block_number) + .map(|((i, j), _)| { + let data_source = &cx.manifest.data_sources[*i]; + let network = &data_source.network; + let dataset = &data_source.source.dataset; + let table = &data_source.source.tables[*j]; + + detect_reorg( + &cx, + network, + dataset, + table, + latest_synced_block_number, + latest_synced_block_hash, + ) + .map_err(move |e| e.context(format!("failed to detect reorg in '{dataset}.{table}'"))) + }); + + let deepest_reorg = try_join_all(detect_reorg_futs) + .await? + .into_iter() + .flatten() + .min_by_key(|latest_block_before_reorg| latest_block_before_reorg.block_number); + + Ok(deepest_reorg) +} + +async fn detect_reorg( + cx: &Context, + network: &str, + dataset: &str, + table: &str, + latest_synced_block_number: BlockNumber, + latest_synced_block_hash: BlockHash, +) -> Result, Error> +where + AC: Client, +{ + let query = format!("SELECT _block_num FROM {dataset}.{table} SETTINGS stream = true"); + let mut stream = cx.client.query( + &cx.logger, + query, + Some(RequestMetadata { + resume_streaming_query: Some(vec![ResumeStreamingQuery { + network: network.to_string(), + block_number: latest_synced_block_number, + block_hash: latest_synced_block_hash, + }]), + }), + ); + + let response = stream + .next() + .await + .ok_or_else(|| Error::NonDeterministic(anyhow!("stream is empty")))? + .map_err(Error::from)?; + + match response { + ResponseBatch::Batch { .. 
} => Ok(None), + ResponseBatch::Reorg(reorg) => reorg + .into_iter() + .exactly_one() + .map_err(|_e| Error::Deterministic(anyhow!("multi-chain datasets are not supported"))) + .map(Some), + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index 448bb1041fd..55c5d35af2e 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -1,3 +1,4 @@ +pub mod amp_subgraph; pub mod polling_monitor; mod subgraph; diff --git a/core/src/subgraph/context/instance/mod.rs b/core/src/subgraph/context/instance/mod.rs index 86b64195493..0d14ae8d758 100644 --- a/core/src/subgraph/context/instance/mod.rs +++ b/core/src/subgraph/context/instance/mod.rs @@ -182,6 +182,7 @@ where Ok(Some(host)) } } + DataSource::Amp(_) => unreachable!(), } } diff --git a/core/src/subgraph/instance_manager.rs b/core/src/subgraph/instance_manager.rs index 9ca3430a5fb..7a34b38dca4 100644 --- a/core/src/subgraph/instance_manager.rs +++ b/core/src/subgraph/instance_manager.rs @@ -9,6 +9,7 @@ use crate::subgraph::Decoder; use std::collections::BTreeSet; use crate::subgraph::runner::SubgraphRunner; +use graph::amp; use graph::blockchain::block_stream::{BlockStreamMetrics, TriggersAdapterWrapper}; use graph::blockchain::{Blockchain, BlockchainKind, DataSource, NodeCapabilities}; use graph::components::metrics::gas::GasMetrics; @@ -30,7 +31,7 @@ use super::SubgraphTriggerProcessor; use crate::subgraph::runner::SubgraphRunnerError; #[derive(Clone)] -pub struct SubgraphInstanceManager { +pub struct SubgraphInstanceManager { logger_factory: LoggerFactory, subgraph_store: Arc, chains: Arc, @@ -39,6 +40,7 @@ pub struct SubgraphInstanceManager { link_resolver: Arc, ipfs_service: IpfsService, arweave_service: ArweaveService, + amp_client: Option>, static_filters: bool, env_vars: Arc, @@ -56,7 +58,19 @@ pub struct SubgraphInstanceManager { } #[async_trait] -impl SubgraphInstanceManagerTrait for SubgraphInstanceManager { +impl SubgraphInstanceManagerTrait for SubgraphInstanceManager +where + S: SubgraphStore, + AC: amp::Client + Send + Sync + 'static, +{ + fn can_manage( + &self, + _deployment: &DeploymentLocator, + raw_manifest: &serde_yaml::Mapping, + ) -> bool { + !graph::amp::manifest::is_amp_manifest(raw_manifest) + } + async fn start_subgraph( self: Arc, loc: DeploymentLocator, @@ -168,7 +182,7 @@ impl SubgraphInstanceManagerTrait for SubgraphInstanceManager< } } -impl SubgraphInstanceManager { +impl SubgraphInstanceManager { pub fn new( logger_factory: &LoggerFactory, env_vars: Arc, @@ -179,6 +193,7 @@ impl SubgraphInstanceManager { link_resolver: Arc, ipfs_service: IpfsService, arweave_service: ArweaveService, + amp_client: Option>, static_filters: bool, ) -> Self { let logger = logger_factory.component_logger("SubgraphInstanceManager", None); @@ -192,6 +207,7 @@ impl SubgraphInstanceManager { instances: SubgraphKeepAlive::new(sg_metrics), link_resolver, ipfs_service, + amp_client, static_filters, env_vars, arweave_service, @@ -298,7 +314,12 @@ impl SubgraphInstanceManager { ); let manifest = manifest - .resolve(&link_resolver, &logger, ENV_VARS.max_spec_version.clone()) + .resolve( + &link_resolver, + self.amp_client.cheap_clone(), + &logger, + ENV_VARS.max_spec_version.clone(), + ) .await?; { diff --git a/core/src/subgraph/provider.rs b/core/src/subgraph/provider.rs index 00d379db01f..c03e48c0e61 100644 --- a/core/src/subgraph/provider.rs +++ b/core/src/subgraph/provider.rs @@ -1,8 +1,9 @@ -use std::collections::HashSet; -use std::sync::Mutex; +use std::{ + collections::{HashMap, HashSet}, + sync::Mutex, +}; use async_trait::async_trait; - 
use graph::{ components::store::{DeploymentId, DeploymentLocator}, prelude::{SubgraphAssignmentProvider as SubgraphAssignmentProviderTrait, *}, @@ -41,18 +42,20 @@ impl DeploymentRegistry { } } -pub struct SubgraphAssignmentProvider { +pub struct SubgraphAssignmentProvider { logger_factory: LoggerFactory, deployment_registry: DeploymentRegistry, link_resolver: Arc, - instance_manager: Arc, + instance_managers: Vec>, + instance_manager_assignments: + Arc>>>, } -impl SubgraphAssignmentProvider { +impl SubgraphAssignmentProvider { pub fn new( logger_factory: &LoggerFactory, link_resolver: Arc, - instance_manager: I, + instance_managers: Vec>, subgraph_metrics: Arc, ) -> Self { let logger = logger_factory.component_logger("SubgraphAssignmentProvider", None); @@ -62,42 +65,56 @@ impl SubgraphAssignmentProvider { SubgraphAssignmentProvider { logger_factory, link_resolver: link_resolver.with_retries().into(), - instance_manager: Arc::new(instance_manager), deployment_registry: DeploymentRegistry::new(subgraph_metrics), + instance_managers, + instance_manager_assignments: Arc::new(Mutex::new(HashMap::new())), } } } #[async_trait] -impl SubgraphAssignmentProviderTrait for SubgraphAssignmentProvider { +impl SubgraphAssignmentProviderTrait for SubgraphAssignmentProvider { async fn start( &self, - loc: DeploymentLocator, + deployment: DeploymentLocator, stop_block: Option, ) -> Result<(), SubgraphAssignmentProviderError> { - let logger = self.logger_factory.subgraph_logger(&loc); + let logger = self.logger_factory.subgraph_logger(&deployment); // If subgraph ID already in set - if !self.deployment_registry.insert(loc.id) { + if !self.deployment_registry.insert(deployment.id) { info!(logger, "Subgraph deployment is already running"); return Err(SubgraphAssignmentProviderError::AlreadyRunning( - loc.hash.clone(), + deployment.hash.clone(), )); } let file_bytes = self .link_resolver - .cat(&logger, &loc.hash.to_ipfs_link()) + .cat(&logger, &deployment.hash.to_ipfs_link()) .await .map_err(SubgraphAssignmentProviderError::ResolveError)?; - let raw: serde_yaml::Mapping = serde_yaml::from_slice(&file_bytes) + let raw_manifest: serde_yaml::Mapping = serde_yaml::from_slice(&file_bytes) .map_err(|e| SubgraphAssignmentProviderError::ResolveError(e.into()))?; - self.instance_manager + let instance_manager = self + .instance_managers + .iter() + .find(|instance_manager| instance_manager.can_manage(&deployment, &raw_manifest)) + .ok_or_else(|| { + anyhow!("there are no managers that can manage subgraph '{deployment}'") + })?; + + { + let mut lock = self.instance_manager_assignments.lock().unwrap(); + lock.insert(deployment.cheap_clone(), instance_manager.cheap_clone()); + } + + instance_manager .cheap_clone() - .start_subgraph(loc, raw, stop_block) + .start_subgraph(deployment, raw_manifest, stop_block) .await; Ok(()) @@ -107,11 +124,19 @@ impl SubgraphAssignmentProviderTrait for SubgraphAss &self, deployment: DeploymentLocator, ) -> Result<(), SubgraphAssignmentProviderError> { - // If subgraph ID was in set - if self.deployment_registry.remove(&deployment.id) { - // Shut down subgraph processing - self.instance_manager.stop_subgraph(deployment).await; + if !self.deployment_registry.remove(&deployment.id) { + return Ok(()); } + + let instance_manager = { + let mut lock = self.instance_manager_assignments.lock().unwrap(); + lock.remove(&deployment) + }; + + if let Some(instance_manager) = instance_manager { + instance_manager.stop_subgraph(deployment).await; + } + Ok(()) } } diff --git 
a/core/src/subgraph/registrar.rs b/core/src/subgraph/registrar.rs index 10f46c4891f..f198e47f345 100644 --- a/core/src/subgraph/registrar.rs +++ b/core/src/subgraph/registrar.rs @@ -2,39 +2,37 @@ use std::collections::HashSet; use std::time::Instant; use async_trait::async_trait; -use graph::blockchain::Blockchain; -use graph::blockchain::BlockchainKind; -use graph::blockchain::BlockchainMap; -use graph::components::store::{DeploymentId, DeploymentLocator, SubscriptionManager}; -use graph::components::subgraph::Settings; -use graph::data::subgraph::schema::DeploymentCreate; -use graph::data::subgraph::Graft; -use graph::data::value::Word; -use graph::futures01; -use graph::futures01::future; -use graph::futures01::stream; -use graph::futures01::Future; -use graph::futures01::Stream; -use graph::futures03::compat::Future01CompatExt; -use graph::futures03::compat::Stream01CompatExt; -use graph::futures03::future::FutureExt; -use graph::futures03::future::TryFutureExt; -use graph::futures03::stream::TryStreamExt; +use graph::amp; +use graph::blockchain::{Blockchain, BlockchainKind, BlockchainMap}; +use graph::components::{ + store::{DeploymentId, DeploymentLocator, SubscriptionManager}, + subgraph::Settings, +}; +use graph::data::{ + subgraph::{schema::DeploymentCreate, Graft}, + value::Word, +}; +use graph::futures01::{self, future, stream, Future, Stream}; +use graph::futures03::{ + compat::{Future01CompatExt, Stream01CompatExt}, + future::{FutureExt, TryFutureExt}, + stream::TryStreamExt, +}; use graph::prelude::{ CreateSubgraphResult, SubgraphAssignmentProvider as SubgraphAssignmentProviderTrait, SubgraphRegistrar as SubgraphRegistrarTrait, *, }; use graph::tokio_retry::Retry; -use graph::util::futures::retry_strategy; -use graph::util::futures::RETRY_DEFAULT_LIMIT; +use graph::util::futures::{retry_strategy, RETRY_DEFAULT_LIMIT}; -pub struct SubgraphRegistrar { +pub struct SubgraphRegistrar { logger: Logger, logger_factory: LoggerFactory, resolver: Arc, provider: Arc

, store: Arc, subscription_manager: Arc, + amp_client: Option>, chains: Arc, node_id: NodeId, version_switching_mode: SubgraphVersionSwitchingMode, @@ -42,11 +40,12 @@ pub struct SubgraphRegistrar { settings: Arc, } -impl SubgraphRegistrar +impl SubgraphRegistrar where P: SubgraphAssignmentProviderTrait, S: SubgraphStore, SM: SubscriptionManager, + AC: amp::Client + Send + Sync + 'static, { pub fn new( logger_factory: &LoggerFactory, @@ -54,6 +53,7 @@ where provider: Arc

, store: Arc, subscription_manager: Arc, + amp_client: Option>, chains: Arc, node_id: NodeId, version_switching_mode: SubgraphVersionSwitchingMode, @@ -71,6 +71,7 @@ where provider, store, subscription_manager, + amp_client, chains, node_id, version_switching_mode, @@ -252,11 +253,12 @@ where } #[async_trait] -impl SubgraphRegistrarTrait for SubgraphRegistrar +impl SubgraphRegistrarTrait for SubgraphRegistrar where P: SubgraphAssignmentProviderTrait, S: SubgraphStore, SM: SubscriptionManager, + AC: amp::Client + Send + Sync + 'static, { async fn create_subgraph( &self, @@ -311,7 +313,7 @@ where let deployment_locator = match kind { BlockchainKind::Ethereum => { - create_subgraph_version::( + create_subgraph_version::( &logger, self.store.clone(), self.chains.cheap_clone(), @@ -324,12 +326,13 @@ where debug_fork, self.version_switching_mode, &self.resolver, + self.amp_client.cheap_clone(), history_blocks, ) .await? } BlockchainKind::Near => { - create_subgraph_version::( + create_subgraph_version::( &logger, self.store.clone(), self.chains.cheap_clone(), @@ -342,12 +345,13 @@ where debug_fork, self.version_switching_mode, &self.resolver, + self.amp_client.cheap_clone(), history_blocks, ) .await? } BlockchainKind::Substreams => { - create_subgraph_version::( + create_subgraph_version::( &logger, self.store.clone(), self.chains.cheap_clone(), @@ -360,6 +364,7 @@ where debug_fork, self.version_switching_mode, &self.resolver, + self.amp_client.cheap_clone(), history_blocks, ) .await? @@ -533,7 +538,7 @@ async fn resolve_graft_block( }) } -async fn create_subgraph_version( +async fn create_subgraph_version( logger: &Logger, store: Arc, chains: Arc, @@ -546,6 +551,7 @@ async fn create_subgraph_version( debug_fork: Option, version_switching_mode: SubgraphVersionSwitchingMode, resolver: &Arc, + amp_client: Option>, history_blocks_override: Option, ) -> Result { let raw_string = serde_yaml::to_string(&raw).unwrap(); @@ -553,6 +559,7 @@ async fn create_subgraph_version( deployment.clone(), raw, resolver, + amp_client, logger, ENV_VARS.max_spec_version.clone(), ) diff --git a/docs/amp-powered-subgraphs.md b/docs/amp-powered-subgraphs.md new file mode 100644 index 00000000000..26255a14938 --- /dev/null +++ b/docs/amp-powered-subgraphs.md @@ -0,0 +1,407 @@ +# Amp-powered subgraphs + +> [!NOTE] +> This features is available starting from spec version `1.4.0` + +Amp-powered subgraphs are a new kind of subgraphs with SQL data sources that query and index data from the Amp servers. +They are significantly more efficient than the standard subgraphs, and the indexing time can be reduced from days and weeks, +to minutes and hours in most cases. + +## Prerequisites + +To enable Amp-powered subgraphs, the `GRAPH_AMP_FLIGHT_SERVICE_ADDRESS` ENV variable must be set to a valid Amp Flight gRPC service address. + +Additionally, if authentication is required for the Amp Flight gRPC service, the `GRAPH_AMP_FLIGHT_SERVICE_TOKEN` ENV variable must contain a valid authentication token. + +## Subgraph manifest + +Amp-powered subgraphs introduce a new structure for defining Amp subgraph data sources within the manifest. + +### Spec version + +The minimum spec version for Amp-powered subgraphs is `1.4.0`. + +

+Example YAML: + +```yaml +specVersion: 1.4.0 +# .. other fields ... +``` +
+ +### Data source structure + +### `kind` + +Every Amp data source must have the `kind` set to `amp`, and Amp-powered subgraphs must contain only Amp data sources. +This is used to assign the subgraph to the appropriate indexing process. + +
+Example YAML: + +```yaml +dataSources: + - kind: amp + # .. other fields ... +``` +
+ +### `name` + +Every Amp data source must have the `name` set to a non-empty string, containing only numbers, letters, hyphens, or underscores. +This name is used for observability purposes and to identify progress and potential errors produced by the data source. + +
+Example YAML: + +```yaml +dataSources: + - name: Transfers + # .. other fields ... +``` +
+ +### `network` + +Every Amp data source must have the `network` field set to a valid network name. +This is used to validate that the SQL queries for this data source produce results for the expected network. + +> [!NOTE] +> Currently, the SQL queries are required to produce results for a single network in order to maintain compatibility with non-Amp subgraphs. + +
+Example YAML: + +```yaml +dataSources: + - network: ethereum-mainnet + # .. other fields ... +``` +
+ +### `source` + +Every Amp data source must have a valid `source` that describes the behavior of SQL queries from this data source. + +### `source.dataset` + +Contains the name of the dataset that can be queried by SQL queries in this data source. +This is used to validate that the SQL queries for this data source only query the expected dataset. + +
+Example YAML: + +```yaml +dataSources: + - source: + dataset: edgeandnode/ethereum_mainnet + # .. other fields ... +``` +
+ +### `source.tables` + +Contains the names of the tables that can be queried by SQL queries in this data source. +This is used to validate that the SQL queries for this data source only query the expected tables. + +
+Example YAML: + +```yaml +dataSources: + - source: + tables: + - blocks + - transactions + # .. other fields ... +``` +
+ +### `source.address` + +Contains the contract address with which SQL queries in the data source interact. + +Enables SQL query reuse through `sg_source_address()` calls instead of hard-coding the contract address. +SQL queries resolve `sg_source_address()` calls to this contract address. + +
+Example YAML: + +```yaml +dataSources: + - source: + address: "0xc944E90C64B2c07662A292be6244BDf05Cda44a7" + # .. other fields ... +``` +
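+As a sketch of how this is used, a data source SQL query can call `sg_source_address()` instead of hard-coding the contract address, so the same query can be reused across manifests. The `logs` table and `address` column below are illustrative assumptions, not names defined by this document:
+
+```sql
+-- Hypothetical table/column names; sg_source_address() resolves to the
+-- `source.address` value from the manifest (the address shown above).
+SELECT _block_num, /* .. other projections .. */
+FROM "edgeandnode/ethereum_mainnet".logs
+WHERE address = sg_source_address();
+```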
+ +### `source.startBlock` + +Contains the minimum block number that SQL queries in the data source can query. +This is used as a starting point for the indexing process. + +_When not provided, defaults to block number `0`._ + +
+Example YAML: + +```yaml +dataSources: + - source: + startBlock: 11446769 + # .. other fields ... +``` +
+ +### `source.endBlock` + +Contains the maximum block number that SQL queries in the data source can query. +Reaching this block number will complete the indexing process. + +_When not provided, defaults to the maximum possible block number._ + +
+Example YAML: + +```yaml +dataSources: + - source: + endBlock: 23847939 + # .. other fields ... +``` +
+ +### `transformer` + +Every Amp data source must have a valid `transformer` that describes the transformations of source tables indexed by the Amp-powered subgraph. + +### `transformer.apiVersion` + +Represents the version of this transformer. Each version may contain a different set of features. + +> [!NOTE] +> Currently, only the version `0.0.1` is available. + +
+Example YAML: + +```yaml +dataSources: + - transformer: + apiVersion: 0.0.1 + # .. other fields ... +``` +
+ +### `transformer.abis` + +Contains a list of ABIs that SQL queries can reference to extract event signatures. + +Enables the use of `sg_event_signature('CONTRACT_NAME', 'EVENT_NAME')` calls in the +SQL queries which are resolved to full event signatures based on this list. + +_When not provided, defaults to an empty list._ + +
+Example YAML: + +```yaml +dataSources: + - transformer: + abis: + - name: ERC721 # The name of the contract + file: + # .. other fields ... +``` +
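+As an illustration, a SQL query can then reference an ABI by name to filter rows by event signature instead of pasting the full signature. The `logs` table, `topic0` column, and `Transfer` event below are assumptions made for this sketch:
+
+```sql
+-- 'ERC721' must match an ABI name declared under `transformer.abis`;
+-- the table, column, and event names are placeholders.
+SELECT _block_num, /* .. other projections .. */
+FROM "edgeandnode/ethereum_mainnet".logs
+WHERE topic0 = sg_event_signature('ERC721', 'Transfer');
+```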
+ +### `transformer.tables` + +Contains a list of transformed tables that extract data from source tables into subgraph entities. + +### Transformer table structure + +### `transformer.tables[i].name` + +Represents the name of the transformed table. Must reference a valid entity name from the subgraph schema. + +
+Example: + +**GraphQL schema:** + +```graphql +type Block @entity(immutable: true) { + # .. entity fields ... +} +``` + +**YAML manifest:** +```yaml +dataSources: + - transformer: + tables: + - name: Block + # .. other fields ... +``` +
+ +### `transformer.tables[i].query` + +Contains an inline SQL query that executes on the Amp server. +This is useful for simple SQL queries like `SELECT * FROM "edgeandnode/ethereum_mainnet".blocks;`. +For more complex cases, a separate file containing the SQL query can be used in the `file` field. + +The data resulting from this SQL query execution transforms into subgraph entities. + +_When not provided, the `file` field is used instead._ + +
+Example YAML: + +```yaml +dataSources: + - transformer: + tables: + - query: SELECT * FROM "edgeandnode/ethereum_mainnet".blocks; + # .. other fields ... +``` +
+ +### `transformer.tables[i].file` + +Contains the IPFS link to the SQL query that executes on the Amp server. + +The data resulting from this SQL query execution transforms into subgraph entities. + +_Ignored when the `query` field is provided._ +_When not provided, the `query` field is used instead._ + +
+Example YAML: + +```yaml +dataSources: + - transformer: + tables: + - file: + # .. other fields ... +``` +
+ +### Amp-powered subgraph examples + +Complete examples on how to create, deploy and query Amp-powered subgraphs are available in a separate repository: +https://github.com/edgeandnode/amp-subgraph-examples + +## SQL query requirements + +### Block numbers + +Every SQL query in Amp-powered subgraphs must return the block number for every row. +This is required because subgraphs rely on this information for storing subgraph entities. + +Graph-node will look for block numbers in the following columns: +`_block_num`, `block_num`, `blockNum`, `block`, `block_number`, `blockNumber`. + +Example SQL query: `SELECT _block_num, /* .. other projections .. */ FROM "edgeandnode/ethereum_mainnet".blocks;` + +### Block hashes + +Every SQL query in Amp-powered subgraphs is expected to return the block hash for every row. +This is required because subgraphs rely on this information for storing subgraph entities. + +When a SQL query does not have the block hash projection, graph-node will attempt to get it from the +source tables specified in the subgraph manifest. + +Graph-node will look for block hashes in the following columns: +`hash`, `block_hash`, `blockHash`. + +Example SQL query: `SELECT hash, /* .. other projections .. */ FROM "edgeandnode/ethereum_mainnet".blocks;` + +> [!NOTE] +> If a table does not contain the block hash column, it can be retrieved by joining that table with another that contains the column on the `_block_num` column. + +### Block timestamps + +Every SQL query in Amp-powered subgraphs is expected to return the block timestamps for every row. +This is required because subgraphs rely on this information for storing subgraph entities. + +When a SQL query does not have the block timestamps projection, graph-node will attempt to get it from the +source tables specified in the subgraph manifest. + +Graph-node will look for block timestamps in the following columns: +`timestamp`, `block_timestamp`, `blockTimestamp`. + +Example SQL query: `SELECT timestamp, /* .. other projections .. */ FROM "edgeandnode/ethereum_mainnet".blocks;` + +> [!NOTE] +> If a table does not contain the block timestamp column, it can be retrieved by joining that table with another that contains the column on the `_block_num` column. + +## Type conversions + +Amp core SQL data types are converted intuitively to compatible subgraph entity types. + +## Schema generation + +Amp-powered subgraphs support the generation of GraphQL schemas based on the schemas of SQL queries referenced in the subgraph manifest. +This is useful when indexing entities that do not rely on complex relationships, such as contract events. + +The generated subgraph entities are immutable. + +To enable schema generation, simply remove the `schema` field from the subgraph manifest. + +> [!NOTE] +> For more flexibility and control over the schema, a manually created GraphQL schema is preferred. + +## Aggregations + +Amp-powered subgraphs fully support the subgraph aggregations feature. +This allows having complex aggregations on top of data indexed from the Amp servers. + +For more information on using the powerful subgraph aggregations feature, +refer to the [documentation](https://github.com/graphprotocol/graph-node/blob/master/docs/aggregations.md). + +## Composition + +Amp-powered subgraphs fully support the subgraph composition feature. +This allows applying complex subgraph mappings on top of data indexed from the Amp servers. 
+ +For more information on using the powerful subgraph composition feature, +refer to the [example repository](https://github.com/graphprotocol/example-composable-subgraph). + +## ENV variables + +The Amp-powered subgraphs feature introduces the following new ENV variables: + +- `GRAPH_AMP_FLIGHT_SERVICE_ADDRESS` – The address of the Amp Flight gRPC service. _Defaults to `None`, which disables support for Amp-powered subgraphs._ +- `GRAPH_AMP_FLIGHT_SERVICE_TOKEN` – Token used to authenticate Amp Flight gRPC service requests. _Defaults to `None`, which disables authentication._ +- `GRAPH_AMP_MAX_BUFFER_SIZE` – Maximum number of response batches to buffer in memory per stream for each SQL query. _Defaults to `1,000`._ +- `GRAPH_AMP_MAX_BLOCK_RANGE` – Maximum number of blocks to request per stream for each SQL query. _Defaults to `2,000,000`._ +- `GRAPH_AMP_QUERY_RETRY_MIN_DELAY_SECONDS` – Minimum time to wait before retrying a failed SQL query to the Amp server. _Defaults to `1` second._ +- `GRAPH_AMP_QUERY_RETRY_MAX_DELAY_SECONDS` – Maximum time to wait before retrying a failed SQL query to the Amp server. _Defaults to `600` seconds._ + +## Metrics + +The Amp-powered subgraphs feature introduces the following new metrics: + +- `amp_deployment_status` – Indicates the current indexing status of a deployment. + + **Possible values:** + - `1` - graph-node is preparing to start indexing; + - `2` - deployment is being indexed; + - `3` - indexing is stopped by request; + - `4` - indexing failed; +- `amp_deployment_head` – Tracks the most recent block number processed by a deployment. +- `amp_deployment_target` – Tracks the target block number of a deployment. +- `amp_deployment_synced` – Indicates whether a deployment has reached the chain head or the end block since it was deployed. + + **Possible values:** + - `0` - deployment is not synced; + - `1` - deployment is synced; +- `amp_deployment_indexing_duration_seconds` – Tracks the total duration in seconds of deployment indexing. +- `amp_deployment_blocks_processed_count` – Tracks the total number of blocks processed by a deployment. + + +Additionally, the `deployment_sync_secs` metric is extended with a new `amp-process` stage and new sections specific to the Amp indexing process.
diff --git a/graph/Cargo.toml b/graph/Cargo.toml index bb4287b2c31..a77ac555758 100644 --- a/graph/Cargo.toml +++ b/graph/Cargo.toml @@ -80,7 +80,7 @@ tokio-stream = { version = "0.1.15", features = ["sync"] } tokio-retry = "0.3.0" toml = "0.8.15" url = "2.5.4" -prometheus = "0.13.4" +prometheus.workspace = true priority-queue = "2.3.1" tonic = { workspace = true } prost = { workspace = true } @@ -103,6 +103,16 @@ serde_plain = "1.0.2" csv = "1.3.0" object_store = { version = "0.12.0", features = ["gcp"] } +# Dependencies related to Amp subgraphs +ahash.workspace = true +alloy.workspace = true +arrow-flight.workspace = true +arrow.workspace = true +half.workspace = true +lazy-regex.workspace = true +sqlparser-latest.workspace = true +tokio-util.workspace = true + [dev-dependencies] clap.workspace = true maplit = "1.0.2" diff --git a/graph/src/amp/client/flight_client.rs b/graph/src/amp/client/flight_client.rs new file mode 100644 index 00000000000..588f1a97762 --- /dev/null +++ b/graph/src/amp/client/flight_client.rs @@ -0,0 +1,383 @@ +use std::{collections::HashMap, ops::RangeInclusive, time::Duration}; + +use ahash::RandomState; +use alloy::primitives::{BlockHash, BlockNumber}; +use arrow::{datatypes::Schema, error::ArrowError}; +use arrow_flight::{ + decode::DecodedPayload, error::FlightError, flight_service_client::FlightServiceClient, + sql::client::FlightSqlServiceClient, +}; +use async_stream::try_stream; +use bytes::Bytes; +use futures03::{future::BoxFuture, stream::BoxStream, StreamExt}; +use http::Uri; +use serde::{Deserialize, Serialize}; +use slog::{debug, trace, Logger}; +use thiserror::Error; +use tonic::transport::{Channel, ClientTlsConfig, Endpoint}; + +use crate::{ + amp::{ + client::{ + Client, LatestBlockBeforeReorg, RequestMetadata, ResponseBatch, ResumeStreamingQuery, + }, + error, + log::{one_line, Logger as _}, + }, + prelude::CheapClone, +}; + +/// A client for the Amp Flight gRPC service. +/// +/// This client connects to an Amp server and executes SQL queries +/// using the Apache Arrow Flight protocol. +pub struct FlightClient { + channel: Channel, + auth_token: Option, +} + +impl FlightClient { + /// Creates a new Amp client connected to the specified Amp Flight service address. + pub async fn new(addr: Uri) -> Result { + let is_https = addr.scheme() == Some(&http::uri::Scheme::HTTPS); + let mut endpoint = Endpoint::from(addr) + .tcp_keepalive(Some(Duration::from_secs(30))) + .keep_alive_while_idle(true) + .http2_adaptive_window(true) + .initial_connection_window_size(Some(32 * 1024 * 1024)) + .initial_stream_window_size(Some(16 * 1024 * 1024)) + .connect_timeout(Duration::from_secs(10)); + + if is_https { + let mut tls_config = ClientTlsConfig::new(); + tls_config = tls_config.with_native_roots(); + + endpoint = endpoint.tls_config(tls_config).unwrap(); + } + + Ok(Self { + channel: endpoint.connect().await.map_err(Error::Connection)?, + auth_token: None, + }) + } + + /// Sets the authentication token for requests to the Amp server. 
+ pub fn set_auth_token(&mut self, auth_token: impl Into) { + self.auth_token = Some(auth_token.into()); + } + + fn raw_client(&self) -> FlightSqlServiceClient { + let channel = self.channel.cheap_clone(); + let client = FlightServiceClient::new(channel) + .max_encoding_message_size(256 * 1024 * 1024) + .max_decoding_message_size(256 * 1024 * 1024); + + let mut client = FlightSqlServiceClient::new_from_inner(client); + if let Some(auth_token) = &self.auth_token { + client.set_token(auth_token.clone()); + } + + client + } +} + +impl Client for FlightClient { + type Error = Error; + + fn schema( + &self, + logger: &Logger, + query: impl ToString, + ) -> BoxFuture<'static, Result> { + let logger = logger.component("AmpFlightClient"); + let mut raw_client = self.raw_client(); + let query = query.to_string(); + + Box::pin(async move { + const TXN_ID: Option = None; + + debug!(logger, "Executing SQL query"; + "query" => &*one_line(&query) + ); + + let flight_info = raw_client + .execute(query, TXN_ID) + .await + .map_err(Error::Service)?; + + flight_info.try_decode_schema().map_err(Error::Service) + }) + } + + fn query( + &self, + logger: &Logger, + query: impl ToString, + request_metadata: Option, + ) -> BoxStream<'static, Result> { + let query = query.to_string(); + + // Generates a hash from the SQL query for log correlation. + // The hash allows connecting related logs without including the full SQL query in every log message. + // Constant seeds ensure consistent hashes for the same query. + let hasher = RandomState::with_seeds(0, 0, 0, 0); + + let logger = logger + .component("AmpFlightClient") + .new(slog::o!("query_hash" => hasher.hash_one(&query))); + + let mut raw_client = self.raw_client(); + let mut prev_block_ranges: Vec = Vec::new(); + + if let Some(request_metadata) = request_metadata { + let RequestMetadata { + resume_streaming_query, + } = request_metadata; + + if let Some(resume_streaming_query) = resume_streaming_query { + prev_block_ranges = resume_streaming_query + .iter() + .cloned() + .map(Into::into) + .collect(); + + let metadata = serialize_resume_streaming_query(resume_streaming_query); + debug!(logger, "Setting request metadata"; + "amp-resume" => &metadata + ); + + raw_client.set_header("amp-resume", metadata.clone()); + + // TODO: Remove when the Amp server updates to the latest version + raw_client.set_header("nozzle-resume", metadata); + } + } + + try_stream! 
{ + const TXN_ID: Option = None; + + debug!(logger, "Executing SQL query"; + "query" => &*one_line(&query) + ); + + let flight_info = raw_client + .execute(query, TXN_ID) + .await + .map_err(Error::Service)?; + + for (endpoint_index, endpoint) in flight_info.endpoint.into_iter().enumerate() { + let Some(ticket) = endpoint.ticket else { + continue; + }; + + let mut stream = raw_client.do_get(ticket).await.map_err(Error::Service)?.into_inner(); + let mut batch_index = 0u32; + let mut prev_block_ranges = prev_block_ranges.clone(); + + while let Some(batch_result) = stream.next().await { + let flight_data = batch_result.map_err(Error::Stream)?; + let app_metadata = flight_data.inner.app_metadata; + let payload = flight_data.payload; + + let record_batch = match payload { + DecodedPayload::None => { + trace!(logger, "Received empty data"; + "endpoint_index" => endpoint_index + ); + continue + }, + DecodedPayload::Schema(_) => { + trace!(logger, "Received schema only"; + "endpoint_index" => endpoint_index + ); + continue + } + DecodedPayload::RecordBatch(record_batch) => record_batch, + }; + let block_ranges = Metadata::parse(&app_metadata)?.ranges; + + trace!(logger, "Received a new record batch"; + "endpoint_index" => endpoint_index, + "batch_index" => batch_index, + "num_rows" => record_batch.num_rows(), + "memory_size_bytes" => record_batch.get_array_memory_size(), + "block_ranges" => ?block_ranges + ); + + if let Some(reorg) = detect_reorg(&block_ranges, &prev_block_ranges) { + yield ResponseBatch::Reorg(reorg); + } + + yield ResponseBatch::Batch { data: record_batch }; + + batch_index += 1; + prev_block_ranges = block_ranges; + } + + debug!(logger, "Query execution completed successfully"; + "batch_count" => batch_index + ); + } + } + .boxed() + } +} + +#[derive(Debug, Error)] +pub enum Error { + #[error("invalid metadata: {0:#}")] + InvalidMetadata(#[source] anyhow::Error), + + #[error("connection failed: {0:#}")] + Connection(#[source] tonic::transport::Error), + + #[error("service failed: {0:#}")] + Service(#[source] ArrowError), + + #[error("stream failed: {0:#}")] + Stream(#[source] FlightError), +} + +impl error::IsDeterministic for Error { + fn is_deterministic(&self) -> bool { + let msg = match self { + Self::InvalidMetadata(_) => return true, + Self::Connection(_) => return false, + Self::Service(e) => e.to_string(), + Self::Stream(_) => return false, + }; + + static DETERMINISTIC_ERROR_PATTERNS: &[&str] = &[ + // Example SQL query: SELECT; + r#"code: InvalidArgument, message: ""#, + // Example SQL query: SELECT * FROM invalid_dataset; + // SELECT * FROM valid_dataset.invalid_table; + r#"code: Internal, message: "error creating planning context: "#, + // Example SQL query: SELECT invalid_column FROM valid_dataset.valid_table; + r#"code: Internal, message: "planning error: "#, + ]; + + for &pattern in DETERMINISTIC_ERROR_PATTERNS { + if msg.contains(pattern) { + return true; + } + } + + false + } +} + +/// Metadata received with every record batch. +#[derive(Debug, Clone, Deserialize)] +struct Metadata { + /// Block ranges processed by the Amp server to produce the record batch. + ranges: Vec, +} + +impl Metadata { + /// Parses and returns the metadata. + fn parse(app_metadata: &[u8]) -> Result { + if app_metadata.is_empty() { + return Ok(Self { ranges: Vec::new() }); + } + + serde_json::from_slice::(app_metadata).map_err(|e| Error::InvalidMetadata(e.into())) + } +} + +/// Block range processed by the Amp server to produce a record batch. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +struct BlockRange { + /// Network that contains the source data for the dataset. + network: String, + + /// Block numbers processed. + numbers: RangeInclusive, + + /// Hash of the last block in the block range. + hash: BlockHash, + + /// Hash of the parent block of the first block in the block range. + prev_hash: Option, +} + +impl BlockRange { + /// Returns the first block number in the range. + fn start(&self) -> BlockNumber { + *self.numbers.start() + } + + /// Returns the last block number in the range. + fn end(&self) -> BlockNumber { + *self.numbers.end() + } +} + +impl From for BlockRange { + fn from(resume: ResumeStreamingQuery) -> Self { + Self { + network: resume.network, + numbers: resume.block_number..=resume.block_number, + hash: resume.block_hash, + prev_hash: None, + } + } +} + +/// Serializes the information required to resume a streaming SQL query to JSON. +fn serialize_resume_streaming_query(resume_streaming_query: Vec) -> String { + #[derive(Serialize)] + struct Block { + number: BlockNumber, + hash: BlockHash, + } + + let mapping: HashMap = resume_streaming_query + .into_iter() + .map( + |ResumeStreamingQuery { + network, + block_number: number, + block_hash: hash, + }| { (network, Block { number, hash }) }, + ) + .collect(); + + serde_json::to_string(&mapping).unwrap() +} + +/// Detects whether a reorg occurred during query execution. +/// +/// Compares current block ranges with block ranges from the previous record batch +/// to detect non-incremental batches. When a non-incremental batch is detected, +/// returns the block number and hash of the parent block of the first block +/// after reorg for every processed network. +/// +/// Returns `None` when no reorgs are detected. +fn detect_reorg( + block_ranges: &[BlockRange], + prev_block_ranges: &[BlockRange], +) -> Option> { + Some( + block_ranges + .iter() + .filter_map(|block_range| { + let prev_block_range = prev_block_ranges + .iter() + .find(|prev_block_range| prev_block_range.network == block_range.network)?; + + if block_range != prev_block_range && block_range.start() <= prev_block_range.end() + { + return Some(LatestBlockBeforeReorg { + network: block_range.network.clone(), + block_number: block_range.start().checked_sub(1), + block_hash: block_range.prev_hash, + }); + } + + None + }) + .collect::>(), + ) + .filter(|v| !v.is_empty()) +} diff --git a/graph/src/amp/client/mod.rs b/graph/src/amp/client/mod.rs new file mode 100644 index 00000000000..34999da03fa --- /dev/null +++ b/graph/src/amp/client/mod.rs @@ -0,0 +1,83 @@ +pub mod flight_client; + +use std::error::Error; + +use alloy::primitives::{BlockHash, BlockNumber}; +use arrow::{array::RecordBatch, datatypes::Schema}; +use futures03::{future::BoxFuture, stream::BoxStream}; +use slog::Logger; + +use crate::amp::error; + +/// Client for connecting to Amp core and executing SQL queries. +pub trait Client { + type Error: Error + error::IsDeterministic + Send + Sync + 'static; + + /// Executes a SQL query and returns the corresponding schema. + fn schema( + &self, + logger: &Logger, + query: impl ToString, + ) -> BoxFuture<'static, Result>; + + /// Executes a SQL query and streams the requested data in batches. + fn query( + &self, + logger: &Logger, + query: impl ToString, + request_metadata: Option, + ) -> BoxStream<'static, Result>; +} + +/// Metadata sent to the Amp server with the SQL query. 
+#[derive(Debug, Clone)] +pub struct RequestMetadata { + /// Allows resuming streaming SQL queries from any block. + pub resume_streaming_query: Option>, +} + +/// Resumes a streaming SQL query from the specified block. +#[derive(Debug, Clone)] +pub struct ResumeStreamingQuery { + /// Network that contains the source data for the dataset. + pub network: String, + + /// Block number after which the SQL query should resume. + /// + /// An invalid block number triggers a reorg message. + pub block_number: BlockNumber, + + /// Block hash of the block after which the SQL query should resume. + /// + /// An invalid block hash triggers a reorg message. + pub block_hash: BlockHash, +} + +/// Represents a batch response resulting from query execution on the Amp server. +#[derive(Debug, Clone)] +pub enum ResponseBatch { + /// Contains the batch data received from the Amp server. + Batch { data: RecordBatch }, + + /// Contains the reorg message received from the Amp server. + /// + /// It is received before the record batch that contains the data after the reorg. + Reorg(Vec), +} + +/// Represents the parent block of the first block after the reorg. +#[derive(Debug, Clone)] +pub struct LatestBlockBeforeReorg { + /// Network that contains the source data for the dataset. + pub network: String, + + /// Block number of the parent block of the first block after the reorg. + /// + /// It is `None` when the reorg affects every block in the blockchain. + pub block_number: Option, + + /// Block hash of the parent block of the first block after the reorg. + /// + /// It is `None` when the reorg affects every block in the blockchain. + pub block_hash: Option, +} diff --git a/graph/src/amp/codec/array_decoder.rs b/graph/src/amp/codec/array_decoder.rs new file mode 100644 index 00000000000..e74a777cb12 --- /dev/null +++ b/graph/src/amp/codec/array_decoder.rs @@ -0,0 +1,2084 @@ +use std::{fmt::Display, sync::LazyLock}; + +use alloy::primitives::B256; +use anyhow::{anyhow, Result}; +use arrow::{ + array::{ + timezone::Tz, Array, ArrayAccessor, BinaryArray, BinaryViewArray, BooleanArray, + Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, + LargeStringArray, PrimitiveArray, StringArray, StringViewArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, + }, + datatypes::ArrowTemporalType, +}; +use chrono::{DateTime, Utc}; + +use super::decoder::Decoder; +use crate::data::store::scalar::{BigDecimal, BigInt}; + +/// Decodes Arrow arrays into subgraph types. +pub struct ArrayDecoder<'a, T: 'static>(&'a T); + +impl<'a, T> ArrayDecoder<'a, T> +where + T: Array + 'static, +{ + /// Creates a new Arrow array decoder. + /// + /// # Errors + /// + /// Returns an error if the `array` cannot be downcasted to type `T`. + /// + /// The returned error is deterministic. + pub fn new(array: &'a dyn Array) -> Result { + Ok(Self(downcast_ref(array)?)) + } +} + +macro_rules! 
check_value { + ($self:ident, $row_index:ident) => { + if $row_index >= $self.0.len() { + return Ok(None); + } + + if $self.0.is_null($row_index) { + return Ok(None); + } + }; +} + +impl<'a, T> ArrayDecoder<'a, T> +where + &'a T: ArrayAccessor, +{ + fn value( + &'a self, + row_index: usize, + mapping: impl FnOnce(<&'a T as ArrayAccessor>::Item) -> Result, + ) -> Result> { + check_value!(self, row_index); + mapping(self.0.value(row_index)).map(Some) + } +} + +impl Decoder> for ArrayDecoder<'_, BooleanArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, Ok) + } +} + +impl Decoder> for ArrayDecoder<'_, Int8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, Int8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, Int8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, Int16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, Int16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, Int16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, Int32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, Int32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, Int32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, Int64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, Int64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, Int64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt8Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_unsigned_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_unsigned_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + 
+impl Decoder> for ArrayDecoder<'_, UInt32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_unsigned_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, Ok) + } +} + +impl Decoder> for ArrayDecoder<'_, UInt64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| decode_unsigned_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, Float16Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| Ok(value.to_f32().into())) + } +} + +impl Decoder> for ArrayDecoder<'_, Float32Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| Ok(value.into())) + } +} + +impl Decoder> for ArrayDecoder<'_, Float64Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| Ok(value.into())) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal128Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `i32` from a decimal value")); + } + + self.value(row_index, decode_i32) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal128Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `i64` from a decimal value")); + } + + self.value(row_index, decode_i64) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal128Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `BigInt` from a decimal value")); + } + + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal128Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| { + let scale = self.0.scale() as i64; + let big_int = decode_signed_big_int(x.to_le_bytes())?; + + Ok(BigDecimal::new(big_int, -scale)) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal256Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `i32` from a decimal value")); + } + + self.value(row_index, |value| { + let value = value + .to_i128() + .ok_or_else(|| anyhow!("cannot decode `i32` from a larger `i256` value"))?; + + decode_i32(value) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal256Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `i64` from a decimal value")); + } + + self.value(row_index, |value| { + let value = value + .to_i128() + .ok_or_else(|| anyhow!("cannot decode `i64` from a larger `i256` value"))?; + + decode_i64(value) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, Decimal256Array> { + fn decode(&self, row_index: usize) -> Result> { + if self.0.scale() != 0 { + return Err(anyhow!("cannot decode `BigInt` from a decimal value")); + } + + self.value(row_index, |x| decode_signed_big_int(x.to_le_bytes())) + } +} + 
+impl Decoder> for ArrayDecoder<'_, Decimal256Array> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| { + let scale = self.0.scale() as i64; + let big_int = decode_signed_big_int(x.to_le_bytes())?; + + Ok(BigDecimal::new(big_int, -scale)) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, StringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| Ok(x.to_string())) + } +} + +impl Decoder> for ArrayDecoder<'_, StringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value + .parse() + .map_err(|_| anyhow!("failed to parse `BigInt` from a non-numeric string value")) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, StringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value.parse().map_err(|_| { + anyhow!("failed to parse `BigDecimal` from a non-numeric string value") + }) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, StringViewArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| Ok(x.to_string())) + } +} + +impl Decoder> for ArrayDecoder<'_, StringViewArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value + .parse() + .map_err(|_| anyhow!("failed to parse `BigInt` from a non-numeric string value")) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, StringViewArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value.parse().map_err(|_| { + anyhow!("failed to parse `BigDecimal` from a non-numeric string value") + }) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, LargeStringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| Ok(x.to_string())) + } +} + +impl Decoder> for ArrayDecoder<'_, LargeStringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value + .parse() + .map_err(|_| anyhow!("failed to parse `BigInt` from a non-numeric string value")) + }) + } +} + +impl Decoder> for ArrayDecoder<'_, LargeStringArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |value| { + value.parse().map_err(|_| { + anyhow!("failed to parse `BigDecimal` from a non-numeric string value") + }) + }) + } +} + +impl Decoder>> for ArrayDecoder<'_, BinaryArray> { + fn decode(&self, row_index: usize) -> Result>> { + self.value(row_index, |x| Ok(x.into())) + } +} + +impl Decoder>> for ArrayDecoder<'_, BinaryViewArray> { + fn decode(&self, row_index: usize) -> Result>> { + self.value(row_index, |x| Ok(x.into())) + } +} + +impl Decoder>> for ArrayDecoder<'_, FixedSizeBinaryArray> { + fn decode(&self, row_index: usize) -> Result>> { + self.value(row_index, |x| Ok(x.into())) + } +} + +impl Decoder> for ArrayDecoder<'_, FixedSizeBinaryArray> { + fn decode(&self, row_index: usize) -> Result> { + self.value(row_index, |x| { + B256::try_from(x) + .map_err(|_| anyhow!("failed to convert '{}' to 'B256'", hex::encode(x))) + }) + } +} + +impl Decoder>> for ArrayDecoder<'_, LargeBinaryArray> { + fn decode(&self, row_index: usize) -> Result>> { + self.value(row_index, |x| Ok(x.into())) + } +} + +impl Decoder>> for ArrayDecoder<'_, TimestampSecondArray> { + fn decode(&self, row_index: usize) -> Result>> { + check_value!(self, row_index); + decode_timestamp(self.0, row_index).map(Some) + } +} + +impl Decoder>> for ArrayDecoder<'_, TimestampMillisecondArray> { + fn decode(&self, row_index: usize) -> Result>> { + check_value!(self, row_index); + 
decode_timestamp(self.0, row_index).map(Some) + } +} + +impl Decoder>> for ArrayDecoder<'_, TimestampMicrosecondArray> { + fn decode(&self, row_index: usize) -> Result>> { + check_value!(self, row_index); + decode_timestamp(self.0, row_index).map(Some) + } +} + +impl Decoder>> for ArrayDecoder<'_, TimestampNanosecondArray> { + fn decode(&self, row_index: usize) -> Result>> { + check_value!(self, row_index); + decode_timestamp(self.0, row_index).map(Some) + } +} + +fn downcast_ref<'a, T>(array: &'a dyn Array) -> Result<&'a T> +where + T: Array + 'static, +{ + array + .as_any() + .downcast_ref() + .ok_or_else(|| anyhow!("failed to downcast array")) +} + +fn decode_i32(n: T) -> Result +where + T: TryInto + Copy + Display, +{ + n.try_into() + .map_err(|_| anyhow!("failed to convert '{n}' to 'i32'")) +} + +fn decode_i64(n: T) -> Result +where + T: TryInto + Copy + Display, +{ + n.try_into() + .map_err(|_| anyhow!("failed to convert '{n}' to 'i64'")) +} + +fn decode_signed_big_int(le_bytes: impl AsRef<[u8]>) -> Result { + let le_bytes = le_bytes.as_ref(); + + BigInt::from_signed_bytes_le(le_bytes) + .map_err(|_| anyhow!("failed to convert '{}' to 'BigInt'", hex::encode(le_bytes))) +} + +fn decode_unsigned_big_int(le_bytes: impl AsRef<[u8]>) -> Result { + let le_bytes = le_bytes.as_ref(); + + BigInt::from_unsigned_bytes_le(le_bytes) + .map_err(|_| anyhow!("failed to convert '{}' to 'BigInt'", hex::encode(le_bytes))) +} + +fn decode_timestamp(array: &PrimitiveArray, row_index: usize) -> Result> +where + T: ArrowTemporalType, + i64: From, +{ + static UTC: LazyLock = LazyLock::new(|| "+00:00".parse().unwrap()); + + let Some(timestamp) = array.value_as_datetime_with_tz(row_index, *UTC) else { + return Err(anyhow!("failed to decode timestamp; unknown timezone")); + }; + + Ok(timestamp.to_utc()) +} + +#[cfg(test)] +mod tests { + use arrow::datatypes::i256; + use chrono::TimeZone; + use half::f16; + + use super::super::test_fixtures::*; + use super::*; + + mod boolean_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, BooleanArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("boolean").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(true)); + assert_eq!(decoder.decode(1).unwrap(), Some(false)); + assert_eq!(decoder.decode(2).unwrap(), Some(true)); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("boolean").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int8_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Int8Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int8").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + assert_eq!(decoder.decode(2).unwrap(), Some(i8::MAX as i32)); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(i8::MAX as i64)); + } + + #[test] + fn 
decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(i8::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int8").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int16_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Int16Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int16").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + assert_eq!(decoder.decode(2).unwrap(), Some(i16::MAX as i32)); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(i16::MAX as i64)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(i16::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int16").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int32_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Int32Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int32").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + assert_eq!(decoder.decode(2).unwrap(), Some(i32::MAX)); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(i32::MAX as i64)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(i32::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int32").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int64_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Int64Array>: Decoder>, + { + Box::new( + 
ArrayDecoder::::new(RECORD_BATCH.column_by_name("int64").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + } + + #[test] + fn fail_to_decode_i32_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(i64::MAX)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(i64::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("int64").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod uint8_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, UInt8Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint8").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + assert_eq!(decoder.decode(2).unwrap(), Some(u8::MAX as i32)); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(u8::MAX as i64)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(u8::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint8").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod uint16_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, UInt16Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint16").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + assert_eq!(decoder.decode(2).unwrap(), Some(u16::MAX as i32)); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(u16::MAX as i64)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + 
assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(u16::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint16").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod uint32_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, UInt32Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint32").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + } + + #[test] + fn fail_to_decode_i32_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + assert_eq!(decoder.decode(2).unwrap(), Some(u32::MAX as i64)); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(u32::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint32").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod uint64_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, UInt64Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint64").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + } + + #[test] + fn fail_to_decode_i32_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + } + + #[test] + fn fail_to_decode_i64_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(u64::MAX))); + } + + #[test] + fn decode_valid_u64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10u64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20u64)); + assert_eq!(decoder.decode(2).unwrap(), Some(u64::MAX)); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn 
fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("uint64").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod float16_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Float16Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float16").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigDecimal::from(10.0))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigDecimal::from(20.0))); + assert_eq!( + decoder.decode(2).unwrap(), + Some(BigDecimal::from(f16::MAX.to_f32())) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float16").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod float32_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Float32Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float32").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigDecimal::from(10.0))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigDecimal::from(20.0))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigDecimal::from(f32::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float32").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod float64_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Float64Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float64").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigDecimal::from(10.0))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigDecimal::from(20.0))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigDecimal::from(f64::MAX))); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("float64").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod decimal128_decoder_without_scale { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Decimal128Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("decimal128").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + } + + #[test] + fn fail_to_decode_i32_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), 
Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + } + + #[test] + fn fail_to_decode_i64_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(i128::MAX))); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigDecimal::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigDecimal::from(20))); + assert_eq!( + decoder.decode(2).unwrap(), + Some(BigDecimal::from(i128::MAX)) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("decimal128").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod decimal128_decoder_with_scale { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Decimal128Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH + .column_by_name("decimal128_with_scale") + .unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn fail_to_decode_i32_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn fail_to_decode_i64_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn fail_to_decode_big_int_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(BigDecimal::new(10.into(), -10)) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(BigDecimal::new(20.into(), -10)) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(BigDecimal::new(i128::MAX.into(), -10)) + ); + } + } + + mod decimal256_decoder_without_scale { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Decimal256Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("decimal256").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_i32_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i32)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i32)); + } + + #[test] + fn fail_to_decode_i32_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_i64_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(10i64)); + assert_eq!(decoder.decode(1).unwrap(), Some(20i64)); + } + + #[test] + fn fail_to_decode_i64_values_from_larger_values() { + let decoder = decoder::(); + + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigInt::from(20))); + assert_eq!( + decoder.decode(2).unwrap(), + 
Some(BigInt::from_signed_bytes_be(&i256::MAX.to_be_bytes()).unwrap()) + ); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(BigDecimal::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Some(BigDecimal::from(20))); + assert_eq!( + decoder.decode(2).unwrap(), + Some(BigDecimal::new( + BigInt::from_signed_bytes_be(&i256::MAX.to_be_bytes()).unwrap(), + 0 + )) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("decimal256").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod decimal256_decoder_with_scale { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, Decimal256Array>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH + .column_by_name("decimal256_with_scale") + .unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn fail_to_decode_i32_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn fail_to_decode_i64_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn fail_to_decode_big_int_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(BigDecimal::new(10.into(), -10)) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(BigDecimal::new(20.into(), -10)) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(BigDecimal::new( + BigInt::from_signed_bytes_be(&i256::MAX.to_be_bytes()).unwrap(), + -10 + )) + ); + } + } + + mod utf8_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, StringArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("utf8").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_string_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some("aa".to_string())); + assert_eq!(decoder.decode(1).unwrap(), Some("bb".to_string())); + assert_eq!(decoder.decode(2).unwrap(), Some("30".to_string())); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(30))); + } + + #[test] + fn fail_to_decode_big_int_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigDecimal::from(30))); + } + + #[test] + fn fail_to_decode_big_decimal_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("utf8").unwrap()) + .map(|_| 
()) + .unwrap_err(); + } + } + + mod utf8_view_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, StringViewArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("utf8_view").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_string_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some("aa".to_string())); + assert_eq!(decoder.decode(1).unwrap(), Some("bb".to_string())); + assert_eq!(decoder.decode(2).unwrap(), Some("30".to_string())); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(30))); + } + + #[test] + fn fail_to_decode_big_int_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigDecimal::from(30))); + } + + #[test] + fn fail_to_decode_big_decimal_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("utf8_view").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod large_utf8_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, LargeStringArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("large_utf8").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_string_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some("aa".to_string())); + assert_eq!(decoder.decode(1).unwrap(), Some("bb".to_string())); + assert_eq!(decoder.decode(2).unwrap(), Some("30".to_string())); + } + + #[test] + fn decode_valid_big_int_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigInt::from(30))); + } + + #[test] + fn fail_to_decode_big_int_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn decode_valid_big_decimal_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(2).unwrap(), Some(BigDecimal::from(30))); + } + + #[test] + fn fail_to_decode_big_decimal_values_from_non_numeric_values() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("large_utf8").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod binary_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, BinaryArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new(RECORD_BATCH.column_by_name("binary").unwrap()) + .unwrap(), + ) + } + + #[test] + fn decode_valid_binary_values() { + let decoder = decoder::>(); + + assert_eq!(decoder.decode(0).unwrap(), Some((b"aa".as_slice()).into())); + assert_eq!(decoder.decode(1).unwrap(), 
Some((b"bb".as_slice()).into())); + assert_eq!(decoder.decode(2).unwrap(), Some((b"cc".as_slice()).into())); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("binary").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod binary_view_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, BinaryViewArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("binary_view").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_binary_values() { + let decoder = decoder::>(); + + assert_eq!(decoder.decode(0).unwrap(), Some((b"aa".as_slice()).into())); + assert_eq!(decoder.decode(1).unwrap(), Some((b"bb".as_slice()).into())); + assert_eq!(decoder.decode(2).unwrap(), Some((b"cc".as_slice()).into())); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("binary_view").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod fixed_size_binary_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, FixedSizeBinaryArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("fixed_size_binary").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_binary_values() { + let decoder = decoder::>(); + + assert_eq!(decoder.decode(0).unwrap(), Some((b"aa".as_slice()).into())); + assert_eq!(decoder.decode(1).unwrap(), Some((b"bb".as_slice()).into())); + assert_eq!(decoder.decode(2).unwrap(), Some((b"cc".as_slice()).into())); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_decode_b256_values_from_invalid_binary_size() { + let decoder = decoder::(); + + decoder.decode(0).unwrap_err(); + decoder.decode(1).unwrap_err(); + decoder.decode(2).unwrap_err(); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("fixed_size_binary").unwrap(), + ) + .map(|_| ()) + .unwrap_err(); + } + } + + mod fixed_size_binary_32_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, FixedSizeBinaryArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("fixed_size_binary_32").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_b256_values() { + let decoder = decoder::(); + + assert_eq!(decoder.decode(0).unwrap(), Some(B256::from([10u8; 32]))); + assert_eq!(decoder.decode(1).unwrap(), Some(B256::from([20u8; 32]))); + assert_eq!(decoder.decode(2).unwrap(), Some(B256::from([30u8; 32]))); + } + } + + mod large_binary_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, LargeBinaryArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("large_binary").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_binary_values() { + let decoder = decoder::>(); + + assert_eq!(decoder.decode(0).unwrap(), Some((b"aa".as_slice()).into())); + assert_eq!(decoder.decode(1).unwrap(), 
Some((b"bb".as_slice()).into())); + assert_eq!(decoder.decode(2).unwrap(), Some((b"cc".as_slice()).into())); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new(RECORD_BATCH.column_by_name("large_binary").unwrap()) + .map(|_| ()) + .unwrap_err(); + } + } + + mod timestamp_second_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, TimestampSecondArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("timestamp_second").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_values() { + let decoder = decoder::>(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10).unwrap()) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59).unwrap()) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("timestamp_second").unwrap(), + ) + .map(|_| ()) + .unwrap_err(); + } + } + + mod timestamp_millisecond_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, TimestampMillisecondArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH + .column_by_name("timestamp_millisecond") + .unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_values() { + let decoder = decoder::>(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10).unwrap()) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59).unwrap()) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new( + RECORD_BATCH + .column_by_name("timestamp_millisecond") + .unwrap(), + ) + .map(|_| ()) + .unwrap_err(); + } + } + + mod timestamp_microsecond_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, TimestampMicrosecondArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH + .column_by_name("timestamp_microsecond") + .unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_values() { + let decoder = decoder::>(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10).unwrap()) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59).unwrap()) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new( + 
RECORD_BATCH + .column_by_name("timestamp_microsecond") + .unwrap(), + ) + .map(|_| ()) + .unwrap_err(); + } + } + + mod timestamp_nanosecond_decoder { + use super::*; + + fn decoder() -> Box>> + where + ArrayDecoder<'static, TimestampNanosecondArray>: Decoder>, + { + Box::new( + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("timestamp_nanosecond").unwrap(), + ) + .unwrap(), + ) + } + + #[test] + fn decode_valid_values() { + let decoder = decoder::>(); + + assert_eq!( + decoder.decode(0).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10).unwrap()) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Some(Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59).unwrap()) + ); + } + + #[test] + fn handle_missing_values() { + let decoder = decoder::>(); + + assert!(decoder.decode(3).unwrap().is_none()); + assert!(decoder.decode(4).unwrap().is_none()); + } + + #[test] + fn fail_to_create_decoder_with_invalid_type() { + ArrayDecoder::::new( + RECORD_BATCH.column_by_name("timestamp_nanosecond").unwrap(), + ) + .map(|_| ()) + .unwrap_err(); + } + } +} diff --git a/graph/src/amp/codec/decoder.rs b/graph/src/amp/codec/decoder.rs new file mode 100644 index 00000000000..9b5c340e891 --- /dev/null +++ b/graph/src/amp/codec/decoder.rs @@ -0,0 +1,29 @@ +use anyhow::Result; + +/// Decodes Arrow data at specific row indices into subgraph types. +/// +/// This trait provides a common interface for converting Arrow format data into +/// custom types. Implementations handle the specifics of extracting data from +/// Arrow arrays and constructing the target type `T`. +pub trait Decoder { + /// Decodes and returns the value at the `row_index`. + /// + /// # Errors + /// + /// Returns an error if: + /// - The data cannot be converted to type `T` + /// - The underlying Arrow data is invalid or corrupted + /// + /// The returned error is deterministic. + fn decode(&self, row_index: usize) -> Result; +} + +/// Forwards decoding operations through boxed trait objects. +/// +/// This implementation enables using `Box>` as a decoder, +/// delegating to the underlying implementation. +impl Decoder for Box + '_> { + fn decode(&self, row_index: usize) -> Result { + (**self).decode(row_index) + } +} diff --git a/graph/src/amp/codec/list_decoder.rs b/graph/src/amp/codec/list_decoder.rs new file mode 100644 index 00000000000..4c0e2a44504 --- /dev/null +++ b/graph/src/amp/codec/list_decoder.rs @@ -0,0 +1,88 @@ +use anyhow::Result; + +use super::decoder::Decoder; + +/// Decodes Arrow lists to vectors of decoded values. +pub(super) struct ListDecoder<'a, T> { + decoder: T, + offsets: ArrayOffsets<'a>, +} + +/// Contains row index offsets used to determine how many values to decode from an Arrow list. +pub(super) enum ArrayOffsets<'a> { + Small(&'a [i32]), + Large(&'a [i64]), + Fixed(i32), +} + +impl<'a, T> ListDecoder<'a, T> { + /// Creates a new Arrow list decoder with provided `offsets`. 
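+    ///
+    /// The `offsets` determine which rows of the wrapped value decoder belong to the
+    /// list at a given row. For example (illustrative values), `ArrayOffsets::Small(&[0, 2, 5])`
+    /// means the list at row 0 covers value rows `0..2` and the list at row 1 covers
+    /// value rows `2..5`, mirroring how `ArrayOffsets::range` is implemented below.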
+ pub(super) fn new(decoder: T, offsets: ArrayOffsets<'a>) -> Self { + Self { decoder, offsets } + } +} + +impl<'a, T, V> Decoder>> for ListDecoder<'a, T> +where + T: Decoder, +{ + fn decode(&self, row_index: usize) -> Result>> { + let Some(range) = self.offsets.range(row_index) else { + return Ok(None); + }; + + let values = range + .map(|row_index| self.decoder.decode(row_index)) + .collect::, _>>()?; + + if values.is_empty() { + return Ok(None); + } + + Ok(Some(values)) + } +} + +impl<'a> ArrayOffsets<'a> { + /// Returns row indices belonging to a list at `row_index`. + fn range(&self, row_index: usize) -> Option> { + match self { + Self::Small(offsets) => { + let start = *offsets.get(row_index)? as usize; + let end = *offsets.get(row_index + 1)? as usize; + + Some(start..end) + } + Self::Large(offsets) => { + let start = *offsets.get(row_index)? as usize; + let end = *offsets.get(row_index + 1)? as usize; + + Some(start..end) + } + Self::Fixed(value_length) => { + let start = *value_length as usize * row_index; + let end = *value_length as usize * (row_index + 1); + + Some(start..end) + } + } + } +} + +impl<'a> From<&'a [i32]> for ArrayOffsets<'a> { + fn from(offsets: &'a [i32]) -> Self { + Self::Small(offsets) + } +} + +impl<'a> From<&'a [i64]> for ArrayOffsets<'a> { + fn from(offsets: &'a [i64]) -> Self { + Self::Large(offsets) + } +} + +impl From for ArrayOffsets<'static> { + fn from(value_length: i32) -> Self { + Self::Fixed(value_length) + } +} diff --git a/graph/src/amp/codec/mapping_decoder.rs b/graph/src/amp/codec/mapping_decoder.rs new file mode 100644 index 00000000000..b0c85e9d2e6 --- /dev/null +++ b/graph/src/amp/codec/mapping_decoder.rs @@ -0,0 +1,32 @@ +use anyhow::Result; + +use super::decoder::Decoder; + +/// Decodes Arrow arrays and maps the decoded values to a different type. +pub(super) struct MappingDecoder { + decoder: T, + mapping: Box V + 'static>, +} + +impl MappingDecoder { + /// Creates a new decoder that wraps the `decoder`. + /// + /// The `mapping` function transforms decoded values from type `U` to type `V`. + pub(super) fn new(decoder: T, mapping: impl Fn(U) -> V + 'static) -> Self { + Self { + decoder, + mapping: Box::new(mapping), + } + } +} + +impl Decoder for MappingDecoder +where + T: Decoder, +{ + fn decode(&self, row_index: usize) -> Result { + let value = self.decoder.decode(row_index)?; + + Ok((&self.mapping)(value)) + } +} diff --git a/graph/src/amp/codec/mod.rs b/graph/src/amp/codec/mod.rs new file mode 100644 index 00000000000..b642d0377c9 --- /dev/null +++ b/graph/src/amp/codec/mod.rs @@ -0,0 +1,511 @@ +mod array_decoder; +mod decoder; +mod list_decoder; +mod mapping_decoder; +mod name_cache; +mod value_decoder; + +#[cfg(test)] +mod test_fixtures; + +pub mod utils; + +use std::{ + collections::{BTreeMap, HashMap}, + sync::Arc, +}; + +use anyhow::{anyhow, bail, Context, Result}; +use arrow::array::{Array, RecordBatch}; + +use self::{list_decoder::ListDecoder, mapping_decoder::MappingDecoder, name_cache::NameCache}; +use crate::{ + data::{ + graphql::TypeExt, + store::{Id, IdType, Value}, + value::Word, + }, + schema::{EntityKey, EntityType, Field, InputSchema}, +}; + +pub use self::{array_decoder::ArrayDecoder, decoder::Decoder}; + +/// Handles decoding of record batches to subgraph entities. +pub struct Codec { + input_schema: InputSchema, + name_cache: NameCache, +} + +/// Contains the entities decoded from a record batch. +pub struct DecodeOutput { + /// The type of entities in this batch. 
+ pub entity_type: EntityType, + + /// The type of the ID of entities in this batch. + pub id_type: IdType, + + /// A list of decoded entities of the same type. + pub decoded_entities: Vec, +} + +/// Contains a single entity decoded from a record batch. +pub struct DecodedEntity { + /// The unique ID of the entity. + /// + /// When set to `None`, the ID is expected to be auto-generated before a new entity is persisted. + pub key: Option, + + /// A list of entity field names and their values. + /// + /// This list could contain a subset of fields of an entity. + pub entity_data: Vec<(Word, Value)>, +} + +impl Codec { + /// Creates a new decoder for the `input_schema`. + pub fn new(input_schema: InputSchema) -> Self { + let name_cache = NameCache::new(); + + Self { + input_schema, + name_cache, + } + } + + /// Decodes a `record_batch` according to the schema of the entity with name `entity_name`. + /// + /// # Errors + /// + /// Returns an error if `record_batch` is not compatible with the schema of the entity with name `entity_name`. + /// + /// The returned error is deterministic. + pub fn decode(&mut self, record_batch: RecordBatch, entity_name: &str) -> Result { + let entity_type = self.entity_type(entity_name)?; + let id_type = entity_type.id_type()?; + let value_decoders = self.value_decoders(&entity_type, &record_batch)?; + let mut decoded_entities = Vec::with_capacity(record_batch.num_rows()); + + for i in 0..record_batch.num_rows() { + let err_ctx = |s: &str| format!("field '{s}' at row {i}"); + let mut entity_id: Option = None; + let mut entity_data = Vec::with_capacity(value_decoders.len()); + + for (&field_name, value_decoder) in &value_decoders { + let value = value_decoder + .decode(i) + .with_context(|| err_ctx(field_name))?; + + if field_name.eq_ignore_ascii_case("id") { + entity_id = Some(value.clone()); + } + + entity_data.push((Word::from(field_name), value)); + } + + let entity_key = entity_id + .map(Id::try_from) + .transpose() + .with_context(|| err_ctx("id"))? + .map(|entity_id| entity_type.key(entity_id)); + + decoded_entities.push(DecodedEntity { + key: entity_key, + entity_data, + }); + } + + drop(value_decoders); + + Ok(DecodeOutput { + entity_type, + id_type, + decoded_entities, + }) + } + + /// Returns the type of the entity with name `entity_name`. + /// + /// # Errors + /// + /// Returns an error if: + /// - There is no entity with name `entity_name` + /// - The entity is not an object + /// - The entity is a POI entity + /// + /// The returned error is deterministic. + fn entity_type(&self, entity_name: &str) -> Result { + let entity_type = self + .input_schema + .entity_type(entity_name) + .context("entity not found")?; + + if !entity_type.is_object_type() { + return Err(anyhow!("entity is not an object")); + } + + if entity_type.is_poi() { + return Err(anyhow!("entity is POI entity")); + } + + Ok(entity_type) + } + + /// Creates and returns value decoders for the fields of the entity with name `entity_name`. + /// + /// # Errors + /// + /// Returns an error if a decoder could not be created for a required field. + /// + /// The returned error is deterministic. 
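+    ///
+    /// Note that record batch columns are matched to entity fields through the
+    /// normalized identifiers produced by `NameCache`, so the match is effectively
+    /// case-insensitive (see the `match_entity_field_name_with_column_name_ignoring_case` test).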
+ fn value_decoders<'a>( + &mut self, + entity_type: &'a EntityType, + record_batch: &'a RecordBatch, + ) -> Result + 'a>>> { + let object_type = entity_type.object_type().unwrap(); + let columns = record_batch + .schema_ref() + .fields() + .into_iter() + .zip(record_batch.columns()) + .map(|(field, array)| Ok((self.ident(field.name()), array.as_ref()))) + .collect::>>()?; + + let mut value_decoders = BTreeMap::new(); + for field in &object_type.fields { + let Some(value_decoder) = self.value_decoder(field, &columns)? else { + continue; + }; + + value_decoders.insert(field.name.as_str(), value_decoder); + } + + Ok(value_decoders) + } + + /// Creates and returns a value decoder for the `field`. + /// + /// Returns `None` when the `field` does not require a decoder. + /// This happens for derived fields, reserved fields, and when there is no associated + /// Arrow array for a nullable `field` or a `field` that could be auto-generated. + /// + /// # Errors + /// + /// Returns an error if: + /// - There is no associated Arrow array for a required `field` + /// - The `field` type is not compatible with the Arrow array + /// + /// The returned error is deterministic. + fn value_decoder<'a>( + &mut self, + field: &'a Field, + columns: &HashMap, &'a dyn Array>, + ) -> Result + 'a>>> { + // VIDs are auto-generated + if field.name.eq_ignore_ascii_case("vid") { + return Ok(None); + } + + // Derived fields are handled automatically + if field.is_derived() { + return Ok(None); + } + + let normalized_name = self.ident(&field.name); + let array = match columns.get(&normalized_name) { + Some(&array) => array, + None => { + // Allow ID auto-generation + if field.name.eq_ignore_ascii_case("id") { + return Ok(None); + } + + // Allow partial entities + if !field.field_type.is_non_null() { + return Ok(None); + } + + bail!("failed to get column for field '{}'", field.name); + } + }; + + let decoder = value_decoder::value_decoder(field.value_type, field.is_list(), array) + .with_context(|| format!("failed to create decoder for field '{}'", field.name))?; + + Ok(Some(decoder)) + } + + fn ident(&mut self, name: impl AsRef) -> Arc { + self.name_cache.ident(name.as_ref()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use arrow::array::{BinaryArray, BooleanArray, Int64Array, Int8Array}; + use arrow::datatypes::{DataType, Field, Schema}; + + use crate::data::subgraph::DeploymentHash; + + use super::*; + + static SCHEMA: LazyLock = LazyLock::new(|| { + InputSchema::parse_latest( + r#" + type Id @entity { + id: Int8! + } + + type BlockNumber @entity { + id: Int8! + blockNumber: BigInt! + } + + type OptionalBlockNumber @entity { + id: Int8! + blockNumber: BigInt + } + + type Block @entity { + id: Int8! + number: Int8! + hash: Bytes! 
+ value: BigInt + } + "#, + DeploymentHash::default(), + ) + .unwrap() + }); + + #[inline] + fn new_codec() -> Codec { + Codec::new(SCHEMA.clone()) + } + + #[test] + fn fail_to_decode_unknown_entity() { + let schema = Schema::new(vec![Field::new("some_field", DataType::Boolean, true)]); + let record_batch = RecordBatch::new_empty(schema.into()); + + let mut codec = new_codec(); + let e = codec + .decode(record_batch, "SomeEntity") + .map(|_| ()) + .unwrap_err(); + + assert!(format!("{e:#}").contains("entity not found")) + } + + #[test] + fn do_not_fail_on_empty_record_batch() { + let schema = Schema::new(vec![Field::new("some_field", DataType::Boolean, true)]); + let record_batch = RecordBatch::new_empty(schema.into()); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "Id").unwrap(); + + assert!(decode_output.decoded_entities.is_empty()); + } + + #[test] + fn allow_entity_ids_to_be_auto_generated() { + let schema = Schema::new(vec![Field::new("some_field", DataType::Boolean, true)]); + let record_batch = RecordBatch::try_new( + schema.into(), + vec![Arc::new(BooleanArray::from(vec![true, false]))], + ) + .unwrap(); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "Id").unwrap(); + let decoded_entities = decode_output.decoded_entities; + + assert_eq!(decoded_entities.len(), 2); + + for decoded_entity in decoded_entities { + assert!(decoded_entity.key.is_none()); + assert!(decoded_entity.entity_data.is_empty()); + } + } + + #[test] + fn decode_entity_ids() { + let schema = Schema::new(vec![Field::new("id", DataType::Int8, true)]); + let record_batch = RecordBatch::try_new( + schema.into(), + vec![Arc::new(Int8Array::from(vec![10, 20, 30]))], + ) + .unwrap(); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "Id").unwrap(); + let decoded_entities = decode_output.decoded_entities; + + assert_eq!(decoded_entities.len(), 3); + + assert_eq!( + decoded_entities[0].key.as_ref().unwrap().entity_id, + Id::Int8(10), + ); + assert_eq!( + &decoded_entities[0].entity_data, + &[(Word::from("id"), Value::Int8(10))], + ); + + assert_eq!( + decoded_entities[1].key.as_ref().unwrap().entity_id, + Id::Int8(20) + ); + assert_eq!( + &decoded_entities[1].entity_data, + &[(Word::from("id"), Value::Int8(20))], + ); + + assert_eq!( + decoded_entities[2].key.as_ref().unwrap().entity_id, + Id::Int8(30) + ); + assert_eq!( + &decoded_entities[2].entity_data, + &[(Word::from("id"), Value::Int8(30))], + ); + } + + #[test] + fn fail_to_decode_entity_when_a_required_field_is_missing() { + let schema = Schema::new(vec![Field::new("some_field", DataType::Int8, true)]); + let record_batch = + RecordBatch::try_new(schema.into(), vec![Arc::new(Int8Array::from(vec![10]))]).unwrap(); + + let mut codec = new_codec(); + let e = codec + .decode(record_batch, "BlockNumber") + .map(|_| ()) + .unwrap_err(); + + assert!(format!("{e:#}").contains("failed to get column for field 'blockNumber'")); + } + + #[test] + fn decode_entity_when_an_optional_field_is_missing() { + let schema = Schema::new(vec![Field::new("some_field", DataType::Int8, true)]); + let record_batch = + RecordBatch::try_new(schema.into(), vec![Arc::new(Int8Array::from(vec![10]))]).unwrap(); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "OptionalBlockNumber").unwrap(); + let decoded_entitites = decode_output.decoded_entities; + + assert_eq!(decoded_entitites.len(), 1); + assert!(decoded_entitites[0].entity_data.is_empty()); + } + + #[test] + 
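+    // Each of these column-name spellings is expected to normalize to the same
+    // identifier as the `blockNumber` entity field.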
fn match_entity_field_name_with_column_name_ignoring_case() { + for column_name in [ + "block_number", + "Block_Number", + "BLOCK_NUMBER", + "blocknumber", + "blockNumber", + "BlockNumber", + "BLOCKNUMBER", + ] { + let schema = Schema::new(vec![Field::new(column_name, DataType::Int8, true)]); + let record_batch = RecordBatch::try_new( + schema.into(), + vec![Arc::new(Int8Array::from(vec![10, 20, 30]))], + ) + .unwrap(); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "BlockNumber").unwrap(); + let decoded_entitites = decode_output.decoded_entities; + + assert_eq!(decoded_entitites.len(), 3); + + assert_eq!( + &decoded_entitites[0].entity_data, + &[(Word::from("blockNumber"), Value::BigInt(10.into()))] + ); + assert_eq!( + &decoded_entitites[1].entity_data, + &[(Word::from("blockNumber"), Value::BigInt(20.into()))] + ); + assert_eq!( + &decoded_entitites[2].entity_data, + &[(Word::from("blockNumber"), Value::BigInt(30.into()))] + ); + } + } + + #[test] + fn fail_to_decode_entity_when_field_type_and_column_type_are_incompatible() { + let schema = Schema::new(vec![Field::new("block_number", DataType::Boolean, true)]); + let record_batch = RecordBatch::try_new( + schema.into(), + vec![Arc::new(BooleanArray::from(vec![true]))], + ) + .unwrap(); + + let mut codec = new_codec(); + let e = codec + .decode(record_batch, "BlockNumber") + .map(|_| ()) + .unwrap_err(); + + assert!(format!("{e:#}").contains("failed to create decoder for field 'blockNumber'")) + } + + #[test] + fn decode_entities_with_multiple_fields() { + let schema = Schema::new(vec![ + Field::new("number", DataType::Int8, true), + Field::new("hash", DataType::Binary, true), + Field::new("value", DataType::Int64, true), + ]); + let record_batch = RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(Int8Array::from(vec![10, 20, 30])), + Arc::new(BinaryArray::from(vec![b"aa".as_ref(), b"bb", b"cc"])), + Arc::new(Int64Array::from(vec![100, 200, 300])), + ], + ) + .unwrap(); + + let mut codec = new_codec(); + let decode_output = codec.decode(record_batch, "Block").unwrap(); + let decoded_entitites = decode_output.decoded_entities; + + assert_eq!(decoded_entitites.len(), 3); + + assert_eq!( + &decoded_entitites[0].entity_data, + &[ + (Word::from("hash"), Value::Bytes(b"aa".as_ref().into())), + (Word::from("number"), Value::Int8(10)), + (Word::from("value"), Value::BigInt(100.into())) + ] + ); + assert_eq!( + &decoded_entitites[1].entity_data, + &[ + (Word::from("hash"), Value::Bytes(b"bb".as_ref().into())), + (Word::from("number"), Value::Int8(20)), + (Word::from("value"), Value::BigInt(200.into())) + ] + ); + assert_eq!( + &decoded_entitites[2].entity_data, + &[ + (Word::from("hash"), Value::Bytes(b"cc".as_ref().into())), + (Word::from("number"), Value::Int8(30)), + (Word::from("value"), Value::BigInt(300.into())) + ] + ); + } +} diff --git a/graph/src/amp/codec/name_cache.rs b/graph/src/amp/codec/name_cache.rs new file mode 100644 index 00000000000..9ad28f7a3b1 --- /dev/null +++ b/graph/src/amp/codec/name_cache.rs @@ -0,0 +1,34 @@ +use std::{collections::HashMap, sync::Arc}; + +use inflector::Inflector; + +use crate::cheap_clone::CheapClone; + +/// Normalizes and caches identifiers that are used to match Arrow columns and subgraph entity fields. +pub(super) struct NameCache { + cache: HashMap, Arc>, +} + +impl NameCache { + /// Creates a new empty cache. + pub(super) fn new() -> Self { + Self { + cache: HashMap::new(), + } + } + + /// Normalizes and returns the identifier for the given name. 
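+    /// For example, "block_number", "blockNumber", and "BLOCK_NUMBER" are all expected
+    /// to normalize to the same identifier, which is what makes column matching in the
+    /// codec case-insensitive.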
+ /// + /// If the identifier exists in the cache, returns the cached version. + /// Otherwise, creates a new normalized identifier, caches it, and returns it. + pub(super) fn ident(&mut self, name: &str) -> Arc { + if let Some(ident) = self.cache.get(name) { + return ident.cheap_clone(); + } + + let ident: Arc = name.to_camel_case().to_lowercase().into(); + self.cache.insert(name.into(), ident.cheap_clone()); + + ident + } +} diff --git a/graph/src/amp/codec/test_fixtures.rs b/graph/src/amp/codec/test_fixtures.rs new file mode 100644 index 00000000000..a55001439b2 --- /dev/null +++ b/graph/src/amp/codec/test_fixtures.rs @@ -0,0 +1,364 @@ +use std::sync::{Arc, LazyLock}; + +use arrow::{ + array::{ + BinaryArray, BinaryViewArray, BooleanArray, BooleanBuilder, Decimal128Builder, + Decimal256Builder, FixedSizeBinaryArray, FixedSizeListBuilder, Float16Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, + LargeListBuilder, LargeListViewBuilder, LargeStringArray, ListBuilder, ListViewBuilder, + RecordBatch, StringArray, StringViewArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, + }, + datatypes::{ + i256, DataType, Field, Schema, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION, + }, +}; +use chrono::{TimeZone, Utc}; +use half::f16; + +pub static RECORD_BATCH: LazyLock = LazyLock::new(|| { + let record_batches = [ + &BOOLEAN_RECORD_BATCH, + &INT_RECORD_BATCH, + &UINT_RECORD_BATCH, + &DECIMAL_RECORD_BATCH, + &FLOAT_RECORD_BATCH, + &STRING_RECORD_BATCH, + &BINARY_RECORD_BATCH, + &TIMESTAMP_RECORD_BATCH, + ]; + + let schemas = record_batches + .iter() + .map(|record_batch| (*record_batch.schema()).clone()); + + let columns = record_batches + .into_iter() + .map(|record_batch| record_batch.columns()) + .flatten() + .map(|column| column.clone()) + .collect::>(); + + RecordBatch::try_new(Schema::try_merge(schemas).unwrap().into(), columns).unwrap() +}); + +pub static BOOLEAN_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("boolean", DataType::Boolean, true), + Field::new( + "boolean_list", + DataType::List(Arc::new(Field::new("item", DataType::Boolean, true))), + true, + ), + Field::new( + "boolean_list_view", + DataType::ListView(Arc::new(Field::new("item", DataType::Boolean, true))), + true, + ), + Field::new( + "boolean_fixed_size_list", + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Boolean, true)), 3), + true, + ), + Field::new( + "boolean_large_list", + DataType::LargeList(Arc::new(Field::new("item", DataType::Boolean, true))), + true, + ), + Field::new( + "boolean_large_list_view", + DataType::LargeListView(Arc::new(Field::new("item", DataType::Boolean, true))), + true, + ), + ]); + + let builder = || { + let mut builder = BooleanBuilder::new(); + builder.append_value(true); + builder.append_value(false); + builder.append_value(true); + builder + }; + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(BooleanArray::from(vec![true, false, true])), + Arc::new({ + let mut list_builder = ListBuilder::new(builder()); + list_builder.append(true); + list_builder.append(false); + list_builder.append(false); + list_builder.finish() + }), + Arc::new({ + let mut list_builder = ListViewBuilder::new(builder()); + list_builder.append(true); + list_builder.append(false); + list_builder.append(false); + list_builder.finish() + }), + Arc::new({ + let mut list_builder = 
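+            // Fixed-size lists require a value slot for every element, so the two null
+            // rows appended below are each padded with three null values.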
FixedSizeListBuilder::new(builder(), 3); + list_builder.append(true); + list_builder.values().append_null(); + list_builder.values().append_null(); + list_builder.values().append_null(); + list_builder.append(false); + list_builder.values().append_null(); + list_builder.values().append_null(); + list_builder.values().append_null(); + list_builder.append(false); + list_builder.finish() + }), + Arc::new({ + let mut list_builder = LargeListBuilder::new(builder()); + list_builder.append(true); + list_builder.append(false); + list_builder.append(false); + list_builder.finish() + }), + Arc::new({ + let mut list_builder = LargeListViewBuilder::new(builder()); + list_builder.append(true); + list_builder.append(false); + list_builder.append(false); + list_builder.finish() + }), + ], + ) + .unwrap() +}); + +pub static INT_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("int8", DataType::Int8, true), + Field::new("int16", DataType::Int16, true), + Field::new("int32", DataType::Int32, true), + Field::new("int64", DataType::Int64, true), + ]); + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(Int8Array::from(vec![10, 20, i8::MAX])), + Arc::new(Int16Array::from(vec![10, 20, i16::MAX])), + Arc::new(Int32Array::from(vec![10, 20, i32::MAX])), + Arc::new(Int64Array::from(vec![10, 20, i64::MAX])), + ], + ) + .unwrap() +}); + +pub static UINT_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("uint8", DataType::UInt8, true), + Field::new("uint16", DataType::UInt16, true), + Field::new("uint32", DataType::UInt32, true), + Field::new("uint64", DataType::UInt64, true), + ]); + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(UInt8Array::from(vec![10, 20, u8::MAX])), + Arc::new(UInt16Array::from(vec![10, 20, u16::MAX])), + Arc::new(UInt32Array::from(vec![10, 20, u32::MAX])), + Arc::new(UInt64Array::from(vec![10, 20, u64::MAX])), + ], + ) + .unwrap() +}); + +pub static DECIMAL_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new( + "decimal128", + DataType::Decimal128(DECIMAL128_MAX_PRECISION, 0), + true, + ), + Field::new( + "decimal128_with_scale", + DataType::Decimal128(DECIMAL128_MAX_PRECISION, 10), + true, + ), + Field::new( + "decimal256", + DataType::Decimal256(DECIMAL256_MAX_PRECISION, 0), + true, + ), + Field::new( + "decimal256_with_scale", + DataType::Decimal256(DECIMAL256_MAX_PRECISION, 10), + true, + ), + ]); + + let decimal_128_array = |scale: i8| { + let mut builder = Decimal128Builder::new() + .with_precision_and_scale(DECIMAL128_MAX_PRECISION, scale) + .unwrap(); + + builder.append_value(10); + builder.append_value(20); + builder.append_value(i128::MAX); + builder.finish() + }; + + let decimal_256_array = |scale: i8| { + let mut builder = Decimal256Builder::new() + .with_precision_and_scale(DECIMAL256_MAX_PRECISION, scale) + .unwrap(); + + builder.append_value(10.into()); + builder.append_value(20.into()); + builder.append_value(i256::MAX); + builder.finish() + }; + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(decimal_128_array(0)), + Arc::new(decimal_128_array(10)), + Arc::new(decimal_256_array(0)), + Arc::new(decimal_256_array(10)), + ], + ) + .unwrap() +}); + +pub static FLOAT_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("float16", DataType::Float16, true), + Field::new("float32", DataType::Float32, true), + Field::new("float64", DataType::Float64, true), + ]); + + RecordBatch::try_new( + 
schema.into(), + vec![ + Arc::new(Float16Array::from(vec![ + f16::from_f32(10.0), + f16::from_f32(20.0), + f16::MAX, + ])), + Arc::new(Float32Array::from(vec![10.0, 20.0, f32::MAX])), + Arc::new(Float64Array::from(vec![10.0, 20.0, f64::MAX])), + ], + ) + .unwrap() +}); + +pub static STRING_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("utf8", DataType::Utf8, true), + Field::new("utf8_view", DataType::Utf8View, true), + Field::new("large_utf8", DataType::LargeUtf8, true), + ]); + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(StringArray::from(vec!["aa", "bb", "30"])), + Arc::new(StringViewArray::from(vec!["aa", "bb", "30"])), + Arc::new(LargeStringArray::from(vec!["aa", "bb", "30"])), + ], + ) + .unwrap() +}); + +pub static BINARY_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new("binary", DataType::Binary, true), + Field::new("binary_view", DataType::BinaryView, true), + Field::new("fixed_size_binary", DataType::FixedSizeBinary(2), true), + Field::new("fixed_size_binary_32", DataType::FixedSizeBinary(32), true), + Field::new("large_binary", DataType::LargeBinary, true), + ]); + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(BinaryArray::from(vec![b"aa".as_ref(), b"bb", b"cc"])), + Arc::new(BinaryViewArray::from(vec![b"aa".as_ref(), b"bb", b"cc"])), + Arc::new(FixedSizeBinaryArray::from(vec![b"aa", b"bb", b"cc"])), + Arc::new(FixedSizeBinaryArray::from(vec![ + &[10; 32], &[20; 32], &[30; 32], + ])), + Arc::new(LargeBinaryArray::from(vec![b"aa".as_ref(), b"bb", b"cc"])), + ], + ) + .unwrap() +}); + +pub static TIMESTAMP_RECORD_BATCH: LazyLock = LazyLock::new(|| { + let schema = Schema::new(vec![ + Field::new( + "timestamp_second", + DataType::Timestamp(TimeUnit::Second, None), + true, + ), + Field::new( + "timestamp_millisecond", + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ), + Field::new( + "timestamp_microsecond", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ), + Field::new( + "timestamp_nanosecond", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ), + ]); + + let date_time_one = Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(); + let date_time_two = Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10).unwrap(); + let date_time_three = Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59).unwrap(); + + RecordBatch::try_new( + schema.into(), + vec![ + Arc::new(TimestampSecondArray::from(vec![ + date_time_one.timestamp(), + date_time_two.timestamp(), + date_time_three.timestamp(), + ])), + Arc::new(TimestampMillisecondArray::from(vec![ + date_time_one.timestamp_millis(), + date_time_two.timestamp_millis(), + date_time_three.timestamp_millis(), + ])), + Arc::new(TimestampMicrosecondArray::from(vec![ + date_time_one.timestamp_micros(), + date_time_two.timestamp_micros(), + date_time_three.timestamp_micros(), + ])), + Arc::new(TimestampNanosecondArray::from(vec![ + date_time_one.timestamp_nanos_opt().unwrap(), + date_time_two.timestamp_nanos_opt().unwrap(), + date_time_three.timestamp_nanos_opt().unwrap(), + ])), + ], + ) + .unwrap() +}); + +#[test] +fn record_batch_is_valid() { + let _schema = BOOLEAN_RECORD_BATCH.schema(); + let _schema = INT_RECORD_BATCH.schema(); + let _schema = UINT_RECORD_BATCH.schema(); + let _schema = DECIMAL_RECORD_BATCH.schema(); + let _schema = FLOAT_RECORD_BATCH.schema(); + let _schema = STRING_RECORD_BATCH.schema(); + let _schema = BINARY_RECORD_BATCH.schema(); + let _schema = TIMESTAMP_RECORD_BATCH.schema(); + + let 
_schema = RECORD_BATCH.schema(); +} diff --git a/graph/src/amp/codec/utils.rs b/graph/src/amp/codec/utils.rs new file mode 100644 index 00000000000..4f6ba4ff0b1 --- /dev/null +++ b/graph/src/amp/codec/utils.rs @@ -0,0 +1,120 @@ +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::{bail, Context, Result}; +use arrow::array::{ + Array, FixedSizeBinaryArray, RecordBatch, TimestampNanosecondArray, UInt64Array, +}; +use chrono::{DateTime, Utc}; + +use super::{ArrayDecoder, Decoder}; +use crate::amp::common::column_aliases; + +pub fn auto_block_number_decoder<'a>( + record_batch: &'a RecordBatch, +) -> Result<(&'static str, Box> + 'a>)> { + let (&column_name, column_index) = find_column(record_batch, column_aliases::BLOCK_NUMBER) + .with_context(|| { + format!( + "failed to find block numbers column; expected one of: {}", + column_aliases::BLOCK_NUMBER.join(", ") + ) + })?; + + block_number_decoder(record_batch, column_index) + .map(|decoder| (column_name, decoder)) + .with_context(|| format!("column '{column_name}' is not valid")) +} + +pub fn block_number_decoder<'a>( + record_batch: &'a RecordBatch, + column_index: usize, +) -> Result> + 'a>> { + column_decoder::(record_batch, column_index, false) +} + +pub fn auto_block_hash_decoder<'a>( + record_batch: &'a RecordBatch, +) -> Result<(&'static str, Box> + 'a>)> { + let (&column_name, column_index) = find_column(record_batch, column_aliases::BLOCK_HASH) + .with_context(|| { + format!( + "failed to find block hashes column; expected one of: {}", + column_aliases::BLOCK_HASH.join(", ") + ) + })?; + + block_hash_decoder(record_batch, column_index) + .map(|decoder| (column_name, decoder)) + .with_context(|| format!("column '{column_name}' is not valid")) +} + +pub fn block_hash_decoder<'a>( + record_batch: &'a RecordBatch, + column_index: usize, +) -> Result> + 'a>> { + column_decoder::(record_batch, column_index, false) +} + +pub fn auto_block_timestamp_decoder<'a>( + record_batch: &'a RecordBatch, +) -> Result<(&'static str, Box>> + 'a>)> { + let (&column_name, column_index) = find_column(record_batch, column_aliases::BLOCK_TIMESTAMP) + .with_context(|| { + format!( + "failed to find block timestamps column; expected one of: {}", + column_aliases::BLOCK_TIMESTAMP.join(", ") + ) + })?; + + block_timestamp_decoder(record_batch, column_index) + .map(|decoder| (column_name, decoder)) + .with_context(|| format!("column '{column_name}' is not valid")) +} + +pub fn block_timestamp_decoder<'a>( + record_batch: &'a RecordBatch, + column_index: usize, +) -> Result>> + 'a>> { + column_decoder::>(record_batch, column_index, false) +} + +pub fn find_column( + record_batch: &RecordBatch, + column_names: impl IntoIterator, +) -> Option<(T, usize)> +where + T: AsRef, +{ + let schema_ref = record_batch.schema_ref(); + + for column_name in column_names { + if let Some((column_index, _)) = schema_ref.column_with_name(column_name.as_ref()) { + return Some((column_name, column_index)); + } + } + + return None; +} + +pub fn column_decoder<'a, T: 'static, U>( + record_batch: &'a RecordBatch, + column_index: usize, + nullable: bool, +) -> Result> + 'a>> +where + T: Array, + ArrayDecoder<'a, T>: Decoder>, +{ + if column_index >= record_batch.num_columns() { + bail!("column does not exist"); + } + + let array = record_batch.column(column_index); + + if !nullable && array.is_nullable() { + bail!("column must not have nullable values"); + } + + let decoder = ArrayDecoder::::new(array)?; + + Ok(Box::new(decoder)) +} diff --git 
a/graph/src/amp/codec/value_decoder.rs b/graph/src/amp/codec/value_decoder.rs new file mode 100644 index 00000000000..c6e4e7162a2 --- /dev/null +++ b/graph/src/amp/codec/value_decoder.rs @@ -0,0 +1,873 @@ +use anyhow::{anyhow, Context, Result}; +use arrow::{ + array::{ + Array, BinaryArray, BinaryViewArray, BooleanArray, Decimal128Array, Decimal256Array, + FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array, Float64Array, + Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, LargeListArray, + LargeListViewArray, LargeStringArray, ListArray, ListViewArray, StringArray, + StringViewArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, + }, + datatypes::{DataType, TimeUnit}, +}; +use chrono::{DateTime, Utc}; + +use super::{ArrayDecoder, Decoder, ListDecoder, MappingDecoder}; +use crate::data::store::{ + scalar::{BigDecimal, BigInt, Bytes, Timestamp}, + Value, ValueType, +}; + +/// Returns a decoder that converts an Arrow array into subgraph store values. +/// +/// # Errors +/// +/// Returns an error if the subgraph store type is not compatible with the Arrow array type. +/// +/// The returned error is deterministic. +pub(super) fn value_decoder<'a>( + value_type: ValueType, + is_list: bool, + array: &'a dyn Array, +) -> Result + 'a>> { + let decoder = if is_list { + list_value_decoder(value_type, array) + } else { + single_value_decoder(value_type, array) + }; + + decoder.with_context(|| { + format!( + "failed to decode '{}' from '{}'", + value_type.to_str(), + array.data_type(), + ) + }) +} + +fn list_value_decoder<'a>( + value_type: ValueType, + array: &'a dyn Array, +) -> Result + 'a>> { + match array.data_type() { + DataType::List(_) => { + let list = array.as_any().downcast_ref::().unwrap(); + let decoder = single_value_decoder(value_type, list.values())?; + let list_decoder = ListDecoder::new(decoder, list.value_offsets().into()); + + Ok(mapping_decoder(list_decoder, Value::List)) + } + DataType::ListView(_) => { + let list = array.as_any().downcast_ref::().unwrap(); + let decoder = single_value_decoder(value_type, list.values())?; + let list_decoder = ListDecoder::new(decoder, list.value_offsets().into()); + + Ok(mapping_decoder(list_decoder, Value::List)) + } + DataType::FixedSizeList(_, _) => { + let list = array.as_any().downcast_ref::().unwrap(); + let decoder = single_value_decoder(value_type, list.values())?; + let list_decoder = ListDecoder::new(decoder, list.value_length().into()); + + Ok(mapping_decoder(list_decoder, Value::List)) + } + DataType::LargeList(_) => { + let list = array.as_any().downcast_ref::().unwrap(); + let decoder = single_value_decoder(value_type, list.values())?; + let list_decoder = ListDecoder::new(decoder, list.value_offsets().into()); + + Ok(mapping_decoder(list_decoder, Value::List)) + } + DataType::LargeListView(_) => { + let list = array.as_any().downcast_ref::().unwrap(); + let decoder = single_value_decoder(value_type, list.values())?; + let list_decoder = ListDecoder::new(decoder, list.value_offsets().into()); + + Ok(mapping_decoder(list_decoder, Value::List)) + } + _ => { + let decoder = single_value_decoder(value_type, array)?; + + Ok(Box::new(MappingDecoder::new(decoder, |value| { + if matches!(value, Value::Null) { + return Value::Null; + } + + Value::List(vec![value]) + }))) + } + } +} + +fn single_value_decoder<'a>( + value_type: ValueType, + array: &'a dyn Array, +) -> Result + 'a>> { + let 
incompatible_types_err = || Err(anyhow!("incompatible types")); + + let decoder = match (value_type, array.data_type()) { + (ValueType::Boolean, DataType::Boolean) => { + let array_decoder = ArrayDecoder::::new(array)?; + mapping_decoder(array_decoder, Value::Bool) + } + (ValueType::Boolean, _) => return incompatible_types_err(), + + (ValueType::Int, data_type) if is_integer(data_type) => { + let integer_decoder = integer_decoder::>(array)?; + mapping_decoder(integer_decoder, Value::Int) + } + (ValueType::Int, _) => return incompatible_types_err(), + + (ValueType::Int8, data_type) if is_integer(data_type) => { + let integer_decoder = integer_decoder::>(array)?; + mapping_decoder(integer_decoder, Value::Int8) + } + (ValueType::Int8, _) => return incompatible_types_err(), + + (ValueType::BigInt, data_type) if is_integer(data_type) => { + let integer_decoder = integer_decoder::>(array)?; + mapping_decoder(integer_decoder, Value::BigInt) + } + (ValueType::BigInt, data_type) if is_string(data_type) => { + let string_decoder = string_decoder::>(array)?; + mapping_decoder(string_decoder, Value::BigInt) + } + (ValueType::BigInt, _) => return incompatible_types_err(), + + (ValueType::BigDecimal, data_type) if is_decimal(data_type) => { + let decimal_decoder = decimal_decoder::>(array)?; + mapping_decoder(decimal_decoder, Value::BigDecimal) + } + (ValueType::BigDecimal, data_type) if is_string(data_type) => { + let string_decoder = string_decoder::>(array)?; + mapping_decoder(string_decoder, Value::BigDecimal) + } + (ValueType::BigDecimal, _) => return incompatible_types_err(), + + (ValueType::Bytes, data_type) if is_binary(data_type) => { + let binary_decoder = binary_decoder::>>(array)?; + mapping_decoder(binary_decoder, |x| Bytes::from(&*x).into()) + } + (ValueType::Bytes, _) => return incompatible_types_err(), + + (ValueType::String, data_type) if is_string(data_type) => { + let string_decoder = string_decoder::>(array)?; + mapping_decoder(string_decoder, Value::String) + } + (ValueType::String, data_type) if is_integer(data_type) => { + let integer_decoder = integer_decoder::>(array)?; + mapping_decoder(integer_decoder, |x| x.to_string().into()) + } + (ValueType::String, data_type) if is_binary(data_type) => { + let binary_decoder = binary_decoder::>>(array)?; + mapping_decoder(binary_decoder, |x| format!("0x{}", hex::encode(x)).into()) + } + (ValueType::String, _) => return incompatible_types_err(), + + (ValueType::Timestamp, data_type) if is_timestamp(data_type) => { + let timestamp_decoder = timestamp_decoder::>>(array)?; + mapping_decoder(timestamp_decoder, |x| Timestamp(x).into()) + } + (ValueType::Timestamp, _) => return incompatible_types_err(), + }; + + Ok(decoder) +} + +fn mapping_decoder<'a, T, U: 'static>( + array_decoder: T, + mapping: fn(U) -> Value, +) -> Box + 'a> +where + T: Decoder> + 'a, +{ + Box::new(MappingDecoder::new( + array_decoder, + move |value: Option| match value { + Some(value) => mapping(value), + None => Value::Null, + }, + )) +} + +fn is_integer(data_type: &DataType) -> bool { + use DataType::*; + + matches! 
{ + data_type, + Int8 | Int16 | Int32 | Int64 | + UInt8 | UInt16 | UInt32 | UInt64 | + Decimal128(_, 0) | Decimal256(_, 0) + } +} + +fn integer_decoder<'a, T>(array: &'a dyn Array) -> Result + 'a>> +where + T: 'static, + ArrayDecoder<'a, Int8Array>: Decoder, + ArrayDecoder<'a, Int16Array>: Decoder, + ArrayDecoder<'a, Int32Array>: Decoder, + ArrayDecoder<'a, Int64Array>: Decoder, + ArrayDecoder<'a, UInt8Array>: Decoder, + ArrayDecoder<'a, UInt16Array>: Decoder, + ArrayDecoder<'a, UInt32Array>: Decoder, + ArrayDecoder<'a, UInt64Array>: Decoder, + ArrayDecoder<'a, Decimal128Array>: Decoder, + ArrayDecoder<'a, Decimal256Array>: Decoder, +{ + use DataType::*; + + let array_decoder: Box> = match array.data_type() { + Int8 => Box::new(ArrayDecoder::::new(array)?), + Int16 => Box::new(ArrayDecoder::::new(array)?), + Int32 => Box::new(ArrayDecoder::::new(array)?), + Int64 => Box::new(ArrayDecoder::::new(array)?), + UInt8 => Box::new(ArrayDecoder::::new(array)?), + UInt16 => Box::new(ArrayDecoder::::new(array)?), + UInt32 => Box::new(ArrayDecoder::::new(array)?), + UInt64 => Box::new(ArrayDecoder::::new(array)?), + Decimal128(_, 0) => Box::new(ArrayDecoder::::new(array)?), + Decimal256(_, 0) => Box::new(ArrayDecoder::::new(array)?), + data_type => return Err(anyhow!("'{data_type}' is not a supported integer type")), + }; + + Ok(array_decoder) +} + +fn is_decimal(data_type: &DataType) -> bool { + use DataType::*; + + matches! { + data_type, + Float16 | Float32 | Float64 | + Decimal128(_, _) | Decimal256(_, _) + } +} + +fn decimal_decoder<'a, T>(array: &'a dyn Array) -> Result + 'a>> +where + T: 'static, + ArrayDecoder<'a, Float16Array>: Decoder, + ArrayDecoder<'a, Float32Array>: Decoder, + ArrayDecoder<'a, Float64Array>: Decoder, + ArrayDecoder<'a, Decimal128Array>: Decoder, + ArrayDecoder<'a, Decimal256Array>: Decoder, +{ + use DataType::*; + + let array_decoder: Box> = match array.data_type() { + Float16 => Box::new(ArrayDecoder::::new(array)?), + Float32 => Box::new(ArrayDecoder::::new(array)?), + Float64 => Box::new(ArrayDecoder::::new(array)?), + Decimal128(_, _) => Box::new(ArrayDecoder::::new(array)?), + Decimal256(_, _) => Box::new(ArrayDecoder::::new(array)?), + data_type => return Err(anyhow!("'{data_type}' is not a supported decimal type")), + }; + + Ok(array_decoder) +} + +fn is_binary(data_type: &DataType) -> bool { + use DataType::*; + + matches! { + data_type, + Binary | BinaryView | FixedSizeBinary(_) | LargeBinary + } +} + +fn binary_decoder<'a, T>(array: &'a dyn Array) -> Result + 'a>> +where + T: 'static, + ArrayDecoder<'a, BinaryArray>: Decoder, + ArrayDecoder<'a, BinaryViewArray>: Decoder, + ArrayDecoder<'a, FixedSizeBinaryArray>: Decoder, + ArrayDecoder<'a, LargeBinaryArray>: Decoder, +{ + use DataType::*; + + let array_decoder: Box> = match array.data_type() { + Binary => Box::new(ArrayDecoder::::new(array)?), + BinaryView => Box::new(ArrayDecoder::::new(array)?), + FixedSizeBinary(_) => Box::new(ArrayDecoder::::new(array)?), + LargeBinary => Box::new(ArrayDecoder::::new(array)?), + data_type => return Err(anyhow!("'{data_type}' is not a supported binary type")), + }; + + Ok(array_decoder) +} + +fn is_string(data_type: &DataType) -> bool { + use DataType::*; + + matches! 
{ + data_type, + Utf8 | Utf8View | LargeUtf8 + } +} + +fn string_decoder<'a, T>(array: &'a dyn Array) -> Result + 'a>> +where + T: 'static, + ArrayDecoder<'a, StringArray>: Decoder, + ArrayDecoder<'a, StringViewArray>: Decoder, + ArrayDecoder<'a, LargeStringArray>: Decoder, +{ + use DataType::*; + + let array_decoder: Box> = match array.data_type() { + Utf8 => Box::new(ArrayDecoder::::new(array)?), + Utf8View => Box::new(ArrayDecoder::::new(array)?), + LargeUtf8 => Box::new(ArrayDecoder::::new(array)?), + data_type => return Err(anyhow!("'{data_type}' is not a supported string type")), + }; + + Ok(array_decoder) +} + +fn is_timestamp(data_type: &DataType) -> bool { + use DataType::*; + + matches! { + data_type, + Timestamp(TimeUnit::Second, _) | + Timestamp(TimeUnit::Millisecond, _) | + Timestamp(TimeUnit::Microsecond, _) | + Timestamp(TimeUnit::Nanosecond, _) + } +} + +fn timestamp_decoder<'a, T>(array: &'a dyn Array) -> Result + 'a>> +where + T: 'static, + ArrayDecoder<'a, TimestampSecondArray>: Decoder, + ArrayDecoder<'a, TimestampMillisecondArray>: Decoder, + ArrayDecoder<'a, TimestampMicrosecondArray>: Decoder, + ArrayDecoder<'a, TimestampNanosecondArray>: Decoder, +{ + use DataType::*; + + let array_decoder: Box> = match array.data_type() { + Timestamp(TimeUnit::Second, _) => { + Box::new(ArrayDecoder::::new(array)?) // + } + Timestamp(TimeUnit::Millisecond, _) => { + Box::new(ArrayDecoder::::new(array)?) // + } + Timestamp(TimeUnit::Microsecond, _) => { + Box::new(ArrayDecoder::::new(array)?) // + } + Timestamp(TimeUnit::Nanosecond, _) => { + Box::new(ArrayDecoder::::new(array)?) // + } + data_type => return Err(anyhow!("'{data_type}' is not a supported timestamp type")), + }; + + Ok(array_decoder) +} + +#[cfg(test)] +mod tests { + use super::super::test_fixtures::*; + use super::*; + + mod boolean_value_decoder { + use super::*; + + fn decoder(column_name: &str, is_list: bool) -> Box> { + value_decoder( + ValueType::Boolean, + is_list, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_single_values() { + let decoder = decoder("boolean", false); + + assert_eq!(decoder.decode(0).unwrap(), Value::Bool(true)); + assert_eq!(decoder.decode(1).unwrap(), Value::Bool(false)); + assert_eq!(decoder.decode(2).unwrap(), Value::Bool(true)); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + + #[test] + fn decode_single_values_as_lists() { + let decoder = decoder("boolean", true); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::List(vec![Value::Bool(true)]) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Value::List(vec![Value::Bool(false)]) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Value::List(vec![Value::Bool(true)]) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + + #[test] + fn decode_list_values() { + let decoder = decoder("boolean_list", true); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::List(vec![ + Value::Bool(true), + Value::Bool(false), + Value::Bool(true), + ]) + ); + assert_eq!(decoder.decode(1).unwrap(), Value::Null); + } + + #[test] + fn decode_list_view_values() { + let decoder = decoder("boolean_list_view", true); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::List(vec![ + Value::Bool(true), + Value::Bool(false), + Value::Bool(true), + ]) + ); + assert_eq!(decoder.decode(1).unwrap(), Value::Null); + } + + #[test] + fn decode_fixed_size_list_values() { + let decoder = decoder("boolean_fixed_size_list", true); + + assert_eq!( + decoder.decode(0).unwrap(), + 
Value::List(vec![ + Value::Bool(true), + Value::Bool(false), + Value::Bool(true), + ]) + ); + } + + #[test] + fn decode_large_list_values() { + let decoder = decoder("boolean_large_list", true); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::List(vec![ + Value::Bool(true), + Value::Bool(false), + Value::Bool(true), + ]) + ); + assert_eq!(decoder.decode(1).unwrap(), Value::Null); + } + + #[test] + fn decode_large_list_view_values() { + let decoder = decoder("boolean_large_list_view", true); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::List(vec![ + Value::Bool(true), + Value::Bool(false), + Value::Bool(true), + ]) + ); + assert_eq!(decoder.decode(1).unwrap(), Value::Null); + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::Boolean, false, BINARY_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::Int, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "decimal128", + "decimal256", + ] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(0).unwrap(), Value::Int(10)); + assert_eq!(decoder.decode(1).unwrap(), Value::Int(20)); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::Int, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod int8_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::Int8, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "decimal128", + "decimal256", + ] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(0).unwrap(), Value::Int8(10)); + assert_eq!(decoder.decode(1).unwrap(), Value::Int8(20)); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::Int8, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod big_int_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::BigInt, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "decimal128", + "decimal256", + ] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(0).unwrap(), Value::BigInt(BigInt::from(10))); + assert_eq!(decoder.decode(1).unwrap(), Value::BigInt(BigInt::from(20))); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn decode_values_from_numerical_strings() { + for column in ["utf8", "utf8_view", "large_utf8"] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(2).unwrap(), Value::BigInt(BigInt::from(30))); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_from_non_numerical_strings() { + for column in ["utf8", "utf8_view", "large_utf8"] { + let decoder = decoder(column); + + decoder.decode(0).unwrap_err(); + } + } + + 
#[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::BigInt, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod big_decimal_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::BigDecimal, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in ["float16", "float32", "float64", "decimal128", "decimal256"] { + let decoder = decoder(column); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::BigDecimal(BigDecimal::from(10.0)) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Value::BigDecimal(BigDecimal::from(20.0)) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn decode_values_from_numerical_strings() { + for column in ["utf8", "utf8_view", "large_utf8"] { + let decoder = decoder(column); + + assert_eq!( + decoder.decode(2).unwrap(), + Value::BigDecimal(BigDecimal::from(30.0)) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_from_non_numerical_strings() { + for column in ["utf8", "utf8_view", "large_utf8"] { + let decoder = decoder(column); + + decoder.decode(0).unwrap_err(); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::BigDecimal, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod bytes_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::Bytes, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in ["binary", "binary_view", "fixed_size_binary", "large_binary"] { + let decoder = decoder(column); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::Bytes(b"aa".as_slice().into()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Value::Bytes(b"bb".as_slice().into()) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Value::Bytes(b"cc".as_slice().into()) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::Bytes, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod string_value_decoder { + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::String, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values_from_strings() { + for column in ["utf8", "utf8_view", "large_utf8"] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(0).unwrap(), Value::String("aa".to_string())); + assert_eq!(decoder.decode(1).unwrap(), Value::String("bb".to_string())); + assert_eq!(decoder.decode(2).unwrap(), Value::String("30".to_string())); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn decode_values_from_numbers() { + for column in [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "decimal128", + "decimal256", + ] { + let decoder = decoder(column); + + assert_eq!(decoder.decode(0).unwrap(), Value::String("10".to_string())); + assert_eq!(decoder.decode(1).unwrap(), Value::String("20".to_string())); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn decode_values_from_bytes() { + for column in ["binary", "binary_view", "fixed_size_binary", "large_binary"] { + let decoder = 
decoder(column); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::String(format!("0x{}", hex::encode(b"aa"))) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Value::String(format!("0x{}", hex::encode(b"bb"))) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Value::String(format!("0x{}", hex::encode(b"cc"))) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::String, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } + + mod timestamp_value_decoder { + use chrono::{TimeZone, Utc}; + + use super::*; + + fn decoder(column_name: &str) -> Box> { + value_decoder( + ValueType::Timestamp, + false, + RECORD_BATCH.column_by_name(column_name).unwrap(), + ) + .unwrap() + } + + #[test] + fn decode_values() { + for column in [ + "timestamp_second", + "timestamp_millisecond", + "timestamp_microsecond", + "timestamp_nanosecond", + ] { + let decoder = decoder(column); + + assert_eq!( + decoder.decode(0).unwrap(), + Value::Timestamp(Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap().into()) + ); + assert_eq!( + decoder.decode(1).unwrap(), + Value::Timestamp( + Utc.with_ymd_and_hms(2020, 10, 10, 10, 10, 10) + .unwrap() + .into() + ) + ); + assert_eq!( + decoder.decode(2).unwrap(), + Value::Timestamp( + Utc.with_ymd_and_hms(2020, 12, 31, 23, 59, 59) + .unwrap() + .into() + ) + ); + assert_eq!(decoder.decode(3).unwrap(), Value::Null); + } + } + + #[test] + fn fail_to_decode_values_of_other_types() { + value_decoder(ValueType::Timestamp, false, BOOLEAN_RECORD_BATCH.column(0)) + .map(|_| ()) + .unwrap_err(); + } + } +} diff --git a/graph/src/amp/common/mod.rs b/graph/src/amp/common/mod.rs new file mode 100644 index 00000000000..d98fbea3b1b --- /dev/null +++ b/graph/src/amp/common/mod.rs @@ -0,0 +1,24 @@ +pub(super) mod column_aliases { + pub(in crate::amp) static BLOCK_NUMBER: &[&str] = &[ + "_block_num", // Meta column present in all tables + "block_num", // Standard column in most raw tables + "blockNum", // Common alternative name + "blocknum", // Common alternative name + "block", // Common alternative name + "block_number", // Common alternative name + "blockNumber", // Common alternative name + "blocknumber", // Common alternative name + ]; + pub(in crate::amp) static BLOCK_HASH: &[&str] = &[ + "hash", // Standard column in some raw tables + "block_hash", // Standard column in most raw tables and common alternative name + "blockHash", // Common alternative name + "blockhash", // Common alternative name + ]; + pub(in crate::amp) static BLOCK_TIMESTAMP: &[&str] = &[ + "timestamp", // Standard column in most raw tables + "block_timestamp", // Common alternative name + "blockTimestamp", // Common alternative name + "blocktimestamp", // Common alternative name + ]; +} diff --git a/graph/src/amp/error.rs b/graph/src/amp/error.rs new file mode 100644 index 00000000000..3489d7a94de --- /dev/null +++ b/graph/src/amp/error.rs @@ -0,0 +1,5 @@ +/// Checks whether errors are deterministic. +pub trait IsDeterministic { + /// Returns `true` if the error is deterministic. 
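    ///
    /// A sketch of how a caller might branch on this flag (illustrative only; the
    /// retry/fail handling and the helper functions shown here are assumptions,
    /// not something this trait prescribes):
    ///
    /// ```ignore
    /// fn handle<E: IsDeterministic + std::fmt::Debug>(err: E) {
    ///     if err.is_deterministic() {
    ///         // The same inputs always reproduce this error; retrying cannot help.
    ///         report_permanent_failure(err); // hypothetical helper
    ///     } else {
    ///         // Transient failure (e.g. network); the operation may be retried.
    ///         schedule_retry(); // hypothetical helper
    ///     }
    /// }
    /// ```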
+ fn is_deterministic(&self) -> bool; +} diff --git a/graph/src/amp/log.rs b/graph/src/amp/log.rs new file mode 100644 index 00000000000..e11c129b6b7 --- /dev/null +++ b/graph/src/amp/log.rs @@ -0,0 +1,20 @@ +use std::borrow::Cow; + +use lazy_regex::regex_replace_all; + +/// Extends the [slog::Logger] with methods commonly used in Amp modules +pub trait Logger { + /// Creates a new child logger scoped to a specific component + fn component(&self, name: &'static str) -> slog::Logger; +} + +impl Logger for slog::Logger { + fn component(&self, name: &'static str) -> slog::Logger { + self.new(slog::o!("component" => name)) + } +} + +/// Removes newlines and extra spaces from a string +pub fn one_line<'a>(s: &'a str) -> Cow<'a, str> { + regex_replace_all!(r"(\\r)?(\\n)?\s+", s, " ") +} diff --git a/graph/src/amp/manifest/data_source/mod.rs b/graph/src/amp/manifest/data_source/mod.rs new file mode 100644 index 00000000000..9d8ec97b56e --- /dev/null +++ b/graph/src/amp/manifest/data_source/mod.rs @@ -0,0 +1,112 @@ +pub mod raw; + +use alloy::{ + json_abi::JsonAbi, + primitives::{Address, BlockNumber}, +}; +use arrow::datatypes::Schema; +use semver::Version; + +use crate::{amp::sql::BlockRangeQueryBuilder, data::subgraph::SPEC_VERSION_1_4_0}; + +pub use self::raw::RawDataSource; + +/// Represents a valid data source of an Amp subgraph. +/// +/// This data source contains parsed, formatted, and resolved data. +#[derive(Debug, Clone)] +pub struct DataSource { + /// The name of the data source. + /// + /// Used for observability to identify progress and errors produced by this data source. + pub name: String, + + /// The network name of the data source. + pub network: String, + + /// Contains the sources used by this data source. + pub source: Source, + + /// Contains the transformations of source tables indexed by the subgraph. + pub transformer: Transformer, +} + +impl DataSource { + pub const KIND: &str = "amp"; + pub const MIN_SPEC_VERSION: Version = SPEC_VERSION_1_4_0; +} + +/// Contains the sources that a data source uses. +#[derive(Debug, Clone)] +pub struct Source { + /// The dataset from which SQL queries in the data source can query. + pub dataset: String, + + /// The tables from which SQL queries in the data source can query. + pub tables: Vec, + + /// The contract address with which SQL queries in the data source interact. + /// + /// This address enables SQL query reuse through `sg_source_address()` calls instead of hard-coding the contract address. + /// The `sg_source_address()` calls in SQL queries of the data source resolve to this contract address. + /// + /// SQL queries are not limited to using only this contract address. + /// + /// Defaults to an empty contract address. + pub address: Address, + + /// The minimum block number that SQL queries in the data source can query. + /// + /// Defaults to the minimum possible block number. + pub start_block: BlockNumber, + + /// The maximum block number that SQL queries in the data source can query. + /// + /// Defaults to the maximum possible block number. + pub end_block: BlockNumber, +} + +/// Contains the transformations of source tables indexed by the subgraph. +#[derive(Debug, Clone)] +pub struct Transformer { + /// The version of this transformer. + pub api_version: Version, + + /// The ABIs that SQL queries can reference to extract event signatures. + /// + /// The `sg_event_signature('CONTRACT_NAME', 'EVENT_NAME')` calls in the + /// SQL queries resolve to a full event signature based on this list. 
+ pub abis: Vec, + + /// The transformed tables that extract data from source tables for indexing. + pub tables: Vec, +} + +/// Represents an ABI of a smart contract. +#[derive(Debug, Clone)] +pub struct Abi { + /// The name of the contract. + pub name: String, + + /// The JSON ABI of the contract. + pub contract: JsonAbi, +} + +/// Represents a transformed table that extracts data from source tables for indexing. +#[derive(Debug, Clone)] +pub struct Table { + /// The name of the transformed table. + /// + /// Must reference a valid entity name from the subgraph schema. + pub name: String, + + /// The SQL query that executes on the Amp server. + /// + /// The data resulting from this SQL query execution transforms into subgraph entities. + pub query: BlockRangeQueryBuilder, + + /// The Arrow schema of this transformed table SQL query. + /// + /// This schema loads from the Amp server. + pub schema: Schema, +} diff --git a/graph/src/amp/manifest/data_source/raw.rs b/graph/src/amp/manifest/data_source/raw.rs new file mode 100644 index 00000000000..016d8bd353f --- /dev/null +++ b/graph/src/amp/manifest/data_source/raw.rs @@ -0,0 +1,688 @@ +use std::{collections::HashSet, sync::LazyLock}; + +use alloy::{ + json_abi::JsonAbi, + primitives::{Address, BlockNumber}, +}; +use anyhow::anyhow; +use arrow::{array::RecordBatch, datatypes::Schema}; +use futures03::future::try_join_all; +use lazy_regex::regex_is_match; +use semver::Version; +use serde::Deserialize; +use slog::{debug, error, Logger}; +use thiserror::Error; + +use super::{Abi, DataSource, Source, Table, Transformer}; +use crate::{ + amp::{ + self, + codec::utils::{ + auto_block_hash_decoder, auto_block_number_decoder, auto_block_timestamp_decoder, + }, + error::IsDeterministic, + sql::{BlockRangeQueryBuilder, ContextQuery, ValidQuery}, + }, + components::link_resolver::LinkResolver, +}; + +/// Supported API versions for data source transformers. +static API_VERSIONS: LazyLock> = + LazyLock::new(|| HashSet::from([Version::new(0, 0, 1)])); + +/// Represents an unmodified input data source of an Amp subgraph. +/// +/// May contain invalid or partial data. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawDataSource { + /// The name of the data source. + /// + /// Must be a valid, non-empty identifier with no spaces or special characters. + pub name: String, + + /// The kind of the data source. + /// + /// Must be equal to `amp`. + pub kind: String, + + /// The network name of the data source. + pub network: String, + + /// Contains sources used by this data source. + pub source: RawSource, + + /// Contains transformations of source tables indexed by the subgraph. + pub transformer: RawTransformer, +} + +impl RawDataSource { + /// Parses, formats, and resolves the input data source into a valid data source. 
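    ///
    /// A minimal call sketch (illustrative; `raw_yaml`, `logger`, `link_resolver`
    /// as an `Arc<dyn LinkResolver>`, and `amp_client` are assumed to be built
    /// elsewhere):
    ///
    /// ```ignore
    /// let raw: RawDataSource = serde_yaml::from_str(raw_yaml)?;
    /// let data_source = raw.resolve(&logger, &*link_resolver, &amp_client).await?;
    /// ```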
+ pub async fn resolve( + self, + logger: &Logger, + link_resolver: &dyn LinkResolver, + amp_client: &impl amp::Client, + ) -> Result { + let Self { + name, + kind, + network, + source, + transformer, + } = self; + + let logger = logger.new(slog::o!("data_source" => name.clone())); + debug!(logger, "Resolving data source"); + + validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?; + Self::validate_kind(kind)?; + + let source = source + .resolve() + .map_err(|e| e.source_context("invalid `source`"))?; + + let transformer = transformer + .resolve(&logger, link_resolver, amp_client, &network, &source) + .await + .map_err(|e| e.source_context("invalid `transformer`"))?; + + Ok(DataSource { + name, + network, + source, + transformer, + }) + } + + fn validate_kind(kind: String) -> Result<(), Error> { + if !kind.eq_ignore_ascii_case(DataSource::KIND) { + return Err(Error::InvalidValue(anyhow!("invalid `kind`"))); + } + + Ok(()) + } +} + +/// Contains an unmodified input source used by the data source. +/// +/// May contain invalid or partial data. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawSource { + /// The dataset that SQL queries in the data source can query. + /// + /// Must reference a valid dataset name from the Amp server. + pub dataset: String, + + /// The tables that SQL queries in the data source can query. + /// + /// Must reference valid table names of the dataset from the Amp server. + pub tables: Vec, + + /// The contract address used by SQL queries in the data source. + /// + /// Enables SQL query reuse through `sg_source_address()` calls instead of hard-coding the contract address. + /// SQL queries resolve `sg_source_address()` calls to this contract address. + pub address: Option
, + + /// The minimum block number that SQL queries in the data source can query. + pub start_block: Option, + + /// The maximum block number that SQL queries in the data source can query. + pub end_block: Option, +} + +impl RawSource { + /// Parses, formats, and resolves the input source into a valid source. + fn resolve(self) -> Result { + let Self { + dataset, + tables, + address, + start_block, + end_block, + } = self; + + if dataset.is_empty() { + return Err(Error::InvalidValue(anyhow!("`dataset` cannot be empty"))); + } + Self::validate_tables(&tables)?; + + let address = address.unwrap_or(Address::ZERO); + let start_block = start_block.unwrap_or(BlockNumber::MIN); + let end_block = end_block.unwrap_or(BlockNumber::MAX); + + if start_block >= end_block { + return Err(Error::InvalidValue(anyhow!( + "`end_block` must be greater than `start_block`" + ))); + } + + Ok(Source { + dataset, + tables, + address, + start_block, + end_block, + }) + } + + fn validate_tables(tables: &[String]) -> Result<(), Error> { + const MAX_TABLES: usize = 100; + + if tables.is_empty() { + return Err(Error::InvalidValue(anyhow!("`tables` cannot be empty"))); + } + + if tables.len() > MAX_TABLES { + return Err(Error::InvalidValue(anyhow!( + "`tables` cannot have more than {MAX_TABLES} tables" + ))); + } + + for (i, table) in tables.iter().enumerate() { + if table.is_empty() { + return Err(Error::InvalidValue(anyhow!( + "`table` at index {i} cannot be empty" + ))); + } + } + + Ok(()) + } +} + +/// Contains unmodified input transformations of source tables indexed by the subgraph. +/// +/// May contain invalid or partial data. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawTransformer { + /// The version of this transformer. + /// + /// Must be a supported API version of the Amp subgraph transformers API. + pub api_version: Version, + + /// The ABIs that SQL queries can reference to extract event signatures. + /// + /// SQL queries resolve `sg_event_signature('CONTRACT_NAME', 'EVENT_NAME')` calls + /// to full event signatures based on this list. + pub abis: Option>, + + /// The transformed tables that extract data from source tables for indexing. + pub tables: Vec, +} + +impl RawTransformer { + /// Parses, formats, and resolves the input transformer into a valid transformer. 
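    ///
    /// The accepted shape, as a sketch (illustrative; in practice the value is
    /// deserialized from the manifest YAML, and `raw_table` is a hypothetical
    /// `RawTable`):
    ///
    /// ```ignore
    /// let raw = RawTransformer {
    ///     api_version: Version::new(0, 0, 1), // currently the only supported version
    ///     abis: None,
    ///     tables: vec![raw_table],
    /// };
    /// ```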
+ async fn resolve( + self, + logger: &Logger, + link_resolver: &dyn LinkResolver, + amp_client: &impl amp::Client, + network: &str, + source: &Source, + ) -> Result { + let Self { + api_version, + abis, + tables, + } = self; + Self::validate_api_version(&api_version)?; + + let abis = Self::resolve_abis(logger, link_resolver, abis).await?; + let tables = Self::resolve_tables( + logger, + link_resolver, + amp_client, + network, + tables, + source, + &abis, + ) + .await?; + + Ok(Transformer { + api_version, + abis, + tables, + }) + } + + fn validate_api_version(api_version: &Version) -> Result<(), Error> { + if !API_VERSIONS.contains(api_version) { + return Err(Error::InvalidValue(anyhow!("invalid `api_version`"))); + } + + Ok(()) + } + + async fn resolve_abis( + logger: &Logger, + link_resolver: &dyn LinkResolver, + abis: Option>, + ) -> Result, Error> { + const MAX_ABIS: usize = 100; + + let Some(abis) = abis else { + return Ok(Vec::new()); + }; + + if abis.len() > MAX_ABIS { + return Err(Error::InvalidValue(anyhow!( + "`abis` cannot have more than {MAX_ABIS} ABIs" + ))); + } + + let abi_futs = abis.into_iter().enumerate().map(|(i, abi)| async move { + let logger = logger.new(slog::o!("abi_name" => abi.name.clone())); + debug!(logger, "Resolving ABI"; + "file" => &abi.file, + ); + + abi.resolve(&logger, link_resolver) + .await + .map_err(|e| e.source_context(format!("invalid `abis` at index {i}"))) + }); + + try_join_all(abi_futs).await + } + + async fn resolve_tables( + logger: &Logger, + link_resolver: &dyn LinkResolver, + amp_client: &impl amp::Client, + network: &str, + tables: Vec, + source: &Source, + abis: &[Abi], + ) -> Result, Error> { + const MAX_TABLES: usize = 100; + + if tables.is_empty() { + return Err(Error::InvalidValue(anyhow!("`tables` cannot be empty"))); + } + + if tables.len() > MAX_TABLES { + return Err(Error::InvalidValue(anyhow!( + "`tables` cannot have more than {MAX_TABLES} tables" + ))); + } + + let table_futs = tables.into_iter().enumerate().map(|(i, table)| async move { + let logger = logger.new(slog::o!("table_name" => table.name.clone())); + debug!(logger, "Resolving table"; + "file" => ?&table.file + ); + + table + .resolve(&logger, link_resolver, amp_client, network, source, abis) + .await + .map_err(|e| e.source_context(format!("invalid `tables` at index {i}"))) + }); + + try_join_all(table_futs).await + } +} + +/// Represents an unmodified input ABI of a smart contract. +/// +/// May contain invalid or partial data. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawAbi { + /// The name of the contract. + pub name: String, + + /// The IPFS link to the JSON ABI of the contract. + pub file: String, +} + +impl RawAbi { + /// Parses, formats, and resolves the input ABI into a valid ABI. 
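    ///
    /// A minimal call sketch (illustrative; the contract name and IPFS link are
    /// hypothetical, and `logger`/`link_resolver` are assumed to exist):
    ///
    /// ```ignore
    /// let raw = RawAbi {
    ///     name: "ERC20".to_string(),
    ///     file: "/ipfs/<cid-of-the-json-abi>".to_string(),
    /// };
    /// let abi = raw.resolve(&logger, &*link_resolver).await?;
    /// ```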
+ async fn resolve( + self, + logger: &Logger, + link_resolver: &dyn LinkResolver, + ) -> Result { + let Self { name, file } = self; + + validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?; + let contract = Self::resolve_contract(logger, link_resolver, file).await?; + + Ok(Abi { name, contract }) + } + + async fn resolve_contract( + logger: &Logger, + link_resolver: &dyn LinkResolver, + file: String, + ) -> Result { + if file.is_empty() { + return Err(Error::InvalidValue(anyhow!("`file` cannot be empty"))); + } + + let file_bytes = link_resolver + .cat(logger, &(file.into())) + .await + .map_err(|e| Error::FailedToResolveFile(e.context("invalid `file`")))?; + + let contract: JsonAbi = serde_json::from_slice(&file_bytes) + .map_err(|e| Error::InvalidValue(anyhow!(e).context("invalid `file`")))?; + + Ok(contract) + } +} + +/// Represents an unmodified input transformed table that extracts data from source tables for indexing. +/// +/// May contain invalid or partial data. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct RawTable { + /// The name of the transformed table. + /// + /// Must reference a valid entity name from the subgraph schema. + pub name: String, + + /// The SQL query that executes on the Amp server. + /// + /// Transforms the execution results into subgraph entities. + pub query: Option, + + /// The IPFS link to the SQL query that executes on the Amp server. + /// + /// Transforms the execution results into subgraph entities. + /// + /// Ignored when `query` is set. + pub file: Option, +} + +impl RawTable { + /// Parses, formats, and resolves the input table into a valid transformed table. + async fn resolve( + self, + logger: &Logger, + link_resolver: &dyn LinkResolver, + amp_client: &impl amp::Client, + network: &str, + source: &Source, + abis: &[Abi], + ) -> Result { + let Self { name, query, file } = self; + + validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?; + let query = match Self::resolve_query(query, source, abis)? 
{ + Some(query) => query, + None => Self::resolve_file(logger, link_resolver, file, source, abis).await?, + }; + + debug!(logger, "Resolving query schema"); + let schema = Self::resolve_schema(logger, amp_client, &query).await?; + + for field in schema.fields() { + validate_ident(field.name()).map_err(|e| { + e.source_context(format!( + "invalid query output schema: invalid column '{}'", + field.name() + )) + })?; + } + + let block_range_query_builder = Self::resolve_block_range_query_builder( + logger, + amp_client, + network, + source, + query, + schema.clone(), + ) + .await?; + + Ok(Table { + name, + query: block_range_query_builder, + schema, + }) + } + + fn resolve_query( + query: Option, + source: &Source, + abis: &[Abi], + ) -> Result, Error> { + let Some(query) = query else { + return Ok(None); + }; + + if query.is_empty() { + return Err(Error::InvalidValue(anyhow!("`query` cannot be empty"))); + } + + ValidQuery::new( + &query, + source.dataset.as_str(), + source.tables.iter().map(|table| table.as_str()), + &source.address, + abis.iter().map(|abi| (abi.name.as_str(), &abi.contract)), + ) + .map(Some) + .map_err(|e| Error::InvalidValue(e.context("invalid `query`"))) + } + + async fn resolve_file( + logger: &Logger, + link_resolver: &dyn LinkResolver, + file: Option, + source: &Source, + abis: &[Abi], + ) -> Result { + debug!(logger, "Resolving query file"); + + let Some(file) = file else { + return Err(Error::InvalidValue(anyhow!("`file` cannot be empty"))); + }; + + if file.is_empty() { + return Err(Error::InvalidValue(anyhow!("`file` cannot be empty"))); + } + + let file_bytes = link_resolver + .cat(logger, &(file.into())) + .await + .map_err(|e| Error::FailedToResolveFile(e.context("invalid `file`")))?; + + let query = String::from_utf8(file_bytes) + .map_err(|e| Error::InvalidValue(anyhow!(e).context("invalid `file`")))?; + + if query.is_empty() { + return Err(Error::InvalidValue(anyhow!("`file` cannot be empty"))); + } + + ValidQuery::new( + &query, + source.dataset.as_str(), + source.tables.iter().map(|table| table.as_str()), + &source.address, + abis.iter().map(|abi| (abi.name.as_str(), &abi.contract)), + ) + .map_err(|e| Error::InvalidValue(e.context("invalid `file`"))) + } + + async fn resolve_schema( + logger: &Logger, + amp_client: &impl amp::Client, + query: impl ToString, + ) -> Result { + amp_client + .schema(logger, query) + .await + .map_err(|e| Error::FailedToExecuteQuery { + is_deterministic: e.is_deterministic(), + source: anyhow!(e).context("failed to load schema"), + }) + } + + async fn resolve_block_range_query_builder( + logger: &Logger, + amp_client: &impl amp::Client, + network: &str, + source: &Source, + query: ValidQuery, + schema: Schema, + ) -> Result { + debug!(logger, "Resolving block range query builder"); + + let record_batch = RecordBatch::new_empty(schema.into()); + let (block_number_column, _) = + auto_block_number_decoder(&record_batch).map_err(|e| Error::InvalidQuery(e))?; + + let has_block_hash_column = auto_block_hash_decoder(&record_batch).is_ok(); + let has_block_timestamp_column = auto_block_timestamp_decoder(&record_batch).is_ok(); + + if has_block_hash_column && has_block_timestamp_column { + return Ok(BlockRangeQueryBuilder::new(query, block_number_column)); + } + + debug!(logger, "Resolving context query"); + let mut context_query: Option = None; + + // TODO: Context is embedded in the original query using INNER JOIN to ensure availability for every output row. 
+ // This requires all source tables to match or exceed the expected query output size. + let context_sources_iter = source + .tables + .iter() + .map(|table| (source.dataset.as_str(), table.as_str())) + // TODO: Replace hardcoded values with schema metadata sources when available + .chain(match network { + "ethereum-mainnet" => vec![("edgeandnode/ethereum_mainnet", "blocks")], + "base-mainnet" => vec![("edgeandnode/base_mainnet", "blocks")], + "base-sepolia" => vec![("edgeandnode/base_sepolia", "blocks")], + "arbitrum-one" => vec![("edgeandnode/arbitrum_one", "blocks")], + _ => vec![], + }); + + for (dataset, table) in context_sources_iter { + let context_logger = logger.new(slog::o!( + "context_dataset" => dataset.to_string(), + "context_table" => table.to_string() + )); + debug!(context_logger, "Loading context schema"); + let schema_query = format!("SELECT * FROM {dataset}.{table}"); + let schema = match Self::resolve_schema(logger, amp_client, schema_query).await { + Ok(schema) => schema, + Err(e) => { + error!(context_logger, "Failed to load context schema"; + "e" => ?e + ); + continue; + } + }; + + let record_batch = RecordBatch::new_empty(schema.clone().into()); + let mut columns = Vec::new(); + + if !has_block_hash_column { + let Ok((block_hash_column, _)) = auto_block_hash_decoder(&record_batch) else { + debug!( + context_logger, + "Context schema does not contain block hash column, skipping" + ); + continue; + }; + + columns.push(block_hash_column); + } + + if !has_block_timestamp_column { + let Ok((block_timestamp_column, _)) = auto_block_timestamp_decoder(&record_batch) + else { + debug!( + context_logger, + "Context schema does not contain block timestamp column, skipping" + ); + continue; + }; + + columns.push(block_timestamp_column); + } + + debug!(context_logger, "Creating context query"); + context_query = Some(ContextQuery::new( + query, + block_number_column, + dataset, + table, + columns, + )); + break; + } + + if let Some(context_query) = context_query { + return Ok(BlockRangeQueryBuilder::new_with_context(context_query)); + } + + Err(Error::InvalidQuery(anyhow!( + "query is required to output block numbers, block hashes and block timestamps" + ))) + } +} + +#[derive(Debug, Error)] +pub enum Error { + #[error("invalid value: {0:#}")] + InvalidValue(#[source] anyhow::Error), + + #[error("invalid query: {0:#}")] + InvalidQuery(#[source] anyhow::Error), + + #[error("failed to resolve file: {0:#}")] + FailedToResolveFile(#[source] anyhow::Error), + + #[error("failed to execute query: {source:#}")] + FailedToExecuteQuery { + source: anyhow::Error, + is_deterministic: bool, + }, +} + +impl Error { + /// Extends the source errors with additional context keeping the original error kind and the determinism. + fn source_context(self, cx: impl Into) -> Self { + match self { + Self::InvalidValue(e) => Self::InvalidValue(e.context(cx.into())), + Self::InvalidQuery(e) => Self::InvalidQuery(e.context(cx.into())), + Self::FailedToResolveFile(e) => Self::FailedToResolveFile(e.context(cx.into())), + Self::FailedToExecuteQuery { + source, + is_deterministic, + } => Self::FailedToExecuteQuery { + source: source.context(cx.into()), + is_deterministic, + }, + } + } +} + +impl IsDeterministic for Error { + fn is_deterministic(&self) -> bool { + match self { + Self::InvalidValue(_) => true, + Self::InvalidQuery(_) => true, + Self::FailedToResolveFile(_) => false, + Self::FailedToExecuteQuery { + is_deterministic, .. 
+ } => *is_deterministic, + } + } +} + +fn validate_ident(s: &str) -> Result<(), Error> { + if !regex_is_match!("^[a-zA-Z_][a-zA-Z0-9_-]{0,100}$", s) { + return Err(Error::InvalidValue( + anyhow!("invalid identifier '{s}': must start with a letter or an underscore, and contain only letters, numbers, hyphens, and underscores") + )); + } + Ok(()) +} diff --git a/graph/src/amp/manifest/mod.rs b/graph/src/amp/manifest/mod.rs new file mode 100644 index 00000000000..0a70273abbc --- /dev/null +++ b/graph/src/amp/manifest/mod.rs @@ -0,0 +1,109 @@ +pub mod data_source; + +use std::sync::Arc; + +use anyhow::{bail, Context, Result}; +use itertools::Itertools; +use semver::Version; +use slog::Logger; + +use crate::{ + amp::Client, + blockchain::Blockchain, + components::link_resolver::LinkResolver, + data::subgraph::{BaseSubgraphManifest, DeploymentHash, UnresolvedSubgraphManifest}, + data_source::DataSource as GenericDataSource, + schema::InputSchema, +}; + +pub use self::data_source::DataSource; + +/// Represents a valid Amp subgraph manifest. +/// +/// This manifest contains parsed, formatted, and resolved data. +#[derive(Debug, Clone)] +pub struct Manifest { + /// The schema of the subgraph. + /// + /// Contains all the entities, aggregations, and relationships between them. + pub schema: InputSchema, + + /// The Amp data sources of the subgraph. + /// + /// An Amp subgraph can only contain Amp data sources. + pub data_sources: Vec, +} + +impl Manifest { + /// Resolves and returns a valid Amp subgraph manifest. + pub async fn resolve( + logger: &Logger, + link_resolver: Arc, + amp_client: Arc, + max_spec_version: Version, + deployment: DeploymentHash, + raw_manifest: serde_yaml::Mapping, + ) -> Result { + let unresolved_manifest = UnresolvedSubgraphManifest::::parse(deployment, raw_manifest) + .context("failed to parse subgraph manifest")?; + + let resolved_manifest = unresolved_manifest + .resolve(&link_resolver, Some(amp_client), logger, max_spec_version) + .await + .context("failed to resolve subgraph manifest")?; + + let BaseSubgraphManifest { + id: _, + spec_version: _, + features: _, + description: _, + repository: _, + schema, + data_sources, + graft: _, + templates: _, + chain: _, + indexer_hints: _, + } = resolved_manifest; + + let data_sources_count = data_sources.len(); + let amp_data_sources = data_sources + .into_iter() + .filter_map(|data_source| match data_source { + GenericDataSource::Amp(amp_data_source) => Some(amp_data_source), + _ => None, + }) + .collect_vec(); + + if amp_data_sources.is_empty() { + bail!("invalid subgraph manifest: failed to find Amp data sources"); + } + + if amp_data_sources.len() != data_sources_count { + bail!("invalid subgraph manifest: only Amp data sources are allowed"); + } + + Ok(Self { + schema, + data_sources: amp_data_sources, + }) + } +} + +/// Returns `true` if the raw manifest contains any Amp data sources. +pub fn is_amp_manifest(raw_manifest: &serde_yaml::Mapping) -> bool { + use serde_yaml::Value; + + raw_manifest + .get("dataSources") + .and_then(Value::as_sequence) + .and_then(|seq| { + seq.iter() + .filter_map(Value::as_mapping) + .filter_map(|map| map.get("kind")) + .filter_map(Value::as_str) + .filter(|kind| *kind == DataSource::KIND) + .next() + }) + .is_some() +} diff --git a/graph/src/amp/mod.rs b/graph/src/amp/mod.rs new file mode 100644 index 00000000000..9541d450626 --- /dev/null +++ b/graph/src/amp/mod.rs @@ -0,0 +1,17 @@ +//! This module contains the functionality required to support Amp subgraphs. 
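//!
//! A rough end-to-end sketch of how the pieces fit together (illustrative only;
//! construction of the logger, link resolver, Amp client, deployment hash, and
//! raw manifest is assumed, and the blockchain type parameter of `resolve` is
//! elided):
//!
//! ```ignore
//! if manifest::is_amp_manifest(&raw_manifest) {
//!     // Resolves data sources, their ABIs, and their SQL queries against the Amp server.
//!     let manifest = Manifest::resolve(
//!         &logger,
//!         link_resolver,
//!         amp_client,
//!         max_spec_version,
//!         deployment,
//!         raw_manifest,
//!     )
//!     .await?;
//! }
//! ```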
+ +pub mod client; +pub mod codec; +pub mod common; +pub mod error; +pub mod log; +pub mod manifest; +pub mod schema; +pub mod sql; +pub mod stream_aggregator; + +pub use self::{ + client::{flight_client::FlightClient, Client}, + codec::Codec, + manifest::Manifest, +}; diff --git a/graph/src/amp/schema/generator/entity.rs b/graph/src/amp/schema/generator/entity.rs new file mode 100644 index 00000000000..7e3fa5b8f6c --- /dev/null +++ b/graph/src/amp/schema/generator/entity.rs @@ -0,0 +1,171 @@ +use std::fmt; + +use anyhow::{bail, Context, Result}; +use inflector::Inflector; + +use crate::data::store::ValueType; + +/// A minimal representation of a subgraph entity. +pub(super) struct SchemaEntity { + name: String, + fields: Vec, +} + +impl SchemaEntity { + /// Converts the Arrow schema to a subgraph entity. + /// + /// # Errors + /// + /// Returns an error if Arrow fields cannot be converted to subgraph entity fields. + /// + /// The returned error is deterministic. + pub(super) fn new(name: String, arrow_schema: arrow::datatypes::Schema) -> Result { + let mut fields = arrow_schema + .fields() + .iter() + .map(|field| { + SchemaField::new(field) + .with_context(|| format!("failed to create field '{}'", field.name())) + }) + .collect::, _>>()?; + + if !fields + .iter() + .any(|field| field.name.as_str().eq_ignore_ascii_case("id")) + { + fields.push(SchemaField::id()); + } + + fields.sort_unstable_by_key(|field| field.name.clone()); + + Ok(Self { name, fields }) + } +} + +impl fmt::Display for SchemaEntity { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write! {f, "type {} @entity(immutable: true)", self.name.to_pascal_case()}?; + write! {f, " {{\n"}?; + for field in &self.fields { + write! {f, "\t{field}\n"}?; + } + write! {f, "}}"} + } +} + +/// A minimal representation of a subgraph entity field. +struct SchemaField { + name: String, + value_type: ValueType, + is_list: bool, + is_required: bool, +} + +impl SchemaField { + /// Converts the Arrow field to a subgraph entity field. + /// + /// # Errors + /// + /// Returns an error if: + /// - The Arrow field has an invalid name + /// - The Arrow field type cannot be converted to a subgraph entity value type + /// + /// The returned error is deterministic. + fn new(arrow_field: &arrow::datatypes::Field) -> Result { + let name = arrow_field.name().to_string(); + let (value_type, is_list) = arrow_data_type_to_value_type(arrow_field.data_type())?; + let is_required = !arrow_field.is_nullable(); + + Ok(Self { + name, + value_type, + is_list, + is_required, + }) + } + + /// Creates an `ID` subgraph entity field. + fn id() -> Self { + Self { + name: "id".to_string(), + value_type: ValueType::Bytes, + is_list: false, + is_required: true, + } + } +} + +impl fmt::Display for SchemaField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write! {f, "{}: ", self.name.to_camel_case()}?; + if self.is_list { + write! {f, "["}?; + } + write! {f, "{}", self.value_type.to_str()}?; + if self.is_list { + write! {f, "]"}?; + } + if self.is_required { + write! 
{f, "!"}?; + } + Ok(()) + } +} + +fn arrow_data_type_to_value_type( + data_type: &arrow::datatypes::DataType, +) -> Result<(ValueType, bool)> { + use arrow::datatypes::DataType::*; + + let type_not_supported = || bail!("type '{data_type}' not supported"); + let value_type = match data_type { + Null => return type_not_supported(), + Boolean => ValueType::Boolean, + Int8 => ValueType::Int, + Int16 => ValueType::Int, + Int32 => ValueType::Int, + Int64 => ValueType::Int8, + UInt8 => ValueType::Int, + UInt16 => ValueType::Int, + UInt32 => ValueType::Int8, + UInt64 => ValueType::BigInt, + Float16 => ValueType::BigDecimal, + Float32 => ValueType::BigDecimal, + Float64 => ValueType::BigDecimal, + Timestamp(_, _) => ValueType::Timestamp, + Date32 => ValueType::Timestamp, + Date64 => ValueType::Timestamp, + Time32(_) => return type_not_supported(), + Time64(_) => return type_not_supported(), + Duration(_) => return type_not_supported(), + Interval(_) => return type_not_supported(), + Binary => ValueType::Bytes, + FixedSizeBinary(_) => ValueType::Bytes, + LargeBinary => ValueType::Bytes, + BinaryView => ValueType::Bytes, + Utf8 => ValueType::String, + LargeUtf8 => ValueType::String, + Utf8View => ValueType::String, + List(field) + | ListView(field) + | FixedSizeList(field, _) + | LargeList(field) + | LargeListView(field) => { + if field.data_type().is_nested() { + return type_not_supported(); + } + + return arrow_data_type_to_value_type(field.data_type()) + .map(|(value_type, _)| (value_type, true)); + } + Struct(_) => return type_not_supported(), + Union(_, _) => return type_not_supported(), + Dictionary(_, _) => return type_not_supported(), + Decimal128(_, _) => ValueType::BigDecimal, + Decimal256(_, _) => ValueType::BigDecimal, + Map(_, _) => return type_not_supported(), + RunEndEncoded(_, _) => return type_not_supported(), + }; + + Ok((value_type, false)) +} diff --git a/graph/src/amp/schema/generator/mod.rs b/graph/src/amp/schema/generator/mod.rs new file mode 100644 index 00000000000..117d710adbe --- /dev/null +++ b/graph/src/amp/schema/generator/mod.rs @@ -0,0 +1,65 @@ +mod entity; + +use anyhow::{Context, Result}; +use itertools::Itertools; + +use self::entity::SchemaEntity; +use crate::{data::subgraph::DeploymentHash, schema::InputSchema}; + +/// Generates a subgraph schema from a list of Arrow schemas. +/// +/// # Limitations +/// +/// The generated subgraph entities are immutable and do not contain any relationships to other entities within the schema. +/// +/// # Errors +/// +/// Returns an error if any of the Arrow schemas cannot be represented as valid subgraph entities. +/// +/// The returned error is deterministic. 
+pub fn generate_subgraph_schema( + deployment_hash: &DeploymentHash, + named_schemas: impl IntoIterator, +) -> Result { + let mut named_schemas = merge_related_schemas(named_schemas)?; + named_schemas.sort_unstable_by_key(|(name, _)| name.clone()); + + let entities = create_entities(named_schemas)?; + let mut subgraph_schema = String::new(); + + for entity in entities { + subgraph_schema.extend(std::iter::once(entity.to_string())); + subgraph_schema.push_str("\n\n"); + } + + let input_schema = InputSchema::parse_latest(&subgraph_schema, deployment_hash.to_owned()) + .context("failed to parse subgraph schema")?; + + Ok(input_schema) +} + +fn merge_related_schemas( + named_schemas: impl IntoIterator, +) -> Result> { + named_schemas + .into_iter() + .into_group_map_by(|(name, _)| name.clone()) + .into_iter() + .map(|(name, related_schemas)| { + let related_schemas = related_schemas.into_iter().map(|(_, schema)| schema); + + arrow::datatypes::Schema::try_merge(related_schemas).map(|schema| (name, schema)) + }) + .collect::, _>>() + .context("failed to merge schemas of related SQL queries") +} + +fn create_entities(queries: Vec<(String, arrow::datatypes::Schema)>) -> Result> { + queries + .into_iter() + .map(|(name, schema)| { + SchemaEntity::new(name.clone(), schema) + .with_context(|| format!("failed to create entity '{}'", name)) + }) + .collect::, _>>() +} diff --git a/graph/src/amp/schema/mod.rs b/graph/src/amp/schema/mod.rs new file mode 100644 index 00000000000..546777a14ff --- /dev/null +++ b/graph/src/amp/schema/mod.rs @@ -0,0 +1,3 @@ +mod generator; + +pub use self::generator::generate_subgraph_schema; diff --git a/graph/src/amp/sql/mod.rs b/graph/src/amp/sql/mod.rs new file mode 100644 index 00000000000..02355895afa --- /dev/null +++ b/graph/src/amp/sql/mod.rs @@ -0,0 +1,3 @@ +pub mod query_builder; + +pub use self::query_builder::{BlockRangeQueryBuilder, ContextQuery, ValidQuery}; diff --git a/graph/src/amp/sql/query_builder/block_range_query.rs b/graph/src/amp/sql/query_builder/block_range_query.rs new file mode 100644 index 00000000000..e9b91ca5136 --- /dev/null +++ b/graph/src/amp/sql/query_builder/block_range_query.rs @@ -0,0 +1,189 @@ +use std::{ + collections::BTreeMap, + hash::{BuildHasher, Hash, Hasher}, + ops::{ControlFlow, RangeInclusive}, +}; + +use ahash::RandomState; +use alloy::primitives::BlockNumber; +use sqlparser_latest::ast::{self, VisitMut, VisitorMut}; + +use super::{extract_tables, parse_query, TableReference}; + +/// Limits the query execution to the specified block range. +/// +/// Wraps the `query` in a CTE, and creates CTEs for every table it references. +/// These CTEs load data from the referenced tables only on the specified block range. +/// All the table references in the original SQL query are replaced with the created CTE names. +/// +/// The output is ordered by block numbers. +pub(super) fn new_block_range_query<'a>( + query: &ast::Query, + block_number_column: &str, + block_range: &RangeInclusive, +) -> ast::Query { + // CTE names are unique within a SQL query. + // The hasher ensures that CTEs created for block range do not collide with user-defined CTEs. + // Constant seeds ensure consistent block range queries for the same input parameters. 
+ let mut hasher = RandomState::with_seeds(0, 0, 0, 0).build_hasher(); + + let tables_to_ctes_mapping = new_tables_to_ctes_mapping(query, &mut hasher); + assert!(!tables_to_ctes_mapping.is_empty()); + + let mut cte_tables = Vec::with_capacity(tables_to_ctes_mapping.len()); + for (table, cte_table) in &tables_to_ctes_mapping { + cte_tables.push(format!( + "{cte_table} AS (SELECT * FROM {table} WHERE _block_num BETWEEN {start_block} AND {end_block})", + start_block = block_range.start(), + end_block = block_range.end() + )) + } + + let mut query = query.clone(); + let mut table_replacer = TableReplacer::new(tables_to_ctes_mapping); + let _: ControlFlow<()> = VisitMut::visit(&mut query, &mut table_replacer); + + let block_range_query = format!( + "WITH {cte_tables}, {source} AS ({query}) SELECT {source}.* FROM {source} ORDER BY {source}.{block_number_column}", + cte_tables = cte_tables.join(", "), + source = format!("source_{}", hasher.finish()) + ); + + parse_query(block_range_query).unwrap() +} + +/// Creates unique CTE names for every table referenced by the SQL query. +fn new_tables_to_ctes_mapping( + query: &ast::Query, + hasher: &mut impl Hasher, +) -> BTreeMap { + extract_tables(query) + .into_iter() + .map(|table| { + table.hash(hasher); + + (table, format!("block_range_{}", hasher.finish())) + }) + .collect() +} + +/// Visits the SQL query AST and replaces referenced table names with CTE names. +struct TableReplacer { + tables_to_ctes_mapping: BTreeMap, +} + +impl TableReplacer { + /// Creates a new table replacer. + fn new(tables_to_ctes_mapping: BTreeMap) -> Self { + Self { + tables_to_ctes_mapping, + } + } + + /// Replaces the table name of the current `table_factor` with the associated CTE name. + fn visit_table_factor(&mut self, table_factor: &mut ast::TableFactor) { + let ast::TableFactor::Table { name, alias, .. 
} = table_factor else { + return; + }; + + let Some(cte_table) = self + .tables_to_ctes_mapping + .get(&TableReference::with_object_name(name)) + else { + return; + }; + + // Set the alias to the original table name so that queries like `SELECT table.column FROM table` do not break + if alias.is_none() { + let last_name_part = name.0.last().unwrap(); + + *alias = Some(ast::TableAlias { + name: last_name_part.as_ident().unwrap().clone(), + columns: Vec::new(), + }) + } + + *name = ast::ObjectName(vec![ast::ObjectNamePart::Identifier(ast::Ident::new( + cte_table, + ))]); + } +} + +impl VisitorMut for TableReplacer { + type Break = (); + + fn pre_visit_table_factor( + &mut self, + table_factor: &mut ast::TableFactor, + ) -> ControlFlow { + self.visit_table_factor(table_factor); + ControlFlow::Continue(()) + } +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + #[test] + fn query_with_one_table_reference_is_wrapped_with_block_range() { + let query = parse_query("SELECT a, b, c FROM d").unwrap(); + let block_number_column = "b"; + let block_range = 0..=1_000_000; + let block_range_query = new_block_range_query(&query, block_number_column, &block_range); + + assert_eq!( + block_range_query, + parse_query( + r#" + WITH block_range_1164572571450379730 AS ( + SELECT * FROM "d" WHERE _block_num BETWEEN 0 AND 1000000 + ), + source_1164572571450379730 AS ( + SELECT a, b, c FROM block_range_1164572571450379730 AS d + ) + SELECT + source_1164572571450379730.* + FROM + source_1164572571450379730 + ORDER BY + source_1164572571450379730.b + "# + ) + .unwrap(), + ) + } + + #[test] + fn query_with_multiple_table_references_is_wrapped_with_block_range() { + let query = parse_query("SELECT a, b, c FROM d JOIN e ON e.e = d.d").unwrap(); + let block_number_column = "b"; + let block_range = 0..=1_000_000; + let block_range_query = new_block_range_query(&query, block_number_column, &block_range); + + assert_eq!( + block_range_query, + parse_query( + r#" + WITH block_range_1164572571450379730 AS ( + SELECT * FROM "d" WHERE _block_num BETWEEN 0 AND 1000000 + ), + block_range_13063992259633584610 AS ( + SELECT * FROM "e" WHERE _block_num BETWEEN 0 AND 1000000 + ), + source_13063992259633584610 AS ( + SELECT a, b, c FROM block_range_1164572571450379730 AS d JOIN block_range_13063992259633584610 AS e ON e.e = d.d + ) + SELECT + source_13063992259633584610.* + FROM + source_13063992259633584610 + ORDER BY + source_13063992259633584610.b + "# + ) + .unwrap(), + ) + } +} diff --git a/graph/src/amp/sql/query_builder/context_query.rs b/graph/src/amp/sql/query_builder/context_query.rs new file mode 100644 index 00000000000..cdff33ca4a3 --- /dev/null +++ b/graph/src/amp/sql/query_builder/context_query.rs @@ -0,0 +1,103 @@ +use ahash::RandomState; +use itertools::Itertools; +use sqlparser_latest::ast; + +use super::parse_query; + +/// Wraps the SQL query with additional context columns from a separate dataset. +/// +/// Creates two CTEs: one wrapping the input `query` and another loading context columns +/// from the specified context dataset and table. Joins both CTEs on block numbers to +/// include the context columns in the original query's output. +/// +/// This enables including columns required by Amp subgraphs in the original SQL query. +pub(super) fn new_context_query<'a>( + query: &ast::Query, + block_number_column: &str, + context_dataset: &str, + context_table: &str, + context_columns: impl IntoIterator, +) -> ast::Query { + // CTE names are unique within a SQL query. 
+ // The hasher ensures that CTEs created for context do not collide with user-defined CTEs. + // Constant seeds ensure consistent context queries for the same input parameters. + let hasher = RandomState::with_seeds(0, 0, 0, 0); + let query_hash = hasher.hash_one(query); + + let context_columns = context_columns.into_iter().collect_vec(); + assert!(!context_columns.is_empty()); + + let context_cte = format!("context_{query_hash}"); + let source_cte = format!("source_{query_hash}"); + + let context_query = format!( + " + WITH {context_cte} AS ( + SELECT DISTINCT _block_num, {input_context_columns} FROM {context_dataset}.{context_table} + ), + {source_cte} AS ( + {query} + ) + SELECT + {output_context_columns}, + {source_cte}.* + FROM + {source_cte} + INNER JOIN {context_cte} ON + {context_cte}._block_num = {source_cte}.{block_number_column} + ", + input_context_columns = context_columns.join(", "), + output_context_columns = context_columns + .iter() + .map(|context_column| format!("{context_cte}.{context_column}")) + .join(", "), + ); + + parse_query(context_query).unwrap() +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + #[test] + fn query_is_wrapped_with_context() { + let query = parse_query("SELECT a, b, c FROM d").unwrap(); + let block_number_column = "b"; + let context_dataset = "cx_a"; + let context_table = "cx_b"; + let context_columns = ["cx_c", "cx_d"]; + + let context_query = new_context_query( + &query, + block_number_column, + context_dataset, + context_table, + context_columns, + ); + + assert_eq!( + context_query, + parse_query( + " + WITH context_10500256449332496249 AS ( + SELECT DISTINCT _block_num, cx_c, cx_d FROM cx_a.cx_b + ), + source_10500256449332496249 AS ( + SELECT a, b, c FROM d + ) + SELECT + context_10500256449332496249.cx_c, + context_10500256449332496249.cx_d, + source_10500256449332496249.* + FROM + source_10500256449332496249 + INNER JOIN context_10500256449332496249 ON + context_10500256449332496249._block_num = source_10500256449332496249.b + " + ) + .unwrap() + ) + } +} diff --git a/graph/src/amp/sql/query_builder/event_signature_resolver.rs b/graph/src/amp/sql/query_builder/event_signature_resolver.rs new file mode 100644 index 00000000000..89ab8a31a51 --- /dev/null +++ b/graph/src/amp/sql/query_builder/event_signature_resolver.rs @@ -0,0 +1,183 @@ +use std::ops::ControlFlow; + +use alloy::json_abi::JsonAbi; +use anyhow::{bail, Context, Result}; +use sqlparser_latest::ast::{self, visit_expressions_mut}; + +static FUNCTION_NAME: &str = "sg_event_signature"; + +/// Replaces `sg_event_signature('CONTRACT_NAME', 'EVENT_NAME')` function calls with +/// the correct event signature based on `abis`. +/// +/// # Errors +/// +/// Returns an error if: +/// - The function is called with incorrect arguments +/// - The contract name is not found in `abis` +/// - The event name is not found in `abis` +/// +/// The returned error is deterministic. 
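// Illustrative sketch, not part of this patch (callable only from within the
// query_builder module, since these helpers are pub(super)): resolving the
// helper call against a made-up ABI; the table and column names are invented.
fn example_resolve_event_signature() -> Result<()> {
    let abi = JsonAbi::parse([
        "event Transfer(address indexed from, address indexed to, uint256 value)",
    ])?;

    let mut query = super::parse_query(
        "SELECT * FROM eth.logs WHERE topic0 = sg_event_signature('Token', 'Transfer')",
    )?;
    resolve_event_signatures(&mut query, &[("Token", &abi)])?;

    // The helper call is now the literal
    // 'event Transfer(address indexed from, address indexed to, uint256 value)'.
    Ok(())
}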
+pub(super) fn resolve_event_signatures( + query: &mut ast::Query, + abis: &[(&str, &JsonAbi)], +) -> Result<()> { + let visit_result = visit_expressions_mut(query, |expr| match visit_expr(expr, abis) { + Ok(()) => ControlFlow::Continue(()), + Err(e) => ControlFlow::Break(e), + }); + + if let ControlFlow::Break(e) = visit_result { + return Err(e).with_context(|| format!("failed to resolve '{FUNCTION_NAME}' calls")); + } + + Ok(()) +} + +fn visit_expr(expr: &mut ast::Expr, abis: &[(&str, &JsonAbi)]) -> Result<()> { + let ast::Expr::Function(function) = expr else { + return Ok(()); + }; + + if !FUNCTION_NAME.eq_ignore_ascii_case(&function.name.to_string()) { + return Ok(()); + } + + let Some((contract_name, event_name)) = get_args(function) else { + bail!("invalid function call: expected `{FUNCTION_NAME}('CONTRACT_NAME', 'EVENT_NAME')`, found: `{function}`"); + }; + + let Some(event) = get_event(abis, contract_name, event_name) else { + bail!("invalid function call: unknown contract '{contract_name}' or event '{event_name}'"); + }; + + let signature = ast::Value::SingleQuotedString(event.full_signature()).with_empty_span(); + *expr = ast::Expr::Value(signature); + + Ok(()) +} + +fn get_args<'a>(function: &'a ast::Function) -> Option<(&'a str, &'a str)> { + let ast::FunctionArguments::List(args) = &function.args else { + return None; + }; + + if args.args.len() != 2 { + return None; + } + + match (get_arg(&args.args[0]), get_arg(&args.args[1])) { + (Some(contract_name), Some(event_name)) => Some((contract_name, event_name)), + _ => None, + } +} + +fn get_arg<'a>(arg: &'a ast::FunctionArg) -> Option<&'a str> { + let ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr)) = arg else { + return None; + }; + + match expr { + ast::Expr::Value(ast::ValueWithSpan { + value: ast::Value::SingleQuotedString(value), + .. + }) if !value.is_empty() => Some(value), + _ => None, + } +} + +fn get_event<'a>( + abis: &'a [(&str, &JsonAbi)], + contract_name: &str, + event_name: &str, +) -> Option<&'a alloy::json_abi::Event> { + abis.iter() + .filter(|(name, _)| *name == contract_name) + .map(|(_, contract)| contract.event(event_name)) + .flatten() + .map(|events| events.first()) + .flatten() + .next() +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + use self::fixtures::*; + + mod fixtures { + use std::sync::LazyLock; + + use super::*; + + pub(super) static ABIS: LazyLock> = LazyLock::new(|| { + vec![ + ("ContractA", JsonAbi::parse([&*event("TransferA")]).unwrap()), + ("ContractB", JsonAbi::parse([&*event("TransferB")]).unwrap()), + ("ContractB", JsonAbi::parse([&*event("TransferC")]).unwrap()), + ] + }); + + pub(super) fn event(name: &str) -> String { + format!("event {name}(address indexed from, address indexed to, address value)") + } + } + + macro_rules! test_resolve_event_signatures { + ($($name:ident: $query:expr => $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let mut query = parse_query($query).unwrap(); + let abis = ABIS.iter().map(|abi| (abi.0, &abi.1)).collect::>(); + let result = resolve_event_signatures(&mut query, &abis); + + match $expected { + Result::<&str, ()>::Ok(expected) => { + result.unwrap(); + assert_eq!(query, parse_query(expected).unwrap()); + }, + Err(_) => { + result.unwrap_err(); + } + } + } + )* + }; + } + + test_resolve_event_signatures! 
{ + nothing_to_resolve: "SELECT a FROM b" => Ok("SELECT a FROM b"), + + call_with_no_arguments: "SELECT a FROM b WHERE c = sg_event_signature()" => Err(()), + call_with_one_argument: "SELECT a FROM b WHERE c = sg_event_signature('ContractA')" => Err(()), + call_with_first_invalid_argument: "SELECT a FROM b WHERE c = sg_event_signature(ContractA, 'TransferA')" => Err(()), + call_with_second_invalid_argument: "SELECT a FROM b WHERE c = sg_event_signature('ContractA', TransferA)" => Err(()), + call_with_two_invalid_arguments: "SELECT a FROM b WHERE c = sg_event_signature(ContractA, TransferA)" => Err(()), + call_with_unknown_contract: "SELECT a FROM b WHERE c = sg_event_signature('ContractX', 'TransferA')" => Err(()), + call_with_unknown_event: "SELECT a FROM b WHERE c = sg_event_signature('ContractA', 'TransferX')" => Err(()), + call_with_contract_and_event_mismatch: "SELECT a FROM b WHERE c = sg_event_signature('ContractA', 'TransferB')" => Err(()), + call_with_invalid_argument_cases: "SELECT a FROM b WHERE c = sg_event_signature('contractA', 'transferA')" => Err(()), + + resolve_one_call: + "SELECT a FROM b WHERE c = sg_event_signature('ContractA', 'TransferA')" => + Ok(&*format!("SELECT a FROM b WHERE c = '{}'", event("TransferA"))), + + resolve_multiple_calls: + "SELECT a FROM b WHERE c = sg_event_signature('ContractA', 'TransferA') OR d = sg_event_signature('ContractA', 'TransferA')" => + Ok(&*format!("SELECT a FROM b WHERE c = '{}' OR d = '{}'", event("TransferA"), event("TransferA"))), + + resolve_multiple_calls_with_different_arguments: + "SELECT a FROM b WHERE c = sg_event_signature('ContractA', 'TransferA') OR d = sg_event_signature('ContractB', 'TransferB')" => + Ok(&*format!("SELECT a FROM b WHERE c = '{}' OR d = '{}'", event("TransferA"), event("TransferB"))), + + resolve_multiple_calls_with_events_from_different_abis_with_the_same_name: + "SELECT a FROM b WHERE c = sg_event_signature('ContractB', 'TransferB') OR d = sg_event_signature('ContractB', 'TransferC')" => + Ok(&*format!("SELECT a FROM b WHERE c = '{}' OR d = '{}'", event("TransferB"), event("TransferC"))), + + resolve_calls_with_case_insensitive_function_name: + "SELECT a FROM b WHERE c = sg_Event_SIGNATURE('ContractA', 'TransferA')" => + Ok(&*format!("SELECT a FROM b WHERE c = '{}'", event("TransferA"))), + } +} diff --git a/graph/src/amp/sql/query_builder/mod.rs b/graph/src/amp/sql/query_builder/mod.rs new file mode 100644 index 00000000000..5f5458ec092 --- /dev/null +++ b/graph/src/amp/sql/query_builder/mod.rs @@ -0,0 +1,191 @@ +mod block_range_query; +mod context_query; +mod event_signature_resolver; +mod parser; +mod source_address_resolver; +mod table_extractor; +mod table_validator; + +use std::{fmt, ops::RangeInclusive}; + +use alloy::{ + json_abi::JsonAbi, + primitives::{Address, BlockNumber}, +}; +use anyhow::{bail, Context, Result}; +use itertools::Itertools; +use sqlparser_latest::ast; + +use self::{ + block_range_query::new_block_range_query, + context_query::new_context_query, + event_signature_resolver::resolve_event_signatures, + parser::parse_query, + source_address_resolver::resolve_source_address, + table_extractor::{extract_tables, TableReference}, + table_validator::validate_tables, +}; + +/// Represents a valid SQL query that can be executed on an Amp server. +#[derive(Debug, Clone)] +pub struct ValidQuery { + query: ast::Query, +} + +impl ValidQuery { + /// Parses, validates and resolves the input SQL query. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The SQL query cannot be parsed + /// - The SQL query is not valid + /// - The SQL query cannot be resolved + /// + /// The returned error is deterministic. + pub fn new<'a>( + sql: &str, + dataset: &str, + tables: impl IntoIterator, + source_address: &Address, + abis: impl IntoIterator, + ) -> Result { + let mut query = parse_query(sql).context("failed to parse SQL query")?; + + Self::validate(&query, dataset, tables).context("failed to validate SQL query")?; + Self::resolve(&mut query, source_address, abis).context("failed to resolve SQL query")?; + + Ok(Self { query }) + } + + /// Validates the SQL query. + /// + /// # Errors + /// + /// Returns an error if: + /// - The SQL query references unknown datasets or tables + /// - The SQL query uses custom `SETTINGS` + /// + /// The returned error is deterministic. + fn validate<'a>( + query: &ast::Query, + dataset: &str, + tables: impl IntoIterator, + ) -> Result<()> { + validate_tables(query, dataset, tables)?; + + if query.settings.is_some() { + bail!("custom SETTINGS are not allowed"); + } + + Ok(()) + } + + /// Resolves subgraph-specific function calls in the SQL query. + /// + /// # Errors + /// + /// Returns an error if: + /// - Source address function calls cannot be resolved + /// - Event signature function calls cannot be resolved + /// + /// The returned error is deterministic. + fn resolve<'a>( + query: &mut ast::Query, + source_address: &Address, + abis: impl IntoIterator, + ) -> Result<()> { + resolve_source_address(query, source_address)?; + resolve_event_signatures(query, &abis.into_iter().collect_vec())?; + + Ok(()) + } +} + +impl fmt::Display for ValidQuery { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.query) + } +} + +/// Represents a valid SQL query that contains columns required by Amp subgraphs. +#[derive(Debug, Clone)] +pub struct ContextQuery { + query: ast::Query, + block_number_column: String, +} + +impl ContextQuery { + /// Wraps the SQL query with additional context columns from a separate dataset. + /// + /// Creates two CTEs: one wrapping the input `query` and another loading context columns + /// from the specified context dataset and table. Joins both CTEs on block numbers to + /// include the context columns in the original query's output. + /// + /// This enables including columns required by Amp subgraphs in the original SQL query. + pub fn new<'a>( + valid_query: ValidQuery, + block_number_column: &str, + context_dataset: &str, + context_table: &str, + context_columns: impl IntoIterator, + ) -> Self { + let ValidQuery { query } = valid_query; + + let query = new_context_query( + &query, + block_number_column, + context_dataset, + context_table, + context_columns, + ); + + Self { + query, + block_number_column: block_number_column.to_string(), + } + } +} + +/// Builds valid SQL queries for execution on an Amp server with block range limits. +#[derive(Debug, Clone)] +pub struct BlockRangeQueryBuilder { + query: ast::Query, + block_number_column: String, +} + +impl BlockRangeQueryBuilder { + /// Creates a new block range query builder with the specified valid SQL query. + pub fn new(valid_query: ValidQuery, block_number_column: &str) -> Self { + let ValidQuery { query } = valid_query; + + Self { + query, + block_number_column: block_number_column.to_string(), + } + } + + /// Creates a new block range query builder with the specified context SQL query. 
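// Illustrative sketch, not part of this patch: the intended flow from a raw
// subgraph SQL string to an executable block-range query. The dataset, table,
// column and contract names are made up, and the exact argument types of
// `ValidQuery::new` are assumed from the surrounding code.
fn example_pipeline(abi: &JsonAbi) -> Result<String> {
    let source_address = Address::ZERO;

    let valid = ValidQuery::new(
        "SELECT block_num, amount FROM eth.transfers WHERE recipient = sg_source_address()",
        "eth",
        ["transfers"],
        &source_address,
        [("Token", abi)],
    )?;

    // Join in the extra context columns Amp subgraphs need, keyed on block number.
    let with_context = ContextQuery::new(valid, "block_num", "eth", "blocks", ["hash"]);

    // Restrict execution to blocks 100..=200; the output is ordered by block number.
    let builder = BlockRangeQueryBuilder::new_with_context(with_context);
    Ok(builder.build_with_block_range(&(100..=200)))
}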
+ pub fn new_with_context(context_query: ContextQuery) -> Self { + let ContextQuery { + query, + block_number_column, + } = context_query; + + Self { + query, + block_number_column, + } + } + + /// Limits the query execution to the specified block range. + /// + /// Wraps this SQL query in a CTE, and creates CTEs for every table it references. + /// These CTEs load data from the referenced tables only on the specified block range. + /// All the table references in the original SQL query are replaced with the created CTE names. + /// + /// The output is ordered by block numbers. + pub fn build_with_block_range(&self, block_range: &RangeInclusive) -> String { + new_block_range_query(&self.query, &self.block_number_column, block_range).to_string() + } +} diff --git a/graph/src/amp/sql/query_builder/parser.rs b/graph/src/amp/sql/query_builder/parser.rs new file mode 100644 index 00000000000..1f965b955b6 --- /dev/null +++ b/graph/src/amp/sql/query_builder/parser.rs @@ -0,0 +1,115 @@ +use std::ops::ControlFlow; + +use anyhow::{anyhow, bail, Context, Result}; +use itertools::Itertools; +use sqlparser_latest::{ + ast::{self, Visit, Visitor}, + dialect::GenericDialect, + parser::Parser, +}; + +/// Parses a SQL query and returns its AST. +/// +/// # Errors +/// +/// Returns an error if: +/// - The SQL query cannot be parsed +/// - The SQL query contains multiple SQL statements +/// - The SQL query is not a `SELECT` query +/// +/// The returned error is deterministic. +pub(super) fn parse_query(s: impl AsRef) -> Result { + let statement = Parser::parse_sql(&GenericDialect {}, s.as_ref()) + .context("invalid SQL query")? + .into_iter() + .exactly_one() + .map_err(|e| anyhow!("expected exactly one SQL statement, found {}", e.count()))?; + + let query = match statement { + ast::Statement::Query(query) => *query, + _ => bail!("invalid SQL query: only SELECT statements are allowed"), + }; + + if let ControlFlow::Break(e) = query.visit(&mut AllowOnlySelectQueries) { + return Err(e); + } + + Ok(query) +} + +/// Validates that the SQL query AST contains only `SELECT` queries in subqueries. +struct AllowOnlySelectQueries; + +impl AllowOnlySelectQueries { + /// Returns an error if the `set_expr` is not a `SELECT` expression. + fn visit_set_expr(&self, set_expr: &ast::SetExpr) -> Result<()> { + match set_expr { + ast::SetExpr::Select(_) + | ast::SetExpr::Query(_) + | ast::SetExpr::Values(_) + | ast::SetExpr::Table(_) => Ok(()), + ast::SetExpr::SetOperation { left, right, .. } => { + self.visit_set_expr(left)?; + self.visit_set_expr(right)?; + Ok(()) + } + ast::SetExpr::Insert(_) | ast::SetExpr::Update(_) | ast::SetExpr::Delete(_) => { + bail!("invalid SQL query: only SELECT queries are allowed") + } + } + } +} + +impl Visitor for AllowOnlySelectQueries { + type Break = anyhow::Error; + + fn pre_visit_query(&mut self, query: &ast::Query) -> ControlFlow { + match self.visit_set_expr(&query.body) { + Ok(()) => ControlFlow::Continue(()), + Err(e) => ControlFlow::Break(e), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! test_parse_query { + ($($name:ident: $input:expr => $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let result = parse_query($input); + + match $expected { + Result::<&str, &str>::Ok(expected) => { + assert_eq!(result.unwrap().to_string(), expected); + }, + Err(e) => { + assert_eq!(result.unwrap_err().to_string(), e); + } + } + } + )* + }; + } + + test_parse_query! 
{ + invalid_query: "SELECT" => Err("invalid SQL query"), + multiple_statements: "SELECT a FROM b; SELECT c FROM d" => Err("expected exactly one SQL statement, found 2"), + insert_statement: "INSERT INTO a VALUES (b)" => Err("invalid SQL query: only SELECT statements are allowed"), + update_statement: "UPDATE a SET b = c" => Err("invalid SQL query: only SELECT statements are allowed"), + delete_statement: "DELETE FROM a WHERE b = c" => Err("invalid SQL query: only SELECT statements are allowed"), + truncate_statement: "TRUNCATE TABLE a" => Err("invalid SQL query: only SELECT statements are allowed"), + drop_statement: "DROP TABLE a" => Err("invalid SQL query: only SELECT statements are allowed"), + + nested_insert_query: "WITH a AS (INSERT INTO b VALUES (c) RETURNING d) SELECT * FROM a" => Err("invalid SQL query: only SELECT queries are allowed"), + nested_update_query: "WITH a AS (UPDATE b SET c = d RETURNING e) SELECT * FROM a" => Err("invalid SQL query: only SELECT queries are allowed"), + nested_delete_query: "WITH a AS (DELETE FROM b WHERE c = d RETURNING e) SELECT * FROM a" => Err("invalid SQL query: only SELECT queries are allowed"), + + valid_query: "SELECT a FROM b" => Ok("SELECT a FROM b"), + valid_query_with_cte: "WITH a AS (SELECT b FROM c) SELECT * FROM a" => Ok("WITH a AS (SELECT b FROM c) SELECT * FROM a"), + valid_query_with_join: "SELECT a FROM b INNER JOIN c ON c.c = b.b" => Ok("SELECT a FROM b INNER JOIN c ON c.c = b.b"), + } +} diff --git a/graph/src/amp/sql/query_builder/source_address_resolver.rs b/graph/src/amp/sql/query_builder/source_address_resolver.rs new file mode 100644 index 00000000000..579e0873bb6 --- /dev/null +++ b/graph/src/amp/sql/query_builder/source_address_resolver.rs @@ -0,0 +1,133 @@ +use std::ops::ControlFlow; + +use alloy::primitives::Address; +use anyhow::{bail, Context, Result}; +use sqlparser_latest::ast::{self, visit_expressions_mut}; + +static FUNCTION_NAME: &str = "sg_source_address"; + +/// Replaces `sg_source_address()` function calls in the SQL query with the `source_address`. +/// +/// # Errors +/// +/// Returns an error if the function is called with any arguments. +/// +/// The returned error is deterministic. 
+pub(super) fn resolve_source_address( + query: &mut ast::Query, + source_address: &Address, +) -> Result<()> { + let visit_result = + visit_expressions_mut(query, |expr| match visit_expr(expr, source_address) { + Ok(()) => ControlFlow::Continue(()), + Err(e) => ControlFlow::Break(e), + }); + + if let ControlFlow::Break(e) = visit_result { + return Err(e).with_context(|| format!("failed to resolve '{FUNCTION_NAME}' calls")); + } + + Ok(()) +} + +fn visit_expr(expr: &mut ast::Expr, source_address: &Address) -> Result<()> { + let ast::Expr::Function(function) = expr else { + return Ok(()); + }; + + if !FUNCTION_NAME.eq_ignore_ascii_case(&function.name.to_string()) { + return Ok(()); + } + + match &function.args { + ast::FunctionArguments::None => {} + ast::FunctionArguments::List(args) if args.args.is_empty() => {} + _ => { + bail!("invalid function call: function '{FUNCTION_NAME}' does not accept arguments"); + } + } + + *function = ast::Function { + name: ast::ObjectName(vec![ast::ObjectNamePart::Identifier(ast::Ident::new( + "arrow_cast", + ))]), + uses_odbc_syntax: false, + parameters: ast::FunctionArguments::None, + args: ast::FunctionArguments::List(ast::FunctionArgumentList { + duplicate_treatment: None, + args: vec![ + ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(ast::Expr::Value( + ast::Value::HexStringLiteral(hex::encode(source_address)).with_empty_span(), + ))), + ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(ast::Expr::Value( + ast::Value::SingleQuotedString("FixedSizeBinary(20)".to_string()) + .with_empty_span(), + ))), + ], + clauses: vec![], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + use self::fixtures::*; + + mod fixtures { + use super::*; + + pub(super) const SOURCE_ADDRESS: Address = Address::ZERO; + + pub(super) const RESOLVED_FUNCTION_CALL: &str = + "arrow_cast(X'0000000000000000000000000000000000000000', 'FixedSizeBinary(20)')"; + } + + macro_rules! test_resolve_source_address { + ($($name:ident: $query:expr => $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let mut query = parse_query($query).unwrap(); + let result = resolve_source_address(&mut query, &SOURCE_ADDRESS); + + match $expected { + Result::<&str, ()>::Ok(expected) => { + result.unwrap(); + assert_eq!(query, parse_query(expected).unwrap()); + }, + Err(_) => { + result.unwrap_err(); + } + } + } + )* + }; + } + + test_resolve_source_address! 
{ + nothing_to_resolve: "SELECT a FROM b" => Ok("SELECT a FROM b"), + call_with_one_argument: "SELECT a FROM b WHERE c = sg_source_address(d)" => Err(()), + call_with_multiple_argument: "SELECT a FROM b WHERE c = sg_source_address(d, e)" => Err(()), + + resolve_one_call: + "SELECT a FROM b WHERE c = sg_source_address()" => + Ok(&*format!("SELECT a FROM b WHERE c = {RESOLVED_FUNCTION_CALL}")), + + resolve_multiple_calls: + "SELECT a FROM b WHERE c = sg_source_address() OR d = sg_source_address()" => + Ok(&*format!("SELECT a FROM b WHERE c = {RESOLVED_FUNCTION_CALL} OR d = {RESOLVED_FUNCTION_CALL}")), + + resolve_calls_with_case_insensitive_function_name: + "SELECT a FROM b WHERE c = sg_Source_ADDRESS()" => + Ok(&*format!("SELECT a FROM b WHERE c = {RESOLVED_FUNCTION_CALL}")), + } +} diff --git a/graph/src/amp/sql/query_builder/table_extractor.rs b/graph/src/amp/sql/query_builder/table_extractor.rs new file mode 100644 index 00000000000..b3cbc9d9d03 --- /dev/null +++ b/graph/src/amp/sql/query_builder/table_extractor.rs @@ -0,0 +1,207 @@ +use std::{collections::BTreeSet, fmt, ops::ControlFlow}; + +use sqlparser_latest::ast::{self, Visit, Visitor}; + +/// Returns all tables that are referenced by the SQL query. +/// +/// The table names are lowercased and quotes are ignored. +pub(super) fn extract_tables(query: &ast::Query) -> BTreeSet { + let mut table_extractor = TableExtractor::new(); + let _: ControlFlow<()> = Visit::visit(query, &mut table_extractor); + + table_extractor.tables +} + +/// Contains a normalized table reference. +/// +/// Used to compare physical table references with CTE names and custom tables. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(super) struct TableReference(ast::ObjectName); + +impl TableReference { + const QUOTE_STYLE: char = '"'; + + /// Creates a new table reference from a custom dataset and table. + pub(super) fn new(dataset: &str, table: &str) -> Self { + Self( + vec![ + ast::Ident::with_quote(Self::QUOTE_STYLE, dataset), + ast::Ident::with_quote(Self::QUOTE_STYLE, table), + ] + .into(), + ) + } + + /// Creates a new table reference from an object name. + pub(super) fn with_object_name(object_name: &ast::ObjectName) -> Self { + Self::with_idents( + object_name + .0 + .iter() + .map(|object_name_part| match object_name_part { + ast::ObjectNamePart::Identifier(ident) => ident, + }), + ) + } + + /// Creates a new table reference from a list of identifiers. + pub(super) fn with_idents<'a>(idents: impl IntoIterator) -> Self { + Self( + idents + .into_iter() + .map(|ident| { + let ast::Ident { + value, + quote_style, + span: _, + } = ident; + + ast::Ident::with_quote(Self::QUOTE_STYLE, { + if quote_style.is_none() { + value.to_lowercase() + } else { + value.to_owned() + } + }) + }) + .collect::>() + .into(), + ) + } +} + +impl fmt::Display for TableReference { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +/// Visits the SQL query AST and extracts referenced table names, ignoring CTEs. +struct TableExtractor { + tables: BTreeSet, + cte_stack: CteStack, +} + +impl TableExtractor { + /// Creates a new empty table extractor. + fn new() -> Self { + Self { + tables: BTreeSet::new(), + cte_stack: CteStack::new(), + } + } + + /// Extracts and stores the table name from the current `table_factor`. + fn visit_table_factor(&mut self, table_factor: &ast::TableFactor) { + let ast::TableFactor::Table { name, .. 
} = table_factor else { + return; + }; + + let table_reference = TableReference::with_object_name(name); + if self.cte_stack.contains(&table_reference) { + return; + } + + self.tables.insert(table_reference); + } +} + +impl Visitor for TableExtractor { + type Break = (); + + fn pre_visit_query(&mut self, query: &ast::Query) -> ControlFlow { + self.cte_stack.pre_visit_query(query); + ControlFlow::Continue(()) + } + + fn post_visit_query(&mut self, _query: &ast::Query) -> ControlFlow { + self.cte_stack.post_visit_query(); + ControlFlow::Continue(()) + } + + fn pre_visit_table_factor( + &mut self, + table_factor: &ast::TableFactor, + ) -> ControlFlow { + self.visit_table_factor(table_factor); + ControlFlow::Continue(()) + } +} + +/// Maintains a list of active CTEs for each subquery scope. +struct CteStack { + stack: Vec>, +} + +impl CteStack { + /// Creates a new empty CTE stack. + fn new() -> Self { + Self { stack: Vec::new() } + } + + /// Returns `true` if the `table_reference` is present in the CTE list at any scope. + fn contains(&self, table_reference: &TableReference) -> bool { + self.stack + .iter() + .any(|scope| scope.contains(table_reference)) + } + + /// Creates a new subquery scope with all the CTEs of the current `query`. + fn pre_visit_query(&mut self, query: &ast::Query) { + let cte_tables = match &query.with { + Some(with) => with + .cte_tables + .iter() + .map(|cte_table| TableReference::with_idents([&cte_table.alias.name])) + .collect(), + None => BTreeSet::new(), + }; + + self.stack.push(cte_tables); + } + + /// Removes all the CTEs from the most recent subquery scope. + fn post_visit_query(&mut self) { + self.stack.pop(); + } +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + macro_rules! test_extract_tables { + ($($name:ident: $input:expr => $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let query = parse_query($input).unwrap(); + assert_eq!( + extract_tables(&query).into_iter().map(|table| table.to_string()).collect::>(), + $expected.into_iter().map(|table| table.to_string()).collect::>() + ); + } + )* + }; + } + + test_extract_tables! 
{ + one_table: "SELECT a FROM b" => [r#""b""#], + multiple_tables_with_one_join: "SELECT a FROM b JOIN c ON c.c = b.b" => [r#""b""#, r#""c""#], + multiple_tables_with_multiple_joins: "SELECT a FROM b JOIN c ON c.c = b.b JOIN d ON d.d = b.b" => [r#""b""#, r#""c""#, r#""d""#], + one_table_with_one_cte: "WITH a AS (SELECT * FROM b) SELECT * FROM a" => [r#""b""#], + one_table_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM a) SELECT * FROM c" => [r#""b""#], + multiple_tables_with_multiple_ctes: "WITH a AS (SELECT * FROM b), c AS (SELECT * FROM d) SELECT * FROM a JOIN c ON c.c = a.a" => [r#""b""#, r#""d""#], + multiple_tables_with_nested_ctes: "WITH a AS (WITH b AS (SELECT * FROM c) SELECT * FROM d JOIN b ON b.b = d.d) SELECT * FROM a" => [r#""c""#, r#""d""#], + multiple_tables_with_union: "SELECT a FROM b UNION SELECT c FROM d" => [r#""b""#, r#""d""#], + multiple_tables_with_union_all: "SELECT a FROM b UNION ALL SELECT c FROM d" => [r#""b""#, r#""d""#], + + namespace_is_preserved: "SELECT a FROM b.c" => [r#""b"."c""#], + catalog_is_preserved: "SELECT a FROM b.c.d" => [r#""b"."c"."d""#], + unquoted_tables_are_lowercased: "SELECT a FROM B.C" => [r#""b"."c""#], + single_quotes_in_tables_are_converted_to_double_quotes: "SELECT a FROM 'B'.'C'" => [r#""B"."C""#], + double_quotes_in_tables_are_preserved: r#"SELECT a FROM "B"."C""# => [r#""B"."C""#], + backticks_in_tables_are_converted_to_double_quotes: "SELECT a FROM `B`.`C`" => [r#""B"."C""#], + } +} diff --git a/graph/src/amp/sql/query_builder/table_validator.rs b/graph/src/amp/sql/query_builder/table_validator.rs new file mode 100644 index 00000000000..c3aac82f2d3 --- /dev/null +++ b/graph/src/amp/sql/query_builder/table_validator.rs @@ -0,0 +1,99 @@ +use std::collections::BTreeSet; + +use anyhow::{bail, Result}; +use sqlparser_latest::ast; + +use super::{extract_tables, TableReference}; + +/// Validates that SQL query references only allowed dataset and tables. +/// +/// # Errors +/// +/// Returns an error if: +/// - The `query` does not reference any tables +/// - The `query` references a table not in `allowed_tables` +/// - The `query` references a dataset other than `allowed_dataset` +/// +/// The returned error is deterministic. +pub(super) fn validate_tables<'a>( + query: &ast::Query, + allowed_dataset: &str, + allowed_tables: impl IntoIterator, +) -> Result<()> { + let used_tables = extract_tables(query); + + if used_tables.is_empty() { + bail!("query does not use any tables"); + } + + let allowed_tables = allowed_tables + .into_iter() + .map(|allowed_table| TableReference::new(allowed_dataset, allowed_table)) + .collect::>(); + + for used_table in used_tables { + if !allowed_tables.contains(&used_table) { + bail!("table '{used_table}' not allowed"); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::super::parse_query; + use super::*; + + macro_rules! test_validate_tables { + ($($name:ident: $input:expr, $dataset:expr, $tables:expr => $expected:expr),* $(,)?) => { + $( + #[test] + fn $name() { + let query = parse_query($input).unwrap(); + let result = validate_tables(&query, $dataset, $tables); + + match $expected { + Result::<(), &str>::Ok(()) => { + result.unwrap(); + }, + Err(e) => { + assert_eq!(result.unwrap_err().to_string(), e); + } + } + } + )* + }; + } + + test_validate_tables! 
{ + no_table_references: "SELECT *", "a", ["b"] => Err("query does not use any tables"), + missing_dataset: "SELECT * FROM b", "a", ["b"] => Err(r#"table '"b"' not allowed"#), + missing_table: "SELECT * FROM a", "a", ["b"] => Err(r#"table '"a"' not allowed"#), + invalid_dataset: "SELECT * FROM c.b", "a", ["b"] => Err(r#"table '"c"."b"' not allowed"#), + invalid_nested_dataset: "WITH a AS (SELECT * FROM c.b) SELECT * FROM a", "a", ["b"] => Err(r#"table '"c"."b"' not allowed"#), + invalid_table: "SELECT * FROM a.c", "a", ["b"] => Err(r#"table '"a"."c"' not allowed"#), + invalid_nested_table: "WITH a AS (SELECT * FROM a.c) SELECT * FROM a", "a", ["b"] => Err(r#"table '"a"."c"' not allowed"#), + using_catalog: "SELECT * FROM c.a.b", "a", ["b"] => Err(r#"table '"c"."a"."b"' not allowed"#), + + one_valid_table: "SELECT * FROM a.b", "a", ["b"] => Ok(()), + one_valid_nested_table: "WITH a AS (SELECT * FROM a.b) SELECT * FROM a", "a", ["b"] => Ok(()), + multiple_valid_tables: "SELECT * FROM a.b JOIN a.c ON a.c.c = a.b.b", "a", ["b", "c"] => Ok(()), + multiple_valid_nested_tables: "WITH a AS (SELECT * FROM a.b JOIN a.c ON a.c.c = a.b.b) SELECT * FROM a", "a", ["b", "c"] => Ok(()), + + unquoted_dataset_is_case_insensitive: "SELECT * FROM A.b", "a", ["b"] => Ok(()), + unquoted_tables_are_case_insensitive: "SELECT * FROM a.B", "a", ["b"] => Ok(()), + + single_quoted_dataset_is_case_sensitive: "SELECT * FROM 'A'.b", "a", ["b"] => Err(r#"table '"A"."b"' not allowed"#), + single_quoted_tables_are_case_sensitive: "SELECT * FROM a.'B'", "a", ["b"] => Err(r#"table '"a"."B"' not allowed"#), + + double_quoted_dataset_is_case_sensitive: r#"SELECT * FROM "A".b"#, "a", ["b"] => Err(r#"table '"A"."b"' not allowed"#), + double_quoted_tables_are_case_sensitive: r#"SELECT * FROM a."B""#, "a", ["b"] => Err(r#"table '"a"."B"' not allowed"#), + + backtick_quoted_dataset_is_case_sensitive: "SELECT * FROM `A`.b", "a", ["b"] => Err(r#"table '"A"."b"' not allowed"#), + backtick_quoted_tables_are_case_sensitive: "SELECT * FROM a.`B`", "a", ["b"] => Err(r#"table '"a"."B"' not allowed"#), + + allowed_dataset_is_case_sensitive: "SELECT * FROM a.b", "A", ["b"] => Err(r#"table '"a"."b"' not allowed"#), + allowed_tables_are_case_sensitive: "SELECT * FROM a.b", "a", ["B"] => Err(r#"table '"a"."b"' not allowed"#), + } +} diff --git a/graph/src/amp/stream_aggregator/error.rs b/graph/src/amp/stream_aggregator/error.rs new file mode 100644 index 00000000000..a2ba55f71e2 --- /dev/null +++ b/graph/src/amp/stream_aggregator/error.rs @@ -0,0 +1,51 @@ +use std::sync::Arc; + +use thiserror::Error; + +use crate::amp::error::IsDeterministic; + +#[derive(Debug, Error)] +pub enum Error { + #[error("failed to aggregate record batches: {0:#}")] + Aggregation(#[source] anyhow::Error), + + #[error("failed to buffer record batches from stream '{stream_name}': {source:#}")] + Buffer { + stream_name: Arc, + source: anyhow::Error, + }, + + #[error("failed to read record batch from stream '{stream_name}': {source:#}")] + Stream { + stream_name: Arc, + source: anyhow::Error, + is_deterministic: bool, + }, +} + +impl Error { + pub(super) fn stream(stream_name: Arc, e: E) -> Self + where + E: std::error::Error + IsDeterministic + Send + Sync + 'static, + { + let is_deterministic = e.is_deterministic(); + + Self::Stream { + stream_name, + source: anyhow::Error::from(e), + is_deterministic, + } + } +} + +impl IsDeterministic for Error { + fn is_deterministic(&self) -> bool { + match self { + Self::Aggregation(_) => true, + Self::Buffer { .. 
} => true, + Self::Stream { + is_deterministic, .. + } => *is_deterministic, + } + } +} diff --git a/graph/src/amp/stream_aggregator/mod.rs b/graph/src/amp/stream_aggregator/mod.rs new file mode 100644 index 00000000000..e2f0892252f --- /dev/null +++ b/graph/src/amp/stream_aggregator/mod.rs @@ -0,0 +1,231 @@ +mod error; +mod record_batch; + +use std::{ + pin::Pin, + sync::Arc, + task::{self, Poll}, +}; + +use anyhow::{anyhow, Result}; +use arrow::array::RecordBatch; +use futures03::{stream::BoxStream, Stream, StreamExt, TryStreamExt}; +use slog::{debug, info, Logger}; + +use self::record_batch::Buffer; +use crate::{ + amp::{client::ResponseBatch, error::IsDeterministic, log::Logger as _}, + cheap_clone::CheapClone, +}; + +pub use self::{ + error::Error, + record_batch::{RecordBatchGroup, RecordBatchGroups, StreamRecordBatch}, +}; + +/// Reads record batches from multiple streams and groups them by block number and hash pairs. +/// +/// Processes each row in the response record batches and groups them by block number +/// and hash. When processing starts for a new block, all data from previous blocks +/// is grouped and streamed in batches. +/// +/// The reason the aggregation is required is to ensure compatibility with the existing +/// subgraph storage implementation. +/// +/// # Stream requirements +/// +/// - Every record batch must have valid block number and hash columns +/// - Every record batch must contain blocks in ascending order +/// +/// # Performance +/// +/// To ensure data consistency and ordered output, the aggregator waits for slower streams +/// to catch up with faster streams. The output stream speed matches the slowest input stream. +pub struct StreamAggregator { + named_streams: Vec<(Arc, BoxStream<'static, Result>)>, + buffer: Buffer, + logger: Logger, + + /// Indicates whether all streams are fully consumed. + is_finalized: bool, + + /// Indicates whether any stream has produced an error. + /// + /// When `true`, the stream aggregator stops polling all other streams. + is_failed: bool, +} + +impl StreamAggregator { + /// Creates a new stream aggregator from the `streams` with a bounded buffer. 
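// Illustrative sketch, not part of this patch: draining two named Amp streams
// through the aggregator. The stream arguments stand in for ResponseBatch
// streams produced elsewhere (e.g. by the Flight client), the buffer size is
// arbitrary, and the exact shape of `RecordBatchGroups` is assumed here.
async fn example_drain<E>(
    logger: &Logger,
    transfers: BoxStream<'static, Result<ResponseBatch, E>>,
    approvals: BoxStream<'static, Result<ResponseBatch, E>>,
) -> Result<(), Error>
where
    E: std::error::Error + IsDeterministic + Send + Sync + 'static,
{
    let mut aggregator = StreamAggregator::new(
        logger,
        vec![
            ("transfers".to_string(), transfers),
            ("approvals".to_string(), approvals),
        ],
        32,
    );

    while let Some(groups) = aggregator.next().await {
        for (_block_ptr, _group) in groups? {
            // One RecordBatchGroup per (block number, block hash) pair, holding a
            // record batch per contributing input stream for that block.
        }
    }

    Ok(())
}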
+ pub fn new( + logger: &Logger, + named_streams: impl IntoIterator>)>, + max_buffer_size: usize, + ) -> Self + where + E: std::error::Error + IsDeterministic + Send + Sync + 'static, + { + let logger = logger.component("AmpStreamAggregator"); + + let named_streams = named_streams + .into_iter() + .map(|(stream_name, stream)| { + let stream_name: Arc = stream_name.into(); + ( + stream_name.cheap_clone(), + stream + .map_err({ + let stream_name = stream_name.cheap_clone(); + move |e| Error::stream(stream_name.cheap_clone(), e) + }) + .try_filter_map({ + let stream_name = stream_name.cheap_clone(); + move |response_batch| { + let stream_name = stream_name.cheap_clone(); + async move { + match response_batch { + ResponseBatch::Batch { data } => Ok(Some(data)), + ResponseBatch::Reorg(_) => Err(Error::Stream { + stream_name: stream_name.cheap_clone(), + source: anyhow!("chain reorg"), + is_deterministic: false, + }), + } + } + } + }) + .boxed(), + ) + }) + .collect::>(); + + let num_streams = named_streams.len(); + + info!(logger, "Initializing stream aggregator"; + "num_streams" => num_streams, + "max_buffer_size" => max_buffer_size + ); + + Self { + named_streams, + buffer: Buffer::new(num_streams, max_buffer_size), + logger, + is_finalized: false, + is_failed: false, + } + } + + fn poll_all_streams( + &mut self, + cx: &mut task::Context<'_>, + ) -> Poll>> { + let mut made_progress = false; + + for (stream_index, (stream_name, stream)) in self.named_streams.iter_mut().enumerate() { + let logger = self.logger.new(slog::o!( + "stream_index" => stream_index, + "stream_name" => stream_name.cheap_clone() + )); + + if self.buffer.is_finalized(stream_index) { + continue; + } + + if self.buffer.is_blocked(stream_index) { + self.is_failed = true; + + return Poll::Ready(Some(Err(Error::Buffer { + stream_name: stream_name.cheap_clone(), + source: anyhow!("buffer is blocked"), + }))); + } + + if !self.buffer.has_capacity(stream_index) { + continue; + } + + match stream.poll_next_unpin(cx) { + Poll::Ready(Some(Ok(record_batch))) if record_batch.num_rows() != 0 => { + let buffer_result = + self.buffer + .extend(stream_index, record_batch) + .map_err(|e| Error::Buffer { + stream_name: stream_name.cheap_clone(), + source: e, + }); + + match buffer_result { + Ok(()) => { + made_progress = true; + + debug!(logger, "Buffered record batch"; + "buffer_size" => self.buffer.size(stream_index), + "has_capacity" => self.buffer.has_capacity(stream_index) + ); + } + Err(e) => { + self.is_failed = true; + + return Poll::Ready(Some(Err(e))); + } + } + } + Poll::Ready(Some(Ok(_empty_record_batch))) => { + debug!(logger, "Received an empty record batch"); + } + Poll::Ready(Some(Err(e))) => { + self.is_failed = true; + + return Poll::Ready(Some(Err(e))); + } + Poll::Ready(None) => { + self.buffer.finalize(stream_index); + + if self.buffer.all_finalized() { + self.is_finalized = true; + } + + made_progress = true; + + info!(logger, "Stream completed"; + "buffer_size" => self.buffer.size(stream_index) + ); + } + Poll::Pending => { + // + } + } + } + + if made_progress { + if let Some(completed_groups) = + self.buffer.completed_groups().map_err(Error::Aggregation)? 
+ { + debug!(self.logger, "Sending completed record batch groups"; + "num_completed_groups" => completed_groups.len() + ); + + return Poll::Ready(Some(Ok(completed_groups))); + } + } + + if self.is_finalized { + info!(self.logger, "All streams completed"); + return Poll::Ready(None); + } + + Poll::Pending + } +} + +impl Stream for StreamAggregator { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll> { + if self.is_finalized || self.is_failed { + return Poll::Ready(None); + } + + self.poll_all_streams(cx) + } +} diff --git a/graph/src/amp/stream_aggregator/record_batch/aggregator.rs b/graph/src/amp/stream_aggregator/record_batch/aggregator.rs new file mode 100644 index 00000000000..f513a2752ed --- /dev/null +++ b/graph/src/amp/stream_aggregator/record_batch/aggregator.rs @@ -0,0 +1,230 @@ +use std::{ + collections::{btree_map::Entry, BTreeMap, HashSet}, + sync::{Arc, Weak}, +}; + +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::{bail, Context, Result}; +use arrow::array::RecordBatch; + +use super::{Decoder, GroupData}; +use crate::cheap_clone::CheapClone; + +/// Groups record batches by block number and hash pairs. +/// +/// This aggregator collects and organizes record batches based on their +/// associated block identifiers. +pub(super) struct Aggregator { + buffer: BTreeMap<(BlockNumber, BlockHash), GroupData>, + buffered_record_batches: Vec>, + is_finalized: bool, +} + +impl Aggregator { + /// Creates a new empty aggregator. + pub(super) fn new() -> Self { + Self { + buffer: BTreeMap::new(), + buffered_record_batches: Vec::new(), + is_finalized: false, + } + } + + /// Extends this aggregator with data from a new `record_batch`. + /// + /// Processes each row in the `record_batch` and groups them by block number + /// and hash. Each unique block is stored in the internal buffer with references + /// to all rows that belong to that block. + /// + /// # Errors + /// + /// Returns an error if: + /// - `record_batch` does not contain block numbers or hashes + /// - `record_batch` contains invalid block numbers or hashes + /// - `record_batch` data is not ordered + /// - `record_batch` data is not consistent + /// + /// The returned error is deterministic. + /// + /// # Panics + /// + /// Panics if this aggregator has already been finalized. 
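// Worked example (illustrative): extending with a batch whose rows carry the
// block pointers (7, 0xaa), (7, 0xaa), (8, 0xbb) produces two groups: the group
// for (7, 0xaa) references the batch once with row indices [0, 1], and the group
// for (8, 0xbb) references it with row index [2]. A later batch that starts at
// block 6, or at block 8 with a hash other than 0xbb, fails
// `ensure_incremental_update` and is rejected as corrupted data.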
+ pub(super) fn extend(&mut self, record_batch: RecordBatch) -> Result<()> { + assert!(!self.is_finalized); + + let record_batch = Arc::new(record_batch); + let decoder = Decoder::new(&record_batch)?; + + self.buffered_record_batches + .push(Arc::downgrade(&record_batch)); + + let num_rows = record_batch.num_rows(); + let mut record_batch_buffered: HashSet<(BlockNumber, BlockHash)> = + HashSet::with_capacity(num_rows); + + for row_index in 0..num_rows { + let err_cx = || format!("invalid group data at row {row_index}"); + let block_number = decoder.block_number(row_index).with_context(err_cx)?; + let block_hash = decoder.block_hash(row_index).with_context(err_cx)?; + let block_ptr = (block_number, block_hash); + + self.ensure_incremental_update(&block_ptr) + .with_context(err_cx)?; + + match self.buffer.entry(block_ptr) { + Entry::Vacant(entry) => { + entry.insert(GroupData::new(record_batch.cheap_clone(), row_index)); + record_batch_buffered.insert(block_ptr); + } + Entry::Occupied(mut entry) => { + let group_data = entry.get_mut(); + + if !record_batch_buffered.contains(&block_ptr) { + group_data.add(record_batch.cheap_clone(), row_index); + record_batch_buffered.insert(block_ptr); + } else { + group_data.add_row_index(row_index); + } + } + } + } + + Ok(()) + } + + /// Returns the block number and hash pair for the most recent completed group. + /// + /// A group is considered complete when: + /// - There is a group with a higher block number in the internal buffer + /// - This aggregator is finalized + /// + /// Any group in this aggregator with a lower block number than the one returned by + /// this method is also considered complete. + pub(super) fn max_completed_block_ptr(&self) -> Option<&(BlockNumber, BlockHash)> { + let mut iter = self.buffer.keys().rev(); + + if self.is_finalized { + return iter.next(); + } + + iter.skip(1).next() + } + + /// Returns `true` if this aggregator contains completed groups. + /// + /// A group is considered complete when: + /// - There is a group with a higher block number in the internal buffer + /// - This aggregator is finalized + pub(super) fn has_completed_groups(&self) -> bool { + (self.is_finalized && !self.buffer.is_empty()) || self.buffer.len() > 1 + } + + /// Removes and returns completed groups from this aggregator up to `max_block_ptr`. + /// + /// # Errors + /// + /// Returns an error if groups cannot be converted into record batches. + /// + /// The returned error is deterministic. + /// + /// # Panics + /// + /// Panics if `max_block_ptr` is greater than the most recent completed block in this aggregator. 
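// Worked example (illustrative): with buffered groups for blocks 5, 6 and 7 and
// the aggregator not yet finalized, `max_completed_block_ptr` returns the pair
// for block 6, since block 7 may still receive rows from future batches. After
// `finalize` is called, it returns the pair for block 7.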
+ pub(super) fn completed_groups( + &mut self, + max_block_ptr: &(BlockNumber, BlockHash), + ) -> Result>> { + if self.buffer.is_empty() { + return Ok(None); + } + + let Some(max_completed_block_ptr) = self.max_completed_block_ptr() else { + return Ok(None); + }; + + assert!(max_block_ptr <= max_completed_block_ptr); + let incomplete_groups = self.buffer.split_off(max_block_ptr); + let mut completed_groups = std::mem::replace(&mut self.buffer, incomplete_groups); + + if let Some((block_ptr, _)) = self.buffer.first_key_value() { + if block_ptr == max_block_ptr { + let (block_ptr, group_data) = self.buffer.pop_first().unwrap(); + completed_groups.insert(block_ptr, group_data); + } + } + + if completed_groups.is_empty() { + return Ok(None); + } + + let completed_groups = completed_groups + .into_iter() + .map(|(block_ptr, group_data)| Ok((block_ptr, group_data.into_record_batch()?))) + .collect::>>()?; + + self.buffered_record_batches + .retain(|weak_ref| weak_ref.strong_count() > 0); + + Ok(Some(completed_groups)) + } + + /// Marks this aggregator as finalized. + /// + /// A finalized aggregator cannot be extended. + pub(super) fn finalize(&mut self) { + self.is_finalized = true; + } + + /// Returns `true` if this aggregator is finalized. + pub(super) fn is_finalized(&self) -> bool { + self.is_finalized + } + + /// Returns the number of record batches that this aggregator holds strong references to. + pub(super) fn len(&self) -> usize { + self.buffered_record_batches + .iter() + .filter(|weak_ref| weak_ref.strong_count() > 0) + .count() + } + + /// Ensures that block updates arrive in sequential order. + /// + /// Validates that the provided block number and hash represent a valid + /// incremental update relative to the last block in the buffer. + /// + /// # Errors + /// + /// Returns an error if: + /// - The block number is less than the maximum stored block number + /// - The block number equals the maximum but has a different hash + /// + /// The returned error is deterministic. + /// + /// # Note + /// + /// Potential reorgs are not handled at this level and are + /// treated as data corruption. + fn ensure_incremental_update( + &self, + (block_number, block_hash): &(BlockNumber, BlockHash), + ) -> Result<()> { + let Some(((max_block_number, max_block_hash), _)) = self.buffer.last_key_value() else { + return Ok(()); + }; + + if block_number < max_block_number { + bail!("received block number {block_number} after {max_block_number}"); + } + + if block_number == max_block_number && block_hash != max_block_hash { + bail!( + "received block hash '0x{}' after '0x{}' for block number {block_number}", + hex::encode(&block_hash), + hex::encode(&max_block_hash) + ); + } + + Ok(()) + } +} diff --git a/graph/src/amp/stream_aggregator/record_batch/buffer.rs b/graph/src/amp/stream_aggregator/record_batch/buffer.rs new file mode 100644 index 00000000000..4b45680636c --- /dev/null +++ b/graph/src/amp/stream_aggregator/record_batch/buffer.rs @@ -0,0 +1,209 @@ +use std::collections::{btree_map::Entry, BTreeMap}; + +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::{bail, Result}; +use arrow::array::RecordBatch; + +use super::{Aggregator, RecordBatchGroup, RecordBatchGroups, StreamRecordBatch}; + +/// Buffers record batches from multiple streams in memory and creates +/// groups of record batches by block number and hash pairs. 
+pub(in super::super) struct Buffer { + aggregators: Vec, + num_streams: usize, + max_buffer_size: usize, +} + +impl Buffer { + /// Creates a new buffer that can handle exactly `num_streams` number of streams. + /// + /// Creates a new associated `Aggregator` for each stream. + /// The `max_buffer_size` specifies how many record batches for each stream can be buffered at most. + pub(in super::super) fn new(num_streams: usize, max_buffer_size: usize) -> Self { + let aggregators = (0..num_streams).map(|_| Aggregator::new()).collect(); + + Self { + aggregators, + num_streams, + max_buffer_size, + } + } + + /// Extends the aggregator for `stream_index` with data from a new `record_batch`. + /// + /// # Errors + /// + /// Errors if the aggregator cannot be extended. + /// + /// The returned error is deterministic. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. + pub(in super::super) fn extend( + &mut self, + stream_index: usize, + record_batch: RecordBatch, + ) -> Result<()> { + assert!(stream_index < self.num_streams); + self.aggregators[stream_index].extend(record_batch) + } + + /// Removes and returns all completed groups from this buffer. + /// + /// # Errors + /// + /// Errors if aggregators fail to return completed groups. + /// + /// The returned error is deterministic. + /// + /// # Panics + /// + /// Panics if aggregators return inconsistent responses. + pub(in super::super) fn completed_groups(&mut self) -> Result> { + let Some(max_completed_block_ptr) = self.max_completed_block_ptr()? else { + return Ok(None); + }; + + let mut ordered_completed_groups = BTreeMap::new(); + + for (stream_index, agg) in self.aggregators.iter_mut().enumerate() { + let Some(completed_groups) = agg.completed_groups(&max_completed_block_ptr)? else { + continue; + }; + + for (block_ptr, record_batch) in completed_groups { + match ordered_completed_groups.entry(block_ptr) { + Entry::Vacant(entry) => { + entry.insert(RecordBatchGroup { + record_batches: vec![StreamRecordBatch { + stream_index, + record_batch, + }], + }); + } + Entry::Occupied(mut entry) => { + entry.get_mut().record_batches.push(StreamRecordBatch { + stream_index, + record_batch, + }); + } + } + } + } + + assert!(!ordered_completed_groups.is_empty()); + Ok(Some(ordered_completed_groups)) + } + + /// Marks the aggregator for the `stream_index` as finalized. + /// + /// A finalized aggregator cannot be extended. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. + pub(in super::super) fn finalize(&mut self, stream_index: usize) { + assert!(stream_index < self.num_streams); + self.aggregators[stream_index].finalize(); + } + + /// Returns `true` if the aggregator for `stream_index` is finalized. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. + pub(in super::super) fn is_finalized(&self, stream_index: usize) -> bool { + assert!(stream_index < self.num_streams); + self.aggregators[stream_index].is_finalized() + } + + /// Returns `true` if all aggregators are finalized. + pub(in super::super) fn all_finalized(&self) -> bool { + self.aggregators.iter().all(|agg| agg.is_finalized()) + } + + /// Returns `true` if the aggregator for `stream_index` can be extended. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. 
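// Illustrative sketch, not part of the diff: how `Buffer::completed_groups` above folds
// per-stream results into one ordered map keyed by block, so downstream consumers see one
// group per block containing every stream's contribution. Strings stand in for record
// batches and u64 keys for (BlockNumber, BlockHash) pairs.
use std::collections::BTreeMap;

fn group_by_block(
    per_stream: Vec<Vec<(u64, &'static str)>>,
) -> BTreeMap<u64, Vec<(usize, &'static str)>> {
    let mut groups: BTreeMap<u64, Vec<(usize, &'static str)>> = BTreeMap::new();
    for (stream_index, completed) in per_stream.into_iter().enumerate() {
        for (block, batch) in completed {
            // Batches from different streams that share a block end up in the same group.
            groups.entry(block).or_default().push((stream_index, batch));
        }
    }
    groups
}

fn main() {
    let groups = group_by_block(vec![
        vec![(5, "stream0/block5")],
        vec![(5, "stream1/block5"), (6, "stream1/block6")],
    ]);
    assert_eq!(groups[&5].len(), 2);
    assert_eq!(groups[&6], vec![(1, "stream1/block6")]);
}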
+ pub(in super::super) fn has_capacity(&self, stream_index: usize) -> bool { + assert!(stream_index < self.num_streams); + self.aggregators[stream_index].len() < self.max_buffer_size + } + + /// Returns `true` if the stream `stream_index` is not allowed to make progress and + /// its aggregator does not contain any completed groups. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. + pub(in super::super) fn is_blocked(&self, stream_index: usize) -> bool { + !self.has_capacity(stream_index) + && !self.is_finalized(stream_index) + && !self.aggregators[stream_index].has_completed_groups() + } + + /// Returns the number of record batches stream `stream_index` has buffered. + /// + /// # Panics + /// + /// Panics if the `stream_index` is greater than the initialized number of streams. + pub(in super::super) fn size(&self, stream_index: usize) -> usize { + assert!(stream_index < self.num_streams); + self.aggregators[stream_index].len() + } + + /// Returns the block number and hash pair for the most recent completed group across all streams. + /// + /// Finds the highest block number that all streams have completed. This ensures + /// slower streams can still produce valid completed groups without skipping any groups. + /// The function returns the minimum of all maximum completed blocks to maintain consistency. + /// + /// # Errors + /// + /// Returns an error if multiple streams return the same block number but different hashes. + /// + /// The returned error is deterministic. + /// + /// # Note + /// + /// Potential reorgs are not handled at this level and are treated as data corruption. + fn max_completed_block_ptr(&self) -> Result> { + let mut max_completed_block_ptrs: BTreeMap<&BlockNumber, &BlockHash> = BTreeMap::new(); + + for (stream_index, agg) in self.aggregators.iter().enumerate() { + let Some((max_completed_block_number, max_completed_block_hash)) = + agg.max_completed_block_ptr() + else { + if !agg.is_finalized() { + return Ok(None); + } + + continue; + }; + + match max_completed_block_ptrs.entry(max_completed_block_number) { + Entry::Vacant(entry) => { + entry.insert(max_completed_block_hash); + } + Entry::Occupied(entry) => { + if *entry.get() != max_completed_block_hash { + bail!("aggregated data is corrupted: stream {} produced block hash '0x{}' for block {}, but a previous stream set the block hash to '0x{}'", + stream_index, + hex::encode(max_completed_block_hash), + max_completed_block_number, + hex::encode(entry.get()), + ); + } + } + }; + } + + Ok(max_completed_block_ptrs + .into_iter() + .next() + .map(|(block_number, block_hash)| (*block_number, *block_hash))) + } +} diff --git a/graph/src/amp/stream_aggregator/record_batch/decoder.rs b/graph/src/amp/stream_aggregator/record_batch/decoder.rs new file mode 100644 index 00000000000..a2c5cf92daf --- /dev/null +++ b/graph/src/amp/stream_aggregator/record_batch/decoder.rs @@ -0,0 +1,62 @@ +use alloy::primitives::{BlockHash, BlockNumber}; +use anyhow::{anyhow, Result}; +use arrow::array::RecordBatch; + +use crate::amp::codec::{ + self, + utils::{auto_block_hash_decoder, auto_block_number_decoder}, +}; + +/// Decodes the data required for stream aggregation. +pub(super) struct Decoder<'a> { + /// Block numbers serve as group keys for related record batches. + block_number: Box> + 'a>, + + /// Block hashes ensure data consistency across tables and datasets. + block_hash: Box> + 'a>, +} + +impl<'a> Decoder<'a> { + /// Constructs a new decoder for `record_batch`. 
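// Illustrative sketch, not part of the diff: the "minimum of per-stream maxima" rule from
// `Buffer::max_completed_block_ptr` above. Each entry is a stream's highest completed
// block; `None` means the stream has nothing completed yet (finalized streams with no
// data would simply be left out of the slice). A single lagging stream holds everything back.
fn emission_high_water_mark(per_stream_max: &[Option<u64>]) -> Option<u64> {
    let mut result: Option<u64> = None;
    for max in per_stream_max {
        match max {
            Some(block) => result = Some(result.map_or(*block, |current| current.min(*block))),
            // A stream with no completed groups yet blocks emission entirely.
            None => return None,
        }
    }
    result
}

fn main() {
    assert_eq!(emission_high_water_mark(&[Some(10), Some(7), Some(9)]), Some(7));
    assert_eq!(emission_high_water_mark(&[Some(10), None]), None);
}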
+ /// + /// # Errors + /// + /// Returns an error if: + /// - `record_batch` does not contain valid block number or hash columns + /// + /// The returned error is deterministic. + pub(super) fn new(record_batch: &'a RecordBatch) -> Result { + Ok(Self { + block_number: auto_block_number_decoder(record_batch)?.1, + block_hash: auto_block_hash_decoder(record_batch)?.1, + }) + } + + /// Returns the block number at `row_index`. + /// + /// # Errors + /// + /// Returns an error if: + /// - The block number at `row_index` is null + /// + /// The returned error is deterministic. + pub(super) fn block_number(&self, row_index: usize) -> Result { + self.block_number + .decode(row_index)? + .ok_or_else(|| anyhow!("block number is empty")) + } + + /// Returns the block hash at `row_index`. + /// + /// # Errors + /// + /// Returns an error if: + /// - The block hash at `row_index` is null or invalid + /// + /// The returned error is deterministic. + pub(super) fn block_hash(&self, row_index: usize) -> Result { + self.block_hash + .decode(row_index)? + .ok_or_else(|| anyhow!("block hash is empty")) + } +} diff --git a/graph/src/amp/stream_aggregator/record_batch/group_data.rs b/graph/src/amp/stream_aggregator/record_batch/group_data.rs new file mode 100644 index 00000000000..32d3317c585 --- /dev/null +++ b/graph/src/amp/stream_aggregator/record_batch/group_data.rs @@ -0,0 +1,88 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use arrow::{ + array::{RecordBatch, UInt64Array}, + compute::{concat_batches, take_record_batch}, +}; + +/// Contains references to all record batches and rows of a group. +pub(super) struct GroupData { + parts: Vec, +} + +struct Part { + record_batch: Arc, + row_indices: Vec, +} + +impl GroupData { + /// Creates a new group with an initial `record_batch` and `row_index`. + pub(super) fn new(record_batch: Arc, row_index: usize) -> Self { + Self { + parts: vec![Part { + record_batch, + row_indices: vec![row_index as u64], + }], + } + } + + /// Adds a new `record_batch` and `row_index` to this group. + pub(super) fn add(&mut self, record_batch: Arc, row_index: usize) { + self.parts.push(Part { + record_batch, + row_indices: vec![row_index as u64], + }) + } + + /// Adds a `row_index` to the most recent record batch in this group. + /// + /// # Panics + /// + /// Panics if this group is empty. + pub(super) fn add_row_index(&mut self, row_index: usize) { + assert!(!self.parts.is_empty()); + + self.parts + .last_mut() + .unwrap() + .row_indices + .push(row_index as u64); + } + + /// Converts this group into a single record batch. + /// + /// Merges all group rows from all record batches together. + /// + /// # Errors + /// + /// Returns an error if the record batches in this group have incompatible types. + /// + /// The returned error is deterministic. 
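// Illustrative sketch, not part of the diff: what the decoder boils down to for a plain
// UInt64 block number column. The real implementation auto-detects the block number and
// hash columns via the codec utilities and supports more encodings; the "block_number"
// column name below is an assumption made for this example.
use anyhow::{anyhow, Result};
use arrow::array::{Array, RecordBatch, UInt64Array};

fn block_number_at(batch: &RecordBatch, row_index: usize) -> Result<u64> {
    let column = batch
        .column_by_name("block_number")
        .ok_or_else(|| anyhow!("missing block_number column"))?;
    let column = column
        .as_any()
        .downcast_ref::<UInt64Array>()
        .ok_or_else(|| anyhow!("block_number is not a UInt64 column"))?;
    if column.is_null(row_index) {
        return Err(anyhow!("block number is empty"));
    }
    Ok(column.value(row_index))
}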
+ /// + /// # Panics + /// + /// Panics if: + /// - This group is empty + /// - This group contains invalid row indices + pub(super) fn into_record_batch(self) -> Result { + assert!(!self.parts.is_empty()); + + let schema = self.parts[0].record_batch.schema(); + let mut partial_record_batches = Vec::with_capacity(self.parts.len()); + + for part in self.parts { + let Part { + record_batch, + row_indices, + } = part; + + let row_indices = UInt64Array::from(row_indices); + let partial_record_batch = take_record_batch(&record_batch, &row_indices).unwrap(); + + partial_record_batches.push(partial_record_batch); + } + + concat_batches(&schema, &partial_record_batches).context("failed to merge record batches") + } +} diff --git a/graph/src/amp/stream_aggregator/record_batch/mod.rs b/graph/src/amp/stream_aggregator/record_batch/mod.rs new file mode 100644 index 00000000000..171f360f5fa --- /dev/null +++ b/graph/src/amp/stream_aggregator/record_batch/mod.rs @@ -0,0 +1,38 @@ +//! This module handles grouping record batches from multiple streams. +//! +//! # Safety +//! +//! The implementation occasionally uses `assert` and `unwrap` to ensure consistency +//! between related types and methods. +//! +//! This is safe because the functionality is internal and not exposed to other modules. +//! +//! A panic indicates a critical error in the grouping algorithm. + +mod aggregator; +mod buffer; +mod decoder; +mod group_data; + +use std::collections::BTreeMap; + +use alloy::primitives::{BlockHash, BlockNumber}; +use arrow::array::RecordBatch; + +use self::{aggregator::Aggregator, decoder::Decoder, group_data::GroupData}; + +pub(super) use buffer::Buffer; + +/// Maps block number and hash pairs to record batches. +pub type RecordBatchGroups = BTreeMap<(BlockNumber, BlockHash), RecordBatchGroup>; + +/// Contains record batches associated with a specific block number and hash pair. +pub struct RecordBatchGroup { + pub record_batches: Vec, +} + +/// Contains a record batch and the index of its source stream. +pub struct StreamRecordBatch { + pub stream_index: usize, + pub record_batch: RecordBatch, +} diff --git a/graph/src/blockchain/mod.rs b/graph/src/blockchain/mod.rs index 00a9ac33e4e..4b3f9e89124 100644 --- a/graph/src/blockchain/mod.rs +++ b/graph/src/blockchain/mod.rs @@ -585,6 +585,7 @@ impl FromStr for BlockchainKind { "near" => Ok(BlockchainKind::Near), "substreams" => Ok(BlockchainKind::Substreams), "subgraph" => Ok(BlockchainKind::Ethereum), // TODO(krishna): We should detect the blockchain kind from the source subgraph + "amp" => Ok(BlockchainKind::Ethereum), // TODO: Maybe get this from the Amp server _ => Err(anyhow!("unknown blockchain kind {}", s)), } } diff --git a/graph/src/blockchain/types.rs b/graph/src/blockchain/types.rs index f3e2642e840..659a7cee9f9 100644 --- a/graph/src/blockchain/types.rs +++ b/graph/src/blockchain/types.rs @@ -564,6 +564,12 @@ impl Default for BlockTime { } } +impl From for BlockTime { + fn from(timestamp: Timestamp) -> Self { + Self(timestamp) + } +} + impl TryFrom for U256 { type Error = anyhow::Error; diff --git a/graph/src/cheap_clone.rs b/graph/src/cheap_clone.rs index b8863d3918e..fc9c98ab7d1 100644 --- a/graph/src/cheap_clone.rs +++ b/graph/src/cheap_clone.rs @@ -106,6 +106,7 @@ cheap_clone_is_clone!(Channel); // reqwest::Client uses Arc internally, so it is CheapClone. 
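// Illustrative sketch, not part of the diff: the two arrow kernels that
// `GroupData::into_record_batch` (above) combines. `take_record_batch` copies only the
// requested rows of a batch, in the requested order, and `concat_batches` stitches the
// partial results back together under a shared schema. Assumes `sources` is non-empty
// and that all batches share the same schema.
use arrow::array::{RecordBatch, UInt64Array};
use arrow::compute::{concat_batches, take_record_batch};
use arrow::error::ArrowError;

fn merge_rows(sources: &[(RecordBatch, Vec<u64>)]) -> Result<RecordBatch, ArrowError> {
    let schema = sources[0].0.schema();
    let mut partial = Vec::with_capacity(sources.len());
    for (batch, row_indices) in sources {
        let indices = UInt64Array::from(row_indices.clone());
        partial.push(take_record_batch(batch, &indices)?);
    }
    concat_batches(&schema, &partial)
}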
cheap_clone_is_clone!(reqwest::Client); cheap_clone_is_clone!(slog::Logger); +cheap_clone_is_clone!(semver::Version); cheap_clone_is_copy!( (), @@ -119,3 +120,5 @@ cheap_clone_is_copy!( std::time::Duration ); cheap_clone_is_copy!(ethabi::Address); + +cheap_clone_is_clone!(tokio_util::sync::CancellationToken); diff --git a/graph/src/components/metrics/registry.rs b/graph/src/components/metrics/registry.rs index 93cf51b3bd1..b41f27bc785 100644 --- a/graph/src/components/metrics/registry.rs +++ b/graph/src/components/metrics/registry.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use std::sync::{Arc, RwLock}; -use prometheus::IntGauge; use prometheus::{labels, Histogram, IntCounterVec}; +use prometheus::{IntCounter, IntGauge}; use slog::debug; use crate::components::metrics::{counter_with_labels, gauge_with_labels}; @@ -349,6 +349,23 @@ impl MetricsRegistry { Ok(counter) } + pub fn new_int_counter( + &self, + name: impl AsRef, + help: impl AsRef, + const_labels: impl IntoIterator, + ) -> Result { + let opts = Opts::new(name.as_ref(), help.as_ref()).const_labels( + const_labels + .into_iter() + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect(), + ); + let int_counter = IntCounter::with_opts(opts)?; + self.register(name.as_ref(), Box::new(int_counter.clone())); + Ok(int_counter) + } + pub fn new_counter_with_labels( &self, name: &str, @@ -500,12 +517,12 @@ impl MetricsRegistry { &self, name: impl AsRef, help: impl AsRef, - const_labels: impl IntoIterator, impl Into)>, + const_labels: impl IntoIterator, ) -> Result { let opts = Opts::new(name.as_ref(), help.as_ref()).const_labels( const_labels .into_iter() - .map(|(a, b)| (a.into(), b.into())) + .map(|(key, value)| (key.to_string(), value.to_string())) .collect(), ); let gauge = IntGauge::with_opts(opts)?; diff --git a/graph/src/components/store/err.rs b/graph/src/components/store/err.rs index 446b73408f1..d59a835d57b 100644 --- a/graph/src/components/store/err.rs +++ b/graph/src/components/store/err.rs @@ -247,3 +247,9 @@ impl From for StoreError { StoreError::Unknown(anyhow!("{}", e.to_string())) } } + +impl crate::amp::error::IsDeterministic for StoreError { + fn is_deterministic(&self) -> bool { + StoreError::is_deterministic(self) + } +} diff --git a/graph/src/components/store/write.rs b/graph/src/components/store/write.rs index 76c71ce5e39..c033373042e 100644 --- a/graph/src/components/store/write.rs +++ b/graph/src/components/store/write.rs @@ -145,7 +145,7 @@ impl EntityModification { /// Return the details of the write if `self` is a write operation for a /// new or an existing entity - fn as_write(&self) -> Option { + fn as_write(&self) -> Option> { EntityWrite::try_from(self).ok() } @@ -823,7 +823,7 @@ impl Batch { &self, entity_type: &EntityType, at: BlockNumber, - ) -> impl Iterator { + ) -> impl Iterator> { self.mods .group(entity_type) .map(|group| group.effective_ops(at)) diff --git a/graph/src/components/subgraph/instance_manager.rs b/graph/src/components/subgraph/instance_manager.rs index c04fd5237b4..3887a58fc7b 100644 --- a/graph/src/components/subgraph/instance_manager.rs +++ b/graph/src/components/subgraph/instance_manager.rs @@ -10,6 +10,13 @@ use crate::components::store::DeploymentLocator; /// subgraph instance manager stops and removes the corresponding instance. #[async_trait::async_trait] pub trait SubgraphInstanceManager: Send + Sync + 'static { + /// Returns `true` if this manager has the necessary capabilities to manage the subgraph. 
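// Illustrative sketch, not part of the diff: what the `MetricsRegistry::new_int_counter`
// helper added above does with the prometheus crate — build Opts with constant labels,
// create the IntCounter, and register it. The metric name and label are made up for
// this example.
use prometheus::{IntCounter, Opts, Registry};

fn main() -> Result<(), prometheus::Error> {
    let opts = Opts::new("amp_record_batches_total", "Record batches received from Amp")
        .const_labels(
            [("deployment", "example")]
                .into_iter()
                .map(|(key, value)| (key.to_string(), value.to_string()))
                .collect(),
        );
    let counter = IntCounter::with_opts(opts)?;

    let registry = Registry::new();
    registry.register(Box::new(counter.clone()))?;

    counter.inc();
    assert_eq!(counter.get(), 1);
    Ok(())
}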
+ fn can_manage( + &self, + deployment: &DeploymentLocator, + raw_manifest: &serde_yaml::Mapping, + ) -> bool; + async fn start_subgraph( self: Arc, deployment: DeploymentLocator, diff --git a/graph/src/components/subgraph/proof_of_indexing/reference.rs b/graph/src/components/subgraph/proof_of_indexing/reference.rs index 5c7d269d7a7..3a11a4db4e3 100644 --- a/graph/src/components/subgraph/proof_of_indexing/reference.rs +++ b/graph/src/components/subgraph/proof_of_indexing/reference.rs @@ -9,6 +9,7 @@ use web3::types::{Address, H256}; /// well-implemented (without conflicting sequence numbers, or other oddities). /// It's just way easier to check that this works, and serves as a kind of /// documentation as a side-benefit. +#[allow(dead_code)] pub struct PoI<'a> { pub causality_regions: HashMap>, pub subgraph_id: DeploymentHash, @@ -16,10 +17,12 @@ pub struct PoI<'a> { pub indexer: Option
<Address>, } +#[allow(dead_code)] +fn h256_as_bytes(val: &H256) -> AsBytes<&[u8]> { AsBytes(val.as_bytes()) } + +#[allow(dead_code)] +fn indexer_opt_as_bytes(val: &Option<Address>
) -> Option> { val.as_ref().map(|v| AsBytes(v.as_bytes())) } diff --git a/graph/src/data/store/scalar/bigdecimal.rs b/graph/src/data/store/scalar/bigdecimal.rs index b8b62f573fb..65738563a67 100644 --- a/graph/src/data/store/scalar/bigdecimal.rs +++ b/graph/src/data/store/scalar/bigdecimal.rs @@ -138,12 +138,24 @@ impl From for BigDecimal { } } +impl From for BigDecimal { + fn from(n: i128) -> Self { + Self::from(OldBigDecimal::new(BigInt::from(n).inner(), 0)) + } +} + impl From for BigDecimal { fn from(n: u64) -> Self { Self::from(OldBigDecimal::from(n)) } } +impl From for BigDecimal { + fn from(n: f32) -> Self { + Self::from(OldBigDecimal::from_f32(n).unwrap_or_default()) + } +} + impl From for BigDecimal { fn from(n: f64) -> Self { Self::from(OldBigDecimal::from_f64(n).unwrap_or_default()) diff --git a/graph/src/data/store/scalar/bigint.rs b/graph/src/data/store/scalar/bigint.rs index c344ec83a6d..554aac83d6b 100644 --- a/graph/src/data/store/scalar/bigint.rs +++ b/graph/src/data/store/scalar/bigint.rs @@ -224,14 +224,20 @@ impl BigInt { } } -impl From for BigInt { - fn from(i: i32) -> BigInt { +impl From for BigInt { + fn from(i: i8) -> BigInt { BigInt::unchecked_new(i.into()) } } -impl From for BigInt { - fn from(i: u64) -> BigInt { +impl From for BigInt { + fn from(i: i16) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + +impl From for BigInt { + fn from(i: i32) -> BigInt { BigInt::unchecked_new(i.into()) } } @@ -242,6 +248,36 @@ impl From for BigInt { } } +impl From for BigInt { + fn from(i: i128) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + +impl From for BigInt { + fn from(i: u8) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + +impl From for BigInt { + fn from(i: u16) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + +impl From for BigInt { + fn from(i: u32) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + +impl From for BigInt { + fn from(i: u64) -> BigInt { + BigInt::unchecked_new(i.into()) + } +} + impl From for BigInt { /// This implementation assumes that U64 represents an unsigned U64, /// and not a signed U64 (aka int64 in Solidity). Right now, this is diff --git a/graph/src/data/store/scalar/timestamp.rs b/graph/src/data/store/scalar/timestamp.rs index 02769d4adf8..58b2ef10cb8 100644 --- a/graph/src/data/store/scalar/timestamp.rs +++ b/graph/src/data/store/scalar/timestamp.rs @@ -90,6 +90,12 @@ impl stable_hash_legacy::StableHash for Timestamp { } } +impl From> for Timestamp { + fn from(value: DateTime) -> Self { + Self(value) + } +} + impl Display for Timestamp { fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { write!(f, "{}", self.as_microseconds_since_epoch()) diff --git a/graph/src/data/subgraph/api_version.rs b/graph/src/data/subgraph/api_version.rs index fbda95b2792..ba89460c7dd 100644 --- a/graph/src/data/subgraph/api_version.rs +++ b/graph/src/data/subgraph/api_version.rs @@ -60,8 +60,11 @@ pub const SPEC_VERSION_1_2_0: Version = Version::new(1, 2, 0); // represents the write order across all entity types in the subgraph. 
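// Illustrative sketch, not part of the diff: the widened `From` impls above let Amp
// decoding and mapping code convert any fixed-width integer (and f32) into the store
// scalars without manual casts. The import path is assumed from the file layout above.
use graph::data::store::scalar::{BigDecimal, BigInt};

fn main() {
    let a = BigInt::from(42u8);
    let b = BigInt::from(-7i128);
    let c = BigInt::from(u64::MAX);

    // f32 follows the existing f64 path; non-representable values fall back to the default.
    let d = BigDecimal::from(0.5f32);
    let e = BigDecimal::from(1_000_000_000_000i128);

    println!("{a} {b} {c} {d} {e}");
}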
pub const SPEC_VERSION_1_3_0: Version = Version::new(1, 3, 0); +// Enables support for Amp data sources; +pub const SPEC_VERSION_1_4_0: Version = Version::new(1, 4, 0); + // The latest spec version available -pub const LATEST_VERSION: &Version = &SPEC_VERSION_1_3_0; +pub const LATEST_VERSION: &Version = &SPEC_VERSION_1_4_0; pub const MIN_SPEC_VERSION: Version = Version::new(0, 0, 2); diff --git a/graph/src/data/subgraph/mod.rs b/graph/src/data/subgraph/mod.rs index 77c8ba67d36..ae62cc31a42 100644 --- a/graph/src/data/subgraph/mod.rs +++ b/graph/src/data/subgraph/mod.rs @@ -12,7 +12,7 @@ pub use features::{SubgraphFeature, SubgraphFeatureValidationError}; use crate::{cheap_clone::CheapClone, components::store::BLOCK_NUMBER_MAX, object}; use anyhow::{anyhow, Context, Error}; -use futures03::{future::try_join, stream::FuturesOrdered, TryStreamExt as _}; +use futures03::future::try_join_all; use itertools::Itertools; use semver::Version; use serde::{ @@ -32,7 +32,7 @@ use wasmparser; use web3::types::Address; use crate::{ - bail, + amp, bail, blockchain::{BlockPtr, Blockchain}, components::{ link_resolver::LinkResolver, @@ -360,6 +360,8 @@ pub enum SubgraphManifestValidationError { FeatureValidationError(#[from] SubgraphFeatureValidationError), #[error("data source {0} is invalid: {1}")] DataSourceValidation(String, Error), + #[error("failed to validate Amp subgraph: {0:#}")] + Amp(#[source] Error), } #[derive(Error, Debug)] @@ -659,7 +661,7 @@ impl<'de> de::Deserialize<'de> for Prune { /// SubgraphManifest with IPFS links unresolved pub type UnresolvedSubgraphManifest = BaseSubgraphManifest< C, - UnresolvedSchema, + Option, UnresolvedDataSource, UnresolvedDataSourceTemplate, >; @@ -742,15 +744,24 @@ impl UnvalidatedSubgraphManifest { /// Entry point for resolving a subgraph definition. 
/// Right now the only supported links are of the form: /// `/ipfs/QmUmg7BZC1YP1ca66rRtWKxpXp77WgVHrnv263JtDuvs2k` - pub async fn resolve( + pub async fn resolve( id: DeploymentHash, raw: serde_yaml::Mapping, resolver: &Arc, + amp_client: Option>, logger: &Logger, max_spec_version: semver::Version, ) -> Result { Ok(Self( - SubgraphManifest::resolve_from_raw(id, raw, resolver, logger, max_spec_version).await?, + SubgraphManifest::resolve_from_raw( + id, + raw, + resolver, + amp_client, + logger, + max_spec_version, + ) + .await?, )) } @@ -815,6 +826,8 @@ impl UnvalidatedSubgraphManifest { &self.0.spec_version, )); + errors.append(&mut Self::validate_amp_subgraph(&self.0)); + match errors.is_empty() { true => Ok(self.0), false => Err(errors), @@ -824,20 +837,83 @@ impl UnvalidatedSubgraphManifest { pub fn spec_version(&self) -> &Version { &self.0.spec_version } + + fn validate_amp_subgraph( + manifest: &SubgraphManifest, + ) -> Vec { + use api_version::SPEC_VERSION_1_4_0; + + let BaseSubgraphManifest { + id: _, + spec_version, + features, + description: _, + repository: _, + schema: _, + data_sources, + graft, + templates, + chain: _, + indexer_hints: _, + } = manifest; + + let amp_data_sources = data_sources + .iter() + .filter_map(|data_source| match data_source { + DataSource::Amp(amp_data_source) => Some(amp_data_source), + _ => None, + }) + .collect_vec(); + + if amp_data_sources.is_empty() { + // Not an Amp subgraph + return Vec::new(); + } + + let mut errors = Vec::new(); + let err = |msg: &str| SubgraphManifestValidationError::Amp(anyhow!(msg.to_owned())); + + if data_sources.len() != amp_data_sources.len() { + errors.push(err("multiple data source kinds are not supported")); + } + + if amp_data_sources.len() > 10 { + errors.push(err("too many data sources")); + } + + if *spec_version < SPEC_VERSION_1_4_0 { + errors.push(err("spec version is not supported")); + } + + if !features.is_empty() { + errors.push(err("manifest features are not supported")); + } + + if graft.is_some() { + errors.push(err("grafting is not supported")); + } + + if !templates.is_empty() { + errors.push(err("data source templates are not supported")); + } + + errors + } } impl SubgraphManifest { /// Entry point for resolving a subgraph definition. 
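// Illustrative sketch, not part of the diff: the constraints that `validate_amp_subgraph`
// above enforces, condensed into a standalone check — all-or-nothing Amp data sources,
// at most ten of them, spec version >= 1.4.0, and no features, grafts, or templates.
use semver::Version;

struct ManifestShape {
    spec_version: Version,
    total_data_sources: usize,
    amp_data_sources: usize,
    feature_count: usize,
    template_count: usize,
    has_graft: bool,
}

fn amp_violations(m: &ManifestShape) -> Vec<&'static str> {
    let mut errors = Vec::new();
    if m.amp_data_sources == 0 {
        return errors; // not an Amp subgraph; these rules do not apply
    }
    if m.total_data_sources != m.amp_data_sources {
        errors.push("multiple data source kinds are not supported");
    }
    if m.amp_data_sources > 10 {
        errors.push("too many data sources");
    }
    if m.spec_version < Version::new(1, 4, 0) {
        errors.push("spec version is not supported");
    }
    if m.feature_count > 0 {
        errors.push("manifest features are not supported");
    }
    if m.has_graft {
        errors.push("grafting is not supported");
    }
    if m.template_count > 0 {
        errors.push("data source templates are not supported");
    }
    errors
}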
- pub async fn resolve_from_raw( + pub async fn resolve_from_raw( id: DeploymentHash, raw: serde_yaml::Mapping, resolver: &Arc, + amp_client: Option>, logger: &Logger, max_spec_version: semver::Version, ) -> Result { let unresolved = UnresolvedSubgraphManifest::parse(id, raw)?; let resolved = unresolved - .resolve(resolver, logger, max_spec_version) + .resolve(resolver, amp_client, logger, max_spec_version) .await?; Ok(resolved) } @@ -973,9 +1049,10 @@ impl UnresolvedSubgraphManifest { serde_yaml::from_value(raw.into()).map_err(Into::into) } - pub async fn resolve( + pub async fn resolve( self, resolver: &Arc, + amp_client: Option>, logger: &Logger, max_spec_version: semver::Version, ) -> Result, SubgraphManifestResolveError> { @@ -985,7 +1062,7 @@ impl UnresolvedSubgraphManifest { features, description, repository, - schema, + schema: unresolved_schema, data_sources, graft, templates, @@ -1003,37 +1080,63 @@ impl UnresolvedSubgraphManifest { ).into()); } - let ds_count = data_sources.len(); - if ds_count as u64 + templates.len() as u64 > u32::MAX as u64 { + if data_sources.len() + templates.len() > u32::MAX as usize { return Err( - anyhow!("Subgraph has too many declared data sources and templates",).into(), + anyhow!("subgraph has too many declared data sources and templates").into(), ); } - let schema = schema - .resolve(&spec_version, id.clone(), resolver, logger) + let data_sources = + try_join_all(data_sources.into_iter().enumerate().map(|(idx, ds)| { + ds.resolve(resolver, amp_client.cheap_clone(), logger, idx as u32) + })) .await?; - let (data_sources, templates) = try_join( - data_sources - .into_iter() - .enumerate() - .map(|(idx, ds)| ds.resolve(resolver, logger, idx as u32)) - .collect::>() - .try_collect::>(), - templates - .into_iter() - .enumerate() - .map(|(idx, template)| { - template.resolve(resolver, &schema, logger, ds_count as u32 + idx as u32) - }) - .collect::>() - .try_collect::>(), - ) + let amp_data_sources = data_sources + .iter() + .filter_map(|data_source| match data_source { + DataSource::Amp(amp_data_source) => Some(amp_data_source), + _ => None, + }) + .collect_vec(); + + let schema = match unresolved_schema { + Some(unresolved_schema) => { + unresolved_schema + .resolve(&spec_version, id.cheap_clone(), resolver, logger) + .await? + } + None if amp_data_sources.len() == data_sources.len() => { + let table_schemas = amp_data_sources + .iter() + .map(|data_source| { + data_source + .transformer + .tables + .iter() + .map(|table| (table.name.clone(), table.schema.clone())) + }) + .flatten(); + + amp::schema::generate_subgraph_schema(&id, table_schemas)? + } + None => { + return Err(anyhow!("subgraph schema is required").into()); + } + }; + + let templates = try_join_all(templates.into_iter().enumerate().map(|(idx, template)| { + template.resolve( + resolver, + &schema, + logger, + data_sources.len() as u32 + idx as u32, + ) + })) .await?; let is_substreams = data_sources.iter().any(|ds| ds.kind() == SUBSTREAMS_KIND); - if is_substreams && ds_count > 1 { + if is_substreams && data_sources.len() > 1 { return Err(anyhow!( "A Substreams-based subgraph can only contain a single data source." 
) @@ -1104,7 +1207,7 @@ impl UnresolvedSubgraphManifest { ); } - Ok(SubgraphManifest { + let manifest = SubgraphManifest { id, spec_version, features, @@ -1116,7 +1219,16 @@ impl UnresolvedSubgraphManifest { templates, chain, indexer_hints, - }) + }; + + if let Some(e) = UnvalidatedSubgraphManifest::::validate_amp_subgraph(&manifest) + .into_iter() + .next() + { + return Err(anyhow::Error::from(e).into()); + } + + Ok(manifest) } } diff --git a/graph/src/data_source/mod.rs b/graph/src/data_source/mod.rs index 4c56e99ea9b..a3bad417e5f 100644 --- a/graph/src/data_source/mod.rs +++ b/graph/src/data_source/mod.rs @@ -23,7 +23,7 @@ use crate::{ prelude::{CheapClone as _, DataSourceContext}, schema::{EntityType, InputSchema}, }; -use anyhow::Error; +use anyhow::{anyhow, Context, Error}; use semver::Version; use serde::{de::IntoDeserializer as _, Deserialize, Deserializer}; use slog::{Logger, SendSyncRefUnwindSafeKV}; @@ -34,11 +34,14 @@ use std::{ }; use thiserror::Error; +use crate::amp; + #[derive(Debug)] pub enum DataSource { Onchain(C::DataSource), Offchain(offchain::DataSource), Subgraph(subgraph::DataSource), + Amp(amp::manifest::DataSource), } #[derive(Error, Debug)] @@ -94,6 +97,7 @@ impl DataSource { Self::Onchain(ds) => Some(ds), Self::Offchain(_) => None, Self::Subgraph(_) => None, + Self::Amp(_) => None, } } @@ -102,6 +106,7 @@ impl DataSource { Self::Onchain(_) => None, Self::Offchain(_) => None, Self::Subgraph(ds) => Some(ds), + Self::Amp(_) => None, } } @@ -110,6 +115,7 @@ impl DataSource { Self::Onchain(_) => true, Self::Offchain(_) => false, Self::Subgraph(_) => true, + Self::Amp(_) => true, } } @@ -118,6 +124,7 @@ impl DataSource { Self::Onchain(_) => None, Self::Offchain(ds) => Some(ds), Self::Subgraph(_) => None, + Self::Amp(_) => None, } } @@ -126,6 +133,7 @@ impl DataSource { DataSourceEnum::Onchain(ds) => ds.network(), DataSourceEnum::Offchain(_) => None, DataSourceEnum::Subgraph(ds) => ds.network(), + Self::Amp(ds) => Some(&ds.network), } } @@ -134,6 +142,7 @@ impl DataSource { DataSourceEnum::Onchain(ds) => Some(ds.start_block()), DataSourceEnum::Offchain(_) => None, DataSourceEnum::Subgraph(ds) => Some(ds.source.start_block), + Self::Amp(ds) => Some(ds.source.start_block as i32), } } @@ -150,6 +159,7 @@ impl DataSource { Self::Onchain(ds) => ds.address().map(ToOwned::to_owned), Self::Offchain(ds) => ds.address(), Self::Subgraph(ds) => ds.address(), + Self::Amp(ds) => Some(ds.source.address.to_vec()), } } @@ -158,6 +168,7 @@ impl DataSource { Self::Onchain(ds) => ds.name(), Self::Offchain(ds) => &ds.name, Self::Subgraph(ds) => &ds.name, + Self::Amp(ds) => ds.name.as_str(), } } @@ -166,6 +177,7 @@ impl DataSource { Self::Onchain(ds) => ds.kind().to_owned(), Self::Offchain(ds) => ds.kind.to_string(), Self::Subgraph(ds) => ds.kind.clone(), + Self::Amp(_) => amp::manifest::DataSource::KIND.to_string(), } } @@ -174,6 +186,7 @@ impl DataSource { Self::Onchain(ds) => ds.min_spec_version(), Self::Offchain(ds) => ds.min_spec_version(), Self::Subgraph(ds) => ds.min_spec_version(), + Self::Amp(_) => amp::manifest::DataSource::MIN_SPEC_VERSION, } } @@ -182,6 +195,7 @@ impl DataSource { Self::Onchain(ds) => ds.end_block(), Self::Offchain(_) => None, Self::Subgraph(_) => None, + Self::Amp(ds) => Some(ds.source.end_block as i32), } } @@ -190,6 +204,7 @@ impl DataSource { Self::Onchain(ds) => ds.creation_block(), Self::Offchain(ds) => ds.creation_block, Self::Subgraph(ds) => ds.creation_block, + Self::Amp(_) => None, } } @@ -198,6 +213,7 @@ impl DataSource { Self::Onchain(ds) => 
ds.context(), Self::Offchain(ds) => ds.context.clone(), Self::Subgraph(ds) => ds.context.clone(), + Self::Amp(_) => Arc::new(None), } } @@ -206,6 +222,7 @@ impl DataSource { Self::Onchain(ds) => ds.api_version(), Self::Offchain(ds) => ds.mapping.api_version.clone(), Self::Subgraph(ds) => ds.mapping.api_version.clone(), + Self::Amp(ds) => ds.transformer.api_version.clone(), } } @@ -214,6 +231,7 @@ impl DataSource { Self::Onchain(ds) => ds.runtime(), Self::Offchain(ds) => Some(ds.mapping.runtime.cheap_clone()), Self::Subgraph(ds) => Some(ds.mapping.runtime.cheap_clone()), + Self::Amp(_) => None, } } @@ -224,6 +242,7 @@ impl DataSource { Self::Onchain(_) => EntityTypeAccess::Any, Self::Offchain(ds) => EntityTypeAccess::Restriced(ds.mapping.entities.clone()), Self::Subgraph(_) => EntityTypeAccess::Any, + Self::Amp(_) => EntityTypeAccess::Any, } } @@ -232,6 +251,7 @@ impl DataSource { Self::Onchain(ds) => ds.handler_kinds(), Self::Offchain(ds) => vec![ds.handler_kind()].into_iter().collect(), Self::Subgraph(ds) => vec![ds.handler_kind()].into_iter().collect(), + Self::Amp(_) => HashSet::new(), } } @@ -240,6 +260,7 @@ impl DataSource { Self::Onchain(ds) => ds.has_declared_calls(), Self::Offchain(_) => false, Self::Subgraph(_) => false, + Self::Amp(_) => false, } } @@ -266,6 +287,7 @@ impl DataSource { | (Self::Offchain(_), TriggerData::Subgraph(_)) | (Self::Subgraph(_), TriggerData::Onchain(_)) | (Self::Subgraph(_), TriggerData::Offchain(_)) => Ok(None), + (Self::Amp(_), _) => Ok(None), } } @@ -282,6 +304,7 @@ impl DataSource { Self::Onchain(ds) => ds.as_stored_dynamic_data_source(), Self::Offchain(ds) => ds.as_stored_dynamic_data_source(), Self::Subgraph(_) => todo!(), // TODO(krishna) + Self::Amp(_) => unreachable!(), } } @@ -307,6 +330,7 @@ impl DataSource { Self::Onchain(ds) => ds.validate(spec_version), Self::Offchain(_) => vec![], Self::Subgraph(_) => vec![], // TODO(krishna) + Self::Amp(_) => Vec::new(), } } @@ -315,6 +339,7 @@ impl DataSource { Self::Onchain(_) => CausalityRegion::ONCHAIN, Self::Offchain(ds) => ds.causality_region, Self::Subgraph(_) => CausalityRegion::ONCHAIN, + Self::Amp(_) => CausalityRegion::ONCHAIN, } } } @@ -324,12 +349,14 @@ pub enum UnresolvedDataSource { Onchain(C::UnresolvedDataSource), Offchain(offchain::UnresolvedDataSource), Subgraph(subgraph::UnresolvedDataSource), + Amp(amp::manifest::data_source::RawDataSource), } impl UnresolvedDataSource { - pub async fn resolve( + pub async fn resolve( self, resolver: &Arc, + amp_client: Option>, logger: &Logger, manifest_idx: u32, ) -> Result, anyhow::Error> { @@ -339,7 +366,7 @@ impl UnresolvedDataSource { .await .map(DataSource::Onchain), Self::Subgraph(unresolved) => unresolved - .resolve::(resolver, logger, manifest_idx) + .resolve::(resolver, amp_client, logger, manifest_idx) .await .map(DataSource::Subgraph), Self::Offchain(_unresolved) => { @@ -348,7 +375,16 @@ impl UnresolvedDataSource { for details see https://github.com/graphprotocol/graph-node/issues/3864" ); } + Self::Amp(raw_data_source) => match amp_client { + Some(amp_client) => raw_data_source + .resolve(logger, resolver.as_ref(), amp_client.as_ref()) + .await + .map(DataSource::Amp) + .map_err(Error::from), + None => Err(anyhow!("support for Amp data sources is not enabled")), + }, } + .with_context(|| format!("failed to resolve data source at index {manifest_idx}")) } } @@ -594,58 +630,95 @@ impl MappingTrigger { } } -macro_rules! 
clone_data_source { - ($t:ident) => { - impl Clone for $t { - fn clone(&self) -> Self { - match self { - Self::Onchain(ds) => Self::Onchain(ds.clone()), - Self::Offchain(ds) => Self::Offchain(ds.clone()), - Self::Subgraph(ds) => Self::Subgraph(ds.clone()), - } - } +impl Clone for DataSource { + fn clone(&self) -> Self { + match self { + Self::Onchain(ds) => Self::Onchain(ds.clone()), + Self::Offchain(ds) => Self::Offchain(ds.clone()), + Self::Subgraph(ds) => Self::Subgraph(ds.clone()), + Self::Amp(ds) => Self::Amp(ds.clone()), } - }; + } } -clone_data_source!(DataSource); -clone_data_source!(DataSourceTemplate); - -macro_rules! deserialize_data_source { - ($t:ident) => { - impl<'de, C: Blockchain> Deserialize<'de> for $t { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let map: BTreeMap = BTreeMap::deserialize(deserializer)?; - let kind = map - .get("kind") - .ok_or(serde::de::Error::missing_field("kind"))? - .as_str() - .unwrap_or("?"); - if OFFCHAIN_KINDS.contains_key(&kind) { - offchain::$t::deserialize(map.into_deserializer()) - .map_err(serde::de::Error::custom) - .map($t::Offchain) - } else if SUBGRAPH_DS_KIND == kind { - subgraph::$t::deserialize(map.into_deserializer()) - .map_err(serde::de::Error::custom) - .map($t::Subgraph) - } else if (&C::KIND.to_string() == kind) || C::ALIASES.contains(&kind) { - C::$t::deserialize(map.into_deserializer()) - .map_err(serde::de::Error::custom) - .map($t::Onchain) - } else { - Err(serde::de::Error::custom(format!( - "data source has invalid `kind`; expected {}, file/ipfs", - C::KIND, - ))) - } - } +impl Clone for DataSourceTemplate { + fn clone(&self) -> Self { + match self { + Self::Onchain(ds) => Self::Onchain(ds.clone()), + Self::Offchain(ds) => Self::Offchain(ds.clone()), + Self::Subgraph(ds) => Self::Subgraph(ds.clone()), } - }; + } } -deserialize_data_source!(UnresolvedDataSource); -deserialize_data_source!(UnresolvedDataSourceTemplate); +impl<'de, C: Blockchain> Deserialize<'de> for UnresolvedDataSource { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let map: BTreeMap = BTreeMap::deserialize(deserializer)?; + + let kind = map + .get("kind") + .ok_or(serde::de::Error::missing_field("kind"))? + .as_str() + .unwrap_or("?"); + + if OFFCHAIN_KINDS.contains_key(&kind) { + offchain::UnresolvedDataSource::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSource::Offchain) + } else if SUBGRAPH_DS_KIND == kind { + subgraph::UnresolvedDataSource::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSource::Subgraph) + } else if amp::manifest::DataSource::KIND == kind { + amp::manifest::data_source::RawDataSource::deserialize(map.into_deserializer()) + .map(UnresolvedDataSource::Amp) + .map_err(serde::de::Error::custom) + } else if (&C::KIND.to_string() == kind) || C::ALIASES.contains(&kind) { + C::UnresolvedDataSource::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSource::Onchain) + } else { + Err(serde::de::Error::custom(format!( + "data source has invalid `kind`; expected {}, file/ipfs", + C::KIND, + ))) + } + } +} + +impl<'de, C: Blockchain> Deserialize<'de> for UnresolvedDataSourceTemplate { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let map: BTreeMap = BTreeMap::deserialize(deserializer)?; + + let kind = map + .get("kind") + .ok_or(serde::de::Error::missing_field("kind"))? 
+ .as_str() + .unwrap_or("?"); + + if OFFCHAIN_KINDS.contains_key(&kind) { + offchain::UnresolvedDataSourceTemplate::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSourceTemplate::Offchain) + } else if SUBGRAPH_DS_KIND == kind { + subgraph::UnresolvedDataSourceTemplate::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSourceTemplate::Subgraph) + } else if (&C::KIND.to_string() == kind) || C::ALIASES.contains(&kind) { + C::UnresolvedDataSourceTemplate::deserialize(map.into_deserializer()) + .map_err(serde::de::Error::custom) + .map(UnresolvedDataSourceTemplate::Onchain) + } else { + Err(serde::de::Error::custom(format!( + "data source has invalid `kind`; expected {}, file/ipfs", + C::KIND, + ))) + } + } +} diff --git a/graph/src/data_source/subgraph.rs b/graph/src/data_source/subgraph.rs index 87b44e66174..f4c9852feb9 100644 --- a/graph/src/data_source/subgraph.rs +++ b/graph/src/data_source/subgraph.rs @@ -23,6 +23,7 @@ use super::{ common::{CallDecls, FindMappingABI, MappingABI, UnresolvedMappingABI}, DataSourceTemplateInfo, TriggerWithHandler, }; +use crate::amp; pub const SUBGRAPH_DS_KIND: &str = "subgraph"; @@ -254,9 +255,10 @@ impl UnresolvedDataSource { Ok(()) } - async fn resolve_source_manifest( + async fn resolve_source_manifest( &self, resolver: &Arc, + amp_client: Option>, logger: &Logger, ) -> Result>, Error> { let source_raw = resolver @@ -282,7 +284,7 @@ impl UnresolvedDataSource { ))?; source_manifest - .resolve(resolver, logger, LATEST_VERSION.clone()) + .resolve(resolver, amp_client, logger, LATEST_VERSION.clone()) .await .context(format!( "Failed to resolve source subgraph [{}] manifest", @@ -292,9 +294,10 @@ impl UnresolvedDataSource { } /// Recursively verifies that all grafts in the chain meet the minimum spec version requirement for a subgraph source - async fn verify_graft_chain_sourcable( + async fn verify_graft_chain_sourcable( manifest: Arc>, resolver: &Arc, + amp_client: Option>, logger: &Logger, graft_chain: &mut Vec, ) -> Result<(), Error> { @@ -324,13 +327,19 @@ impl UnresolvedDataSource { let graft_manifest = UnresolvedSubgraphManifest::::parse(graft.base.clone(), graft_raw) .context("Failed to parse graft base manifest")? 
- .resolve(resolver, logger, LATEST_VERSION.clone()) + .resolve( + resolver, + amp_client.cheap_clone(), + logger, + LATEST_VERSION.clone(), + ) .await .context("Failed to resolve graft base manifest")?; Box::pin(Self::verify_graft_chain_sourcable( Arc::new(graft_manifest), resolver, + amp_client, logger, graft_chain, )) @@ -341,9 +350,10 @@ impl UnresolvedDataSource { } #[allow(dead_code)] - pub(super) async fn resolve( + pub(super) async fn resolve( self, resolver: &Arc, + amp_client: Option>, logger: &Logger, manifest_idx: u32, ) -> Result { @@ -354,7 +364,9 @@ impl UnresolvedDataSource { ); let kind = self.kind.clone(); - let source_manifest = self.resolve_source_manifest::(resolver, logger).await?; + let source_manifest = self + .resolve_source_manifest::(resolver, amp_client.cheap_clone(), logger) + .await?; let source_spec_version = &source_manifest.spec_version; if source_spec_version < &SPEC_VERSION_1_3_0 { return Err(anyhow!( @@ -370,6 +382,7 @@ impl UnresolvedDataSource { Self::verify_graft_chain_sourcable( source_manifest.clone(), resolver, + amp_client, logger, &mut graft_chain, ) diff --git a/graph/src/env/amp.rs b/graph/src/env/amp.rs new file mode 100644 index 00000000000..ef4fff7c1dc --- /dev/null +++ b/graph/src/env/amp.rs @@ -0,0 +1,76 @@ +use std::time::Duration; + +/// Contains environment variables related to Amp subgraphs. +#[derive(Debug)] +pub struct AmpEnv { + /// Maximum number of record batches to buffer in memory per stream for each SQL query. + /// This is the maximum number of record batches that can be output by a single block. + /// + /// Defaults to `1,000`. + pub max_buffer_size: usize, + + /// Maximum number of blocks to request per stream for each SQL query. + /// Limiting this value reduces load on the Amp server when processing heavy queries. + /// + /// Defaults to `2,000,000`. + pub max_block_range: usize, + + /// Minimum time to wait before retrying a failed SQL query to the Amp server. + /// + /// Defaults to `1` second. + pub query_retry_min_delay: Duration, + + /// Maximum time to wait before retrying a failed SQL query to the Amp server. + /// + /// Defaults to `600` seconds. + pub query_retry_max_delay: Duration, + + /// Token used to authenticate Amp Flight gRPC service requests. + /// + /// Defaults to `None`. 
+ pub flight_service_token: Option, +} + +impl AmpEnv { + const DEFAULT_MAX_BUFFER_SIZE: usize = 1_000; + const DEFAULT_MAX_BLOCK_RANGE: usize = 2_000_000; + const DEFAULT_QUERY_RETRY_MIN_DELAY: Duration = Duration::from_secs(1); + const DEFAULT_QUERY_RETRY_MAX_DELAY: Duration = Duration::from_secs(600); + + pub(super) fn new(raw_env: &super::Inner) -> Self { + Self { + max_buffer_size: raw_env + .amp_max_buffer_size + .and_then(|value| { + if value == 0 { + return None; + } + Some(value) + }) + .unwrap_or(Self::DEFAULT_MAX_BUFFER_SIZE), + max_block_range: raw_env + .amp_max_block_range + .and_then(|mut value| { + if value == 0 { + value = usize::MAX; + } + Some(value) + }) + .unwrap_or(Self::DEFAULT_MAX_BLOCK_RANGE), + query_retry_min_delay: raw_env + .amp_query_retry_min_delay_seconds + .map(Duration::from_secs) + .unwrap_or(Self::DEFAULT_QUERY_RETRY_MIN_DELAY), + query_retry_max_delay: raw_env + .amp_query_retry_max_delay_seconds + .map(Duration::from_secs) + .unwrap_or(Self::DEFAULT_QUERY_RETRY_MAX_DELAY), + flight_service_token: raw_env.amp_flight_service_token.as_ref().and_then(|value| { + if value.is_empty() { + return None; + } + Some(value.to_string()) + }), + } + } +} diff --git a/graph/src/env/mod.rs b/graph/src/env/mod.rs index 8ff3335cfd5..6a41ac630d0 100644 --- a/graph/src/env/mod.rs +++ b/graph/src/env/mod.rs @@ -1,11 +1,13 @@ +mod amp; mod graphql; mod mappings; mod store; +use std::{collections::HashSet, env::VarError, fmt, str::FromStr, sync::Arc, time::Duration}; + use envconfig::Envconfig; use lazy_static::lazy_static; use semver::Version; -use std::{collections::HashSet, env::VarError, fmt, str::FromStr, time::Duration}; use self::graphql::*; use self::mappings::*; @@ -15,6 +17,8 @@ use crate::{ runtime::gas::CONST_MAX_GAS_PER_HANDLER, }; +pub use self::amp::AmpEnv; + #[cfg(debug_assertions)] use std::sync::Mutex; @@ -49,6 +53,7 @@ pub struct EnvVars { pub graphql: EnvVarsGraphQl, pub mappings: EnvVarsMapping, pub store: EnvVarsStore, + pub amp: Arc, /// Enables query throttling when getting database connections goes over this value. /// Load management can be disabled by setting this to 0. 
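// Illustrative sketch, not part of the diff: the normalization `AmpEnv::new` applies to
// the GRAPH_AMP_* variables. A zero GRAPH_AMP_MAX_BUFFER_SIZE falls back to the default,
// while a zero GRAPH_AMP_MAX_BLOCK_RANGE means "no limit".
const DEFAULT_MAX_BUFFER_SIZE: usize = 1_000;
const DEFAULT_MAX_BLOCK_RANGE: usize = 2_000_000;

fn normalize(raw_buffer_size: Option<usize>, raw_block_range: Option<usize>) -> (usize, usize) {
    let max_buffer_size = raw_buffer_size
        .filter(|value| *value != 0)
        .unwrap_or(DEFAULT_MAX_BUFFER_SIZE);

    let max_block_range = raw_block_range
        .map(|value| if value == 0 { usize::MAX } else { value })
        .unwrap_or(DEFAULT_MAX_BLOCK_RANGE);

    (max_buffer_size, max_block_range)
}

fn main() {
    assert_eq!(normalize(Some(0), Some(0)), (DEFAULT_MAX_BUFFER_SIZE, usize::MAX));
    assert_eq!(normalize(None, None), (DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MAX_BLOCK_RANGE));
}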
@@ -285,6 +290,7 @@ impl EnvVars { graphql, mappings: mapping_handlers, store, + amp: Arc::new(AmpEnv::new(&inner)), load_threshold: Duration::from_millis(inner.load_threshold_in_ms), load_jail_threshold: inner.load_jail_threshold, @@ -435,7 +441,7 @@ struct Inner { default = "false" )] allow_non_deterministic_fulltext_search: EnvVarBoolean, - #[envconfig(from = "GRAPH_MAX_SPEC_VERSION", default = "1.3.0")] + #[envconfig(from = "GRAPH_MAX_SPEC_VERSION", default = "1.4.0")] max_spec_version: Version, #[envconfig(from = "GRAPH_LOAD_WINDOW_SIZE", default = "300")] load_window_size_in_secs: u64, @@ -546,6 +552,17 @@ struct Inner { firehose_block_fetch_batch_size: usize, #[envconfig(from = "GRAPH_IPFS_REQUEST_TIMEOUT")] ipfs_request_timeout: Option, + + #[envconfig(from = "GRAPH_AMP_MAX_BUFFER_SIZE")] + amp_max_buffer_size: Option, + #[envconfig(from = "GRAPH_AMP_MAX_BLOCK_RANGE")] + amp_max_block_range: Option, + #[envconfig(from = "GRAPH_AMP_QUERY_RETRY_MIN_DELAY_SECONDS")] + amp_query_retry_min_delay_seconds: Option, + #[envconfig(from = "GRAPH_AMP_QUERY_RETRY_MAX_DELAY_SECONDS")] + amp_query_retry_max_delay_seconds: Option, + #[envconfig(from = "GRAPH_AMP_FLIGHT_SERVICE_TOKEN")] + amp_flight_service_token: Option, } #[derive(Clone, Debug)] diff --git a/graph/src/lib.rs b/graph/src/lib.rs index ee288c5729a..60cc5dbd21d 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -37,6 +37,8 @@ pub mod env; pub mod ipfs; +pub mod amp; + /// Wrapper for spawning tasks that abort on panic, which is our default. mod task_spawn; pub use task_spawn::{ diff --git a/graph/src/util/timed_rw_lock.rs b/graph/src/util/timed_rw_lock.rs index 4a52d531604..e8ff394be44 100644 --- a/graph/src/util/timed_rw_lock.rs +++ b/graph/src/util/timed_rw_lock.rs @@ -20,7 +20,7 @@ impl TimedRwLock { } } - pub fn write(&self, logger: &Logger) -> parking_lot::RwLockWriteGuard { + pub fn write(&self, logger: &Logger) -> parking_lot::RwLockWriteGuard<'_, T> { loop { let mut elapsed = Duration::from_secs(0); match self.lock.try_write_for(self.log_threshold) { @@ -36,11 +36,11 @@ impl TimedRwLock { } } - pub fn try_read(&self) -> Option> { + pub fn try_read(&self) -> Option> { self.lock.try_read() } - pub fn read(&self, logger: &Logger) -> parking_lot::RwLockReadGuard { + pub fn read(&self, logger: &Logger) -> parking_lot::RwLockReadGuard<'_, T> { loop { let mut elapsed = Duration::from_secs(0); match self.lock.try_read_for(self.log_threshold) { @@ -73,7 +73,7 @@ impl TimedMutex { } } - pub fn lock(&self, logger: &Logger) -> parking_lot::MutexGuard { + pub fn lock(&self, logger: &Logger) -> parking_lot::MutexGuard<'_, T> { let start = Instant::now(); let guard = self.lock.lock(); let elapsed = start.elapsed(); diff --git a/node/Cargo.toml b/node/Cargo.toml index 7ebbfef82f6..61c7784b79a 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -39,3 +39,6 @@ termcolor = "1.4.1" diesel = { workspace = true } prometheus = { version = "0.13.4", features = ["push"] } json-structural-diff = { version = "0.2", features = ["colorize"] } + +# Dependencies related to Amp subgraphs +tokio-util.workspace = true diff --git a/node/src/bin/manager.rs b/node/src/bin/manager.rs index e1606b1864d..58bdd8edafd 100644 --- a/node/src/bin/manager.rs +++ b/node/src/bin/manager.rs @@ -105,6 +105,15 @@ pub struct Opt { pub fork_base: Option, #[clap(long, help = "version label, used for prometheus metrics")] pub version_label: Option, + + #[clap( + long, + value_name = "{HOST:PORT|URL}", + env = "GRAPH_AMP_FLIGHT_SERVICE_ADDRESS", + help = "The address of the 
Amp Flight gRPC service" + )] + pub amp_flight_service_address: Option, + #[clap(subcommand)] pub cmd: Command, } @@ -1328,6 +1337,7 @@ async fn main() -> anyhow::Result<()> { network_name, ipfs_url, arweave_url, + opt.amp_flight_service_address.clone(), config, metrics_ctx, node_id, diff --git a/node/src/main.rs b/node/src/main.rs index 0c5744513bb..5b38e3f516a 100644 --- a/node/src/main.rs +++ b/node/src/main.rs @@ -4,6 +4,7 @@ use graph::futures01::Future as _; use graph::futures03::compat::Future01CompatExt; use graph::futures03::future::TryFutureExt; +use graph::amp; use graph::blockchain::{Blockchain, BlockchainKind}; use graph::components::link_resolver::{ArweaveClient, FileSizeLimit}; use graph::components::subgraph::Settings; @@ -37,6 +38,7 @@ use std::io::{BufRead, BufReader}; use std::path::Path; use std::time::Duration; use tokio::sync::mpsc; +use tokio_util::sync::CancellationToken; git_testament!(TESTAMENT); @@ -95,6 +97,7 @@ fn main() { async fn main_inner() { env_logger::init(); + let cancel_token = shutdown_token(); let env_vars = Arc::new(EnvVars::from_env().unwrap()); let opt = opt::Opt::parse(); @@ -372,11 +375,54 @@ async fn main_inner() { )); let graphql_server = GraphQLQueryServer::new(&logger_factory, graphql_runner.clone()); + let mut subgraph_instance_managers: Vec< + Arc, + > = Vec::new(); + + let amp_client = match opt.amp_flight_service_address.as_deref() { + Some(amp_flight_service_address) => { + let addr = amp_flight_service_address + .parse() + .expect("Invalid Amp Flight service address"); + + let mut amp_client = amp::FlightClient::new(addr) + .await + .expect("Failed to connect to Amp Flight service"); + + if let Some(auth_token) = &env_vars.amp.flight_service_token { + amp_client.set_auth_token(auth_token); + } + + let amp_client = Arc::new(amp_client); + let amp_instance_manager = graph_core::amp_subgraph::Manager::new( + &logger_factory, + metrics_registry.cheap_clone(), + env_vars.cheap_clone(), + &cancel_token, + network_store.subgraph_store(), + link_resolver.cheap_clone(), + amp_client.cheap_clone(), + ); + + subgraph_instance_managers.push(Arc::new(amp_instance_manager)); + + Some(amp_client) + } + None => { + warn!( + logger, + "Amp Flight gRPC service address not set; Support for Amp-powered subgraphs disabled" + ); + None + } + }; + let index_node_server = IndexNodeServer::new( &logger_factory, blockchain_map.clone(), network_store.clone(), link_resolver.clone(), + amp_client.cheap_clone(), ); if !opt.disable_block_ingestor { @@ -416,14 +462,17 @@ async fn main_inner() { link_resolver.clone(), ipfs_service, arweave_service, + amp_client.cheap_clone(), static_filters, ); + subgraph_instance_managers.push(Arc::new(subgraph_instance_manager)); + // Create IPFS-based subgraph provider let subgraph_provider = IpfsSubgraphAssignmentProvider::new( &logger_factory, link_resolver.clone(), - subgraph_instance_manager, + subgraph_instance_managers, sg_count, ); @@ -437,6 +486,7 @@ async fn main_inner() { Arc::new(subgraph_provider), network_store.subgraph_store(), subscription_manager, + amp_client.cheap_clone(), blockchain_map, node_id.clone(), version_switching_mode, @@ -606,3 +656,39 @@ fn make_graphman_server_config<'a>( auth_token: auth_token.to_owned(), }) } + +fn shutdown_token() -> CancellationToken { + use tokio::signal; + + let cancel_token = CancellationToken::new(); + let cancel_token_clone = cancel_token.clone(); + + async fn shutdown_signal_handler() { + let ctrl_c = async { + signal::ctrl_c().await.unwrap(); + }; + + #[cfg(unix)] + let 
terminate = async { + signal::unix::signal(signal::unix::SignalKind::terminate()) + .unwrap() + .recv() + .await; + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + _ = ctrl_c => {}, + _ = terminate => {}, + }; + } + + tokio::spawn(async move { + shutdown_signal_handler().await; + cancel_token_clone.cancel(); + }); + + cancel_token +} diff --git a/node/src/manager/commands/run.rs b/node/src/manager/commands/run.rs index f79a0497477..fef57d22f03 100644 --- a/node/src/manager/commands/run.rs +++ b/node/src/manager/commands/run.rs @@ -7,6 +7,7 @@ use crate::manager::PanicSubscriptionManager; use crate::network_setup::Networks; use crate::store_builder::StoreBuilder; use crate::MetricsContext; +use graph::amp; use graph::anyhow::bail; use graph::cheap_clone::CheapClone; use graph::components::link_resolver::{ArweaveClient, FileSizeLimit}; @@ -26,6 +27,7 @@ use graph_core::{ SubgraphAssignmentProvider as IpfsSubgraphAssignmentProvider, SubgraphInstanceManager, SubgraphRegistrar as IpfsSubgraphRegistrar, }; +use tokio_util::sync::CancellationToken; fn locate(store: &dyn SubgraphStore, hash: &str) -> Result { let mut locators = store.locators(hash)?; @@ -42,6 +44,7 @@ pub async fn run( _network_name: String, ipfs_url: Vec, arweave_url: String, + amp_flight_service_address: Option, config: Config, metrics_ctx: MetricsContext, node_id: NodeId, @@ -53,6 +56,7 @@ pub async fn run( subgraph, stop_block ); + let cancel_token = CancellationToken::new(); let env_vars = Arc::new(EnvVars::from_env().unwrap()); let metrics_registry = metrics_ctx.registry.clone(); let logger_factory = LoggerFactory::new(logger.clone(), None, metrics_ctx.registry.clone()); @@ -142,6 +146,42 @@ pub async fn run( let sg_metrics = Arc::new(SubgraphCountMetric::new(metrics_registry.clone())); + let mut subgraph_instance_managers: Vec< + Arc, + > = Vec::new(); + + let amp_client = match amp_flight_service_address { + Some(amp_flight_service_address) => { + let addr = amp_flight_service_address + .parse() + .expect("Invalid Amp Flight service address"); + + let mut amp_client = amp::FlightClient::new(addr) + .await + .expect("Failed to connect to Amp Flight service"); + + if let Some(auth_token) = &env_vars.amp.flight_service_token { + amp_client.set_auth_token(auth_token); + } + + let amp_client = Arc::new(amp_client); + let amp_instance_manager = graph_core::amp_subgraph::Manager::new( + &logger_factory, + metrics_registry.cheap_clone(), + env_vars.cheap_clone(), + &cancel_token, + network_store.subgraph_store(), + link_resolver.cheap_clone(), + amp_client.cheap_clone(), + ); + + subgraph_instance_managers.push(Arc::new(amp_instance_manager)); + + Some(amp_client) + } + None => None, + }; + let subgraph_instance_manager = SubgraphInstanceManager::new( &logger_factory, env_vars.cheap_clone(), @@ -152,14 +192,17 @@ pub async fn run( link_resolver.cheap_clone(), ipfs_service, arweave_service, + amp_client.cheap_clone(), static_filters, ); + subgraph_instance_managers.push(Arc::new(subgraph_instance_manager)); + // Create IPFS-based subgraph provider let subgraph_provider = Arc::new(IpfsSubgraphAssignmentProvider::new( &logger_factory, link_resolver.cheap_clone(), - subgraph_instance_manager, + subgraph_instance_managers, sg_metrics, )); @@ -171,6 +214,7 @@ pub async fn run( subgraph_provider.clone(), subgraph_store.clone(), panicking_subscription_manager, + amp_client, blockchain_map, node_id.clone(), SubgraphVersionSwitchingMode::Instant, diff --git a/node/src/opt.rs b/node/src/opt.rs 
index 9928144396a..3708a7da493 100644 --- a/node/src/opt.rs +++ b/node/src/opt.rs @@ -230,6 +230,14 @@ pub struct Opt { help = "Port for the graphman GraphQL server" )] pub graphman_port: u16, + + #[clap( + long, + value_name = "{HOST:PORT|URL}", + env = "GRAPH_AMP_FLIGHT_SERVICE_ADDRESS", + help = "The address of the Amp Flight gRPC service" + )] + pub amp_flight_service_address: Option, } impl From for config::Opt { diff --git a/runtime/wasm/src/host.rs b/runtime/wasm/src/host.rs index bc5610a63d0..aa079381a94 100644 --- a/runtime/wasm/src/host.rs +++ b/runtime/wasm/src/host.rs @@ -363,6 +363,7 @@ impl RuntimeHostTrait for RuntimeHost { DataSource::Onchain(_) => None, DataSource::Offchain(ds) => ds.done_at(), DataSource::Subgraph(_) => None, + DataSource::Amp(_) => None, } } @@ -371,6 +372,7 @@ impl RuntimeHostTrait for RuntimeHost { DataSource::Onchain(_) => {} DataSource::Offchain(ds) => ds.set_done_at(block), DataSource::Subgraph(_) => {} + DataSource::Amp(_) => {} } } diff --git a/runtime/wasm/src/module/instance.rs b/runtime/wasm/src/module/instance.rs index 63845e81c60..cddac22f9fc 100644 --- a/runtime/wasm/src/module/instance.rs +++ b/runtime/wasm/src/module/instance.rs @@ -141,7 +141,7 @@ impl WasmInstance { self.store.into_data() } - pub(crate) fn instance_ctx(&mut self) -> WasmInstanceContext { + pub(crate) fn instance_ctx(&mut self) -> WasmInstanceContext<'_> { WasmInstanceContext::new(&mut self.store) } diff --git a/runtime/wasm/src/to_from/external.rs b/runtime/wasm/src/to_from/external.rs index 6bb7122613f..3f19716f487 100644 --- a/runtime/wasm/src/to_from/external.rs +++ b/runtime/wasm/src/to_from/external.rs @@ -466,6 +466,7 @@ where } #[derive(Debug, Clone, Eq, PartialEq, AscType)] +#[allow(dead_code)] pub enum AscSubgraphEntityOp { Create, Modify, diff --git a/server/index-node/src/resolver.rs b/server/index-node/src/resolver.rs index 0b7761094b2..2018cd304ee 100644 --- a/server/index-node/src/resolver.rs +++ b/server/index-node/src/resolver.rs @@ -7,6 +7,7 @@ use graph::schema::EntityType; use web3::types::Address; use git_testament::{git_testament, CommitKind}; +use graph::amp; use graph::blockchain::{Blockchain, BlockchainKind, BlockchainMap}; use graph::components::store::{BlockPtrForNumber, BlockStore, QueryPermit, Store}; use graph::components::versions::VERSIONS; @@ -94,20 +95,26 @@ impl IntoValue for PublicProofOfIndexingResult { /// Resolver for the index node GraphQL API. 
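// Illustrative sketch, not part of the diff: how a long-running task can observe the
// CancellationToken handed out by the `shutdown_token()` helper added to node/src/main.rs
// above. `run_stream` is a hypothetical stand-in for the actual Amp streaming work.
use tokio_util::sync::CancellationToken;

async fn run_until_cancelled(cancel_token: CancellationToken) {
    tokio::select! {
        _ = cancel_token.cancelled() => {
            // SIGINT/SIGTERM was received; wind down gracefully.
        }
        _ = run_stream() => {
            // The work loop finished on its own.
        }
    }
}

async fn run_stream() {
    // Placeholder for real work.
    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}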
 #[derive(Clone)]
-pub struct IndexNodeResolver {
+pub struct IndexNodeResolver {
     logger: Logger,
     blockchain_map: Arc,
     store: Arc,
     #[allow(dead_code)]
     link_resolver: Arc,
+    amp_client: Option>,
     bearer_token: Option,
 }
 
-impl IndexNodeResolver {
+impl IndexNodeResolver
+where
+    S: Store,
+    AC: amp::Client + Send + Sync + 'static,
+{
     pub fn new(
         logger: &Logger,
         store: Arc,
         link_resolver: Arc,
+        amp_client: Option>,
         bearer_token: Option,
         blockchain_map: Arc,
     ) -> Self {
@@ -118,6 +125,7 @@ impl IndexNodeResolver {
             blockchain_map,
             store,
             link_resolver,
+            amp_client,
             bearer_token,
         }
     }
@@ -511,6 +519,7 @@ impl IndexNodeResolver {
             deployment_hash.clone(),
             raw_yaml,
             &self.link_resolver,
+            self.amp_client.cheap_clone(),
             &self.logger,
             max_spec_version,
         )
@@ -528,6 +537,7 @@ impl IndexNodeResolver {
             deployment_hash.clone(),
             raw_yaml,
             &self.link_resolver,
+            self.amp_client.cheap_clone(),
             &self.logger,
             max_spec_version,
         )
@@ -545,6 +555,7 @@ impl IndexNodeResolver {
             deployment_hash.clone(),
             raw_yaml,
             &self.link_resolver,
+            self.amp_client.cheap_clone(),
             &self.logger,
             max_spec_version,
         )
@@ -679,7 +690,11 @@ impl IndexNodeResolver {
 }
 
 #[async_trait]
-impl BlockPtrForNumber for IndexNodeResolver {
+impl BlockPtrForNumber for IndexNodeResolver
+where
+    S: Store,
+    AC: amp::Client + Send + Sync + 'static,
+{
     async fn block_ptr_for_number(
         &self,
         network: String,
@@ -752,7 +767,11 @@ fn entity_changes_to_graphql(entity_changes: Vec) -> r::Value {
 }
 
 #[async_trait]
-impl Resolver for IndexNodeResolver {
+impl Resolver for IndexNodeResolver
+where
+    S: Store,
+    AC: amp::Client + Send + Sync + 'static,
+{
     const CACHEABLE: bool = false;
 
     async fn query_permit(&self) -> QueryPermit {
diff --git a/server/index-node/src/server.rs b/server/index-node/src/server.rs
index 326d633b896..00b62c09ca2 100644
--- a/server/index-node/src/server.rs
+++ b/server/index-node/src/server.rs
@@ -1,5 +1,7 @@
 use graph::{
+    amp,
     blockchain::BlockchainMap,
+    cheap_clone::CheapClone,
     components::{
         server::server::{start, ServerHandle},
         store::Store,
@@ -10,16 +12,18 @@ use graph::{
 use crate::service::IndexNodeService;
 
 /// A GraphQL server based on Hyper.
-pub struct IndexNodeServer {
+pub struct IndexNodeServer {
     logger: Logger,
     blockchain_map: Arc,
     store: Arc,
     link_resolver: Arc,
+    amp_client: Option>,
 }
 
-impl IndexNodeServer
+impl IndexNodeServer
 where
     S: Store,
+    AC: amp::Client + Send + Sync + 'static,
 {
     /// Creates a new GraphQL server.
     pub fn new(
@@ -27,6 +31,7 @@ where
         blockchain_map: Arc,
         store: Arc,
         link_resolver: Arc,
+        amp_client: Option>,
     ) -> Self {
         let logger = logger_factory.component_logger(
             "IndexNodeServer",
@@ -42,6 +47,7 @@ where
             blockchain_map,
             store,
             link_resolver,
+            amp_client,
         }
     }
 
@@ -62,6 +68,7 @@ where
             self.blockchain_map.clone(),
             store,
             self.link_resolver.clone(),
+            self.amp_client.cheap_clone(),
         ));
 
         start(logger_for_service.clone(), port, move |req| {
diff --git a/server/index-node/src/service.rs b/server/index-node/src/service.rs
index d07d9b9e5e3..5aa00058e6c 100644
--- a/server/index-node/src/service.rs
+++ b/server/index-node/src/service.rs
@@ -15,6 +15,7 @@ use graph::hyper::header::{
 };
 
 use graph::hyper::{body::Body, Method, Request, Response, StatusCode};
+use graph::amp;
 use graph::components::{server::query::ServerError, store::Store};
 use graph::data::query::{Query, QueryError, QueryResult, QueryResults};
 use graph::prelude::{q, serde_json};
@@ -39,23 +40,26 @@ impl GraphQLMetrics for NoopGraphQLMetrics {
 
 /// A Hyper Service that serves GraphQL over a POST / endpoint.
 #[derive(Debug)]
-pub struct IndexNodeService {
+pub struct IndexNodeService {
     logger: Logger,
     blockchain_map: Arc,
     store: Arc,
     explorer: Arc>,
     link_resolver: Arc,
+    amp_client: Option>,
 }
 
-impl IndexNodeService
+impl IndexNodeService
 where
     S: Store,
+    AC: amp::Client + Send + Sync + 'static,
 {
     pub fn new(
         logger: Logger,
         blockchain_map: Arc,
         store: Arc,
         link_resolver: Arc,
+        amp_client: Option>,
     ) -> Self {
         let explorer = Arc::new(Explorer::new(store.clone()));
 
@@ -65,6 +69,7 @@ where
             store,
             explorer,
             link_resolver,
+            amp_client,
         }
     }
 
@@ -138,6 +143,7 @@ where
             &logger,
             store,
             self.link_resolver.clone(),
+            self.amp_client.cheap_clone(),
             validated.bearer_token,
             self.blockchain_map.clone(),
         );
diff --git a/store/postgres/src/relational/dsl.rs b/store/postgres/src/relational/dsl.rs
index e804a4d06ca..13cab9dd9d0 100644
--- a/store/postgres/src/relational/dsl.rs
+++ b/store/postgres/src/relational/dsl.rs
@@ -176,7 +176,7 @@ impl<'a> Table<'a> {
     }
 
     /// Reference a column in this table and use the correct SQL type `ST`
-    fn bind(&self, name: &str) -> Option> {
+    fn bind(&self, name: &str) -> Option> {
         self.column(name).map(|c| c.bind())
     }
 
diff --git a/store/postgres/src/relational_queries.rs b/store/postgres/src/relational_queries.rs
index 533990c42b9..79d96edd30c 100644
--- a/store/postgres/src/relational_queries.rs
+++ b/store/postgres/src/relational_queries.rs
@@ -4964,6 +4964,7 @@ impl<'a, Conn> RunQueryDsl for CountCurrentVersionsQuery<'a> {}
 /// Helper struct for returning the id's touched by the RevertRemove and
 /// RevertExtend queries
 #[derive(QueryableByName, PartialEq, Eq, Hash)]
+#[allow(dead_code)]
 pub struct CopyVid {
     #[diesel(sql_type = BigInt)]
     pub vid: i64,
diff --git a/store/postgres/src/subgraph_store.rs b/store/postgres/src/subgraph_store.rs
index 2ba2a1a58f6..5f91afbe1f4 100644
--- a/store/postgres/src/subgraph_store.rs
+++ b/store/postgres/src/subgraph_store.rs
@@ -789,7 +789,7 @@ impl SubgraphStoreInner {
     /// connections can deadlock the entire process if the pool runs out
     /// of connections in between getting the first one and trying to get the
     /// second one.
-    pub(crate) fn primary_conn(&self) -> Result {
+    pub(crate) fn primary_conn(&self) -> Result, StoreError> {
         let conn = self.mirror.primary().get()?;
         Ok(primary::Connection::new(conn))
     }
diff --git a/store/postgres/src/writable.rs b/store/postgres/src/writable.rs
index 74b516433b6..9c512e27ae7 100644
--- a/store/postgres/src/writable.rs
+++ b/store/postgres/src/writable.rs
@@ -49,7 +49,7 @@ use crate::{primary, primary::Site, relational::Layout, SubgraphStore};
 
 struct WritableSubgraphStore(SubgraphStore);
 
 impl WritableSubgraphStore {
-    fn primary_conn(&self) -> Result {
+    fn primary_conn(&self) -> Result, StoreError> {
         self.0.primary_conn()
     }
 
diff --git a/store/test-store/tests/chain/ethereum/manifest.rs b/store/test-store/tests/chain/ethereum/manifest.rs
index f025be2e626..f72ffe4d14e 100644
--- a/store/test-store/tests/chain/ethereum/manifest.rs
+++ b/store/test-store/tests/chain/ethereum/manifest.rs
@@ -4,6 +4,7 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
 
+use graph::amp;
 use graph::blockchain::DataSource;
 use graph::components::store::BLOCK_NUMBER_MAX;
 use graph::data::store::scalar::Bytes;
@@ -130,7 +131,15 @@ async fn try_resolve_manifest(
     let resolver: Arc = Arc::new(resolver);
     let raw = serde_yaml::from_str(text)?;
 
-    Ok(SubgraphManifest::resolve_from_raw(id, raw, &resolver, &LOGGER, max_spec_version).await?)
+    Ok(SubgraphManifest::resolve_from_raw(
+        id,
+        raw,
+        &resolver,
+        Option::>::None,
+        &LOGGER,
+        max_spec_version,
+    )
+    .await?)
 }
 
 async fn resolve_manifest(
@@ -152,9 +161,16 @@ async fn resolve_unvalidated(text: &str) -> UnvalidatedSubgraphManifest {
     let resolver: Arc = Arc::new(resolver);
     let raw = serde_yaml::from_str(text).unwrap();
 
-    UnvalidatedSubgraphManifest::resolve(id, raw, &resolver, &LOGGER, SPEC_VERSION_0_0_4.clone())
-        .await
-        .expect("Parsing simple manifest works")
+    UnvalidatedSubgraphManifest::resolve(
+        id,
+        raw,
+        &resolver,
+        Option::>::None,
+        &LOGGER,
+        SPEC_VERSION_0_0_4.clone(),
+    )
+    .await
+    .expect("Parsing simple manifest works")
 }
 
 // Some of these manifest tests should be made chain-independent, but for
@@ -221,7 +237,7 @@ dataSources:
       entities:
         - Gravatar
       network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -264,7 +280,7 @@ dataSources:
      entities:
        - Gravatar
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -300,7 +316,7 @@ dataSources:
      entities:
        - Gravatar
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -1305,6 +1321,7 @@ schema:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_0_0_4.clone(),
     )
@@ -1357,6 +1374,7 @@ schema:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_0_0_4.clone(),
     )
@@ -1433,6 +1451,7 @@ dataSources:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_0_0_4.clone(),
     )
@@ -1511,6 +1530,7 @@ dataSources:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_0_0_4.clone(),
     )
@@ -1620,6 +1640,7 @@ dataSources:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_1_2_0.clone(),
     )
@@ -1651,7 +1672,7 @@ dataSources:
      entities:
        - Gravatar
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -1693,6 +1714,7 @@ dataSources:
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_1_3_0.clone(),
     )
@@ -1721,7 +1743,7 @@ dataSources:
      entities:
        - User
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -1780,7 +1802,7 @@ dataSources:
      entities:
        - User
      network: mainnet
-      source: 
+      source:
        address: 'QmNestedSource'
        startBlock: 9562480
      mapping:
@@ -1843,6 +1865,7 @@ specVersion: 1.3.0
         id,
         raw,
         &resolver,
+        Option::>::None,
         &LOGGER,
         SPEC_VERSION_1_3_0.clone(),
     )
@@ -1873,7 +1896,7 @@ dataSources:
      entities:
        - Gravatar
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
@@ -1909,7 +1932,7 @@ dataSources:
      entities:
        - Gravatar
      network: mainnet
-      source: 
+      source:
        address: 'QmSource'
        startBlock: 9562480
      mapping:
diff --git a/tests/src/fixture/mod.rs b/tests/src/fixture/mod.rs
index cc99e406c1c..bbf5b662c15 100644
--- a/tests/src/fixture/mod.rs
+++ b/tests/src/fixture/mod.rs
@@ -8,6 +8,7 @@ use std::time::{Duration, Instant};
 use anyhow::Error;
 use async_stream::stream;
+use graph::amp;
 use graph::blockchain::block_stream::{
     BlockRefetcher, BlockStream, BlockStreamBuilder, BlockStreamError, BlockStreamEvent,
     BlockWithTriggers, FirehoseCursor,
 };
@@ -155,21 +156,18 @@ pub trait TestChainTrait {
 
 pub struct TestContext {
     pub logger: Logger,
-    pub provider: Arc<
-        IpfsSubgraphAssignmentProvider<
-            SubgraphInstanceManager,
-        >,
-    >,
+    pub provider: Arc,
     pub store: Arc,
     pub deployment: DeploymentLocator,
     pub subgraph_name: SubgraphName,
-    pub instance_manager: SubgraphInstanceManager,
+    pub instance_manager:
+        Arc>,
     pub link_resolver: Arc,
     pub arweave_resolver: Arc,
     pub env_vars: Arc,
     pub ipfs:
        Arc,
     graphql_runner: Arc,
-    indexing_status_service: Arc>,
+    indexing_status_service: Arc>,
 }
 
 #[derive(Deserialize)]
@@ -504,7 +502,7 @@ pub async fn setup(
     let sg_count = Arc::new(SubgraphCountMetric::new(mock_registry.cheap_clone()));
 
     let blockchain_map = Arc::new(blockchain_map);
-    let subgraph_instance_manager = SubgraphInstanceManager::new(
+    let subgraph_instance_manager = Arc::new(SubgraphInstanceManager::new(
         &logger_factory,
         env_vars.cheap_clone(),
         subgraph_store.clone(),
@@ -514,8 +512,9 @@ pub async fn setup(
         link_resolver.cheap_clone(),
         ipfs_service,
         arweave_service,
+        None,
         static_filters,
-    );
+    ));
 
     // Graphql runner
     let load_manager = LoadManager::new(&logger, Vec::new(), Vec::new(), mock_registry.clone());
@@ -531,13 +530,14 @@ pub async fn setup(
         blockchain_map.cheap_clone(),
         stores.network_store.cheap_clone(),
         link_resolver.cheap_clone(),
+        None,
     ));
 
     // Create IPFS-based subgraph provider
     let subgraph_provider = Arc::new(IpfsSubgraphAssignmentProvider::new(
         &logger_factory,
         link_resolver.cheap_clone(),
-        subgraph_instance_manager.clone(),
+        vec![subgraph_instance_manager.cheap_clone()],
         sg_count,
     ));
 
@@ -549,6 +549,7 @@ pub async fn setup(
         subgraph_provider.clone(),
         subgraph_store.clone(),
         panicking_subscription_manager,
+        Option::>::None,
         blockchain_map.clone(),
         node_id.clone(),
         SubgraphVersionSwitchingMode::Instant,