diff --git a/Cargo.lock b/Cargo.lock index 6c175377a63c1..9c8043be03893 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,15 +18,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" -[[package]] -name = "addr2line" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.0" @@ -180,12 +171,6 @@ dependencies = [ "url", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -320,6 +305,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.3.2" @@ -353,10 +347,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c6368f9ae5c6ec403ca910327ae0c9437b0a85255b6950c90d497e6177f6e5e" dependencies = [ "proc-macro-hack", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.4" @@ -365,16 +365,18 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", "arrow-buffer", "arrow-cast", + "arrow-csv", "arrow-data", "arrow-ipc", + "arrow-json", "arrow-ord", "arrow-row", "arrow-schema", @@ -384,82 +386,104 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash 0.8.11", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", + "chrono-tz", "half", - "hashbrown 0.16.0", - "num", + "hashbrown 0.16.1", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ 
"arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", "base64 0.22.1", "chrono", + "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] +[[package]] +name = "arrow-csv" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "regex", +] + [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -467,13 +491,39 @@ dependencies = [ "arrow-schema", "arrow-select", "flatbuffers", + "lz4_flex 0.12.0", + "zstd 0.13.2", +] + +[[package]] +name = "arrow-json" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.12.1", + "itoa", + "lexical-core", + "memchr", + "num-traits", + "ryu", + "serde_core", + "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -484,9 +534,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -497,29 +547,35 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +dependencies = [ + "bitflags 2.9.0", + "serde", + "serde_core", + "serde_json", +] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash 0.8.11", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -527,7 +583,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -610,15 +666,20 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.30" +version = 
"0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "977eb15ea9efd848bb8a4a1a2500347ed7f0bf794edf0dc3ddcf439f43d36b23" +checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" dependencies = [ - "compression-codecs", - "compression-core", + "brotli 7.0.0", + "bzip2 0.5.2", + "flate2", "futures-core", + "memchr", "pin-project-lite", "tokio", + "xz2", + "zstd 0.13.2", + "zstd-safe 7.2.1", ] [[package]] @@ -685,13 +746,13 @@ dependencies = [ "async-stream", "async-trait", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "fnv", "futures-timer", "futures-util", "http 1.3.1", - "indexmap 2.12.0", + "indexmap 2.12.1", "mime", "multer", "num-traits", @@ -715,9 +776,9 @@ dependencies = [ "darling 0.20.11", "proc-macro-crate 3.2.0", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "strum 0.26.3", - "syn 2.0.106", + "syn 2.0.113", "thiserror 1.0.68", ] @@ -739,8 +800,8 @@ version = "7.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ecdaff7c9cffa3614a9f9999bf9ee4c3078fe3ce4d6a6e161736b56febf2de" dependencies = [ - "bytes 1.10.1", - "indexmap 2.12.0", + "bytes 1.11.0", + "indexmap 2.12.1", "serde", "serde_json", ] @@ -823,7 +884,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08f6da6d49a956424ca4e28fe93656f790d748b469eaccbc7488fec545315180" dependencies = [ "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "memchr", "nkeys", @@ -899,8 +960,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -939,8 +1000,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2 1.0.101", - 
"quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -956,8 +1017,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -983,9 +1044,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "aws-config" -version = "1.6.1" +version = "1.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c39646d1a6b51240a1a23bb57ea4eebede7e16fbc237fdc876980233dcecb4f" +checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" dependencies = [ "aws-credential-types", "aws-runtime", @@ -999,7 +1060,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "hex", "http 1.3.1", @@ -1013,9 +1074,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.6" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d025db5d9f52cbc413b167136afb3d8aeea708c0d8884783cf6253be5e22f6f2" +checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -1023,11 +1084,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-lc-rs" +version = "1.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "aws-runtime" 
-version = "1.5.10" +version = "1.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c034a1bc1d70e16e7f4e4caf7e9f7693e4c9c24cd91cf17c2a0b21abaebc7c8b" +checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -1038,7 +1121,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "http-body 0.4.6", @@ -1091,7 +1174,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "once_cell", @@ -1099,6 +1182,28 @@ dependencies = [ "tracing 0.1.41", ] +[[package]] +name = "aws-sdk-dynamodb" +version = "1.101.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f98cd9e5f2fc790aff1f393bc3c8680deea31c05d3c6f23b625cdc50b1b6b4" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes 1.11.0", + "fastrand 2.3.0", + "http 0.2.9", + "regex-lite", + "tracing 0.1.41", +] + [[package]] name = "aws-sdk-elasticsearch" version = "1.67.0" @@ -1114,7 +1219,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "once_cell", @@ -1137,7 +1242,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "once_cell", @@ -1161,7 +1266,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "once_cell", @@ -1184,7 +1289,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 
2.3.0", "http 0.2.9", "regex-lite", @@ -1210,7 +1315,7 @@ dependencies = [ "aws-smithy-types", "aws-smithy-xml", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "hex", "hmac", @@ -1241,7 +1346,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "regex-lite", @@ -1286,7 +1391,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "once_cell", @@ -1296,9 +1401,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.64.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d4bdb0e5f80f0689e61c77ab678b2b9304af329616af38aef5b6b967b8e736" +checksum = "8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1309,19 +1414,18 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", - "once_cell", "regex-lite", "tracing 0.1.41", ] [[package]] name = "aws-sdk-ssooidc" -version = "1.65.0" +version = "1.93.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbbb3ce8da257aedbccdcb1aadafbbb6a5fe9adf445db0e1ea897bdc7e22d08" +checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1332,19 +1436,18 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", - "once_cell", "regex-lite", "tracing 0.1.41", ] [[package]] name = "aws-sdk-sts" -version = "1.73.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1e9c3c24e36183e2f698235ed38dcfbbdff1d09b9232dc866c4be3011e0b47e" +checksum = 
"55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1365,16 +1468,16 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.4" +version = "1.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "form_urlencoded", "hex", "hmac", @@ -1388,9 +1491,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.5" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" dependencies = [ "futures-util", "pin-project-lite", @@ -1405,7 +1508,7 @@ checksum = "b65d21e1ba6f2cdec92044f904356a19f5ad86961acf015741106cdfafd747c0" dependencies = [ "aws-smithy-http", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "crc32c", "crc32fast", "crc64fast-nvme", @@ -1427,7 +1530,7 @@ checksum = "c41172a5393f54e26d6b1bfbfce5d0abaa5c46870a1641c1c1899b527f8b6388" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "flate2", "futures-util", "http 0.2.9", @@ -1438,27 +1541,28 @@ dependencies = [ [[package]] name = "aws-smithy-eventstream" -version = "0.60.10" +version = "0.60.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "604c7aec361252b8f1c871a7641d5e0ba3a7f5a586e51b66bc9510a5519594d9" +checksum = "dc12f8b310e38cad85cf3bef45ad236f470717393c613266ce0a89512286b650" dependencies = [ "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "crc32fast", ] [[package]] name = "aws-smithy-http" 
-version = "0.62.3" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" dependencies = [ "aws-smithy-eventstream", "aws-smithy-runtime-api", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "bytes-utils", "futures-core", + "futures-util", "http 0.2.9", "http 1.3.1", "http-body 0.4.6", @@ -1470,9 +1574,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.1" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -1480,38 +1584,47 @@ dependencies = [ "h2 0.3.26", "h2 0.4.12", "http 0.2.9", + "http 1.3.1", "http-body 0.4.6", "hyper 0.14.32", + "hyper 1.7.0", "hyper-rustls 0.24.2", + "hyper-rustls 0.27.5", + "hyper-util", "pin-project-lite", "rustls 0.21.12", + "rustls 0.23.35", + "rustls-native-certs 0.8.1", + "rustls-pki-types", "tokio", + "tokio-rustls 0.26.2", + "tower 0.5.2", "tracing 0.1.41", ] [[package]] name = "aws-smithy-json" -version = "0.61.3" +version = "0.61.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92144e45819cae7dc62af23eac5a038a58aa544432d2102609654376a900bd07" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = 
"aws-smithy-query" -version = "0.60.7" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -1519,9 +1632,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.2" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -1529,7 +1642,7 @@ dependencies = [ "aws-smithy-observability", "aws-smithy-runtime-api", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "fastrand 2.3.0", "http 0.2.9", "http 1.3.1", @@ -1543,13 +1656,13 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" dependencies = [ "aws-smithy-async", "aws-smithy-types", - "bytes 1.10.1", + "bytes 1.11.0", "http 0.2.9", "http 1.3.1", "pin-project-lite", @@ -1560,12 +1673,12 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.2" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" dependencies = [ "base64-simd", - "bytes 1.10.1", + "bytes 1.11.0", "bytes-utils", "futures-core", "http 0.2.9", @@ -1586,18 +1699,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.9" +version = "0.60.13" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.8" +version = "1.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1616,7 +1729,7 @@ dependencies = [ "async-trait", "axum-core 0.3.4", "bitflags 1.3.2", - "bytes 1.10.1", + "bytes 1.11.0", "futures-util", "http 0.2.9", "http-body 0.4.6", @@ -1644,7 +1757,7 @@ checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" dependencies = [ "async-trait", "axum-core 0.4.5", - "bytes 1.10.1", + "bytes 1.11.0", "futures-util", "http 1.3.1", "http-body 1.0.0", @@ -1670,7 +1783,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" dependencies = [ "async-trait", - "bytes 1.10.1", + "bytes 1.11.0", "futures-util", "http 0.2.9", "http-body 0.4.6", @@ -1687,7 +1800,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ "async-trait", - "bytes 1.10.1", + "bytes 1.11.0", "futures-util", "http 1.3.1", "http-body 1.0.0", @@ -1708,7 +1821,7 @@ checksum = "7b552ad43a45a746461ec3d3a51dfb6466b4759209414b439c165eb6a6b7729e" dependencies = [ "async-trait", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "dyn-clone", "futures 0.3.31", "getrandom 0.2.15", @@ -1719,7 +1832,7 @@ dependencies = [ "pin-project", "quick-xml 0.31.0", "rand 0.8.5", - "reqwest 0.12.9", + "reqwest 0.12.26", "rustc_version", "serde", "serde_json", @@ -1737,7 +1850,7 
@@ checksum = "82c33c072c9d87777262f35abfe2a64b609437076551d4dac8373e60f0e3fde9" dependencies = [ "async-lock 3.4.0", "async-trait", - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "openssl", "pin-project", @@ -1777,7 +1890,7 @@ dependencies = [ "async-lock 3.4.0", "async-trait", "azure_core 0.21.0", - "bytes 1.10.1", + "bytes 1.11.0", "serde", "serde_derive", "time", @@ -1796,7 +1909,7 @@ dependencies = [ "azure_core 0.21.0", "azure_storage", "azure_svc_blobstorage", - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "serde", "serde_derive", @@ -1814,7 +1927,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e6c6f20c5611b885ba94c7bae5e02849a267381aecb8aee577e8c35ff4064c6" dependencies = [ "azure_core 0.21.0", - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "log", "once_cell", @@ -1845,21 +1958,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "backtrace" -version = "0.3.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" -dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", -] - [[package]] name = "base16" version = "0.2.1" @@ -1940,11 +2038,11 @@ dependencies = [ "clang-sys", "itertools 0.13.0", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "regex", "rustc-hash", "shlex", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -1983,8 +2081,8 @@ version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6cbbb8f56245b5a479b30a62cdc86d26e2f35c2b9f594bc4671654b03851380" dependencies = [ - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -1999,6 +2097,28 @@ dependencies = [ "wyz", ] +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -2050,7 +2170,7 @@ checksum = "8796b390a5b4c86f9f2e8173a68c2791f4fa6b038b84e96dbc01c016d1e6722c" dependencies = [ "base64 0.22.1", "bollard-stubs", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "futures-core", "futures-util", @@ -2065,7 +2185,7 @@ dependencies = [ "hyperlocal", "log", "pin-project-lite", - "rustls 0.23.23", + "rustls 0.23.35", "rustls-native-certs 0.8.1", "rustls-pemfile 2.1.0", "rustls-pki-types", @@ -2115,9 +2235,9 @@ dependencies = [ "ident_case", "prettyplease 0.2.15", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "rustversion", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -2145,8 +2265,19 @@ dependencies = [ "once_cell", "proc-macro-crate 3.2.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor 4.0.3", ] [[package]] @@ -2157,7 +2288,17 @@ checksum = "cf19e729cdbd51af9a397fb9ef8ac8378007b797f8273cfbfdf45dcaa316167b" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", - "brotli-decompressor", + "brotli-decompressor 5.0.0", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +dependencies = [ + "alloc-no-stdlib", + 
"alloc-stdlib", ] [[package]] @@ -2182,7 +2323,7 @@ dependencies = [ "getrandom 0.2.15", "getrandom 0.3.4", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "js-sys", "once_cell", "rand 0.9.2", @@ -2211,7 +2352,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", - "regex-automata 0.4.8", + "regex-automata 0.4.13", "serde", ] @@ -2239,7 +2380,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -2273,9 +2414,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" dependencies = [ "serde", ] @@ -2286,7 +2427,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e47d3a8076e283f3acd27400535992edb3ba4b5bb72f8891ad8fbe7932a7d4b9" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "either", ] @@ -2296,6 +2437,34 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5c434ae3cf0089ca203e9019ebe529c47ff45cefe8af7c85ecb734ef541822f" +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + +[[package]] +name = "bzip2-sys" 
+version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cargo-lock" version = "10.1.0" @@ -2340,10 +2509,11 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.15" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c736e259eea577f443d5c86c304f9f4ae0295c43f3ba05c21f1d66b5f06001af" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -2421,17 +2591,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link 0.1.0", + "windows-link 0.2.0", ] [[package]] @@ -2550,8 +2719,8 @@ checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" dependencies = [ "heck 0.5.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -2596,7 +2765,7 @@ dependencies = [ "apache-avro 0.20.0", "arrow", "async-trait", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "csv-core", "derivative", @@ -2679,7 +2848,7 @@ version = "4.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures-core", "memchr", "pin-project-lite", @@ -2687,6 +2856,19 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "comfy-table" +version = "7.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +dependencies = [ + "crossterm 0.27.0", + "crossterm 0.28.1", + "strum 0.26.3", + "strum_macros 0.26.4", + "unicode-width 0.2.0", +] + [[package]] name = "community-id" version = "0.2.3" @@ -2715,26 +2897,6 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "compression-codecs" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "485abf41ac0c8047c07c87c72c8fb3eb5197f6e9d7ded615dfd1a00ae00a0f64" -dependencies = [ - "brotli", - "compression-core", - "flate2", - "memchr", - "zstd 0.13.2", - "zstd-safe 7.2.1", -] - -[[package]] -name = "compression-core" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e47641d3deaf41fb1538ac1f54735925e275eaf3bf4d55c81b137fba797e5cbb" - [[package]] name = "concurrent-queue" version = "2.5.0" @@ -2834,6 +2996,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "convert_case" version = "0.4.0" @@ -2858,6 +3026,15 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "convert_case" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db05ffb6856bf0ecdf6367558a76a0e8a77b1713044eb92845c692100ed50190" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie" version = "0.18.1" @@ -2877,9 +3054,9 @@ checksum = "396de984970346b0d9e93d1415082923c679e5ae5c3ee3dcbd104f5610af126b" [[package]] name = "cookie_store" -version = "0.21.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9" +checksum = 
"3fc4bff745c9b4c7fb1e97b25d13153da2bc7796260141df62378998d070207f" dependencies = [ "cookie", "document-features", @@ -3068,6 +3245,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.9.0", + "crossterm_winapi", + "libc", + "parking_lot 0.12.4", + "winapi", +] + [[package]] name = "crossterm" version = "0.28.1" @@ -3177,17 +3367,33 @@ dependencies = [ ] [[package]] -name = "ctr" -version = "0.9.2" +name = "ctor" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" dependencies = [ - "cipher", + "ctor-proc-macro", + "dtor", ] [[package]] -name = "curl-sys" -version = "0.4.84+curl-8.17.0" +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + +[[package]] +name = "curl-sys" +version = "0.4.84+curl-8.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abc4294dc41b882eaff37973c2ec3ae203d0091341ee68fbadd1d06e0c18a73b" dependencies = [ @@ -3223,8 +3429,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - 
"syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3256,9 +3462,9 @@ dependencies = [ "fnv", "ident_case", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "strsim", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -3270,115 +3476,966 @@ dependencies = [ "fnv", "ident_case", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "strsim", - "syn 2.0.106", + "syn 2.0.113", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = "dary_heap" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core 0.9.11", +] + +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + +[[package]] +name = "data-url" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" + +[[package]] +name = "databend-client" +version = "0.28.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d689ffeaa08b1e4be3f035fcdadd4ea69db3dbf529ec5668c6911b8a301fc06" +dependencies = [ + "cookie", + "log", + "once_cell", + "parking_lot 0.12.4", + "percent-encoding", + "reqwest 0.12.26", + "semver", + "serde", + "serde_json", + "tokio", + "tokio-retry", + "tokio-stream", + "tokio-util", + "url", + "uuid", +] + +[[package]] +name = "datafusion" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" +dependencies = [ + "arrow", + "arrow-schema", + "async-trait", + "bytes 1.11.0", + "bzip2 0.6.1", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-session", + "datafusion-sql", + "flate2", + "futures 0.3.31", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot 0.12.4", + "parquet", + "rand 0.9.2", + "regex", + "rstest", + "sqlparser", + "tempfile", + "tokio", + "url", + "uuid", + "xz2", + "zstd 0.13.2", +] + +[[package]] +name = "datafusion-catalog" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + 
"datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-session", + "futures 0.3.31", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot 0.12.4", + "tokio", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures 0.3.31", + "itertools 0.14.0", + "log", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ipc", + "chrono", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + "tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" +dependencies = [ + "futures 0.3.31", + "log", + "tokio", +] + +[[package]] +name = "datafusion-datasource" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes 1.11.0", + "bzip2 0.6.1", + "chrono", + "datafusion-common", + 
"datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "flate2", + "futures 0.3.31", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand 0.9.2", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd 0.13.2", +] + +[[package]] +name = "datafusion-datasource-arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes 1.11.0", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures 0.3.31", + "itertools 0.14.0", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-datasource-csv" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" +dependencies = [ + "arrow", + "async-trait", + "bytes 1.11.0", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures 0.3.31", + "object_store", + "regex", + "tokio", +] + +[[package]] +name = "datafusion-datasource-json" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" +dependencies = [ + "arrow", + "async-trait", + "bytes 1.11.0", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + 
"datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures 0.3.31", + "object_store", + "tokio", +] + +[[package]] +name = "datafusion-datasource-parquet" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" +dependencies = [ + "arrow", + "async-trait", + "bytes 1.11.0", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-adapter", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-pruning", + "datafusion-session", + "futures 0.3.31", + "itertools 0.14.0", + "log", + "object_store", + "parking_lot 0.12.4", + "parquet", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" + +[[package]] +name = "datafusion-execution" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures 0.3.31", + "log", + "object_store", + "parking_lot 0.12.4", + "rand 0.9.2", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap 
2.12.1", + "itertools 0.14.0", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" +dependencies = [ + "arrow", + "datafusion-common", + "indexmap 2.12.1", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "num-traits", + "rand 0.9.2", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "51.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" +dependencies = [ + "arrow", + "arrow-ord", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot 0.12.4", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" +dependencies = [ + "datafusion-doc", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = 
"datafusion-optimizer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "itertools 0.14.0", + "parking_lot 0.12.4", + "paste", + "petgraph 0.8.3", +] + +[[package]] +name = "datafusion-physical-expr-adapter" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" +dependencies = [ + "ahash 0.8.11", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" +dependencies = [ + 
"arrow", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-pruning", + "itertools 0.14.0", + "recursive", +] + +[[package]] +name = "datafusion-physical-plan" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" +dependencies = [ + "ahash 0.8.11", + "arrow", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures 0.3.31", + "half", + "hashbrown 0.14.5", + "indexmap 2.12.1", + "itertools 0.14.0", + "log", + "parking_lot 0.12.4", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-proto" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" +dependencies = [ + "arrow", + "chrono", + "datafusion-catalog", + "datafusion-catalog-listing", + "datafusion-common", + "datafusion-datasource", + "datafusion-datasource-arrow", + "datafusion-datasource-csv", + "datafusion-datasource-json", + "datafusion-datasource-parquet", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-table", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-proto-common", + "object_store", + "prost 0.14.1", +] + +[[package]] +name = "datafusion-proto-common" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" +dependencies = [ + "arrow", + 
"datafusion-common", + "prost 0.14.1", +] + +[[package]] +name = "datafusion-pruning" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-datasource", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "itertools 0.14.0", + "log", +] + +[[package]] +name = "datafusion-session" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" +dependencies = [ + "async-trait", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot 0.12.4", +] + +[[package]] +name = "datafusion-sql" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" +dependencies = [ + "arrow", + "bigdecimal", + "chrono", + "datafusion-common", + "datafusion-expr", + "indexmap 2.12.1", + "log", + "recursive", + "regex", + "sqlparser", +] + +[[package]] +name = "dbl" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd2735a791158376708f9347fe8faba9667589d82427ef3aed6794a8981de3d9" +dependencies = [ + "generic-array", +] + +[[package]] +name = "deadpool" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ed5957ff93768adf7a65ab167a17835c3d2c3c50d084fe305174c112f468e2f" +dependencies = [ + "deadpool-runtime", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63dfa964fe2a66f3fde91fc70b267fe193d822c7e603e2a675a49a7f46ad3f49" +dependencies = [ + "tokio", +] + +[[package]] +name 
= "delta_kernel" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1eb81d155d4f2423b931c7bf7e58a3124b23ee9a074a4771e1751b72af7fdc5" +dependencies = [ + "arrow", + "bytes 1.11.0", + "chrono", + "comfy-table", + "crc", + "delta_kernel_derive", + "futures 0.3.31", + "indexmap 2.12.1", + "itertools 0.14.0", + "object_store", + "parquet", + "reqwest 0.12.26", + "roaring", + "rustc_version", + "serde", + "serde_json", + "strum 0.27.2", + "thiserror 2.0.17", + "tokio", + "tracing 0.1.41", + "url", + "uuid", + "z85", ] [[package]] -name = "darling_macro" -version = "0.20.11" +name = "delta_kernel_derive" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "c9e6474dabfc8e0b849ee2d68f8f13025230d1945b28c69695e9a21b9219ac8e" dependencies = [ - "darling_core 0.20.11", - "quote 1.0.40", - "syn 2.0.106", + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] -name = "darling_macro" -version = "0.21.3" +name = "deltalake" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +checksum = "822f8f58cd5a5a436925814138580012fb4fe1e2b522e079c5e1721b243f873d" dependencies = [ - "darling_core 0.21.3", - "quote 1.0.40", - "syn 2.0.106", + "ctor", + "delta_kernel", + "deltalake-aws", + "deltalake-core", + "deltalake-gcp", ] [[package]] -name = "dary_heap" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" - -[[package]] -name = "dashmap" -version = "6.1.0" +name = "deltalake-aws" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +checksum = 
"63b470ec0212b5a704424db8a7f44ae90d8c2b4fc96246860dea2b90f80fe1ee" dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core 0.9.11", + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-dynamodb", + "aws-sdk-sts", + "aws-smithy-runtime-api", + "backon", + "bytes 1.11.0", + "chrono", + "deltalake-core", + "futures 0.3.31", + "object_store", + "regex", + "thiserror 2.0.17", + "tokio", + "tracing 0.1.41", + "typed-builder 0.23.2", + "url", + "uuid", ] [[package]] -name = "data-encoding" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" - -[[package]] -name = "data-url" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" - -[[package]] -name = "databend-client" -version = "0.28.2" +name = "deltalake-core" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d689ffeaa08b1e4be3f035fcdadd4ea69db3dbf529ec5668c6911b8a301fc06" +checksum = "0fd42250f1dc45510e9745f5f747201ed9de72c13911ca5c11dd2cc27fe207e3" dependencies = [ - "cookie", - "log", - "once_cell", + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-trait", + "bytes 1.11.0", + "cfg-if", + "chrono", + "dashmap", + "datafusion", + "datafusion-proto", + "delta_kernel", + "deltalake-derive", + "dirs", + "either", + "futures 0.3.31", + "humantime", + "indexmap 2.12.1", + "itertools 0.14.0", + "num_cpus", + "object_store", "parking_lot 0.12.4", + "parquet", "percent-encoding", - "reqwest 0.12.9", - "semver", + "percent-encoding-rfc3986", + "pin-project-lite", + "rand 0.8.5", + "regex", "serde", "serde_json", + "sqlparser", + "strum 0.27.2", + 
"thiserror 2.0.17", "tokio", - "tokio-retry", - "tokio-stream", - "tokio-util", + "tracing 0.1.41", "url", "uuid", + "validator", ] [[package]] -name = "dbl" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd2735a791158376708f9347fe8faba9667589d82427ef3aed6794a8981de3d9" -dependencies = [ - "generic-array", -] - -[[package]] -name = "deadpool" -version = "0.12.2" +name = "deltalake-derive" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ed5957ff93768adf7a65ab167a17835c3d2c3c50d084fe305174c112f468e2f" +checksum = "3963d9fe965af7b1dea433271389e1e39c6a97ffdbc2e81d808f5b329e4577b3" dependencies = [ - "deadpool-runtime", - "num_cpus", - "tokio", + "convert_case 0.9.0", + "itertools 0.14.0", + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] -name = "deadpool-runtime" -version = "0.1.3" +name = "deltalake-gcp" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63dfa964fe2a66f3fde91fc70b267fe193d822c7e603e2a675a49a7f46ad3f49" +checksum = "6fc0da3f4db3e508d180650b0f802d63d494aafc2cec0f5031e85ef4f93dd78e" dependencies = [ + "async-trait", + "bytes 1.11.0", + "deltalake-core", + "futures 0.3.31", + "object_store", + "thiserror 2.0.17", "tokio", + "tracing 0.1.41", + "url", ] [[package]] @@ -3409,7 +4466,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -3420,8 +4477,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d65d7ce8132b7c0e54497a4d9a55a1c2a0912a0d786cf894472ba818fba45762" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3431,8 +4488,8 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ef941ded77d15ca19b40374869ac6000af1c9f2a4c0f3d4c70926287e6364a8f" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3442,8 +4499,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3463,8 +4520,8 @@ checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" dependencies = [ "darling 0.20.11", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3474,7 +4531,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -3485,7 +4542,7 @@ checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case 0.4.0", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "rustc_version", "syn 1.0.109", ] @@ -3517,6 +4574,15 @@ dependencies = [ "dirs-sys", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -3557,8 +4623,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3589,7 +4655,7 @@ version = "0.1.0" dependencies = [ "anyhow", "base64 0.22.1", - "bytes 1.10.1", + 
"bytes 1.11.0", "chrono", "chrono-tz", "dnsmsg-parser", @@ -3638,7 +4704,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9" dependencies = [ "bumpalo", - "bytes 1.10.1", + "bytes 1.11.0", "domain-macros", "futures-util", "hashbrown 0.14.5", @@ -3660,8 +4726,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3670,6 +4736,21 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "duct" version = "0.13.6" @@ -3682,6 +4763,12 @@ dependencies = [ "shared_child", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -3820,8 +4907,8 @@ checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" dependencies = [ "heck 0.4.1", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3832,8 +4919,8 @@ checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" dependencies = [ "once_cell", "proc-macro2 1.0.101", - 
"quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -3852,8 +4939,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67c78a4d8fdf9953a5c9d458f9efe940fd97a0cab0941c075a813ac594733827" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -4002,7 +5089,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "332b1937705b7ed2fce76837024e9ae6f41cd2ad18a32c052de081f89982561b" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -4056,7 +5143,7 @@ checksum = "d6215aee357f8c7c989ebb4b8466ca4d7dc93b3957039f2fc3ea2ade8ea5f279" dependencies = [ "bit-set", "derivative", - "regex-automata 0.4.8", + "regex-automata 0.4.13", "regex-syntax", ] @@ -4067,7 +5154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf04c5ec15464ace8355a7b440a33aece288993475556d461154d7a62ad9947c" dependencies = [ "bit-set", - "regex-automata 0.4.8", + "regex-automata 0.4.13", "regex-syntax", ] @@ -4107,13 +5194,13 @@ name = "file-source" version = "0.1.0" dependencies = [ "async-compression", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "file-source-common", "futures 0.3.31", "futures-util", "glob", - "indexmap 2.12.0", + "indexmap 2.12.1", "libc", "quickcheck", "tempfile", @@ -4129,7 +5216,7 @@ version = "0.1.0" dependencies = [ "async-compression", "bstr 1.12.0", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "crc", "dashmap", @@ -4147,6 +5234,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + [[package]] name = "finl_unicode" version = "1.2.0" @@ -4159,6 +5252,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flatbuffers" version = "25.9.23" @@ -4171,9 +5270,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -4261,9 +5360,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -4278,6 +5377,12 @@ dependencies = [ "num", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "fsevent-sys" version = "4.1.0" @@ -4400,8 +5505,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -4491,12 +5596,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "gimli" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" - [[package]] name = "git2" 
version = "0.20.2" @@ -4552,7 +5651,7 @@ dependencies = [ "arc-swap", "futures 0.3.31", "log", - "reqwest 0.12.9", + "reqwest 0.12.26", "serde", "serde_derive", "serde_json", @@ -4625,7 +5724,7 @@ dependencies = [ "heck 0.4.1", "lazy_static", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "serde", "serde_json", "syn 1.0.109", @@ -4705,13 +5804,13 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "fnv", "futures-core", "futures-sink", "futures-util", "http 0.2.9", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -4725,12 +5824,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ "atomic-waker", - "bytes 1.10.1", + "bytes 1.11.0", "fnv", "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.12.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -4739,13 +5838,14 @@ dependencies = [ [[package]] name = "half" -version = "2.4.1" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "num-traits", + "zerocopy 0.8.31", ] [[package]] @@ -4792,9 +5892,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", @@ -4831,7 +5931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270" dependencies = [ "base64 0.21.7", - "bytes 1.10.1", + "bytes 1.11.0", "headers-core", "http 0.2.9", "httpdate", @@ -5129,7 +6229,7 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "fnv", "itoa", ] @@ -5140,7 +6240,7 @@ version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "fnv", "itoa", ] @@ -5151,7 +6251,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "http 0.2.9", "pin-project-lite", ] @@ -5162,7 +6262,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "http 1.3.1", ] @@ -5172,7 +6272,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures-util", "http 1.3.1", "http-body 1.0.0", @@ -5239,7 +6339,7 @@ version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures-channel", "futures-core", "futures-util", @@ -5264,7 +6364,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" dependencies = [ "atomic-waker", - "bytes 1.10.1", 
+ "bytes 1.11.0", "futures-channel", "futures-core", "h2 0.4.12", @@ -5338,7 +6438,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca815a891b24fdfb243fa3239c86154392b0953ee584aa1a2a1f66d20cbe75cc" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "headers", "http 0.2.9", @@ -5360,7 +6460,6 @@ dependencies = [ "hyper 0.14.32", "log", "rustls 0.21.12", - "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -5375,7 +6474,7 @@ dependencies = [ "http 1.3.1", "hyper 1.7.0", "hyper-util", - "rustls 0.23.23", + "rustls 0.23.35", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", @@ -5415,7 +6514,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "hyper 0.14.32", "native-tls", "tokio", @@ -5428,7 +6527,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "http-body-util", "hyper 1.7.0", "hyper-util", @@ -5440,18 +6539,23 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ - "bytes 1.10.1", + "base64 0.22.1", + "bytes 1.11.0", "futures-channel", + "futures-core", "futures-util", "http 1.3.1", "http-body 1.0.0", "hyper 1.7.0", + "ipnet", + "libc", + "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.0", "tokio", "tower-service", "tracing 0.1.41", @@ -5609,8 +6713,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -5621,9 +6725,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -5653,12 +6757,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "serde", "serde_core", ] @@ -5695,7 +6799,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22fa7ee6be451ea0b1912b962c91c8380835e97cf1584a77e18264e908448dcb" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "log", "nom 7.1.3", "smallvec", @@ -5738,8 +6842,8 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b23a0c8dfe501baac4adf6ebbfa6eddf8f0c07f56b058cc1288017e32397846c" dependencies = [ - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -5754,6 +6858,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + [[package]] name = "inventory" version = "0.3.21" @@ -5774,17 +6884,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] 
-name = "io-uring" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" -dependencies = [ - "bitflags 2.9.0", - "cfg-if", - "libc", -] - [[package]] name = "iovec" version = "0.1.4" @@ -5830,6 +6929,16 @@ version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf370abdafd54d13e54a620e8c3e1145f28e46cc9d704bc6d94414559df41763" +[[package]] +name = "iri-string" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -6028,7 +7137,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d9455388f4977de4d0934efa9f7d36296295537d774574113a20f6082de03da" dependencies = [ "base64 0.13.1", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "serde", "serde-value", @@ -6116,7 +7225,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d81336eb3a5b10a40c97a5a97ad66622e92bad942ce05ee789edd730aa4f8603" dependencies = [ "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "either", "futures 0.3.31", @@ -6200,7 +7309,7 @@ dependencies = [ "ena", "itertools 0.13.0", "lalrpop-util", - "petgraph", + "petgraph 0.6.4", "regex", "regex-syntax", "sha3", @@ -6216,7 +7325,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feee752d43abd0f4807a921958ab4131f692a44d4d599733d4419c5d586176ce" dependencies = [ - "regex-automata 0.4.8", + "regex-automata 0.4.13", "rustversion", ] @@ -6308,11 +7417,17 @@ dependencies = [ "lexical-util", ] +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + 
[[package]] name = "libc" -version = "0.2.175" +version = "0.2.179" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f" [[package]] name = "libflate" @@ -6334,7 +7449,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a599cb10a9cd92b1300debcef28da8f70b935ec937f44fcd1b70a7c986a11c5c" dependencies = [ "core2", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "rle-decode-fast", ] @@ -6582,6 +7697,26 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "lz4_flex" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "macaddr" version = "1.0.1" @@ -6605,8 +7740,8 @@ checksum = "cc33f9f0351468d26fbc53d9ce00a096c8522ecb42f19b50f34f2c422f76d21d" dependencies = [ "macro_magic_core", "macro_magic_macros", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -6619,8 +7754,8 @@ dependencies = [ "derive-syn-parse", "macro_magic_core_macros", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -6630,8 +7765,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b02abfe41815b5bd98dbd4260173db2c116dda171dc0fe7838cb206333b83308" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -6641,8 +7776,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"73ea28ee64b88876bf45277ed9a5817c1817df061a74f2b988971a12570e5869" dependencies = [ "macro_magic_core", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -6666,7 +7801,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.4.8", + "regex-automata 0.4.13", ] [[package]] @@ -6767,7 +7902,7 @@ version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1ada651cd6bdffe01e5f35067df53491f1fe853d2b154008ca2bd30b3d3fcf6" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "lockfree-object-pool", "metrics", @@ -6788,7 +7923,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "hashbrown 0.15.2", - "indexmap 2.12.0", + "indexmap 2.12.1", "metrics", "ordered-float 4.6.0", "quanta", @@ -6825,6 +7960,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -6879,9 +8015,9 @@ dependencies = [ "once_cell", "proc-macro-error2", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "regex", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -6962,7 +8098,7 @@ dependencies = [ "percent-encoding", "rand 0.8.5", "rustc_version_runtime", - "rustls 0.23.23", + "rustls 0.23.35", "rustversion", "serde", "serde_bytes", @@ -6990,8 +8126,8 @@ checksum = "63981427a0f26b89632fd2574280e069d09fb2912a3138da15de0174d11dd077" dependencies = [ "macro_magic", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -7000,7 +8136,7 @@ version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a15d522be0a9c3e46fd2632e272d178f56387bdb5c9fbb3a36c649062e9b5219" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", 
"encoding_rs", "futures-util", "http 1.3.1", @@ -7056,7 +8192,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17ebbe97acce52d06aebed4cd4a87c0941f4b2519b59b82b4feb5bd0ce003dfd" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.13.0", "ndarray", "noisy_float", @@ -7115,7 +8251,7 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16c903aa70590cb93691bf97a767c8d1d6122d2cc9070433deb3bbf36ce8bd23" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures 0.3.31", "libc", "log", @@ -7366,9 +8502,9 @@ checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" [[package]] name = "num-complex" -version = "0.4.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ "num-traits", ] @@ -7478,8 +8614,8 @@ checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -7490,8 +8626,8 @@ checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ "proc-macro-crate 3.2.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -7553,20 +8689,58 @@ dependencies = [ [[package]] name = "object" -version = "0.36.7" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] +[[package]] +name = "object_store" +version = "0.12.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes 1.11.0", + "chrono", + "form_urlencoded", + "futures 0.3.31", + "http 1.3.1", + "http-body-util", + "httparse", + "humantime", + "hyper 1.7.0", + "itertools 0.14.0", + "md-5", + "parking_lot 0.12.4", + "percent-encoding", + "quick-xml 0.38.4", + "rand 0.9.2", + "reqwest 0.12.26", + "ring", + "rustls-pemfile 2.1.0", + "serde", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.17", + "tokio", + "tracing 0.1.41", + "url", + "walkdir", + "wasm-bindgen-futures", + "web-time", +] + [[package]] name = "octseq" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "126c3ca37c9c44cec575247f43a3e4374d8927684f129d2beeb0d2cef262fe12" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "serde", "smallvec", ] @@ -7633,7 +8807,7 @@ dependencies = [ "anyhow", "backon", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "futures 0.3.31", "getrandom 0.2.15", @@ -7643,7 +8817,7 @@ dependencies = [ "md-5", "percent-encoding", "quick-xml 0.37.4", - "reqwest 0.12.9", + "reqwest 0.12.26", "serde", "serde_json", "tokio", @@ -7704,8 +8878,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -7740,7 +8914,7 @@ dependencies = [ name = "opentelemetry-proto" version = "0.1.0" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "glob", "hex", @@ -7891,6 +9065,43 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "parquet" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +dependencies 
= [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli 8.0.0", + "bytes 1.11.0", + "chrono", + "flate2", + "futures 0.3.31", + "half", + "hashbrown 0.16.1", + "lz4_flex 0.12.0", + "num-bigint", + "num-integer", + "num-traits", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd 0.13.2", +] + [[package]] name = "parse-size" version = "1.1.0" @@ -7949,6 +9160,12 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "percent-encoding-rfc3986" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3637c05577168127568a64e9dc5a6887da720efef07b3d9472d45f63ab191166" + [[package]] name = "pest" version = "2.7.11" @@ -7979,8 +9196,8 @@ dependencies = [ "pest", "pest_meta", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8000,8 +9217,20 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ - "fixedbitset", - "indexmap 2.12.0", + "fixedbitset 0.4.2", + "indexmap 2.12.1", +] + +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset 0.5.7", + "hashbrown 0.15.2", + "indexmap 2.12.1", + "serde", ] [[package]] @@ -8065,8 +9294,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] 
[[package]] @@ -8242,7 +9471,7 @@ checksum = "76ff0abab4a9b844b93ef7b81f1efc0a366062aaef2cd702c76256b5dc075c54" dependencies = [ "base64 0.22.1", "byteorder", - "bytes 1.10.1", + "bytes 1.11.0", "fallible-iterator", "hmac", "md-5", @@ -8258,7 +9487,7 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613283563cd90e1dfc3518d548caee47e0e725455ed619881f5cf21f36de4b48" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "fallible-iterator", "postgres-protocol", @@ -8337,7 +9566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2 1.0.101", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -8388,7 +9617,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", ] [[package]] @@ -8399,8 +9628,8 @@ checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" dependencies = [ "proc-macro-error-attr2", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8437,7 +9666,7 @@ dependencies = [ name = "prometheus-parser" version = "0.1.0" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "nom 8.0.0", "prost 0.12.6", "prost-build 0.12.6", @@ -8473,8 +9702,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "095a99f75c69734802359b682be8daaf8980296731f6470434ea2c652af1dd30" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8483,7 +9712,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", 
"prost-derive 0.11.9", ] @@ -8493,7 +9722,7 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "prost-derive 0.12.6", ] @@ -8503,23 +9732,33 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "prost-derive 0.13.5", ] +[[package]] +name = "prost" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +dependencies = [ + "bytes 1.11.0", + "prost-derive 0.14.1", +] + [[package]] name = "prost-build" version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "heck 0.4.1", "itertools 0.10.5", "lazy_static", "log", "multimap", - "petgraph", + "petgraph 0.6.4", "prettyplease 0.1.25", "prost 0.11.9", "prost-types 0.11.9", @@ -8535,18 +9774,18 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "heck 0.5.0", "itertools 0.12.1", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.6.4", "prettyplease 0.2.15", "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.106", + "syn 2.0.113", "tempfile", ] @@ -8561,12 +9800,12 @@ dependencies = [ "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.6.4", "prettyplease 0.2.15", "prost 0.13.5", "prost-types 0.13.5", "regex", - "syn 2.0.106", + "syn 2.0.113", "tempfile", ] @@ -8579,7 +9818,7 @@ dependencies = [ "anyhow", "itertools 0.10.5", "proc-macro2 1.0.101", - 
"quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -8592,8 +9831,8 @@ dependencies = [ "anyhow", "itertools 0.12.1", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8605,8 +9844,21 @@ dependencies = [ "anyhow", "itertools 0.14.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = "prost-derive" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8665,6 +9917,16 @@ version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -8681,7 +9943,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -8703,7 +9965,7 @@ checksum = "6cee616af00383c461f9ceb0067d15dee68e7d313ae47dbd7f8543236aed7ee9" dependencies = [ "async-channel 2.3.1", "async-trait", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "crc", "data-url", @@ -8766,7 +10028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed1a693391a16317257103ad06a88c6529ac640846021da7c435a06fffdacd7" dependencies = [ "chrono", - "indexmap 2.12.0", + "indexmap 2.12.1", "newtype-uuid", "quick-xml 0.37.4", "strip-ansi-escapes", @@ -8794,6 +10056,16 @@ 
dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.38.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quickcheck" version = "1.0.3" @@ -8812,8 +10084,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -8822,12 +10094,12 @@ version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "pin-project-lite", "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.23", + "rustls 0.23.35", "socket2 0.5.10", "thiserror 2.0.17", "tokio", @@ -8840,12 +10112,12 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "getrandom 0.2.15", "rand 0.8.5", "ring", "rustc-hash", - "rustls 0.23.23", + "rustls 0.23.35", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -8879,9 +10151,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.40" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2 1.0.101", ] @@ -9003,7 +10275,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" dependencies = [ "getrandom 0.3.4", - "zerocopy 0.8.16", + "zerocopy 
0.8.31", ] [[package]] @@ -9142,6 +10414,26 @@ dependencies = [ "futures-io", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote 1.0.42", + "syn 2.0.113", +] + [[package]] name = "redis" version = "0.32.5" @@ -9150,7 +10442,7 @@ checksum = "7cd3650deebc68526b304898b192fa4102a4ef0b9ada24da096559cb60e0eef8" dependencies = [ "arc-swap", "backon", - "bytes 1.10.1", + "bytes 1.11.0", "cfg-if", "combine 4.6.6", "futures-channel", @@ -9234,8 +10526,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -9247,7 +10539,7 @@ dependencies = [ "ahash 0.8.11", "fluent-uri 0.4.1", "getrandom 0.3.4", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "parking_lot 0.12.4", "percent-encoding", "serde_json", @@ -9255,13 +10547,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.2" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.8", + "regex-automata 0.4.13", "regex-syntax", ] @@ -9273,9 +10565,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.13" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -9289,7 +10581,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c11639076bf147be211b90e47790db89f4c22b6c8a9ca6e960833869da67166" dependencies = [ "aho-corasick", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.13.0", "nohash", "regex", @@ -9304,9 +10596,9 @@ checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "relative-path" @@ -9330,7 +10622,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bf93c4af7a8bb7d879d51cebe797356ff10ae8516ace542b5182d9dcac10b2" dependencies = [ "base64 0.21.7", - "bytes 1.10.1", + "bytes 1.11.0", "encoding_rs", "futures-core", "futures-util", @@ -9369,13 +10661,12 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.9" +version = "0.12.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" dependencies = [ - "async-compression", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "cookie", "cookie_store", "futures-channel", @@ -9389,19 +10680,15 @@ dependencies = [ "hyper-rustls 0.27.5", "hyper-tls 0.6.0", "hyper-util", - "ipnet", "js-sys", "log", - "mime", "mime_guess", "native-tls", - "once_cell", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.23", + "rustls 0.23.35", 
"rustls-native-certs 0.8.1", - "rustls-pemfile 2.1.0", "rustls-pki-types", "serde", "serde_json", @@ -9411,14 +10698,15 @@ dependencies = [ "tokio-native-tls", "tokio-rustls 0.26.2", "tokio-util", + "tower 0.5.2", + "tower-http 0.6.8", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 0.26.1", - "windows-registry", + "webpki-roots 1.0.4", ] [[package]] @@ -9430,7 +10718,7 @@ dependencies = [ "anyhow", "async-trait", "http 1.3.1", - "reqwest 0.12.9", + "reqwest 0.12.26", "serde", "thiserror 1.0.68", "tower-service", @@ -9449,7 +10737,7 @@ dependencies = [ "http 1.3.1", "hyper 1.7.0", "parking_lot 0.11.2", - "reqwest 0.12.9", + "reqwest 0.12.26", "reqwest-middleware", "retry-policies", "thiserror 1.0.68", @@ -9504,7 +10792,7 @@ checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" dependencies = [ "bitvec", "bytecheck", - "bytes 1.10.1", + "bytes 1.11.0", "hashbrown 0.12.3", "ptr_meta", "rend", @@ -9521,7 +10809,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -9622,11 +10910,11 @@ dependencies = [ "glob", "proc-macro-crate 3.2.0", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "regex", "relative-path", "rustc_version", - "syn 2.0.106", + "syn 2.0.113", "unicode-ident", ] @@ -9636,7 +10924,7 @@ version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1568e15fab2d546f940ed3a21f48bbbd1c494c90c99c4481339364a497f94a9" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "flume 0.11.0", "futures-util", "log", @@ -9656,7 +10944,7 @@ checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" dependencies = [ "arrayvec", "borsh", - "bytes 1.10.1", + "bytes 1.11.0", "num-traits", "rand 0.8.5", "rkyv", @@ -9664,12 +10952,6 @@ 
dependencies = [ "serde_json", ] -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -9763,31 +11045,20 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.23" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.8", + "rustls-webpki 0.103.8", "subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework 2.10.0", -] - [[package]] name = "rustls-native-certs" version = "0.7.0" @@ -9834,11 +11105,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" dependencies = [ "web-time", + "zeroize", ] [[package]] @@ -9862,6 +11134,18 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustls-webpki" +version = "0.103.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -10081,6 +11365,12 @@ dependencies = [ 
"serde", ] +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -10136,8 +11426,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10147,8 +11437,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10157,7 +11447,7 @@ version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "memchr", "ryu", @@ -10211,8 +11501,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10255,7 +11545,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.12.0", + "indexmap 2.12.1", "schemars 0.9.0", "schemars 1.0.3", "serde", @@ -10273,8 +11563,8 @@ checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ "darling 0.20.11", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10283,7 +11573,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "ryu", "serde", @@ -10311,8 +11601,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10435,6 +11725,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "simdutf8" version = "0.1.5" @@ -10565,7 +11861,7 @@ checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" dependencies = [ "heck 0.4.1", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -10577,8 +11873,8 @@ checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ "heck 0.5.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -10651,6 +11947,28 @@ dependencies = [ "der", ] +[[package]] +name = "sqlparser" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" +dependencies = [ + "log", + "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", +] + [[package]] name = "sqlx" version = "0.8.6" @@ -10671,7 +11989,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" dependencies = [ "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "crc", "crossbeam-queue", @@ -10683,7 +12001,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.2", "hashlink", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "memchr", "once_cell", @@ -10706,10 +12024,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "sqlx-core", "sqlx-macros-core", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -10724,7 +12042,7 @@ dependencies = [ "hex", "once_cell", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "serde", "serde_json", "sha2", @@ -10732,7 +12050,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.106", + "syn 2.0.113", "tokio", "url", ] @@ -10747,7 +12065,7 @@ dependencies = [ "base64 0.22.1", "bitflags 2.9.0", "byteorder", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "crc", "digest", @@ -10849,6 +12167,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -10931,6 +12262,9 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] [[package]] name = "strum_macros" @@ -10940,9 +12274,9 @@ checksum = 
"23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck 0.4.1", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "rustversion", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -10953,9 +12287,9 @@ checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" dependencies = [ "heck 0.5.0", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "rustversion", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -10966,8 +12300,8 @@ checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ "heck 0.5.0", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11013,18 +12347,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "unicode-ident", ] [[package]] name = "syn" -version = "2.0.106" +version = "2.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "unicode-ident", ] @@ -11050,8 +12384,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11253,8 +12587,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7c61ec9a6f64d2793d8a45faba21efbe3ced62a886d44c36a009b2b519b4c7e" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11264,8 +12598,8 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11277,6 +12611,17 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float 2.10.1", +] + [[package]] name = "tikv-jemalloc-sys" version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" @@ -11377,23 +12722,20 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.47.1" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ - "backtrace", - "bytes 1.10.1", - "io-uring", + "bytes 1.11.0", "libc", "mio", "parking_lot 0.12.4", "pin-project-lite", "signal-hook-registry", - "slab", "socket2 0.6.0", "tokio-macros", "tracing 0.1.41", - "windows-sys 0.59.0", + "windows-sys 0.61.0", ] [[package]] @@ -11419,13 +12761,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11457,7 +12799,7 @@ checksum = "6c95d533c83082bb6490e0189acaa0bbeef9084e60471b696ca6988cd0541fb0" dependencies = [ "async-trait", "byteorder", - "bytes 
1.10.1", + "bytes 1.11.0", "fallible-iterator", "futures-channel", "futures-util", @@ -11513,7 +12855,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls 0.23.23", + "rustls 0.23.35", "tokio", ] @@ -11536,7 +12878,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" dependencies = [ "async-stream", - "bytes 1.10.1", + "bytes 1.11.0", "futures-core", "tokio", "tokio-stream", @@ -11569,10 +12911,11 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" -source = "git+https://github.com/vectordotdev/tokio?branch=tokio-util-0.7.13-framed-read-continue-on-error#b4bdfda8fe8aa24eba36de0d60063b14f30c7fe7" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures-core", "futures-io", "futures-sink", @@ -11588,7 +12931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" dependencies = [ "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "futures-core", "futures-sink", "http 1.3.1", @@ -11620,7 +12963,7 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "serde_core", "serde_spanned 1.0.3", "toml_datetime 0.7.3", @@ -11653,7 +12996,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime 0.6.11", "winnow 0.5.18", ] 
@@ -11664,7 +13007,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", "serde_spanned 0.6.9", "toml_datetime 0.6.11", @@ -11703,7 +13046,7 @@ dependencies = [ "async-trait", "axum 0.6.20", "base64 0.21.7", - "bytes 1.10.1", + "bytes 1.11.0", "flate2", "h2 0.3.26", "http 0.2.9", @@ -11736,7 +13079,7 @@ dependencies = [ "async-trait", "axum 0.7.5", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "h2 0.4.12", "http 1.3.1", "http-body 1.0.0", @@ -11765,7 +13108,7 @@ dependencies = [ "prettyplease 0.1.25", "proc-macro2 1.0.101", "prost-build 0.11.9", - "quote 1.0.40", + "quote 1.0.42", "syn 1.0.109", ] @@ -11778,8 +13121,8 @@ dependencies = [ "prettyplease 0.2.15", "proc-macro2 1.0.101", "prost-build 0.12.6", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -11810,7 +13153,7 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.0", + "indexmap 2.12.1", "pin-project-lite", "slab", "sync_wrapper 1.0.1", @@ -11829,7 +13172,7 @@ checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ "async-compression", "bitflags 2.9.0", - "bytes 1.10.1", + "bytes 1.11.0", "futures-core", "futures-util", "http 0.2.9", @@ -11851,7 +13194,7 @@ checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ "base64 0.21.7", "bitflags 2.9.0", - "bytes 1.10.1", + "bytes 1.11.0", "http 1.3.1", "http-body 1.0.0", "http-body-util", @@ -11862,6 +13205,29 @@ dependencies = [ "tracing 0.1.41", ] +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + 
"async-compression", + "bitflags 2.9.0", + "bytes 1.11.0", + "futures-core", + "futures-util", + "http 1.3.1", + "http-body 1.0.0", + "http-body-util", + "iri-string", + "pin-project-lite", + "tokio", + "tokio-util", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.3" @@ -11917,8 +13283,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12014,7 +13380,7 @@ dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex-automata 0.4.8", + "regex-automata 0.4.13", "serde", "serde_json", "sharded-slab", @@ -12043,8 +13409,8 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12087,7 +13453,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e3dac10fd62eaf6617d3a904ae222845979aec67c615d1c842b4002c7666fb9" dependencies = [ "byteorder", - "bytes 1.10.1", + "bytes 1.11.0", "data-encoding", "http 0.2.9", "httparse", @@ -12106,7 +13472,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" dependencies = [ "byteorder", - "bytes 1.10.1", + "bytes 1.11.0", "data-encoding", "http 1.3.1", "httparse", @@ -12142,6 +13508,15 @@ dependencies = [ "typed-builder-macro 0.20.1", ] +[[package]] +name = "typed-builder" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31aa81521b70f94402501d848ccc0ecaa8f93c8eb6999eb9747e72287757ffda" +dependencies = [ + "typed-builder-macro 0.23.2", +] + [[package]] name = 
"typed-builder-macro" version = "0.16.2" @@ -12149,8 +13524,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12160,8 +13535,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", +] + +[[package]] +name = "typed-builder-macro" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076a02dc54dd46795c2e9c8282ed40bcfb1e22747e955de9389a1de28190fb26" +dependencies = [ + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12190,13 +13576,13 @@ checksum = "5879ce67ba9e525fe088c882ede1337c32c3f80e83e72d9fd3cc6c8e05bcb3d7" dependencies = [ "async-trait", "base64 0.22.1", - "bytes 1.10.1", + "bytes 1.11.0", "dyn-clone", "futures 0.3.31", "getrandom 0.2.15", "pin-project", "rand 0.8.5", - "reqwest 0.12.9", + "reqwest 0.12.26", "serde", "serde_json", "time", @@ -12215,9 +13601,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6cbccdbe531c8d553812a609bdb70c0d1002ad91333498e18df42c98744b15cc" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "rustc_version", - "syn 2.0.106", + "syn 2.0.113", ] [[package]] @@ -12240,8 +13626,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35f5380909ffc31b4de4f4bdf96b877175a016aa2ca98cee39fcfd8c4d53d952" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12393,9 +13779,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", @@ -12441,14 +13827,14 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "getrandom 0.3.4", "js-sys", "rand 0.9.2", - "serde", + "serde_core", "wasm-bindgen", ] @@ -12462,6 +13848,36 @@ dependencies = [ "vsimd", ] +[[package]] +name = "validator" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0b4a29d8709210980a09379f27ee31549b73292c87ab9899beee1c0d3be6303" +dependencies = [ + "idna", + "once_cell", + "regex", + "serde", + "serde_derive", + "serde_json", + "url", + "validator_derive", +] + +[[package]] +name = "validator_derive" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bac855a2ce6f843beb229757e6e570a42e837bcb15e5f449dd48d5747d41bf77" +dependencies = [ + "darling 0.20.11", + "once_cell", + "proc-macro-error2", + "proc-macro2 1.0.101", + "quote 1.0.42", + "syn 2.0.113", +] + [[package]] name = "valuable" version = "0.1.0" @@ -12488,7 +13904,7 @@ dependencies = [ "git2", "glob", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "indicatif", "indoc", "itertools 0.14.0", @@ -12555,7 +13971,7 @@ dependencies = [ "bloomy", "bollard", "byteorder", - "bytes 1.10.1", + "bytes 1.11.0", "bytesize", "cfg-if", "chrono", @@ -12567,6 +13983,7 @@ dependencies = [ "csv", "databend-client", "deadpool", + "deltalake", "derivative", "dirs-next", "dnsmsg-parser", @@ -12601,7 +14018,7 @@ 
dependencies = [ "hyper 0.14.32", "hyper-openssl 0.9.2", "hyper-proxy", - "indexmap 2.12.0", + "indexmap 2.12.1", "indoc", "inventory", "ipnet", @@ -12744,7 +14161,7 @@ dependencies = [ "async-stream", "async-trait", "bytecheck", - "bytes 1.10.1", + "bytes 1.11.0", "clap", "crc32fast", "criterion", @@ -12785,12 +14202,12 @@ name = "vector-common" version = "0.1.0" dependencies = [ "async-stream", - "bytes 1.10.1", + "bytes 1.11.0", "chrono", "crossbeam-utils", "derivative", "futures 0.3.31", - "indexmap 2.12.0", + "indexmap 2.12.1", "itertools 0.14.0", "metrics", "paste", @@ -12811,8 +14228,8 @@ name = "vector-common-macros" version = "0.1.0" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -12824,7 +14241,7 @@ dependencies = [ "chrono-tz", "encoding_rs", "http 0.2.9", - "indexmap 2.12.0", + "indexmap 2.12.1", "inventory", "no-proxy", "num-traits", @@ -12847,10 +14264,10 @@ dependencies = [ "convert_case 0.8.0", "darling 0.20.11", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "serde", "serde_json", - "syn 2.0.106", + "syn 2.0.113", "tracing 0.1.41", ] @@ -12860,10 +14277,10 @@ version = "0.1.0" dependencies = [ "darling 0.20.11", "proc-macro2 1.0.101", - "quote 1.0.40", + "quote 1.0.42", "serde", "serde_derive_internals", - "syn 2.0.106", + "syn 2.0.113", "vector-config", "vector-config-common", ] @@ -12875,7 +14292,7 @@ dependencies = [ "async-trait", "base64 0.22.1", "bitmask-enum", - "bytes 1.10.1", + "bytes 1.11.0", "cfg-if", "chrono", "chrono-tz", @@ -12891,7 +14308,7 @@ dependencies = [ "headers", "http 0.2.9", "hyper-proxy", - "indexmap 2.12.0", + "indexmap 2.12.1", "inventory", "ipnet", "metrics", @@ -13117,7 +14534,7 @@ dependencies = [ "base16", "base62", "base64-simd", - "bytes 1.10.1", + "bytes 1.11.0", "cbc", "cfb-mode", "cfg-if", @@ -13149,7 +14566,7 @@ dependencies = [ "hostname 0.4.0", "iana-time-zone", "idna", - "indexmap 2.12.0", + "indexmap 2.12.1", 
"indoc", "influxdb-line-protocol", "ipcrypt-rs", @@ -13157,7 +14574,7 @@ dependencies = [ "jsonschema", "lalrpop", "lalrpop-util", - "lz4_flex", + "lz4_flex 0.11.5", "md-5", "mlua", "nom 8.0.0", @@ -13183,7 +14600,7 @@ dependencies = [ "quoted_printable", "rand 0.8.5", "regex", - "reqwest 0.12.9", + "reqwest 0.12.26", "reqwest-middleware", "reqwest-retry", "roxmltree", @@ -13271,7 +14688,7 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4378d202ff965b011c64817db11d5829506d3404edeadb61f190d111da3f231c" dependencies = [ - "bytes 1.10.1", + "bytes 1.11.0", "futures-channel", "futures-util", "headers", @@ -13341,8 +14758,8 @@ dependencies = [ "bumpalo", "log", "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", "wasm-bindgen-shared", ] @@ -13364,7 +14781,7 @@ version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ - "quote 1.0.40", + "quote 1.0.42", "wasm-bindgen-macro-support", ] @@ -13375,8 +14792,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -13470,6 +14887,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "4.4.2" @@ -13599,8 +15025,8 @@ dependencies = [ "windows-implement", "windows-interface", "windows-link 0.1.0", - "windows-result 0.3.1", - "windows-strings 0.3.1", + "windows-result", + "windows-strings", ] [[package]] 
@@ -13620,8 +15046,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -13631,8 +15057,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -13657,26 +15083,6 @@ dependencies = [ "windows-link 0.1.0", ] -[[package]] -name = "windows-registry" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" -dependencies = [ - "windows-result 0.2.0", - "windows-strings 0.1.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-result" version = "0.3.1" @@ -13697,16 +15103,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "windows-strings" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" -dependencies = [ - "windows-result 0.2.0", - "windows-targets 0.52.6", -] - [[package]] name = "windows-strings" version = "0.3.1" @@ -14112,6 +15508,15 @@ version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.4" @@ -14131,11 +15536,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", "synstructure", ] +[[package]] +name = "z85" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" + [[package]] name = "zerocopy" version = "0.7.31" @@ -14147,11 +15558,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.16" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b8c07a70861ce02bad1607b5753ecb2501f67847b9f9ada7c160fff0ec6300c" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ - "zerocopy-derive 0.8.16", + "zerocopy-derive 0.8.31", ] [[package]] @@ -14161,19 +15572,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] name = "zerocopy-derive" -version = "0.8.16" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5226bc9a9a9836e7428936cde76bb6b22feea1a8bfdbc0d241136e4d13417e25" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -14192,8 +15603,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" dependencies = [ 
"proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", "synstructure", ] @@ -14221,8 +15632,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2 1.0.101", - "quote 1.0.40", - "syn 2.0.106", + "quote 1.0.42", + "syn 2.0.113", ] [[package]] @@ -14278,3 +15689,8 @@ dependencies = [ "cc", "pkg-config", ] + +[[patch.unused]] +name = "tokio-util" +version = "0.7.13" +source = "git+https://github.com/vectordotdev/tokio?branch=tokio-util-0.7.13-framed-read-continue-on-error#b4bdfda8fe8aa24eba36de0d60063b14f30c7fe7" diff --git a/Cargo.toml b/Cargo.toml index 9cc3b90078953..f5b21eaf6f718 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -270,7 +270,7 @@ aws-types = { version = "1.3.8", default-features = false, optional = true } # The sts crate is needed despite not being referred to anywhere in the code because we need to set the # `behavior-version-latest` feature. Without this we get a runtime panic when `auth.assume_role` authentication # is configured. -aws-sdk-sts = { version = "1.73.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"], optional = true } +aws-sdk-sts = { version = "1.95.0", default-features = false, features = ["behavior-version-latest", "rt-tokio"], optional = true } # The `aws-sdk-sts` crate is needed despite not being referred to anywhere in the code because we need to set the # `behavior-version-latest` feature. Without this we get a runtime panic when `auth.assume_role` authentication is configured. 
@@ -297,6 +297,9 @@ azure_core_for_storage = { package = "azure_core", version = "0.21.0", default-f # OpenDAL opendal = { version = "0.54", default-features = false, features = ["services-webhdfs"], optional = true } +# Delta Lake +deltalake = { version = "0.30.0", default-features = false, features = ["datafusion", "gcs", "s3", "rustls"], optional = true } + # Tower tower = { version = "0.5.2", default-features = false, features = ["buffer", "limit", "retry", "timeout", "util", "balance", "discover"] } tower-http = { version = "0.4.4", default-features = false, features = ["compression-full", "decompression-gzip", "trace"] } @@ -338,10 +341,13 @@ greptimedb-ingester = { git = "https://github.com/GreptimeTeam/greptimedb-ingest # External libs arc-swap = { version = "1.7", default-features = false, optional = true } -async-compression = { version = "0.4.27", default-features = false, features = ["tokio", "gzip", "zstd"], optional = true } +# Pinned to 0.4.19 to use xz2 instead of liblzma (avoids conflict with datafusion's xz2) +# See: https://github.com/apache/datafusion/issues/15342 +async-compression = { version = "=0.4.19", default-features = false, features = ["tokio", "gzip", "zstd"], optional = true } apache-avro = { version = "0.16.0", default-features = false, optional = true } -arrow = { version = "56.2.0", default-features = false, features = ["ipc"], optional = true } -arrow-schema = { version = "56.2.0", default-features = false, optional = true } +# Updated to 57.1.0 for compatibility with deltalake 0.30.0 +arrow = { version = "57.1.0", default-features = false, features = ["ipc"], optional = true } +arrow-schema = { version = "57.1.0", default-features = false, optional = true } axum = { version = "0.6.20", default-features = false } base64 = { workspace = true, optional = true } bloomy = { version = "1.2.0", default-features = false, optional = true } @@ -493,7 +499,6 @@ nix = { git = "https://github.com/vectordotdev/nix.git", branch = "memfd/gnu/mus 
# The `heim` crates depend on `ntapi` 0.3.7 on Windows, but that version has an # unaligned access bug fixed in the following revision. ntapi = { git = "https://github.com/MSxDOS/ntapi.git", rev = "24fc1e47677fc9f6e38e5f154e6011dc9b270da6" } - [features] # Default features for *-unknown-linux-gnu and *-apple-darwin default = ["api", "api-client", "enrichment-tables", "sinks", "sources", "sources-dnstap", "transforms", "unix", "rdkafka?/gssapi-vendored", "secrets"] @@ -601,6 +606,7 @@ sources-logs = [ "sources-aws_s3", "sources-aws_sqs", "sources-datadog_agent", + "sources-delta_lake_cdf", "sources-demo_logs", "sources-docker_logs", "sources-exec", @@ -653,6 +659,7 @@ sources-aws_kinesis_firehose = ["dep:base64"] sources-aws_s3 = ["aws-core", "dep:aws-sdk-sqs", "dep:aws-sdk-s3", "dep:async-compression", "sources-aws_sqs", "tokio-util/io"] sources-aws_sqs = ["aws-core", "dep:aws-sdk-sqs"] sources-datadog_agent = ["sources-utils-http-encoding", "protobuf-build", "dep:prost"] +sources-delta_lake_cdf = ["dep:deltalake", "codecs-arrow"] sources-demo_logs = ["dep:fakedata"] sources-dnstap = ["sources-utils-net-tcp", "dep:base64", "dep:hickory-proto", "dep:dnsmsg-parser", "dep:dnstap-parser", "protobuf-build", "dep:prost"] sources-docker_logs = ["docker"] @@ -789,6 +796,7 @@ sinks-logs = [ "sinks-clickhouse", "sinks-console", "sinks-databend", + "sinks-delta_lake", "sinks-datadog_events", "sinks-datadog_logs", "sinks-datadog_traces", @@ -855,6 +863,7 @@ sinks-chronicle = [] sinks-clickhouse = ["dep:rust_decimal", "codecs-arrow"] sinks-console = [] sinks-databend = ["dep:databend-client"] +sinks-delta_lake = ["dep:deltalake", "codecs-arrow", "dep:opendal", "dep:arc-swap"] sinks-datadog_events = [] sinks-datadog_logs = [] sinks-datadog_metrics = ["protobuf-build", "dep:prost", "dep:prost-reflect"] @@ -906,6 +915,7 @@ all-integration-tests = [ "chronicle-integration-tests", "clickhouse-integration-tests", "databend-integration-tests", + "delta-lake-integration-tests", 
"datadog-agent-integration-tests", "datadog-logs-integration-tests", "datadog-metrics-integration-tests", @@ -972,6 +982,8 @@ azure-blob-integration-tests = ["sinks-azure_blob"] chronicle-integration-tests = ["sinks-gcp"] clickhouse-integration-tests = ["sinks-clickhouse"] databend-integration-tests = ["sinks-databend"] +delta-lake-integration-tests = ["sinks-delta_lake", "dep:deltalake"] +delta-lake-cdf-integration-tests = ["sources-delta_lake_cdf", "dep:deltalake"] datadog-agent-integration-tests = ["sources-datadog_agent"] datadog-logs-integration-tests = ["sinks-datadog_logs"] datadog-metrics-integration-tests = ["sinks-datadog_metrics", "dep:prost"] diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index 7a622d52edc2b..ccbb2bb75d00c 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -14,7 +14,7 @@ path = "tests/bin/generate-avro-fixtures.rs" [dependencies] apache-avro = { version = "0.20.0", default-features = false } -arrow = { version = "56.2.0", default-features = false, features = ["ipc"] } +arrow = { version = "57.1.0", default-features = false, features = ["ipc"] } async-trait.workspace = true bytes.workspace = true chrono.workspace = true diff --git a/lib/codecs/src/encoding/format/arrow/builder.rs b/lib/codecs/src/encoding/format/arrow/builder.rs index 240edb9ec7a1c..e7a4d8be83988 100644 --- a/lib/codecs/src/encoding/format/arrow/builder.rs +++ b/lib/codecs/src/encoding/format/arrow/builder.rs @@ -391,7 +391,7 @@ fn build_array_for_field(events: &[Event], field: &Field) -> Result Result { diff --git a/lib/codecs/src/encoding/format/arrow/mod.rs b/lib/codecs/src/encoding/format/arrow/mod.rs index c21c65a68bdb5..67b79869734e1 100644 --- a/lib/codecs/src/encoding/format/arrow/mod.rs +++ b/lib/codecs/src/encoding/format/arrow/mod.rs @@ -20,7 +20,7 @@ use snafu::Snafu; use std::sync::Arc; use vector_config::configurable_component; -use builder::build_record_batch; +pub use builder::build_record_batch; /// Provides Arrow schema for 
encoding. /// @@ -123,6 +123,11 @@ impl ArrowStreamSerializer { schema: SchemaRef::new(schema), }) } + + /// Get a reference to the Arrow schema used by this serializer + pub fn schema(&self) -> &SchemaRef { + &self.schema + } } impl tokio_util::codec::Encoder> for ArrowStreamSerializer { @@ -234,8 +239,11 @@ pub fn encode_events_to_arrow_ipc_stream( Ok(buffer.into_inner().freeze()) } -/// Recursively makes a Field and all its nested fields nullable -fn make_field_nullable(field: &arrow::datatypes::Field) -> arrow::datatypes::Field { +/// Recursively makes a Field and all its nested fields nullable. +/// +/// This is useful for schema evolution scenarios where new fields need to be nullable, +/// or when events may have missing fields that should be represented as null. +pub fn make_field_nullable(field: &arrow::datatypes::Field) -> arrow::datatypes::Field { let new_data_type = match field.data_type() { DataType::List(inner_field) => DataType::List(make_field_nullable(inner_field).into()), DataType::Struct(fields) => { diff --git a/lib/codecs/src/encoding/format/mod.rs b/lib/codecs/src/encoding/format/mod.rs index ccafb2b969cd7..1a22d04f0ff5f 100644 --- a/lib/codecs/src/encoding/format/mod.rs +++ b/lib/codecs/src/encoding/format/mod.rs @@ -25,6 +25,7 @@ use std::fmt::Debug; #[cfg(feature = "arrow")] pub use arrow::{ ArrowEncodingError, ArrowStreamSerializer, ArrowStreamSerializerConfig, SchemaProvider, + build_record_batch, make_field_nullable, }; pub use avro::{AvroSerializer, AvroSerializerConfig, AvroSerializerOptions}; pub use cef::{CefSerializer, CefSerializerConfig}; diff --git a/lib/file-source-common/Cargo.toml b/lib/file-source-common/Cargo.toml index 3b912196c5d72..5e8305ff2fa27 100644 --- a/lib/file-source-common/Cargo.toml +++ b/lib/file-source-common/Cargo.toml @@ -20,7 +20,7 @@ serde_json = { version = "1.0.143", default-features = false } bstr = { version = "1.12", default-features = false } bytes = { version = "1.10.1", default-features = false, 
features = ["serde"] } dashmap = { version = "6.1", default-features = false } -async-compression = { version = "0.4.27", features = ["tokio", "gzip"] } +async-compression = { version = "=0.4.19", features = ["tokio", "gzip"] } vector-common = { path = "../vector-common", default-features = false } vector-config = { path = "../vector-config", default-features = false } tokio = { workspace = true, features = ["full"] } diff --git a/lib/file-source/Cargo.toml b/lib/file-source/Cargo.toml index 684cbad18ef3a..87b6babe04823 100644 --- a/lib/file-source/Cargo.toml +++ b/lib/file-source/Cargo.toml @@ -21,7 +21,7 @@ futures = { version = "0.3.31", default-features = false, features = ["executor" futures-util.workspace = true vector-common = { path = "../vector-common", default-features = false } file-source-common = { path = "../file-source-common" } -async-compression = { version = "0.4.27", features = ["tokio", "gzip"] } +async-compression = { version = "=0.4.19", features = ["tokio", "gzip"] } [dev-dependencies] tokio = { workspace = true, features = ["full"] } diff --git a/src/sinks/delta_lake/config.rs b/src/sinks/delta_lake/config.rs new file mode 100644 index 0000000000000..24f43450f8c91 --- /dev/null +++ b/src/sinks/delta_lake/config.rs @@ -0,0 +1,285 @@ +//! Configuration for the Delta Lake sink. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use arc_swap::ArcSwap; +use tower::ServiceBuilder; +use url::Url; +use vector_lib::codecs::encoding::SchemaProvider; +use vector_lib::configurable::configurable_component; +use vector_lib::sink::VectorSink; + +use crate::config::{AcknowledgementsConfig, GenerateConfig, Input, SinkConfig, SinkContext}; +use crate::sinks::util::{ + BatchConfig, RealtimeSizeBasedDefaultBatchSettings, ServiceBuilderExt, TowerRequestConfig, +}; +use crate::sinks::{Healthcheck, prelude::*}; + +use super::request_builder::{DeltaLakeRequestBuilder, SharedSchema}; +use super::schema::DeltaLakeSchemaProvider; +use super::service::{DeltaLakeRetryLogic, DeltaLakeService}; +use super::sink::DeltaLakeSink; + +/// Default value for allow_nullable_fields - enabled by default for better compatibility +const fn default_allow_nullable_fields() -> bool { + true +} + +/// Configuration for the `delta_lake` sink. +#[configurable_component(sink( + "delta_lake", + "Write log events to Delta Lake tables on cloud object storage." +))] +#[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] +pub struct DeltaLakeConfig { + /// Full URI to the Delta Lake table. + /// + /// Supports multiple storage backends: + /// - Google Cloud Storage: `gs://bucket/path/to/table` + /// - Amazon S3: `s3://bucket/path/to/table` + /// - S3-compatible (MinIO, etc.): `s3://bucket/path/to/table` with custom endpoint + /// + /// The table must already exist - automatic table creation is not yet supported. + #[configurable(metadata(docs::examples = "gs://my-bucket/analytics/events"))] + #[configurable(metadata(docs::examples = "s3://my-bucket/logs/application"))] + pub table_uri: String, + + /// Storage-specific options. 
+ /// + /// Configuration options specific to the storage backend: + /// + /// **For GCS:** + /// - `google_service_account`: Path to service account JSON file + /// + /// **For S3:** + /// - `aws_access_key_id`: AWS access key + /// - `aws_secret_access_key`: AWS secret key + /// - `aws_region`: AWS region (e.g., "us-east-1") + /// - `aws_endpoint`: Custom S3 endpoint (for MinIO, LocalStack, etc.) + /// - `aws_allow_http`: Allow HTTP connections (for local testing) + /// - `aws_s3_path_style`: Use path-style addressing (for MinIO) + /// + /// **Client options (all backends):** + /// - `timeout`: Request timeout (e.g., "30s") + /// - `connect_timeout`: Connection establishment timeout (e.g., "5s") + /// - `pool_idle_timeout`: Keep-alive timeout for idle connections (e.g., "90s") + /// + /// If not provided, the sink will use default credentials from the environment. + #[serde(default)] + pub storage_options: HashMap, + + /// Enable automatic schema evolution. + /// + /// When enabled, the sink will: + /// - Discover new fields from incoming events and include them in writes + /// - Allow Delta Lake to merge new columns into the table schema + /// - Handle external schema changes by reloading and retrying + /// + /// Discovered fields are always nullable since existing table rows won't have them. + /// + /// Disable this for strict schema enforcement where events must match the table schema exactly. + #[configurable(metadata(docs::examples = true))] + #[serde(default)] + pub schema_evolution: bool, + + /// Allow nullable fields in the schema. + /// + /// When enabled, all fields in the schema will be treated as nullable, + /// allowing events with missing fields to be written without errors. + /// This is useful when incoming events may not contain all fields defined in the table schema. + #[configurable(metadata(docs::examples = true))] + #[serde(default = "default_allow_nullable_fields")] + pub allow_nullable_fields: bool, + + /// Batching behavior configuration. 
+ /// + /// For optimal Delta Lake performance, larger batch sizes (50-100MB) are recommended. + #[configurable(derived)] + #[serde(default)] + pub batch: BatchConfig, + + /// Request handling configuration. + #[configurable(derived)] + #[serde(default)] + pub request: TowerRequestConfig, + + /// Acknowledgements configuration. + #[configurable(derived)] + #[serde( + default, + deserialize_with = "crate::serde::bool_or_struct", + skip_serializing_if = "crate::serde::is_default" + )] + pub acknowledgements: AcknowledgementsConfig, +} + +impl GenerateConfig for DeltaLakeConfig { + fn generate_config() -> toml::Value { + toml::Value::try_from(Self { + table_uri: "gs://my-bucket/analytics/events".to_string(), + storage_options: HashMap::new(), + schema_evolution: false, + allow_nullable_fields: default_allow_nullable_fields(), + batch: BatchConfig::default(), + request: TowerRequestConfig::default(), + acknowledgements: AcknowledgementsConfig::default(), + }) + .unwrap() + } +} + +#[async_trait::async_trait] +#[typetag::serde(name = "delta_lake")] +impl SinkConfig for DeltaLakeConfig { + async fn build(&self, _cx: SinkContext) -> crate::Result<(VectorSink, Healthcheck)> { + let table_uri = Url::parse(&self.table_uri) + .map_err(|e| format!("Invalid table URI {}: {}", self.table_uri, e))?; + + match table_uri.scheme() { + "gs" | "s3" | "s3a" | "file" | "abfs" | "abfss" | "az" => {} + scheme => { + return Err(format!( + "Unsupported URI scheme '{}'. 
Supported: gs, s3, s3a, file, abfs, abfss, az", + scheme + ) + .into()); + } + } + + let mut table = deltalake::open_table_with_storage_options( + table_uri.clone(), + self.storage_options.clone(), + ) + .await + .map_err(|e| format!("Failed to open Delta table at {}: {}", self.table_uri, e))?; + + // Get schema from the Delta table + let schema_provider = DeltaLakeSchemaProvider::new(&table); + let schema = schema_provider + .get_schema() + .await + .map_err(|e| format!("Failed to fetch schema from Delta table: {}", e))?; + + let shared_schema: SharedSchema = Arc::new(ArcSwap::from_pointee(schema)); + + let request_builder = DeltaLakeRequestBuilder { + transformer: Transformer::default(), + schema_evolution: self.schema_evolution, + allow_nullable_fields: self.allow_nullable_fields, + shared_schema: Arc::clone(&shared_schema), + }; + + let service = DeltaLakeService::new( + self.table_uri.clone(), + self.storage_options.clone(), + self.schema_evolution, + shared_schema, + ); + let service = ServiceBuilder::new() + .settings(self.request.into_settings(), DeltaLakeRetryLogic) + .service(service); + + let batch_settings = self + .batch + .into_batcher_settings() + .map_err(|e| format!("Failed to configure batching: {}", e))?; + let sink = DeltaLakeSink::new(service, request_builder, batch_settings); + + // 9. 
Healthcheck - verify table is accessible + let healthcheck = Box::pin(async move { + table + .load() + .await + .map_err(|e| format!("Health check failed: unable to load Delta table: {}", e))?; + Ok(()) + }); + + Ok((VectorSink::from_event_streamsink(sink), healthcheck)) + } + + fn input(&self) -> Input { + Input::log() + } + + fn acknowledgements(&self) -> &AcknowledgementsConfig { + &self.acknowledgements + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn generate_config() { + crate::test_util::test_generate_config::(); + } + + #[test] + fn test_config_gcs() { + let config_str = r#" + table_uri = "gs://test-bucket/test/table" + + [storage_options] + google_service_account = "/path/to/creds.json" + + [batch] + max_bytes = 104857600 + timeout_secs = 300 + "#; + + let config: DeltaLakeConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.table_uri, "gs://test-bucket/test/table"); + assert_eq!( + config.storage_options.get("google_service_account"), + Some(&"/path/to/creds.json".to_string()) + ); + } + + #[test] + fn test_config_s3() { + let config_str = r#" + table_uri = "s3://test-bucket/test/table" + + [storage_options] + aws_access_key_id = "AKIAIOSFODNN7EXAMPLE" + aws_secret_access_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + aws_region = "us-east-1" + "#; + + let config: DeltaLakeConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.table_uri, "s3://test-bucket/test/table"); + assert_eq!( + config.storage_options.get("aws_region"), + Some(&"us-east-1".to_string()) + ); + } + + #[test] + fn test_config_s3_minio() { + let config_str = r#" + table_uri = "s3://test-bucket/test/table" + + [storage_options] + aws_access_key_id = "minioadmin" + aws_secret_access_key = "minioadmin" + aws_region = "us-east-1" + aws_endpoint = "http://localhost:9000" + aws_allow_http = "true" + aws_s3_path_style = "true" + "#; + + let config: DeltaLakeConfig = 
toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.table_uri, "s3://test-bucket/test/table"); + assert_eq!( + config.storage_options.get("aws_endpoint"), + Some(&"http://localhost:9000".to_string()) + ); + assert_eq!( + config.storage_options.get("aws_allow_http"), + Some(&"true".to_string()) + ); + } +} diff --git a/src/sinks/delta_lake/integration_tests.rs b/src/sinks/delta_lake/integration_tests.rs new file mode 100644 index 0000000000000..82b0c167865a7 --- /dev/null +++ b/src/sinks/delta_lake/integration_tests.rs @@ -0,0 +1,1021 @@ +#![cfg(all(test, feature = "delta-lake-integration-tests"))] + +use std::collections::HashMap; +use std::sync::Arc; + +use deltalake::arrow::array::Array; +use deltalake::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use deltalake::datafusion::prelude::SessionContext; +use deltalake::kernel::StructType; +use deltalake::kernel::engine::arrow_conversion::TryFromArrow; +use deltalake::operations::create::CreateBuilder; +use deltalake::protocol::SaveMode; +use deltalake::{DeltaTable, open_table_with_storage_options}; +use url::Url; + +use crate::config::SinkConfig; + +/// Get MinIO endpoint from environment or use default +fn minio_endpoint() -> String { + std::env::var("MINIO_ENDPOINT").unwrap_or_else(|_| "http://localhost:9000".into()) +} + +/// Create storage options for S3-compatible MinIO +fn minio_storage_options() -> HashMap { + let mut options = HashMap::new(); + + // MinIO credentials + options.insert("aws_access_key_id".to_string(), "minioadmin".to_string()); + options.insert( + "aws_secret_access_key".to_string(), + "minioadmin".to_string(), + ); + + // MinIO endpoint + let endpoint = minio_endpoint(); + options.insert("aws_endpoint".to_string(), endpoint); + options.insert("aws_region".to_string(), "us-east-1".to_string()); + + // S3-compatible settings + options.insert("aws_allow_http".to_string(), "true".to_string()); + options.insert("aws_s3_path_style".to_string(), 
"true".to_string()); + + options +} + +/// Create a new Delta Lake table in MinIO +async fn create_delta_table(bucket: &str, table_path: &str, schema: Arc) -> DeltaTable { + let table_uri = format!("s3://{}/{}", bucket, table_path); + let storage_options = minio_storage_options(); + + // Convert Arrow schema to Delta schema using built-in conversion + let delta_schema = StructType::try_from_arrow(schema.as_ref()) + .expect("Failed to convert Arrow schema to Delta schema"); + let delta_fields: Vec<_> = delta_schema.fields().cloned().collect(); + + // Create table using Delta operations + CreateBuilder::new() + .with_location(&table_uri) + .with_columns(delta_fields) + .with_save_mode(SaveMode::Ignore) + .with_storage_options(storage_options.clone()) + .await + .expect("Failed to create Delta table") +} + +/// Open an existing Delta table from MinIO +async fn open_delta_table(bucket: &str, table_path: &str) -> DeltaTable { + let table_uri = format!("s3://{}/{}", bucket, table_path); + let table_url = Url::parse(&table_uri).expect("Failed to parse table URI"); + let storage_options = minio_storage_options(); + + open_table_with_storage_options(table_url, storage_options) + .await + .expect("Failed to open Delta table") +} + +/// Read-path validation helper: verifies data can be read back correctly from a Delta table. 
+/// +/// This function uses DataFusion to query the Delta table and verify: +/// - Data is readable (not corrupted) +/// - Row counts match expectations +/// - Column values can be accessed +/// +/// # Arguments +/// * `table` - The Delta table to read from (should be loaded with latest snapshot) +/// * `expected_min_rows` - Minimum number of rows expected in the table +/// +/// # Returns +/// The total number of rows read from the table +async fn assert_data_readable(table: &DeltaTable, expected_min_rows: usize) -> usize { + let ctx = SessionContext::new(); + + // Register the Delta table with DataFusion + ctx.register_table("delta_table", Arc::new(table.clone())) + .expect("Failed to register Delta table with DataFusion"); + + // Query all data from the table + let df = ctx + .sql("SELECT * FROM delta_table") + .await + .expect("Failed to execute SQL query"); + + // Collect results to verify data is readable + let batches = df.collect().await.expect("Failed to collect query results"); + + // Count total rows + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + + assert!( + total_rows >= expected_min_rows, + "Expected at least {} rows, but found {}", + expected_min_rows, + total_rows + ); + + total_rows +} + +/// Read-path validation helper: verifies specific column values exist in the table. +/// +/// This function uses DataFusion to query the Delta table and verify that +/// a specific value exists in a column. 
+/// +/// # Arguments +/// * `table` - The Delta table to read from +/// * `column` - The column name to check +/// * `expected_value` - The string value expected to exist in the column +/// +/// # Returns +/// The number of rows matching the filter +async fn assert_column_value_exists( + table: &DeltaTable, + column: &str, + expected_value: &str, +) -> usize { + let ctx = SessionContext::new(); + + ctx.register_table("delta_table", Arc::new(table.clone())) + .expect("Failed to register Delta table with DataFusion"); + + // Query with filter for the specific value + let query = format!( + "SELECT * FROM delta_table WHERE {} = '{}'", + column, expected_value + ); + let df = ctx + .sql(&query) + .await + .expect("Failed to execute filtered SQL query"); + + let batches = df + .collect() + .await + .expect("Failed to collect filtered results"); + + let matching_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + + assert!( + matching_rows > 0, + "Expected to find rows where {} = '{}', but found none", + column, + expected_value + ); + + matching_rows +} + +/// Polls a Delta table until a condition is met or timeout occurs. 
+/// +/// # Arguments +/// * `bucket` - The S3 bucket name +/// * `table_path` - The path to the table within the bucket +/// * `condition` - A closure that takes a &DeltaTable and returns true when the condition is met +/// * `timeout_msg` - Message to display if timeout occurs +/// +/// # Returns +/// The loaded DeltaTable once the condition is satisfied +async fn poll_table_until( + bucket: &str, + table_path: &str, + condition: F, + timeout_msg: &str, +) -> DeltaTable +where + F: Fn(&DeltaTable) -> bool, +{ + let mut table = open_delta_table(bucket, table_path).await; + for attempt in 0..20 { + table.load().await.expect("Failed to load table"); + if condition(&table) { + return table; + } + if attempt == 19 { + panic!("{}", timeout_msg); + } + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; + } + table +} + +/// Read-path validation helper: reads all data and returns column values for verification. +/// +/// This is useful for more complex assertions about the data content. 
+/// +/// # Arguments +/// * `table` - The Delta table to read from +/// * `column` - The column name to extract values from +/// +/// # Returns +/// A vector of string representations of the column values +async fn read_column_values(table: &DeltaTable, column: &str) -> Vec { + let ctx = SessionContext::new(); + + ctx.register_table("delta_table", Arc::new(table.clone())) + .expect("Failed to register Delta table with DataFusion"); + + let query = format!("SELECT {} FROM delta_table", column); + let df = ctx + .sql(&query) + .await + .expect("Failed to execute column query"); + + let batches = df + .collect() + .await + .expect("Failed to collect column results"); + + let mut values = Vec::new(); + for batch in batches { + if batch.num_columns() > 0 { + let col = batch.column(0); + for i in 0..col.len() { + // Convert array value to string representation + let value = if col.is_null(i) { + "NULL".to_string() + } else { + // Use Arrow's display formatting + deltalake::arrow::util::display::array_value_to_string(col, i) + .unwrap_or_else(|_| "ERROR".to_string()) + }; + values.push(value); + } + } + } + + values +} + +#[tokio::test] +async fn test_delta_table_creation() { + // Test that we can create a Delta Lake table in MinIO + let bucket = "test-bucket"; + let table_path = format!("test-creation-{}", uuid::Uuid::new_v4()); + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("message", DataType::Utf8, true), + ])); + + let _table = create_delta_table(bucket, &table_path, schema.clone()).await; + + // Verify table was created and can be opened + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + // Verify schema + let table_schema = table.snapshot().unwrap().schema(); + let fields: Vec<_> = table_schema.fields().collect(); + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].name(), "id"); + assert_eq!(fields[1].name(), "message"); +} + 
+#[tokio::test] +async fn test_delta_table_schema_fields() { + // Test schema field details + let bucket = "test-bucket"; + let table_path = format!("test-schema-{}", uuid::Uuid::new_v4()); + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("count", DataType::Int32, true), + Field::new( + "created_at", + // Delta Lake requires microsecond timestamps with timezone + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + true, + ), + ])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.unwrap(); + + let table_schema = table.snapshot().unwrap().schema(); + let fields: Vec<_> = table_schema.fields().collect(); + assert_eq!(fields.len(), 4); + + // Verify field names + let field_names: Vec<&str> = fields.iter().map(|f| f.name().as_str()).collect(); + assert_eq!(field_names, vec!["id", "name", "count", "created_at"]); + + // Verify nullability + assert!(!fields[0].is_nullable()); // id is not nullable + assert!(fields[1].is_nullable()); // name is nullable +} + +#[tokio::test] +async fn test_delta_table_multiple_tables() { + // Test creating multiple independent tables + let bucket = "test-bucket"; + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("data", DataType::Utf8, true), + ])); + + // Create multiple tables + let table1_path = format!("test-multi-1-{}", uuid::Uuid::new_v4()); + let table2_path = format!("test-multi-2-{}", uuid::Uuid::new_v4()); + let table3_path = format!("test-multi-3-{}", uuid::Uuid::new_v4()); + + create_delta_table(bucket, &table1_path, schema.clone()).await; + create_delta_table(bucket, &table2_path, schema.clone()).await; + create_delta_table(bucket, &table3_path, schema.clone()).await; + + // Verify all can be opened independently + let mut t1 = open_delta_table(bucket, 
&table1_path).await; + let mut t2 = open_delta_table(bucket, &table2_path).await; + let mut t3 = open_delta_table(bucket, &table3_path).await; + + t1.load().await.unwrap(); + t2.load().await.unwrap(); + t3.load().await.unwrap(); + + // All should have version 0 (just created) + assert_eq!(t1.version(), Some(0)); + assert_eq!(t2.version(), Some(0)); + assert_eq!(t3.version(), Some(0)); +} + +#[tokio::test] +async fn test_delta_table_with_complex_types() { + // Test table with various complex Arrow types + let bucket = "test-bucket"; + let table_path = format!("test-complex-{}", uuid::Uuid::new_v4()); + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("int8_field", DataType::Int8, true), + Field::new("int16_field", DataType::Int16, true), + Field::new("int32_field", DataType::Int32, true), + Field::new("float32_field", DataType::Float32, true), + Field::new("float64_field", DataType::Float64, true), + Field::new("bool_field", DataType::Boolean, true), + Field::new("binary_field", DataType::Binary, true), + Field::new("string_field", DataType::Utf8, true), + ])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.unwrap(); + + let table_schema = table.snapshot().unwrap().schema(); + let fields: Vec<_> = table_schema.fields().collect(); + assert_eq!(fields.len(), 9); +} + +#[tokio::test] +async fn test_delta_lake_basic_write() { + use crate::config::SinkContext; + use crate::sinks::delta_lake::DeltaLakeConfig; + use crate::test_util::components::run_and_assert_sink_compliance; + use futures::stream; + use vector_lib::event::{BatchNotifier, BatchStatus, Event, LogEvent}; + + // Setup - create unique table for this test + let bucket = "test-bucket"; + let table_path = format!("test-basic-write-{}", uuid::Uuid::new_v4()); + + // Create Delta table with simple schema + let schema = Arc::new(Schema::new(vec![ + 
Field::new("message", DataType::Utf8, false), + Field::new("timestamp", DataType::Int64, false), + ])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + // Build sink configuration + // Using allow_nullable_fields: false to test strict schema enforcement + let table_uri = format!("s3://{}/{}", bucket, table_path); + let config = DeltaLakeConfig { + table_uri, + storage_options: minio_storage_options(), + allow_nullable_fields: false, + schema_evolution: false, + batch: Default::default(), + request: Default::default(), + acknowledgements: Default::default(), + }; + + // Build the sink + let cx = SinkContext::default(); + let (sink, _healthcheck) = config.build(cx).await.expect("Failed to build sink"); + + // Create test events with all required fields + let (batch, receiver) = BatchNotifier::new_with_receiver(); + let events: Vec = (0..10) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("message", format!("test message {}", i)); + log.insert("timestamp", i as i64); + Event::Log(log) + }) + .collect(); + + let events_with_batch = events + .into_iter() + .map(|e| e.with_batch_notifier(&batch)) + .collect::>(); + + drop(batch); + + // Write events through the sink + run_and_assert_sink_compliance(sink, stream::iter(events_with_batch), &[]).await; + + // Verify delivery + assert_eq!(receiver.await, BatchStatus::Delivered); + + // Verify data was written to Delta Lake + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + // Verify table version increased (data was committed) + assert!( + table.version().unwrap() > 0, + "Table version should be > 0 after write" + ); + + // Verify data was written by checking file URIs + let files: Vec<_> = table + .get_file_uris() + .expect("Failed to get file URIs") + .collect(); + assert!(!files.is_empty(), "No files written to Delta table"); + + // Read-path validation: verify data can be read back correctly + let total_rows = 
assert_data_readable(&table, 10).await; + println!("Read back {} rows from Delta table", total_rows); + + // Verify specific message content exists + assert_column_value_exists(&table, "message", "test message 5").await; + + // Read all message values and verify they contain expected patterns + let messages = read_column_values(&table, "message").await; + assert_eq!(messages.len(), 10, "Expected 10 messages"); + for i in 0..10 { + let expected = format!("test message {}", i); + assert!( + messages.contains(&expected), + "Missing message: {}", + expected + ); + } + + println!( + "Successfully wrote and verified {} files to Delta table", + files.len() + ); +} + +#[tokio::test] +async fn test_delta_lake_schema_evolution() { + use crate::config::SinkContext; + use crate::sinks::delta_lake::DeltaLakeConfig; + use crate::test_util::components::run_and_assert_sink_compliance; + use futures::stream; + use vector_lib::event::{BatchNotifier, BatchStatus, Event, LogEvent}; + + // Setup - create unique table for this test + let bucket = "test-bucket"; + let table_path = format!("test-schema-evolution-{}", uuid::Uuid::new_v4()); + + // Create Delta table with initial schema (only 'id' and 'name') + let initial_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + create_delta_table(bucket, &table_path, initial_schema.clone()).await; + + // Build first sink to write initial data + let table_uri = format!("s3://{}/{}", bucket, table_path); + let config1 = DeltaLakeConfig { + table_uri: table_uri.clone(), + storage_options: minio_storage_options(), + allow_nullable_fields: true, + schema_evolution: true, + batch: Default::default(), + request: Default::default(), + acknowledgements: Default::default(), + }; + + let cx = SinkContext::default(); + let (sink1, _) = config1 + .build(cx.clone()) + .await + .expect("Failed to build sink1"); + + // Write initial events (only id and name) + let (batch1, 
receiver1) = BatchNotifier::new_with_receiver(); + let events1: Vec = (0..5) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("id", i as i64); + log.insert("name", format!("user_{}", i)); + Event::Log(log) + }) + .collect(); + + let events1_with_batch = events1 + .into_iter() + .map(|e| e.with_batch_notifier(&batch1)) + .collect::>(); + drop(batch1); + + run_and_assert_sink_compliance(sink1, stream::iter(events1_with_batch), &[]).await; + assert_eq!(receiver1.await, BatchStatus::Delivered); + + // Poll until table commit is visible (version > 0) + let table = poll_table_until( + bucket, + &table_path, + |t| t.version().unwrap_or(0) > 0, + "Timeout waiting for initial write to commit", + ) + .await; + let version_after_first_write = table.version().unwrap(); + + // Now manually add a new column to the Delta table schema + // This simulates an external process (or another writer) evolving the schema + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + // Use Delta Lake's merge operation to add a new column + // We'll do this by creating a record batch with the new schema and using DeltaTable methods + use deltalake::arrow::array::{Int64Array, RecordBatch, StringArray}; + + // Create a record batch with the new schema including 'email' + let evolved_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + Field::new("email", DataType::Utf8, true), + ])); + + let id_array = Int64Array::from(vec![100]); // Use a different ID to avoid conflicts + let name_array = StringArray::from(vec!["schema_evolution_test"]); + let email_array = StringArray::from(vec!["test@example.com"]); + + let record_batch = RecordBatch::try_new( + Arc::clone(&evolved_schema), + vec![ + Arc::new(id_array), + Arc::new(name_array), + Arc::new(email_array), + ], + ) + .expect("Failed to create record batch"); + + // Write with schema merge enabled using 
DeltaTable methods directly + let _result = table + .clone() + .write(vec![record_batch]) + .with_save_mode(deltalake::protocol::SaveMode::Append) + .with_schema_mode(deltalake::operations::write::SchemaMode::Merge) + .await + .expect("Failed to write with schema evolution"); + + // Poll until schema evolution commit is visible (version increased and schema has 'email' field) + let _table = poll_table_until( + bucket, + &table_path, + |t| { + if t.version().unwrap_or(0) <= version_after_first_write { + return false; + } + let schema = t.snapshot().unwrap().schema(); + schema.fields().any(|f| f.name() == "email") + }, + "Timeout waiting for schema evolution to commit", + ) + .await; + + // Build second sink - it should automatically detect and handle the evolved schema + let config2 = DeltaLakeConfig { + table_uri: table_uri.clone(), + storage_options: minio_storage_options(), + allow_nullable_fields: true, + schema_evolution: true, + batch: Default::default(), + request: Default::default(), + acknowledgements: Default::default(), + }; + + let cx2 = SinkContext::default(); + let (sink2, _) = config2.build(cx2).await.expect("Failed to build sink2"); + + // Write events with the original schema (without email) + // The sink should handle the schema mismatch by reloading + let (batch2, receiver2) = BatchNotifier::new_with_receiver(); + let events2: Vec = (5..10) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("id", i as i64); + log.insert("name", format!("user_{}", i)); + // Intentionally NOT including 'email' to test schema evolution handling + Event::Log(log) + }) + .collect(); + + let events2_with_batch = events2 + .into_iter() + .map(|e| e.with_batch_notifier(&batch2)) + .collect::>(); + drop(batch2); + + run_and_assert_sink_compliance(sink2, stream::iter(events2_with_batch), &[]).await; + assert_eq!(receiver2.await, BatchStatus::Delivered); + + // Verify schema evolved and data was written + let mut table_final = open_delta_table(bucket, 
&table_path).await; + table_final + .load() + .await + .expect("Failed to load final table"); + + let final_schema = table_final.snapshot().unwrap().schema(); + let field_names: Vec<&str> = final_schema.fields().map(|f| f.name().as_str()).collect(); + + // The schema should include the email field from the manual evolution + println!("Final schema fields: {:?}", field_names); + assert!( + field_names.contains(&"email"), + "Schema should include 'email' field" + ); + assert!( + table_final.version().unwrap() >= 2, + "Table should have at least 2 versions" + ); + + // Read-path validation: verify all data including schema evolution + // Initial write: 5 rows (id 0-4), schema evolution write: 1 row (id 100), second write: 5 rows (id 5-9) + let total_rows = assert_data_readable(&table_final, 11).await; + println!("Read back {} rows after schema evolution", total_rows); + + // Verify data from first write exists + assert_column_value_exists(&table_final, "name", "user_0").await; + assert_column_value_exists(&table_final, "name", "user_4").await; + + // Verify data from schema evolution write exists (with email) + assert_column_value_exists(&table_final, "email", "test@example.com").await; + + // Verify data from second write exists + assert_column_value_exists(&table_final, "name", "user_9").await; + + // Read all IDs and verify expected range + let ids = read_column_values(&table_final, "id").await; + assert_eq!(ids.len(), 11, "Expected 11 total rows"); + println!("All IDs in table: {:?}", ids); +} + +#[tokio::test] +async fn test_delta_lake_concurrent_writes() { + use crate::config::SinkContext; + use crate::sinks::delta_lake::DeltaLakeConfig; + use crate::test_util::components::run_and_assert_sink_compliance; + use futures::stream; + use vector_lib::event::{BatchNotifier, BatchStatus, Event, LogEvent}; + + // Setup - create unique table for this test + let bucket = "test-bucket"; + let table_path = format!("test-concurrent-{}", uuid::Uuid::new_v4()); + + // Create 
Delta table + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("worker_id", DataType::Int32, false), + Field::new("data", DataType::Utf8, true), + ])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + // Spawn multiple concurrent writers + let num_workers = 3; + let events_per_worker = 10; + + let mut handles = vec![]; + + for worker_id in 0..num_workers { + let table_uri = format!("s3://{}/{}", bucket, table_path); + let storage_opts = minio_storage_options(); + + let handle = tokio::spawn(async move { + // Each worker builds its own sink + let config = DeltaLakeConfig { + table_uri, + storage_options: storage_opts, + allow_nullable_fields: true, + schema_evolution: true, + batch: Default::default(), + request: Default::default(), + acknowledgements: Default::default(), + }; + + let cx = SinkContext::default(); + let (sink, _) = config.build(cx).await.expect("Failed to build sink"); + + // Create events for this worker + let (batch, receiver) = BatchNotifier::new_with_receiver(); + let events: Vec = (0..events_per_worker) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("id", (worker_id * events_per_worker + i) as i64); + log.insert("worker_id", worker_id as i32); + log.insert("data", format!("worker_{}_event_{}", worker_id, i)); + Event::Log(log) + }) + .collect(); + + let events_with_batch = events + .into_iter() + .map(|e| e.with_batch_notifier(&batch)) + .collect::>(); + drop(batch); + + // Write concurrently + run_and_assert_sink_compliance(sink, stream::iter(events_with_batch), &[]).await; + + receiver.await + }); + + handles.push(handle); + } + + // Wait for all workers to complete + for handle in handles { + let status = handle.await.expect("Worker task failed"); + assert_eq!(status, BatchStatus::Delivered); + } + + // Verify all data was written + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + // Verify 
table has multiple versions (concurrent writes succeeded) + let version = table.version().unwrap(); + assert!( + version > 0, + "Table should have commits from concurrent writes" + ); + + // Read-path validation: verify all concurrent writes landed + let expected_total = num_workers * events_per_worker; // 3 workers * 10 events = 30 rows + let total_rows = assert_data_readable(&table, expected_total).await; + println!("Read back {} rows from concurrent writes", total_rows); + + // Verify data from each worker exists + for worker_id in 0..num_workers { + let expected_data = format!("worker_{}_event_0", worker_id); + assert_column_value_exists(&table, "data", &expected_data).await; + } + + // Read all worker_ids and verify distribution + let worker_ids = read_column_values(&table, "worker_id").await; + assert_eq!( + worker_ids.len(), + expected_total, + "Expected {} total rows", + expected_total + ); + + // Count events per worker + for worker_id in 0..num_workers { + let worker_count = worker_ids + .iter() + .filter(|&w| w == &worker_id.to_string()) + .count(); + assert_eq!( + worker_count, events_per_worker, + "Worker {} should have {} events, found {}", + worker_id, events_per_worker, worker_count + ); + } + + println!( + "Concurrent writes completed successfully. Table version: {}. 
Total rows: {}", + version, total_rows + ); +} + +#[tokio::test] +async fn test_delta_lake_large_batch() { + use crate::config::SinkContext; + use crate::sinks::delta_lake::DeltaLakeConfig; + use crate::sinks::util::BatchConfig; + use crate::test_util::components::run_and_assert_sink_compliance; + use futures::stream; + use vector_lib::event::{BatchNotifier, BatchStatus, Event, LogEvent}; + + // Setup - create unique table for this test + let bucket = "test-bucket"; + let table_path = format!("test-large-batch-{}", uuid::Uuid::new_v4()); + + // Create Delta table + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("message", DataType::Utf8, true), + Field::new("payload", DataType::Utf8, true), + ])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + // Build sink with larger batch size + let table_uri = format!("s3://{}/{}", bucket, table_path); + let mut batch_config = BatchConfig::default(); + batch_config.max_events = Some(500); // Batch every 500 events + + let config = DeltaLakeConfig { + table_uri, + storage_options: minio_storage_options(), + allow_nullable_fields: true, + schema_evolution: true, + batch: batch_config, + request: Default::default(), + acknowledgements: Default::default(), + }; + + let cx = SinkContext::default(); + let (sink, _) = config.build(cx).await.expect("Failed to build sink"); + + // Create 1000+ events with large payloads + let num_events = 1500; + let large_payload = "x".repeat(1024); // 1KB payload per event + + let (batch, receiver) = BatchNotifier::new_with_receiver(); + let events: Vec = (0..num_events) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("id", i as i64); + log.insert("message", format!("Large event number {}", i)); + log.insert("payload", large_payload.clone()); + Event::Log(log) + }) + .collect(); + + let events_with_batch = events + .into_iter() + .map(|e| e.with_batch_notifier(&batch)) + .collect::>(); + drop(batch); + + // Write 
large batch + run_and_assert_sink_compliance(sink, stream::iter(events_with_batch), &[]).await; + + // Verify delivery + assert_eq!(receiver.await, BatchStatus::Delivered); + + // Verify data was written + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + // Verify table has data + assert!( + table.version().unwrap() > 0, + "Table should have data written" + ); + + let files: Vec<_> = table + .get_file_uris() + .expect("Failed to get file URIs") + .collect(); + assert!(!files.is_empty(), "No files written to Delta table"); + + // Read-path validation: verify all events in large batch are readable + let total_rows = assert_data_readable(&table, num_events).await; + println!("Read back {} rows from large batch", total_rows); + + // Verify specific events exist (sample a few from different positions) + assert_column_value_exists(&table, "message", "Large event number 0").await; + assert_column_value_exists(&table, "message", "Large event number 500").await; + assert_column_value_exists(&table, "message", "Large event number 1499").await; + + // Verify IDs span the expected range + let ids = read_column_values(&table, "id").await; + assert_eq!(ids.len(), num_events, "Expected {} events", num_events); + + // Verify first and last IDs + assert!(ids.contains(&"0".to_string()), "Missing ID 0"); + assert!(ids.contains(&"1499".to_string()), "Missing ID 1499"); + + println!( + "Successfully wrote and verified {} events in large batch. 
Files: {}", + num_events, + files.len() + ); +} + +#[tokio::test] +async fn test_delta_lake_schema_inference() { + use crate::config::SinkContext; + use crate::sinks::delta_lake::DeltaLakeConfig; + use crate::test_util::components::run_and_assert_sink_compliance; + use futures::stream; + use vector_lib::event::{BatchNotifier, BatchStatus, Event, LogEvent}; + + // Setup - create unique table for this test + let bucket = "test-bucket"; + let table_path = format!("test-schema-inference-{}", uuid::Uuid::new_v4()); + + // Create Delta table with minimal schema (only 'id') + let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + + create_delta_table(bucket, &table_path, schema.clone()).await; + + // Build sink with auto schema evolution enabled + let table_uri = format!("s3://{}/{}", bucket, table_path); + let config = DeltaLakeConfig { + table_uri, + storage_options: minio_storage_options(), + allow_nullable_fields: true, + schema_evolution: true, // Enable schema inference and evolution + batch: Default::default(), + request: Default::default(), + acknowledgements: Default::default(), + }; + + // Build the sink + let cx = SinkContext::default(); + let (sink, _healthcheck) = config.build(cx).await.expect("Failed to build sink"); + + // Create test events with extra fields NOT in the table schema + let (batch, receiver) = BatchNotifier::new_with_receiver(); + let events: Vec = (0..10) + .map(|i| { + let mut log = LogEvent::default(); + log.insert("id", i as i64); + log.insert("message", format!("test message {}", i)); // Not in schema + log.insert("count", (i * 10) as i64); // Not in schema + log.insert("active", i % 2 == 0); // Not in schema (boolean) + Event::Log(log) + }) + .collect(); + + let events_with_batch = events + .into_iter() + .map(|e| e.with_batch_notifier(&batch)) + .collect::>(); + + drop(batch); + + // Write events through the sink + run_and_assert_sink_compliance(sink, stream::iter(events_with_batch), &[]).await; + + // 
Verify delivery + assert_eq!(receiver.await, BatchStatus::Delivered); + + // Verify schema evolved to include inferred fields + let mut table = open_delta_table(bucket, &table_path).await; + table.load().await.expect("Failed to load table"); + + let table_schema = table.snapshot().unwrap().schema(); + let field_names: Vec<&str> = table_schema.fields().map(|f| f.name().as_str()).collect(); + + println!("Schema after inference: {:?}", field_names); + + // Original field should exist + assert!(field_names.contains(&"id"), "Schema should contain 'id'"); + + // Inferred fields should be added + assert!( + field_names.contains(&"message"), + "Schema should contain inferred 'message' field" + ); + assert!( + field_names.contains(&"count"), + "Schema should contain inferred 'count' field" + ); + assert!( + field_names.contains(&"active"), + "Schema should contain inferred 'active' field" + ); + + // Verify data was written correctly + let total_rows = assert_data_readable(&table, 10).await; + println!("Read back {} rows with inferred schema", total_rows); + + // Verify specific values exist + assert_column_value_exists(&table, "message", "test message 5").await; + + // Read count values and verify they are integers + let counts = read_column_values(&table, "count").await; + assert_eq!(counts.len(), 10); + assert!(counts.contains(&"0".to_string())); + assert!(counts.contains(&"90".to_string())); + + // Verify booleans were inferred correctly + let active_values = read_column_values(&table, "active").await; + assert_eq!(active_values.len(), 10); + assert!( + active_values.contains(&"true".to_string()) || active_values.contains(&"TRUE".to_string()) + ); + assert!( + active_values.contains(&"false".to_string()) + || active_values.contains(&"FALSE".to_string()) + ); + + println!("Schema inference test passed. 
Inferred fields: message, count, active"); +} diff --git a/src/sinks/delta_lake/mod.rs b/src/sinks/delta_lake/mod.rs new file mode 100644 index 0000000000000..cb29a908316b2 --- /dev/null +++ b/src/sinks/delta_lake/mod.rs @@ -0,0 +1,17 @@ +//! Delta Lake sink for writing log events to Delta Lake tables. +//! +//! This sink writes batched log events to Delta Lake tables stored on cloud object storage (GCS). +//! It leverages the existing Arrow batch encoding infrastructure and the deltalake Rust crate +//! for transaction log management. + +pub mod config; +pub mod request_builder; +pub mod schema; +pub mod schema_inference; +pub mod service; +pub mod sink; + +#[cfg(all(test, feature = "delta-lake-integration-tests"))] +mod integration_tests; + +pub use config::DeltaLakeConfig; diff --git a/src/sinks/delta_lake/request_builder.rs b/src/sinks/delta_lake/request_builder.rs new file mode 100644 index 0000000000000..8345c552b89c2 --- /dev/null +++ b/src/sinks/delta_lake/request_builder.rs @@ -0,0 +1,139 @@ +//! Request builder for Delta Lake sink. +//! +//! This module converts batches of Vector events into Arrow RecordBatches +//! for writing to Delta Lake tables. + +use std::io; +use std::num::NonZeroUsize; +use std::sync::Arc; + +use arc_swap::ArcSwap; +use arrow::array::RecordBatch; +use arrow::datatypes::{FieldRef, Schema, SchemaRef}; +use vector_lib::codecs::encoding::format::{build_record_batch, make_field_nullable}; + +use crate::sinks::prelude::*; + +use super::schema_inference::build_inferred_schema; + +/// Transform a schema to make all fields nullable. +fn make_schema_nullable(schema: &Schema) -> Schema { + Schema::new_with_metadata( + schema + .fields() + .iter() + .map(|f| make_field_nullable(f).into()) + .collect::>(), + schema.metadata().clone(), + ) +} + +/// Request payload for Delta Lake writes. +/// +/// Contains Arrow RecordBatches ready for writing to Delta Lake. 
+/// By passing RecordBatches directly (instead of serializing to Parquet and back),
+/// we avoid an expensive serialization round-trip.
+#[derive(Clone)]
+pub struct DeltaLakeRequest {
+    /// Arrow RecordBatches to write
+    pub batches: Vec<RecordBatch>,
+
+    /// Event finalizers for acknowledgments
+    pub finalizers: EventFinalizers,
+
+    /// Request metadata for metrics
+    pub request_metadata: RequestMetadata,
+
+    /// Byte size of the batches (for metrics)
+    pub byte_size: usize,
+}
+
+impl MetaDescriptive for DeltaLakeRequest {
+    fn get_metadata(&self) -> &RequestMetadata {
+        &self.request_metadata
+    }
+
+    fn metadata_mut(&mut self) -> &mut RequestMetadata {
+        &mut self.request_metadata
+    }
+}
+
+impl crate::event::Finalizable for DeltaLakeRequest {
+    fn take_finalizers(&mut self) -> EventFinalizers {
+        std::mem::take(&mut self.finalizers)
+    }
+}
+
+/// Shared schema reference that can be updated after successful writes.
+/// Uses ArcSwap for lock-free reads with atomic updates.
+pub type SharedSchema = Arc<ArcSwap<Schema>>;
+
+/// Request builder for Delta Lake.
+///
+/// This builder converts batches of Vector events directly into Arrow RecordBatches,
+/// avoiding the overhead of serializing to Parquet and deserializing back.
+#[derive(Clone)]
+pub struct DeltaLakeRequestBuilder {
+    /// Transformer for event processing
+    pub transformer: Transformer,
+
+    /// Whether to enable automatic schema evolution (infer new fields from events)
+    pub schema_evolution: bool,
+
+    /// Whether to make all schema fields nullable
+    pub allow_nullable_fields: bool,
+
+    /// Shared schema reference, updated after successful writes by the service
+    pub shared_schema: SharedSchema,
+}
+
+impl DeltaLakeRequestBuilder {
+    /// Build a DeltaLakeRequest from a batch of events.
+ pub fn build_request(&self, mut events: Vec) -> Result { + // Extract finalizers before transformation + let finalizers = events.take_finalizers(); + let metadata_builder = RequestMetadataBuilder::from_events(&events); + + // Transform events + let mut transformed_events = Vec::with_capacity(events.len()); + for mut event in events { + self.transformer.transform(&mut event); + transformed_events.push(event); + } + + // Get base schema from the shared schema reference (updated after successful writes) + let base_schema: SchemaRef = Arc::clone(&self.shared_schema.load()); + + // Determine final schema: either base schema or merged with inferred fields + let schema = if self.schema_evolution { + build_inferred_schema(&base_schema, &transformed_events) + } else { + base_schema + }; + + // Make all fields nullable if configured + let schema: SchemaRef = if self.allow_nullable_fields { + Arc::new(make_schema_nullable(&schema)) + } else { + schema + }; + + // Build RecordBatch directly - no Parquet serialization + let record_batch = build_record_batch(Arc::clone(&schema), &transformed_events) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?; + + // Calculate byte size for metrics (Arrow in-memory size) + let byte_size = record_batch.get_array_memory_size(); + + // Build request metadata using the byte size (since we don't have EncodeResult) + let request_size = NonZeroUsize::new(byte_size).unwrap_or(NonZeroUsize::MIN); + let request_metadata = metadata_builder.with_request_size(request_size); + + Ok(DeltaLakeRequest { + batches: vec![record_batch], + finalizers, + request_metadata, + byte_size, + }) + } +} diff --git a/src/sinks/delta_lake/schema.rs b/src/sinks/delta_lake/schema.rs new file mode 100644 index 0000000000000..b2a091559f4c0 --- /dev/null +++ b/src/sinks/delta_lake/schema.rs @@ -0,0 +1,53 @@ +//! Schema provider for Delta Lake tables. +//! +//! This module implements the `SchemaProvider` trait to fetch Arrow schemas +//! 
from existing Delta Lake tables at sink startup.
+
+use async_trait::async_trait;
+use deltalake::DeltaTable;
+use deltalake::arrow::datatypes::Schema;
+use deltalake::kernel::engine::arrow_conversion::TryIntoArrow;
+use vector_lib::codecs::encoding::format::{ArrowEncodingError, SchemaProvider};
+
+/// Schema provider that fetches Arrow schema from Delta Lake table metadata.
+///
+/// Delta Lake stores table schemas in Arrow format natively, allowing zero-cost
+/// schema conversion. The schema is fetched once at sink startup from the latest
+/// table snapshot.
+#[derive(Clone, Debug)]
+pub struct DeltaLakeSchemaProvider<'a> {
+    table: &'a DeltaTable,
+}
+
+impl<'a> DeltaLakeSchemaProvider<'a> {
+    /// Create a new schema provider for the given Delta table.
+    pub const fn new(table: &'a DeltaTable) -> Self {
+        Self { table }
+    }
+}
+
+#[async_trait]
+impl SchemaProvider for DeltaLakeSchemaProvider<'_> {
+    async fn get_schema(&self) -> Result<Schema, ArrowEncodingError> {
+        // Load the latest table snapshot
+        let snapshot = self
+            .table
+            .snapshot()
+            .map_err(|e| ArrowEncodingError::SchemaFetchError {
+                message: format!("Failed to load Delta table snapshot: {}", e),
+            })?;
+
+        // Get Arrow schema from Delta table metadata
+        // Delta Lake stores schema in Arrow format natively
+        let delta_schema = snapshot.schema();
+
+        // Convert Delta schema to Arrow schema using TryIntoArrow trait
+        let arrow_schema = delta_schema.as_ref().try_into_arrow().map_err(|e| {
+            ArrowEncodingError::SchemaFetchError {
+                message: format!("Failed to convert Delta schema to Arrow: {}", e),
+            }
+        })?;
+
+        Ok(arrow_schema)
+    }
+}
diff --git a/src/sinks/delta_lake/schema_inference.rs b/src/sinks/delta_lake/schema_inference.rs
new file mode 100644
index 0000000000000..9173f7dfa5d96
--- /dev/null
+++ b/src/sinks/delta_lake/schema_inference.rs
@@ -0,0 +1,205 @@
+//! Schema inference for Delta Lake sink.
+//!
+//! This module provides functionality to infer Arrow schema from Vector events,
+//!
merging discovered fields with an existing table schema.
+
+use std::collections::HashMap;
+
+use arrow::datatypes::{DataType, Field, FieldRef, Schema, SchemaRef, TimeUnit};
+
+use crate::sinks::prelude::*;
+
+/// Infers an Arrow DataType from a Vector Value.
+///
+/// Returns `None` for null values since type cannot be determined.
+pub fn infer_type_from_value(value: &Value) -> Option<DataType> {
+    match value {
+        Value::Integer(_) => Some(DataType::Int64),
+        Value::Float(_) => Some(DataType::Float64),
+        Value::Boolean(_) => Some(DataType::Boolean),
+        Value::Timestamp(_) => Some(DataType::Timestamp(
+            TimeUnit::Microsecond,
+            Some("UTC".into()),
+        )),
+        Value::Bytes(_) => Some(DataType::Utf8),
+        Value::Object(_) => Some(DataType::Utf8), // Serialize as JSON
+        Value::Array(_) => Some(DataType::Utf8), // Serialize as JSON
+        Value::Regex(_) => Some(DataType::Utf8), // Serialize as string
+        Value::Null => None, // Cannot infer type from null
+    }
+}
+
+/// Discovers fields from a batch of events.
+///
+/// Returns a HashMap of field_name -> DataType for all fields found
+/// across all events in the batch. First non-null value determines the type.
+pub fn discover_fields_from_events(events: &[Event]) -> HashMap<String, DataType> {
+    let mut discovered: HashMap<String, DataType> = HashMap::new();
+
+    for event in events {
+        let Event::Log(log) = event else {
+            continue;
+        };
+
+        let Some(fields) = log.all_event_fields() else {
+            continue;
+        };
+
+        for (key, value) in fields {
+            // Skip if we've already discovered this field
+            if discovered.contains_key(key.as_ref()) {
+                continue;
+            }
+
+            if let Some(data_type) = infer_type_from_value(value) {
+                discovered.insert(key.to_string(), data_type);
+            }
+        }
+    }
+
+    discovered
+}
+
+/// Merges discovered fields with an existing schema.
+///
+/// - Fields in base_schema are preserved with their original types
+/// - New fields are added as nullable (since existing rows won't have them)
+/// - Returns a new schema containing both base and discovered fields
+pub fn merge_schema_with_discovered(
+    base_schema: &Schema,
+    discovered: &HashMap<String, DataType>,
+) -> Schema {
+    let mut fields: Vec<FieldRef> = base_schema.fields().iter().cloned().collect();
+    let existing_names: std::collections::HashSet<String> =
+        fields.iter().map(|f| f.name().clone()).collect();
+
+    for (name, data_type) in discovered {
+        if !existing_names.contains(name) {
+            // New fields are always nullable
+            fields.push(Field::new(name, data_type.clone(), true).into());
+        }
+    }
+
+    Schema::new_with_metadata(fields, base_schema.metadata().clone())
+}
+
+/// Builds a merged schema from base schema and events.
+///
+/// This is the main entry point for schema inference.
+pub fn build_inferred_schema(base_schema: &Schema, events: &[Event]) -> SchemaRef {
+    let discovered = discover_fields_from_events(events);
+    let merged = merge_schema_with_discovered(base_schema, &discovered);
+    SchemaRef::new(merged)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use vrl::value::ObjectMap;
+
+    #[test]
+    fn test_infer_type_from_value() {
+        assert_eq!(
+            infer_type_from_value(&Value::Integer(42)),
+            Some(DataType::Int64)
+        );
+        assert_eq!(
+            infer_type_from_value(&Value::Float(ordered_float::NotNan::new(3.15).unwrap())),
+            Some(DataType::Float64)
+        );
+        assert_eq!(
+            infer_type_from_value(&Value::Boolean(true)),
+            Some(DataType::Boolean)
+        );
+        assert_eq!(
+            infer_type_from_value(&Value::Bytes("hello".into())),
+            Some(DataType::Utf8)
+        );
+        assert_eq!(infer_type_from_value(&Value::Null), None);
+    }
+
+    #[test]
+    fn test_discover_fields_from_events() {
+        let mut log = LogEvent::default();
+        log.insert("foo", 42i64);
+        log.insert("bar", "hello");
+        let events = vec![Event::Log(log)];
+
+        let discovered = discover_fields_from_events(&events);
+
+        assert_eq!(discovered.get("foo"),
Some(&DataType::Int64)); + assert_eq!(discovered.get("bar"), Some(&DataType::Utf8)); + } + + #[test] + fn test_discover_fields_first_type_wins() { + // First event has foo as integer + let mut log1 = LogEvent::default(); + log1.insert("foo", 42i64); + + // Second event has foo as string (should be ignored) + let mut log2 = LogEvent::default(); + log2.insert("foo", "string_value"); + + let events = vec![Event::Log(log1), Event::Log(log2)]; + let discovered = discover_fields_from_events(&events); + + // First type (Int64) should win + assert_eq!(discovered.get("foo"), Some(&DataType::Int64)); + } + + #[test] + fn test_merge_schema_with_discovered() { + let base = Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ]); + + let mut discovered = HashMap::new(); + discovered.insert("foo".to_string(), DataType::Boolean); + discovered.insert("id".to_string(), DataType::Int64); // Duplicate, should be ignored + + let merged = merge_schema_with_discovered(&base, &discovered); + + assert_eq!(merged.fields().len(), 3); + // Base fields preserved + assert_eq!(merged.field(0).name(), "id"); + assert!(!merged.field(0).is_nullable()); + // New field added as nullable + assert!(merged.field_with_name("foo").unwrap().is_nullable()); + } + + #[test] + fn test_build_inferred_schema() { + let base = Schema::new(vec![Field::new("id", DataType::Int64, false)]); + + let mut log = LogEvent::default(); + log.insert("id", 1i64); + log.insert("new_field", "value"); + let events = vec![Event::Log(log)]; + + let merged = build_inferred_schema(&base, &events); + + assert_eq!(merged.fields().len(), 2); + assert!(merged.field_with_name("id").is_ok()); + assert!(merged.field_with_name("new_field").is_ok()); + assert!(merged.field_with_name("new_field").unwrap().is_nullable()); + } + + #[test] + fn test_nested_fields_are_flattened() { + // Test that nested object fields are flattened + let mut log = LogEvent::default(); + let mut obj = 
ObjectMap::new(); + obj.insert("nested".into(), Value::Integer(42)); + log.insert("obj_field", Value::Object(obj)); + + let events = vec![Event::Log(log)]; + let discovered = discover_fields_from_events(&events); + + // Flattened field should be Int64 + assert_eq!(discovered.get("obj_field.nested"), Some(&DataType::Int64)); + // The parent object key itself is not in the flattened output + assert_eq!(discovered.get("obj_field"), None); + } +} diff --git a/src/sinks/delta_lake/service.rs b/src/sinks/delta_lake/service.rs new file mode 100644 index 0000000000000..7428a534187fd --- /dev/null +++ b/src/sinks/delta_lake/service.rs @@ -0,0 +1,398 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use deltalake::DeltaTableBuilder; +use deltalake::DeltaTableError; +use deltalake::ObjectStoreError; +use deltalake::datafusion::datasource::TableProvider; +use deltalake::kernel::transaction::CommitProperties; +use deltalake::logstore::IORuntime; +use deltalake::operations::write::SchemaMode; +use deltalake::protocol::SaveMode; +use url::Url; + +use crate::internal_events::EndpointBytesSent; +use crate::sinks::prelude::*; + +use super::request_builder::{DeltaLakeRequest, SharedSchema}; + +/// Classification of Delta Lake write errors. +/// +/// Provides a single source of truth for error handling decisions. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WriteErrorKind { + /// Concurrent transaction conflict (e.g., from optimize/vacuum or another writer). + /// Retried internally with table reload. + ConcurrentConflict, + /// Schema mismatch between incoming data and table schema. + /// Retried internally with schema reload (if schema evolution is enabled). + SchemaMismatch, + /// Transient error (network, IO, timeout). Retried at Tower level. + Transient, + /// Non-retriable error (permissions, not found, invalid config, etc.). + NonRetriable, +} + +impl WriteErrorKind { + /// Classify a DeltaTableError into a WriteErrorKind. 
+ pub fn from_delta_error(error: &DeltaTableError) -> Self { + match error { + DeltaTableError::ObjectStore { source } => Self::from_object_store_error(source), + + DeltaTableError::Transaction { source } => { + let s = source.to_string(); + if s.contains("ConcurrentDeleteRead") + || s.contains("concurrent transaction deleted") + { + WriteErrorKind::ConcurrentConflict + } else { + WriteErrorKind::NonRetriable + } + } + + DeltaTableError::Arrow { .. } + | DeltaTableError::InvalidData { .. } + | DeltaTableError::SchemaMismatch { .. } => WriteErrorKind::SchemaMismatch, + + DeltaTableError::Io { .. } => WriteErrorKind::Transient, + + DeltaTableError::Kernel { .. } => WriteErrorKind::NonRetriable, + + _ => { + let error_str = error.to_string().to_lowercase(); + if error_str.contains("schema") + || error_str.contains("field") + || error_str.contains("column") + || error_str.contains("incompatible") + || error_str.contains("type mismatch") + { + WriteErrorKind::SchemaMismatch + } else { + WriteErrorKind::Transient + } + } + } + } + + fn from_object_store_error(error: &ObjectStoreError) -> Self { + match error { + ObjectStoreError::Precondition { .. } | ObjectStoreError::AlreadyExists { .. } => { + WriteErrorKind::ConcurrentConflict + } + ObjectStoreError::PermissionDenied { .. } + | ObjectStoreError::Unauthenticated { .. } + | ObjectStoreError::NotFound { .. } + | ObjectStoreError::NotSupported { .. } + | ObjectStoreError::NotImplemented + | ObjectStoreError::UnknownConfigurationKey { .. } + | ObjectStoreError::InvalidPath { .. } => WriteErrorKind::NonRetriable, + _ => { + warn!( + message = "ObjectStore transient error, will retry at Tower level", + error = %error, + error_debug = ?error, + ); + WriteErrorKind::Transient + } + } + } + + /// Returns true if this error should be retried at the Tower level. 
+ pub const fn is_retriable_at_tower_level(&self) -> bool { + matches!(self, WriteErrorKind::Transient) + } + + /// Returns true if this is a concurrent conflict requiring table reload. + pub const fn is_concurrent_conflict(&self) -> bool { + matches!(self, WriteErrorKind::ConcurrentConflict) + } + + /// Returns true if this is a schema mismatch requiring schema reload. + pub const fn is_schema_mismatch(&self) -> bool { + matches!(self, WriteErrorKind::SchemaMismatch) + } +} + +/// Response from Delta Lake write operations. +/// +/// Contains metrics about the write operation including the number of files +/// written and bytes transferred. +#[derive(Debug)] +pub struct DeltaLakeResponse { + /// Event byte size for metrics + pub events_byte_size: GroupedCountByteSize, + + /// Number of Parquet files written + pub files_written: usize, + + /// Bytes written (compressed Parquet size) + pub bytes_written: usize, +} + +impl DriverResponse for DeltaLakeResponse { + fn event_status(&self) -> EventStatus { + EventStatus::Delivered + } + + fn events_sent(&self) -> &GroupedCountByteSize { + &self.events_byte_size + } + + fn bytes_sent(&self) -> Option { + Some(self.bytes_written) + } +} + +/// Tower service for Delta Lake writes. +#[derive(Clone)] +pub struct DeltaLakeService { + /// Table URI for opening fresh connections + table_uri: String, + /// Storage options for authentication + storage_options: HashMap, + schema_evolution: bool, + /// Shared schema reference, updated after successful writes + shared_schema: SharedSchema, +} + +impl DeltaLakeService { + /// Create a new Delta Lake service for the given table. 
+ pub const fn new( + table_uri: String, + storage_options: HashMap, + schema_evolution: bool, + shared_schema: SharedSchema, + ) -> Self { + Self { + table_uri, + storage_options, + schema_evolution, + shared_schema, + } + } +} + +impl Service for DeltaLakeService { + type Response = DeltaLakeResponse; + type Error = DeltaTableError; + type Future = BoxFuture<'static, Result>; + + fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { + // Delta Lake writes are async, always ready + Poll::Ready(Ok(())) + } + + fn call(&mut self, request: DeltaLakeRequest) -> Self::Future { + let table_uri = self.table_uri.clone(); + let storage_options = self.storage_options.clone(); + let schema_evolution = self.schema_evolution; + let shared_schema = Arc::clone(&self.shared_schema); + + Box::pin(async move { + // Use batches directly from request - no Parquet deserialization needed + let batches = request.batches; + + let parsed_url = Url::parse(&table_uri).map_err(|e| { + DeltaTableError::Generic(format!("Invalid table URI {}: {}", table_uri, e)) + })?; + + debug!(message = "Opening Delta table", table_uri = %table_uri); + let open_start = std::time::Instant::now(); + + // Use DeltaTableBuilder with IORuntime to prevent blocking I/O deadlocks. + // IORuntime spawns I/O operations on a separate runtime, preventing tokio + // worker threads from being blocked by synchronous operations in object_store. + let mut table = DeltaTableBuilder::from_url(parsed_url) + .map_err(|e| { + DeltaTableError::Generic(format!( + "Failed to create table builder for {}: {}", + table_uri, e + )) + })? 
+ .with_io_runtime(IORuntime::default()) + .with_storage_options(storage_options.clone()) + .load() + .await + .map_err(|e| { + DeltaTableError::Generic(format!("Failed to open table {}: {}", table_uri, e)) + })?; + + info!( + message = "Delta table opened", + table_uri = %table_uri, + version = table.version(), + elapsed_ms = open_start.elapsed().as_millis() as u64, + ); + + // Retry loop for schema mismatch errors only + // Concurrent conflicts are handled automatically by delta-rs via CommitProperties + const MAX_CONFLICT_RETRIES: usize = 5; + const MAX_SCHEMA_RETRIES: usize = 1; + let mut schema_retry_count = 0; + + loop { + if schema_retry_count > 0 { + info!( + message = "Retrying write after schema reload", + retry_count = schema_retry_count, + ); + } + + // Build write operation with built-in conflict retry via CommitProperties + let mut write_builder = table + .clone() + .write(batches.clone()) + .with_save_mode(SaveMode::Append) + .with_commit_properties( + CommitProperties::default().with_max_retries(MAX_CONFLICT_RETRIES), + ); + + if schema_evolution { + write_builder = write_builder.with_schema_mode(SchemaMode::Merge); + } + + // Execute write and commit + let write_start = std::time::Instant::now(); + let batch_count = batches.len(); + let row_count: usize = batches.iter().map(|b| b.num_rows()).sum(); + info!( + message = "Starting Delta write operation", + version = table.version(), + batch_count = batch_count, + row_count = row_count, + schema_retry_count = schema_retry_count, + ); + + match write_builder.await { + Ok(new_table) => { + // Get the byte size from the request (Arrow in-memory size) + let bytes_written = request.byte_size; + let new_version = new_table.version(); + + // Only update schema cache if schema evolution is enabled. + // NOTE: new_table.schema() can block on I/O (reads snapshot metadata), + // so we wrap it in spawn_blocking to avoid deadlocking the async runtime. 
+ if schema_evolution { + let shared_schema_clone = Arc::clone(&shared_schema); + if let Ok(new_schema) = + tokio::task::spawn_blocking(move || new_table.schema()).await + { + let old_schema = shared_schema_clone.load(); + let new_fields: Vec<_> = new_schema + .fields() + .iter() + .filter(|f| old_schema.field_with_name(f.name()).is_err()) + .map(|f| f.name().as_str()) + .collect(); + + if !new_fields.is_empty() { + info!( + message = "Schema evolution: new fields added to table", + new_fields = ?new_fields, + total_fields = new_schema.fields().len(), + version = new_version, + ); + shared_schema_clone.store(new_schema); + } + } + } + + info!( + message = "Delta write operation completed", + version = new_version, + elapsed_ms = write_start.elapsed().as_millis() as u64, + bytes_written = bytes_written, + ); + + emit!(EndpointBytesSent { + byte_size: bytes_written, + protocol: "delta_lake", + endpoint: table.table_url().as_str(), + }); + + return Ok(DeltaLakeResponse { + events_byte_size: request + .request_metadata + .into_events_estimated_json_encoded_byte_size(), + files_written: 1, // One commit creates one file typically + bytes_written, + }); + } + Err(e) => { + // Classify error for schema mismatch handling + // (concurrent conflicts are handled by delta-rs via CommitProperties) + let error_kind = WriteErrorKind::from_delta_error(&e); + let elapsed_ms = write_start.elapsed().as_millis() as u64; + + warn!( + message = "Delta Lake write error occurred", + error = %e, + error_kind = ?error_kind, + schema_retry_count = schema_retry_count, + elapsed_ms = elapsed_ms, + ); + + // Handle schema mismatch errors with reload and retry (if enabled) + if schema_evolution + && error_kind.is_schema_mismatch() + && schema_retry_count < MAX_SCHEMA_RETRIES + { + schema_retry_count += 1; + warn!( + message = "Schema mismatch detected, reloading table schema and retrying", + error = %e, + retry_count = schema_retry_count, + max_retries = MAX_SCHEMA_RETRIES, + ); + + // Reload 
the table to get the latest schema + table.load().await.map_err(|load_err| { + DeltaTableError::Generic(format!( + "Failed to reload table after schema mismatch: {}", + load_err + )) + })?; + + continue; + } + + // Log final error + if error_kind.is_schema_mismatch() { + error!( + message = "Schema mismatch - exhausted retries or schema evolution disabled", + error = %e, + schema_evolution = schema_evolution, + retry_count = schema_retry_count, + ); + } else { + error!( + message = "Delta Lake write failed", + error = %e, + error_kind = ?error_kind, + ); + } + return Err(e); + } + } + } + }) + } +} + +/// Retry logic for Delta Lake operations. +/// +/// Determines which errors are retriable at the Tower level. +/// Uses WriteErrorKind for consistent error classification across the codebase. +#[derive(Debug, Clone, Default)] +pub struct DeltaLakeRetryLogic; + +impl RetryLogic for DeltaLakeRetryLogic { + type Error = DeltaTableError; + type Request = DeltaLakeRequest; + type Response = DeltaLakeResponse; + + fn is_retriable_error(&self, error: &Self::Error) -> bool { + WriteErrorKind::from_delta_error(error).is_retriable_at_tower_level() + } +} diff --git a/src/sinks/delta_lake/sink.rs b/src/sinks/delta_lake/sink.rs new file mode 100644 index 0000000000000..4ae1a803af714 --- /dev/null +++ b/src/sinks/delta_lake/sink.rs @@ -0,0 +1,107 @@ +//! Sink implementation for Delta Lake. +//! +//! This module implements the main sink that orchestrates the flow of events +//! from Vector to Delta Lake tables. + +use std::sync::Arc; + +use tracing::Span; + +use crate::sinks::prelude::*; +use crate::sinks::util::builder::SinkBuilderExt; +use crate::sinks::util::request_builder::default_request_builder_concurrency_limit; + +use super::request_builder::{DeltaLakeRequest, DeltaLakeRequestBuilder}; + +/// Sink for writing events to Delta Lake tables. 
+/// +/// This sink batches events, converts them directly to Arrow RecordBatches, +/// and writes them to Delta Lake with proper transaction log management. +pub struct DeltaLakeSink { + service: S, + request_builder: DeltaLakeRequestBuilder, + batch_settings: BatcherSettings, +} + +impl DeltaLakeSink +where + S: Service + Send + 'static, + S::Future: Send + 'static, + S::Response: DriverResponse + Send + 'static, + S::Error: std::fmt::Debug + Into + Send, +{ + /// Create a new Delta Lake sink. + /// + /// # Arguments + /// + /// * `service` - The Delta Lake service for handling writes + /// * `request_builder` - Builder for converting events to RecordBatch requests + /// * `batch_settings` - Configuration for event batching + pub const fn new( + service: S, + request_builder: DeltaLakeRequestBuilder, + batch_settings: BatcherSettings, + ) -> Self { + Self { + service, + request_builder, + batch_settings, + } + } + + async fn run_inner(self: Box, input: BoxStream<'_, Event>) -> Result<(), ()> { + let batch_settings = self.batch_settings.as_byte_size_config(); + let request_builder = Arc::new(self.request_builder); + let concurrency = default_request_builder_concurrency_limit(); + + let span = Arc::new(Span::current()); + + input + .batched(batch_settings) + .concurrent_map(concurrency, move |events| { + let builder = Arc::clone(&request_builder); + let span = Arc::clone(&span); + Box::pin(async move { + let _entered = span.enter(); + builder.build_request(events) + }) + }) + .filter_map(|request| async { + match request { + Err(error) => { + emit!(SinkRequestBuildError { error }); + None + } + Ok(req) => Some(req), + } + }) + .into_driver(self.service) + .run() + .await + } +} + +#[async_trait::async_trait] +impl StreamSink for DeltaLakeSink +where + S: Service + Send + 'static, + S::Future: Send + 'static, + S::Response: DriverResponse + Send + 'static, + S::Error: std::fmt::Debug + Into + Send, +{ + async fn run(self: Box, input: BoxStream<'_, Event>) -> 
Result<(), ()> { + self.run_inner(input).await + } +} + +#[cfg(test)] +mod tests { + // Note: Comprehensive testing requires a full integration test with a real Delta table + // These tests just verify the structure compiles correctly + + #[test] + fn test_sink_structure() { + // This test just verifies the sink can be constructed + // Real functionality requires the full service stack + } +} diff --git a/src/sinks/mod.rs b/src/sinks/mod.rs index b5a45a462566e..3a29188d3698d 100644 --- a/src/sinks/mod.rs +++ b/src/sinks/mod.rs @@ -45,6 +45,8 @@ pub mod databend; feature = "sinks-datadog_traces" ))] pub mod datadog; +#[cfg(feature = "sinks-delta_lake")] +pub mod delta_lake; #[cfg(feature = "sinks-elasticsearch")] pub mod elasticsearch; #[cfg(feature = "sinks-file")] diff --git a/src/sources/delta_lake_cdf/checkpoint.rs b/src/sources/delta_lake_cdf/checkpoint.rs new file mode 100644 index 0000000000000..eafb191545647 --- /dev/null +++ b/src/sources/delta_lake_cdf/checkpoint.rs @@ -0,0 +1,208 @@ +//! Checkpoint persistence for Delta Lake CDF source. +//! +//! The checkpointer stores the next version to read from, allowing the source +//! to resume from where it left off after restarts. + +use std::io; +use std::path::{Path, PathBuf}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +const CHECKPOINT_FILE_NAME: &str = "delta_cdf_checkpoint.json"; + +/// Checkpoint data persisted to disk. +#[derive(Debug, Clone, Serialize, Deserialize)] +struct DeltaCdfCheckpoint { + /// The next version to read from. + next_version: i64, + /// Table URI for validation (ensure checkpoint matches current table). + table_uri: String, + /// Timestamp of last checkpoint update. + updated_at: DateTime, +} + +/// Manages checkpoint persistence for the Delta Lake CDF source. 
+#[derive(Debug)] +pub struct DeltaLakeCdfCheckpointer { + checkpoint_path: PathBuf, + table_uri: String, +} + +impl DeltaLakeCdfCheckpointer { + /// Create a new checkpointer for the given data directory and table URI. + pub fn new(data_dir: &Path, table_uri: &str) -> Self { + Self { + checkpoint_path: data_dir.join(CHECKPOINT_FILE_NAME), + table_uri: table_uri.to_string(), + } + } + + /// Read the checkpoint from disk. + /// + /// Returns `None` if the checkpoint file doesn't exist or is invalid. + /// Returns `Some(version)` with the next version to read from. + pub fn read_checkpoint(&self) -> Option { + let content = match std::fs::read_to_string(&self.checkpoint_path) { + Ok(content) => content, + Err(e) => { + if e.kind() != io::ErrorKind::NotFound { + warn!( + message = "Failed to read checkpoint file", + path = ?self.checkpoint_path, + error = %e, + ); + } + return None; + } + }; + + let checkpoint: DeltaCdfCheckpoint = match serde_json::from_str(&content) { + Ok(cp) => cp, + Err(e) => { + warn!( + message = "Failed to parse checkpoint file", + path = ?self.checkpoint_path, + error = %e, + ); + return None; + } + }; + + // Validate table URI matches + if checkpoint.table_uri != self.table_uri { + warn!( + message = "Checkpoint table URI mismatch, ignoring checkpoint", + checkpoint_uri = %checkpoint.table_uri, + current_uri = %self.table_uri, + ); + return None; + } + + debug!( + message = "Loaded checkpoint", + next_version = checkpoint.next_version, + updated_at = %checkpoint.updated_at, + ); + + Some(checkpoint.next_version) + } + + /// Write the checkpoint to disk. + /// + /// Uses atomic write (write to temp file, then rename) to prevent corruption. 
+ pub fn write_checkpoint(&self, next_version: i64) -> io::Result<()> { + let checkpoint = DeltaCdfCheckpoint { + next_version, + table_uri: self.table_uri.clone(), + updated_at: Utc::now(), + }; + + let content = serde_json::to_string_pretty(&checkpoint) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + // Write to temp file first for atomic operation + let tmp_path = self.checkpoint_path.with_extension("tmp"); + + // Ensure parent directory exists + if let Some(parent) = self.checkpoint_path.parent() { + std::fs::create_dir_all(parent)?; + } + + std::fs::write(&tmp_path, content)?; + + // Atomic rename + std::fs::rename(&tmp_path, &self.checkpoint_path)?; + + debug!( + message = "Checkpoint saved", + next_version = next_version, + path = ?self.checkpoint_path, + ); + + Ok(()) + } + + /// Get the checkpoint file path (for testing/debugging). + #[cfg(test)] + pub fn checkpoint_path(&self) -> &Path { + &self.checkpoint_path + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_checkpoint_roundtrip() { + let temp_dir = TempDir::new().unwrap(); + let checkpointer = DeltaLakeCdfCheckpointer::new(temp_dir.path(), "s3://bucket/table"); + + // Initially no checkpoint + assert!(checkpointer.read_checkpoint().is_none()); + + // Write checkpoint + checkpointer.write_checkpoint(42).unwrap(); + + // Read it back + assert_eq!(checkpointer.read_checkpoint(), Some(42)); + + // Update checkpoint + checkpointer.write_checkpoint(100).unwrap(); + assert_eq!(checkpointer.read_checkpoint(), Some(100)); + } + + #[test] + fn test_checkpoint_uri_mismatch() { + let temp_dir = TempDir::new().unwrap(); + + // Write checkpoint for one table + let checkpointer1 = DeltaLakeCdfCheckpointer::new(temp_dir.path(), "s3://bucket/table1"); + checkpointer1.write_checkpoint(42).unwrap(); + + // Try to read with different table URI + let checkpointer2 = DeltaLakeCdfCheckpointer::new(temp_dir.path(), "s3://bucket/table2"); + 
assert!(checkpointer2.read_checkpoint().is_none()); + } + + #[test] + fn test_checkpoint_file_format() { + let temp_dir = TempDir::new().unwrap(); + let checkpointer = DeltaLakeCdfCheckpointer::new(temp_dir.path(), "s3://bucket/table"); + + checkpointer.write_checkpoint(42).unwrap(); + + // Verify the file content is valid JSON + let content = std::fs::read_to_string(checkpointer.checkpoint_path()).unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&content).unwrap(); + + assert_eq!(parsed["next_version"], 42); + assert_eq!(parsed["table_uri"], "s3://bucket/table"); + assert!(parsed["updated_at"].is_string()); + } + + #[test] + fn test_checkpoint_corrupt_file() { + let temp_dir = TempDir::new().unwrap(); + let checkpointer = DeltaLakeCdfCheckpointer::new(temp_dir.path(), "s3://bucket/table"); + + // Write corrupt data + std::fs::write(checkpointer.checkpoint_path(), "not valid json").unwrap(); + + // Should return None for corrupt file + assert!(checkpointer.read_checkpoint().is_none()); + } + + #[test] + fn test_checkpoint_missing_dir() { + let temp_dir = TempDir::new().unwrap(); + let nested_path = temp_dir.path().join("nested").join("dir"); + let checkpointer = DeltaLakeCdfCheckpointer::new(&nested_path, "s3://bucket/table"); + + // Should create parent directories + checkpointer.write_checkpoint(42).unwrap(); + assert_eq!(checkpointer.read_checkpoint(), Some(42)); + } +} diff --git a/src/sources/delta_lake_cdf/config.rs b/src/sources/delta_lake_cdf/config.rs new file mode 100644 index 0000000000000..5f2e8cccae632 --- /dev/null +++ b/src/sources/delta_lake_cdf/config.rs @@ -0,0 +1,521 @@ +//! Configuration for the Delta Lake CDF source. 
+ +use std::collections::HashMap; +use std::path::PathBuf; +use std::time::Duration; + +use serde_with::serde_as; +use url::Url; +use vector_lib::config::{DataType, LegacyKey, LogNamespace}; +use vector_lib::configurable::configurable_component; +use vector_lib::lookup::owned_value_path; +use vrl::value::Kind; + +use crate::config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}; + +use super::checkpoint::DeltaLakeCdfCheckpointer; +use super::source::run_cdf_source; + +/// Configuration for the `delta_lake_cdf` source. +#[serde_as] +#[configurable_component(source( + "delta_lake_cdf", + "Stream Change Data Feed (CDC) events from Delta Lake tables." +))] +#[derive(Clone, Debug)] +#[serde(deny_unknown_fields)] +pub struct DeltaLakeCdfConfig { + /// Full URI to the Delta Lake table. + /// + /// Supports multiple storage backends: + /// - Google Cloud Storage: `gs://bucket/path/to/table` + /// - Amazon S3: `s3://bucket/path/to/table` + /// - S3-compatible (MinIO, etc.): `s3://bucket/path/to/table` with custom endpoint + /// - Azure Blob Storage: `abfs://container@account/path/to/table` + /// - Local filesystem: `file:///path/to/table` + /// + /// The table must have Change Data Feed enabled (`delta.enableChangeDataFeed = true`). + #[configurable(metadata(docs::examples = "s3://my-bucket/data/events"))] + #[configurable(metadata(docs::examples = "gs://my-bucket/data/events"))] + pub table_uri: String, + + /// Storage-specific options. + /// + /// Configuration options specific to the storage backend: + /// + /// **For GCS:** + /// - `google_service_account`: Path to service account JSON file + /// + /// **For S3:** + /// - `aws_access_key_id`: AWS access key + /// - `aws_secret_access_key`: AWS secret key + /// - `aws_region`: AWS region (e.g., "us-east-1") + /// - `aws_endpoint`: Custom S3 endpoint (for MinIO, LocalStack, etc.) 
+ /// - `aws_allow_http`: Allow HTTP connections (for local testing) + /// - `aws_s3_path_style`: Use path-style addressing (for MinIO) + /// + /// If not provided, the source will use default credentials from the environment. + #[serde(default)] + pub storage_options: HashMap, + + /// Interval between polling for new table versions. + /// + /// The source periodically checks for new commits to the Delta table + /// and reads any new Change Data Feed records. + #[serde(default = "default_poll_interval")] + #[serde_as(as = "serde_with::DurationSeconds")] + #[configurable(metadata(docs::type_unit = "seconds"))] + #[configurable(metadata(docs::examples = 10))] + #[configurable(metadata(docs::examples = 30))] + pub poll_interval_secs: Duration, + + /// Where to start reading from on first run (when no checkpoint exists). + #[serde(default)] + #[configurable(derived)] + pub start_position: StartPosition, + + /// Whether to include the full row data in events. + /// + /// When `true` (default), events include all columns from the Delta table. + /// When `false`, events only include CDF metadata columns + /// (`_change_type`, `_commit_version`, `_commit_timestamp`). + #[serde(default = "default_include_data")] + pub include_data: bool, + + /// Filter events by change type. + /// + /// If empty (default), all change types are included. + /// Specify one or more types to filter the output. + #[serde(default)] + pub change_types: Vec, + + /// Stop reading at this version (inclusive). + /// + /// Useful for bounded reads during testing. If not specified, + /// the source continues reading indefinitely. + #[serde(default)] + pub ending_version: Option, + + /// Maximum number of versions to process per poll cycle. + /// + /// When catching up on a large backlog of versions, processing too many + /// versions at once can cause the query planner to hang while listing + /// thousands of CDF files. 
This option limits how many versions are + /// processed in each poll cycle, allowing incremental catchup. + /// + /// If not specified, all available versions are processed at once. + #[serde(default)] + #[configurable(metadata(docs::examples = 100))] + #[configurable(metadata(docs::examples = 1000))] + pub max_versions_per_poll: Option, + + /// Directory for storing checkpoints. + /// + /// The source persists its current position (version) to disk + /// so it can resume from where it left off after restarts. + /// + /// Defaults to the global `data_dir` if not specified. + #[serde(default)] + pub data_dir: Option, + + /// The namespace to use for logs. This overrides the global setting. + #[serde(default)] + #[configurable(metadata(docs::hidden))] + pub log_namespace: Option, +} + +/// Start position for reading Change Data Feed. +#[configurable_component] +#[derive(Clone, Debug, Default, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum StartPosition { + /// Start from the beginning of CDF history (version 0). + /// + /// This will read all historical changes if CDF was enabled from table creation. + #[default] + Beginning, + + /// Start from the latest version. + /// + /// Skip all existing data and only read new changes going forward. + Latest, + + /// Start from a specific version number. + Version(i64), +} + +/// Change Data Feed change types. +#[configurable_component] +#[derive(Clone, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ChangeType { + /// Row was inserted. + Insert, + /// Previous value of an updated row (before the update). + UpdatePreimage, + /// New value of an updated row (after the update). + UpdatePostimage, + /// Row was deleted. + Delete, +} + +impl ChangeType { + /// Convert from CDF string representation. 
+ pub fn from_cdf_string(s: &str) -> Option { + match s { + "insert" => Some(Self::Insert), + "update_preimage" => Some(Self::UpdatePreimage), + "update_postimage" => Some(Self::UpdatePostimage), + "delete" => Some(Self::Delete), + _ => None, + } + } +} + +const fn default_poll_interval() -> Duration { + Duration::from_secs(10) +} + +const fn default_include_data() -> bool { + true +} + +impl Default for DeltaLakeCdfConfig { + fn default() -> Self { + Self { + table_uri: String::new(), + storage_options: HashMap::new(), + poll_interval_secs: default_poll_interval(), + start_position: StartPosition::default(), + include_data: default_include_data(), + change_types: Vec::new(), + ending_version: None, + max_versions_per_poll: None, + data_dir: None, + log_namespace: None, + } + } +} + +impl DeltaLakeCdfConfig { + /// Validate the table URI scheme. + fn validate_uri(&self) -> crate::Result { + let table_uri = Url::parse(&self.table_uri) + .map_err(|e| format!("Invalid table URI '{}': {}", self.table_uri, e))?; + + match table_uri.scheme() { + "gs" | "s3" | "s3a" | "file" | "abfs" | "abfss" | "az" => {} + scheme => { + return Err(format!( + "Unsupported URI scheme '{}'. 
Supported: gs, s3, s3a, file, abfs, abfss, az", + scheme + ) + .into()); + } + } + + Ok(table_uri) + } +} + +impl GenerateConfig for DeltaLakeCdfConfig { + fn generate_config() -> toml::Value { + toml::Value::try_from(Self { + table_uri: "s3://my-bucket/data/events".to_string(), + storage_options: HashMap::new(), + poll_interval_secs: default_poll_interval(), + start_position: StartPosition::default(), + include_data: default_include_data(), + change_types: Vec::new(), + ending_version: None, + max_versions_per_poll: None, + data_dir: None, + log_namespace: None, + }) + .unwrap() + } +} + +#[async_trait::async_trait] +#[typetag::serde(name = "delta_lake_cdf")] +impl SourceConfig for DeltaLakeCdfConfig { + async fn build(&self, cx: SourceContext) -> crate::Result { + let log_namespace = cx.log_namespace(self.log_namespace); + + // Validate URI scheme + let table_uri = self.validate_uri()?; + + // Open Delta table with storage options + let table = deltalake::open_table_with_storage_options( + table_uri.clone(), + self.storage_options.clone(), + ) + .await + .map_err(|e| format!("Failed to open Delta table at {}: {}", self.table_uri, e))?; + + // Verify Change Data Feed is enabled on the table + let cdf_enabled = table + .snapshot() + .map_err(|e| format!("Failed to get table snapshot: {}", e))? + .table_config() + .enable_change_data_feed + .unwrap_or(false); + + if !cdf_enabled { + return Err(format!( + "Change Data Feed is not enabled on table '{}'. 
\ + Set table property 'delta.enableChangeDataFeed' to 'true'.", + self.table_uri + ) + .into()); + } + + // Initialize checkpoint manager + let data_dir = cx + .globals + .resolve_and_make_data_subdir(self.data_dir.as_ref(), cx.key.id())?; + let checkpointer = DeltaLakeCdfCheckpointer::new(&data_dir, &self.table_uri); + + // Determine starting version + let start_version = determine_start_version(&checkpointer, &self.start_position, &table); + + Ok(Box::pin(run_cdf_source( + table, + self.clone(), + start_version, + checkpointer, + cx.shutdown, + cx.out, + log_namespace, + ))) + } + + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + let log_namespace = global_log_namespace.merge(self.log_namespace); + + // Define the schema for CDF events + let schema_definition = + vector_lib::schema::Definition::default_for_namespace(&[log_namespace].into()) + .with_standard_vector_source_metadata() + .with_source_metadata( + DeltaLakeCdfConfig::NAME, + Some(LegacyKey::Overwrite(owned_value_path!("_change_type"))), + &owned_value_path!("change_type"), + Kind::bytes(), + Some("change_type"), + ) + .with_source_metadata( + DeltaLakeCdfConfig::NAME, + Some(LegacyKey::Overwrite(owned_value_path!("_commit_version"))), + &owned_value_path!("commit_version"), + Kind::integer(), + Some("commit_version"), + ) + .with_source_metadata( + DeltaLakeCdfConfig::NAME, + Some(LegacyKey::Overwrite(owned_value_path!("_commit_timestamp"))), + &owned_value_path!("commit_timestamp"), + Kind::timestamp(), + Some("commit_timestamp"), + ); + + vec![SourceOutput::new_maybe_logs( + DataType::Log, + schema_definition, + )] + } + + fn can_acknowledge(&self) -> bool { + false + } +} + +/// Determine the starting version based on checkpoint and configuration. 
+fn determine_start_version( + checkpointer: &DeltaLakeCdfCheckpointer, + start_position: &StartPosition, + table: &deltalake::DeltaTable, +) -> i64 { + if let Some(checkpoint_version) = checkpointer.read_checkpoint() { + info!( + message = "Resuming from checkpoint", + version = checkpoint_version, + ); + return checkpoint_version; + } + + // No checkpoint, use configured start position + match start_position { + StartPosition::Beginning => 0, + StartPosition::Latest => { + let version = table.version().unwrap_or(0); + // Start from next version (don't process current state) + version + 1 + } + StartPosition::Version(v) => *v, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn generate_config() { + crate::test_util::test_generate_config::(); + } + + #[test] + fn test_config_s3() { + let config_str = r#" + table_uri = "s3://test-bucket/test/table" + poll_interval_secs = 30 + + [storage_options] + aws_access_key_id = "AKIAIOSFODNN7EXAMPLE" + aws_secret_access_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + aws_region = "us-east-1" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.table_uri, "s3://test-bucket/test/table"); + assert_eq!(config.poll_interval_secs, Duration::from_secs(30)); + assert_eq!( + config.storage_options.get("aws_region"), + Some(&"us-east-1".to_string()) + ); + } + + #[test] + fn test_config_minio() { + let config_str = r#" + table_uri = "s3://test-bucket/test/table" + + [storage_options] + aws_access_key_id = "minioadmin" + aws_secret_access_key = "minioadmin" + aws_region = "us-east-1" + aws_endpoint = "http://localhost:9000" + aws_allow_http = "true" + aws_s3_path_style = "true" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!( + config.storage_options.get("aws_endpoint"), + Some(&"http://localhost:9000".to_string()) + ); + } + + #[test] + fn test_config_start_position_beginning() { + 
let config_str = r#" + table_uri = "s3://bucket/table" + start_position = "beginning" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.start_position, StartPosition::Beginning); + } + + #[test] + fn test_config_start_position_latest() { + let config_str = r#" + table_uri = "s3://bucket/table" + start_position = "latest" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.start_position, StartPosition::Latest); + } + + #[test] + fn test_config_change_type_filter() { + let config_str = r#" + table_uri = "s3://bucket/table" + change_types = ["insert", "delete"] + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.change_types.len(), 2); + assert!(config.change_types.contains(&ChangeType::Insert)); + assert!(config.change_types.contains(&ChangeType::Delete)); + } + + #[test] + fn test_config_ending_version() { + let config_str = r#" + table_uri = "s3://bucket/table" + ending_version = 100 + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.ending_version, Some(100)); + } + + #[test] + fn test_validate_uri_valid() { + let config = DeltaLakeCdfConfig { + table_uri: "s3://bucket/table".to_string(), + ..Default::default() + }; + assert!(config.validate_uri().is_ok()); + + let config = DeltaLakeCdfConfig { + table_uri: "gs://bucket/table".to_string(), + ..Default::default() + }; + assert!(config.validate_uri().is_ok()); + } + + #[test] + fn test_validate_uri_invalid_scheme() { + let config = DeltaLakeCdfConfig { + table_uri: "http://bucket/table".to_string(), + ..Default::default() + }; + assert!(config.validate_uri().is_err()); + } + + #[test] + fn test_change_type_from_string() { + assert_eq!( + ChangeType::from_cdf_string("insert"), + Some(ChangeType::Insert) + ); + assert_eq!( + 
ChangeType::from_cdf_string("update_preimage"), + Some(ChangeType::UpdatePreimage) + ); + assert_eq!( + ChangeType::from_cdf_string("update_postimage"), + Some(ChangeType::UpdatePostimage) + ); + assert_eq!( + ChangeType::from_cdf_string("delete"), + Some(ChangeType::Delete) + ); + assert_eq!(ChangeType::from_cdf_string("unknown"), None); + } + + #[test] + fn test_config_max_versions_per_poll() { + let config_str = r#" + table_uri = "s3://bucket/table" + max_versions_per_poll = 100 + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.max_versions_per_poll, Some(100)); + } + + #[test] + fn test_config_max_versions_per_poll_default() { + let config_str = r#" + table_uri = "s3://bucket/table" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + assert_eq!(config.max_versions_per_poll, None); + } +} diff --git a/src/sources/delta_lake_cdf/event.rs b/src/sources/delta_lake_cdf/event.rs new file mode 100644 index 0000000000000..276c9d582106a --- /dev/null +++ b/src/sources/delta_lake_cdf/event.rs @@ -0,0 +1,577 @@ +//! Convert Arrow RecordBatches to Vector LogEvents. 
+ +use bytes::Bytes; +use chrono::{DateTime, TimeZone, Utc}; +use deltalake::arrow::array::{ + Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, Float32Array, + Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, LargeBinaryArray, + LargeStringArray, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, + UInt64Array, +}; +use deltalake::arrow::datatypes::DataType; +use deltalake::arrow::record_batch::RecordBatch; +use deltalake::delta_datafusion::cdf::{CHANGE_TYPE_COL, COMMIT_TIMESTAMP_COL, COMMIT_VERSION_COL}; +use vector_lib::config::{LegacyKey, LogNamespace}; +use vector_lib::event::{Event, LogEvent}; +use vector_lib::lookup::path; +use vrl::value::Value; + +use super::config::{ChangeType, DeltaLakeCdfConfig}; + +/// Convert a batch of CDF RecordBatches to Vector Events. +pub fn convert_cdf_batches_to_events( + batches: Vec, + config: &DeltaLakeCdfConfig, + log_namespace: LogNamespace, +) -> Result, String> { + let mut events = Vec::new(); + + for batch in batches { + convert_batch_to_events(&batch, config, log_namespace, &mut events)?; + } + + Ok(events) +} + +/// Convert a single RecordBatch to Vector Events. 
+fn convert_batch_to_events( + batch: &RecordBatch, + config: &DeltaLakeCdfConfig, + log_namespace: LogNamespace, + events: &mut Vec, +) -> Result<(), String> { + let schema = batch.schema(); + let num_rows = batch.num_rows(); + + if num_rows == 0 { + return Ok(()); + } + + // Find CDF metadata column indices + let change_type_idx = schema + .index_of(CHANGE_TYPE_COL) + .map_err(|_| format!("Missing {} column in CDF data", CHANGE_TYPE_COL))?; + let commit_version_idx = schema + .index_of(COMMIT_VERSION_COL) + .map_err(|_| format!("Missing {} column in CDF data", COMMIT_VERSION_COL))?; + let commit_timestamp_idx = schema + .index_of(COMMIT_TIMESTAMP_COL) + .map_err(|_| format!("Missing {} column in CDF data", COMMIT_TIMESTAMP_COL))?; + + // Get CDF metadata columns + let change_type_col = batch.column(change_type_idx); + let commit_version_col = batch.column(commit_version_idx); + let commit_timestamp_col = batch.column(commit_timestamp_idx); + + for row_idx in 0..num_rows { + // Extract change type + let change_type_str = extract_string(change_type_col, row_idx) + .ok_or_else(|| format!("Null change_type at row {}", row_idx))?; + + // Filter by change type if specified + if !config.change_types.is_empty() + && let Some(ct) = ChangeType::from_cdf_string(&change_type_str) + && !config.change_types.contains(&ct) + { + continue; + } + + let mut log = LogEvent::default(); + + // Insert CDF metadata columns + insert_cdf_metadata( + &mut log, + log_namespace, + &change_type_str, + commit_version_col, + commit_timestamp_col, + row_idx, + )?; + + // Include data columns if configured + if config.include_data { + for (col_idx, field) in schema.fields().iter().enumerate() { + let field_name = field.name(); + + // Skip CDF metadata columns (already handled above) + if field_name == CHANGE_TYPE_COL + || field_name == COMMIT_VERSION_COL + || field_name == COMMIT_TIMESTAMP_COL + { + continue; + } + + let column = batch.column(col_idx); + let value = arrow_value_to_vrl(column, 
row_idx, field.data_type())?; + + log.insert(field_name.as_str(), value); + } + } + + // Add standard Vector source metadata + log_namespace.insert_standard_vector_source_metadata( + &mut log, + DeltaLakeCdfConfig::NAME, + Utc::now(), + ); + + events.push(Event::Log(log)); + } + + Ok(()) +} + +/// Insert CDF metadata columns into the log event. +fn insert_cdf_metadata( + log: &mut LogEvent, + log_namespace: LogNamespace, + change_type: &str, + commit_version_col: &ArrayRef, + commit_timestamp_col: &ArrayRef, + row_idx: usize, +) -> Result<(), String> { + // Change type + log_namespace.insert_source_metadata( + DeltaLakeCdfConfig::NAME, + log, + Some(LegacyKey::Overwrite(path!("_change_type"))), + path!("change_type"), + change_type.to_string(), + ); + + // Commit version + let commit_version = extract_int64(commit_version_col, row_idx) + .ok_or_else(|| format!("Null commit_version at row {}", row_idx))?; + + log_namespace.insert_source_metadata( + DeltaLakeCdfConfig::NAME, + log, + Some(LegacyKey::Overwrite(path!("_commit_version"))), + path!("commit_version"), + commit_version, + ); + + // Commit timestamp + let commit_timestamp = extract_timestamp(commit_timestamp_col, row_idx) + .ok_or_else(|| format!("Null commit_timestamp at row {}", row_idx))?; + + log_namespace.insert_source_metadata( + DeltaLakeCdfConfig::NAME, + log, + Some(LegacyKey::Overwrite(path!("_commit_timestamp"))), + path!("commit_timestamp"), + commit_timestamp, + ); + + Ok(()) +} + +/// Convert an Arrow array value to a VRL Value. 
+fn arrow_value_to_vrl( + column: &ArrayRef, + row_idx: usize, + data_type: &DataType, +) -> Result { + if column.is_null(row_idx) { + return Ok(Value::Null); + } + + match data_type { + DataType::Boolean => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to BooleanArray")?; + Ok(Value::Boolean(arr.value(row_idx))) + } + + // Signed integers + DataType::Int8 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Int8Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::Int16 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Int16Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::Int32 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Int32Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::Int64 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Int64Array")?; + Ok(Value::Integer(arr.value(row_idx))) + } + + // Unsigned integers + DataType::UInt8 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to UInt8Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::UInt16 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to UInt16Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::UInt32 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to UInt32Array")?; + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + DataType::UInt64 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to UInt64Array")?; + // Note: Large u64 values may overflow i64 + Ok(Value::Integer(arr.value(row_idx) as i64)) + } + + // Floating point + DataType::Float32 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Float32Array")?; + Ok(Value::Float( + 
ordered_float::NotNan::new(arr.value(row_idx) as f64) + .unwrap_or(ordered_float::NotNan::new(0.0).unwrap()), + )) + } + DataType::Float64 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Float64Array")?; + Ok(Value::Float( + ordered_float::NotNan::new(arr.value(row_idx)) + .unwrap_or(ordered_float::NotNan::new(0.0).unwrap()), + )) + } + + // Strings + DataType::Utf8 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to StringArray")?; + Ok(Value::Bytes(Bytes::from(arr.value(row_idx).to_string()))) + } + DataType::LargeUtf8 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to LargeStringArray")?; + Ok(Value::Bytes(Bytes::from(arr.value(row_idx).to_string()))) + } + + // Binary + DataType::Binary => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to BinaryArray")?; + Ok(Value::Bytes(Bytes::from(arr.value(row_idx).to_vec()))) + } + DataType::LargeBinary => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to LargeBinaryArray")?; + Ok(Value::Bytes(Bytes::from(arr.value(row_idx).to_vec()))) + } + + // Timestamps + DataType::Timestamp(unit, _tz) => { + let ts = extract_timestamp_by_unit(column, row_idx, *unit) + .ok_or("Failed to extract timestamp")?; + Ok(Value::Timestamp(ts)) + } + + // Dates + DataType::Date32 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Date32Array")?; + let days = arr.value(row_idx); + let ts = Utc + .timestamp_opt(days as i64 * 86400, 0) + .single() + .ok_or("Invalid date32 value")?; + Ok(Value::Timestamp(ts)) + } + DataType::Date64 => { + let arr = column + .as_any() + .downcast_ref::() + .ok_or("Failed to downcast to Date64Array")?; + let millis = arr.value(row_idx); + let ts = Utc + .timestamp_millis_opt(millis) + .single() + .ok_or("Invalid date64 value")?; + Ok(Value::Timestamp(ts)) + } + + // Complex types - serialize to JSON 
string + DataType::Struct(_) | DataType::List(_) | DataType::LargeList(_) | DataType::Map(_, _) => { + // Serialize complex types to JSON for maximum compatibility + let json_str = format_complex_value(column, row_idx)?; + Ok(Value::Bytes(Bytes::from(json_str))) + } + + // Fallback for other types + _ => { + // Try to get a string representation + let display = deltalake::arrow::util::display::array_value_to_string(column, row_idx) + .map_err(|e| format!("Failed to convert value to string: {}", e))?; + Ok(Value::Bytes(Bytes::from(display))) + } + } +} + +/// Extract a string value from an Arrow array. +fn extract_string(column: &ArrayRef, row_idx: usize) -> Option { + if column.is_null(row_idx) { + return None; + } + + match column.data_type() { + DataType::Utf8 => { + let arr = column.as_any().downcast_ref::()?; + Some(arr.value(row_idx).to_string()) + } + DataType::LargeUtf8 => { + let arr = column.as_any().downcast_ref::()?; + Some(arr.value(row_idx).to_string()) + } + _ => None, + } +} + +/// Extract an i64 value from an Arrow array. +fn extract_int64(column: &ArrayRef, row_idx: usize) -> Option { + if column.is_null(row_idx) { + return None; + } + + match column.data_type() { + DataType::Int64 => { + let arr = column.as_any().downcast_ref::()?; + Some(arr.value(row_idx)) + } + DataType::Int32 => { + let arr = column.as_any().downcast_ref::()?; + Some(arr.value(row_idx) as i64) + } + _ => None, + } +} + +/// Extract a timestamp from various Arrow timestamp types. +fn extract_timestamp(column: &ArrayRef, row_idx: usize) -> Option> { + if column.is_null(row_idx) { + return None; + } + + match column.data_type() { + DataType::Timestamp(unit, _) => extract_timestamp_by_unit(column, row_idx, *unit), + DataType::Int64 => { + // Assume milliseconds if stored as plain Int64 + let arr = column.as_any().downcast_ref::()?; + Utc.timestamp_millis_opt(arr.value(row_idx)).single() + } + _ => None, + } +} + +/// Extract timestamp by time unit. 
+fn extract_timestamp_by_unit( + column: &ArrayRef, + row_idx: usize, + unit: deltalake::arrow::datatypes::TimeUnit, +) -> Option> { + use deltalake::arrow::datatypes::TimeUnit; + + match unit { + TimeUnit::Second => { + let arr = column.as_any().downcast_ref::()?; + Utc.timestamp_opt(arr.value(row_idx), 0).single() + } + TimeUnit::Millisecond => { + let arr = column + .as_any() + .downcast_ref::()?; + Utc.timestamp_millis_opt(arr.value(row_idx)).single() + } + TimeUnit::Microsecond => { + let arr = column + .as_any() + .downcast_ref::()?; + let micros = arr.value(row_idx); + let secs = micros / 1_000_000; + let nanos = ((micros % 1_000_000) * 1000) as u32; + Utc.timestamp_opt(secs, nanos).single() + } + TimeUnit::Nanosecond => { + let arr = column.as_any().downcast_ref::()?; + let nanos = arr.value(row_idx); + let secs = nanos / 1_000_000_000; + let subsec_nanos = (nanos % 1_000_000_000) as u32; + Utc.timestamp_opt(secs, subsec_nanos).single() + } + } +} + +/// Format complex Arrow values (struct, list, map) as JSON strings. 
+fn format_complex_value(column: &ArrayRef, row_idx: usize) -> Result { + // Use Arrow's display utility for complex types + // This provides a string representation that can be parsed as JSON-like format + deltalake::arrow::util::display::array_value_to_string(column, row_idx) + .map_err(|e| format!("Failed to format complex value: {}", e)) +} + +#[cfg(test)] +mod tests { + use super::*; + use deltalake::arrow::array::{Int64Builder, StringBuilder}; + use deltalake::arrow::datatypes::{Field, Schema}; + use std::sync::Arc; + + fn create_test_batch() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("_change_type", DataType::Utf8, false), + Field::new("_commit_version", DataType::Int64, false), + Field::new( + "_commit_timestamp", + DataType::Timestamp(deltalake::arrow::datatypes::TimeUnit::Millisecond, None), + false, + ), + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let mut change_type_builder = StringBuilder::new(); + let mut commit_version_builder = Int64Builder::new(); + let mut commit_timestamp_builder = + deltalake::arrow::array::TimestampMillisecondBuilder::new(); + let mut id_builder = Int64Builder::new(); + let mut name_builder = StringBuilder::new(); + + // Add test data + change_type_builder.append_value("insert"); + commit_version_builder.append_value(1); + commit_timestamp_builder.append_value(1704067200000); // 2024-01-01 00:00:00 UTC + id_builder.append_value(100); + name_builder.append_value("Alice"); + + change_type_builder.append_value("delete"); + commit_version_builder.append_value(2); + commit_timestamp_builder.append_value(1704153600000); // 2024-01-02 00:00:00 UTC + id_builder.append_value(100); + name_builder.append_value("Alice"); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(change_type_builder.finish()), + Arc::new(commit_version_builder.finish()), + Arc::new(commit_timestamp_builder.finish()), + Arc::new(id_builder.finish()), + 
Arc::new(name_builder.finish()), + ], + ) + .unwrap() + } + + #[test] + fn test_convert_batch_basic() { + let batch = create_test_batch(); + let config = DeltaLakeCdfConfig { + include_data: true, + ..Default::default() + }; + + let events = + convert_cdf_batches_to_events(vec![batch], &config, LogNamespace::Legacy).unwrap(); + + assert_eq!(events.len(), 2); + + // Check first event (insert) + let log1 = events[0].as_log(); + assert_eq!( + log1.get("_change_type").unwrap().to_string_lossy(), + "insert" + ); + assert_eq!(log1.get("id").unwrap(), &Value::Integer(100)); + assert_eq!(log1.get("name").unwrap().to_string_lossy(), "Alice"); + + // Check second event (delete) + let log2 = events[1].as_log(); + assert_eq!( + log2.get("_change_type").unwrap().to_string_lossy(), + "delete" + ); + } + + #[test] + fn test_convert_batch_filter_change_types() { + let batch = create_test_batch(); + let config = DeltaLakeCdfConfig { + include_data: true, + change_types: vec![ChangeType::Insert], + ..Default::default() + }; + + let events = + convert_cdf_batches_to_events(vec![batch], &config, LogNamespace::Legacy).unwrap(); + + // Should only have insert events + assert_eq!(events.len(), 1); + let log = events[0].as_log(); + assert_eq!(log.get("_change_type").unwrap().to_string_lossy(), "insert"); + } + + #[test] + fn test_convert_batch_exclude_data() { + let batch = create_test_batch(); + let config = DeltaLakeCdfConfig { + include_data: false, + ..Default::default() + }; + + let events = + convert_cdf_batches_to_events(vec![batch], &config, LogNamespace::Legacy).unwrap(); + + assert_eq!(events.len(), 2); + + // Should only have CDF metadata, not data columns + let log = events[0].as_log(); + assert!(log.get("_change_type").is_some()); + assert!(log.get("_commit_version").is_some()); + assert!(log.get("_commit_timestamp").is_some()); + assert!(log.get("id").is_none()); + assert!(log.get("name").is_none()); + } +} diff --git a/src/sources/delta_lake_cdf/integration_tests.rs 
b/src/sources/delta_lake_cdf/integration_tests.rs new file mode 100644 index 0000000000000..353b41c7daa43 --- /dev/null +++ b/src/sources/delta_lake_cdf/integration_tests.rs @@ -0,0 +1,367 @@ +//! Integration tests for Delta Lake CDF source. +//! +//! These tests require a running MinIO instance with Delta tables. +//! Run with: `cargo test --features delta-lake-cdf-integration-tests` + +#![cfg(all(test, feature = "delta-lake-cdf-integration-tests"))] + +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; + +use deltalake::DeltaTable; +use deltalake::arrow::array::{Int64Builder, StringBuilder}; +use deltalake::arrow::datatypes::{DataType, Field, Schema}; +use deltalake::arrow::record_batch::RecordBatch; +use deltalake::kernel::StructType; +use deltalake::kernel::engine::arrow_conversion::TryFromArrow; +use deltalake::operations::create::CreateBuilder; +use deltalake::protocol::SaveMode; + +use super::config::{DeltaLakeCdfConfig, StartPosition}; + +/// Get MinIO endpoint from environment or use default +fn minio_endpoint() -> String { + std::env::var("MINIO_ENDPOINT").unwrap_or_else(|_| "http://localhost:9000".into()) +} + +/// Create storage options for S3-compatible MinIO +fn minio_storage_options() -> HashMap { + let mut options = HashMap::new(); + options.insert("aws_access_key_id".to_string(), "minioadmin".to_string()); + options.insert( + "aws_secret_access_key".to_string(), + "minioadmin".to_string(), + ); + options.insert("aws_endpoint".to_string(), minio_endpoint()); + options.insert("aws_region".to_string(), "us-east-1".to_string()); + options.insert("aws_allow_http".to_string(), "true".to_string()); + options.insert("aws_s3_path_style".to_string(), "true".to_string()); + options +} + +/// Create a test Delta table with CDF enabled +#[allow(dead_code)] +async fn create_cdf_enabled_table(bucket: &str, table_path: &str) -> DeltaTable { + let table_uri = format!("s3://{}/{}", bucket, table_path); + let storage_options = 
minio_storage_options(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let delta_schema = + StructType::try_from_arrow(schema.as_ref()).expect("Failed to convert Arrow schema"); + let delta_fields: Vec<_> = delta_schema.fields().cloned().collect(); + + // Create table with CDF enabled + CreateBuilder::new() + .with_location(&table_uri) + .with_columns(delta_fields) + .with_save_mode(SaveMode::Ignore) + .with_storage_options(storage_options.clone()) + .with_configuration_property( + deltalake::TableProperty::EnableChangeDataFeed, + Some("true".to_string()), + ) + .await + .expect("Failed to create Delta table with CDF") +} + +/// Insert test data into table +#[allow(dead_code)] +async fn insert_data(table: &DeltaTable, id: i64, name: &str) { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let mut id_builder = Int64Builder::new(); + let mut name_builder = StringBuilder::new(); + + id_builder.append_value(id); + name_builder.append_value(name); + + let batch = RecordBatch::try_new( + schema, + vec![ + Arc::new(id_builder.finish()), + Arc::new(name_builder.finish()), + ], + ) + .unwrap(); + + table + .clone() + .write(vec![batch]) + .await + .expect("Failed to write to table"); +} + +#[tokio::test] +async fn test_cdf_source_basic() { + // This test requires MinIO running with a test bucket + // Skip if not available + let table_uri = format!( + "s3://test-bucket/cdf-test-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() + ); + + let config = DeltaLakeCdfConfig { + table_uri: table_uri.clone(), + storage_options: minio_storage_options(), + poll_interval_secs: Duration::from_secs(1), + start_position: StartPosition::Beginning, + include_data: true, + change_types: Vec::new(), + ending_version: Some(2), + data_dir: None, + 
log_namespace: None, + }; + + // Note: This test would need a pre-existing CDF-enabled table + // For now, just validate the config parses correctly + assert_eq!(config.table_uri, table_uri); + assert_eq!(config.poll_interval_secs, Duration::from_secs(1)); +} + +#[test] +fn test_config_serialization() { + let config_str = r#" + table_uri = "s3://test-bucket/table" + poll_interval_secs = 30 + start_position = "latest" + include_data = true + change_types = ["insert", "delete"] + + [storage_options] + aws_access_key_id = "test" + aws_secret_access_key = "test" + aws_region = "us-east-1" + "#; + + let config: DeltaLakeCdfConfig = toml::from_str(config_str).expect("Config should parse"); + + assert_eq!(config.table_uri, "s3://test-bucket/table"); + assert_eq!(config.poll_interval_secs, Duration::from_secs(30)); + assert_eq!(config.start_position, StartPosition::Latest); + assert!(config.include_data); + assert_eq!(config.change_types.len(), 2); +} + +#[test] +fn generate_config() { + crate::test_util::test_generate_config::(); +} + +/// Test that the source auto-recovers when checkpoint points to a version +/// that has been removed by VACUUM. 
+#[tokio::test] +async fn test_auto_recovery_after_vacuum() { + use futures::StreamExt; + use tempfile::TempDir; + + use super::checkpoint::DeltaLakeCdfCheckpointer; + use super::source::run_cdf_source; + use crate::SourceSender; + use crate::shutdown::ShutdownSignal; + use vector_lib::config::LogNamespace; + + // Create unique table path + let table_path = format!( + "cdf-vacuum-test-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis() + ); + let table_uri = format!("s3://test-bucket/{}", table_path); + let storage_options = minio_storage_options(); + + // Step 1: Create table with CDF enabled + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let delta_schema = + StructType::try_from_arrow(schema.as_ref()).expect("Failed to convert Arrow schema"); + let delta_fields: Vec<_> = delta_schema.fields().cloned().collect(); + + let mut table = CreateBuilder::new() + .with_location(&table_uri) + .with_columns(delta_fields) + .with_save_mode(SaveMode::ErrorIfExists) + .with_storage_options(storage_options.clone()) + .with_configuration_property( + deltalake::TableProperty::EnableChangeDataFeed, + Some("true".to_string()), + ) + .await + .expect("Failed to create Delta table"); + + // Step 2: Create versions with Overwrites to generate "garbage" files + // Each overwrite removes the previous file from the latest state, + // making it eligible for VACUUM deletion. 
+ + // Write v1 (File A) + table = table + .write(vec![create_test_batch(1, "Alice")]) + .with_save_mode(SaveMode::Overwrite) + .await + .expect("Failed to write v1"); + + // Write v2 (File B) - removes File A from latest state + table = table + .write(vec![create_test_batch(2, "Bob")]) + .with_save_mode(SaveMode::Overwrite) + .await + .expect("Failed to write v2"); + + // Write v3 (File C) - removes File B from latest state + table = table + .write(vec![create_test_batch(3, "Charlie")]) + .with_save_mode(SaveMode::Overwrite) + .await + .expect("Failed to write v3"); + + let current_version = table.version().unwrap(); + assert!(current_version >= 3, "Should have at least 3 versions"); + + // Step 3: Create a checkpoint pointing to version 0 (will be vacuumed) + let checkpoint_dir = TempDir::new().expect("Failed to create temp dir"); + let checkpointer = DeltaLakeCdfCheckpointer::new(checkpoint_dir.path(), &table_uri); + checkpointer + .write_checkpoint(0) + .expect("Failed to write checkpoint"); + + // Verify checkpoint was created + assert_eq!(checkpointer.read_checkpoint(), Some(0)); + + // Step 4: Run VACUUM with 0 retention to remove old versions + let (table, metrics) = table + .vacuum() + .with_retention_period(chrono::Duration::zero()) + .with_enforce_retention_duration(false) + .await + .expect("Failed to vacuum table"); + + // Verify VACUUM actually deleted files (precondition for this test) + assert!( + !metrics.files_deleted.is_empty(), + "VACUUM should have deleted files - test precondition failed. 
\ + Files deleted: {:?}", + metrics.files_deleted + ); + println!( + "VACUUM deleted {} files: {:?}", + metrics.files_deleted.len(), + metrics.files_deleted + ); + + // Get the version AFTER vacuum (VACUUM creates new table versions) + let version_after_vacuum = table.version().unwrap(); + println!( + "Version before VACUUM: {}, after VACUUM: {}", + current_version, version_after_vacuum + ); + + // Step 5: Start the source - it should auto-recover + let config = DeltaLakeCdfConfig { + table_uri: table_uri.clone(), + storage_options: storage_options.clone(), + poll_interval_secs: Duration::from_millis(100), + start_position: StartPosition::Beginning, + include_data: true, + change_types: Vec::new(), + ending_version: Some(version_after_vacuum), // Bounded read for test + data_dir: None, + log_namespace: None, + }; + + let (tx, rx) = SourceSender::new_test(); + let (shutdown_trigger, shutdown_signal, _shutdown_done) = ShutdownSignal::new_wired(); + + let source_handle = tokio::spawn(run_cdf_source( + table, + config, + 0, // Start from checkpointed version (which was vacuumed) + checkpointer, + shutdown_signal, + tx, + LogNamespace::Legacy, + )); + + // Wait a bit for auto-recovery and potential events + tokio::time::sleep(Duration::from_secs(2)).await; + + // Trigger shutdown + shutdown_trigger.cancel(); + + // Wait for source to complete + let result = tokio::time::timeout(Duration::from_secs(5), source_handle) + .await + .expect("Source should complete within timeout") + .expect("Source task should not panic"); + + // Source should complete successfully (auto-recovered, not errored) + assert!( + result.is_ok(), + "Source should auto-recover after VACUUM, not fail" + ); + + // Check that checkpoint jumped to latest_version + 1 (skipped unavailable history) + let new_checkpoint = DeltaLakeCdfCheckpointer::new(checkpoint_dir.path(), &table_uri); + let checkpointed_version = new_checkpoint + .read_checkpoint() + .expect("Checkpoint should still exist after recovery"); 
+ + // The checkpoint should have jumped from 0 to version_after_vacuum + 1 + // (skipping all the versions whose files were deleted by VACUUM) + assert_eq!( + checkpointed_version, + version_after_vacuum + 1, + "Checkpoint should have jumped to latest+1 after auto-recovery. \ + Expected {}, got {}. This indicates auto-recovery did not work correctly.", + version_after_vacuum + 1, + checkpointed_version + ); + + // Drain any events that were received + let events: Vec<_> = rx.take(100).collect().await; + + println!( + "Auto-recovery test completed successfully! \ + VACUUM deleted files, source recovered from version 0 to {}, \ + events received: {}", + checkpointed_version, + events.len() + ); +} + +/// Helper to create a test record batch +fn create_test_batch(id: i64, name: &str) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let mut id_builder = Int64Builder::new(); + let mut name_builder = StringBuilder::new(); + + id_builder.append_value(id); + name_builder.append_value(name); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(id_builder.finish()), + Arc::new(name_builder.finish()), + ], + ) + .unwrap() +} diff --git a/src/sources/delta_lake_cdf/mod.rs b/src/sources/delta_lake_cdf/mod.rs new file mode 100644 index 0000000000000..33e2360bb18d6 --- /dev/null +++ b/src/sources/delta_lake_cdf/mod.rs @@ -0,0 +1,11 @@ +mod checkpoint; +mod config; +mod event; +mod source; + +#[cfg(all(test, feature = "delta-lake-cdf-integration-tests"))] +mod integration_tests; + +pub use config::DeltaLakeCdfConfig; + +use crate::sources::Source; diff --git a/src/sources/delta_lake_cdf/source.rs b/src/sources/delta_lake_cdf/source.rs new file mode 100644 index 0000000000000..b370239707ed7 --- /dev/null +++ b/src/sources/delta_lake_cdf/source.rs @@ -0,0 +1,384 @@ +//! Main source logic for Delta Lake CDF streaming. 
+ +use deltalake::DeltaTable; +use deltalake::DeltaTableError; +use deltalake::datafusion::prelude::SessionContext; +use deltalake::delta_datafusion::DeltaCdfTableProvider; +use futures::StreamExt; +use std::sync::Arc; +use tokio::time::interval; +use vector_lib::EstimatedJsonEncodedSizeOf; +use vector_lib::config::LogNamespace; +use vector_lib::internal_event::{ + ByteSize, BytesReceived, CountByteSize, InternalEventHandle as _, Protocol, +}; + +use crate::SourceSender; +use crate::internal_events::{EventsReceived, StreamClosedError}; +use crate::shutdown::ShutdownSignal; + +use super::checkpoint::DeltaLakeCdfCheckpointer; +use super::config::DeltaLakeCdfConfig; +use super::event::convert_cdf_batches_to_events; + +/// Run the CDF source main loop. +/// +/// This function: +/// 1. Polls the Delta table at configured intervals for new versions +/// 2. Reads Change Data Feed records for any new versions +/// 3. Converts records to Vector events and sends them downstream +/// 4. Persists checkpoints for resumption after restart +pub async fn run_cdf_source( + mut table: DeltaTable, + config: DeltaLakeCdfConfig, + mut current_version: i64, + checkpointer: DeltaLakeCdfCheckpointer, + mut shutdown: ShutdownSignal, + mut out: SourceSender, + log_namespace: LogNamespace, +) -> Result<(), ()> { + let poll_interval = config.poll_interval_secs; + let mut ticker = interval(poll_interval); + + // Create DataFusion context once for reuse + let ctx = SessionContext::new(); + + // Register metrics + let bytes_received = register!(BytesReceived::from(Protocol::HTTP)); + let events_received = register!(EventsReceived); + + info!( + message = "Delta Lake CDF source started", + table_uri = %config.table_uri, + current_version = current_version, + ); + + loop { + tokio::select! 
{ + _ = &mut shutdown => { + // Save checkpoint before shutting down + if let Err(e) = checkpointer.write_checkpoint(current_version) { + error!( + message = "Failed to save checkpoint on shutdown", + error = %e, + ); + } + info!( + message = "Delta Lake CDF source shutting down", + final_version = current_version, + ); + return Ok(()); + } + + _ = ticker.tick() => { + // Reload table to check for new versions + if let Err(e) = table.load().await { + error!( + message = "Failed to reload Delta table", + error = %e, + ); + continue; + } + + let latest_version = table.version().unwrap_or(0); + + // Check for bounded read completion + if let Some(end_version) = config.ending_version + && current_version > end_version + { + info!( + message = "Reached ending version, stopping", + ending_version = end_version, + ); + return Ok(()); + } + + // Skip if no new versions + if latest_version < current_version { + debug!( + message = "No new versions available", + current_version = current_version, + latest_version = latest_version, + ); + continue; + } + + // Determine the end version for this batch + // Apply max_versions_per_poll limit to prevent query planner from + // hanging when catching up on large backlogs + let max_end_from_batch_limit = config + .max_versions_per_poll + .map(|max| current_version.saturating_add(max).saturating_sub(1)) + .unwrap_or(i64::MAX); + + let end_version = config + .ending_version + .map(|e| e.min(latest_version)) + .unwrap_or(latest_version) + .min(max_end_from_batch_limit); + + // Log at info level when catching up with batching, debug otherwise + let versions_behind = latest_version.saturating_sub(end_version); + if versions_behind > 0 && config.max_versions_per_poll.is_some() { + info!( + message = "Processing CDF versions (catching up)", + start_version = current_version, + end_version = end_version, + versions_behind = versions_behind, + ); + } else { + debug!( + message = "Processing CDF versions", + start_version = current_version, + 
end_version = end_version, + ); + } + + // Create streaming CDF reader for the version range + match create_cdf_stream(&ctx, &table, current_version, end_version).await { + Ok(mut stream) => { + let mut total_events: usize = 0; + let mut stream_error = false; + + // Process batches as they arrive - no memory accumulation + while let Some(batch_result) = stream.next().await { + match batch_result { + Ok(batch) => { + if batch.num_rows() == 0 { + continue; + } + + // Emit byte metrics for this batch + let byte_size = batch.get_array_memory_size(); + bytes_received.emit(ByteSize(byte_size)); + + // Convert batch to events + match convert_cdf_batches_to_events(vec![batch], &config, log_namespace) { + Ok(events) => { + if events.is_empty() { + continue; + } + + let event_count = events.len(); + let json_size = events.estimated_json_encoded_size_of(); + events_received.emit(CountByteSize(event_count, json_size)); + + // Send events downstream immediately + if out.send_batch(events).await.is_err() { + emit!(StreamClosedError { count: event_count }); + return Err(()); + } + + total_events += event_count; + } + Err(e) => { + error!( + message = "Failed to convert CDF batch to events", + error = %e, + ); + stream_error = true; + break; + } + } + } + Err(e) => { + // Check if this is a "file not found" error + if is_file_not_found_error(&e) { + let new_version = latest_version + 1; + warn!( + message = "CDF data files not found during stream (likely removed by VACUUM), skipping to latest", + error = %e, + old_version = current_version, + new_version = new_version, + ); + current_version = new_version; + if let Err(e) = checkpointer.write_checkpoint(current_version) { + error!(message = "Failed to save checkpoint", error = %e); + } + stream_error = true; + break; + } + + error!( + message = "Error reading CDF stream", + error = %e, + ); + stream_error = true; + break; + } + } + } + + // Only update checkpoint if stream completed successfully + if !stream_error { + if 
total_events > 0 { + debug!( + message = "Sent CDF events", + count = total_events, + versions = format!("{}..{}", current_version, end_version), + ); + } else { + debug!(message = "No CDF data in version range"); + } + + current_version = end_version + 1; + if let Err(e) = checkpointer.write_checkpoint(current_version) { + error!( + message = "Failed to save checkpoint", + error = %e, + version = current_version, + ); + } + } + } + Err(e) => { + // Check if this is a "file not found" error (vacuum deleted the files) + if is_file_not_found_error(&e) { + // Auto-recover: reset to latest version (skip unavailable history) + let new_version = latest_version + 1; + warn!( + message = "CDF data files not found (likely removed by VACUUM), skipping to latest", + error = %e, + old_version = current_version, + new_version = new_version, + ); + current_version = new_version; + if let Err(e) = checkpointer.write_checkpoint(current_version) { + error!(message = "Failed to save checkpoint", error = %e); + } + continue; + } + + match &e { + DeltaTableError::ChangeDataNotEnabled { version } => { + error!( + message = "Change Data Feed is not enabled on table", + %version, + hint = "Enable CDF with: ALTER TABLE ... SET TBLPROPERTIES (delta.enableChangeDataFeed = true)", + ); + return Err(()); + } + DeltaTableError::ChangeDataNotRecorded { version, .. 
} => { + // Auto-recover: reset to latest version (skip unavailable history) + let new_version = latest_version + 1; + warn!( + message = "CDF data not available for version (likely removed by VACUUM), skipping to latest", + unavailable_version = %version, + new_version = new_version, + ); + current_version = new_version; + if let Err(e) = checkpointer.write_checkpoint(current_version) { + error!(message = "Failed to save checkpoint", error = %e); + } + continue; + } + DeltaTableError::ChangeDataInvalidVersionRange { start, end } => { + error!( + message = "Invalid CDF version range", + start_version = %start, + end_version = %end, + ); + } + _ => { + error!( + message = "Failed to load CDF data", + error = %e, + start_version = current_version, + end_version = end_version, + ); + } + } + } + } + } + } + } +} + +/// Create a streaming CDF reader for a version range. +/// +/// Returns a stream of RecordBatches instead of collecting all data into memory. +/// This allows processing large version ranges incrementally without OOM risk. +async fn create_cdf_stream( + ctx: &SessionContext, + table: &DeltaTable, + start_version: i64, + end_version: i64, +) -> Result< + impl futures::Stream>, + DeltaTableError, +> { + // Clone table and create CDF builder + let cdf_builder = table + .clone() + .scan_cdf() + .with_starting_version(start_version) + .with_ending_version(end_version); + + // Create CDF table provider + let cdf_provider = DeltaCdfTableProvider::try_new(cdf_builder)?; + + // Execute the CDF scan using DataFusion and return as stream + let df = ctx.read_table(Arc::new(cdf_provider))?; + let stream = df.execute_stream().await?; + + // Map the DataFusion error type to DeltaTableError + Ok(stream.map(|result| { + result.map_err(|e| DeltaTableError::Generic(format!("DataFusion stream error: {}", e))) + })) +} + +/// Check if an error indicates that files were not found (likely deleted by VACUUM). 
+fn is_file_not_found_error(error: &DeltaTableError) -> bool { + let error_str = error.to_string(); + error_str.contains("not found") + || error_str.contains("404") + || error_str.contains("NoSuchKey") + || error_str.contains("NotFound") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_detects_s3_not_found_error() { + // S3-style error + let error = DeltaTableError::Generic( + "Failed to parse parquet: External: Object at location ... not found: \ + Error performing GET ... 404 Not Found: NoSuchKey" + .to_string(), + ); + assert!(is_file_not_found_error(&error)); + } + + #[test] + fn test_detects_gcs_not_found_error() { + // GCS-style error (from user's actual error) + let error = DeltaTableError::Generic( + "Failed to parse parquet: External: Object at location \ + delta/vector_events/part-00000-xxx.snappy.parquet not found: \ + Error performing GET https://storage.googleapis.com/... \ + 404 Not Found: NoSuchKey" + .to_string(), + ); + assert!(is_file_not_found_error(&error)); + } + + #[test] + fn test_detects_azure_not_found_error() { + // Azure-style error + let error = DeltaTableError::Generic("Object not found: BlobNotFound".to_string()); + assert!(is_file_not_found_error(&error)); + } + + #[test] + fn test_does_not_match_unrelated_errors() { + let error = + DeltaTableError::Generic("Schema mismatch: expected 5 columns, got 3".to_string()); + assert!(!is_file_not_found_error(&error)); + + let error = DeltaTableError::Generic("Connection timeout after 30 seconds".to_string()); + assert!(!is_file_not_found_error(&error)); + } +} diff --git a/src/sources/mod.rs b/src/sources/mod.rs index 77258bdb77a46..16ad759746a32 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -15,6 +15,8 @@ pub mod aws_s3; pub mod aws_sqs; #[cfg(feature = "sources-datadog_agent")] pub mod datadog_agent; +#[cfg(feature = "sources-delta_lake_cdf")] +pub mod delta_lake_cdf; #[cfg(feature = "sources-demo_logs")] pub mod demo_logs; #[cfg(feature = 
"sources-dnstap")] diff --git a/tests/integration/delta-lake-cdf/config/compose.yaml b/tests/integration/delta-lake-cdf/config/compose.yaml new file mode 100644 index 0000000000000..d9e4ff9ec4e31 --- /dev/null +++ b/tests/integration/delta-lake-cdf/config/compose.yaml @@ -0,0 +1,36 @@ +version: '3.8' + +services: + # MinIO for S3-compatible object storage + minio: + image: minio/minio:latest + ports: + - "9000:9000" + - "9001:9001" + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + command: server /data --console-address ":9001" + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 5s + timeout: 3s + retries: 3 + # Create bucket and setup MinIO + minio-setup: + image: minio/mc:latest + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set minio http://minio:9000 minioadmin minioadmin; + mc mb minio/test-bucket --ignore-existing; + mc mb minio/cdf-test --ignore-existing; + echo 'MinIO setup complete'; + " + +networks: + default: + name: ${VECTOR_NETWORK} + external: true diff --git a/tests/integration/delta-lake-cdf/config/test.yaml b/tests/integration/delta-lake-cdf/config/test.yaml new file mode 100644 index 0000000000000..8cac0fb717642 --- /dev/null +++ b/tests/integration/delta-lake-cdf/config/test.yaml @@ -0,0 +1,23 @@ +# Test configuration for Delta Lake CDF source integration tests with MinIO + +features: +- delta-lake-cdf-integration-tests + +test_filter: '::delta_lake_cdf::integration_tests::' + +runner: + env: + MINIO_ENDPOINT: http://minio:9000 + AWS_ACCESS_KEY_ID: minioadmin + AWS_SECRET_ACCESS_KEY: minioadmin + AWS_REGION: us-east-1 + AWS_ALLOW_HTTP: "true" + +matrix: + version: ['latest'] + +# Changes to these files/paths will invoke the integration test in CI +# Expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sources/delta_lake_cdf/**" +- "tests/integration/delta-lake-cdf/**" diff --git 
a/tests/integration/delta-lake/config/compose.yaml b/tests/integration/delta-lake/config/compose.yaml new file mode 100644 index 0000000000000..398def8a1ab12 --- /dev/null +++ b/tests/integration/delta-lake/config/compose.yaml @@ -0,0 +1,36 @@ +version: '3.8' + +services: + # MinIO for S3-compatible object storage + minio: + image: minio/minio:latest + ports: + - "9000:9000" + - "9001:9001" + environment: + - MINIO_ROOT_USER=minioadmin + - MINIO_ROOT_PASSWORD=minioadmin + command: server /data --console-address ":9001" + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 5s + timeout: 3s + retries: 3 + # Create bucket and setup MinIO + minio-setup: + image: minio/mc:latest + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set minio http://minio:9000 minioadmin minioadmin; + mc mb minio/test-bucket --ignore-existing; + mc mb minio/delta-test --ignore-existing; + echo 'MinIO setup complete'; + " + +networks: + default: + name: ${VECTOR_NETWORK} + external: true diff --git a/tests/integration/delta-lake/config/test.yaml b/tests/integration/delta-lake/config/test.yaml new file mode 100644 index 0000000000000..ae67de7faa08c --- /dev/null +++ b/tests/integration/delta-lake/config/test.yaml @@ -0,0 +1,24 @@ +# Test configuration for Delta Lake sink integration tests with MinIO + +features: +- delta-lake-integration-tests + +test_filter: '::delta_lake::integration_tests::' + +runner: + env: + MINIO_ENDPOINT: http://minio:9000 + AWS_ACCESS_KEY_ID: minioadmin + AWS_SECRET_ACCESS_KEY: minioadmin + AWS_REGION: us-east-1 + AWS_ALLOW_HTTP: "true" + +matrix: + version: ['latest'] + +# Changes to these files/paths will invoke the integration test in CI +# Expressions are evaluated using https://github.com/micromatch/picomatch +paths: +- "src/sinks/delta_lake/**" +- "lib/codecs/src/encoding/format/arrow/**" +- "tests/integration/delta-lake/**"