diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index eabc3144d9..d21e98f5b8 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -24,6 +24,7 @@ on: pull_request: branches: - main + - 'sqllogic-test' concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d94370fe5..b61d09e50f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,9 +21,11 @@ on: push: branches: - main + - 'sqllogic-test' pull_request: branches: - main + - 'sqllogic-test' concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} @@ -53,6 +55,18 @@ jobs: uses: taiki-e/install-action@v2 with: tool: taplo-cli@0.9.3 + + - name: Install protobuf + run: | + if [[ "$RUNNER_OS" == "Linux" ]]; then + sudo apt-get update && sudo apt-get install -y protobuf-compiler + elif [[ "$RUNNER_OS" == "macOS" ]]; then + brew install protobuf + elif [[ "$RUNNER_OS" == "Windows" ]]; then + choco install protoc + fi + shell: bash + - name: Check toml format run: make check-toml @@ -89,6 +103,17 @@ jobs: - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 + - name: Install protobuf + run: | + if [[ "$RUNNER_OS" == "Linux" ]]; then + sudo apt-get update && sudo apt-get install -y protobuf-compiler + elif [[ "$RUNNER_OS" == "macOS" ]]; then + brew install protobuf + elif [[ "$RUNNER_OS" == "Windows" ]]; then + choco install protoc + fi + shell: bash + - name: Build run: make build @@ -109,6 +134,17 @@ jobs: - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 + - name: Install protobuf + run: | + if [[ "$RUNNER_OS" == "Linux" ]]; then + sudo apt-get update && sudo apt-get install -y protobuf-compiler + elif [[ "$RUNNER_OS" == "macOS" ]]; then + brew install protobuf + elif [[ "$RUNNER_OS" == "Windows" ]]; then + choco install protoc + fi + shell: bash + - name: Build run: cargo build -p iceberg --no-default-features @@ -134,6 +170,9 @@ jobs: - name: Cache Rust artifacts uses: Swatinem/rust-cache@v2 + - name: Install protobuf + run: sudo apt-get update && sudo apt-get install -y protobuf-compiler + - name: Test run: cargo test --no-fail-fast --all-targets --all-features --workspace @@ -150,6 +189,10 @@ jobs: - uses: actions/checkout@v5 - name: Setup Nightly Rust toolchain uses: ./.github/actions/setup-builder + + - name: Install protobuf + run: sudo apt-get update && sudo apt-get install -y protobuf-compiler libprotobuf-dev + - name: Generate minimal versions lockfile run: | cargo generate-lockfile -Z direct-minimal-versions -Z minimal-versions diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index e79e0a0acd..2699aeae94 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -24,6 +24,7 @@ on: pull_request: branches: - main + - 'sqllogic-test' concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index 7f17192af8..ed8c4f308a 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -24,6 +24,7 @@ on: pull_request: branches: - main + - 'sqllogic-test' concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} @@ -49,6 +50,11 @@ jobs: - name: Copy asf file run: cp .asf.yaml ./website/book/.asf.yaml + - name: Install protobuf + run: | + sudo apt-get update && sudo apt-get install -y protobuf-compiler + shell: bash + - name: Build API docs run: | cargo doc --no-deps --workspace --all-features diff --git a/.licenserc.yaml b/.licenserc.yaml index 01625e32cb..8c050828b0 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -33,4 +33,5 @@ header: - 'dist/*' - 'Cargo.lock' - '.github/PULL_REQUEST_TEMPLATE.md' + - '**/*.slt' comment: on-failure diff --git a/Cargo.lock b/Cargo.lock index 48a995242f..0ecb6be7ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -592,6 +592,28 @@ dependencies = [ "wasm-bindgen-futures", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "async-task" version = "4.7.1" @@ -721,9 +743,9 @@ dependencies = [ [[package]] name = "aws-sdk-glue" -version = "1.116.0" +version = "1.117.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b44ba32edf94e0723dfa1ee340170925858012ac0c981f4ee220c7455014bf" +checksum = "0f68344fb124bf37061b07186ec9e5aaa4560097c916091ae039021dde64cce9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -809,9 +831,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.83.0" +version = "1.84.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5468593c47efc31fdbe6c902d1a5fde8d9c82f78a3f8ccfe907b1e9434748cb" +checksum = "91abcdbfb48c38a0419eb75e0eac772a4783a96750392680e4f3c25a8a0535b9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -909,7 +931,7 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls 0.26.2", - "tower", + "tower 0.5.2", "tracing", ] @@ -1031,6 +1053,51 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +dependencies = [ + "async-trait", + "axum-core", + "bitflags 1.3.2", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 0.1.2", + "tower 0.4.13", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "mime", + "rustversion", + "tower-layer", + "tower-service", +] + [[package]] name = "backon" version = "1.5.2" @@ -1111,7 +1178,7 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags", + "bitflags 2.9.2", "cexpr", "clang-sys", "itertools 0.12.1", @@ -1128,6 +1195,12 @@ dependencies = [ "which", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.9.2" @@ -1375,9 +1448,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "cfg_aliases" @@ -2355,7 +2428,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.7.1", ] [[package]] @@ -2483,7 +2556,7 @@ dependencies = [ "sqllogictest", "sqlparser", "tempfile", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", ] @@ -2821,6 +2894,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -2833,7 +2912,7 @@ version = "25.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" dependencies = [ - "bitflags", + "bitflags 2.9.2", "rustc_version", ] @@ -2873,9 +2952,9 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -3022,9 +3101,9 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.5" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" dependencies = [ "cc", "cfg-if", @@ -3102,7 +3181,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags", + "bitflags 2.9.2", "ignore", "walkdir", ] @@ -3428,6 +3507,18 @@ dependencies = [ "webpki-roots 1.0.2", ] +[[package]] +name = "hyper-timeout" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" +dependencies = [ + "hyper 0.14.32", + "pin-project-lite", + "tokio", + "tokio-io-timeout", +] + [[package]] name = "hyper-util" version = "0.1.16" @@ -3722,12 +3813,23 @@ name = "iceberg-sqllogictest" version = "0.6.0" dependencies = [ "anyhow", + "async-stream", "async-trait", "datafusion", "datafusion-sqllogictest", "enum-ordinalize", + "env_logger", + "iceberg", + "iceberg-catalog-rest", + "iceberg-datafusion", + "iceberg_test_utils", "indicatif", + "itertools 0.13.0", + "libtest-mimic 0.7.3", + "log", + "spark-connect-rs", "sqllogictest", + "tokio", "toml", ] @@ -3833,9 +3935,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -3935,7 +4037,7 @@ version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" dependencies = [ - "bitflags", + "bitflags 2.9.2", "cfg-if", "libc", ] @@ -4204,7 +4306,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ - "bitflags", + "bitflags 2.9.2", "libc", "redox_syscall", ] @@ -4220,6 +4322,18 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libtest-mimic" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc0bda45ed5b3a2904262c1bb91e526127aa70e7ef3758aba2ef93cf896b9b58" +dependencies = [ + "clap", + "escape8259", + "termcolor", + "threadpool", +] + [[package]] name = "libtest-mimic" version = "0.8.1" @@ -4343,6 +4457,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "md-5" version = "0.10.6" @@ -4390,6 +4510,12 @@ dependencies = [ "libmimalloc-sys", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4511,6 +4637,12 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + [[package]] name = "munge" version = "0.4.6" @@ -4558,7 +4690,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags", + "bitflags 2.9.2", "cfg-if", "cfg_aliases", "libc", @@ -4750,7 +4882,7 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.1", + "quick-xml 0.38.2", "rand 0.9.2", "reqwest", "ring", @@ -4758,7 +4890,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", "tracing", "url", @@ -4978,9 +5110,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" @@ -4989,7 +5121,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" dependencies = [ "memchr", - "thiserror 2.0.15", + "thiserror 2.0.16", "ucd-trie", ] @@ -5026,13 +5158,23 @@ dependencies = [ "sha2", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.10.0", +] + [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "indexmap 2.10.0", ] @@ -5353,6 +5495,59 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck", + "itertools 0.12.1", + "log", + "multimap", + "once_cell", + "petgraph 0.6.5", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.106", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost", +] + [[package]] name = "psm" version = "0.1.26" @@ -5420,9 +5615,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.38.1" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" +checksum = "d200a41a7797e6461bd04e4e95c3347053a731c32c87f066f2f0dda22dbdbba8" dependencies = [ "memchr", "serde", @@ -5442,7 +5637,7 @@ dependencies = [ "rustc-hash 2.1.1", "rustls 0.23.31", "socket2 0.5.10", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", "tracing", "web-time", @@ -5463,7 +5658,7 @@ dependencies = [ "rustls 0.23.31", "rustls-pki-types", "slab", - "thiserror 2.0.15", + "thiserror 2.0.16", "tinyvec", "tracing", "web-time", @@ -5610,7 +5805,7 @@ version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "bitflags", + "bitflags 2.9.2", ] [[package]] @@ -5621,7 +5816,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.16", "libredox", - "thiserror 2.0.15", + "thiserror 2.0.16", ] [[package]] @@ -5769,11 +5964,11 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tokio-rustls 0.26.2", "tokio-util", - "tower", + "tower 0.5.2", "tower-http", "tower-service", "url", @@ -5957,7 +6152,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags", + "bitflags 2.9.2", "errno", "libc", "linux-raw-sys 0.4.15", @@ -5970,7 +6165,7 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ - "bitflags", + "bitflags 2.9.2", "errno", "libc", "linux-raw-sys 0.9.4", @@ -6090,7 +6285,7 @@ version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" dependencies = [ - "bitflags", + "bitflags 2.9.2", "cfg-if", "clipboard-win", "fd-lock", @@ -6208,7 +6403,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.9.2", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -6221,7 +6416,7 @@ version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" dependencies = [ - "bitflags", + "bitflags 2.9.2", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -6431,7 +6626,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.15", + "thiserror 2.0.16", "time", ] @@ -6518,7 +6713,7 @@ dependencies = [ "simdutf8", "sonic-number", "sonic-simd", - "thiserror 2.0.15", + "thiserror 2.0.16", ] [[package]] @@ -6530,6 +6725,30 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "spark-connect-rs" +version = "0.0.2" +source = "git+https://github.com/apache/spark-connect-rust.git?rev=061cb3ecb187b039141f20c722c7984e915f3b9d#061cb3ecb187b039141f20c722c7984e915f3b9d" +dependencies = [ + "arrow", + "arrow-ipc", + "chrono", + "futures-util", + "http-body 0.4.6", + "prost", + "prost-types", + "rand 0.9.2", + "regex", + "serde_json", + "thiserror 2.0.16", + "tokio", + "tonic", + "tonic-build", + "tower 0.5.2", + "url", + "uuid", +] + [[package]] name = "spin" version = "0.9.8" @@ -6562,7 +6781,7 @@ dependencies = [ "glob", "humantime", "itertools 0.13.0", - "libtest-mimic", + "libtest-mimic 0.8.1", "md-5", "owo-colors", "rand 0.8.5", @@ -6570,7 +6789,7 @@ dependencies = [ "similar", "subst", "tempfile", - "thiserror 2.0.15", + "thiserror 2.0.16", "tracing", ] @@ -6637,7 +6856,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", "tokio-stream", "tracing", @@ -6689,7 +6908,7 @@ checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags", + "bitflags 2.9.2", "byteorder", "bytes", "crc", @@ -6717,7 +6936,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.15", + "thiserror 2.0.16", "tracing", "whoami", ] @@ -6730,7 +6949,7 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags", + "bitflags 2.9.2", "byteorder", "crc", "dotenvy", @@ -6754,7 +6973,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror 2.0.15", + "thiserror 2.0.16", "tracing", "whoami", ] @@ -6778,7 +6997,7 @@ dependencies = [ "serde", "serde_urlencoded", "sqlx-core", - "thiserror 2.0.15", + "thiserror 2.0.16", "tracing", "url", ] @@ -6903,6 +7122,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -6937,15 +7162,15 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.20.0" +version = "3.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", "rustix 1.0.8", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -6970,6 +7195,15 @@ dependencies = [ "unic-segment", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termtree" version = "0.5.1" @@ -6987,11 +7221,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d76d3f064b981389ecb4b6b7f45a0bf9fdac1d5b9204c7bd6714fecc302850" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" dependencies = [ - "thiserror-impl 2.0.15", + "thiserror-impl 2.0.16", ] [[package]] @@ -7007,9 +7241,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d29feb33e986b6ea906bd9c3559a856983f92371b3eaa5e83782a351623de0" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" dependencies = [ "proc-macro2", "quote", @@ -7132,6 +7366,16 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tokio-io-timeout" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bd86198d9ee903fedd2f9a2e72014287c0d9167e4ae43b5853007205dda1b76" +dependencies = [ + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-macros" version = "2.5.0" @@ -7228,6 +7472,66 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tonic" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.21.7", + "bytes", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-build" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.2" @@ -7237,7 +7541,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -7249,14 +7553,14 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags", + "bitflags 2.9.2", "bytes", "futures-util", "http 1.3.1", "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -7508,9 +7812,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "137a3c834eaf7139b73688502f3f1141a0337c5d8e4d9b536f9b8c796e26a7c4" dependencies = [ "form_urlencoded", "idna", @@ -7590,10 +7894,10 @@ dependencies = [ "pin-project", "rand 0.9.2", "socket2 0.5.10", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", "tokio-stream", - "tower", + "tower 0.5.2", "tracing", ] @@ -7621,7 +7925,7 @@ dependencies = [ "rustc-hash 2.1.1", "scopeguard", "sonic-rs", - "thiserror 2.0.15", + "thiserror 2.0.16", "tokio", "tracing", "volo", @@ -7835,11 +8139,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -8196,7 +8500,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags", + "bitflags 2.9.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 868284e751..4f249d99b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ rust-version = "1.85" anyhow = "1.0.72" apache-avro = "0.17" array-init = "2" +arrow = { version = "55" } arrow-arith = { version = "55" } arrow-array = { version = "55" } arrow-buffer = { version = "55" } @@ -65,6 +66,7 @@ clap = { version = "4.5.35", features = ["derive", "cargo"] } ctor = "0.2.8" datafusion = "47" datafusion-cli = "47" +datafusion-common = { version = "47" } datafusion-sqllogictest = "47" derive_builder = "0.20" dirs = "6" @@ -77,6 +79,7 @@ futures = "0.3" hive_metastore = "0.2.0" http = "1.2" iceberg = { version = "0.6.0", path = "./crates/iceberg" } +iceberg-catalog-loader = { version = "0.6.0", path = "./crates/catalog/loader" } iceberg-catalog-rest = { version = "0.6.0", path = "./crates/catalog/rest" } iceberg-catalog-glue = { version = "0.6.0", path = "./crates/catalog/glue" } iceberg-catalog-s3tables = { version = "0.6.0", path = "./crates/catalog/s3tables" } @@ -124,4 +127,4 @@ url = "2.5.4" uuid = { version = "1.16", features = ["v7"] } volo = "0.10.6" volo-thrift = "0.10.8" -zstd = "0.13.2" +zstd = "0.13.2" \ No newline at end of file diff --git a/Makefile b/Makefile index 2cc847d7b3..67283d3e62 100644 --- a/Makefile +++ b/Makefile @@ -17,14 +17,16 @@ .EXPORT_ALL_VARIABLES: +SQL_LOGIC_TEST := iceberg-sqllogictest + build: - cargo build --all-targets --all-features --workspace + cargo build --all-targets --all-features --workspace --exclude $(SQL_LOGIC_TEST) check-fmt: cargo fmt --all -- --check check-clippy: - cargo clippy --all-targets --all-features --workspace -- -D warnings + cargo clippy --all-targets --all-features --workspace --exclude $(SQL_LOGIC_TEST) -- -D warnings install-cargo-machete: cargo install cargo-machete@0.7.0 @@ -44,13 +46,16 @@ check-toml: install-taplo-cli check: check-fmt check-clippy check-toml cargo-machete doc-test: - cargo test --no-fail-fast --doc --all-features --workspace + cargo test --no-fail-fast --doc --all-features --workspace --exclude $(SQL_LOGIC_TEST) unit-test: doc-test - cargo test --no-fail-fast --lib --all-features --workspace + cargo test --no-fail-fast --lib --all-features --workspace --exclude $(SQL_LOGIC_TEST) test: doc-test - cargo test --no-fail-fast --all-targets --all-features --workspace + cargo test --no-fail-fast --all-targets --all-features --workspace --exclude $(SQL_LOGIC_TEST) + +sqllogictest: + cargo test -p iceberg-sqllogictest --no-fail-fast clean: cargo clean diff --git a/crates/sqllogictest/Cargo.toml b/crates/sqllogictest/Cargo.toml index ba149daeab..8e0fe4b9e8 100644 --- a/crates/sqllogictest/Cargo.toml +++ b/crates/sqllogictest/Cargo.toml @@ -31,8 +31,27 @@ datafusion = { workspace = true } datafusion-sqllogictest = { workspace = true } enum-ordinalize = { workspace = true } indicatif = { workspace = true } +iceberg = { workspace = true } +iceberg_test_utils = { path = "../test_utils", features = ["tests"] } +iceberg-catalog-rest = { workspace = true } +iceberg-datafusion = { workspace = true } sqllogictest = { workspace = true } toml = { workspace = true } +tokio = { workspace = true } +env_logger = "0.11.8" +log = "0.4.25" +itertools = "0.13.0" +spark-connect-rs = { git = "https://github.com/apache/spark-connect-rust.git", rev = "061cb3ecb187b039141f20c722c7984e915f3b9d" } +#spark-connect-rs = "0.0.2" + +[dev-dependencies] +libtest-mimic = "0.7.3" +async-stream = ">=0.3.5, <0.4" + +[[test]] +harness = false +name = "sqllogictests" +path = "tests/sqllogictests.rs" [package.metadata.cargo-machete] # These dependencies are added to ensure minimal dependency version diff --git a/crates/sqllogictest/README.md b/crates/sqllogictest/README.md index ddcfe851c5..72b592bfa0 100644 --- a/crates/sqllogictest/README.md +++ b/crates/sqllogictest/README.md @@ -24,7 +24,7 @@ This crate contains a suite of [sqllogictest](https://crates.io/crates/sqllogict Just run the following command: ```bash -cargo test +cargo test --test sqllogictests ``` ## Sql Engines diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs index f95cfb247d..669f120df6 100644 --- a/crates/sqllogictest/src/engine/datafusion.rs +++ b/crates/sqllogictest/src/engine/datafusion.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -22,46 +23,106 @@ use anyhow::{Context, anyhow}; use datafusion::catalog::CatalogProvider; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_sqllogictest::DataFusion; +use iceberg::CatalogBuilder; +use iceberg_catalog_rest::{REST_CATALOG_PROP_URI, RestCatalogBuilder}; +use iceberg_datafusion::IcebergCatalogProvider; use indicatif::ProgressBar; use sqllogictest::runner::AsyncDB; +use sqllogictest::{MakeConnection, Record, parse_file}; use toml::Table as TomlTable; -use crate::engine::Engine; -use crate::error::Result; +use crate::engine::EngineRunner; +use crate::error::{Error, Result}; pub struct DataFusionEngine { - datafusion: DataFusion, + relative_path: PathBuf, + pb: ProgressBar, + config: TomlTable, } #[async_trait::async_trait] -impl Engine for DataFusionEngine { - async fn new(config: TomlTable) -> Result { +impl EngineRunner for DataFusionEngine { + async fn run_slt_file(&mut self, path: &Path) -> Result<()> { let session_config = SessionConfig::new().with_target_partitions(4); let ctx = SessionContext::new_with_config(session_config); - ctx.register_catalog("default", Self::create_catalog(&config).await?); - - Ok(Self { - datafusion: DataFusion::new(ctx, PathBuf::from("testdata"), ProgressBar::new(100)), - }) - } + ctx.register_catalog("default", Self::create_catalog(&self.config).await?); - async fn run_slt_file(&mut self, path: &Path) -> Result<()> { - let content = std::fs::read_to_string(path) - .with_context(|| format!("Failed to read slt file {:?}", path)) - .map_err(|e| anyhow!(e))?; + let runner = sqllogictest::Runner::new(|| async { + Ok(DataFusion::new( + ctx.clone(), + self.relative_path.clone(), + self.pb.clone(), + )) + }); - self.datafusion - .run(content.as_str()) - .await - .with_context(|| format!("Failed to run slt file {:?}", path)) - .map_err(|e| anyhow!(e))?; + let result: std::result::Result<(), Error> = Self::run_file_in_runner(path, runner).await; + self.pb.finish_and_clear(); - Ok(()) + result } } impl DataFusionEngine { + pub async fn new(config: TomlTable) -> Result { + Ok(Self { + relative_path: PathBuf::from("testdata"), + pb: ProgressBar::new(100), + config, + }) + } + async fn create_catalog(_: &TomlTable) -> anyhow::Result> { - todo!() + let catalog = RestCatalogBuilder::default() + .load( + "rest", + HashMap::from([ + ( + REST_CATALOG_PROP_URI.to_string(), + "http://localhost:8181".to_string(), + ), + ( + "s3.endpoint".to_string(), + "http://localhost:9000".to_string(), + ), + ("s3.access-key-id".to_string(), "admin".to_string()), + ("s3.secret-access-key".to_string(), "password".to_string()), + ("s3.region".to_string(), "us-east-1".to_string()), + ("s3.disable-config-load".to_string(), "true".to_string()), + ]), + ) + .await?; + + Ok(Arc::new( + IcebergCatalogProvider::try_new(Arc::new(catalog)).await?, + )) + } + + async fn run_file_in_runner>( + path: &Path, + mut runner: sqllogictest::Runner, + ) -> Result<()> { + println!("run file in runner"); + + let records = parse_file(path).context("Failed to parse slt file")?; + + let mut errs = vec![]; + for record in records.into_iter() { + if let Record::Halt { .. } = record { + break; + } + if let Err(err) = runner.run_async(record).await { + errs.push(format!("{err}")); + } + } + + if !errs.is_empty() { + let mut msg = format!("{} errors in file {}\n\n", errs.len(), path.display()); + for (i, err) in errs.iter().enumerate() { + msg.push_str(&format!("{}. {err}\n\n", i + 1)); + } + return Err(Error(anyhow!(msg))); + } + + Ok(()) } } diff --git a/crates/sqllogictest/src/engine/mod.rs b/crates/sqllogictest/src/engine/mod.rs index 61722f663f..c07ca8fb6f 100644 --- a/crates/sqllogictest/src/engine/mod.rs +++ b/crates/sqllogictest/src/engine/mod.rs @@ -16,15 +16,25 @@ // under the License. mod datafusion; +mod spark; use std::path::Path; use toml::Table as TomlTable; +use crate::engine::datafusion::DataFusionEngine; +use crate::engine::spark::SparkEngine; use crate::error::Result; #[async_trait::async_trait] -pub trait Engine: Sized { - async fn new(config: TomlTable) -> Result; +pub trait EngineRunner { async fn run_slt_file(&mut self, path: &Path) -> Result<()>; } + +pub async fn load_engine(engine_type: &str, cfg: TomlTable) -> Result> { + match engine_type { + "datafusion" => Ok(Box::new(DataFusionEngine::new(cfg).await?)), + "spark-connect" => Ok(Box::new(SparkEngine::new(cfg).await?)), + _ => Err(anyhow::anyhow!("Unsupported engine type: {}", engine_type).into()), + } +} diff --git a/crates/sqllogictest/src/engine/spark.rs b/crates/sqllogictest/src/engine/spark.rs new file mode 100644 index 0000000000..26b177bb6b --- /dev/null +++ b/crates/sqllogictest/src/engine/spark.rs @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::path::Path; +use std::time::Duration; + +use anyhow::{Context, anyhow}; +use datafusion_sqllogictest::{DFColumnType, DFOutput, convert_batches, convert_schema_to_types}; +use spark_connect_rs::{SparkSession, SparkSessionBuilder}; +use sqllogictest::{AsyncDB, DBOutput, Record, parse_file}; +use toml::Table as TomlTable; + +use crate::engine::EngineRunner; +use crate::error::{Error, Result}; + +pub struct SparkEngine { + session: SparkSession, +} + +#[async_trait::async_trait] +impl AsyncDB for SparkEngine { + type Error = Error; + type ColumnType = DFColumnType; + + async fn run(&mut self, sql: &str) -> Result> { + Self::run_query(&self.session, sql).await + } + + async fn shutdown(&mut self) {} + + fn engine_name(&self) -> &str { + "SparkConnect" + } + + async fn sleep(dur: Duration) { + tokio::time::sleep(dur).await; + } +} + +#[async_trait::async_trait] +impl EngineRunner for SparkEngine { + async fn run_slt_file(&mut self, path: &Path) -> crate::error::Result<()> { + let path_dir = path.to_str().unwrap(); + println!("engine running slt file on path: {path_dir}"); + + let session = self.session.clone(); + let runner = sqllogictest::Runner::new(move || { + let session = session.clone(); + async move { Ok(SparkEngine { session }) } + }); + + let result: std::result::Result<(), Error> = Self::run_file_in_runner(path, runner).await; + + result + } +} + +impl SparkEngine { + pub async fn new(configs: TomlTable) -> Result { + let url = configs + .get("url") + .ok_or_else(|| anyhow!("url property doesn't exist for spark engine"))? + .as_str() + .ok_or_else(|| anyhow!("url property is not a string for spark engine"))?; + + let session = SparkSessionBuilder::remote(url) + .app_name("SparkConnect") + .build() + .await + .map_err(|e| anyhow!(e))?; + + Ok(Self { session }) + } + + pub async fn run_query(session: &SparkSession, sql: impl Into) -> Result { + let df = session.sql(sql.into().as_str()).await.unwrap(); + let batches = df.collect().await.unwrap(); + let schema = batches.schema(); + let types = convert_schema_to_types(schema.fields()); + + // Convert batches to rows of strings + let rows = convert_batches(vec![batches]).unwrap(); + + Ok(DBOutput::Rows { types, rows }) + } + + async fn run_file_in_runner( + path: &Path, + mut runner: sqllogictest::Runner, + ) -> Result<()> + where + M: sqllogictest::MakeConnection, + { + println!("run file in runner"); + + let records = parse_file(path).context("Failed to parse slt file")?; + + let mut errs = vec![]; + for record in records.into_iter() { + if let Record::Halt { .. } = record { + break; + } + if let Err(err) = runner.run_async(record).await { + errs.push(format!("{err}")); + } + } + + if !errs.is_empty() { + let mut msg = format!("{} errors in file {}\n\n", errs.len(), path.display()); + for (i, err) in errs.iter().enumerate() { + msg.push_str(&format!("{}. {err}\n\n", i + 1)); + } + return Err(Error(anyhow!(msg))); + } + + Ok(()) + } +} diff --git a/crates/sqllogictest/src/lib.rs b/crates/sqllogictest/src/lib.rs index c72d50c429..34a3c654aa 100644 --- a/crates/sqllogictest/src/lib.rs +++ b/crates/sqllogictest/src/lib.rs @@ -18,7 +18,6 @@ // This lib contains codes copied from // [Apache Datafusion](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest) -#[allow(dead_code)] mod engine; -#[allow(dead_code)] -mod error; +pub mod error; +pub mod schedule; diff --git a/crates/sqllogictest/src/schedule.rs b/crates/sqllogictest/src/schedule.rs new file mode 100644 index 0000000000..67bd8038ec --- /dev/null +++ b/crates/sqllogictest/src/schedule.rs @@ -0,0 +1,153 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; +use std::fs::read_to_string; +use std::path::{Path, PathBuf}; + +use anyhow::anyhow; +use itertools::Itertools; +use toml::{Table, Value}; + +use crate::engine::{EngineRunner, load_engine}; + +/// Schedule of engines to run tests. +/// Controls the engine, storage, and catalog being used for the test steps +pub struct Schedule { + // Map of engine names to engine instances. + engines: HashMap>, + // List of steps to run, each step is a sql file. + steps: Vec, + // catalog: Box, +} + +pub struct Step { + /// Name of engine to execute. + engine_name: String, + /// Name of sql file. + sql: String, +} + +impl Schedule { + pub async fn parse>(schedule_def_file: P) -> anyhow::Result { + let content = read_to_string(schedule_def_file)?; + let toml_value = content.parse::()?; + let toml_table = toml_value + .as_table() + .ok_or_else(|| anyhow::anyhow!("Schedule file must be a TOML table"))?; + + let engines = Schedule::parse_engines(toml_table).await?; + let steps = Schedule::parse_steps(toml_table).await?; + + Ok(Self { engines, steps }) + } + + async fn parse_engines( + table: &Table, + ) -> anyhow::Result>> { + println!("parsing engine..."); + let engines = table + .get("engines") + .ok_or_else(|| anyhow::anyhow!("Schedule file must have an 'engines' table"))? + .as_table() + .ok_or_else(|| anyhow::anyhow!("'engines' must be a table"))?; + + let mut result = HashMap::new(); + for (name, engine_config) in engines { + println!("engine: {name}, config: {engine_config}"); + let engine_configs = engine_config + .as_table() + .ok_or_else(|| anyhow::anyhow!("Config of engine {name} is not a table"))?; + + println!("name {name}, engine config {engine_configs}"); + + let engine_type = engine_configs + .get("type") + .ok_or_else(|| anyhow::anyhow!("Engine {name} doesn't have a 'type' field"))? + .as_str() + .ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?; + + let engine = load_engine(engine_type, engine_configs.clone()).await?; + + result.insert(name.clone(), engine); + } + + Ok(result) + } + + async fn parse_steps(table: &Table) -> anyhow::Result> { + let steps = table + .get("steps") + .ok_or_else(|| anyhow!("steps not found"))? + .as_array() + .ok_or_else(|| anyhow!("steps is not array"))?; + + steps.iter().map(Schedule::parse_step).try_collect() + } + + fn parse_step(value: &Value) -> anyhow::Result { + let t = value + .as_table() + .ok_or_else(|| anyhow!("Step must be a table!"))?; + + let engine_name = t + .get("engine") + .ok_or_else(|| anyhow!("Property engine is missing in step"))? + .as_str() + .ok_or_else(|| anyhow!("Property engine is not a string in step"))? + .to_string(); + + let sql = t + .get("sql") + .ok_or_else(|| anyhow!("Property sql is missing in step"))? + .as_str() + .ok_or_else(|| anyhow!("Property sqlis not a string in step"))? + .to_string(); + + println!("engine: {engine_name}, sql: {sql}"); + Ok(Step { engine_name, sql }) + } + + pub async fn run(mut self) -> anyhow::Result<()> { + println!("running steps"); + + for step_idx in 0..self.steps.len() { + self.run_step(step_idx).await?; + } + + Ok(()) + } + + async fn run_step(&mut self, step_index: usize) -> anyhow::Result<()> { + println!("running step: {step_index}"); + + let step = &self.steps[step_index]; + + let engine = self + .engines + .get_mut(&step.engine_name) + .ok_or_else(|| anyhow!("Engine {} not found!", step.engine_name))?; + + let step_sql_path = PathBuf::from(format!( + "{}/testdata/slts/{}", + env!("CARGO_MANIFEST_DIR"), + &step.sql + )); + engine.run_slt_file(step_sql_path.as_path()).await?; + Ok(()) + } +} diff --git a/crates/sqllogictest/testdata/docker/docker-compose.yml b/crates/sqllogictest/testdata/docker/docker-compose.yml new file mode 100644 index 0000000000..b4335af423 --- /dev/null +++ b/crates/sqllogictest/testdata/docker/docker-compose.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +networks: + rest_bridge: + +services: + rest: + image: apache/iceberg-rest-fixture:1.9.2 + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + - CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog + - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory + - CATALOG_WAREHOUSE=s3://icebergdata/demo + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio:9000 + depends_on: + - minio + networks: + rest_bridge: + ports: + - 8181:8181 + expose: + - 8181 + + minio: + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + environment: + - MINIO_ROOT_USER=admin + - MINIO_ROOT_PASSWORD=password + - MINIO_DOMAIN=minio + - MINIO_DEFAULT_BUCKETS=icebergdata + hostname: icebergdata.minio + networks: + rest_bridge: + ports: + - 9000:9000 + - 9001:9001 + expose: + - 9001 + - 9000 + command: ["server", "/data", "--console-address", ":9001"] + + mc: + depends_on: + - minio + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force minio/icebergdata; /usr/bin/mc mb minio/icebergdata; /usr/bin/mc policy set public minio/icebergdata; tail -f /dev/null " + networks: + rest_bridge: + + spark: + depends_on: + - rest + - minio + image: apache/spark:3.5.6-java17 + environment: + - AWS_ACCESS_KEY_ID=admin + - AWS_SECRET_ACCESS_KEY=password + - AWS_REGION=us-east-1 + - SPARK_HOME=/opt/spark + - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin + user: root + networks: + rest_bridge: + ports: + - "15002:15002" + healthcheck: + test: netstat -ltn | grep -c 15002 + interval: 1s + retries: 1200 + volumes: + - ./spark:/spark-script + entrypoint: [ "/spark-script/spark-connect-server.sh" ] \ No newline at end of file diff --git a/crates/sqllogictest/testdata/docker/spark/spark-connect-server.sh b/crates/sqllogictest/testdata/docker/spark/spark-connect-server.sh new file mode 100755 index 0000000000..404599099f --- /dev/null +++ b/crates/sqllogictest/testdata/docker/spark/spark-connect-server.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -ex + +SPARK_VERSION="3.5.6" +ICEBERG_VERSION="1.9.2" + +PACKAGES="org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:$ICEBERG_VERSION" +PACKAGES="$PACKAGES,org.apache.iceberg:iceberg-aws-bundle:$ICEBERG_VERSION" +PACKAGES="$PACKAGES,org.apache.spark:spark-connect_2.12:$SPARK_VERSION" + +/opt/spark/sbin/start-connect-server.sh \ + --packages $PACKAGES \ + --master local[3] \ + --conf spark.driver.extraJavaOptions="-Dlog4j.configuration=file:///spark-script/log4j2.properties" \ + --conf spark.driver.bindAddress=0.0.0.0 \ + --conf spark.sql.catalog.default=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.default.catalog-impl=org.apache.iceberg.rest.RESTCatalog \ + --conf spark.sql.catalog.default.uri=http://rest:8181 \ + --conf spark.sql.catalog.default.s3.endpoint=http://minio:9000 \ + --conf spark.sql.catalog.default.s3.path.style.access=true \ + --conf spark.sql.catalog.default.s3.access.key=admin \ + --conf spark.sql.catalog.default.s3.secret.key=password \ + --conf spark.sql.defaultCatalog=default + +tail -f /opt/spark/logs/spark*.out diff --git a/crates/sqllogictest/testdata/schedules/test.toml b/crates/sqllogictest/testdata/schedules/test.toml new file mode 100644 index 0000000000..10c9577e83 --- /dev/null +++ b/crates/sqllogictest/testdata/schedules/test.toml @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[engines] +sc = { type = "spark-connect", url = "sc://localhost:15002" } +df = { type = "datafusion", url = "http://localhost:8181" } + +[[steps]] +engine = "sc" +sql = "demo/prepare.slt" + +[[steps]] +engine = "df" +sql = "demo/verify.slt" diff --git a/crates/sqllogictest/testdata/slts/demo/prepare.slt b/crates/sqllogictest/testdata/slts/demo/prepare.slt new file mode 100644 index 0000000000..e25e4682d6 --- /dev/null +++ b/crates/sqllogictest/testdata/slts/demo/prepare.slt @@ -0,0 +1,18 @@ +statement ok +CREATE DATABASE IF NOT EXISTS s1; + +statement ok +USE DATABASE s1; + +statement ok +CREATE TABLE t1 (id INTEGER); + +statement ok +INSERT INTO t1 VALUES (1), (2), (3); + +query I +SELECT * FROM t1 ORDER BY id +---- +1 +2 +3 diff --git a/crates/sqllogictest/testdata/slts/demo/verify.slt b/crates/sqllogictest/testdata/slts/demo/verify.slt new file mode 100644 index 0000000000..7f371d70ce --- /dev/null +++ b/crates/sqllogictest/testdata/slts/demo/verify.slt @@ -0,0 +1,6 @@ +query I +SELECT * FROM default.s1.t1 ORDER BY id; +---- +1 +2 +3 diff --git a/crates/sqllogictest/tests/sqllogictests.rs b/crates/sqllogictest/tests/sqllogictests.rs new file mode 100644 index 0000000000..cda9fb089f --- /dev/null +++ b/crates/sqllogictest/tests/sqllogictests.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fs; +use std::path::PathBuf; +use std::sync::RwLock; + +use iceberg_sqllogictest::schedule::Schedule; +use iceberg_test_utils::docker::DockerCompose; +use iceberg_test_utils::normalize_test_name; +use libtest_mimic::{Arguments, Trial}; +use tokio::runtime::Handle; + +static DOCKER_COMPOSE_ENV: RwLock> = RwLock::new(None); + +pub fn main() { + env_logger::init(); + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + // Parse command line arguments + let args = Arguments::from_args(); + + setup_env(); + + let tests = collect_trials(rt.handle().clone()).unwrap(); + let result = libtest_mimic::run(&args, tests); + + teardown_env(); + drop(rt); + + result.exit(); +} + +fn setup_env() { + let mut guard = DOCKER_COMPOSE_ENV.write().unwrap(); + let docker_compose = DockerCompose::new( + normalize_test_name(module_path!()), + format!("{}/testdata/docker", env!("CARGO_MANIFEST_DIR")), + ); + docker_compose.up(); + guard.replace(docker_compose); +} + +fn teardown_env() { + let mut guard = DOCKER_COMPOSE_ENV.write().unwrap(); + guard.take(); +} + +pub(crate) fn collect_trials(handle: Handle) -> anyhow::Result> { + let schedule_files = collect_schedule_files()?; + log::debug!( + "Found {} schedules files: {:?}", + schedule_files.len(), + &schedule_files + ); + let mut trials = Vec::with_capacity(schedule_files.len()); + for schedule_file in schedule_files { + let h = handle.clone(); + let trial_name = format!( + "schedule: {}", + schedule_file + .file_name() + .expect("Schedule file should have a name") + .to_string_lossy() + ); + let trial = Trial::test(trial_name, move || { + Ok(h.block_on(run_schedule(schedule_file.clone()))?) + }); + trials.push(trial); + } + Ok(trials) +} + +pub(crate) fn collect_schedule_files() -> anyhow::Result> { + let dir = PathBuf::from(format!("{}/testdata/schedules", env!("CARGO_MANIFEST_DIR"))); + let mut schedule_files = Vec::with_capacity(32); + for entry in fs::read_dir(&dir)? { + let entry = entry?; + let path = entry.path(); + if path.is_file() { + schedule_files.push(fs::canonicalize(dir.join(path))?); + } + } + Ok(schedule_files) +} + +pub(crate) async fn run_schedule(schedule_file: PathBuf) -> anyhow::Result<()> { + let schedules = Schedule::parse(schedule_file).await?; + schedules.run().await?; + + Ok(()) +}