diff --git a/Cargo.lock b/Cargo.lock index 6e88493e928e..a95df94c1d54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -124,12 +124,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -262,7 +256,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -300,9 +294,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c26b57282a08ae92f727497805122fec964c6245cfa0e13f0e75452eaf3bc41f" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" dependencies = [ "arrow-arith", "arrow-array", @@ -321,9 +315,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cebf38ca279120ff522f4954b81a39527425b6e9f615e6b72842f4de1ffe02b8" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" dependencies = [ "arrow-array", "arrow-buffer", @@ -335,9 +329,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744109142cdf8e7b02795e240e20756c2a782ac9180d4992802954a8f871c0de" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -346,15 +340,15 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "num", ] [[package]] name = "arrow-buffer" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601bb103c4c374bcd1f62c66bcea67b42a2ee91a690486c37d4c180236f11ccc" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" dependencies = [ "bytes", "half", @@ -363,9 +357,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed61d9d73eda8df9e3014843def37af3050b5080a9acbe108f045a316d5a0be" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" dependencies = [ "arrow-array", "arrow-buffer", @@ -384,9 +378,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa95b96ce0c06b4d33ac958370db8c0d31e88e54f9d6e08b0353d18374d9f991" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" dependencies = [ "arrow-array", "arrow-cast", @@ -399,9 +393,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43407f2c6ba2367f64d85d4603d6fb9c4b92ed79d2ffd21021b37efa96523e12" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -411,9 +405,9 @@ dependencies = [ [[package]] name = "arrow-flight" 
-version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c66c5e4a7aedc2bfebffeabc2116d76adb22e08d230b968b995da97f8b11ca" +checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" dependencies = [ "arrow-array", "arrow-buffer", @@ -430,9 +424,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4b0487c4d2ad121cbc42c4db204f1509f8618e589bc77e635e9c40b502e3b90" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" dependencies = [ "arrow-array", "arrow-buffer", @@ -446,9 +440,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d747573390905905a2dc4c5a61a96163fe2750457f90a04ee2a88680758c79" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" dependencies = [ "arrow-array", "arrow-buffer", @@ -457,7 +451,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.10.0", + "indexmap 2.11.4", "lexical-core", "memchr", "num", @@ -468,9 +462,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c142a147dceb59d057bad82400f1693847c80dca870d008bf7b91caf902810ae" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" dependencies = [ "arrow-array", "arrow-buffer", @@ -481,9 +475,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac6620667fccdab4204689ca173bd84a15de6bb6b756c3a8764d4d7d0c2fc04" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" dependencies = [ "arrow-array", "arrow-buffer", @@ -494,9 +488,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa93af9ff2bb80de539e6eb2c1c8764abd0f4b73ffb0d7c82bf1f9868785e66" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" dependencies = [ "serde", "serde_json", @@ -504,9 +498,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be8b2e0052cd20d36d64f32640b68a5ab54d805d24a473baee5d52017c85536c" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -518,9 +512,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "56.1.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2155e26e17f053c8975c546fc70cf19c00542f9abf43c23a88a46ef7204204f" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" dependencies = [ "arrow-array", "arrow-buffer", @@ -530,7 +524,7 @@ dependencies = [ "memchr", "num", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -644,7 +638,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -666,7 +660,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -677,13 +671,13 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.88" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -763,7 +757,7 @@ checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -918,7 +912,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1066,7 +1060,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1195,7 +1189,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1218,7 +1212,7 @@ dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1560,7 +1554,7 @@ version = "0.13.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe45e18904af7af10e4312df7c97251e98af98c70f42f1f2587aecfcbee56bf" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "lazy_static", "num-traits", "regex", @@ -1614,17 +1608,16 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -1746,7 +1739,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -1811,7 +1804,6 @@ dependencies = [ "session", "snafu 0.8.6", "store-api", - "substrait 0.18.0", "table", "tempfile", "tokio", @@ -1956,7 +1948,6 @@ dependencies = [ "snafu 0.8.6", "standalone", "store-api", - "substrait 0.18.0", "table", "temp-env", "tempfile", @@ -1986,11 +1977,12 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.1.4" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "unicode-segmentation", + "strum 0.26.3", + "strum_macros 0.26.4", "unicode-width 0.2.1", ] @@ -2033,6 +2025,7 @@ dependencies = [ "common-base", "common-error", "common-macro", + "common-stat", "common-telemetry", "common-test-util", "common-wal", @@ -2040,13 +2033,11 @@ dependencies = [ "datanode", "humantime-serde", "meta-client", - "num_cpus", "object-store", "serde", "serde_json", "serde_with", "snafu 0.8.6", - "sysinfo", "temp-env", "tempfile", "toml 0.8.23", @@ -2075,7 +2066,7 @@ dependencies = [ "lazy_static", "object-store", "object_store_opendal", - "orc-rust 0.6.3", + "orc-rust", "parquet", "paste", "regex", @@ -2291,7 +2282,7 @@ dependencies = [ 
"once_cell", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -2465,7 +2456,7 @@ dependencies = [ "futures-util", "serde", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlparser_derive 0.1.1", "store-api", "tokio", @@ -2546,16 +2537,19 @@ dependencies = [ "jsonb", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", ] [[package]] name = "common-stat" version = "0.18.0" dependencies = [ + "common-base", "lazy_static", "nix 0.30.1", + "num_cpus", "prometheus", + "sysinfo", ] [[package]] @@ -3176,7 +3170,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3190,7 +3184,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3212,7 +3206,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3223,7 +3217,7 @@ checksum = "2b5be8a7a562d315a5b92a630c30cec6bcf663e6673f00fbb69cca66a6f521b9" dependencies = [ "darling_core 0.21.1", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -3248,8 +3242,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "arrow-ipc", @@ -3276,6 +3270,7 @@ dependencies = [ "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-optimizer", "datafusion-physical-plan", @@ -3283,7 +3278,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "hex", "itertools 0.14.0", "log", "object_store", @@ -3291,7 +3285,8 @@ dependencies = [ "parquet", "rand 0.9.1", "regex", - "sqlparser 0.55.0", + "rstest", + "sqlparser", "tempfile", "tokio", "url", @@ -3302,8 +3297,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", @@ -3316,7 +3311,6 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-plan", "datafusion-session", - "datafusion-sql", "futures", "itertools 0.14.0", "log", @@ -3327,8 +3321,8 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", @@ -3349,33 +3343,31 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = 
"50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", "arrow-ipc", - "base64 0.22.1", "chrono", "half", "hashbrown 0.14.5", - "hex", - "indexmap 2.10.0", + "indexmap 2.11.4", "libc", "log", "object_store", "parquet", "paste", "recursive", - "sqlparser 0.55.0", + "sqlparser", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "futures", "log", @@ -3384,8 +3376,8 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-compression 0.4.19", @@ -3398,6 +3390,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -3407,9 +3400,7 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "parquet", "rand 0.9.1", - "tempfile", "tokio", "tokio-util", "url", @@ -3419,19 +3410,17 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", @@ -3443,71 +3432,66 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", "futures", "object_store", - "serde_json", "tokio", ] [[package]] name = "datafusion-datasource-parquet" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = 
"git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", "bytes", - "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", + "datafusion-functions-aggregate-common", "datafusion-physical-expr", + "datafusion-physical-expr-adapter", "datafusion-physical-expr-common", - "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-pruning", "datafusion-session", "futures", - "hex", "itertools 0.14.0", "log", "object_store", "parking_lot 0.12.4", "parquet", - "rand 0.9.1", "tokio", ] [[package]] name = "datafusion-doc" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" [[package]] name = "datafusion-execution" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", + "async-trait", "dashmap", "datafusion-common", "datafusion-expr", @@ -3515,7 +3499,6 @@ dependencies = [ "log", "object_store", "parking_lot 0.12.4", - "parquet", "rand 0.9.1", "tempfile", "url", @@ -3523,8 +3506,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", @@ -3535,29 +3518,30 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.10.0", + "indexmap 2.11.4", + "itertools 0.14.0", "paste", "recursive", "serde_json", - "sqlparser 0.55.0", + "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "arrow-buffer", @@ -3584,8 +3568,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "49.0.0" -source = 
"git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", @@ -3604,8 +3588,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", @@ -3616,8 +3600,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "arrow-ord", @@ -3625,6 +3609,7 @@ dependencies = [ "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", @@ -3637,8 +3622,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "async-trait", @@ -3652,8 +3637,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "datafusion-common", @@ -3669,8 +3654,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -3678,18 +3663,18 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ - "datafusion-expr", + "datafusion-doc", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] name = "datafusion-optimizer" -version = "49.0.0" 
-source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "chrono", @@ -3697,18 +3682,18 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "log", "recursive", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] name = "datafusion-orc" version = "0.4.1" -source = "git+https://github.com/GreptimeTeam/datafusion-orc?rev=a0a5f902158f153119316eaeec868cff3fc8a99d#a0a5f902158f153119316eaeec868cff3fc8a99d" +source = "git+https://github.com/datafusion-contrib/datafusion-orc?tag=v0.5.0#ad8b804a783a8f1f6c1135dfb59f9ca97a1de18c" dependencies = [ "arrow", "async-trait", @@ -3718,14 +3703,15 @@ dependencies = [ "futures", "futures-util", "object_store", - "orc-rust 0.6.0", + "orc-rust", "tokio", ] [[package]] name = "datafusion-pg-catalog" -version = "0.9.0" -source = "git+https://github.com/datafusion-contrib/datafusion-postgres?rev=3d1b7c7d5b82dd49bafc2803259365e633f654fa#3d1b7c7d5b82dd49bafc2803259365e633f654fa" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f258caedd1593e7dca3bf53912249de6685fa224bcce897ede1fbb7b040ac6f6" dependencies = [ "async-trait", "datafusion", @@ -3737,8 +3723,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", @@ -3749,17 +3735,31 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", - "log", + "parking_lot 0.12.4", "paste", - "petgraph 0.8.2", + "petgraph 0.8.3", +] + +[[package]] +name = "datafusion-physical-expr-adapter" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "itertools 0.14.0", ] [[package]] name = "datafusion-physical-expr-common" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", @@ -3771,8 +3771,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ 
"arrow", "datafusion-common", @@ -3784,14 +3784,13 @@ dependencies = [ "datafusion-physical-plan", "datafusion-pruning", "itertools 0.14.0", - "log", "recursive", ] [[package]] name = "datafusion-physical-plan" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "ahash 0.8.12", "arrow", @@ -3810,7 +3809,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.10.0", + "indexmap 2.11.4", "itertools 0.14.0", "log", "parking_lot 0.12.4", @@ -3820,11 +3819,10 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", @@ -3837,47 +3835,38 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ - "arrow", "async-trait", - "dashmap", "datafusion-common", - "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-physical-expr", "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", "parking_lot 0.12.4", - "tokio", ] [[package]] name = "datafusion-sql" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "arrow", "bigdecimal 0.4.8", + "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.10.0", + "indexmap 2.11.4", "log", "recursive", "regex", - "sqlparser 0.55.0", + "sqlparser", ] [[package]] name = "datafusion-substrait" -version = "49.0.0" -source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=7d5214512740b4dfb742b6b3d91ed9affcc2c9d0#7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" +version = "50.1.0" +source = "git+https://github.com/GreptimeTeam/datafusion.git?rev=78041eabebf014b8661f50d5ce8b3ae2f70105f7#78041eabebf014b8661f50d5ce8b3ae2f70105f7" dependencies = [ "async-recursion", "async-trait", @@ -3890,6 +3879,7 @@ dependencies = [ "substrait 0.58.0", "tokio", "url", + "uuid", ] [[package]] @@ -3929,6 +3919,7 @@ dependencies = [ "file-engine", "futures", "futures-util", + "hostname 0.4.1", "humantime-serde", "lazy_static", "log-store", @@ -3947,7 +3938,6 @@ dependencies = [ "session", "snafu 0.8.6", "store-api", - "substrait 0.18.0", "table", "tokio", "toml 0.8.23", @@ -3978,7 +3968,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + 
"sqlparser", "sqlparser_derive 0.1.1", ] @@ -4083,7 +4073,7 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4094,7 +4084,7 @@ checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4136,7 +4126,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4156,7 +4146,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core 0.20.2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4176,7 +4166,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "unicode-xid", ] @@ -4188,7 +4178,7 @@ checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4247,7 +4237,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4322,7 +4312,7 @@ checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4430,7 +4420,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4442,7 +4432,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4481,7 +4471,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4594,7 +4584,7 @@ checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" dependencies = [ "bit-set", "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -4706,9 +4696,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" dependencies = [ "crc32fast", "libz-rs-sys", @@ -4788,6 +4778,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-stat", "common-telemetry", "common-time", "common-version", @@ -4804,6 +4795,7 @@ dependencies = [ "futures", "get-size2", "greptime-proto", + "hostname 0.4.1", "http 1.3.1", "humantime-serde", "itertools 0.14.0", @@ -4868,9 +4860,9 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -4920,6 +4912,7 @@ dependencies = [ "datanode", "datatypes", "futures", + "hostname 0.4.1", "humantime", "humantime-serde", "lazy_static", @@ -4942,10 +4935,9 @@ dependencies = [ "session", "snafu 
0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "strfmt", - "substrait 0.18.0", "table", "tokio", "tokio-util", @@ -4985,7 +4977,7 @@ checksum = "a0b4095fc99e1d858e5b8c7125d2638372ec85aa0fe6c807105cf10b0265ca6c" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -4997,7 +4989,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5009,7 +5001,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5133,7 +5125,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -5325,7 +5317,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d75496d5d09dedcd0edcade57ccf0a522f4393ae#d75496d5d09dedcd0edcade57ccf0a522f4393ae" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=69a6089933daa573c96808ec4bbc48f447ec6e8c#69a6089933daa573c96808ec4bbc48f447ec6e8c" dependencies = [ "prost 0.13.5", "prost-types 0.13.5", @@ -5359,7 +5351,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.10.0", + "indexmap 2.11.4", "slab", "tokio", "tokio-util", @@ -5378,7 +5370,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.3.1", - "indexmap 2.10.0", + "indexmap 2.11.4", "slab", "tokio", "tokio-util", @@ -5464,6 +5456,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + [[package]] name = "hashlink" version = "0.10.0" @@ -5599,7 +5597,7 @@ checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" dependencies = [ "cfg-if", "libc", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -6018,9 +6016,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -6063,7 +6061,7 @@ dependencies = [ "libflate", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6119,6 +6117,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", + "store-api", "tantivy", "tantivy-jieba", "tempfile", @@ -6140,13 +6139,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.10.0" +version = "2.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.15.4", - "serde", + "hashbrown 0.16.0", ] [[package]] @@ -6162,7 +6160,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.12", - "indexmap 2.10.0", + "indexmap 2.11.4", "is-terminal", "itoa", "log", @@ -6185,7 +6183,7 @@ dependencies = [ "crossbeam-utils", "dashmap", "env_logger", - "indexmap 2.10.0", + 
"indexmap 2.11.4", "itoa", "log", "num-format", @@ -6227,7 +6225,7 @@ checksum = "6c38228f24186d9cc68c729accb4d413be9eaed6ad07ff79e0270d9e56f3de13" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6572,7 +6570,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee7893dab2e44ae5f9d0173f26ff4aa327c10b01b06a72b52dd9405b628640d" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] @@ -6657,7 +6655,7 @@ dependencies = [ "http 1.3.1", "json-patch", "k8s-openapi", - "schemars 0.8.22", + "schemars", "serde", "serde_json", "thiserror 1.0.69", @@ -6673,7 +6671,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6717,7 +6715,7 @@ dependencies = [ "lalrpop-util", "petgraph 0.7.1", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "sha3", "string_cache", "term", @@ -6759,7 +6757,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6782,7 +6780,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -6872,9 +6870,9 @@ checksum = "775bf80d5878ab7c2b1080b5351a48b2f737d9f6f8b383574eebcc22be0dfccb" [[package]] name = "libc" -version = "0.2.175" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libflate" @@ -7115,7 +7113,7 @@ dependencies = [ "num-traits", "quote", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "serde", "vergen", ] @@ -7130,7 +7128,7 @@ dependencies = [ "cactus", "cfgrammar", "filetime", - "indexmap 2.10.0", + "indexmap 2.11.4", "lazy_static", "lrtable", "num-traits", @@ -7412,6 +7410,7 @@ dependencies = [ "etcd-client", "futures", "h2 0.3.26", + "hostname 0.4.1", "http-body-util", "humantime", "humantime-serde", @@ -7547,6 +7546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -7609,6 +7609,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-stat", "common-telemetry", "common-test-util", "common-time", @@ -7624,6 +7625,7 @@ dependencies = [ "dotenv", "either", "futures", + "greptime-proto", "humantime-serde", "index", "itertools 0.14.0", @@ -7686,7 +7688,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -7785,7 +7787,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "termcolor", "thiserror 1.0.69", ] @@ -7803,7 +7805,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "termcolor", "thiserror 2.0.17", ] @@ -7939,7 +7941,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8166,7 +8168,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8279,7 +8281,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8291,7 +8293,7 @@ 
dependencies = [ "proc-macro-crate 3.3.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8348,9 +8350,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", "bytes", @@ -8664,7 +8666,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "substrait 0.18.0", "table", @@ -8674,31 +8676,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "orc-rust" -version = "0.6.0" -source = "git+https://github.com/GreptimeTeam/orc-rust?rev=d1690a06eec754e97beecf2cf7690267fc818726#d1690a06eec754e97beecf2cf7690267fc818726" -dependencies = [ - "arrow", - "async-trait", - "bytemuck", - "bytes", - "chrono", - "chrono-tz", - "fallible-streaming-iterator", - "flate2", - "futures", - "futures-util", - "lz4_flex", - "lzokay-native", - "num", - "prost 0.13.5", - "snafu 0.8.6", - "snap", - "tokio", - "zstd 0.13.3", -] - [[package]] name = "orc-rust" version = "0.6.3" @@ -8816,7 +8793,7 @@ dependencies = [ "otlp-model", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -8926,9 +8903,9 @@ dependencies = [ [[package]] name = "parquet" -version = "56.0.0" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7288a07ed5d25939a90f9cb1ca5afa6855faa08ec7700613511ae64bdb0620c" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -8945,13 +8922,12 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.15.4", + "hashbrown 0.16.0", "lz4_flex", "num", "num-bigint", "object_store", "paste", - "ring", "seq-macro", "simdutf8", "snap", @@ -8998,7 +8974,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "table", ] @@ -9095,9 +9071,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "permutation" @@ -9136,7 +9112,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9156,7 +9132,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset 0.4.2", - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] @@ -9166,26 +9142,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset 0.5.7", - "indexmap 2.10.0", + "indexmap 2.11.4", ] [[package]] name = "petgraph" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset 0.5.7", "hashbrown 0.15.4", - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", ] [[package]] name = "pgwire" version = "0.33.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f58d371668e6151da16be31308989058156c01257277ea8af0f97524e87cfa31" +source = "git+https://github.com/sunng87/pgwire.git?rev=658e37936da5039dfb8495af9cac7c511dbfaf90#658e37936da5039dfb8495af9cac7c511dbfaf90" dependencies = [ "async-trait", "base64 0.22.1", @@ -9201,6 +9176,8 @@ dependencies = [ "ring", "rust_decimal", "rustls-pki-types", + "serde", + "serde_json", "stringprep", "thiserror 2.0.17", "tokio", @@ -9303,7 +9280,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9440,7 +9417,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d77244ce2d584cd84f6a15f86195b8c9b2a0dfbfd817c09e0464244091a58ed" dependencies = [ "base64 0.22.1", - "indexmap 2.10.0", + "indexmap 2.11.4", "quick-xml 0.37.5", "serde", "time", @@ -9669,7 +9646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" dependencies = [ "proc-macro2", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9710,7 +9687,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9862,7 +9839,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.104", + "syn 2.0.106", "tempfile", ] @@ -9882,7 +9859,7 @@ dependencies = [ "prost 0.13.5", "prost-types 0.13.5", "regex", - "syn 2.0.104", + "syn 2.0.106", "tempfile", ] @@ -9909,7 +9886,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -9922,7 +9899,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10150,6 +10127,7 @@ dependencies = [ "num-traits", "object-store", "once_cell", + "parking_lot 0.12.4", "partition", "paste", "pretty_assertions", @@ -10164,7 +10142,7 @@ dependencies = [ "session", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "substrait 0.18.0", "table", @@ -10251,9 +10229,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] @@ -10440,7 +10418,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10478,7 +10456,7 @@ checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10490,7 +10468,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata 0.4.9", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10510,7 +10488,7 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10520,11 +10498,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c11639076bf147be211b90e47790db89f4c22b6c8a9ca6e960833869da67166" dependencies = [ "aho-corasick", - "indexmap 
2.10.0", + "indexmap 2.11.4", "itertools 0.13.0", "nohash", "regex", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", ] [[package]] @@ -10541,9 +10519,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "c3160422bbd54dd5ecfdca71e5fd59b7b8fe2b1697ab2baf64f6d05dcc66d298" [[package]] name = "regress" @@ -10867,7 +10845,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.104", + "syn 2.0.106", "unicode-ident", ] @@ -10879,7 +10857,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -10902,7 +10880,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.104", + "syn 2.0.106", "walkdir", ] @@ -11214,30 +11192,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "schemars" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - -[[package]] -name = "schemars" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1375ba8ef45a6f15d83fa8748f1079428295d403d6ea991d09ab100155fbc06d" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - [[package]] name = "schemars_derive" version = "0.8.22" @@ -11247,7 +11201,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11293,7 +11247,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "thiserror 2.0.17", ] @@ -11312,7 +11266,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11403,7 +11357,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11414,7 +11368,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11447,7 +11401,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11468,7 +11422,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11485,19 +11439,15 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.14.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +checksum = "21e47d95bc83ed33b2ecf84f4187ad1ab9685d18ff28db000c99deac8ce180e3" dependencies = [ - "base64 0.22.1", + "base64 0.21.7", "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.10.0", - "schemars 0.9.0", - "schemars 1.0.3", "serde", - "serde_derive", "serde_json", "serde_with_macros", "time", @@ -11505,14 +11455,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.14.0" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" +checksum = "ea3cee93715c2e266b9338b7544da68a9f24e227722ba482bd1c024367c77c65" dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11521,7 +11471,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "itoa", "ryu", "serde", @@ -11585,7 +11535,7 @@ dependencies = [ "humantime", "humantime-serde", "hyper 1.6.0", - "indexmap 2.10.0", + "indexmap 2.11.4", "influxdb_line_protocol", "itertools 0.14.0", "json5", @@ -11777,6 +11727,12 @@ dependencies = [ "wide", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + [[package]] name = "simd-json" version = "0.15.1" @@ -11909,7 +11865,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -11996,6 +11952,7 @@ dependencies = [ "common-macro", "common-query", "common-sql", + "common-telemetry", "common-time", "datafusion", "datafusion-common", @@ -12014,7 +11971,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlparser_derive 0.1.1", "store-api", "table", @@ -12071,26 +12028,14 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.55.0-greptime" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea#39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea" +version = "0.58.0" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1" dependencies = [ "lazy_static", "log", "recursive", "regex", "serde", - "sqlparser 0.55.0", - "sqlparser_derive 0.3.0-greptime", -] - -[[package]] -name = "sqlparser" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4521174166bac1ff04fe16ef4524c70144cd29682a45978978ca3d7f4e0be11" -dependencies = [ - "log", - "recursive", "sqlparser_derive 0.3.0", ] @@ -12105,25 +12050,14 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "sqlparser_derive" -version = "0.3.0-greptime" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea#39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.104", -] - [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12158,7 +12092,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.4", "hashlink", - "indexmap 2.10.0", + "indexmap 2.11.4", "log", "memchr", "once_cell", @@ -12186,7 +12120,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12209,7 +12143,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.104", + "syn 2.0.106", "tokio", "url", ] @@ -12354,6 +12288,7 @@ 
dependencies = [ "common-options", "common-procedure", "common-query", + "common-stat", "common-telemetry", "common-time", "common-version", @@ -12362,6 +12297,7 @@ dependencies = [ "file-engine", "flow", "frontend", + "hostname 0.4.1", "log-store", "mito2", "query", @@ -12410,7 +12346,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "strum 0.27.1", "tokio", "uuid", @@ -12481,6 +12417,12 @@ dependencies = [ "strum_macros 0.25.3", ] +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + [[package]] name = "strum" version = "0.27.1" @@ -12500,7 +12442,20 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", +] + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.106", ] [[package]] @@ -12513,7 +12468,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12559,12 +12514,12 @@ dependencies = [ "prost 0.13.5", "prost-build 0.13.5", "prost-types 0.13.5", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.104", + "syn 2.0.106", "typify 0.1.0", "walkdir", ] @@ -12584,12 +12539,12 @@ dependencies = [ "prost-build 0.13.5", "prost-types 0.13.5", "regress 0.10.3", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_yaml", - "syn 2.0.104", + "syn 2.0.106", "typify 0.4.2", "walkdir", ] @@ -12636,9 +12591,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -12668,7 +12623,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -12730,7 +12685,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.6", - "sqlparser 0.55.0-greptime", + "sqlparser", "store-api", "tokio", "tokio-util", @@ -12840,7 +12795,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" dependencies = [ "byteorder", - "regex-syntax 0.8.5", + "regex-syntax 0.8.7", "utf8-ranges", ] @@ -12994,13 +12949,13 @@ dependencies = [ "rand 0.9.1", "rand_chacha 0.9.0", "reqwest", - "schemars 0.8.22", + "schemars", "serde", "serde_json", "serde_yaml", "snafu 0.8.6", "sql", - "sqlparser 0.55.0-greptime", + "sqlparser", "sqlx", "store-api", "strum 0.27.1", @@ -13131,7 +13086,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13142,7 +13097,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13322,7 +13277,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13455,7 +13410,7 @@ version = "0.8.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", "serde_spanned", "toml_datetime", @@ -13477,7 +13432,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "toml_datetime", "winnow 0.5.40", ] @@ -13488,7 +13443,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.10.0", + "indexmap 2.11.4", "serde", "serde_spanned", "toml_datetime", @@ -13574,7 +13529,7 @@ dependencies = [ "prost-build 0.13.5", "prost-types 0.13.5", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13619,7 +13574,7 @@ dependencies = [ "futures-core", "futures-util", "hdrhistogram", - "indexmap 2.10.0", + "indexmap 2.11.4", "pin-project-lite", "slab", "sync_wrapper 1.0.2", @@ -13724,7 +13679,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13848,7 +13803,7 @@ checksum = "35f5380909ffc31b4de4f4bdf96b877175a016aa2ca98cee39fcfd8c4d53d952" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -13882,11 +13837,11 @@ dependencies = [ "proc-macro2", "quote", "regress 0.9.1", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", - "syn 2.0.104", + "syn 2.0.106", "thiserror 1.0.69", "unicode-ident", ] @@ -13902,11 +13857,11 @@ dependencies = [ "proc-macro2", "quote", "regress 0.10.3", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", - "syn 2.0.104", + "syn 2.0.106", "thiserror 2.0.17", "unicode-ident", ] @@ -13919,12 +13874,12 @@ checksum = "f8e6491896e955692d68361c68db2b263e3bec317ec0b684e0e2fa882fb6e31e" dependencies = [ "proc-macro2", "quote", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_tokenstream", - "syn 2.0.104", + "syn 2.0.106", "typify-impl 0.1.0", ] @@ -13936,12 +13891,12 @@ checksum = "7560adf816a1e8dad7c63d8845ef6e31e673e39eab310d225636779230cbedeb" dependencies = [ "proc-macro2", "quote", - "schemars 0.8.22", + "schemars", "semver", "serde", "serde_json", "serde_tokenstream", - "syn 2.0.104", + "syn 2.0.106", "typify-impl 0.4.2", ] @@ -14086,13 +14041,14 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", "percent-encoding", + "serde", ] [[package]] @@ -14127,9 +14083,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.17.0" +version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = 
[ "getrandom 0.3.3", "js-sys", @@ -14175,7 +14131,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "variadics", ] @@ -14260,7 +14216,7 @@ dependencies = [ "hostname 0.4.1", "iana-time-zone", "idna", - "indexmap 2.10.0", + "indexmap 2.11.4", "indoc", "influxdb-line-protocol", "itertools 0.14.0", @@ -14389,7 +14345,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-shared", ] @@ -14424,7 +14380,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -14570,7 +14526,7 @@ dependencies = [ "windows-collections", "windows-core 0.61.2", "windows-future", - "windows-link", + "windows-link 0.1.3", "windows-numerics", ] @@ -14603,7 +14559,7 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement 0.60.0", "windows-interface 0.59.1", - "windows-link", + "windows-link 0.1.3", "windows-result 0.3.4", "windows-strings", ] @@ -14615,7 +14571,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ "windows-core 0.61.2", - "windows-link", + "windows-link 0.1.3", "windows-threading", ] @@ -14627,7 +14583,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14638,7 +14594,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14649,7 +14605,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14660,7 +14616,7 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -14669,6 +14625,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-numerics" version = "0.2.0" @@ -14676,7 +14638,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ "windows-core 0.61.2", - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14694,7 +14656,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14703,7 +14665,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -14770,7 +14732,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows-link", + "windows-link 0.1.3", ] [[package]] @@ -15028,7 +14990,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -15049,7 +15011,7 @@ checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -15069,7 +15031,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", "synstructure", ] @@ -15090,7 +15052,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] @@ -15123,7 +15085,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn 2.0.106", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1185c9b2d573..46558b510098 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,12 +99,12 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } # See for more detaiils: https://github.com/rust-lang/cargo/issues/11329 ahash = { version = "0.8", features = ["compile-time-rng"] } aquamarine = "0.6" -arrow = { version = "56.0", features = ["prettyprint"] } -arrow-array = { version = "56.0", default-features = false, features = ["chrono-tz"] } -arrow-buffer = "56.0" -arrow-flight = "56.0" -arrow-ipc = { version = "56.0", default-features = false, features = ["lz4", "zstd"] } -arrow-schema = { version = "56.0", features = ["serde"] } +arrow = { version = "56.2", features = ["prettyprint"] } +arrow-array = { version = "56.2", default-features = false, features = ["chrono-tz"] } +arrow-buffer = "56.2" +arrow-flight = "56.2" +arrow-ipc = { version = "56.2", default-features = false, features = ["lz4", "zstd"] } +arrow-schema = { version = "56.2", features = ["serde"] } async-stream = "0.3" async-trait = "0.1" # Remember to update axum-extra, axum-macros when updating axum @@ -123,18 +123,18 @@ clap = { version = "4.4", features = ["derive"] } config = "0.13.0" crossbeam-utils = "0.8" dashmap = "6.1" -datafusion = "49" -datafusion-common = "49" -datafusion-expr = "49" -datafusion-functions = "49" -datafusion-functions-aggregate-common = "49" -datafusion-optimizer = "49" -datafusion-orc = { git = "https://github.com/GreptimeTeam/datafusion-orc", rev = "a0a5f902158f153119316eaeec868cff3fc8a99d" } -datafusion-pg-catalog = { git = "https://github.com/datafusion-contrib/datafusion-postgres", rev = "3d1b7c7d5b82dd49bafc2803259365e633f654fa" } -datafusion-physical-expr = "49" -datafusion-physical-plan = "49" -datafusion-sql = "49" -datafusion-substrait = "49" +datafusion = "50" +datafusion-common = "50" +datafusion-expr = "50" +datafusion-functions = "50" +datafusion-functions-aggregate-common = "50" +datafusion-optimizer = "50" +datafusion-orc = { git = "https://github.com/datafusion-contrib/datafusion-orc", tag = "v0.5.0" } +datafusion-pg-catalog = "0.11" +datafusion-physical-expr = "50" +datafusion-physical-plan = "50" +datafusion-sql = "50" +datafusion-substrait = "50" deadpool = "0.12" deadpool-postgres = "0.14" derive_builder = "0.20" @@ -147,7 +147,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d fst = "0.4.7" 
futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d75496d5d09dedcd0edcade57ccf0a522f4393ae" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69a6089933daa573c96808ec4bbc48f447ec6e8c" } hex = "0.4" http = "1" humantime = "2.1" @@ -180,7 +180,7 @@ otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "2 "server", ] } parking_lot = "0.12" -parquet = { version = "56.0", default-features = false, features = ["arrow", "async", "object_store"] } +parquet = { version = "56.2", default-features = false, features = ["arrow", "async", "object_store"] } paste = "1.0" pin-project = "1.0" pretty_assertions = "1.4.0" @@ -207,6 +207,7 @@ rstest_reuse = "0.7" rust_decimal = "1.33" rustc-hash = "2.0" # It is worth noting that we should try to avoid using aws-lc-rs until it can be compiled on various platforms. +hostname = "0.4.0" rustls = { version = "0.23.25", default-features = false } sea-query = "0.32" serde = { version = "1.0", features = ["derive"] } @@ -216,10 +217,7 @@ simd-json = "0.15" similar-asserts = "1.6.0" smallvec = { version = "1", features = ["serde"] } snafu = "0.8" -sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea", features = [ - "visitor", - "serde", -] } # branch = "v0.55.x" +sqlparser = { version = "0.58.0", default-features = false, features = ["std", "visitor", "serde"] } sqlx = { version = "0.8", features = [ "runtime-tokio-rustls", "mysql", @@ -321,16 +319,19 @@ git = "https://github.com/GreptimeTeam/greptime-meter.git" rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" [patch.crates-io] -datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } -datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" } +datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = 
"78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-physical-expr-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "78041eabebf014b8661f50d5ce8b3ae2f70105f7" } +sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" } # branch = "v0.58.x" [profile.release] debug = 1 diff --git a/Makefile b/Makefile index d94426554e2a..a200244029f1 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked IMAGE_REGISTRY ?= docker.io IMAGE_NAMESPACE ?= greptime IMAGE_TAG ?= latest -DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-f55023f3-20250829091211 +DEV_BUILDER_IMAGE_TAG ?= 2025-10-01-8fe17d43-20251011080129 BUILDX_MULTI_PLATFORM_BUILD ?= false BUILDX_BUILDER_NAME ?= gtbuilder BASE_IMAGE ?= ubuntu diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index a091d997997b..0b70ea865d6b 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -13,7 +13,6 @@ // limitations under the License. 
use std::collections::HashSet; -use std::sync::Arc; use common_decimal::Decimal128; use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION}; @@ -21,30 +20,22 @@ use common_time::time::Time; use common_time::timestamp::TimeUnit; use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp}; use datatypes::prelude::{ConcreteDataType, ValueRef}; -use datatypes::scalars::ScalarVector; use datatypes::types::{ - Int8Type, Int16Type, IntervalType, StructField, StructType, TimeType, TimestampType, UInt8Type, - UInt16Type, + IntervalType, JsonFormat, StructField, StructType, TimeType, TimestampType, }; use datatypes::value::{ ListValue, ListValueRef, OrderedF32, OrderedF64, StructValue, StructValueRef, Value, }; -use datatypes::vectors::{ - BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector, - Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector, - IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector, - TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector, - TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector, - UInt64Vector, VectorRef, -}; +use datatypes::vectors::VectorRef; use greptime_proto::v1::column_data_type_extension::TypeExt; use greptime_proto::v1::ddl_request::Expr; use greptime_proto::v1::greptime_request::Request; use greptime_proto::v1::query_request::Query; use greptime_proto::v1::value::ValueData; use greptime_proto::v1::{ - self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, - ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension, VectorTypeExtension, + self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonNativeTypeExtension, + JsonTypeExtension, ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension, + VectorTypeExtension, }; use paste::paste; use snafu::prelude::*; @@ -116,7 +107,30 @@ impl From for ConcreteDataType { ConcreteDataType::binary_datatype() } } - ColumnDataType::Json => ConcreteDataType::json_datatype(), + ColumnDataType::Json => { + let type_ext = datatype_wrapper + .datatype_ext + .as_ref() + .and_then(|datatype_ext| datatype_ext.type_ext.as_ref()); + match type_ext { + Some(TypeExt::JsonType(_)) => { + // legacy json type + ConcreteDataType::json_datatype() + } + Some(TypeExt::JsonNativeType(type_ext)) => { + // native json type + let inner_type = ColumnDataTypeWrapper { + datatype: type_ext.datatype(), + datatype_ext: type_ext.datatype_extension.clone().map(|d| *d), + }; + ConcreteDataType::json_native_datatype(inner_type.into()) + } + _ => { + // invalid state, type extension is missing or invalid + ConcreteDataType::null_datatype() + } + } + } ColumnDataType::String => ConcreteDataType::string_datatype(), ColumnDataType::Date => ConcreteDataType::date_datatype(), ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(), @@ -383,9 +397,28 @@ impl TryFrom for ColumnDataTypeWrapper { })), }) } - ColumnDataType::Json => datatype.as_json().map(|_| ColumnDataTypeExtension { - type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())), - }), + ColumnDataType::Json => { + if let Some(json_type) = datatype.as_json() { + match &json_type.format { + JsonFormat::Jsonb => Some(ColumnDataTypeExtension { + type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())), + }), + JsonFormat::Native(inner) => { + let inner_type = 
ColumnDataTypeWrapper::try_from(*inner.clone())?; + Some(ColumnDataTypeExtension { + type_ext: Some(TypeExt::JsonNativeType(Box::new( + JsonNativeTypeExtension { + datatype: inner_type.datatype.into(), + datatype_extension: inner_type.datatype_ext.map(Box::new), + }, + ))), + }) + } + } + } else { + None + } + } ColumnDataType::Vector => { datatype .as_vector() @@ -549,7 +582,10 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values ..Default::default() }, ColumnDataType::Json => Values { + // TODO(sunng87): remove this when we finally sunset legacy jsonb string_values: Vec::with_capacity(capacity), + // for native json + json_values: Vec::with_capacity(capacity), ..Default::default() }, ColumnDataType::Vector => Values { @@ -762,256 +798,23 @@ pub fn pb_value_to_value_ref<'a>( }; ValueRef::Struct(struct_value_ref) } - } -} - -pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) -> VectorRef { - match data_type { - ConcreteDataType::Boolean(_) => Arc::new(BooleanVector::from(values.bool_values)), - ConcreteDataType::Int8(_) => Arc::new(PrimitiveVector::::from_iter_values( - values.i8_values.into_iter().map(|x| x as i8), - )), - ConcreteDataType::Int16(_) => Arc::new(PrimitiveVector::::from_iter_values( - values.i16_values.into_iter().map(|x| x as i16), - )), - ConcreteDataType::Int32(_) => Arc::new(Int32Vector::from_vec(values.i32_values)), - ConcreteDataType::Int64(_) => Arc::new(Int64Vector::from_vec(values.i64_values)), - ConcreteDataType::UInt8(_) => Arc::new(PrimitiveVector::::from_iter_values( - values.u8_values.into_iter().map(|x| x as u8), - )), - ConcreteDataType::UInt16(_) => Arc::new(PrimitiveVector::::from_iter_values( - values.u16_values.into_iter().map(|x| x as u16), - )), - ConcreteDataType::UInt32(_) => Arc::new(UInt32Vector::from_vec(values.u32_values)), - ConcreteDataType::UInt64(_) => Arc::new(UInt64Vector::from_vec(values.u64_values)), - ConcreteDataType::Float32(_) => Arc::new(Float32Vector::from_vec(values.f32_values)), - ConcreteDataType::Float64(_) => Arc::new(Float64Vector::from_vec(values.f64_values)), - ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)), - ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)), - ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)), - ConcreteDataType::Timestamp(unit) => match unit { - TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec( - values.timestamp_second_values, - )), - TimestampType::Millisecond(_) => Arc::new(TimestampMillisecondVector::from_vec( - values.timestamp_millisecond_values, - )), - TimestampType::Microsecond(_) => Arc::new(TimestampMicrosecondVector::from_vec( - values.timestamp_microsecond_values, - )), - TimestampType::Nanosecond(_) => Arc::new(TimestampNanosecondVector::from_vec( - values.timestamp_nanosecond_values, - )), - }, - ConcreteDataType::Time(unit) => match unit { - TimeType::Second(_) => Arc::new(TimeSecondVector::from_iter_values( - values.time_second_values.iter().map(|x| *x as i32), - )), - TimeType::Millisecond(_) => Arc::new(TimeMillisecondVector::from_iter_values( - values.time_millisecond_values.iter().map(|x| *x as i32), - )), - TimeType::Microsecond(_) => Arc::new(TimeMicrosecondVector::from_vec( - values.time_microsecond_values, - )), - TimeType::Nanosecond(_) => Arc::new(TimeNanosecondVector::from_vec( - values.time_nanosecond_values, - )), - }, - - ConcreteDataType::Interval(unit) => match unit { - IntervalType::YearMonth(_) 
=> Arc::new(IntervalYearMonthVector::from_vec( - values.interval_year_month_values, - )), - IntervalType::DayTime(_) => Arc::new(IntervalDayTimeVector::from_iter_values( - values - .interval_day_time_values - .iter() - .map(|x| IntervalDayTime::from_i64(*x).into()), - )), - IntervalType::MonthDayNano(_) => { - Arc::new(IntervalMonthDayNanoVector::from_iter_values( - values - .interval_month_day_nano_values - .iter() - .map(|x| IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).into()), - )) - } - }, - ConcreteDataType::Decimal128(d) => Arc::new(Decimal128Vector::from_values( - values.decimal128_values.iter().map(|x| { - Decimal128::from_value_precision_scale(x.hi, x.lo, d.precision(), d.scale()).into() - }), - )), - ConcreteDataType::Vector(_) => Arc::new(BinaryVector::from_vec(values.binary_values)), - ConcreteDataType::Null(_) - | ConcreteDataType::List(_) - | ConcreteDataType::Struct(_) - | ConcreteDataType::Dictionary(_) - | ConcreteDataType::Duration(_) - | ConcreteDataType::Json(_) => { - unreachable!() - } - } -} -pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec { - match data_type { - ConcreteDataType::Int64(_) => values - .i64_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Float64(_) => values - .f64_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::String(_) => values - .string_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Boolean(_) => values - .bool_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Int8(_) => values - .i8_values - .into_iter() - // Safety: Since i32 only stores i8 data here, so i32 as i8 is safe. - .map(|val| (val as i8).into()) - .collect(), - ConcreteDataType::Int16(_) => values - .i16_values - .into_iter() - // Safety: Since i32 only stores i16 data here, so i32 as i16 is safe. - .map(|val| (val as i16).into()) - .collect(), - ConcreteDataType::Int32(_) => values - .i32_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::UInt8(_) => values - .u8_values - .into_iter() - // Safety: Since i32 only stores u8 data here, so i32 as u8 is safe. - .map(|val| (val as u8).into()) - .collect(), - ConcreteDataType::UInt16(_) => values - .u16_values - .into_iter() - // Safety: Since i32 only stores u16 data here, so i32 as u16 is safe. 
- .map(|val| (val as u16).into()) - .collect(), - ConcreteDataType::UInt32(_) => values - .u32_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::UInt64(_) => values - .u64_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Float32(_) => values - .f32_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Binary(_) => values - .binary_values - .into_iter() - .map(|val| val.into()) - .collect(), - ConcreteDataType::Date(_) => values - .date_values - .into_iter() - .map(|v| Value::Date(v.into())) - .collect(), - ConcreteDataType::Timestamp(TimestampType::Second(_)) => values - .timestamp_second_values - .into_iter() - .map(|v| Value::Timestamp(Timestamp::new_second(v))) - .collect(), - ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => values - .timestamp_millisecond_values - .into_iter() - .map(|v| Value::Timestamp(Timestamp::new_millisecond(v))) - .collect(), - ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => values - .timestamp_microsecond_values - .into_iter() - .map(|v| Value::Timestamp(Timestamp::new_microsecond(v))) - .collect(), - ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => values - .timestamp_nanosecond_values - .into_iter() - .map(|v| Value::Timestamp(Timestamp::new_nanosecond(v))) - .collect(), - ConcreteDataType::Time(TimeType::Second(_)) => values - .time_second_values - .into_iter() - .map(|v| Value::Time(Time::new_second(v))) - .collect(), - ConcreteDataType::Time(TimeType::Millisecond(_)) => values - .time_millisecond_values - .into_iter() - .map(|v| Value::Time(Time::new_millisecond(v))) - .collect(), - ConcreteDataType::Time(TimeType::Microsecond(_)) => values - .time_microsecond_values - .into_iter() - .map(|v| Value::Time(Time::new_microsecond(v))) - .collect(), - ConcreteDataType::Time(TimeType::Nanosecond(_)) => values - .time_nanosecond_values - .into_iter() - .map(|v| Value::Time(Time::new_nanosecond(v))) - .collect(), + ValueData::JsonValue(inner_value) => { + let json_datatype_ext = datatype_ext + .as_ref() + .and_then(|ext| { + if let Some(TypeExt::JsonNativeType(l)) = &ext.type_ext { + Some(l) + } else { + None + } + }) + .expect("json value must contain datatype ext"); - ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values - .interval_year_month_values - .into_iter() - .map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v))) - .collect(), - ConcreteDataType::Interval(IntervalType::DayTime(_)) => values - .interval_day_time_values - .into_iter() - .map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v))) - .collect(), - ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values - .interval_month_day_nano_values - .into_iter() - .map(|v| { - Value::IntervalMonthDayNano(IntervalMonthDayNano::new( - v.months, - v.days, - v.nanoseconds, - )) - }) - .collect(), - ConcreteDataType::Decimal128(d) => values - .decimal128_values - .into_iter() - .map(|v| { - Value::Decimal128(Decimal128::from_value_precision_scale( - v.hi, - v.lo, - d.precision(), - d.scale(), - )) - }) - .collect(), - ConcreteDataType::Vector(_) => values.binary_values.into_iter().map(|v| v.into()).collect(), - ConcreteDataType::Null(_) - | ConcreteDataType::List(_) - | ConcreteDataType::Struct(_) - | ConcreteDataType::Dictionary(_) - | ConcreteDataType::Duration(_) - | ConcreteDataType::Json(_) => { - unreachable!() + ValueRef::Json(Box::new(pb_value_to_value_ref( + inner_value, + json_datatype_ext.datatype_extension.as_deref(), + ))) } } } @@ 
-1133,6 +936,9 @@ pub fn to_proto_value(value: Value) -> v1::Value { items: convert_struct_to_pb_values(struct_value), })), }, + Value::Json(v) => v1::Value { + value_data: Some(ValueData::JsonValue(Box::new(to_proto_value(*v)))), + }, Value::Duration(_) => v1::Value { value_data: None }, } } @@ -1187,6 +993,7 @@ pub fn proto_value_type(value: &v1::Value) -> Option { ValueData::Decimal128Value(_) => ColumnDataType::Decimal128, ValueData::ListValue(_) => ColumnDataType::List, ValueData::StructValue(_) => ColumnDataType::Struct, + ValueData::JsonValue(_) => ColumnDataType::Json, }; Some(value_type) } @@ -1257,6 +1064,9 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue { .collect(); Some(ValueData::StructValue(v1::StructValue { items })) } + Value::Json(inner_value) => Some(ValueData::JsonValue(Box::new(value_to_grpc_value( + *inner_value, + )))), Value::Duration(_) => unreachable!(), }, } @@ -1350,13 +1160,11 @@ mod tests { use std::sync::Arc; use common_time::interval::IntervalUnit; - use datatypes::types::{ - Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType, - TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType, - UInt32Type, + use datatypes::scalars::ScalarVector; + use datatypes::types::{Int8Type, Int32Type, UInt8Type, UInt32Type}; + use datatypes::vectors::{ + BooleanVector, DateVector, Float32Vector, PrimitiveVector, StringVector, }; - use datatypes::vectors::BooleanVector; - use paste::paste; use super::*; use crate::v1::Column; @@ -1446,6 +1254,10 @@ mod tests { let values = values_with_capacity(ColumnDataType::Struct, 2); let values = values.struct_values; assert_eq!(2, values.capacity()); + + let values = values_with_capacity(ColumnDataType::Json, 2); + assert_eq!(2, values.json_values.capacity()); + assert_eq!(2, values.string_values.capacity()); } #[test] @@ -1569,6 +1381,54 @@ mod tests { ]) .into() ); + assert_eq!( + ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype( + struct_type.clone() + )), + ColumnDataTypeWrapper::new( + ColumnDataType::Json, + Some(ColumnDataTypeExtension { + type_ext: Some(TypeExt::JsonNativeType(Box::new(JsonNativeTypeExtension { + datatype: ColumnDataType::Struct.into(), + datatype_extension: Some(Box::new(ColumnDataTypeExtension { + type_ext: Some(TypeExt::StructType(StructTypeExtension { + fields: vec![ + v1::StructField { + name: "id".to_string(), + datatype: ColumnDataTypeWrapper::int64_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "name".to_string(), + datatype: ColumnDataTypeWrapper::string_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "age".to_string(), + datatype: ColumnDataTypeWrapper::int32_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "address".to_string(), + datatype: ColumnDataTypeWrapper::string_datatype() + .datatype() + .into(), + datatype_extension: None + } + ] + })) + })) + }))) + }) + ) + .into() + ) } #[test] @@ -1694,7 +1554,71 @@ mod tests { ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), true ) ])).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })") - ) + ); + + let struct_type = StructType::new(vec![ + StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true), + 
StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int32_datatype(), true), + StructField::new( + "address".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]); + assert_eq!( + ColumnDataTypeWrapper::new( + ColumnDataType::Json, + Some(ColumnDataTypeExtension { + type_ext: Some(TypeExt::JsonNativeType(Box::new(JsonNativeTypeExtension { + datatype: ColumnDataType::Struct.into(), + datatype_extension: Some(Box::new(ColumnDataTypeExtension { + type_ext: Some(TypeExt::StructType(StructTypeExtension { + fields: vec![ + v1::StructField { + name: "id".to_string(), + datatype: ColumnDataTypeWrapper::int64_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "name".to_string(), + datatype: ColumnDataTypeWrapper::string_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "age".to_string(), + datatype: ColumnDataTypeWrapper::int32_datatype() + .datatype() + .into(), + datatype_extension: None + }, + v1::StructField { + name: "address".to_string(), + datatype: ColumnDataTypeWrapper::string_datatype() + .datatype() + .into(), + datatype_extension: None + } + ] + })) + })) + }))) + }) + ), + ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype( + struct_type.clone() + )) + .try_into() + .expect("failed to convert json type") + ); } #[test] @@ -1706,269 +1630,6 @@ mod tests { assert_eq!(interval.nanoseconds, 3); } - #[test] - fn test_convert_timestamp_values() { - // second - let actual = pb_values_to_values( - &ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType)), - Values { - timestamp_second_values: vec![1_i64, 2_i64, 3_i64], - ..Default::default() - }, - ); - let expect = vec![ - Value::Timestamp(Timestamp::new_second(1_i64)), - Value::Timestamp(Timestamp::new_second(2_i64)), - Value::Timestamp(Timestamp::new_second(3_i64)), - ]; - assert_eq!(expect, actual); - - // millisecond - let actual = pb_values_to_values( - &ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType)), - Values { - timestamp_millisecond_values: vec![1_i64, 2_i64, 3_i64], - ..Default::default() - }, - ); - let expect = vec![ - Value::Timestamp(Timestamp::new_millisecond(1_i64)), - Value::Timestamp(Timestamp::new_millisecond(2_i64)), - Value::Timestamp(Timestamp::new_millisecond(3_i64)), - ]; - assert_eq!(expect, actual); - } - - #[test] - fn test_convert_time_values() { - // second - let actual = pb_values_to_values( - &ConcreteDataType::Time(TimeType::Second(TimeSecondType)), - Values { - time_second_values: vec![1_i64, 2_i64, 3_i64], - ..Default::default() - }, - ); - let expect = vec![ - Value::Time(Time::new_second(1_i64)), - Value::Time(Time::new_second(2_i64)), - Value::Time(Time::new_second(3_i64)), - ]; - assert_eq!(expect, actual); - - // millisecond - let actual = pb_values_to_values( - &ConcreteDataType::Time(TimeType::Millisecond(TimeMillisecondType)), - Values { - time_millisecond_values: vec![1_i64, 2_i64, 3_i64], - ..Default::default() - }, - ); - let expect = vec![ - Value::Time(Time::new_millisecond(1_i64)), - Value::Time(Time::new_millisecond(2_i64)), - Value::Time(Time::new_millisecond(3_i64)), - ]; - assert_eq!(expect, actual); - } - - #[test] - fn test_convert_interval_values() { - // year_month - let actual = pb_values_to_values( - &ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType)), - Values { - interval_year_month_values: 
vec![1_i32, 2_i32, 3_i32], - ..Default::default() - }, - ); - let expect = vec![ - Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)), - Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)), - Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)), - ]; - assert_eq!(expect, actual); - - // day_time - let actual = pb_values_to_values( - &ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType)), - Values { - interval_day_time_values: vec![1_i64, 2_i64, 3_i64], - ..Default::default() - }, - ); - let expect = vec![ - Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)), - Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)), - Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)), - ]; - assert_eq!(expect, actual); - - // month_day_nano - let actual = pb_values_to_values( - &ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType)), - Values { - interval_month_day_nano_values: vec![ - v1::IntervalMonthDayNano { - months: 1, - days: 2, - nanoseconds: 3, - }, - v1::IntervalMonthDayNano { - months: 5, - days: 6, - nanoseconds: 7, - }, - v1::IntervalMonthDayNano { - months: 9, - days: 10, - nanoseconds: 11, - }, - ], - ..Default::default() - }, - ); - let expect = vec![ - Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)), - Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)), - Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)), - ]; - assert_eq!(expect, actual); - } - - macro_rules! test_convert_values { - ($grpc_data_type: ident, $values: expr, $concrete_data_type: ident, $expected_ret: expr) => { - paste! { - #[test] - fn []() { - let values = Values { - [<$grpc_data_type _values>]: $values, - ..Default::default() - }; - - let data_type = ConcreteDataType::[<$concrete_data_type _datatype>](); - let result = pb_values_to_values(&data_type, values); - - assert_eq!( - $expected_ret, - result - ); - } - } - }; - } - - test_convert_values!( - i8, - vec![1_i32, 2, 3], - int8, - vec![Value::Int8(1), Value::Int8(2), Value::Int8(3)] - ); - - test_convert_values!( - u8, - vec![1_u32, 2, 3], - uint8, - vec![Value::UInt8(1), Value::UInt8(2), Value::UInt8(3)] - ); - - test_convert_values!( - i16, - vec![1_i32, 2, 3], - int16, - vec![Value::Int16(1), Value::Int16(2), Value::Int16(3)] - ); - - test_convert_values!( - u16, - vec![1_u32, 2, 3], - uint16, - vec![Value::UInt16(1), Value::UInt16(2), Value::UInt16(3)] - ); - - test_convert_values!( - i32, - vec![1, 2, 3], - int32, - vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)] - ); - - test_convert_values!( - u32, - vec![1, 2, 3], - uint32, - vec![Value::UInt32(1), Value::UInt32(2), Value::UInt32(3)] - ); - - test_convert_values!( - i64, - vec![1, 2, 3], - int64, - vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)] - ); - - test_convert_values!( - u64, - vec![1, 2, 3], - uint64, - vec![Value::UInt64(1), Value::UInt64(2), Value::UInt64(3)] - ); - - test_convert_values!( - f32, - vec![1.0, 2.0, 3.0], - float32, - vec![ - Value::Float32(1.0.into()), - Value::Float32(2.0.into()), - Value::Float32(3.0.into()) - ] - ); - - test_convert_values!( - f64, - vec![1.0, 2.0, 3.0], - float64, - vec![ - Value::Float64(1.0.into()), - Value::Float64(2.0.into()), - Value::Float64(3.0.into()) - ] - ); - - test_convert_values!( - string, - vec!["1".to_string(), "2".to_string(), "3".to_string()], - string, - vec![ - Value::String("1".into()), - Value::String("2".into()), - Value::String("3".into()) - ] - ); - - test_convert_values!( - binary, - vec!["1".into(), "2".into(), 
"3".into()], - binary, - vec![ - Value::Binary(b"1".to_vec().into()), - Value::Binary(b"2".to_vec().into()), - Value::Binary(b"3".to_vec().into()) - ] - ); - - test_convert_values!( - date, - vec![1, 2, 3], - date, - vec![ - Value::Date(1.into()), - Value::Date(2.into()), - Value::Date(3.into()) - ] - ); - #[test] fn test_vectors_to_rows_for_different_types() { let boolean_vec = BooleanVector::from_vec(vec![true, false, true]); diff --git a/src/catalog/src/system_schema/information_schema/cluster_info.rs b/src/catalog/src/system_schema/information_schema/cluster_info.rs index 4082629bd45f..f45dc5be06a7 100644 --- a/src/catalog/src/system_schema/information_schema/cluster_info.rs +++ b/src/catalog/src/system_schema/information_schema/cluster_info.rs @@ -50,8 +50,9 @@ const PEER_TYPE_METASRV: &str = "METASRV"; const PEER_ID: &str = "peer_id"; const PEER_TYPE: &str = "peer_type"; const PEER_ADDR: &str = "peer_addr"; -const CPUS: &str = "cpus"; -const MEMORY_BYTES: &str = "memory_bytes"; +const PEER_HOSTNAME: &str = "peer_hostname"; +const TOTAL_CPU_MILLICORES: &str = "total_cpu_millicores"; +const TOTAL_MEMORY_BYTES: &str = "total_memory_bytes"; const VERSION: &str = "version"; const GIT_COMMIT: &str = "git_commit"; const START_TIME: &str = "start_time"; @@ -66,14 +67,15 @@ const INIT_CAPACITY: usize = 42; /// - `peer_id`: the peer server id. /// - `peer_type`: the peer type, such as `datanode`, `frontend`, `metasrv` etc. /// - `peer_addr`: the peer gRPC address. -/// - `cpus`: the number of CPUs of the peer. -/// - `memory_bytes`: the memory bytes of the peer. +/// - `total_cpu_millicores`: the total CPU millicores of the peer. +/// - `total_memory_bytes`: the total memory bytes of the peer. /// - `version`: the build package version of the peer. /// - `git_commit`: the build git commit hash of the peer. /// - `start_time`: the starting time of the peer. /// - `uptime`: the uptime of the peer. /// - `active_time`: the time since the last activity of the peer. /// - `node_status`: the status info of the peer. +/// - `peer_hostname`: the hostname of the peer. 
/// #[derive(Debug)] pub(super) struct InformationSchemaClusterInfo { @@ -94,8 +96,17 @@ impl InformationSchemaClusterInfo { ColumnSchema::new(PEER_ID, ConcreteDataType::int64_datatype(), false), ColumnSchema::new(PEER_TYPE, ConcreteDataType::string_datatype(), false), ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true), - ColumnSchema::new(CPUS, ConcreteDataType::uint32_datatype(), false), - ColumnSchema::new(MEMORY_BYTES, ConcreteDataType::uint64_datatype(), false), + ColumnSchema::new(PEER_HOSTNAME, ConcreteDataType::string_datatype(), true), + ColumnSchema::new( + TOTAL_CPU_MILLICORES, + ConcreteDataType::uint32_datatype(), + false, + ), + ColumnSchema::new( + TOTAL_MEMORY_BYTES, + ConcreteDataType::uint64_datatype(), + false, + ), ColumnSchema::new(VERSION, ConcreteDataType::string_datatype(), false), ColumnSchema::new(GIT_COMMIT, ConcreteDataType::string_datatype(), false), ColumnSchema::new( @@ -155,6 +166,7 @@ struct InformationSchemaClusterInfoBuilder { peer_ids: Int64VectorBuilder, peer_types: StringVectorBuilder, peer_addrs: StringVectorBuilder, + peer_hostnames: StringVectorBuilder, cpus: UInt32VectorBuilder, memory_bytes: UInt64VectorBuilder, versions: StringVectorBuilder, @@ -173,6 +185,7 @@ impl InformationSchemaClusterInfoBuilder { peer_ids: Int64VectorBuilder::with_capacity(INIT_CAPACITY), peer_types: StringVectorBuilder::with_capacity(INIT_CAPACITY), peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY), + peer_hostnames: StringVectorBuilder::with_capacity(INIT_CAPACITY), cpus: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), memory_bytes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), versions: StringVectorBuilder::with_capacity(INIT_CAPACITY), @@ -203,6 +216,7 @@ impl InformationSchemaClusterInfoBuilder { (PEER_ID, &Value::from(peer_id)), (PEER_TYPE, &Value::from(peer_type)), (PEER_ADDR, &Value::from(node_info.peer.addr.as_str())), + (PEER_HOSTNAME, &Value::from(node_info.hostname.as_str())), (VERSION, &Value::from(node_info.version.as_str())), (GIT_COMMIT, &Value::from(node_info.git_commit.as_str())), ]; @@ -214,6 +228,7 @@ impl InformationSchemaClusterInfoBuilder { self.peer_ids.push(Some(peer_id)); self.peer_types.push(Some(peer_type)); self.peer_addrs.push(Some(&node_info.peer.addr)); + self.peer_hostnames.push(Some(&node_info.hostname)); self.versions.push(Some(&node_info.version)); self.git_commits.push(Some(&node_info.git_commit)); if node_info.start_time_ms > 0 { @@ -253,6 +268,7 @@ impl InformationSchemaClusterInfoBuilder { Arc::new(self.peer_ids.finish()), Arc::new(self.peer_types.finish()), Arc::new(self.peer_addrs.finish()), + Arc::new(self.peer_hostnames.finish()), Arc::new(self.cpus.finish()), Arc::new(self.memory_bytes.finish()), Arc::new(self.versions.finish()), diff --git a/src/catalog/src/system_schema/information_schema/tables.rs b/src/catalog/src/system_schema/information_schema/tables.rs index 01f6f0d0bd01..507dedc5475d 100644 --- a/src/catalog/src/system_schema/information_schema/tables.rs +++ b/src/catalog/src/system_schema/information_schema/tables.rs @@ -371,7 +371,8 @@ impl InformationSchemaTablesBuilder { self.auto_increment.push(Some(0)); self.row_format.push(Some("Fixed")); self.table_collation.push(Some("utf8_bin")); - self.update_time.push(None); + self.update_time + .push(Some(table_info.meta.updated_on.timestamp().into())); self.check_time.push(None); // use mariadb default table version number here self.version.push(Some(11)); diff --git a/src/catalog/src/system_schema/pg_catalog.rs 
b/src/catalog/src/system_schema/pg_catalog.rs index b3ddec5b3b80..08aad2d6dd3e 100644 --- a/src/catalog/src/system_schema/pg_catalog.rs +++ b/src/catalog/src/system_schema/pg_catalog.rs @@ -27,6 +27,7 @@ use datafusion::error::DataFusionError; use datafusion::execution::TaskContext; use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter; use datafusion_pg_catalog::pg_catalog::catalog_info::CatalogInfo; +use datafusion_pg_catalog::pg_catalog::context::EmptyContextProvider; use datafusion_pg_catalog::pg_catalog::{ PG_CATALOG_TABLES, PgCatalogSchemaProvider, PgCatalogStaticTables, PgCatalogTable, }; @@ -44,7 +45,7 @@ use crate::system_schema::{ /// [`PGCatalogProvider`] is the provider for a schema named `pg_catalog`, it is not a catalog. pub struct PGCatalogProvider { catalog_name: String, - inner: PgCatalogSchemaProvider, + inner: PgCatalogSchemaProvider, tables: HashMap, table_ids: HashMap<&'static str, u32>, } @@ -69,6 +70,7 @@ impl PGCatalogProvider { catalog_manager, }, Arc::new(static_tables), + EmptyContextProvider, ) .expect("Failed to initialize PgCatalogSchemaProvider"); diff --git a/src/cli/Cargo.toml b/src/cli/Cargo.toml index a824fda51194..3c3e91c403a8 100644 --- a/src/cli/Cargo.toml +++ b/src/cli/Cargo.toml @@ -61,7 +61,6 @@ servers.workspace = true session.workspace = true snafu.workspace = true store-api.workspace = true -substrait.workspace = true table.workspace = true tokio.workspace = true tracing-appender.workspace = true diff --git a/src/cli/src/bench.rs b/src/cli/src/bench.rs index 34fbbe9fac1d..4a5c676b147b 100644 --- a/src/cli/src/bench.rs +++ b/src/cli/src/bench.rs @@ -157,6 +157,7 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo { schema: RawSchema::new(column_schemas), engine: "mito".to_string(), created_on: chrono::DateTime::default(), + updated_on: chrono::DateTime::default(), primary_key_indices: vec![], next_column_id: columns as u32 + 1, value_indices: vec![], diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index 8baa1dc50d4e..744d13faeb77 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -82,7 +82,6 @@ similar-asserts.workspace = true snafu.workspace = true common-stat.workspace = true store-api.workspace = true -substrait.workspace = true table.workspace = true tokio.workspace = true toml.workspace = true diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs index 41c40da03271..7055be2498a9 100644 --- a/src/cmd/src/lib.rs +++ b/src/cmd/src/lib.rs @@ -18,7 +18,7 @@ use async_trait::async_trait; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_mem_prof::activate_heap_profile; -use common_stat::{get_cpu_limit, get_memory_limit}; +use common_stat::{get_total_cpu_millicores, get_total_memory_bytes}; use common_telemetry::{error, info, warn}; use crate::error::Result; @@ -125,7 +125,8 @@ pub fn log_versions(version: &str, short_version: &str, app: &str) { } pub fn create_resource_limit_metrics(app: &str) { - if let Some(cpu_limit) = get_cpu_limit() { + let cpu_limit = get_total_cpu_millicores(); + if cpu_limit > 0 { info!( "GreptimeDB start with cpu limit in millicores: {}", cpu_limit @@ -133,7 +134,8 @@ pub fn create_resource_limit_metrics(app: &str) { CPU_LIMIT.with_label_values(&[app]).set(cpu_limit); } - if let Some(memory_limit) = get_memory_limit() { + let memory_limit = get_total_memory_bytes(); + if memory_limit > 0 { info!( "GreptimeDB start with memory limit in bytes: {}", memory_limit diff --git a/src/cmd/src/standalone.rs 
b/src/cmd/src/standalone.rs index 19195b7567f0..2b1cc407ebf1 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -19,6 +19,7 @@ use std::{fs, path}; use async_trait::async_trait; use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry}; +use catalog::information_schema::InformationExtensionRef; use catalog::kvbackend::KvBackendCatalogManagerBuilder; use catalog::process_manager::ProcessManager; use clap::Parser; @@ -404,6 +405,8 @@ impl StartCommand { procedure_manager.clone(), )); + plugins.insert::<InformationExtensionRef>(information_extension.clone()); + let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None)); let builder = KvBackendCatalogManagerBuilder::new( information_extension.clone(), diff --git a/src/common/config/Cargo.toml b/src/common/config/Cargo.toml index 17279e5eb77a..1d2b21602f48 100644 --- a/src/common/config/Cargo.toml +++ b/src/common/config/Cargo.toml @@ -11,15 +11,14 @@ workspace = true common-base.workspace = true common-error.workspace = true common-macro.workspace = true +common-stat.workspace = true config.workspace = true humantime-serde.workspace = true -num_cpus.workspace = true object-store.workspace = true serde.workspace = true serde_json.workspace = true serde_with.workspace = true snafu.workspace = true -sysinfo.workspace = true toml.workspace = true [dev-dependencies] diff --git a/src/common/config/src/utils.rs b/src/common/config/src/utils.rs index 912fdb0c1335..1bc986b77ef6 100644 --- a/src/common/config/src/utils.rs +++ b/src/common/config/src/utils.rs @@ -13,61 +13,22 @@ // limitations under the License. use common_base::readable_size::ReadableSize; -use sysinfo::System; - -/// Get the CPU core number of system, aware of cgroups. -pub fn get_cpus() -> usize { - // This function will check cgroups - num_cpus::get() -} - -/// Get the total memory of the system. -/// If `cgroup_limits` is enabled, it will also check it. -pub fn get_sys_total_memory() -> Option<ReadableSize> { - if sysinfo::IS_SUPPORTED_SYSTEM { - let mut sys_info = System::new(); - sys_info.refresh_memory(); - let mut total_memory = sys_info.total_memory(); - // Compare with cgroups memory limit, use smaller values - // This method is only implemented for Linux. It always returns None for all other systems. - if let Some(cgroup_limits) = sys_info.cgroup_limits() { - total_memory = total_memory.min(cgroup_limits.total_memory) - } - Some(ReadableSize(total_memory)) - } else { - None - } -} +use common_stat::{get_total_cpu_millicores, get_total_memory_readable}; /// `ResourceSpec` holds the static resource specifications of a node, /// such as CPU cores and memory capacity. These values are fixed /// at startup and do not change dynamically during runtime.
#[derive(Debug, Clone, Copy)] pub struct ResourceSpec { - pub cpus: usize, + pub cpus: i64, pub memory: Option<ReadableSize>, } impl Default for ResourceSpec { fn default() -> Self { Self { - cpus: get_cpus(), - memory: get_sys_total_memory(), + cpus: get_total_cpu_millicores(), + memory: get_total_memory_readable(), } } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_cpus() { - assert!(get_cpus() > 0); - } - - #[test] - fn test_get_sys_total_memory() { - assert!(get_sys_total_memory().unwrap() > ReadableSize::mb(0)); - } -} diff --git a/src/common/datasource/src/file_format.rs b/src/common/datasource/src/file_format.rs index b6d4d6c30a4b..7c4e8d6c88e0 100644 --- a/src/common/datasource/src/file_format.rs +++ b/src/common/datasource/src/file_format.rs @@ -33,7 +33,7 @@ use bytes::{Buf, Bytes}; use datafusion::datasource::physical_plan::FileOpenFuture; use datafusion::error::{DataFusionError, Result as DataFusionResult}; use datafusion::physical_plan::SendableRecordBatchStream; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use object_store::ObjectStore; use snafu::ResultExt; use tokio_util::compat::FuturesAsyncWriteCompatExt; @@ -179,7 +179,7 @@ pub fn open_with_decoder DataFusionResult>( Poll::Ready(decoder.flush().transpose()) }); - Ok(stream.boxed()) + Ok(stream.map_err(Into::into).boxed()) })) } diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index d5b928e2a16f..e2917f8254d6 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -36,8 +36,8 @@ datafusion.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true datafusion-functions-aggregate-common.workspace = true -datafusion-pg-catalog.workspace = true datafusion-physical-expr.workspace = true +datafusion-pg-catalog.workspace = true datatypes.workspace = true derive_more = { version = "1", default-features = false, features = ["display"] } geo = { version = "0.29", optional = true } diff --git a/src/common/function/src/admin/flush_compact_table.rs b/src/common/function/src/admin/flush_compact_table.rs index 378b4181cd68..e45f2f0f84d5 100644 --- a/src/common/function/src/admin/flush_compact_table.rs +++ b/src/common/function/src/admin/flush_compact_table.rs @@ -37,6 +37,8 @@ const COMPACT_TYPE_STRICT_WINDOW: &str = "strict_window"; /// Compact type: strict window (short name). const COMPACT_TYPE_STRICT_WINDOW_SHORT: &str = "swcs"; +const DEFAULT_COMPACTION_PARALLELISM: u32 = 1; + #[admin_fn( name = FlushTableFunction, display_name = flush_table, @@ -95,7 +97,7 @@ pub(crate) async fn compact_table( query_ctx: &QueryContextRef, params: &[ValueRef<'_>], ) -> Result { - let request = parse_compact_params(params, query_ctx)?; + let request = parse_compact_request(params, query_ctx)?; info!("Compact table request: {:?}", request); let affected_rows = table_mutation_handler @@ -117,37 +119,46 @@ fn compact_signature() -> Signature { /// - `[<table_name>]`: only tables name provided, using default compaction type: regular /// - `[<table_name>, <type>]`: specify table name and compaction type. The compaction options will be default. /// - `[<table_name>, <type>, <options>]`: provides both type and type-specific options. -fn parse_compact_params( +/// - For `twcs`, it accepts `parallelism=[N]` where N is an unsigned 32-bit number +/// - For `swcs`, it accepts two numeric parameters: `parallelism` and `window`.
+fn parse_compact_request( params: &[ValueRef<'_>], query_ctx: &QueryContextRef, ) -> Result<CompactTableRequest> { ensure!( - !params.is_empty(), + !params.is_empty() && params.len() <= 3, InvalidFuncArgsSnafu { - err_msg: "Args cannot be empty", + err_msg: format!( + "The length of the args is not correct, expect 1-3, have: {}", + params.len() + ), } ); - let (table_name, compact_type) = match params { + let (table_name, compact_type, parallelism) = match params { + // 1. Only table name, strategy defaults to twcs and default parallelism. [ValueRef::String(table_name)] => ( table_name, compact_request::Options::Regular(Default::default()), + DEFAULT_COMPACTION_PARALLELISM, ), + // 2. Both table name and strategy are provided. [ ValueRef::String(table_name), ValueRef::String(compact_ty_str), ] => { - let compact_type = parse_compact_type(compact_ty_str, None)?; - (table_name, compact_type) + let (compact_type, parallelism) = parse_compact_options(compact_ty_str, None)?; + (table_name, compact_type, parallelism) } - + // 3. Table name, strategy and strategy specific options [ ValueRef::String(table_name), ValueRef::String(compact_ty_str), ValueRef::String(options_str), ] => { - let compact_type = parse_compact_type(compact_ty_str, Some(options_str))?; - (table_name, compact_type) + let (compact_type, parallelism) = + parse_compact_options(compact_ty_str, Some(options_str))?; + (table_name, compact_type, parallelism) } _ => { return UnsupportedInputDataTypeSnafu { @@ -167,35 +178,126 @@ fn parse_compact_params( schema_name, table_name, compact_options: compact_type, + parallelism, }) } -/// Parses compaction strategy type. For `strict_window` or `swcs` strict window compaction is chose, +/// Parses compaction strategy type. For `strict_window` or `swcs` strict window compaction is chosen, /// otherwise choose regular (TWCS) compaction. -fn parse_compact_type(type_str: &str, option: Option<&str>) -> Result<compact_request::Options> { +fn parse_compact_options( + type_str: &str, + option: Option<&str>, +) -> Result<(compact_request::Options, u32)> { if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW) | type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW_SHORT) { - let window_seconds = option - .map(|v| { - i64::from_str(v).map_err(|_| { - InvalidFuncArgsSnafu { - err_msg: format!( - "Compact window is expected to be a valid number, provided: {}", - v - ), + let Some(option_str) = option else { + return Ok(( + compact_request::Options::StrictWindow(StrictWindow { window_seconds: 0 }), + DEFAULT_COMPACTION_PARALLELISM, + )); + }; + + // For compatibility, accepts single number as window size.
+ if let Ok(window_seconds) = i64::from_str(option_str) { + return Ok(( + compact_request::Options::StrictWindow(StrictWindow { window_seconds }), + DEFAULT_COMPACTION_PARALLELISM, + )); + }; + + // Parse keyword arguments in forms: `key1=value1,key2=value2` + let mut window_seconds = 0i64; + let mut parallelism = DEFAULT_COMPACTION_PARALLELISM; + + let pairs: Vec<&str> = option_str.split(',').collect(); + for pair in pairs { + let kv: Vec<&str> = pair.trim().split('=').collect(); + if kv.len() != 2 { + return InvalidFuncArgsSnafu { + err_msg: format!("Invalid key-value pair: {}", pair.trim()), + } + .fail(); + } + + let key = kv[0].trim(); + let value = kv[1].trim(); + + match key { + "window" | "window_seconds" => { + window_seconds = i64::from_str(value).map_err(|_| { + InvalidFuncArgsSnafu { + err_msg: format!("Invalid value for window: {}", value), + } + .build() + })?; + } + "parallelism" => { + parallelism = value.parse::().map_err(|_| { + InvalidFuncArgsSnafu { + err_msg: format!("Invalid value for parallelism: {}", value), + } + .build() + })?; + } + _ => { + return InvalidFuncArgsSnafu { + err_msg: format!("Unknown parameter: {}", key), } - .build() - }) - }) - .transpose()? - .unwrap_or(0); - - Ok(compact_request::Options::StrictWindow(StrictWindow { - window_seconds, - })) + .fail(); + } + } + } + + Ok(( + compact_request::Options::StrictWindow(StrictWindow { window_seconds }), + parallelism, + )) } else { - Ok(compact_request::Options::Regular(Default::default())) + // TWCS strategy + let Some(option_str) = option else { + return Ok(( + compact_request::Options::Regular(Default::default()), + DEFAULT_COMPACTION_PARALLELISM, + )); + }; + + let mut parallelism = DEFAULT_COMPACTION_PARALLELISM; + let pairs: Vec<&str> = option_str.split(',').collect(); + for pair in pairs { + let kv: Vec<&str> = pair.trim().split('=').collect(); + if kv.len() != 2 { + return InvalidFuncArgsSnafu { + err_msg: format!("Invalid key-value pair: {}", pair.trim()), + } + .fail(); + } + + let key = kv[0].trim(); + let value = kv[1].trim(); + + match key { + "parallelism" => { + parallelism = value.parse::().map_err(|_| { + InvalidFuncArgsSnafu { + err_msg: format!("Invalid value for parallelism: {}", value), + } + .build() + })?; + } + _ => { + return InvalidFuncArgsSnafu { + err_msg: format!("Unknown parameter: {}", key), + } + .fail(); + } + } + } + + Ok(( + compact_request::Options::Regular(Default::default()), + parallelism, + )) } } @@ -301,7 +403,7 @@ mod tests { assert_eq!( expected, - &parse_compact_params(¶ms, &QueryContext::arc()).unwrap() + &parse_compact_request(¶ms, &QueryContext::arc()).unwrap() ); } } @@ -316,6 +418,7 @@ mod tests { schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::Regular(Default::default()), + parallelism: 1, }, ), ( @@ -325,6 +428,7 @@ mod tests { schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::Regular(Default::default()), + parallelism: 1, }, ), ( @@ -337,6 +441,7 @@ mod tests { schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::Regular(Default::default()), + parallelism: 1, }, ), ( @@ -346,6 +451,7 @@ mod tests { schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::Regular(Default::default()), + parallelism: 1, }, ), ( @@ -355,6 +461,7 @@ mod tests { schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: 
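The option string accepted above is a comma-separated list of key=value pairs (for example "window=3600,parallelism=2"). Below is a minimal standalone sketch of that parsing style, not the GreptimeDB implementation: the function name and the plain String error type are assumptions, and the default parallelism of 1 mirrors DEFAULT_COMPACTION_PARALLELISM.

// Sketch: parse "window=3600,parallelism=2" into (window_seconds, parallelism).
fn parse_compact_kv_options(options: &str) -> Result<(i64, u32), String> {
    let mut window_seconds = 0i64;
    let mut parallelism = 1u32; // assumed default, as DEFAULT_COMPACTION_PARALLELISM above

    for pair in options.split(',') {
        let mut kv = pair.trim().splitn(2, '=');
        let (key, value) = match (kv.next(), kv.next()) {
            (Some(k), Some(v)) => (k.trim(), v.trim()),
            _ => return Err(format!("Invalid key-value pair: {}", pair.trim())),
        };
        match key {
            "window" | "window_seconds" => {
                window_seconds = value
                    .parse::<i64>()
                    .map_err(|_| format!("Invalid value for window: {value}"))?;
            }
            "parallelism" => {
                parallelism = value
                    .parse::<u32>()
                    .map_err(|_| format!("Invalid value for parallelism: {value}"))?;
            }
            other => return Err(format!("Unknown parameter: {other}")),
        }
    }
    Ok((window_seconds, parallelism))
}

With this sketch, parse_compact_kv_options("window=3600,parallelism=2") returns Ok((3600, 2)), and an unknown key such as "foo=1" is rejected, loosely matching the error cases exercised by the tests that follow.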
Options::StrictWindow(StrictWindow { window_seconds: 0 }), + parallelism: 1, }, ), ( @@ -366,32 +473,94 @@ mod tests { compact_options: Options::StrictWindow(StrictWindow { window_seconds: 3600, }), + parallelism: 1, }, ), ( - &["table", "regular", "abcd"], + &["table", "swcs", "120"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::StrictWindow(StrictWindow { + window_seconds: 120, + }), + parallelism: 1, + }, + ), + // Test with parallelism parameter + ( + &["table", "regular", "parallelism=4"], CompactTableRequest { catalog_name: DEFAULT_CATALOG_NAME.to_string(), schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::Regular(Default::default()), + parallelism: 4, }, ), ( - &["table", "swcs", "120"], + &["table", "strict_window", "window=3600,parallelism=2"], CompactTableRequest { catalog_name: DEFAULT_CATALOG_NAME.to_string(), schema_name: DEFAULT_SCHEMA_NAME.to_string(), table_name: "table".to_string(), compact_options: Options::StrictWindow(StrictWindow { - window_seconds: 120, + window_seconds: 3600, + }), + parallelism: 2, + }, + ), + ( + &["table", "strict_window", "window=3600"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::StrictWindow(StrictWindow { + window_seconds: 3600, + }), + parallelism: 1, + }, + ), + ( + &["table", "strict_window", "window_seconds=7200"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::StrictWindow(StrictWindow { + window_seconds: 7200, + }), + parallelism: 1, + }, + ), + ( + &["table", "strict_window", "window=1800"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::StrictWindow(StrictWindow { + window_seconds: 1800, }), + parallelism: 1, + }, + ), + ( + &["table", "regular", "parallelism=8"], + CompactTableRequest { + catalog_name: DEFAULT_CATALOG_NAME.to_string(), + schema_name: DEFAULT_SCHEMA_NAME.to_string(), + table_name: "table".to_string(), + compact_options: Options::Regular(Default::default()), + parallelism: 8, }, ), ]); assert!( - parse_compact_params( + parse_compact_request( &["table", "strict_window", "abc"] .into_iter() .map(ValueRef::String) @@ -402,7 +571,7 @@ mod tests { ); assert!( - parse_compact_params( + parse_compact_request( &["a.b.table", "strict_window", "abc"] .into_iter() .map(ValueRef::String) @@ -411,5 +580,88 @@ mod tests { ) .is_err() ); + + // Test invalid parallelism + assert!( + parse_compact_request( + &["table", "regular", "options", "invalid"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + // Test too many parameters + assert!( + parse_compact_request( + &["table", "regular", "options", "4", "extra"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + // Test invalid keyword argument format + assert!( + parse_compact_request( + &["table", "strict_window", "window"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + // Test invalid keyword + assert!( + 
parse_compact_request( + &["table", "strict_window", "invalid_key=123"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + assert!( + parse_compact_request( + &["table", "regular", "abcd"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + // Test invalid window value + assert!( + parse_compact_request( + &["table", "strict_window", "window=abc"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); + + // Test invalid parallelism in options string + assert!( + parse_compact_request( + &["table", "strict_window", "parallelism=abc"] + .into_iter() + .map(ValueRef::String) + .collect::>(), + &QueryContext::arc(), + ) + .is_err() + ); } } diff --git a/src/common/function/src/aggrs/aggr_wrapper.rs b/src/common/function/src/aggrs/aggr_wrapper.rs index 4ee8190f2da2..ed691296ee66 100644 --- a/src/common/function/src/aggrs/aggr_wrapper.rs +++ b/src/common/function/src/aggrs/aggr_wrapper.rs @@ -22,6 +22,7 @@ //! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s input. //! +use std::hash::{Hash, Hasher}; use std::sync::Arc; use arrow::array::StructArray; @@ -272,7 +273,7 @@ impl StateMergeHelper { } /// Wrapper to make an aggregate function out of a state function. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StateWrapper { inner: AggregateUDF, name: String, @@ -616,6 +617,20 @@ impl AggregateUDFImpl for MergeWrapper { } } +impl PartialEq for MergeWrapper { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } +} + +impl Eq for MergeWrapper {} + +impl Hash for MergeWrapper { + fn hash(&self, state: &mut H) { + self.inner.hash(state); + } +} + /// The merge accumulator, which modify `update_batch`'s behavior to accept one struct array which /// include the state fields of original aggregate function, and merge said states into original accumulator /// the output is the same as original aggregate function diff --git a/src/common/function/src/aggrs/aggr_wrapper/tests.rs b/src/common/function/src/aggrs/aggr_wrapper/tests.rs index d24cdd8475a0..97a5a792d99d 100644 --- a/src/common/function/src/aggrs/aggr_wrapper/tests.rs +++ b/src/common/function/src/aggrs/aggr_wrapper/tests.rs @@ -39,8 +39,7 @@ use datafusion::prelude::SessionContext; use datafusion_common::arrow::array::AsArray; use datafusion_common::arrow::datatypes::{Float64Type, UInt64Type}; use datafusion_common::{Column, TableReference}; -use datafusion_expr::expr::AggregateFunction; -use datafusion_expr::sqlparser::ast::NullTreatment; +use datafusion_expr::expr::{AggregateFunction, NullTreatment}; use datafusion_expr::{ Aggregate, ColumnarValue, Expr, LogicalPlan, ScalarFunctionArgs, SortExpr, TableScan, lit, }; diff --git a/src/common/function/src/aggrs/count_hash.rs b/src/common/function/src/aggrs/count_hash.rs index ded88107e6d2..7cc594f2e3cb 100644 --- a/src/common/function/src/aggrs/count_hash.rs +++ b/src/common/function/src/aggrs/count_hash.rs @@ -68,7 +68,7 @@ impl CountHash { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct CountHash { signature: Signature, } diff --git a/src/common/function/src/aggrs/vector/product.rs b/src/common/function/src/aggrs/vector/product.rs index d22ea96f4533..d8f201966ab0 100644 --- a/src/common/function/src/aggrs/vector/product.rs +++ b/src/common/function/src/aggrs/vector/product.rs @@ -15,7 +15,7 
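The manual PartialEq, Eq and Hash impls introduced for MergeWrapper above (and for ScalarUdf and the admin_fn macro later in this diff) all follow the same delegate-to-one-field pattern: equality and hashing are defined by a single comparable field while the rest of the struct is ignored. A minimal, self-contained sketch of the pattern; the Wrapper type here is hypothetical.

use std::hash::{Hash, Hasher};

// Hypothetical wrapper whose identity is defined solely by `inner`;
// `extra` (e.g. cached, non-comparable state) does not participate.
struct Wrapper {
    inner: String,
    extra: Vec<u8>,
}

impl PartialEq for Wrapper {
    fn eq(&self, other: &Self) -> bool {
        self.inner == other.inner
    }
}

impl Eq for Wrapper {}

impl Hash for Wrapper {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Keep Hash consistent with PartialEq: hash exactly the fields compared in eq.
        self.inner.hash(state);
    }
}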
@@ use std::borrow::Cow; use std::sync::Arc; -use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray}; use arrow_schema::{DataType, Field}; use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; use datafusion_common::{Result, ScalarValue}; @@ -63,7 +63,7 @@ impl VectorProduct { } let t = args.schema.field(0).data_type(); - if !matches!(t, DataType::Utf8 | DataType::Binary) { + if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) { return Err(datafusion_common::DataFusionError::Internal(format!( "unexpected input datatype {t} when creating `VEC_PRODUCT`" ))); @@ -91,6 +91,13 @@ impl VectorProduct { .map(|x| x.map(Cow::Owned)) .collect::>>()? } + DataType::LargeUtf8 => { + let arr: &LargeStringArray = values[0].as_string(); + arr.iter() + .filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into))) + .map(|x: Result>| x.map(Cow::Owned)) + .collect::>>()? + } DataType::Binary => { let arr: &BinaryArray = values[0].as_binary(); arr.iter() diff --git a/src/common/function/src/aggrs/vector/sum.rs b/src/common/function/src/aggrs/vector/sum.rs index b6ff942791c2..3f875a8412ce 100644 --- a/src/common/function/src/aggrs/vector/sum.rs +++ b/src/common/function/src/aggrs/vector/sum.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray}; +use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray}; use arrow_schema::{DataType, Field}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ @@ -63,7 +63,7 @@ impl VectorSum { } let t = args.schema.field(0).data_type(); - if !matches!(t, DataType::Utf8 | DataType::Binary) { + if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) { return Err(datafusion_common::DataFusionError::Internal(format!( "unexpected input datatype {t} when creating `VEC_SUM`" ))); @@ -98,6 +98,21 @@ impl VectorSum { *self.inner(vec_column.len()) += vec_column; } } + DataType::LargeUtf8 => { + let arr: &LargeStringArray = values[0].as_string(); + for s in arr.iter() { + let Some(s) = s else { + if is_update { + self.has_null = true; + self.sum = None; + } + return Ok(()); + }; + let values = parse_veclit_from_strlit(s)?; + let vec_column = DVectorView::from_slice(&values, values.len()); + *self.inner(vec_column.len()) += vec_column; + } + } DataType::Binary => { let arr: &BinaryArray = values[0].as_binary(); for b in arr.iter() { diff --git a/src/common/function/src/scalars/geo/geohash.rs b/src/common/function/src/scalars/geo/geohash.rs index 2a9deddca609..90bb958246ce 100644 --- a/src/common/function/src/scalars/geo/geohash.rs +++ b/src/common/function/src/scalars/geo/geohash.rs @@ -76,7 +76,7 @@ impl Function for GeohashFunction { } fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { - Ok(DataType::Utf8) + Ok(DataType::Utf8View) } fn signature(&self) -> &Signature { @@ -176,7 +176,7 @@ impl Function for GeohashNeighboursFunction { Ok(DataType::List(Arc::new(Field::new( "item", DataType::Utf8View, - false, + true, )))) } diff --git a/src/common/function/src/scalars/geo/h3.rs b/src/common/function/src/scalars/geo/h3.rs index d90eed81434a..c6630525df28 100644 --- a/src/common/function/src/scalars/geo/h3.rs +++ b/src/common/function/src/scalars/geo/h3.rs @@ -355,9 +355,9 @@ impl Function for H3CellCenterLatLng { fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { 
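Related to the LargeUtf8 branches added to VEC_PRODUCT and VEC_SUM above: in arrow, the string offset width is a type parameter of GenericStringArray, so both layouts can be read through AsArray with only that parameter changing. A small sketch assuming just the arrow crate; the helper name is made up.

use arrow::array::{Array, ArrayRef, AsArray};
use arrow::datatypes::DataType;

// Collect the non-null string values of a Utf8 or LargeUtf8 array.
// Sketch only; production code would also handle Utf8View, Binary, etc.
fn collect_strings(array: &ArrayRef) -> Option<Vec<String>> {
    match array.data_type() {
        DataType::Utf8 => Some(
            array.as_string::<i32>().iter().flatten().map(str::to_owned).collect(),
        ),
        // LargeUtf8 uses 64-bit offsets; only the offset type parameter changes.
        DataType::LargeUtf8 => Some(
            array.as_string::<i64>().iter().flatten().map(str::to_owned).collect(),
        ),
        _ => None,
    }
}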
Ok(DataType::List(Arc::new(Field::new( - "x", + "item", DataType::Float64, - false, + true, )))) } diff --git a/src/common/function/src/scalars/json.rs b/src/common/function/src/scalars/json.rs index 9b022d71dab8..d3e38e8282db 100644 --- a/src/common/function/src/scalars/json.rs +++ b/src/common/function/src/scalars/json.rs @@ -18,6 +18,7 @@ mod json_path_exists; mod json_path_match; mod json_to_string; mod parse_json; +mod to_json; use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString}; use json_is::{ @@ -27,6 +28,7 @@ use json_to_string::JsonToStringFunction; use parse_json::ParseJsonFunction; use crate::function_registry::FunctionRegistry; +use crate::scalars::json::to_json::ToJsonFunction; pub(crate) struct JsonFunction; @@ -48,6 +50,8 @@ impl JsonFunction { registry.register_scalar(JsonIsArray::default()); registry.register_scalar(JsonIsObject::default()); + registry.register_scalar(ToJsonFunction::default()); + registry.register_scalar(json_path_exists::JsonPathExistsFunction::default()); registry.register_scalar(json_path_match::JsonPathMatchFunction::default()); } diff --git a/src/common/function/src/scalars/json/to_json.rs b/src/common/function/src/scalars/json/to_json.rs new file mode 100644 index 000000000000..1773de5e7a11 --- /dev/null +++ b/src/common/function/src/scalars/json/to_json.rs @@ -0,0 +1,206 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::str::FromStr; +use std::sync::Arc; + +use arrow::json::reader::infer_json_schema_from_iterator; +use arrow_schema::{ArrowError, Fields}; +use datafusion_common::DataFusionError; +use datafusion_common::arrow::array::{Array, AsArray}; +use datafusion_common::arrow::compute; +use datafusion_common::arrow::datatypes::DataType; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, Signature, Volatility}; +use datatypes::json::JsonStructureSettings; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::types::StructType; +use datatypes::vectors::StructVectorBuilder; +use derive_more::derive::Display; +use serde_json; + +use crate::function::{Function, extract_args}; + +/// Parses the `String` into `JSONB`. 
+#[derive(Clone, Debug, Display)] +#[display("{}", self.name())] +pub(crate) struct ToJsonFunction { + signature: Signature, +} + +impl Default for ToJsonFunction { + fn default() -> Self { + Self { + signature: Signature::string(1, Volatility::Immutable), + } + } +} + +const NAME: &str = "to_json"; + +impl Function for ToJsonFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _: &[DataType]) -> datafusion_common::Result { + // try not to provide exact struct fields + Ok(DataType::Struct(Fields::empty())) + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn invoke_with_args( + &self, + args: ScalarFunctionArgs, + ) -> datafusion_common::Result { + let [arg0] = extract_args(self.name(), &args)?; + let arg0 = compute::cast(&arg0, &DataType::Utf8View)?; + let json_strings = arg0.as_string_view(); + + let size = json_strings.len(); + + // Parse json_strings into serde_json::Value vector, mapping Arrow nulls to serde_json::Value::Null + let mut json_values = Vec::with_capacity(size); + for i in 0..size { + if json_strings.is_null(i) { + json_values.push(Ok(serde_json::Value::Null)); + } else { + let serde_json_value = + serde_json::Value::from_str(json_strings.value(i)).map_err(|e| { + ArrowError::JsonError(format!("Failed to parse JSON string: {}", e)) + }); + json_values.push(serde_json_value); + } + } + + // Convert JSON values to StructArray manually using builders + let json_values: Vec = json_values + .into_iter() + .map(|result| result.map_err(|e| DataFusionError::ArrowError(Box::new(e), None))) + .collect::>>()?; + + let inferred_schema = infer_json_schema_from_iterator( + json_values + .iter() + .filter(|v| v.is_object()) + .map(|v| Ok(v.clone())), + )?; + + let greptime_struct_type = StructType::try_from(&inferred_schema.fields).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to convert arrow type to greptime internal {}", + e + )) + })?; + let json_settings = JsonStructureSettings::Structured(Some(greptime_struct_type.clone())); + let greptime_values = json_values + .into_iter() + .map(|v| { + json_settings.encode(v).map_err(|e| { + DataFusionError::Execution(format!( + "Failed to encode serde_json to Greptime Value {}", + e + )) + }) + }) + .collect::>>()?; + let mut struct_vector_builder = StructVectorBuilder::with_type_and_capacity( + greptime_struct_type, + greptime_values.len(), + ); + for v in greptime_values.into_iter() { + if let Some(struct_value) = v.as_struct().map_err(|e| { + DataFusionError::Execution(format!("Failed to coerce value to struct: {}", e)) + })? 
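For context on the loop above: each slot of the input string array is parsed with serde_json, and an Arrow null slot (as well as the literal string "null") ends up as JSON null before schema inference. A reduced sketch of that stage, independent of the Greptime vector types; the function name is an assumption.

use serde_json::Value;

// Parse optional JSON strings; `None` (an Arrow null slot) maps to JSON null,
// and a malformed document surfaces as an error for that row.
fn parse_json_rows(rows: &[Option<&str>]) -> Result<Vec<Value>, serde_json::Error> {
    rows.iter()
        .map(|row| match row {
            None => Ok(Value::Null),
            Some(s) => serde_json::from_str(s),
        })
        .collect()
}

// parse_json_rows(&[Some(r#"{"name":"Alice","age":30}"#), None])
//   -> Ok(vec![<object>, Value::Null])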
{ + struct_vector_builder + .push_struct_value(struct_value) + .map_err(|e| { + DataFusionError::Execution(format!( + "Failed to convert to arrow array: {}", + e + )) + })?; + } else { + struct_vector_builder.push_null_struct_value(); + } + } + + let struct_vector = struct_vector_builder.finish(); + let struct_array = struct_vector.take_array(); + + Ok(ColumnarValue::Array(Arc::new(struct_array))) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use arrow::array::{StringArray, StructArray}; + use arrow::datatypes::{DataType, Field}; + use datafusion_common::config::ConfigOptions; + + use super::*; + + #[test] + fn test_to_json() { + let udf = ToJsonFunction::default(); + + // Create test JSON strings with consistent field types + let json_strings = vec![ + r#"{"name": "Alice", "age": 30}"#, + r#"{"name": "Bob", "age": 25}"#, + r#"null"#, + r#"{"name": "Charlie", "age": 35}"#, + ]; + + // Create StringArray from JSON strings + let string_array = Arc::new(StringArray::from(json_strings)); + + // Create ScalarFunctionArgs + let func_args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(string_array)], + arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))], + return_field: Arc::new(Field::new( + "result", + DataType::Struct(Fields::empty()), + true, + )), + number_rows: 3, + config_options: Arc::new(ConfigOptions::default()), + }; + + // Call invoke_with_args + let result = udf.invoke_with_args(func_args).unwrap(); + + // Verify the result is a StructArray + match result { + ColumnarValue::Array(array) => { + // The array should be a StructArray + assert_eq!(array.len(), 4); + // We can verify it's a struct array by checking the data type + assert!(matches!(array.data_type(), DataType::Struct(_))); + let struct_array = array.as_any().downcast_ref::().unwrap(); + assert_eq!(struct_array.num_columns(), 2); + assert!(struct_array.is_null(2)); + + // Verify that we have the expected number of rows processed + // (including null values) + } + _ => panic!("Expected ColumnarValue::Array"), + } + } +} diff --git a/src/common/function/src/scalars/udf.rs b/src/common/function/src/scalars/udf.rs index 503a66d331b3..eee3ede801dc 100644 --- a/src/common/function/src/scalars/udf.rs +++ b/src/common/function/src/scalars/udf.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; +use std::hash::{Hash, Hasher}; use datafusion::arrow::datatypes::DataType; use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl}; @@ -33,6 +34,20 @@ impl Debug for ScalarUdf { } } +impl PartialEq for ScalarUdf { + fn eq(&self, other: &Self) -> bool { + self.function.signature() == other.function.signature() + } +} + +impl Eq for ScalarUdf {} + +impl Hash for ScalarUdf { + fn hash(&self, state: &mut H) { + self.function.signature().hash(state) + } +} + impl ScalarUDFImpl for ScalarUdf { fn as_any(&self) -> &dyn Any { self diff --git a/src/common/grpc-expr/src/delete.rs b/src/common/grpc-expr/src/delete.rs deleted file mode 100644 index d48893a7d27d..000000000000 --- a/src/common/grpc-expr/src/delete.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
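The schema inference step used by to_json above relies on arrow's infer_json_schema_from_iterator, fed only with object rows. A hedged usage sketch; the expected field count is what arrow's inference typically yields for these literals, not something asserted by this patch.

use arrow::json::reader::infer_json_schema_from_iterator;
use arrow_schema::ArrowError;
use serde_json::json;

fn infer_example() -> Result<(), ArrowError> {
    // Only object rows participate, mirroring the is_object filter above.
    let rows = vec![
        json!({"name": "Alice", "age": 30}),
        json!({"name": "Bob", "age": 25}),
    ];
    let schema = infer_json_schema_from_iterator(rows.iter().map(|v| Ok(v.clone())))?;
    assert_eq!(schema.fields().len(), 2); // name and age
    Ok(())
}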
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; - -use api::helper::ColumnDataTypeWrapper; -use api::v1::{Column, DeleteRequest as GrpcDeleteRequest}; -use datatypes::prelude::ConcreteDataType; -use snafu::{ResultExt, ensure}; -use table::requests::DeleteRequest; - -use crate::error::{ColumnDataTypeSnafu, IllegalDeleteRequestSnafu, Result}; -use crate::insert::add_values_to_builder; - -pub fn to_table_delete_request( - catalog_name: &str, - schema_name: &str, - request: GrpcDeleteRequest, -) -> Result { - let row_count = request.row_count as usize; - - let mut key_column_values = HashMap::with_capacity(request.key_columns.len()); - for Column { - column_name, - values, - null_mask, - datatype, - datatype_extension, - .. - } in request.key_columns - { - let Some(values) = values else { continue }; - - let datatype: ConcreteDataType = - ColumnDataTypeWrapper::try_new(datatype, datatype_extension) - .context(ColumnDataTypeSnafu)? - .into(); - let vector = add_values_to_builder(datatype, values, row_count, null_mask)?; - - ensure!( - key_column_values - .insert(column_name.clone(), vector) - .is_none(), - IllegalDeleteRequestSnafu { - reason: format!("Duplicated column '{column_name}' in delete request.") - } - ); - } - - Ok(DeleteRequest { - catalog_name: catalog_name.to_string(), - schema_name: schema_name.to_string(), - table_name: request.table_name, - key_column_values, - }) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use api::v1::ColumnDataType; - use api::v1::column::Values; - use datatypes::prelude::{ScalarVector, VectorRef}; - use datatypes::vectors::{Int32Vector, StringVector}; - - use super::*; - - #[test] - fn test_to_table_delete_request() { - let grpc_request = GrpcDeleteRequest { - table_name: "foo".to_string(), - key_columns: vec![ - Column { - column_name: "id".to_string(), - values: Some(Values { - i32_values: vec![1, 2, 3], - ..Default::default() - }), - datatype: ColumnDataType::Int32 as i32, - ..Default::default() - }, - Column { - column_name: "name".to_string(), - values: Some(Values { - string_values: vec!["a".to_string(), "b".to_string(), "c".to_string()], - ..Default::default() - }), - datatype: ColumnDataType::String as i32, - ..Default::default() - }, - ], - row_count: 3, - }; - - let mut request = - to_table_delete_request("foo_catalog", "foo_schema", grpc_request).unwrap(); - - assert_eq!(request.catalog_name, "foo_catalog"); - assert_eq!(request.schema_name, "foo_schema"); - assert_eq!(request.table_name, "foo"); - assert_eq!( - Arc::new(Int32Vector::from_slice(vec![1, 2, 3])) as VectorRef, - request.key_column_values.remove("id").unwrap() - ); - assert_eq!( - Arc::new(StringVector::from_slice(&["a", "b", "c"])) as VectorRef, - request.key_column_values.remove("name").unwrap() - ); - assert!(request.key_column_values.is_empty()); - } -} diff --git a/src/common/grpc-expr/src/error.rs b/src/common/grpc-expr/src/error.rs index e4f441d88247..aab1adb672b5 100644 --- a/src/common/grpc-expr/src/error.rs +++ b/src/common/grpc-expr/src/error.rs @@ -25,13 +25,6 @@ use store_api::metadata::MetadataError; #[snafu(visibility(pub))] #[stack_trace_debug] pub 
enum Error { - #[snafu(display("Illegal delete request, reason: {reason}"))] - IllegalDeleteRequest { - reason: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Column datatype error"))] ColumnDataType { #[snafu(implicit)] @@ -65,13 +58,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to create vector"))] - CreateVector { - #[snafu(implicit)] - location: Location, - source: datatypes::error::Error, - }, - #[snafu(display("Missing required field in protobuf, field: {}", field))] MissingField { field: String, @@ -87,13 +73,6 @@ pub enum Error { source: api::error::Error, }, - #[snafu(display("Unexpected values length, reason: {}", reason))] - UnexpectedValuesLength { - reason: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unknown location type: {}", location_type))] UnknownLocationType { location_type: i32, @@ -189,18 +168,13 @@ pub type Result = std::result::Result; impl ErrorExt for Error { fn status_code(&self) -> StatusCode { match self { - Error::IllegalDeleteRequest { .. } => StatusCode::InvalidArguments, - Error::ColumnDataType { .. } => StatusCode::Internal, Error::DuplicatedTimestampColumn { .. } | Error::DuplicatedColumnName { .. } | Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments, - Error::CreateVector { .. } => StatusCode::InvalidArguments, Error::MissingField { .. } => StatusCode::InvalidArguments, Error::InvalidColumnDef { source, .. } => source.status_code(), - Error::UnexpectedValuesLength { .. } | Error::UnknownLocationType { .. } => { - StatusCode::InvalidArguments - } + Error::UnknownLocationType { .. } => StatusCode::InvalidArguments, Error::UnknownColumnDataType { .. } | Error::InvalidStringIndexColumnType { .. } => { StatusCode::InvalidArguments diff --git a/src/common/grpc-expr/src/insert.rs b/src/common/grpc-expr/src/insert.rs deleted file mode 100644 index d1e360d148fc..000000000000 --- a/src/common/grpc-expr/src/insert.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use api::helper; -use api::v1::column::Values; -use common_base::BitVec; -use datatypes::data_type::{ConcreteDataType, DataType}; -use datatypes::prelude::VectorRef; -use snafu::{ResultExt, ensure}; - -use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu}; - -pub(crate) fn add_values_to_builder( - data_type: ConcreteDataType, - values: Values, - row_count: usize, - null_mask: Vec, -) -> Result { - if null_mask.is_empty() { - Ok(helper::pb_values_to_vector_ref(&data_type, values)) - } else { - let builder = &mut data_type.create_mutable_vector(row_count); - let values = helper::pb_values_to_values(&data_type, values); - let null_mask = BitVec::from_vec(null_mask); - ensure!( - null_mask.count_ones() + values.len() == row_count, - UnexpectedValuesLengthSnafu { - reason: "If null_mask is not empty, the sum of the number of nulls and the length of values must be equal to row_count." 
- } - ); - - let mut idx_of_values = 0; - for idx in 0..row_count { - match is_null(&null_mask, idx) { - Some(true) => builder.push_null(), - _ => { - builder - .try_push_value_ref(&values[idx_of_values].as_value_ref()) - .context(CreateVectorSnafu)?; - idx_of_values += 1 - } - } - } - Ok(builder.to_vector()) - } -} - -fn is_null(null_mask: &BitVec, idx: usize) -> Option { - null_mask.get(idx).as_deref().copied() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_is_null() { - let null_mask = BitVec::from_slice(&[0b0000_0001, 0b0000_1000]); - - assert_eq!(Some(true), is_null(&null_mask, 0)); - assert_eq!(Some(false), is_null(&null_mask, 1)); - assert_eq!(Some(false), is_null(&null_mask, 10)); - assert_eq!(Some(true), is_null(&null_mask, 11)); - assert_eq!(Some(false), is_null(&null_mask, 12)); - - assert_eq!(None, is_null(&null_mask, 16)); - assert_eq!(None, is_null(&null_mask, 99)); - } -} diff --git a/src/common/grpc-expr/src/lib.rs b/src/common/grpc-expr/src/lib.rs index c774b751d416..0941cb650d0c 100644 --- a/src/common/grpc-expr/src/lib.rs +++ b/src/common/grpc-expr/src/lib.rs @@ -13,9 +13,7 @@ // limitations under the License. mod alter; -pub mod delete; pub mod error; -pub mod insert; pub mod util; pub use alter::{alter_expr_to_request, create_table_schema}; diff --git a/src/common/macro/src/admin_fn.rs b/src/common/macro/src/admin_fn.rs index ca97e5468f36..651c083ec84c 100644 --- a/src/common/macro/src/admin_fn.rs +++ b/src/common/macro/src/admin_fn.rs @@ -345,6 +345,20 @@ fn build_struct( Ok(datafusion_expr::ColumnarValue::Array(result_vector.to_arrow_array())) } } + + impl PartialEq for #name { + fn eq(&self, other: &Self) -> bool { + self.signature == other.signature + } + } + + impl Eq for #name {} + + impl std::hash::Hash for #name { + fn hash(&self, state: &mut H) { + self.signature.hash(state) + } + } } .into() } diff --git a/src/common/macro/src/row/schema.rs b/src/common/macro/src/row/schema.rs index 5e296c86004f..67848a36a0d4 100644 --- a/src/common/macro/src/row/schema.rs +++ b/src/common/macro/src/row/schema.rs @@ -90,6 +90,7 @@ fn impl_schema_method(fields: &[ParsedField<'_>]) -> Result { Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::VectorType(VectorTypeExtension { dim: #dim })) }) } } + // TODO(sunng87): revisit all these implementations Some(TypeExt::ListType(ext)) => { let item_type = syn::Ident::new(&ext.datatype.to_string(), ident.span()); quote! { @@ -108,6 +109,12 @@ fn impl_schema_method(fields: &[ParsedField<'_>]) -> Result { Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::StructType(StructTypeExtension { fields: [#(#fields),*] })) }) } } + Some(TypeExt::JsonNativeType(ext)) => { + let inner = syn::Ident::new(&ext.datatype.to_string(), ident.span()); + quote! { + Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::JsonNativeType(JsonNativeTypeExtension { datatype: #inner })) }) + } + } None => { quote! 
{ None } } diff --git a/src/common/meta/src/cluster.rs b/src/common/meta/src/cluster.rs index c7820477b7bb..63001970b6ae 100644 --- a/src/common/meta/src/cluster.rs +++ b/src/common/meta/src/cluster.rs @@ -124,6 +124,9 @@ pub struct NodeInfo { // The node build memory bytes #[serde(default)] pub memory_bytes: u64, + // The node build hostname + #[serde(default)] + pub hostname: String, } #[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)] @@ -332,6 +335,7 @@ mod tests { start_time_ms: 1, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; let node_info_bytes: Vec = node_info.try_into().unwrap(); diff --git a/src/common/meta/src/ddl/test_util/create_table.rs b/src/common/meta/src/ddl/test_util/create_table.rs index a4150fcb9c96..a6cc1b4cbf20 100644 --- a/src/common/meta/src/ddl/test_util/create_table.rs +++ b/src/common/meta/src/ddl/test_util/create_table.rs @@ -131,6 +131,7 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo { region_numbers: vec![], options: TableOptions::try_from_iter(&expr.table_options).unwrap(), created_on: DateTime::default(), + updated_on: DateTime::default(), partition_key_indices: vec![], column_ids: vec![], }, diff --git a/src/common/meta/src/key/table_info.rs b/src/common/meta/src/key/table_info.rs index 1bb099f44c5e..c93961f64343 100644 --- a/src/common/meta/src/key/table_info.rs +++ b/src/common/meta/src/key/table_info.rs @@ -287,8 +287,13 @@ mod tests { #[test] fn test_deserialization_compatibility() { - let s = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#; - let v = TableInfoValue::try_from_raw_value(s.as_bytes()).unwrap(); + let old_fmt = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on 
insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#; + let new_fmt = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#; + + let v = TableInfoValue::try_from_raw_value(old_fmt.as_bytes()).unwrap(); + let new_v = TableInfoValue::try_from_raw_value(new_fmt.as_bytes()).unwrap(); + assert_eq!(v, new_v); + assert_eq!(v.table_info.meta.created_on, v.table_info.meta.updated_on); assert!(v.table_info.meta.partition_key_indices.is_empty()); } @@ -328,6 +333,7 @@ mod tests { schema: RawSchema::from(&schema), engine: "mito".to_string(), created_on: chrono::DateTime::default(), + updated_on: chrono::DateTime::default(), primary_key_indices: vec![0, 1], next_column_id: 3, value_indices: vec![2, 3], diff --git a/src/common/meta/src/rpc/ddl.rs b/src/common/meta/src/rpc/ddl.rs index 9f0d69442a6e..b9a871775f0f 100644 --- a/src/common/meta/src/rpc/ddl.rs +++ b/src/common/meta/src/rpc/ddl.rs @@ -1503,6 +1503,7 @@ mod tests { region_numbers: vec![0], options: Default::default(), created_on: Default::default(), + updated_on: Default::default(), partition_key_indices: Default::default(), column_ids: Default::default(), }; diff --git a/src/common/meta/src/rpc/ddl/trigger.rs b/src/common/meta/src/rpc/ddl/trigger.rs index c231566cf382..bcce5ea371f9 100644 --- 
a/src/common/meta/src/rpc/ddl/trigger.rs +++ b/src/common/meta/src/rpc/ddl/trigger.rs @@ -12,8 +12,7 @@ use api::v1::{ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; -use crate::error; -use crate::error::Result; +use crate::error::{self, Result, TooLargeDurationSnafu}; use crate::rpc::ddl::DdlTask; // Create trigger @@ -27,7 +26,11 @@ pub struct CreateTriggerTask { pub labels: HashMap, pub annotations: HashMap, pub interval: Duration, - pub raw_interval_expr: String, + pub raw_interval_expr: Option, + pub r#for: Option, + pub for_raw_expr: Option, + pub keep_firing_for: Option, + pub keep_firing_for_raw_expr: Option, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -62,10 +65,20 @@ impl TryFrom for PbCreateTriggerTask { .map(PbNotifyChannel::from) .collect(); - let interval = task - .interval - .try_into() - .context(error::TooLargeDurationSnafu)?; + let interval = task.interval.try_into().context(TooLargeDurationSnafu)?; + let raw_interval_expr = task.raw_interval_expr.unwrap_or_default(); + + let r#for = task + .r#for + .map(|d| d.try_into().context(TooLargeDurationSnafu)) + .transpose()?; + let for_raw_expr = task.for_raw_expr.unwrap_or_default(); + + let keep_firing_for = task + .keep_firing_for + .map(|d| d.try_into().context(TooLargeDurationSnafu)) + .transpose()?; + let keep_firing_for_raw_expr = task.keep_firing_for_raw_expr.unwrap_or_default(); let expr = PbCreateTriggerExpr { catalog_name: task.catalog_name, @@ -76,7 +89,11 @@ impl TryFrom for PbCreateTriggerTask { labels: task.labels, annotations: task.annotations, interval: Some(interval), - raw_interval_expr: task.raw_interval_expr, + raw_interval_expr, + r#for, + for_raw_expr, + keep_firing_for, + keep_firing_for_raw_expr, }; Ok(PbCreateTriggerTask { @@ -102,6 +119,26 @@ impl TryFrom for CreateTriggerTask { let interval = expr.interval.context(error::MissingIntervalSnafu)?; let interval = interval.try_into().context(error::NegativeDurationSnafu)?; + let r#for = expr + .r#for + .map(Duration::try_from) + .transpose() + .context(error::NegativeDurationSnafu)?; + + let keep_firing_for = expr + .keep_firing_for + .map(Duration::try_from) + .transpose() + .context(error::NegativeDurationSnafu)?; + + let raw_interval_expr = + (!expr.raw_interval_expr.is_empty()).then_some(expr.raw_interval_expr); + + let for_raw_expr = (!expr.for_raw_expr.is_empty()).then_some(expr.for_raw_expr); + + let keep_firing_for_raw_expr = + (!expr.keep_firing_for_raw_expr.is_empty()).then_some(expr.keep_firing_for_raw_expr); + let task = CreateTriggerTask { catalog_name: expr.catalog_name, trigger_name: expr.trigger_name, @@ -111,7 +148,11 @@ impl TryFrom for CreateTriggerTask { labels: expr.labels, annotations: expr.annotations, interval, - raw_interval_expr: expr.raw_interval_expr, + raw_interval_expr, + r#for, + for_raw_expr, + keep_firing_for, + keep_firing_for_raw_expr, }; Ok(task) } @@ -271,7 +312,11 @@ mod tests { .into_iter() .collect(), interval: Duration::from_secs(60), - raw_interval_expr: "'1 minute'::INTERVAL".to_string(), + raw_interval_expr: Some("'1 minute'::INTERVAL".to_string()), + r#for: Duration::from_secs(300).into(), + for_raw_expr: Some("'5 minute'::INTERVAL".to_string()), + keep_firing_for: Duration::from_secs(600).into(), + keep_firing_for_raw_expr: Some("'10 minute'::INTERVAL".to_string()), }; let pb_task: PbCreateTriggerTask = original.clone().try_into().unwrap(); @@ -306,6 +351,14 @@ mod tests { assert_eq!(original.labels, round_tripped.labels); assert_eq!(original.annotations, 
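Two small idioms recur in the trigger conversions above: an optional duration is converted with map plus transpose so a failure inside the Option still propagates, and the optional raw expressions round-trip through protobuf's non-optional strings by treating the empty string as "not set". A compact standalone sketch, with std::time::Duration and a String error standing in for the prost and snafu types used in the real code:

use std::time::Duration;

// Convert an optional seconds value, propagating a per-value error.
fn opt_duration(seconds: Option<i64>) -> Result<Option<Duration>, String> {
    seconds
        .map(|s| {
            u64::try_from(s)
                .map(Duration::from_secs)
                .map_err(|_| "negative duration".to_string())
        })
        .transpose()
}

// Protobuf string fields are not optional, so "" encodes "not set".
fn opt_expr(raw: String) -> Option<String> {
    (!raw.is_empty()).then_some(raw)
}

// opt_duration(Some(300)) == Ok(Some(Duration::from_secs(300)))
// opt_duration(None)      == Ok(None)
// opt_expr(String::new()) == None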
round_tripped.annotations); assert_eq!(original.interval, round_tripped.interval); + assert_eq!(original.raw_interval_expr, round_tripped.raw_interval_expr); + assert_eq!(original.r#for, round_tripped.r#for); + assert_eq!(original.for_raw_expr, round_tripped.for_raw_expr); + assert_eq!(original.keep_firing_for, round_tripped.keep_firing_for); + assert_eq!( + original.keep_firing_for_raw_expr, + round_tripped.keep_firing_for_raw_expr + ); // Invalid, since create_trigger is None and it's required. let invalid_task = PbCreateTriggerTask { diff --git a/src/common/recordbatch/src/lib.rs b/src/common/recordbatch/src/lib.rs index c008a9312e37..7ae4a419d6fa 100644 --- a/src/common/recordbatch/src/lib.rs +++ b/src/common/recordbatch/src/lib.rs @@ -33,7 +33,7 @@ use datatypes::arrow::util::pretty; use datatypes::prelude::{ConcreteDataType, VectorRef}; use datatypes::scalars::{ScalarVector, ScalarVectorBuilder}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; -use datatypes::types::json_type_value_to_string; +use datatypes::types::{JsonFormat, jsonb_to_string}; use datatypes::vectors::{BinaryVector, StringVectorBuilder}; use error::Result; use futures::task::{Context, Poll}; @@ -90,32 +90,34 @@ pub fn map_json_type_to_string( ) -> Result { let mut vectors = Vec::with_capacity(original_schema.column_schemas().len()); for (vector, schema) in batch.columns.iter().zip(original_schema.column_schemas()) { - if let ConcreteDataType::Json(j) = schema.data_type { - let mut string_vector_builder = StringVectorBuilder::with_capacity(vector.len()); - let binary_vector = vector - .as_any() - .downcast_ref::() - .with_context(|| error::DowncastVectorSnafu { - from_type: schema.data_type.clone(), - to_type: ConcreteDataType::binary_datatype(), - })?; - for value in binary_vector.iter_data() { - let Some(value) = value else { - string_vector_builder.push(None); - continue; - }; - let string_value = - json_type_value_to_string(value, &j.format).with_context(|_| { - error::CastVectorSnafu { + if let ConcreteDataType::Json(j) = &schema.data_type { + if matches!(&j.format, JsonFormat::Jsonb) { + let mut string_vector_builder = StringVectorBuilder::with_capacity(vector.len()); + let binary_vector = vector + .as_any() + .downcast_ref::() + .with_context(|| error::DowncastVectorSnafu { + from_type: schema.data_type.clone(), + to_type: ConcreteDataType::binary_datatype(), + })?; + for value in binary_vector.iter_data() { + let Some(value) = value else { + string_vector_builder.push(None); + continue; + }; + let string_value = + jsonb_to_string(value).with_context(|_| error::CastVectorSnafu { from_type: schema.data_type.clone(), to_type: ConcreteDataType::string_datatype(), - } - })?; - string_vector_builder.push(Some(string_value.as_str())); - } + })?; + string_vector_builder.push(Some(string_value.as_str())); + } - let string_vector = string_vector_builder.finish(); - vectors.push(Arc::new(string_vector) as VectorRef); + let string_vector = string_vector_builder.finish(); + vectors.push(Arc::new(string_vector) as VectorRef); + } else { + vectors.push(vector.clone()); + } } else { vectors.push(vector.clone()); } diff --git a/src/common/sql/src/convert.rs b/src/common/sql/src/convert.rs index 690788feef62..0ff2e4406162 100644 --- a/src/common/sql/src/convert.rs +++ b/src/common/sql/src/convert.rs @@ -16,9 +16,10 @@ use std::str::FromStr; use common_time::Timestamp; use common_time::timezone::Timezone; +use datatypes::json::JsonStructureSettings; use datatypes::prelude::ConcreteDataType; use 
datatypes::schema::ColumnDefaultConstraint; -use datatypes::types::{parse_string_to_json_type_value, parse_string_to_vector_type_value}; +use datatypes::types::{JsonFormat, parse_string_to_jsonb, parse_string_to_vector_type_value}; use datatypes::value::{OrderedF32, OrderedF64, Value}; use snafu::{OptionExt, ResultExt, ensure}; pub use sqlparser::ast::{ @@ -210,7 +211,8 @@ pub fn sql_value_to_value( | Value::Duration(_) | Value::IntervalYearMonth(_) | Value::IntervalDayTime(_) - | Value::IntervalMonthDayNano(_) => match unary_op { + | Value::IntervalMonthDayNano(_) + | Value::Json(_) => match unary_op { UnaryOperator::Plus => {} UnaryOperator::Minus => { value = value @@ -297,8 +299,21 @@ pub(crate) fn parse_string_to_value( } ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())), ConcreteDataType::Json(j) => { - let v = parse_string_to_json_type_value(&s, &j.format).context(DatatypeSnafu)?; - Ok(Value::Binary(v.into())) + match &j.format { + JsonFormat::Jsonb => { + let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?; + Ok(Value::Binary(v.into())) + } + JsonFormat::Native(_inner) => { + // Always use the structured version at this level. + let serde_json_value = + serde_json::from_str(&s).context(DeserializeSnafu { json: s })?; + let json_structure_settings = JsonStructureSettings::Structured(None); + json_structure_settings + .encode(serde_json_value) + .context(DatatypeSnafu) + } + } } ConcreteDataType::Vector(d) => { let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?; diff --git a/src/common/stat/Cargo.toml b/src/common/stat/Cargo.toml index 02757e687dbd..3d0198f6a2fc 100644 --- a/src/common/stat/Cargo.toml +++ b/src/common/stat/Cargo.toml @@ -5,9 +5,12 @@ edition.workspace = true license.workspace = true [dependencies] +common-base.workspace = true lazy_static.workspace = true nix.workspace = true +num_cpus.workspace = true prometheus.workspace = true +sysinfo.workspace = true [lints] workspace = true diff --git a/src/common/stat/src/cgroups.rs b/src/common/stat/src/cgroups.rs index 45d66e6f357e..fe26f5ec3607 100644 --- a/src/common/stat/src/cgroups.rs +++ b/src/common/stat/src/cgroups.rs @@ -23,9 +23,6 @@ use prometheus::core::{Collector, Desc}; use prometheus::proto::MetricFamily; use prometheus::{IntGauge, Opts}; -/// `MAX_VALUE` is used to indicate that the resource is unlimited. -pub const MAX_VALUE: i64 = -1; - const CGROUP_UNIFIED_MOUNTPOINT: &str = "/sys/fs/cgroup"; const MEMORY_MAX_FILE_CGROUP_V2: &str = "memory.max"; @@ -43,11 +40,11 @@ const MAX_VALUE_CGROUP_V2: &str = "max"; // For easier comparison, if the memory limit is larger than 1PB we consider it as unlimited. const MAX_MEMORY_IN_BYTES: i64 = 1125899906842624; // 1PB -/// Get the limit of memory in bytes. +/// Get the limit of memory in bytes from cgroups filesystem. /// -/// - If the memory is unlimited, return `-1`. +/// - If the cgroup total memory is unset, return `None`. /// - Return `None` if it fails to read the memory limit or not on linux. -pub fn get_memory_limit() -> Option { +pub fn get_memory_limit_from_cgroups() -> Option { #[cfg(target_os = "linux")] { let memory_max_file = if is_cgroup_v2()? { @@ -58,13 +55,13 @@ pub fn get_memory_limit() -> Option { MEMORY_MAX_FILE_CGROUP_V1 }; - // For cgroup v1, it will return a very large value(different from platform) if the memory is unlimited. + // For cgroup v1, it will return a very large value(different from platform) if the memory is unset. 
let memory_limit = read_value_from_file(Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(memory_max_file))?; - // If memory limit exceeds 1PB(cgroup v1), consider it as unlimited. + // If memory limit exceeds 1PB(cgroup v1), consider it as unset. if memory_limit > MAX_MEMORY_IN_BYTES { - return Some(MAX_VALUE); + return None; } Some(memory_limit) } @@ -73,10 +70,10 @@ pub fn get_memory_limit() -> Option { None } -/// Get the usage of memory in bytes. +/// Get the usage of memory in bytes from cgroups filesystem. /// /// - Return `None` if it fails to read the memory usage or not on linux or cgroup is v1. -pub fn get_memory_usage() -> Option { +pub fn get_memory_usage_from_cgroups() -> Option { #[cfg(target_os = "linux")] { if is_cgroup_v2()? { @@ -93,11 +90,11 @@ pub fn get_memory_usage() -> Option { None } -/// Get the limit of cpu in millicores. +/// Get the limit of cpu in millicores from cgroups filesystem. /// -/// - If the cpu is unlimited, return `-1`. +/// - If the cpu limit is unset, return `None`. /// - Return `None` if it fails to read the cpu limit or not on linux. -pub fn get_cpu_limit() -> Option { +pub fn get_cpu_limit_from_cgroups() -> Option { #[cfg(target_os = "linux")] if is_cgroup_v2()? { // Read `/sys/fs/cgroup/cpu.max` to get the cpu limit. @@ -108,10 +105,6 @@ pub fn get_cpu_limit() -> Option { Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_QUOTA_FILE_CGROUP_V1), )?; - if quota == MAX_VALUE { - return Some(MAX_VALUE); - } - let period = read_value_from_file( Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_PERIOD_FILE_CGROUP_V1), )?; @@ -167,9 +160,9 @@ fn is_cgroup_v2() -> Option { fn read_value_from_file>(path: P) -> Option { let content = read_to_string(&path).ok()?; - // If the content starts with "max", return `MAX_VALUE`. + // If the content starts with "max", return `None`. if content.starts_with(MAX_VALUE_CGROUP_V2) { - return Some(MAX_VALUE); + return None; } content.trim().parse::().ok() @@ -183,10 +176,10 @@ fn get_cgroup_v2_cpu_limit>(path: P) -> Option { return None; } - // If the cpu is unlimited, it will be `-1`. + // If the cgroup cpu limit is unset, return `None`. let quota = fields[0].trim(); if quota == MAX_VALUE_CGROUP_V2 { - return Some(MAX_VALUE); + return None; } let quota = quota.parse::().ok()?; @@ -241,7 +234,7 @@ impl Collector for CgroupsMetricsCollector { self.cpu_usage.set(cpu_usage); } - if let Some(memory_usage) = get_memory_usage() { + if let Some(memory_usage) = get_memory_usage_from_cgroups() { self.memory_usage.set(memory_usage); } @@ -263,8 +256,8 @@ mod tests { 100000 ); assert_eq!( - read_value_from_file(Path::new("testdata").join("memory.max.unlimited")).unwrap(), - MAX_VALUE + read_value_from_file(Path::new("testdata").join("memory.max.unlimited")), + None ); assert_eq!(read_value_from_file(Path::new("non_existent_file")), None); } @@ -276,8 +269,8 @@ mod tests { 1500 ); assert_eq!( - get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max.unlimited")).unwrap(), - MAX_VALUE + get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max.unlimited")), + None ); assert_eq!( get_cgroup_v2_cpu_limit(Path::new("non_existent_file")), diff --git a/src/common/stat/src/lib.rs b/src/common/stat/src/lib.rs index 14d1f90c1df5..2c6cbea3f186 100644 --- a/src/common/stat/src/lib.rs +++ b/src/common/stat/src/lib.rs @@ -15,3 +15,64 @@ mod cgroups; pub use cgroups::*; +use common_base::readable_size::ReadableSize; +use sysinfo::System; + +/// Get the total CPU in millicores. 
+pub fn get_total_cpu_millicores() -> i64 { + // Get CPU limit from cgroups filesystem. + if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() { + cgroup_cpu_limit + } else { + // Get total CPU cores from host system. + num_cpus::get() as i64 * 1000 + } +} + +/// Get the total memory in bytes. +pub fn get_total_memory_bytes() -> i64 { + // Get memory limit from cgroups filesystem. + if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() { + cgroup_memory_limit + } else { + // Get total memory from host system. + if sysinfo::IS_SUPPORTED_SYSTEM { + let mut sys_info = System::new(); + sys_info.refresh_memory(); + sys_info.total_memory() as i64 + } else { + // If the system is not supported, return -1. + -1 + } + } +} + +/// Get the total CPU cores. The result will be rounded to the nearest integer. +/// For example, if the total CPU is 1.5 cores(1500 millicores), the result will be 2. +pub fn get_total_cpu_cores() -> usize { + ((get_total_cpu_millicores() as f64) / 1000.0).round() as usize +} + +/// Get the total memory in readable size. +pub fn get_total_memory_readable() -> Option { + if get_total_memory_bytes() > 0 { + Some(ReadableSize(get_total_memory_bytes() as u64)) + } else { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_total_cpu_cores() { + assert!(get_total_cpu_cores() > 0); + } + + #[test] + fn test_get_total_memory_readable() { + assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0)); + } +} diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index 0abbefa76e37..3dcffd0ac940 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ -43,6 +43,7 @@ datatypes.workspace = true file-engine.workspace = true futures.workspace = true futures-util.workspace = true +hostname.workspace = true humantime-serde.workspace = true lazy_static.workspace = true log-store.workspace = true @@ -61,7 +62,6 @@ servers.workspace = true session.workspace = true snafu.workspace = true store-api.workspace = true -substrait.workspace = true table.workspace = true tokio.workspace = true toml.workspace = true diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 5b8fa4c1e31c..bb3f25957c2a 100644 --- a/src/datanode/src/heartbeat.rs +++ b/src/datanode/src/heartbeat.rs @@ -251,6 +251,10 @@ impl HeartbeatTask { start_time_ms: node_epoch, cpus, memory_bytes, + hostname: hostname::get() + .unwrap_or_default() + .to_string_lossy() + .to_string(), }), node_workloads: Some(NodeWorkloads::Datanode(DatanodeWorkloads { types: workload_types.iter().map(|w| w.to_i32()).collect(), diff --git a/src/datatypes/src/arrow_array.rs b/src/datatypes/src/arrow_array.rs index 40b7d46d1d02..97aa299fadef 100644 --- a/src/datatypes/src/arrow_array.rs +++ b/src/datatypes/src/arrow_array.rs @@ -16,3 +16,5 @@ pub type BinaryArray = arrow::array::BinaryArray; pub type MutableBinaryArray = arrow::array::BinaryBuilder; pub type StringArray = arrow::array::StringArray; pub type MutableStringArray = arrow::array::StringBuilder; +pub type LargeStringArray = arrow::array::LargeStringArray; +pub type MutableLargeStringArray = arrow::array::LargeStringBuilder; diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 8b97399284c5..3f305828a220 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -33,8 +33,8 @@ use crate::types::{ BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, 
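On the millicore unit returned by get_total_cpu_millicores above: cgroup v2 expresses the CPU limit in cpu.max as a "<quota> <period>" pair (or the literal max when no limit is configured), and the corresponding limit in millicores is quota * 1000 / period. A small sketch of that conversion under those assumptions; the function name is made up and this is not the cgroups.rs implementation.

// Convert the content of a cgroup v2 `cpu.max` file into millicores.
// "50000 100000" -> Some(500)  (half a CPU)
// "max 100000"   -> None       (no limit configured)
fn cpu_max_to_millicores(content: &str) -> Option<i64> {
    let mut fields = content.split_whitespace();
    let quota = fields.next()?;
    let period: i64 = fields.next()?.parse().ok()?;
    if quota == "max" || period == 0 {
        return None;
    }
    let quota: i64 = quota.parse().ok()?;
    Some(quota * 1000 / period)
}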
DurationSecondType, DurationType, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType, - IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType, - StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType, + IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonFormat, JsonType, ListType, + NullType, StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType, UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType, }; @@ -350,7 +350,7 @@ impl ConcreteDataType { pub fn as_json(&self) -> Option { match self { - ConcreteDataType::Json(j) => Some(*j), + ConcreteDataType::Json(j) => Some(j.clone()), _ => None, } } @@ -454,9 +454,8 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType { ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::BinaryView => { Self::binary_datatype() } - ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => { - Self::string_datatype() - } + ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(), + ArrowDataType::LargeUtf8 => Self::large_string_datatype(), ArrowDataType::List(field) => Self::List(ListType::new( ConcreteDataType::from_arrow_type(field.data_type()), )), @@ -518,6 +517,10 @@ impl_new_concrete_type_functions!( ); impl ConcreteDataType { + pub fn large_string_datatype() -> Self { + ConcreteDataType::String(StringType::large_utf8()) + } + pub fn timestamp_second_datatype() -> Self { ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType)) } @@ -668,6 +671,10 @@ impl ConcreteDataType { pub fn vector_default_datatype() -> ConcreteDataType { Self::vector_datatype(0) } + + pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType { + ConcreteDataType::Json(JsonType::new(JsonFormat::Native(Box::new(inner_type)))) + } } /// Data type abstraction. 
@@ -773,6 +780,14 @@ mod tests { ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8), ConcreteDataType::String(_) )); + // Test LargeUtf8 mapping to large String type + let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8); + assert!(matches!(large_string_type, ConcreteDataType::String(_))); + if let ConcreteDataType::String(string_type) = &large_string_type { + assert!(string_type.is_large()); + } else { + panic!("Expected a String type"); + } assert_eq!( ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new( "item", @@ -787,6 +802,38 @@ mod tests { )); } + #[test] + fn test_large_utf8_round_trip() { + // Test round-trip conversion for LargeUtf8 + let large_utf8_arrow = ArrowDataType::LargeUtf8; + let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow); + let back_to_arrow = concrete_type.as_arrow_type(); + + assert!(matches!(concrete_type, ConcreteDataType::String(_))); + // Round-trip should preserve the LargeUtf8 type + assert_eq!(large_utf8_arrow, back_to_arrow); + + // Test that Utf8 and LargeUtf8 map to different string variants + let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8); + let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8); + + assert!(matches!(utf8_concrete, ConcreteDataType::String(_))); + assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_))); + + // They should have different size types + if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) = + (&utf8_concrete, &large_utf8_concrete) + { + assert!(!utf8_type.is_large()); + assert!(large_type.is_large()); + } else { + panic!("Expected both to be String types"); + } + + // They should be different types + assert_ne!(utf8_concrete, large_utf8_concrete); + } + #[test] fn test_from_arrow_timestamp() { assert_eq!( diff --git a/src/datatypes/src/json.rs b/src/datatypes/src/json.rs new file mode 100644 index 000000000000..96894a17360b --- /dev/null +++ b/src/datatypes/src/json.rs @@ -0,0 +1,2415 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Data conversion between greptime's StructType and Json +//! +//! The idea of this module is to provide utilities to convert serde_json::Value to greptime's StructType and vice versa. +//! +//! The struct will carry all the fields of the Json object. We will not flatten any json object in this implementation. +//! + +use std::collections::HashSet; + +use common_base::bytes::StringBytes; +use ordered_float::OrderedFloat; +use serde_json::{Map, Value as Json}; +use snafu::{ResultExt, ensure}; + +use crate::data_type::{ConcreteDataType, DataType}; +use crate::error::{self, Error}; +use crate::types::{ListType, StructField, StructType}; +use crate::value::{ListValue, StructValue, Value}; + +/// The configuration of JSON encoding +/// +/// The enum describes how we handle JSON encoding to `StructValue` internally. 
+/// It defines three configurations: +/// - Structured: Encodes JSON objects as StructValue with an optional predefined StructType. +/// - UnstructuredRaw: Encodes JSON data as a string and stores it in a struct with a field named "_raw". +/// - PartialUnstructuredByKey: Encodes JSON objects as StructValue with an optional predefined StructType +/// and a set of unstructured keys; these keys are provided as flattened names, for example: `a.b.c`. +/// +/// We provide a few methods to convert JSON data to StructValue based on the settings. We also +/// convert them to fully structured StructValue for user-facing APIs: the UI protocol and the UDF interface. +/// +/// **Important**: These settings only control the internal form of JSON encoding. +#[derive(Debug, Clone)] +pub enum JsonStructureSettings { + // TODO(sunng87): provide a limit + Structured(Option<StructType>), + UnstructuredRaw, + PartialUnstructuredByKey { + fields: Option<StructType>, + unstructured_keys: HashSet<String>, + }, +} + +/// Context for JSON encoding/decoding that tracks the current key path +#[derive(Clone, Debug)] +pub struct JsonContext<'a> { + /// Current key path in dot notation (e.g., "user.profile.name") + pub key_path: String, + /// Settings for JSON structure handling + pub settings: &'a JsonStructureSettings, +} + +impl JsonStructureSettings { + pub const RAW_FIELD: &'static str = "_raw"; + + /// Decode an encoded StructValue back into a serde_json::Value. + pub fn decode(&self, value: Value) -> Result<Json, Error> { + let context = JsonContext { + key_path: String::new(), + settings: self, + }; + decode_value_with_context(value, &context) + } + + /// Decode a StructValue that was encoded with current settings back into a fully structured StructValue. + /// This is useful for reconstructing the original structure from encoded data, especially when + /// unstructured encoding was used for some fields. + pub fn decode_struct(&self, struct_value: StructValue) -> Result<StructValue, Error> { + let context = JsonContext { + key_path: String::new(), + settings: self, + }; + decode_struct_with_settings(struct_value, &context) + } + + /// Encode a serde_json::Value into a Value::Json using current settings. + pub fn encode(&self, json: Json) -> Result<Value, Error> { + let context = JsonContext { + key_path: String::new(), + settings: self, + }; + encode_json_with_context(json, None, &context).map(|v| Value::Json(Box::new(v))) + } + + /// Encode a serde_json::Value into a Value::Json with the given data type. + pub fn encode_with_type( + &self, + json: Json, + data_type: Option<&ConcreteDataType>, + ) -> Result<Value, Error> { + let context = JsonContext { + key_path: String::new(), + settings: self, + }; + encode_json_with_context(json, data_type, &context).map(|v| Value::Json(Box::new(v))) + } +} + +impl<'a> JsonContext<'a> { + /// Create a new context with an updated key path + pub fn with_key(&self, key: &str) -> JsonContext<'a> { + let new_key_path = if self.key_path.is_empty() { + key.to_string() + } else { + format!("{}.{}", self.key_path, key) + }; + JsonContext { + key_path: new_key_path, + settings: self.settings, + } + } + + /// Check if the current key path should be treated as unstructured + pub fn is_unstructured_key(&self) -> bool { + match &self.settings { + JsonStructureSettings::PartialUnstructuredByKey { + unstructured_keys, ..
+ } => unstructured_keys.contains(&self.key_path), + _ => false, + } + } +} + +/// Main encoding function with key path tracking +pub fn encode_json_with_context<'a>( + json: Json, + data_type: Option<&ConcreteDataType>, + context: &JsonContext<'a>, +) -> Result { + // Check if the entire encoding should be unstructured + if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { + let json_string = json.to_string(); + let struct_value = StructValue::try_new( + vec![Value::String(json_string.into())], + StructType::new(vec![StructField::new( + JsonStructureSettings::RAW_FIELD.to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + )?; + return Ok(Value::Struct(struct_value)); + } + + // Check if current key should be treated as unstructured + if context.is_unstructured_key() { + return Ok(Value::String(json.to_string().into())); + } + + match json { + Json::Object(json_object) => { + ensure!( + matches!(data_type, Some(ConcreteDataType::Struct(_)) | None), + error::InvalidJsonSnafu { + value: "JSON object can only be encoded to Struct type".to_string(), + } + ); + + let data_type = data_type.and_then(|x| x.as_struct()); + let struct_value = encode_json_object_with_context(json_object, data_type, context)?; + Ok(Value::Struct(struct_value)) + } + Json::Array(json_array) => { + let item_type = if let Some(ConcreteDataType::List(list_type)) = data_type { + Some(list_type.item_type()) + } else { + None + }; + let list_value = encode_json_array_with_context(json_array, item_type, context)?; + Ok(Value::List(list_value)) + } + _ => { + // For non-collection types, verify type compatibility + if let Some(expected_type) = data_type { + let (value, actual_type) = + encode_json_value_with_context(json, Some(expected_type), context)?; + if &actual_type == expected_type { + Ok(value) + } else { + Err(error::InvalidJsonSnafu { + value: format!( + "JSON value type {} does not match expected type {}", + actual_type.name(), + expected_type.name() + ), + } + .build()) + } + } else { + let (value, _) = encode_json_value_with_context(json, None, context)?; + Ok(value) + } + } + } +} + +fn encode_json_object_with_context<'a>( + mut json_object: Map, + fields: Option<&StructType>, + context: &JsonContext<'a>, +) -> Result { + let total_json_keys = json_object.len(); + let mut items = Vec::with_capacity(total_json_keys); + let mut struct_fields = Vec::with_capacity(total_json_keys); + // First, process fields from the provided schema in their original order + if let Some(fields) = fields { + for field in fields.fields() { + let field_name = field.name(); + + if let Some(value) = json_object.remove(field_name) { + let field_context = context.with_key(field_name); + let (value, data_type) = + encode_json_value_with_context(value, Some(field.data_type()), &field_context)?; + items.push(value); + struct_fields.push(StructField::new( + field_name.to_string(), + data_type, + true, // JSON fields are always nullable + )); + } else { + // Field exists in schema but not in JSON - add null value + items.push(Value::Null); + struct_fields.push(field.clone()); + } + } + } + + // Then, process any remaining JSON fields that weren't in the schema + for (key, value) in json_object { + let field_context = context.with_key(&key); + + let (value, data_type) = encode_json_value_with_context(value, None, &field_context)?; + items.push(value); + + struct_fields.push(StructField::new( + key.clone(), + data_type, + true, // JSON fields are always nullable + )); + } + + let struct_type = 
StructType::new(struct_fields); + StructValue::try_new(items, struct_type) +} + +fn encode_json_array_with_context<'a>( + json_array: Vec, + item_type: Option<&ConcreteDataType>, + context: &JsonContext<'a>, +) -> Result { + let json_array_len = json_array.len(); + let mut items = Vec::with_capacity(json_array_len); + let mut element_type = None; + + for (index, value) in json_array.into_iter().enumerate() { + let array_context = context.with_key(&index.to_string()); + let (item_value, item_type) = + encode_json_value_with_context(value, item_type, &array_context)?; + items.push(item_value); + + // Determine the common type for the list + if let Some(current_type) = &element_type { + // For now, we'll use the first non-null type we encounter + // In a more sophisticated implementation, we might want to find a common supertype + if *current_type == ConcreteDataType::null_datatype() + && item_type != ConcreteDataType::null_datatype() + { + element_type = Some(item_type); + } + } else { + element_type = Some(item_type); + } + } + + // Use provided item_type if available, otherwise determine from elements + let element_type = if let Some(item_type) = item_type { + item_type.clone() + } else { + element_type.unwrap_or_else(ConcreteDataType::string_datatype) + }; + let list_type = ListType::new(element_type); + + Ok(ListValue::new(items, ConcreteDataType::List(list_type))) +} + +/// Helper function to encode a JSON value to a Value and determine its ConcreteDataType with context +fn encode_json_value_with_context<'a>( + json: Json, + expected_type: Option<&ConcreteDataType>, + context: &JsonContext<'a>, +) -> Result<(Value, ConcreteDataType), Error> { + // Check if current key should be treated as unstructured + if context.is_unstructured_key() { + return Ok(( + Value::String(json.to_string().into()), + ConcreteDataType::string_datatype(), + )); + } + + match json { + Json::Null => Ok((Value::Null, ConcreteDataType::null_datatype())), + Json::Bool(b) => Ok((Value::Boolean(b), ConcreteDataType::boolean_datatype())), + Json::Number(n) => { + if let Some(i) = n.as_i64() { + // Use int64 for all integer numbers when possible + if let Some(expected) = expected_type + && let Ok(value) = try_convert_to_expected_type(i, expected) + { + return Ok((value, expected.clone())); + } + Ok((Value::Int64(i), ConcreteDataType::int64_datatype())) + } else if let Some(u) = n.as_u64() { + // Use int64 for unsigned integers that fit, otherwise use u64 + if let Some(expected) = expected_type + && let Ok(value) = try_convert_to_expected_type(u, expected) + { + return Ok((value, expected.clone())); + } + if u <= i64::MAX as u64 { + Ok((Value::Int64(u as i64), ConcreteDataType::int64_datatype())) + } else { + Ok((Value::UInt64(u), ConcreteDataType::uint64_datatype())) + } + } else if let Some(f) = n.as_f64() { + // Try to use the expected type if provided + if let Some(expected) = expected_type + && let Ok(value) = try_convert_to_expected_type(f, expected) + { + return Ok((value, expected.clone())); + } + + // Default to f64 for floating point numbers + Ok(( + Value::Float64(OrderedFloat(f)), + ConcreteDataType::float64_datatype(), + )) + } else { + // Fallback to string representation + Ok(( + Value::String(StringBytes::from(n.to_string())), + ConcreteDataType::string_datatype(), + )) + } + } + Json::String(s) => { + if let Some(expected) = expected_type + && let Ok(value) = try_convert_to_expected_type(s.as_str(), expected) + { + return Ok((value, expected.clone())); + } + Ok(( + 
Value::String(StringBytes::from(s.clone())), + ConcreteDataType::string_datatype(), + )) + } + Json::Array(arr) => { + let list_value = encode_json_array_with_context(arr, expected_type, context)?; + let data_type = list_value.datatype().clone(); + Ok((Value::List(list_value), data_type)) + } + Json::Object(obj) => { + let struct_value = encode_json_object_with_context(obj, None, context)?; + let data_type = ConcreteDataType::Struct(struct_value.struct_type().clone()); + Ok((Value::Struct(struct_value), data_type)) + } + } +} + +/// Main decoding function with key path tracking +pub fn decode_value_with_context<'a>( + value: Value, + context: &JsonContext<'a>, +) -> Result<Json, Error> { + // Check if the entire decoding should be unstructured + if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { + return decode_unstructured_value(value); + } + + // Check if current key should be treated as unstructured + if context.is_unstructured_key() { + return decode_unstructured_value(value); + } + + match value { + Value::Json(inner) => decode_value_with_context(*inner, context), + Value::Struct(struct_value) => decode_struct_with_context(struct_value, context), + Value::List(list_value) => decode_list_with_context(list_value, context), + _ => decode_primitive_value(value), + } +} + +/// Decode a structured value to a JSON object +fn decode_struct_with_context<'a>( + struct_value: StructValue, + context: &JsonContext<'a>, +) -> Result<Json, Error> { + let mut json_object = Map::with_capacity(struct_value.len()); + + let (items, fields) = struct_value.into_parts(); + + for (field, field_value) in fields.fields().iter().zip(items.into_iter()) { + let field_context = context.with_key(field.name()); + let json_value = decode_value_with_context(field_value, &field_context)?; + json_object.insert(field.name().to_string(), json_value); + } + + Ok(Json::Object(json_object)) +} + +/// Decode a list value to a JSON array +fn decode_list_with_context<'a>( + list_value: ListValue, + context: &JsonContext<'a>, +) -> Result<Json, Error> { + let mut json_array = Vec::with_capacity(list_value.len()); + + let data_items = list_value.take_items(); + + for (index, item) in data_items.into_iter().enumerate() { + let array_context = context.with_key(&index.to_string()); + let json_value = decode_value_with_context(item, &array_context)?; + json_array.push(json_value); + } + + Ok(Json::Array(json_array)) +} + +/// Decode an unstructured value (stored as a string) +fn decode_unstructured_value(value: Value) -> Result<Json, Error> { + match value { + // Handle expected format: StructValue with single _raw field + Value::Struct(struct_value) => { + if struct_value.struct_type().fields().len() == 1 { + let field = &struct_value.struct_type().fields()[0]; + if field.name() == JsonStructureSettings::RAW_FIELD + && let Some(Value::String(s)) = struct_value.items().first() + { + let json_str = s.as_utf8(); + return serde_json::from_str(json_str).with_context(|_| { + error::DeserializeSnafu { + json: json_str.to_string(), + } + }); + } + } + // Invalid format - expected struct with single _raw field + Err(error::InvalidJsonSnafu { + value: "Unstructured value must be stored as struct with single _raw field" + .to_string(), + } + .build()) + } + // Handle old format: plain string (for backward compatibility) + Value::String(s) => { + let json_str = s.as_utf8(); + serde_json::from_str(json_str).with_context(|_| error::DeserializeSnafu { + json: json_str.to_string(), + }) + } + _ => Err(error::InvalidJsonSnafu { + value: "Unstructured value must be stored as string or
struct with _raw field" + .to_string(), + } + .build()), + } +} + +/// Decode primitive value to JSON +fn decode_primitive_value(value: Value) -> Result<Json, Error> { + match value { + Value::Null => Ok(Json::Null), + Value::Boolean(b) => Ok(Json::Bool(b)), + Value::UInt8(v) => Ok(Json::from(v)), + Value::UInt16(v) => Ok(Json::from(v)), + Value::UInt32(v) => Ok(Json::from(v)), + Value::UInt64(v) => Ok(Json::from(v)), + Value::Int8(v) => Ok(Json::from(v)), + Value::Int16(v) => Ok(Json::from(v)), + Value::Int32(v) => Ok(Json::from(v)), + Value::Int64(v) => Ok(Json::from(v)), + Value::Float32(v) => Ok(Json::from(v.0)), + Value::Float64(v) => Ok(Json::from(v.0)), + Value::String(s) => Ok(Json::String(s.as_utf8().to_string())), + Value::Binary(b) => serde_json::to_value(b.as_ref()).context(error::SerializeSnafu), + Value::Date(v) => Ok(Json::from(v.val())), + Value::Timestamp(v) => serde_json::to_value(v.value()).context(error::SerializeSnafu), + Value::Time(v) => serde_json::to_value(v.value()).context(error::SerializeSnafu), + Value::IntervalYearMonth(v) => { + serde_json::to_value(v.to_i32()).context(error::SerializeSnafu) + } + Value::IntervalDayTime(v) => { + serde_json::to_value(v.to_i64()).context(error::SerializeSnafu) + } + Value::IntervalMonthDayNano(v) => { + serde_json::to_value(v.to_i128()).context(error::SerializeSnafu) + } + Value::Duration(v) => serde_json::to_value(v.value()).context(error::SerializeSnafu), + Value::Decimal128(v) => serde_json::to_value(v.to_string()).context(error::SerializeSnafu), + Value::Struct(_) | Value::List(_) | Value::Json(_) => { + // These should be handled by the context-aware functions + Err(error::InvalidJsonSnafu { + value: "Structured values should be handled by context-aware decoding".to_string(), + } + .build()) + } + } +} + +/// Decode a StructValue that was encoded with current settings back into a fully structured StructValue +fn decode_struct_with_settings<'a>( + struct_value: StructValue, + context: &JsonContext<'a>, +) -> Result<StructValue, Error> { + // Check if we can return the struct directly (Structured case) + if matches!(context.settings, JsonStructureSettings::Structured(_)) { + return Ok(struct_value); + } + + // Check if we can return the struct directly (PartialUnstructuredByKey with no matching keys) + if let JsonStructureSettings::PartialUnstructuredByKey { + unstructured_keys, ..
+ } = context.settings + && unstructured_keys.is_empty() + { + return Ok(struct_value.clone()); + } + + // Check if the entire decoding should be unstructured (UnstructuredRaw case) + if matches!(context.settings, JsonStructureSettings::UnstructuredRaw) { + // For UnstructuredRaw, the entire struct should be reconstructed from _raw field + return decode_unstructured_raw_struct(struct_value); + } + + let mut items = Vec::with_capacity(struct_value.len()); + let mut struct_fields = Vec::with_capacity(struct_value.len()); + + // Process each field in the struct value + let (struct_data, fields) = struct_value.into_parts(); + for (field, value) in fields.fields().iter().zip(struct_data.into_iter()) { + let field_context = context.with_key(field.name()); + + // Check if this field should be treated as unstructured + if field_context.is_unstructured_key() { + // Decode the unstructured value + let json_value = decode_unstructured_value(value)?; + + // Re-encode the unstructured value with proper structure using structured context + let structured_context = JsonContext { + key_path: field_context.key_path.clone(), + settings: &JsonStructureSettings::Structured(None), + }; + let (decoded_value, data_type) = encode_json_value_with_context( + json_value, + None, // Don't force a specific type, let it be inferred from JSON + &structured_context, + )?; + + items.push(decoded_value); + struct_fields.push(StructField::new( + field.name().to_string(), + data_type, + true, // JSON fields are always nullable + )); + } else { + // For structured fields, recursively decode if they are structs/lists + let decoded_value = match value { + Value::Struct(nested_struct) => { + let nested_context = context.with_key(field.name()); + Value::Struct(decode_struct_with_settings(nested_struct, &nested_context)?) + } + Value::List(list_value) => { + let list_context = context.with_key(field.name()); + Value::List(decode_list_with_settings(list_value, &list_context)?) + } + _ => value.clone(), + }; + + items.push(decoded_value); + struct_fields.push(field.clone()); + } + } + + let struct_type = StructType::new(struct_fields); + StructValue::try_new(items, struct_type) +} + +/// Decode a ListValue that was encoded with current settings back into a fully structured ListValue +fn decode_list_with_settings<'a>( + list_value: ListValue, + context: &JsonContext<'a>, +) -> Result { + let mut items = Vec::with_capacity(list_value.len()); + + let (data_items, datatype) = list_value.into_parts(); + + for (index, item) in data_items.into_iter().enumerate() { + let item_context = context.with_key(&index.to_string()); + + let decoded_item = match item { + Value::Struct(nested_struct) => { + Value::Struct(decode_struct_with_settings(nested_struct, &item_context)?) + } + Value::List(nested_list) => { + Value::List(decode_list_with_settings(nested_list, &item_context)?) 
+ } + _ => item.clone(), + }; + + items.push(decoded_item); + } + + Ok(ListValue::new(items, datatype)) +} + +/// Helper function to decode a struct that was encoded with UnstructuredRaw settings +fn decode_unstructured_raw_struct(struct_value: StructValue) -> Result { + // For UnstructuredRaw, the struct must have exactly one field named "_raw" + if struct_value.struct_type().fields().len() == 1 { + let field = &struct_value.struct_type().fields()[0]; + if field.name() == JsonStructureSettings::RAW_FIELD + && let Some(Value::String(s)) = struct_value.items().first() + { + let json_str = s.as_utf8(); + let json_value: Json = + serde_json::from_str(json_str).with_context(|_| error::DeserializeSnafu { + json: json_str.to_string(), + })?; + + // Re-encode the JSON with proper structure + let context = JsonContext { + key_path: String::new(), + settings: &JsonStructureSettings::Structured(None), + }; + let (decoded_value, data_type) = + encode_json_value_with_context(json_value, None, &context)?; + + if let Value::Struct(decoded_struct) = decoded_value { + return Ok(decoded_struct); + } else { + // If the decoded value is not a struct, wrap it in a struct + let struct_type = + StructType::new(vec![StructField::new("value".to_string(), data_type, true)]); + return StructValue::try_new(vec![decoded_value], struct_type); + } + } + } + + // Invalid format - expected struct with single _raw field + Err(error::InvalidJsonSnafu { + value: "UnstructuredRaw value must be stored as struct with single _raw field".to_string(), + } + .build()) +} + +/// Helper function to try converting a value to an expected type +fn try_convert_to_expected_type( + value: T, + expected_type: &ConcreteDataType, +) -> Result +where + T: Into, +{ + let value = value.into(); + expected_type.try_cast(value.clone()).ok_or_else(|| { + error::CastTypeSnafu { + msg: format!( + "Cannot cast from {} to {}", + value.data_type().name(), + expected_type.name() + ), + } + .build() + }) +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::*; + + #[test] + fn test_encode_json_null() { + let json = Json::Null; + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Null); + } + + #[test] + fn test_encode_json_boolean() { + let json = Json::Bool(true); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Boolean(true)); + } + + #[test] + fn test_encode_json_number_integer() { + let json = Json::from(42); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Int64(42)); + } + + #[test] + fn test_encode_json_number_float() { + let json = Json::from(3.15); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + match result { + Value::Float64(f) => assert_eq!(f.0, 3.15), + _ => panic!("Expected Float64"), + } + } + + #[test] + fn test_encode_json_string() { + let json = Json::String("hello".to_string()); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::String("hello".into())); + } + + #[test] 
+ fn test_encode_json_array() { + let json = json!([1, 2, 3]); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 3); + assert_eq!(list_value.items()[0], Value::Int64(1)); + assert_eq!(list_value.items()[1], Value::Int64(2)); + assert_eq!(list_value.items()[2], Value::Int64(3)); + } else { + panic!("Expected List value"); + } + } + + #[test] + fn test_encode_json_object() { + let json = json!({ + "name": "John", + "age": 30, + "active": true + }); + + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + let Value::Struct(result) = result else { + panic!("Expected Struct value"); + }; + assert_eq!(result.items().len(), 3); + + let items = result.items(); + let struct_type = result.struct_type(); + + // Check that we have the expected fields + let field_names: Vec<&str> = struct_type.fields().iter().map(|f| f.name()).collect(); + assert!(field_names.contains(&"name")); + assert!(field_names.contains(&"age")); + assert!(field_names.contains(&"active")); + + // Find and check each field + for (i, field) in struct_type.fields().iter().enumerate() { + match field.name() { + "name" => { + assert_eq!(items[i], Value::String("John".into())); + assert_eq!(field.data_type(), &ConcreteDataType::string_datatype()); + } + "age" => { + assert_eq!(items[i], Value::Int64(30)); + assert_eq!(field.data_type(), &ConcreteDataType::int64_datatype()); + } + "active" => { + assert_eq!(items[i], Value::Boolean(true)); + assert_eq!(field.data_type(), &ConcreteDataType::boolean_datatype()); + } + _ => panic!("Unexpected field: {}", field.name()), + } + } + } + + #[test] + fn test_encode_json_nested_object() { + let json = json!({ + "person": { + "name": "Alice", + "age": 25 + }, + "scores": [95, 87, 92] + }); + + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + let Value::Struct(result) = result else { + panic!("Expected Struct value"); + }; + assert_eq!(result.items().len(), 2); + + let items = result.items(); + let struct_type = result.struct_type(); + + // Check person field (nested struct) + let person_index = struct_type + .fields() + .iter() + .position(|f| f.name() == "person") + .unwrap(); + if let Value::Struct(person_struct) = &items[person_index] { + assert_eq!(person_struct.items().len(), 2); + let person_fields: Vec<&str> = person_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(person_fields.contains(&"name")); + assert!(person_fields.contains(&"age")); + } else { + panic!("Expected Struct value for person field"); + } + + // Check scores field (list) + let scores_index = struct_type + .fields() + .iter() + .position(|f| f.name() == "scores") + .unwrap(); + if let Value::List(scores_list) = &items[scores_index] { + assert_eq!(scores_list.items().len(), 3); + assert_eq!(scores_list.items()[0], Value::Int64(95)); + assert_eq!(scores_list.items()[1], Value::Int64(87)); + assert_eq!(scores_list.items()[2], Value::Int64(92)); + } else { + panic!("Expected List value for scores field"); + } + } + + #[test] + fn test_encode_json_with_expected_type() { + // Test encoding JSON number with expected int8 type + let json = Json::from(42); + let settings = 
JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json.clone(), Some(&ConcreteDataType::int8_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Int8(42)); + + // Test with expected string type + let result = settings + .encode_with_type(json, Some(&ConcreteDataType::string_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::String("42".into())); + } + + #[test] + fn test_encode_json_array_mixed_types() { + let json = json!([1, "hello", true, 3.15]); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 4); + // The first non-null type should determine the list type + // In this case, it should be string since we can't find a common numeric type + assert_eq!( + list_value.datatype(), + &ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())) + ); + } else { + panic!("Expected List value"); + } + } + + #[test] + fn test_encode_json_empty_array() { + let json = json!([]); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 0); + // Empty arrays default to string type + assert_eq!( + list_value.datatype(), + &ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype())) + ); + } else { + panic!("Expected List value"); + } + } + + #[test] + fn test_encode_json_structured() { + let json = json!({ + "name": "Bob", + "age": 35 + }); + + let settings = JsonStructureSettings::Structured(None); + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); + + if let Value::Struct(struct_value) = result { + assert_eq!(struct_value.items().len(), 2); + let field_names: Vec<&str> = struct_value + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(field_names.contains(&"name")); + assert!(field_names.contains(&"age")); + } else { + panic!("Expected Struct value"); + } + } + + #[test] + fn test_encode_json_structured_with_fields() { + let json = json!({ + "name": "Carol", + "age": 28 + }); + + // Define expected struct type + let fields = vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]; + let struct_type = StructType::new(fields); + let concrete_type = ConcreteDataType::Struct(struct_type); + + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, Some(&concrete_type)) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::Struct(struct_value) = result { + assert_eq!(struct_value.items().len(), 2); + let struct_fields = struct_value.struct_type().fields(); + assert_eq!(struct_fields[0].name(), "name"); + assert_eq!( + struct_fields[0].data_type(), + &ConcreteDataType::string_datatype() + ); + assert_eq!(struct_fields[1].name(), "age"); + assert_eq!( + struct_fields[1].data_type(), + &ConcreteDataType::int64_datatype() + ); + } else { + panic!("Expected Struct value"); + } + } + + #[test] + fn test_encode_json_object_field_order_preservation() { + let json = json!({ + "z_field": "last", + "a_field": "first", + "m_field": "middle" + }); + + // Define schema 
with specific field order + let fields = vec![ + StructField::new( + "a_field".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "m_field".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "z_field".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]; + let struct_type = StructType::new(fields); + + let result = encode_json_object_with_context( + json.as_object().unwrap().clone(), + Some(&struct_type), + &JsonContext { + key_path: String::new(), + settings: &JsonStructureSettings::Structured(None), + }, + ) + .unwrap(); + + // Verify field order is preserved from schema + let struct_fields = result.struct_type().fields(); + assert_eq!(struct_fields[0].name(), "a_field"); + assert_eq!(struct_fields[1].name(), "m_field"); + assert_eq!(struct_fields[2].name(), "z_field"); + + // Verify values are correct + let items = result.items(); + assert_eq!(items[0], Value::String("first".into())); + assert_eq!(items[1], Value::String("middle".into())); + assert_eq!(items[2], Value::String("last".into())); + } + + #[test] + fn test_encode_json_object_schema_reuse_with_extra_fields() { + let json = json!({ + "name": "Alice", + "age": 25, + "active": true // Extra field not in schema + }); + + // Define schema with only name and age + let fields = vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]; + let struct_type = StructType::new(fields); + + let result = encode_json_object_with_context( + json.as_object().unwrap().clone(), + Some(&struct_type), + &JsonContext { + key_path: String::new(), + settings: &JsonStructureSettings::Structured(None), + }, + ) + .unwrap(); + + // Verify schema fields come first in order + let struct_fields = result.struct_type().fields(); + assert_eq!(struct_fields[0].name(), "name"); + assert_eq!(struct_fields[1].name(), "age"); + assert_eq!(struct_fields[2].name(), "active"); + + // Verify values are correct + let items = result.items(); + assert_eq!(items[0], Value::String("Alice".into())); + assert_eq!(items[1], Value::Int64(25)); + assert_eq!(items[2], Value::Boolean(true)); + } + + #[test] + fn test_encode_json_object_missing_schema_fields() { + let json = json!({ + "name": "Bob" + // age field is missing from JSON but present in schema + }); + + // Define schema with name and age + let fields = vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]; + let struct_type = StructType::new(fields); + + let result = encode_json_object_with_context( + json.as_object().unwrap().clone(), + Some(&struct_type), + &JsonContext { + key_path: String::new(), + settings: &JsonStructureSettings::Structured(None), + }, + ) + .unwrap(); + + // Verify both schema fields are present + let struct_fields = result.struct_type().fields(); + assert_eq!(struct_fields[0].name(), "name"); + assert_eq!(struct_fields[1].name(), "age"); + + // Verify values - name has value, age is null + let items = result.items(); + assert_eq!(items[0], Value::String("Bob".into())); + assert_eq!(items[1], Value::Null); + } + + #[test] + fn test_json_structure_settings_structured() { + let json = json!({ + "name": "Eve", + "score": 95 + }); + + let settings = JsonStructureSettings::Structured(None); + let result = 
settings.encode(json).unwrap().into_json_inner().unwrap(); + + if let Value::Struct(struct_value) = result { + assert_eq!(struct_value.items().len(), 2); + } else { + panic!("Expected Struct value"); + } + } + + #[test] + fn test_encode_json_array_with_item_type() { + let json = json!([1, 2, 3]); + let list_type = ListType::new(ConcreteDataType::int8_datatype()); + let concrete_type = ConcreteDataType::List(list_type); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, Some(&concrete_type)) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 3); + assert_eq!(list_value.items()[0], Value::Int8(1)); + assert_eq!(list_value.items()[1], Value::Int8(2)); + assert_eq!(list_value.items()[2], Value::Int8(3)); + assert_eq!( + list_value.datatype(), + &ConcreteDataType::List(ListType::new(ConcreteDataType::int8_datatype())) + ); + } else { + panic!("Expected List value"); + } + } + + #[test] + fn test_encode_json_array_empty_with_item_type() { + let json = json!([]); + let list_type = ListType::new(ConcreteDataType::string_datatype()); + let concrete_type = ConcreteDataType::List(list_type); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, Some(&concrete_type)) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 0); + assert_eq!( + list_value.datatype(), + &ConcreteDataType::List(ListType::new(ConcreteDataType::string_datatype())) + ); + } else { + panic!("Expected List value"); + } + } + + #[cfg(test)] + mod decode_tests { + use serde_json::json; + + use super::*; + + #[test] + fn test_decode_primitive_values() { + let settings = JsonStructureSettings::Structured(None); + + // Test null + let result = settings.decode(Value::Null).unwrap(); + assert_eq!(result, Json::Null); + + // Test boolean + let result = settings.decode(Value::Boolean(true)).unwrap(); + assert_eq!(result, Json::Bool(true)); + + // Test integer + let result = settings.decode(Value::Int64(42)).unwrap(); + assert_eq!(result, Json::from(42)); + + // Test float + let result = settings.decode(Value::Float64(OrderedFloat(3.16))).unwrap(); + assert_eq!(result, Json::from(3.16)); + + // Test string + let result = settings.decode(Value::String("hello".into())).unwrap(); + assert_eq!(result, Json::String("hello".to_string())); + } + + #[test] + fn test_decode_struct() { + let settings = JsonStructureSettings::Structured(None); + + let struct_value = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::Int64(25), + Value::Boolean(true), + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + StructField::new( + "active".to_string(), + ConcreteDataType::boolean_datatype(), + true, + ), + ]), + ); + + let result = settings.decode(Value::Struct(struct_value)).unwrap(); + let expected = json!({ + "name": "Alice", + "age": 25, + "active": true + }); + assert_eq!(result, expected); + } + + #[test] + fn test_decode_list() { + let settings = JsonStructureSettings::Structured(None); + + let list_value = ListValue::new( + vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)], + ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + ); + + let result = 
settings.decode(Value::List(list_value)).unwrap(); + let expected = json!([1, 2, 3]); + assert_eq!(result, expected); + } + + #[test] + fn test_decode_nested_structure() { + let settings = JsonStructureSettings::Structured(None); + + let inner_struct = StructValue::new( + vec![Value::String("Alice".into()), Value::Int64(25)], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]), + ); + + let outer_struct = StructValue::new( + vec![ + Value::Struct(inner_struct), + Value::List(ListValue::new( + vec![Value::Int64(95), Value::Int64(87)], + ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + )), + ], + StructType::new(vec![ + StructField::new( + "user".to_string(), + ConcreteDataType::Struct(StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "age".to_string(), + ConcreteDataType::int64_datatype(), + true, + ), + ])), + true, + ), + StructField::new( + "scores".to_string(), + ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())), + true, + ), + ]), + ); + + let result = settings.decode(Value::Struct(outer_struct)).unwrap(); + let expected = json!({ + "user": { + "name": "Alice", + "age": 25 + }, + "scores": [95, 87] + }); + assert_eq!(result, expected); + } + + #[test] + fn test_decode_unstructured_raw() { + let settings = JsonStructureSettings::UnstructuredRaw; + + let json_str = r#"{"name": "Bob", "age": 30}"#; + let value = Value::String(json_str.into()); + + let result = settings.decode(value).unwrap(); + let expected: Json = serde_json::from_str(json_str).unwrap(); + assert_eq!(result, expected); + } + + #[test] + fn test_decode_unstructured_raw_struct_format() { + let settings = JsonStructureSettings::UnstructuredRaw; + + let json_str = r#"{"name": "Bob", "age": 30}"#; + let struct_value = StructValue::new( + vec![Value::String(json_str.into())], + StructType::new(vec![StructField::new( + JsonStructureSettings::RAW_FIELD.to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + let value = Value::Struct(struct_value); + + let result = settings.decode(value).unwrap(); + let expected: Json = serde_json::from_str(json_str).unwrap(); + assert_eq!(result, expected); + } + + #[test] + fn test_decode_partial_unstructured() { + let mut unstructured_keys = HashSet::new(); + unstructured_keys.insert("user.metadata".to_string()); + + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys, + }; + + let metadata_json = r#"{"preferences": {"theme": "dark"}, "history": [1, 2, 3]}"#; + + let struct_value = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::String(metadata_json.into()), + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "metadata".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]), + ); + + let result = settings.decode(Value::Struct(struct_value)).unwrap(); + + if let Json::Object(obj) = result { + assert_eq!(obj.get("name"), Some(&Json::String("Alice".to_string()))); + + if let Some(Json::String(metadata_str)) = obj.get("metadata") { + let metadata: Json = serde_json::from_str(metadata_str).unwrap(); + let expected_metadata: Json = serde_json::from_str(metadata_json).unwrap(); + assert_eq!(metadata, expected_metadata); + 
} else { + panic!("Expected metadata to be unstructured string"); + } + } else { + panic!("Expected object result"); + } + } + + #[test] + fn test_decode_missing_fields() { + let settings = JsonStructureSettings::Structured(None); + + // Struct with missing field (null value) + let struct_value = StructValue::new( + vec![ + Value::String("Bob".into()), + Value::Null, // missing age field + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]), + ); + + let result = settings.decode(Value::Struct(struct_value)).unwrap(); + let expected = json!({ + "name": "Bob", + "age": null + }); + assert_eq!(result, expected); + } + } + + #[test] + fn test_encode_json_with_concrete_type() { + let settings = JsonStructureSettings::Structured(None); + + // Test encoding JSON number with expected int64 type + let json = Json::from(42); + let result = settings + .encode_with_type(json, Some(&ConcreteDataType::int64_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Int64(42)); + + // Test encoding JSON string with expected string type + let json = Json::String("hello".to_string()); + let result = settings + .encode_with_type(json, Some(&ConcreteDataType::string_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::String("hello".into())); + + // Test encoding JSON boolean with expected boolean type + let json = Json::Bool(true); + let result = settings + .encode_with_type(json, Some(&ConcreteDataType::boolean_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Boolean(true)); + } + + #[test] + fn test_encode_json_with_mismatched_type() { + // Test encoding JSON number with mismatched string type + let json = Json::from(42); + let settings = JsonStructureSettings::Structured(None); + let result = settings.encode_with_type(json, Some(&ConcreteDataType::string_datatype())); + assert!(result.is_ok()); // Should succeed due to type conversion + + // Test encoding JSON object with mismatched non-struct type + let json = json!({"name": "test"}); + let result = settings.encode_with_type(json, Some(&ConcreteDataType::int64_datatype())); + assert!(result.is_err()); // Should fail - object can't be converted to int64 + } + + #[test] + fn test_encode_json_array_with_list_type() { + let json = json!([1, 2, 3]); + let list_type = ListType::new(ConcreteDataType::int64_datatype()); + let concrete_type = ConcreteDataType::List(list_type); + + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, Some(&concrete_type)) + .unwrap() + .into_json_inner() + .unwrap(); + + if let Value::List(list_value) = result { + assert_eq!(list_value.items().len(), 3); + assert_eq!(list_value.items()[0], Value::Int64(1)); + assert_eq!(list_value.items()[1], Value::Int64(2)); + assert_eq!(list_value.items()[2], Value::Int64(3)); + assert_eq!( + list_value.datatype(), + &ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())) + ); + } else { + panic!("Expected List value"); + } + } + + #[test] + fn test_encode_json_non_collection_with_type() { + // Test null with null type + let json = Json::Null; + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json.clone(), Some(&ConcreteDataType::null_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, 
Value::Null); + + // Test float with float64 type + let json = Json::from(3.15); + let result = settings + .encode_with_type(json, Some(&ConcreteDataType::float64_datatype())) + .unwrap() + .into_json_inner() + .unwrap(); + match result { + Value::Float64(f) => assert_eq!(f.0, 3.15), + _ => panic!("Expected Float64"), + } + } + + #[test] + fn test_encode_json_large_unsigned_integer() { + // Test unsigned integer that fits in i64 + let json = Json::from(u64::MAX / 2); + let settings = JsonStructureSettings::Structured(None); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::Int64((u64::MAX / 2) as i64)); + + // Test unsigned integer that exceeds i64 range + let json = Json::from(u64::MAX); + let result = settings + .encode_with_type(json, None) + .unwrap() + .into_json_inner() + .unwrap(); + assert_eq!(result, Value::UInt64(u64::MAX)); + } + + #[test] + fn test_json_structure_settings_unstructured_raw() { + let json = json!({ + "name": "Frank", + "score": 88 + }); + + let settings = JsonStructureSettings::UnstructuredRaw; + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); + + if let Value::Struct(struct_value) = result { + assert_eq!(struct_value.struct_type().fields().len(), 1); + let field = &struct_value.struct_type().fields()[0]; + assert_eq!(field.name(), JsonStructureSettings::RAW_FIELD); + assert_eq!(field.data_type(), &ConcreteDataType::string_datatype()); + + let items = struct_value.items(); + assert_eq!(items.len(), 1); + if let Value::String(s) = &items[0] { + let json_str = s.as_utf8(); + assert!(json_str.contains("\"name\":\"Frank\"")); + assert!(json_str.contains("\"score\":88")); + } else { + panic!("Expected String value in _raw field"); + } + } else { + panic!("Expected Struct value"); + } + } + + #[test] + fn test_json_structure_settings_unstructured_raw_with_type() { + let json = json!({ + "name": "Grace", + "age": 30, + "active": true + }); + + let settings = JsonStructureSettings::UnstructuredRaw; + + // Test with encode (no type) + let result = settings + .encode(json.clone()) + .unwrap() + .into_json_inner() + .unwrap(); + if let Value::Struct(s) = result { + if let Value::String(json_str) = &s.items()[0] { + let json_str = json_str.as_utf8(); + assert!(json_str.contains("\"name\":\"Grace\"")); + assert!(json_str.contains("\"age\":30")); + assert!(json_str.contains("\"active\":true")); + } else { + panic!("Expected String value in _raw field"); + } + } else { + panic!("Expected Struct value for encode"); + } + + // Test with encode_with_type (with type) + let struct_type = StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + StructField::new( + "active".to_string(), + ConcreteDataType::boolean_datatype(), + true, + ), + ]); + let concrete_type = ConcreteDataType::Struct(struct_type); + + let result2 = settings + .encode_with_type(json, Some(&concrete_type)) + .unwrap() + .into_json_inner() + .unwrap(); + if let Value::Struct(s) = result2 { + if let Value::String(json_str) = &s.items()[0] { + let json_str = json_str.as_utf8(); + assert!(json_str.contains("\"name\":\"Grace\"")); + assert!(json_str.contains("\"age\":30")); + assert!(json_str.contains("\"active\":true")); + } else { + panic!("Expected String value for _raw field"); + } + } else { + panic!("Expected String value for encode_with_type"); + } + + // Test with 
nested objects + let nested_json = json!({ + "user": { + "profile": { + "name": "Alice", + "settings": {"theme": "dark"} + } + } + }); + + let result3 = settings + .encode(nested_json) + .unwrap() + .into_json_inner() + .unwrap(); + if let Value::Struct(s) = result3 { + if let Value::String(json_str) = &s.items()[0] { + let json_str = json_str.as_utf8(); + assert!(json_str.contains("\"user\"")); + assert!(json_str.contains("\"profile\"")); + assert!(json_str.contains("\"name\":\"Alice\"")); + assert!(json_str.contains("\"settings\"")); + assert!(json_str.contains("\"theme\":\"dark\"")); + } else { + panic!("Expected String value for _raw field"); + } + } else { + panic!("Expected String value for nested JSON"); + } + + // Test with arrays + let array_json = json!([1, "hello", true, 3.15]); + let result4 = settings + .encode(array_json) + .unwrap() + .into_json_inner() + .unwrap(); + if let Value::Struct(s) = result4 { + if let Value::String(json_str) = &s.items()[0] { + let json_str = json_str.as_utf8(); + assert!(json_str.contains("[1,\"hello\",true,3.15]")); + } else { + panic!("Expected String value for _raw field") + } + } else { + panic!("Expected String value for array JSON"); + } + } + + #[test] + fn test_encode_json_with_context_partial_unstructured() { + let json = json!({ + "user": { + "name": "Alice", + "metadata": { + "preferences": {"theme": "dark"}, + "history": [1, 2, 3] + } + } + }); + + let mut unstructured_keys = HashSet::new(); + unstructured_keys.insert("user.metadata".to_string()); + + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys, + }; + let result = settings.encode(json).unwrap().into_json_inner().unwrap(); + + if let Value::Struct(struct_value) = result { + let items = struct_value.items(); + let struct_type = struct_value.struct_type(); + + // Find user field + let user_index = struct_type + .fields() + .iter() + .position(|f| f.name() == "user") + .unwrap(); + if let Value::Struct(user_struct) = &items[user_index] { + let user_items = user_struct.items(); + let user_fields: Vec<&str> = user_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + // name should be structured + let name_index = user_fields.iter().position(|&f| f == "name").unwrap(); + assert_eq!(user_items[name_index], Value::String("Alice".into())); + + // metadata should be unstructured (string) + let metadata_index = user_fields.iter().position(|&f| f == "metadata").unwrap(); + if let Value::String(metadata_str) = &user_items[metadata_index] { + let json_str = metadata_str.as_utf8(); + assert!(json_str.contains("\"preferences\"")); + assert!(json_str.contains("\"history\"")); + } else { + panic!("Expected String value for metadata field"); + } + } else { + panic!("Expected Struct value for user field"); + } + } else { + panic!("Expected Struct value"); + } + } + + #[test] + fn test_decode_struct_structured() { + // Test decoding a structured struct value - should return the same struct + let settings = JsonStructureSettings::Structured(None); + + let original_struct = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::Int64(25), + Value::Boolean(true), + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + StructField::new( + "active".to_string(), + ConcreteDataType::boolean_datatype(), + true, + ), + ]), + ); + + let decoded_struct = 
settings.decode_struct(original_struct.clone()).unwrap(); + + // With Structured settings, the struct should be returned directly + assert_eq!(decoded_struct.items(), original_struct.items()); + assert_eq!(decoded_struct.struct_type(), original_struct.struct_type()); + } + + #[test] + fn test_decode_struct_partial_unstructured_empty_keys() { + // Test decoding with PartialUnstructuredByKey but empty unstructured_keys + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys: HashSet::new(), + }; + + let original_struct = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::Int64(25), + Value::Boolean(true), + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + StructField::new( + "active".to_string(), + ConcreteDataType::boolean_datatype(), + true, + ), + ]), + ); + + let decoded_struct = settings.decode_struct(original_struct.clone()).unwrap(); + + // With empty unstructured_keys, the struct should be returned directly + assert_eq!(decoded_struct.items(), original_struct.items()); + assert_eq!(decoded_struct.struct_type(), original_struct.struct_type()); + } + + #[test] + fn test_decode_struct_partial_unstructured() { + // Test decoding a struct with unstructured fields + let mut unstructured_keys = HashSet::new(); + unstructured_keys.insert("metadata".to_string()); + + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: Some(StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "metadata".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ])), + unstructured_keys, + }; + + // Create a struct where metadata is stored as unstructured JSON string + let encoded_struct = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), + ], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "metadata".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]), + ); + + let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); + + // Verify name field remains the same + assert_eq!(decoded_struct.items()[0], Value::String("Alice".into())); + + // Verify metadata field is now properly structured + if let Value::Struct(metadata_struct) = &decoded_struct.items()[1] { + let metadata_fields: Vec<&str> = metadata_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(metadata_fields.contains(&"preferences")); + assert!(metadata_fields.contains(&"history")); + } else { + panic!("Expected metadata to be decoded as structured value"); + } + } + + #[test] + fn test_decode_struct_nested_unstructured() { + // Test decoding nested structures with unstructured fields + let mut unstructured_keys = HashSet::new(); + unstructured_keys.insert("user.metadata".to_string()); + + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys, + }; + + // Create a nested struct where user.metadata is stored as unstructured JSON string + let user_struct = StructValue::new( + vec![ + Value::String("Alice".into()), + Value::String(r#"{"preferences":{"theme":"dark"},"history":[1,2,3]}"#.into()), + ], + StructType::new(vec![ + 
StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new( + "metadata".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + ]), + ); + + let encoded_struct = StructValue::new( + vec![Value::Struct(user_struct)], + StructType::new(vec![StructField::new( + "user".to_string(), + ConcreteDataType::struct_datatype(StructType::new(vec![])), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); + + // Verify the nested structure is properly decoded + if let Value::Struct(decoded_user) = &decoded_struct.items()[0] { + if let Value::Struct(metadata_struct) = &decoded_user.items()[1] { + let metadata_fields: Vec<&str> = metadata_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(metadata_fields.contains(&"preferences")); + assert!(metadata_fields.contains(&"history")); + + let preference_index = metadata_fields + .iter() + .position(|&field| field == "preferences") + .unwrap(); + let history_index = metadata_fields + .iter() + .position(|&field| field == "history") + .unwrap(); + + // Verify the nested structure within preferences + if let Value::Struct(preferences_struct) = + &metadata_struct.items()[preference_index] + { + let pref_fields: Vec<&str> = preferences_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(pref_fields.contains(&"theme")); + } else { + panic!("Expected preferences to be decoded as structured value"); + } + + // Verify the array within history + if let Value::List(history_list) = &metadata_struct.items()[history_index] { + assert_eq!(history_list.items().len(), 3); + } else { + panic!("Expected history to be decoded as list value"); + } + } else { + panic!("Expected metadata to be decoded as structured value"); + } + } else { + panic!("Expected user to be decoded as structured value"); + } + } + + #[test] + fn test_decode_struct_unstructured_raw() { + // Test decoding with UnstructuredRaw setting + let settings = JsonStructureSettings::UnstructuredRaw; + + // With UnstructuredRaw, the entire JSON is encoded as a struct with _raw field + let encoded_struct = StructValue::new( + vec![Value::String( + r#"{"name":"Alice","age":25,"active":true}"#.into(), + )], + StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); + + // With UnstructuredRaw, the entire struct should be reconstructed from _raw field + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(decoded_fields.contains(&"name")); + assert!(decoded_fields.contains(&"age")); + assert!(decoded_fields.contains(&"active")); + + // Verify the actual values + let name_index = decoded_fields.iter().position(|&f| f == "name").unwrap(); + let age_index = decoded_fields.iter().position(|&f| f == "age").unwrap(); + let active_index = decoded_fields.iter().position(|&f| f == "active").unwrap(); + + assert_eq!( + decoded_struct.items()[name_index], + Value::String("Alice".into()) + ); + assert_eq!(decoded_struct.items()[age_index], Value::Int64(25)); + assert_eq!(decoded_struct.items()[active_index], Value::Boolean(true)); + } + + #[test] + fn test_decode_struct_unstructured_raw_invalid_format() { + // Test UnstructuredRaw decoding when the struct doesn't have the expected _raw field format + let settings = 
JsonStructureSettings::UnstructuredRaw; + + // Create a struct that doesn't match the expected UnstructuredRaw format + let invalid_struct = StructValue::new( + vec![Value::String("Alice".into()), Value::Int64(25)], + StructType::new(vec![ + StructField::new( + "name".to_string(), + ConcreteDataType::string_datatype(), + true, + ), + StructField::new("age".to_string(), ConcreteDataType::int64_datatype(), true), + ]), + ); + + // Should fail with error since it doesn't match expected UnstructuredRaw format + let result = settings.decode_struct(invalid_struct); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("UnstructuredRaw value must be stored as struct with single _raw field") + ); + } + + #[test] + fn test_decode_struct_unstructured_raw_primitive_value() { + // Test UnstructuredRaw decoding when the _raw field contains a primitive value + let settings = JsonStructureSettings::UnstructuredRaw; + + // Test with a string primitive in _raw field + let string_struct = StructValue::new( + vec![Value::String("\"hello world\"".into())], + StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(string_struct).unwrap(); + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(decoded_fields.contains(&"value")); + assert_eq!( + decoded_struct.items()[0], + Value::String("hello world".into()) + ); + + // Test with a number primitive in _raw field + let number_struct = StructValue::new( + vec![Value::String("42".into())], + StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(number_struct).unwrap(); + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(decoded_fields.contains(&"value")); + assert_eq!(decoded_struct.items()[0], Value::Int64(42)); + + // Test with a boolean primitive in _raw field + let bool_struct = StructValue::new( + vec![Value::String("true".into())], + StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(bool_struct).unwrap(); + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(decoded_fields.contains(&"value")); + assert_eq!(decoded_struct.items()[0], Value::Boolean(true)); + + // Test with a null primitive in _raw field + let null_struct = StructValue::new( + vec![Value::String("null".into())], + StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(null_struct).unwrap(); + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(decoded_fields.contains(&"value")); + assert_eq!(decoded_struct.items()[0], Value::Null); + } + + #[test] + fn test_decode_struct_unstructured_raw_array() { + // Test UnstructuredRaw decoding when the _raw field contains a JSON array + let settings = JsonStructureSettings::UnstructuredRaw; + + // Test with an array in _raw field + let array_struct = StructValue::new( + vec![Value::String("[1, \"hello\", true, 3.15]".into())], + 
StructType::new(vec![StructField::new( + "_raw".to_string(), + ConcreteDataType::string_datatype(), + true, + )]), + ); + + let decoded_struct = settings.decode_struct(array_struct).unwrap(); + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + assert!(decoded_fields.contains(&"value")); + + if let Value::List(list_value) = &decoded_struct.items()[0] { + assert_eq!(list_value.items().len(), 4); + assert_eq!(list_value.items()[0], Value::Int64(1)); + assert_eq!(list_value.items()[1], Value::String("hello".into())); + assert_eq!(list_value.items()[2], Value::Boolean(true)); + assert_eq!(list_value.items()[3], Value::Float64(OrderedFloat(3.15))); + } else { + panic!("Expected array to be decoded as ListValue"); + } + } + + #[test] + fn test_decode_struct_comprehensive_flow() { + // Test the complete flow: encode JSON with partial unstructured settings, + // then decode the resulting StructValue back to fully structured form + let mut unstructured_keys = HashSet::new(); + unstructured_keys.insert("metadata".to_string()); + unstructured_keys.insert("user.profile.settings".to_string()); + + let settings = JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys, + }; + + // Original JSON with nested structure + let original_json = json!({ + "name": "Alice", + "age": 25, + "metadata": { + "tags": ["admin", "premium"], + "preferences": { + "theme": "dark", + "notifications": true + } + }, + "user": { + "profile": { + "name": "Alice Smith", + "settings": { + "language": "en", + "timezone": "UTC" + } + }, + "active": true + } + }); + + // Encode the JSON with partial unstructured settings + let encoded_value = settings + .encode(original_json) + .unwrap() + .into_json_inner() + .unwrap(); + + // Verify encoding worked - metadata and user.profile.settings should be unstructured + if let Value::Struct(encoded_struct) = encoded_value { + let fields: Vec<&str> = encoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(fields.contains(&"name")); + assert!(fields.contains(&"age")); + assert!(fields.contains(&"metadata")); + assert!(fields.contains(&"user")); + + // Check that metadata is stored as string (unstructured) + let metadata_index = fields.iter().position(|&f| f == "metadata").unwrap(); + if let Value::String(_) = encoded_struct.items()[metadata_index] { + // Good - metadata is unstructured + } else { + panic!("Expected metadata to be encoded as string (unstructured)"); + } + + // Check that user.profile.settings is unstructured + let user_index = fields.iter().position(|&f| f == "user").unwrap(); + if let Value::Struct(user_struct) = &encoded_struct.items()[user_index] { + let user_fields: Vec<&str> = user_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); + if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { + let profile_fields: Vec<&str> = profile_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + let settings_index = profile_fields + .iter() + .position(|&f| f == "settings") + .unwrap(); + if let Value::String(_) = &profile_struct.items()[settings_index] { + // Good - settings is unstructured + } else { + panic!( + "Expected user.profile.settings to be encoded as string (unstructured)" + ); + } + } else { + panic!("Expected user.profile to be a struct"); + } + } else { + 
panic!("Expected user to be a struct"); + } + + // Now decode the struct back to fully structured form + let decoded_struct = settings.decode_struct(encoded_struct).unwrap(); + + // Verify the decoded struct has proper structure + let decoded_fields: Vec<&str> = decoded_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(decoded_fields.contains(&"name")); + assert!(decoded_fields.contains(&"age")); + assert!(decoded_fields.contains(&"metadata")); + assert!(decoded_fields.contains(&"user")); + + // Check that metadata is now properly structured + let metadata_index = decoded_fields + .iter() + .position(|&f| f == "metadata") + .unwrap(); + if let Value::Struct(metadata_struct) = &decoded_struct.items()[metadata_index] { + let metadata_fields: Vec<&str> = metadata_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(metadata_fields.contains(&"tags")); + assert!(metadata_fields.contains(&"preferences")); + + // Check nested structure within metadata + let preferences_index = metadata_fields + .iter() + .position(|&f| f == "preferences") + .unwrap(); + if let Value::Struct(prefs_struct) = &metadata_struct.items()[preferences_index] { + let prefs_fields: Vec<&str> = prefs_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(prefs_fields.contains(&"theme")); + assert!(prefs_fields.contains(&"notifications")); + } else { + panic!("Expected metadata.preferences to be a struct"); + } + } else { + panic!("Expected metadata to be decoded as struct"); + } + + // Check that user.profile.settings is now properly structured + let user_index = decoded_fields.iter().position(|&f| f == "user").unwrap(); + if let Value::Struct(user_struct) = &decoded_struct.items()[user_index] { + let user_fields: Vec<&str> = user_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + let profile_index = user_fields.iter().position(|&f| f == "profile").unwrap(); + if let Value::Struct(profile_struct) = &user_struct.items()[profile_index] { + let profile_fields: Vec<&str> = profile_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + let settings_index = profile_fields + .iter() + .position(|&f| f == "settings") + .unwrap(); + if let Value::Struct(settings_struct) = &profile_struct.items()[settings_index] + { + let settings_fields: Vec<&str> = settings_struct + .struct_type() + .fields() + .iter() + .map(|f| f.name()) + .collect(); + + assert!(settings_fields.contains(&"language")); + assert!(settings_fields.contains(&"timezone")); + } else { + panic!("Expected user.profile.settings to be decoded as struct"); + } + } else { + panic!("Expected user.profile to be a struct"); + } + } else { + panic!("Expected user to be a struct"); + } + } else { + panic!("Expected encoded value to be a struct"); + } + } +} diff --git a/src/datatypes/src/lib.rs b/src/datatypes/src/lib.rs index 6ad27fc656ac..6b200803803b 100644 --- a/src/datatypes/src/lib.rs +++ b/src/datatypes/src/lib.rs @@ -19,6 +19,7 @@ pub mod data_type; pub mod duration; pub mod error; pub mod interval; +pub mod json; pub mod macros; pub mod prelude; pub mod scalars; diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 8a79b3c02e2b..6bdf321137ee 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -368,8 +368,7 @@ impl TryFrom for Schema { type Error = Error; fn try_from(value: DFSchemaRef) -> Result { - let s: ArrowSchema = value.as_ref().into(); 
- s.try_into() + value.inner().clone().try_into() } } diff --git a/src/datatypes/src/types.rs b/src/datatypes/src/types.rs index 1f28233bb57d..1c7df86249e3 100644 --- a/src/datatypes/src/types.rs +++ b/src/datatypes/src/types.rs @@ -44,8 +44,8 @@ pub use interval_type::{ IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, }; pub use json_type::{ - JSON_TYPE_NAME, JsonType, json_type_value_to_serde_json, json_type_value_to_string, - parse_string_to_json_type_value, + JSON_TYPE_NAME, JsonFormat, JsonType, jsonb_to_serde_json, jsonb_to_string, + parse_string_to_jsonb, }; pub use list_type::ListType; pub use null_type::NullType; @@ -53,7 +53,7 @@ pub use primitive_type::{ Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, LogicalPrimitiveType, OrdPrimitive, UInt8Type, UInt16Type, UInt32Type, UInt64Type, WrapperType, }; -pub use string_type::StringType; +pub use string_type::{StringSizeType, StringType}; pub use struct_type::{StructField, StructType}; pub use time_type::{ TimeMicrosecondType, TimeMillisecondType, TimeNanosecondType, TimeSecondType, TimeType, diff --git a/src/datatypes/src/types/cast.rs b/src/datatypes/src/types/cast.rs index 2330814f8193..d7a27a45e10a 100644 --- a/src/datatypes/src/types/cast.rs +++ b/src/datatypes/src/types/cast.rs @@ -104,7 +104,7 @@ pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool { (_, Boolean(_)) => src_type.is_numeric() || src_type.is_string(), (Boolean(_), _) => dest_type.is_numeric() || dest_type.is_string(), - // numeric types cast + // numeric and string types cast ( UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_) | Float32(_) | Float64(_) | String(_), diff --git a/src/datatypes/src/types/json_type.rs b/src/datatypes/src/types/json_type.rs index f36372d6df36..01ec81dd0864 100644 --- a/src/datatypes/src/types/json_type.rs +++ b/src/datatypes/src/types/json_type.rs @@ -21,6 +21,7 @@ use snafu::ResultExt; use crate::data_type::DataType; use crate::error::{DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, Result}; +use crate::prelude::ConcreteDataType; use crate::scalars::ScalarVectorBuilder; use crate::type_id::LogicalTypeId; use crate::value::Value; @@ -28,19 +29,16 @@ use crate::vectors::{BinaryVectorBuilder, MutableVector}; pub const JSON_TYPE_NAME: &str = "Json"; -#[derive( - Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default, -)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)] pub enum JsonFormat { #[default] Jsonb, + Native(Box<ConcreteDataType>), } /// JsonType is a data type for JSON data. It is stored as binary data of jsonb format. /// It utilizes current binary value and vector implementation.
-#[derive( - Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, -)] +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] pub struct JsonType { pub format: JsonFormat, } @@ -81,34 +79,26 @@ impl DataType for JsonType { } /// Converts a json type value to string -pub fn json_type_value_to_string(val: &[u8], format: &JsonFormat) -> Result<String> { - match format { - JsonFormat::Jsonb => match jsonb::from_slice(val) { - Ok(jsonb_value) => { - let serialized = jsonb_value.to_string(); - Ok(serialized) - } - Err(e) => InvalidJsonbSnafu { error: e }.fail(), - }, +pub fn jsonb_to_string(val: &[u8]) -> Result<String> { + match jsonb::from_slice(val) { + Ok(jsonb_value) => { + let serialized = jsonb_value.to_string(); + Ok(serialized) + } + Err(e) => InvalidJsonbSnafu { error: e }.fail(), } } /// Converts a json type value to serde_json::Value -pub fn json_type_value_to_serde_json(val: &[u8], format: &JsonFormat) -> Result<serde_json::Value> { - match format { - JsonFormat::Jsonb => { - let json_string = json_type_value_to_string(val, format)?; - serde_json::Value::from_str(json_string.as_str()) - .context(DeserializeSnafu { json: json_string }) - } - } +pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> { + let json_string = jsonb_to_string(val)?; + serde_json::Value::from_str(json_string.as_str()) + .context(DeserializeSnafu { json: json_string }) } /// Parses a string to a json type value -pub fn parse_string_to_json_type_value(s: &str, format: &JsonFormat) -> Result<Vec<u8>> { - match format { - JsonFormat::Jsonb => jsonb::parse_value(s.as_bytes()) - .map_err(|_| InvalidJsonSnafu { value: s }.build()) - .map(|json| json.to_vec()), - } +pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> { + jsonb::parse_value(s.as_bytes()) + .map_err(|_| InvalidJsonSnafu { value: s }.build()) + .map(|json| json.to_vec()) } diff --git a/src/datatypes/src/types/string_type.rs b/src/datatypes/src/types/string_type.rs index fdca9637075e..61677ead4ad1 100644 --- a/src/datatypes/src/types/string_type.rs +++ b/src/datatypes/src/types/string_type.rs @@ -19,17 +19,97 @@ use common_base::bytes::StringBytes; use serde::{Deserialize, Serialize}; use crate::data_type::{DataType, DataTypeRef}; -use crate::prelude::ScalarVectorBuilder; use crate::type_id::LogicalTypeId; use crate::value::Value; use crate::vectors::{MutableVector, StringVectorBuilder}; -#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] -pub struct StringType; +/// String size variant to distinguish between UTF8 and LargeUTF8 +#[derive( + Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default, +)] +pub enum StringSizeType { + /// Regular UTF8 strings (up to 2GB) + #[default] + Utf8, + /// Large UTF8 strings (up to 2^63 bytes) + LargeUtf8, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] +pub struct StringType { + #[serde(default)] + size_type: StringSizeType, +} + +/// Custom deserialization to support both old and new formats.
+impl<'de> serde::Deserialize<'de> for StringType { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + #[derive(serde::Deserialize)] + struct Helper { + #[serde(default)] + size_type: StringSizeType, + } + + let opt = Option::<Helper>::deserialize(deserializer)?; + Ok(match opt { + Some(helper) => Self { + size_type: helper.size_type, + }, + None => Self::default(), + }) + } +} + +impl Default for StringType { + fn default() -> Self { + Self { + size_type: StringSizeType::Utf8, + } + } +} impl StringType { + /// Create a new StringType with default (Utf8) size + pub fn new() -> Self { + Self { + size_type: StringSizeType::Utf8, + } + } + + /// Create a new StringType with specified size + pub fn with_size(size_type: StringSizeType) -> Self { + Self { size_type } + } + + /// Create a StringType for regular UTF8 strings + pub fn utf8() -> Self { + Self::with_size(StringSizeType::Utf8) + } + + /// Create a StringType for large UTF8 strings + pub fn large_utf8() -> Self { + Self::with_size(StringSizeType::LargeUtf8) + } + + /// Get the size type + pub fn size_type(&self) -> StringSizeType { + self.size_type + } + + /// Check if this is a large UTF8 string type + pub fn is_large(&self) -> bool { + matches!(self.size_type, StringSizeType::LargeUtf8) + } + pub fn arc() -> DataTypeRef { - Arc::new(Self) + Arc::new(Self::new()) + } + + pub fn large_arc() -> DataTypeRef { + Arc::new(Self::large_utf8()) } } @@ -47,11 +127,19 @@ impl DataType for StringType { } fn as_arrow_type(&self) -> ArrowDataType { - ArrowDataType::Utf8 + match self.size_type { + StringSizeType::Utf8 => ArrowDataType::Utf8, + StringSizeType::LargeUtf8 => ArrowDataType::LargeUtf8, + } } fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> { - Box::new(StringVectorBuilder::with_capacity(capacity)) + match self.size_type { + StringSizeType::Utf8 => Box::new(StringVectorBuilder::with_string_capacity(capacity)), + StringSizeType::LargeUtf8 => { + Box::new(StringVectorBuilder::with_large_capacity(capacity)) + } + } } fn try_cast(&self, from: Value) -> Option<Value> { @@ -89,6 +177,8 @@ Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))), Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))), + Value::Json(v) => self.try_cast(*v), + + // StringBytes is only support for utf-8, Value::Binary and collections are not allowed.
Value::Binary(_) | Value::List(_) | Value::Struct(_) => None, } diff --git a/src/datatypes/src/types/struct_type.rs b/src/datatypes/src/types/struct_type.rs index 4f7b758d55a6..e854783b3252 100644 --- a/src/datatypes/src/types/struct_type.rs +++ b/src/datatypes/src/types/struct_type.rs @@ -95,6 +95,10 @@ impl StructType { &self.fields } + pub fn take_fields(self) -> Vec { + self.fields + } + pub fn as_arrow_fields(&self) -> Fields { self.fields .iter() @@ -123,6 +127,10 @@ impl StructField { &self.name } + pub fn take_name(self) -> String { + self.name + } + pub fn data_type(&self) -> &ConcreteDataType { &self.data_type } diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index d1eb6c09d3e0..63b508636153 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -81,8 +81,12 @@ pub enum Value { IntervalDayTime(IntervalDayTime), IntervalMonthDayNano(IntervalMonthDayNano), + // Collection types: List(ListValue), Struct(StructValue), + + // Json Logical types: + Json(Box), } impl Display for Value { @@ -144,6 +148,9 @@ impl Display for Value { .join(", "); write!(f, "{{ {items} }}") } + Value::Json(json_data) => { + write!(f, "Json({})", json_data) + } } } } @@ -190,6 +197,7 @@ macro_rules! define_data_type_func { $struct::Struct(struct_value) => { ConcreteDataType::struct_datatype(struct_value.struct_type().clone()) } + $struct::Json(v) => ConcreteDataType::json_native_datatype(v.data_type()), } } }; @@ -200,7 +208,11 @@ impl Value { /// Returns true if this is a null value. pub fn is_null(&self) -> bool { - matches!(self, Value::Null) + match self { + Value::Null => true, + Value::Json(inner) => inner.is_null(), + _ => false, + } } /// Cast itself to [ListValue]. @@ -208,6 +220,7 @@ impl Value { match self { Value::Null => Ok(None), Value::List(v) => Ok(Some(v)), + Value::Json(inner) => inner.as_list(), other => error::CastTypeSnafu { msg: format!("Failed to cast {other:?} to list value"), } @@ -219,6 +232,7 @@ impl Value { match self { Value::Null => Ok(None), Value::Struct(v) => Ok(Some(v)), + Value::Json(inner) => inner.as_struct(), other => error::CastTypeSnafu { msg: format!("Failed to cast {other:?} to struct value"), } @@ -253,6 +267,7 @@ impl Value { Value::Duration(v) => ValueRef::Duration(*v), Value::Decimal128(v) => ValueRef::Decimal128(*v), Value::Struct(v) => ValueRef::Struct(StructValueRef::Ref(v)), + Value::Json(v) => ValueRef::Json(Box::new(v.as_value_ref())), } } @@ -322,6 +337,7 @@ impl Value { Value::UInt8(v) => Some(*v as _), Value::UInt16(v) => Some(*v as _), Value::UInt32(v) => Some(*v as _), + Value::Json(inner) => inner.as_i64(), _ => None, } } @@ -333,6 +349,7 @@ impl Value { Value::UInt16(v) => Some(*v as _), Value::UInt32(v) => Some(*v as _), Value::UInt64(v) => Some(*v), + Value::Json(inner) => inner.as_u64(), _ => None, } } @@ -349,6 +366,7 @@ impl Value { Value::UInt16(v) => Some(*v as _), Value::UInt32(v) => Some(*v as _), Value::UInt64(v) => Some(*v as _), + Value::Json(inner) => inner.as_f64_lossy(), _ => None, } } @@ -365,6 +383,15 @@ impl Value { pub fn as_bool(&self) -> Option { match self { Value::Boolean(b) => Some(*b), + Value::Json(inner) => inner.as_bool(), + _ => None, + } + } + + /// Extract the inner JSON value from a JSON type. 
+ pub fn into_json_inner(self) -> Option { + match self { + Value::Json(v) => Some(*v), _ => None, } } @@ -411,6 +438,7 @@ impl Value { }, Value::Decimal128(_) => LogicalTypeId::Decimal128, Value::Struct(_) => LogicalTypeId::Struct, + Value::Json(_) => LogicalTypeId::Json, } } @@ -420,11 +448,11 @@ impl Value { let value_type_id = self.logical_type_id(); let output_type_id = output_type.logical_type_id(); ensure!( - // Json type leverage Value(Binary) for storage. output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json - && value_type_id == LogicalTypeId::Binary), + && (value_type_id == LogicalTypeId::Binary + || value_type_id == LogicalTypeId::Json)), error::ToScalarValueSnafu { reason: format!( "expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}", @@ -444,7 +472,13 @@ impl Value { Value::Int64(v) => ScalarValue::Int64(Some(*v)), Value::Float32(v) => ScalarValue::Float32(Some(v.0)), Value::Float64(v) => ScalarValue::Float64(Some(v.0)), - Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())), + Value::String(v) => { + let s = v.as_utf8().to_string(); + match output_type { + ConcreteDataType::String(t) if t.is_large() => ScalarValue::LargeUtf8(Some(s)), + _ => ScalarValue::Utf8(Some(s)), + } + } Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())), Value::Date(v) => ScalarValue::Date32(Some(v.val())), Value::Null => to_null_scalar_value(output_type)?, @@ -467,6 +501,7 @@ impl Value { let struct_type = output_type.as_struct().unwrap(); struct_value.try_to_scalar_value(struct_type)? } + Value::Json(v) => v.try_to_scalar_value(output_type)?, }; Ok(scalar_value) @@ -519,6 +554,8 @@ impl Value { Value::IntervalDayTime(x) => Some(Value::IntervalDayTime(x.negative())), Value::IntervalMonthDayNano(x) => Some(Value::IntervalMonthDayNano(x.negative())), + Value::Json(v) => v.try_negative().map(|neg| Value::Json(Box::new(neg))), + Value::Binary(_) | Value::String(_) | Value::Boolean(_) @@ -575,7 +612,13 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result { ScalarValue::Binary(None) } - ConcreteDataType::String(_) => ScalarValue::Utf8(None), + ConcreteDataType::String(t) => { + if t.is_large() { + ScalarValue::LargeUtf8(None) + } else { + ScalarValue::Utf8(None) + } + } ConcreteDataType::Date(_) => ScalarValue::Date32(None), ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None), ConcreteDataType::Interval(v) => match v { @@ -866,21 +909,18 @@ impl TryFrom for serde_json::Value { Value::Duration(v) => serde_json::to_value(v.value())?, Value::Decimal128(v) => serde_json::to_value(v.to_string())?, Value::Struct(v) => { - let map = v - .fields - .clone() // TODO:(sunng87) remove in next patch when into_parts is merged - .fields() - .iter() - .zip(v.take_items().into_iter()) + let (items, struct_type) = v.into_parts(); + let map = struct_type + .take_fields() + .into_iter() + .zip(items.into_iter()) .map(|(field, value)| { - Ok(( - field.name().to_string(), - serde_json::Value::try_from(value)?, - )) + Ok((field.take_name(), serde_json::Value::try_from(value)?)) }) .collect::>>()?; serde_json::Value::Object(map) } + Value::Json(v) => serde_json::Value::try_from(*v)?, }; Ok(json_value) @@ -912,11 +952,23 @@ impl ListValue { self.items } + pub fn into_parts(self) -> (Vec, ConcreteDataType) { + (self.items, self.datatype) + } + pub fn datatype(&self) -> &ConcreteDataType { &self.datatype } - fn try_to_scalar_value(&self, output_type: &ListType) -> Result { + pub fn len(&self) -> 
usize { + self.items.len() + } + + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + + pub fn try_to_scalar_value(&self, output_type: &ListType) -> Result { let vs = self .items .iter() @@ -979,6 +1031,13 @@ impl StructValue { Ok(Self { items, fields }) } + /// Create a new struct value. + /// + /// Panics if the number of items does not match the number of fields. + pub fn new(items: Vec, fields: StructType) -> Self { + Self::try_new(items, fields).unwrap() + } + pub fn items(&self) -> &[Value] { &self.items } @@ -987,10 +1046,22 @@ impl StructValue { self.items } + pub fn into_parts(self) -> (Vec, StructType) { + (self.items, self.fields) + } + pub fn struct_type(&self) -> &StructType { &self.fields } + pub fn len(&self) -> usize { + self.items.len() + } + + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + fn estimated_size(&self) -> usize { self.items .iter() @@ -1131,7 +1202,9 @@ impl TryFrom for Value { .collect::>>()?; Value::Struct(StructValue::try_new(items, struct_type)?) } - ScalarValue::Decimal256(_, _, _) + ScalarValue::Decimal32(_, _, _) + | ScalarValue::Decimal64(_, _, _) + | ScalarValue::Decimal256(_, _, _) | ScalarValue::FixedSizeList(_) | ScalarValue::LargeList(_) | ScalarValue::Dictionary(_, _) @@ -1178,6 +1251,7 @@ impl From> for Value { ValueRef::List(v) => v.to_value(), ValueRef::Decimal128(v) => Value::Decimal128(v), ValueRef::Struct(v) => v.to_value(), + ValueRef::Json(v) => Value::Json(Box::new(Value::from(*v))), } } } @@ -1220,6 +1294,8 @@ pub enum ValueRef<'a> { // Compound types: List(ListValueRef<'a>), Struct(StructValueRef<'a>), + + Json(Box>), } macro_rules! impl_as_for_value_ref { @@ -1227,6 +1303,18 @@ macro_rules! impl_as_for_value_ref { match $value { ValueRef::Null => Ok(None), ValueRef::$Variant(v) => Ok(Some(v.clone())), + ValueRef::Json(v) => match v.as_ref() { + ValueRef::Null => Ok(None), + ValueRef::$Variant(v) => Ok(Some(v.clone())), + other => error::CastTypeSnafu { + msg: format!( + "Failed to cast value ref {:?} to {}", + other, + stringify!($Variant) + ), + } + .fail(), + }, other => error::CastTypeSnafu { msg: format!( "Failed to cast value ref {:?} to {}", @@ -1244,60 +1332,65 @@ impl<'a> ValueRef<'a> { /// Returns true if this is null. pub fn is_null(&self) -> bool { - matches!(self, ValueRef::Null) + match self { + ValueRef::Null => true, + ValueRef::Json(v) => v.is_null(), + _ => false, + } } /// Cast itself to binary slice. - pub fn as_binary(&self) -> Result> { + pub fn try_into_binary(&self) -> Result> { impl_as_for_value_ref!(self, Binary) } /// Cast itself to string slice. - pub fn as_string(&self) -> Result> { + pub fn try_into_string(&self) -> Result> { impl_as_for_value_ref!(self, String) } /// Cast itself to boolean. 
- pub fn as_boolean(&self) -> Result> { + pub fn try_into_boolean(&self) -> Result> { impl_as_for_value_ref!(self, Boolean) } - pub fn as_i8(&self) -> Result> { + pub fn try_into_i8(&self) -> Result> { impl_as_for_value_ref!(self, Int8) } - pub fn as_u8(&self) -> Result> { + pub fn try_into_u8(&self) -> Result> { impl_as_for_value_ref!(self, UInt8) } - pub fn as_i16(&self) -> Result> { + pub fn try_into_i16(&self) -> Result> { impl_as_for_value_ref!(self, Int16) } - pub fn as_u16(&self) -> Result> { + pub fn try_into_u16(&self) -> Result> { impl_as_for_value_ref!(self, UInt16) } - pub fn as_i32(&self) -> Result> { + pub fn try_into_i32(&self) -> Result> { impl_as_for_value_ref!(self, Int32) } - pub fn as_u32(&self) -> Result> { + pub fn try_into_u32(&self) -> Result> { impl_as_for_value_ref!(self, UInt32) } - pub fn as_i64(&self) -> Result> { + pub fn try_into_i64(&self) -> Result> { impl_as_for_value_ref!(self, Int64) } - pub fn as_u64(&self) -> Result> { + pub fn try_into_u64(&self) -> Result> { impl_as_for_value_ref!(self, UInt64) } - pub fn as_f32(&self) -> Result> { + pub fn try_into_f32(&self) -> Result> { match self { ValueRef::Null => Ok(None), ValueRef::Float32(f) => Ok(Some(f.0)), + ValueRef::Json(v) => v.try_into_f32(), other => error::CastTypeSnafu { msg: format!("Failed to cast value ref {:?} to ValueRef::Float32", other,), } @@ -1305,10 +1398,11 @@ impl<'a> ValueRef<'a> { } } - pub fn as_f64(&self) -> Result> { + pub fn try_into_f64(&self) -> Result> { match self { ValueRef::Null => Ok(None), ValueRef::Float64(f) => Ok(Some(f.0)), + ValueRef::Json(v) => v.try_into_f64(), other => error::CastTypeSnafu { msg: format!("Failed to cast value ref {:?} to ValueRef::Float64", other,), } @@ -1317,50 +1411,51 @@ impl<'a> ValueRef<'a> { } /// Cast itself to [Date]. - pub fn as_date(&self) -> Result> { + pub fn try_into_date(&self) -> Result> { impl_as_for_value_ref!(self, Date) } /// Cast itself to [Timestamp]. - pub fn as_timestamp(&self) -> Result> { + pub fn try_into_timestamp(&self) -> Result> { impl_as_for_value_ref!(self, Timestamp) } /// Cast itself to [Time]. - pub fn as_time(&self) -> Result> { + pub fn try_into_time(&self) -> Result> { impl_as_for_value_ref!(self, Time) } - pub fn as_duration(&self) -> Result> { + pub fn try_into_duration(&self) -> Result> { impl_as_for_value_ref!(self, Duration) } /// Cast itself to [IntervalYearMonth]. - pub fn as_interval_year_month(&self) -> Result> { + pub fn try_into_interval_year_month(&self) -> Result> { impl_as_for_value_ref!(self, IntervalYearMonth) } /// Cast itself to [IntervalDayTime]. - pub fn as_interval_day_time(&self) -> Result> { + pub fn try_into_interval_day_time(&self) -> Result> { impl_as_for_value_ref!(self, IntervalDayTime) } /// Cast itself to [IntervalMonthDayNano]. - pub fn as_interval_month_day_nano(&self) -> Result> { + pub fn try_into_interval_month_day_nano(&self) -> Result> { impl_as_for_value_ref!(self, IntervalMonthDayNano) } /// Cast itself to [ListValueRef]. - pub fn as_list(&self) -> Result>> { + pub fn try_into_list(&self) -> Result>> { impl_as_for_value_ref!(self, List) } - pub fn as_struct(&self) -> Result>> { + /// Cast itself to [StructValueRef]. + pub fn try_into_struct(&self) -> Result>> { impl_as_for_value_ref!(self, Struct) } /// Cast itself to [Decimal128]. 
- pub fn as_decimal128(&self) -> Result> { + pub fn try_into_decimal128(&self) -> Result> { impl_as_for_value_ref!(self, Decimal128) } } @@ -1621,6 +1716,7 @@ impl ValueRef<'_> { StructValueRef::Ref(val) => val.estimated_size(), StructValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(), }, + ValueRef::Json(v) => v.data_size(), } } } @@ -1649,6 +1745,11 @@ pub(crate) mod tests { ConcreteDataType::string_datatype(), true, ), + StructField::new( + "awards".to_string(), + ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()), + true, + ), ]) } @@ -1660,6 +1761,7 @@ pub(crate) mod tests { Value::String("tom".into()), Value::UInt8(25), Value::String("94038".into()), + Value::List(build_list_value()), ]; StructValue::try_new(struct_items, struct_type).unwrap() } @@ -1671,11 +1773,16 @@ pub(crate) mod tests { ScalarValue::Utf8(Some("tom".into())).to_array().unwrap(), ScalarValue::UInt8(Some(25)).to_array().unwrap(), ScalarValue::Utf8(Some("94038".into())).to_array().unwrap(), + build_scalar_list_value().to_array().unwrap(), ]; let struct_arrow_array = StructArray::new(struct_type.as_arrow_fields(), arrays, None); ScalarValue::Struct(Arc::new(struct_arrow_array)) } + pub fn build_list_type() -> ConcreteDataType { + ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()) + } + pub(crate) fn build_list_value() -> ListValue { let items = vec![Value::Boolean(true), Value::Boolean(false)]; ListValue::new(items, ConcreteDataType::boolean_datatype()) @@ -2151,6 +2258,23 @@ pub(crate) mod tests { &ConcreteDataType::struct_datatype(build_struct_type()), &Value::Struct(build_struct_value()), ); + + check_type_and_value( + &ConcreteDataType::json_native_datatype(ConcreteDataType::boolean_datatype()), + &Value::Json(Box::new(Value::Boolean(true))), + ); + + check_type_and_value( + &ConcreteDataType::json_native_datatype(build_list_type()), + &Value::Json(Box::new(Value::List(build_list_value()))), + ); + + check_type_and_value( + &ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype( + build_struct_type(), + )), + &Value::Json(Box::new(Value::Struct(build_struct_value()))), + ); } #[test] @@ -2281,7 +2405,35 @@ pub(crate) mod tests { ) .unwrap(); assert_eq!( - serde_json::Value::try_from(Value::Struct(struct_value)).unwrap(), + serde_json::Value::try_from(Value::Struct(struct_value.clone())).unwrap(), + serde_json::json!({ + "num": 42, + "name": "tomcat", + "yes_or_no": true + }) + ); + + // string wrapped in json + assert_eq!( + serde_json::Value::try_from(Value::Json(Box::new(Value::String("hello".into())))) + .unwrap(), + serde_json::json!("hello") + ); + + // list wrapped in json + assert_eq!( + serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new( + vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),], + ConcreteDataType::int32_datatype() + ))))) + .unwrap(), + serde_json::json!([1, 2, 3]) + ); + + // struct wrapped in json + assert_eq!( + serde_json::Value::try_from(Value::Json(Box::new(Value::Struct(struct_value)))) + .unwrap(), serde_json::json!({ "num": 42, "name": "tomcat", @@ -2293,6 +2445,7 @@ pub(crate) mod tests { #[test] fn test_null_value() { assert!(Value::Null.is_null()); + assert!(Value::Json(Box::new(Value::Null)).is_null()); assert!(!Value::Boolean(true).is_null()); assert!(Value::Null < Value::Boolean(false)); assert!(Value::Boolean(true) > Value::Null); @@ -2371,6 +2524,13 @@ pub(crate) mod tests { ValueRef::Struct(StructValueRef::Ref(&struct_value)), 
Value::Struct(struct_value.clone()).as_value_ref() ); + + assert_eq!( + ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref( + &struct_value + )))), + Value::Json(Box::new(Value::Struct(struct_value.clone()))).as_value_ref() + ); } #[test] @@ -2381,11 +2541,11 @@ pub(crate) mod tests { }; } - check_as_null!(as_binary); - check_as_null!(as_string); - check_as_null!(as_boolean); - check_as_null!(as_date); - check_as_null!(as_list); + check_as_null!(try_into_binary); + check_as_null!(try_into_string); + check_as_null!(try_into_boolean); + check_as_null!(try_into_list); + check_as_null!(try_into_struct); macro_rules! check_as_correct { ($data: expr, $Variant: ident, $method: ident) => { @@ -2393,27 +2553,29 @@ pub(crate) mod tests { }; } - check_as_correct!("hello", String, as_string); - check_as_correct!("hello".as_bytes(), Binary, as_binary); - check_as_correct!(true, Boolean, as_boolean); - check_as_correct!(Date::new(123), Date, as_date); - check_as_correct!(Time::new_second(12), Time, as_time); - check_as_correct!(Duration::new_second(12), Duration, as_duration); + check_as_correct!("hello", String, try_into_string); + check_as_correct!("hello".as_bytes(), Binary, try_into_binary); + check_as_correct!(true, Boolean, try_into_boolean); + check_as_correct!(Date::new(123), Date, try_into_date); + check_as_correct!(Time::new_second(12), Time, try_into_time); + check_as_correct!(Duration::new_second(12), Duration, try_into_duration); let list = build_list_value(); - check_as_correct!(ListValueRef::Ref { val: &list }, List, as_list); + check_as_correct!(ListValueRef::Ref { val: &list }, List, try_into_list); let struct_value = build_struct_value(); - check_as_correct!(StructValueRef::Ref(&struct_value), Struct, as_struct); + check_as_correct!(StructValueRef::Ref(&struct_value), Struct, try_into_struct); let wrong_value = ValueRef::Int32(12345); - assert!(wrong_value.as_binary().is_err()); - assert!(wrong_value.as_string().is_err()); - assert!(wrong_value.as_boolean().is_err()); - assert!(wrong_value.as_date().is_err()); - assert!(wrong_value.as_list().is_err()); - assert!(wrong_value.as_time().is_err()); - assert!(wrong_value.as_timestamp().is_err()); + assert!(wrong_value.try_into_binary().is_err()); + assert!(wrong_value.try_into_string().is_err()); + assert!(wrong_value.try_into_boolean().is_err()); + assert!(wrong_value.try_into_list().is_err()); + assert!(wrong_value.try_into_struct().is_err()); + assert!(wrong_value.try_into_date().is_err()); + assert!(wrong_value.try_into_time().is_err()); + assert!(wrong_value.try_into_timestamp().is_err()); + assert!(wrong_value.try_into_duration().is_err()); } #[test] @@ -2489,8 +2651,13 @@ pub(crate) mod tests { assert_eq!( Value::Struct(build_struct_value()).to_string(), - "{ id: 1, name: tom, age: 25, address: 94038 }" + "{ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] }" ); + + assert_eq!( + Value::Json(Box::new(Value::Struct(build_struct_value()))).to_string(), + "Json({ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] })" + ) } #[test] @@ -2977,7 +3144,14 @@ pub(crate) mod tests { check_value_ref_size_eq( &ValueRef::Struct(StructValueRef::Ref(&build_struct_value())), - 13, + 15, + ); + + check_value_ref_size_eq( + &ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref( + &build_struct_value(), + )))), + 15, ); } diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index abda58e7faa0..11572eb5b0d9 100644 --- a/src/datatypes/src/vectors/binary.rs +++ 
b/src/datatypes/src/vectors/binary.rs @@ -242,7 +242,7 @@ impl MutableVector for BinaryVectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - match value.as_binary()? { + match value.try_into_binary()? { Some(v) => self.mutable_array.append_value(v), None => self.mutable_array.append_null(), } @@ -475,7 +475,7 @@ mod tests { .collect::>(); for i in 0..3 { assert_eq!( - json_vector.get_ref(i).as_binary().unwrap().unwrap(), + json_vector.get_ref(i).try_into_binary().unwrap().unwrap(), jsonbs.get(i).unwrap().as_slice() ); } @@ -486,7 +486,7 @@ mod tests { .unwrap(); for i in 0..3 { assert_eq!( - json_vector.get_ref(i).as_binary().unwrap().unwrap(), + json_vector.get_ref(i).try_into_binary().unwrap().unwrap(), jsonbs.get(i).unwrap().as_slice() ); } @@ -551,8 +551,8 @@ mod tests { assert_eq!(converted.len(), expected.len()); for i in 0..3 { assert_eq!( - converted.get_ref(i).as_binary().unwrap().unwrap(), - expected.get_ref(i).as_binary().unwrap().unwrap() + converted.get_ref(i).try_into_binary().unwrap().unwrap(), + expected.get_ref(i).try_into_binary().unwrap().unwrap() ); } } diff --git a/src/datatypes/src/vectors/boolean.rs b/src/datatypes/src/vectors/boolean.rs index 3bcc9118ed64..6013762133f8 100644 --- a/src/datatypes/src/vectors/boolean.rs +++ b/src/datatypes/src/vectors/boolean.rs @@ -180,7 +180,7 @@ impl MutableVector for BooleanVectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - match value.as_boolean()? { + match value.try_into_boolean()? { Some(v) => self.mutable_array.append_value(v), None => self.mutable_array.append_null(), } diff --git a/src/datatypes/src/vectors/decimal.rs b/src/datatypes/src/vectors/decimal.rs index 6ba3fb6267d9..9a1478dfffac 100644 --- a/src/datatypes/src/vectors/decimal.rs +++ b/src/datatypes/src/vectors/decimal.rs @@ -315,7 +315,7 @@ impl MutableVector for Decimal128VectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - let decimal_val = value.as_decimal128()?.map(|v| v.val()); + let decimal_val = value.try_into_decimal128()?.map(|v| v.val()); self.mutable_array.append_option(decimal_val); Ok(()) } diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index 035ebf4ab6dc..74363cc52992 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -245,7 +245,9 @@ impl Helper { length, ) } - ScalarValue::Decimal256(_, _, _) + ScalarValue::Decimal32(_, _, _) + | ScalarValue::Decimal64(_, _, _) + | ScalarValue::Decimal256(_, _, _) | ScalarValue::FixedSizeList(_) | ScalarValue::LargeList(_) | ScalarValue::Dictionary(_, _) @@ -291,7 +293,8 @@ impl Helper { ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?), ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?), ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?), - ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => { + ArrowDataType::LargeUtf8 => Arc::new(StringVector::try_from_arrow_array(array)?), + ArrowDataType::Utf8View => { let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8) .context(crate::error::ArrowComputeSnafu)?; Arc::new(StringVector::try_from_arrow_array(array)?) 
@@ -742,17 +745,17 @@ mod tests { #[test] fn test_large_string_array_into_vector() { let input_vec = vec!["a", "b"]; - let assertion_array = StringArray::from(input_vec.clone()); + let assertion_array = LargeStringArray::from(input_vec.clone()); let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec)); let vector = Helper::try_into_vector(large_string_array).unwrap(); assert_eq!(2, vector.len()); assert_eq!(0, vector.null_count()); - let output_arrow_array: StringArray = vector + let output_arrow_array: LargeStringArray = vector .to_arrow_array() .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap() .clone(); assert_eq!(&assertion_array, &output_arrow_array); diff --git a/src/datatypes/src/vectors/list.rs b/src/datatypes/src/vectors/list.rs index d1a1fb14a212..feda1beeea6a 100644 --- a/src/datatypes/src/vectors/list.rs +++ b/src/datatypes/src/vectors/list.rs @@ -284,7 +284,7 @@ impl MutableVector for ListVectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - if let Some(list_ref) = value.as_list()? { + if let Some(list_ref) = value.try_into_list()? { match list_ref { ListValueRef::Indexed { vector, idx } => match vector.get(idx).as_list()? { Some(list_value) => self.push_list_value(list_value)?, diff --git a/src/datatypes/src/vectors/string.rs b/src/datatypes/src/vectors/string.rs index ba3023522f55..2f6cddb32845 100644 --- a/src/datatypes/src/vectors/string.rs +++ b/src/datatypes/src/vectors/string.rs @@ -18,7 +18,9 @@ use std::sync::Arc; use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef}; use snafu::ResultExt; -use crate::arrow_array::{MutableStringArray, StringArray}; +use crate::arrow_array::{ + LargeStringArray, MutableLargeStringArray, MutableStringArray, StringArray, +}; use crate::data_type::ConcreteDataType; use crate::error::{self, Result}; use crate::scalars::{ScalarVector, ScalarVectorBuilder}; @@ -26,69 +28,93 @@ use crate::serialize::Serializable; use crate::value::{Value, ValueRef}; use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef}; +/// Internal representation for string arrays +#[derive(Debug, PartialEq)] +enum StringArrayData { + String(StringArray), + LargeString(LargeStringArray), +} + /// Vector of strings. 
#[derive(Debug, PartialEq)] pub struct StringVector { - array: StringArray, + array: StringArrayData, } impl StringVector { pub(crate) fn as_arrow(&self) -> &dyn Array { - &self.array + match &self.array { + StringArrayData::String(array) => array, + StringArrayData::LargeString(array) => array, + } + } + + /// Create a StringVector from a regular StringArray + pub fn from_string_array(array: StringArray) -> Self { + Self { + array: StringArrayData::String(array), + } + } + + /// Create a StringVector from a LargeStringArray + pub fn from_large_string_array(array: LargeStringArray) -> Self { + Self { + array: StringArrayData::LargeString(array), + } + } + + pub fn from_slice>(slice: &[T]) -> Self { + Self::from_string_array(StringArray::from_iter( + slice.iter().map(|s| Some(s.as_ref())), + )) } } impl From for StringVector { fn from(array: StringArray) -> Self { - Self { array } + Self::from_string_array(array) + } +} + +impl From for StringVector { + fn from(array: LargeStringArray) -> Self { + Self::from_large_string_array(array) } } impl From>> for StringVector { fn from(data: Vec>) -> Self { - Self { - array: StringArray::from_iter(data), - } + Self::from_string_array(StringArray::from_iter(data)) } } impl From>> for StringVector { fn from(data: Vec>) -> Self { - Self { - array: StringArray::from_iter(data), - } + Self::from_string_array(StringArray::from_iter(data)) } } impl From<&[Option]> for StringVector { fn from(data: &[Option]) -> Self { - Self { - array: StringArray::from_iter(data), - } + Self::from_string_array(StringArray::from_iter(data)) } } impl From<&[Option<&str>]> for StringVector { fn from(data: &[Option<&str>]) -> Self { - Self { - array: StringArray::from_iter(data), - } + Self::from_string_array(StringArray::from_iter(data)) } } impl From> for StringVector { fn from(data: Vec) -> Self { - Self { - array: StringArray::from_iter(data.into_iter().map(Some)), - } + Self::from_string_array(StringArray::from_iter(data.into_iter().map(Some))) } } impl From> for StringVector { fn from(data: Vec<&str>) -> Self { - Self { - array: StringArray::from_iter(data.into_iter().map(Some)), - } + Self::from_string_array(StringArray::from_iter(data.into_iter().map(Some))) } } @@ -106,67 +132,177 @@ impl Vector for StringVector { } fn len(&self) -> usize { - self.array.len() + match &self.array { + StringArrayData::String(array) => array.len(), + StringArrayData::LargeString(array) => array.len(), + } } fn to_arrow_array(&self) -> ArrayRef { - Arc::new(self.array.clone()) + match &self.array { + StringArrayData::String(array) => Arc::new(array.clone()), + StringArrayData::LargeString(array) => Arc::new(array.clone()), + } } fn to_boxed_arrow_array(&self) -> Box { - Box::new(self.array.clone()) + match &self.array { + StringArrayData::String(array) => Box::new(array.clone()), + StringArrayData::LargeString(array) => Box::new(array.clone()), + } } fn validity(&self) -> Validity { - vectors::impl_validity_for_vector!(self.array) + match &self.array { + StringArrayData::String(array) => vectors::impl_validity_for_vector!(array), + StringArrayData::LargeString(array) => vectors::impl_validity_for_vector!(array), + } } fn memory_size(&self) -> usize { - self.array.get_buffer_memory_size() + match &self.array { + StringArrayData::String(array) => array.get_buffer_memory_size(), + StringArrayData::LargeString(array) => array.get_buffer_memory_size(), + } } fn null_count(&self) -> usize { - self.array.null_count() + match &self.array { + StringArrayData::String(array) => array.null_count(), + 
StringArrayData::LargeString(array) => array.null_count(), + } } fn is_null(&self, row: usize) -> bool { - self.array.is_null(row) + match &self.array { + StringArrayData::String(array) => array.is_null(row), + StringArrayData::LargeString(array) => array.is_null(row), + } } fn slice(&self, offset: usize, length: usize) -> VectorRef { - Arc::new(Self::from(self.array.slice(offset, length))) + match &self.array { + StringArrayData::String(array) => { + Arc::new(Self::from_string_array(array.slice(offset, length))) + } + StringArrayData::LargeString(array) => { + Arc::new(Self::from_large_string_array(array.slice(offset, length))) + } + } } fn get(&self, index: usize) -> Value { - vectors::impl_get_for_vector!(self.array, index) + match &self.array { + StringArrayData::String(array) => vectors::impl_get_for_vector!(array, index), + StringArrayData::LargeString(array) => vectors::impl_get_for_vector!(array, index), + } } fn get_ref(&self, index: usize) -> ValueRef<'_> { - vectors::impl_get_ref_for_vector!(self.array, index) + match &self.array { + StringArrayData::String(array) => vectors::impl_get_ref_for_vector!(array, index), + StringArrayData::LargeString(array) => vectors::impl_get_ref_for_vector!(array, index), + } + } +} + +pub enum StringIter<'a> { + String(ArrayIter<&'a StringArray>), + LargeString(ArrayIter<&'a LargeStringArray>), +} + +impl<'a> Iterator for StringIter<'a> { + type Item = Option<&'a str>; + + fn next(&mut self) -> Option { + match self { + StringIter::String(iter) => iter.next(), + StringIter::LargeString(iter) => iter.next(), + } } } impl ScalarVector for StringVector { type OwnedItem = String; type RefItem<'a> = &'a str; - type Iter<'a> = ArrayIter<&'a StringArray>; + type Iter<'a> = StringIter<'a>; type Builder = StringVectorBuilder; fn get_data(&self, idx: usize) -> Option> { - if self.array.is_valid(idx) { - Some(self.array.value(idx)) - } else { - None + match &self.array { + StringArrayData::String(array) => { + if array.is_valid(idx) { + Some(array.value(idx)) + } else { + None + } + } + StringArrayData::LargeString(array) => { + if array.is_valid(idx) { + Some(array.value(idx)) + } else { + None + } + } } } fn iter_data(&self) -> Self::Iter<'_> { - self.array.iter() + match &self.array { + StringArrayData::String(array) => StringIter::String(array.iter()), + StringArrayData::LargeString(array) => StringIter::LargeString(array.iter()), + } } } +/// Internal representation for mutable string arrays +enum MutableStringArrayData { + String(MutableStringArray), + LargeString(MutableLargeStringArray), +} + pub struct StringVectorBuilder { - pub mutable_array: MutableStringArray, + mutable_array: MutableStringArrayData, +} + +impl Default for StringVectorBuilder { + fn default() -> Self { + Self::new() + } +} + +impl StringVectorBuilder { + /// Create a builder for regular strings + pub fn new() -> Self { + Self { + mutable_array: MutableStringArrayData::String(MutableStringArray::new()), + } + } + + /// Create a builder for large strings + pub fn new_large() -> Self { + Self { + mutable_array: MutableStringArrayData::LargeString(MutableLargeStringArray::new()), + } + } + + /// Create a builder for regular strings with capacity + pub fn with_string_capacity(capacity: usize) -> Self { + Self { + mutable_array: MutableStringArrayData::String(MutableStringArray::with_capacity( + capacity, 0, + )), + } + } + + /// Create a builder for large strings with capacity + pub fn with_large_capacity(capacity: usize) -> Self { + Self { + mutable_array: 
MutableStringArrayData::LargeString( + MutableLargeStringArray::with_capacity(capacity, 0), + ), + } + } } impl MutableVector for StringVectorBuilder { @@ -175,7 +311,10 @@ impl MutableVector for StringVectorBuilder { } fn len(&self) -> usize { - self.mutable_array.len() + match &self.mutable_array { + MutableStringArrayData::String(array) => array.len(), + MutableStringArrayData::LargeString(array) => array.len(), + } } fn as_any(&self) -> &dyn Any { @@ -193,11 +332,16 @@ impl MutableVector for StringVectorBuilder { fn to_vector_cloned(&self) -> VectorRef { Arc::new(self.finish_cloned()) } - fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - match value.as_string()? { - Some(v) => self.mutable_array.append_value(v), - None => self.mutable_array.append_null(), + match value.try_into_string()? { + Some(v) => match &mut self.mutable_array { + MutableStringArrayData::String(array) => array.append_value(v), + MutableStringArrayData::LargeString(array) => array.append_value(v), + }, + None => match &mut self.mutable_array { + MutableStringArrayData::String(array) => array.append_null(), + MutableStringArrayData::LargeString(array) => array.append_null(), + }, } Ok(()) } @@ -207,7 +351,10 @@ impl MutableVector for StringVectorBuilder { } fn push_null(&mut self) { - self.mutable_array.append_null() + match &mut self.mutable_array { + MutableStringArrayData::String(array) => array.append_null(), + MutableStringArrayData::LargeString(array) => array.append_null(), + } } } @@ -216,26 +363,44 @@ impl ScalarVectorBuilder for StringVectorBuilder { fn with_capacity(capacity: usize) -> Self { Self { - mutable_array: MutableStringArray::with_capacity(capacity, 0), + mutable_array: MutableStringArrayData::String(MutableStringArray::with_capacity( + capacity, 0, + )), } } fn push(&mut self, value: Option<::RefItem<'_>>) { match value { - Some(v) => self.mutable_array.append_value(v), - None => self.mutable_array.append_null(), + Some(v) => match &mut self.mutable_array { + MutableStringArrayData::String(array) => array.append_value(v), + MutableStringArrayData::LargeString(array) => array.append_value(v), + }, + None => match &mut self.mutable_array { + MutableStringArrayData::String(array) => array.append_null(), + MutableStringArrayData::LargeString(array) => array.append_null(), + }, } } fn finish(&mut self) -> Self::VectorType { - StringVector { - array: self.mutable_array.finish(), + match &mut self.mutable_array { + MutableStringArrayData::String(array) => { + StringVector::from_string_array(array.finish()) + } + MutableStringArrayData::LargeString(array) => { + StringVector::from_large_string_array(array.finish()) + } } } fn finish_cloned(&self) -> Self::VectorType { - StringVector { - array: self.mutable_array.finish_cloned(), + match &self.mutable_array { + MutableStringArrayData::String(array) => { + StringVector::from_string_array(array.finish_cloned()) + } + MutableStringArrayData::LargeString(array) => { + StringVector::from_large_string_array(array.finish_cloned()) + } } } } @@ -249,7 +414,26 @@ impl Serializable for StringVector { } } -vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector); +impl StringVector { + pub fn try_from_arrow_array( + array: impl AsRef, + ) -> crate::error::Result { + let array = array.as_ref(); + + if let Some(string_array) = array.as_any().downcast_ref::() { + Ok(StringVector::from_string_array(string_array.clone())) + } else if let Some(large_string_array) = array.as_any().downcast_ref::() { + 
Ok(StringVector::from_large_string_array( + large_string_array.clone(), + )) + } else { + Err(crate::error::UnsupportedArrowTypeSnafu { + arrow_type: array.data_type().clone(), + } + .build()) + } + } +} #[cfg(test)] mod tests { diff --git a/src/datatypes/src/vectors/struct_vector.rs b/src/datatypes/src/vectors/struct_vector.rs index b46d1353fb5a..a5af4350996f 100644 --- a/src/datatypes/src/vectors/struct_vector.rs +++ b/src/datatypes/src/vectors/struct_vector.rs @@ -63,6 +63,10 @@ impl StructVector { pub fn struct_type(&self) -> &StructType { &self.fields } + + pub fn take_array(self) -> StructArray { + self.array + } } impl Vector for StructVector { @@ -308,7 +312,7 @@ impl StructVectorBuilder { } } - fn push_struct_value(&mut self, struct_value: &StructValue) -> Result<()> { + pub fn push_struct_value(&mut self, struct_value: &StructValue) -> Result<()> { for (index, value) in struct_value.items().iter().enumerate() { self.value_builders[index].try_push_value_ref(&value.as_value_ref())?; } @@ -317,7 +321,7 @@ impl StructVectorBuilder { Ok(()) } - fn push_null_struct_value(&mut self) { + pub fn push_null_struct_value(&mut self) { for builder in &mut self.value_builders { builder.push_null(); } @@ -351,7 +355,7 @@ impl MutableVector for StructVectorBuilder { } fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> { - if let Some(struct_ref) = value.as_struct()? { + if let Some(struct_ref) = value.try_into_struct()? { match struct_ref { StructValueRef::Indexed { vector, idx } => match vector.get(idx).as_struct()? { Some(struct_value) => self.push_struct_value(struct_value)?, @@ -438,6 +442,8 @@ impl ScalarVectorBuilder for StructVectorBuilder { #[cfg(test)] mod tests { use super::*; + use crate::types::StructField; + use crate::value::ListValue; use crate::value::tests::*; #[test] @@ -478,9 +484,45 @@ mod tests { assert_eq!(items.next(), Some(&Value::String("tom".into()))); assert_eq!(items.next(), Some(&Value::UInt8(25))); assert_eq!(items.next(), Some(&Value::String("94038".into()))); + assert_eq!(items.next(), Some(&Value::List(build_list_value()))); assert_eq!(items.next(), None); } else { panic!("Expected a struct value"); } } + + #[test] + fn test_deep_nested_struct_list() { + // level 1: struct + let struct_type = ConcreteDataType::struct_datatype(build_struct_type()); + let struct_value = build_struct_value(); + // level 2: list + let list_type = ConcreteDataType::list_datatype(struct_type.clone()); + let list_value = ListValue::new( + vec![ + Value::Struct(struct_value.clone()), + Value::Struct(struct_value.clone()), + ], + struct_type.clone(), + ); + // level 3: struct + let root_type = StructType::new(vec![StructField::new( + "items".to_string(), + list_type, + false, + )]); + let root_value = StructValue::new(vec![Value::List(list_value)], root_type.clone()); + + let mut builder = StructVectorBuilder::with_type_and_capacity(root_type.clone(), 20); + builder.push(Some(StructValueRef::Ref(&root_value))); + + let vector = builder.finish(); + assert_eq!(vector.len(), 1); + assert_eq!(vector.null_count(), 0); + assert_eq!( + vector.data_type(), + ConcreteDataType::struct_datatype(root_type) + ); + assert_eq!(vector.get(0), Value::Struct(root_value)); + } } diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index 9e0204d91736..689a092b90bd 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -32,6 +32,7 @@ common-options.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true +common-stat.workspace = 
true common-telemetry.workspace = true common-time.workspace = true common-version.workspace = true @@ -48,6 +49,7 @@ enum_dispatch = "0.3" futures.workspace = true get-size2 = "0.1.2" greptime-proto.workspace = true +hostname.workspace = true http.workspace = true humantime-serde.workspace = true itertools.workspace = true diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 8750e4f40837..9721d490405f 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -26,6 +26,7 @@ use common_error::ext::BoxedError; use common_meta::key::TableMetadataManagerRef; use common_options::memory::MemoryOptions; use common_runtime::JoinHandle; +use common_stat::get_total_cpu_cores; use common_telemetry::logging::{LoggingOptions, TracingOptions}; use common_telemetry::{debug, info, trace}; use datatypes::schema::ColumnSchema; @@ -92,7 +93,7 @@ pub struct FlowConfig { impl Default for FlowConfig { fn default() -> Self { Self { - num_workers: (common_config::utils::get_cpus() / 2).max(1), + num_workers: (get_total_cpu_cores() / 2).max(1), batching_mode: BatchingModeOptions::default(), } } @@ -141,7 +142,7 @@ impl Default for FlownodeOptions { impl Configurable for FlownodeOptions { fn validate_sanitize(&mut self) -> common_config::error::Result<()> { if self.flow.num_workers == 0 { - self.flow.num_workers = (common_config::utils::get_cpus() / 2).max(1); + self.flow.num_workers = (get_total_cpu_cores() / 2).max(1); } Ok(()) } diff --git a/src/flow/src/df_optimizer.rs b/src/flow/src/df_optimizer.rs index 5fa180d53c60..1d41d093464a 100644 --- a/src/flow/src/df_optimizer.rs +++ b/src/flow/src/df_optimizer.rs @@ -427,7 +427,7 @@ fn expand_tumble_analyzer( /// This is a placeholder for tumble_start and tumble_end function, so that datafusion can /// recognize them as scalar function -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub struct TumbleExpand { signature: Signature, name: String, diff --git a/src/flow/src/heartbeat.rs b/src/flow/src/heartbeat.rs index 62db52444f48..cc42668f5a00 100644 --- a/src/flow/src/heartbeat.rs +++ b/src/flow/src/heartbeat.rs @@ -185,6 +185,10 @@ impl HeartbeatTask { start_time_ms, cpus, memory_bytes, + hostname: hostname::get() + .unwrap_or_default() + .to_string_lossy() + .to_string(), }) } diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index bf6b6bd25d92..b90e4f5eb2f7 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -46,6 +46,7 @@ datafusion-expr.workspace = true datanode.workspace = true datatypes.workspace = true futures.workspace = true +hostname.workspace = true humantime.workspace = true humantime-serde.workspace = true lazy_static.workspace = true @@ -70,7 +71,6 @@ snafu.workspace = true sql.workspace = true sqlparser.workspace = true store-api.workspace = true -substrait.workspace = true table.workspace = true tokio.workspace = true tokio-util.workspace = true diff --git a/src/frontend/src/heartbeat.rs b/src/frontend/src/heartbeat.rs index 59514f7da48b..76fdc3305bdf 100644 --- a/src/frontend/src/heartbeat.rs +++ b/src/frontend/src/heartbeat.rs @@ -158,6 +158,10 @@ impl HeartbeatTask { start_time_ms, cpus, memory_bytes, + hostname: hostname::get() + .unwrap_or_default() + .to_string_lossy() + .to_string(), }) } diff --git a/src/frontend/src/limiter.rs b/src/frontend/src/limiter.rs index 8c728c42c7ce..e0e32e6b1b54 100644 --- a/src/frontend/src/limiter.rs +++ b/src/frontend/src/limiter.rs @@ -229,6 +229,12 @@ impl Limiter { .unwrap_or(0) }) .sum(), + ValueData::JsonValue(inner) => inner + .as_ref() + 
.value_data + .as_ref() + .map(Self::size_of_value_data) + .unwrap_or(0), } } } diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 971118d8350f..bde6959b8912 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -34,6 +34,7 @@ roaring = "0.10" serde.workspace = true serde_json.workspace = true snafu.workspace = true +store-api.workspace = true tantivy = { version = "0.24", features = ["zstd-compression"] } tantivy-jieba = "0.16" tokio.workspace = true diff --git a/src/index/src/fulltext_index.rs b/src/index/src/fulltext_index.rs index 4cbbbdf477fd..8de28c049019 100644 --- a/src/index/src/fulltext_index.rs +++ b/src/index/src/fulltext_index.rs @@ -75,3 +75,12 @@ impl Config { Ok(Self::default()) } } + +impl Analyzer { + pub fn to_str(&self) -> &'static str { + match self { + Analyzer::English => "English", + Analyzer::Chinese => "Chinese", + } + } +} diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 8c88a8d8007e..547f880bb45d 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -21,6 +21,7 @@ pub mod error; pub mod external_provider; pub mod fulltext_index; pub mod inverted_index; +pub mod target; pub type Bytes = Vec<u8>; pub type BytesRef<'a> = &'a [u8]; diff --git a/src/index/src/target.rs b/src/index/src/target.rs new file mode 100644 index 000000000000..cca0a2819232 --- /dev/null +++ b/src/index/src/target.rs @@ -0,0 +1,107 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; +use std::fmt::{self, Display}; + +use common_error::ext::ErrorExt; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use serde::{Deserialize, Serialize}; +use snafu::{Snafu, ensure}; +use store_api::storage::ColumnId; + +/// Describes an index target. Column ids are the only supported variant for now. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum IndexTarget { + ColumnId(ColumnId), +} + +impl Display for IndexTarget { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IndexTarget::ColumnId(id) => write!(f, "{}", id), + } + } +} + +impl IndexTarget { + /// Parse a target key string back into an index target description. + pub fn decode(key: &str) -> Result<Self, TargetKeyError> { + validate_column_key(key)?; + let id = key + .parse::<ColumnId>() + .map_err(|_| InvalidColumnIdSnafu { value: key }.build())?; + Ok(IndexTarget::ColumnId(id)) + } +} + +/// Errors that can occur when working with index target keys.
+#[derive(Snafu, Clone, PartialEq, Eq)] +#[stack_trace_debug] +pub enum TargetKeyError { + #[snafu(display("target key cannot be empty"))] + Empty, + + #[snafu(display("target key must contain digits only: {key}"))] + InvalidCharacters { key: String }, + + #[snafu(display("failed to parse column id from '{value}'"))] + InvalidColumnId { value: String }, +} + +impl ErrorExt for TargetKeyError { + fn status_code(&self) -> StatusCode { + StatusCode::InvalidArguments + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +fn validate_column_key(key: &str) -> Result<(), TargetKeyError> { + ensure!(!key.is_empty(), EmptySnafu); + ensure!( + key.chars().all(|ch| ch.is_ascii_digit()), + InvalidCharactersSnafu { key } + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_decode_column() { + let target = IndexTarget::ColumnId(42); + let key = format!("{}", target); + assert_eq!(key, "42"); + let decoded = IndexTarget::decode(&key).unwrap(); + assert_eq!(decoded, target); + } + + #[test] + fn decode_rejects_empty() { + let err = IndexTarget::decode("").unwrap_err(); + assert!(matches!(err, TargetKeyError::Empty)); + } + + #[test] + fn decode_rejects_invalid_digits() { + let err = IndexTarget::decode("1a2").unwrap_err(); + assert!(matches!(err, TargetKeyError::InvalidCharacters { .. })); + } +} diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index 9091156f5ca7..2a66c1570af5 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -353,6 +353,8 @@ impl ProcedureExecutor for MetaClient { } } +// TODO(zyy17): Allow deprecated fields for backward compatibility. Remove this when the deprecated fields are removed from the proto. +#[allow(deprecated)] #[async_trait::async_trait] impl ClusterInfo for MetaClient { type Error = Error; @@ -372,25 +374,61 @@ impl ClusterInfo for MetaClient { let (leader, followers) = cluster_client.get_metasrv_peers().await?; followers .into_iter() - .map(|node| NodeInfo { - peer: node.peer.unwrap_or_default(), - last_activity_ts, - status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }), - version: node.version, - git_commit: node.git_commit, - start_time_ms: node.start_time_ms, - cpus: node.cpus, - memory_bytes: node.memory_bytes, + .map(|node| { + if let Some(node_info) = node.info { + NodeInfo { + peer: node.peer.unwrap_or_default(), + last_activity_ts, + status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }), + version: node_info.version, + git_commit: node_info.git_commit, + start_time_ms: node_info.start_time_ms, + cpus: node_info.cpus, + memory_bytes: node_info.memory_bytes, + hostname: node_info.hostname, + } + } else { + // TODO(zyy17): It's for backward compatibility. Remove this when the deprecated fields are removed from the proto. 
+ NodeInfo { + peer: node.peer.unwrap_or_default(), + last_activity_ts, + status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }), + version: node.version, + git_commit: node.git_commit, + start_time_ms: node.start_time_ms, + cpus: node.cpus, + memory_bytes: node.memory_bytes, + hostname: "".to_string(), + } + } }) - .chain(leader.into_iter().map(|node| NodeInfo { - peer: node.peer.unwrap_or_default(), - last_activity_ts, - status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }), - version: node.version, - git_commit: node.git_commit, - start_time_ms: node.start_time_ms, - cpus: node.cpus, - memory_bytes: node.memory_bytes, + .chain(leader.into_iter().map(|node| { + if let Some(node_info) = node.info { + NodeInfo { + peer: node.peer.unwrap_or_default(), + last_activity_ts, + status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }), + version: node_info.version, + git_commit: node_info.git_commit, + start_time_ms: node_info.start_time_ms, + cpus: node_info.cpus, + memory_bytes: node_info.memory_bytes, + hostname: node_info.hostname, + } + } else { + // TODO(zyy17): It's for backward compatibility. Remove this when the deprecated fields are removed from the proto. + NodeInfo { + peer: node.peer.unwrap_or_default(), + last_activity_ts, + status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }), + version: node.version, + git_commit: node.git_commit, + start_time_ms: node.start_time_ms, + cpus: node.cpus, + memory_bytes: node.memory_bytes, + hostname: "".to_string(), + } + } })) .collect::>() } else { diff --git a/src/meta-srv/Cargo.toml b/src/meta-srv/Cargo.toml index b521ddad6cff..90a4fdc17b80 100644 --- a/src/meta-srv/Cargo.toml +++ b/src/meta-srv/Cargo.toml @@ -53,6 +53,7 @@ either.workspace = true etcd-client.workspace = true futures.workspace = true h2 = "0.3" +hostname.workspace = true http-body-util = "0.1" humantime.workspace = true humantime-serde.workspace = true diff --git a/src/meta-srv/src/discovery/lease.rs b/src/meta-srv/src/discovery/lease.rs index 557437964e54..46b92c0f1ae6 100644 --- a/src/meta-srv/src/discovery/lease.rs +++ b/src/meta-srv/src/discovery/lease.rs @@ -245,6 +245,7 @@ mod tests { start_time_ms: current_time_millis() as u64, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; let key_prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend); @@ -270,6 +271,7 @@ mod tests { start_time_ms: current_time_millis() as u64, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; in_memory @@ -307,6 +309,7 @@ mod tests { start_time_ms: last_activity_ts as u64, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; let key_prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend); diff --git a/src/meta-srv/src/election/rds/mysql.rs b/src/meta-srv/src/election/rds/mysql.rs index 6737f40054ce..a0890969f86c 100644 --- a/src/meta-srv/src/election/rds/mysql.rs +++ b/src/meta-srv/src/election/rds/mysql.rs @@ -1163,6 +1163,7 @@ mod tests { start_time_ms: 0, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; mysql_election.register_candidate(&node_info).await.unwrap(); } diff --git a/src/meta-srv/src/election/rds/postgres.rs b/src/meta-srv/src/election/rds/postgres.rs index f577cc111ab0..14b2bbb40986 100644 --- a/src/meta-srv/src/election/rds/postgres.rs +++ b/src/meta-srv/src/election/rds/postgres.rs @@ -1002,6 +1002,7 @@ mod tests { start_time_ms: 0, cpus: 0, memory_bytes: 0, + hostname: "test_hostname".to_string(), }; pg_election.register_candidate(&node_info).await.unwrap(); } diff 
--git a/src/meta-srv/src/handler/collect_cluster_info_handler.rs b/src/meta-srv/src/handler/collect_cluster_info_handler.rs index 8ce24e86368a..f144f3edc5dc 100644 --- a/src/meta-srv/src/handler/collect_cluster_info_handler.rs +++ b/src/meta-srv/src/handler/collect_cluster_info_handler.rs @@ -54,6 +54,7 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler { start_time_ms: info.start_time_ms, cpus: info.cpus, memory_bytes: info.memory_bytes, + hostname: info.hostname, }; put_into_memory_store(ctx, key, value).await?; @@ -89,6 +90,7 @@ impl HeartbeatHandler for CollectFlownodeClusterInfoHandler { start_time_ms: info.start_time_ms, cpus: info.cpus, memory_bytes: info.memory_bytes, + hostname: info.hostname, }; put_into_memory_store(ctx, key, value).await?; @@ -142,6 +144,7 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler { start_time_ms: info.start_time_ms, cpus: info.cpus, memory_bytes: info.memory_bytes, + hostname: info.hostname, }; put_into_memory_store(ctx, key, value).await?; diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 615e75d3178d..4c2c7fcf5339 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -375,11 +375,16 @@ pub struct MetasrvNodeInfo { // The node cpus #[serde(default)] pub cpus: u32, - #[serde(default)] // The node memory bytes + #[serde(default)] pub memory_bytes: u64, + // The node hostname + #[serde(default)] + pub hostname: String, } +// TODO(zyy17): Allow deprecated fields for backward compatibility. Remove this when the deprecated top-level fields are removed from the proto. +#[allow(deprecated)] impl From for api::v1::meta::MetasrvNodeInfo { fn from(node_info: MetasrvNodeInfo) -> Self { Self { @@ -387,11 +392,22 @@ impl From for api::v1::meta::MetasrvNodeInfo { addr: node_info.addr, ..Default::default() }), - version: node_info.version, - git_commit: node_info.git_commit, + // TODO(zyy17): The following top-level fields are deprecated. They are kept for backward compatibility and will be removed in a future version. + // New code should use the fields in `info.NodeInfo` instead. + version: node_info.version.clone(), + git_commit: node_info.git_commit.clone(), start_time_ms: node_info.start_time_ms, cpus: node_info.cpus, memory_bytes: node_info.memory_bytes, + // The canonical location for node information. 
+ info: Some(api::v1::meta::NodeInfo { + version: node_info.version, + git_commit: node_info.git_commit, + start_time_ms: node_info.start_time_ms, + cpus: node_info.cpus, + memory_bytes: node_info.memory_bytes, + hostname: node_info.hostname, + }), } } } @@ -696,6 +712,10 @@ impl Metasrv { start_time_ms: self.start_time_ms(), cpus: self.resource_spec().cpus as u32, memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(), + hostname: hostname::get() + .unwrap_or_default() + .to_string_lossy() + .to_string(), } } diff --git a/src/meta-srv/src/procedure/utils.rs b/src/meta-srv/src/procedure/utils.rs index 02488e0f83bd..8f0aa9b7cba4 100644 --- a/src/meta-srv/src/procedure/utils.rs +++ b/src/meta-srv/src/procedure/utils.rs @@ -195,6 +195,7 @@ pub mod test_data { region_numbers: vec![1, 2, 3], options: TableOptions::default(), created_on: DateTime::default(), + updated_on: DateTime::default(), partition_key_indices: vec![], column_ids: vec![], }, diff --git a/src/meta-srv/src/service/cluster.rs b/src/meta-srv/src/service/cluster.rs index 392268ff6d35..e39337c37466 100644 --- a/src/meta-srv/src/service/cluster.rs +++ b/src/meta-srv/src/service/cluster.rs @@ -99,6 +99,10 @@ impl Metasrv { start_time_ms: self.start_time_ms(), cpus: self.resource_spec().cpus as u32, memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(), + hostname: hostname::get() + .unwrap_or_default() + .to_string_lossy() + .to_string(), } .into() } diff --git a/src/metric-engine/src/engine/flush.rs b/src/metric-engine/src/engine/flush.rs index acbced8f70f8..23899cbb0586 100644 --- a/src/metric-engine/src/engine/flush.rs +++ b/src/metric-engine/src/engine/flush.rs @@ -127,14 +127,13 @@ mod tests { assert_eq!( debug_format, r#" -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/data/.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000001/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/data/.parquet", file_size: 3157, index_file_path: Some("test_metric_region/11_0000000002/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/metadata/.parquet", file_size: 3429, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "", level: 0, file_path: 
"test_metric_region/11_0000000002/metadata/.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/data/.parquet", file_size: 3157, index_file_path: Some("test_metric_region/22_0000000042/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/metadata/.parquet", file_size: 3413, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"# +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000001/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/11_0000000002/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "", level: 0, file_path: "test_metric_region/11_0000000001/metadata/.parquet", file_size: 3505, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "", level: 0, file_path: "test_metric_region/11_0000000002/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, 
file_path: "test_metric_region/22_0000000042/data/.parquet", file_size: 3173, index_file_path: Some("test_metric_region/22_0000000042/data/index/.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "", level: 0, file_path: "test_metric_region/22_0000000042/metadata/.parquet", file_size: 3489, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"# ); - // list from storage let storage_entries = mito .all_ssts_from_storage() diff --git a/src/metric-engine/src/metadata_region.rs b/src/metric-engine/src/metadata_region.rs index d2d53306ba2c..cbd8e83bd3ca 100644 --- a/src/metric-engine/src/metadata_region.rs +++ b/src/metric-engine/src/metadata_region.rs @@ -324,7 +324,7 @@ fn decode_record_batch_to_key_and_value(batch: RecordBatch) -> Vec<(String, Stri .flat_map(move |row_index| { let key = key_col .get_ref(row_index) - .as_string() + .try_into_string() .unwrap() .map(|s| s.to_string()); @@ -333,7 +333,7 @@ fn decode_record_batch_to_key_and_value(batch: RecordBatch) -> Vec<(String, Stri k, val_col .get_ref(row_index) - .as_string() + .try_into_string() .unwrap() .map(|s| s.to_string()) .unwrap_or_default(), @@ -351,7 +351,7 @@ fn decode_record_batch_to_key(batch: RecordBatch) -> Vec { .flat_map(move |row_index| { key_col .get_ref(row_index) - .as_string() + .try_into_string() .unwrap() .map(|s| s.to_string()) }) @@ -614,7 +614,7 @@ impl MetadataRegion { let val = first_batch .column(0) .get_ref(0) - .as_string() + .try_into_string() .unwrap() .map(|s| s.to_string()); @@ -699,10 +699,20 @@ mod test { semantic_type, column_id: 5, }; - let expected = "{\"column_schema\":{\"name\":\"blabla\",\"data_type\":{\"String\":null},\"is_nullable\":false,\"is_time_index\":false,\"default_constraint\":null,\"metadata\":{}},\"semantic_type\":\"Tag\",\"column_id\":5}".to_string(); + let old_fmt = "{\"column_schema\":{\"name\":\"blabla\",\"data_type\":{\"String\":null},\"is_nullable\":false,\"is_time_index\":false,\"default_constraint\":null,\"metadata\":{}},\"semantic_type\":\"Tag\",\"column_id\":5}".to_string(); + let new_fmt = "{\"column_schema\":{\"name\":\"blabla\",\"data_type\":{\"String\":{\"size_type\":\"Utf8\"}},\"is_nullable\":false,\"is_time_index\":false,\"default_constraint\":null,\"metadata\":{}},\"semantic_type\":\"Tag\",\"column_id\":5}".to_string(); assert_eq!( MetadataRegion::serialize_column_metadata(&column_metadata), - expected + new_fmt + ); + // Ensure both old and new formats can be deserialized. 
+ assert_eq!( + MetadataRegion::deserialize_column_metadata(&old_fmt).unwrap(), + column_metadata + ); + assert_eq!( + MetadataRegion::deserialize_column_metadata(&new_fmt).unwrap(), + column_metadata ); let semantic_type = "\"Invalid Column Metadata\""; diff --git a/src/mito-codec/src/index.rs b/src/mito-codec/src/index.rs index e95f1a789f2a..d98a6d3a517b 100644 --- a/src/mito-codec/src/index.rs +++ b/src/mito-codec/src/index.rs @@ -17,7 +17,6 @@ use std::collections::HashMap; use std::sync::Arc; -use datatypes::data_type::ConcreteDataType; use datatypes::value::ValueRef; use memcomparable::Serializer; use snafu::{OptionExt, ResultExt, ensure}; @@ -49,9 +48,9 @@ impl IndexValueCodec { ) -> Result<()> { ensure!(!value.is_null(), IndexEncodeNullSnafu); - if matches!(field.data_type(), ConcreteDataType::String(_)) { + if field.data_type().is_string() { let value = value - .as_string() + .try_into_string() .context(FieldTypeMismatchSnafu)? .context(IndexEncodeNullSnafu)?; buffer.extend_from_slice(value.as_bytes()); diff --git a/src/mito-codec/src/key_values.rs b/src/mito-codec/src/key_values.rs index ffe27d522cf7..8f594d6ff82a 100644 --- a/src/mito-codec/src/key_values.rs +++ b/src/mito-codec/src/key_values.rs @@ -182,19 +182,19 @@ impl KeyValue<'_> { let Some(primary_key) = self.primary_keys().next() else { return 0; }; - let key = primary_key.as_binary().unwrap().unwrap(); + let key = primary_key.try_into_binary().unwrap().unwrap(); let mut deserializer = Deserializer::new(key); deserializer.advance(COLUMN_ID_ENCODE_SIZE); let field = SortField::new(ConcreteDataType::uint32_datatype()); let table_id = field.deserialize(&mut deserializer).unwrap(); - table_id.as_value_ref().as_u32().unwrap().unwrap() + table_id.as_value_ref().try_into_u32().unwrap().unwrap() } else { let Some(value) = self.primary_keys().next() else { return 0; }; - value.as_u32().unwrap().unwrap() + value.try_into_u32().unwrap().unwrap() } } diff --git a/src/mito-codec/src/row_converter/dense.rs b/src/mito-codec/src/row_converter/dense.rs index cd4d74666e7f..76c2d65d5a28 100644 --- a/src/mito-codec/src/row_converter/dense.rs +++ b/src/mito-codec/src/row_converter/dense.rs @@ -124,7 +124,7 @@ impl SortField { ConcreteDataType::$ty(_) => { paste!{ value - .[]() + .[]() .context(FieldTypeMismatchSnafu)? 
.serialize($serializer) .context(SerializeFieldSnafu)?; @@ -132,26 +132,26 @@ impl SortField { } )* ConcreteDataType::Timestamp(_) => { - let timestamp = value.as_timestamp().context(FieldTypeMismatchSnafu)?; + let timestamp = value.try_into_timestamp().context(FieldTypeMismatchSnafu)?; timestamp .map(|t|t.value()) .serialize($serializer) .context(SerializeFieldSnafu)?; } ConcreteDataType::Interval(IntervalType::YearMonth(_)) => { - let interval = value.as_interval_year_month().context(FieldTypeMismatchSnafu)?; + let interval = value.try_into_interval_year_month().context(FieldTypeMismatchSnafu)?; interval.map(|i| i.to_i32()) .serialize($serializer) .context(SerializeFieldSnafu)?; } ConcreteDataType::Interval(IntervalType::DayTime(_)) => { - let interval = value.as_interval_day_time().context(FieldTypeMismatchSnafu)?; + let interval = value.try_into_interval_day_time().context(FieldTypeMismatchSnafu)?; interval.map(|i| i.to_i64()) .serialize($serializer) .context(SerializeFieldSnafu)?; } ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => { - let interval = value.as_interval_month_day_nano().context(FieldTypeMismatchSnafu)?; + let interval = value.try_into_interval_month_day_nano().context(FieldTypeMismatchSnafu)?; interval.map(|i| i.to_i128()) .serialize($serializer) .context(SerializeFieldSnafu)?; diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 380913e25f07..4cc1efb8bc1e 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -16,6 +16,7 @@ workspace = true api.workspace = true aquamarine.workspace = true async-channel = "1.9" +common-stat.workspace = true async-stream.workspace = true async-trait.workspace = true bytemuck.workspace = true @@ -49,6 +50,7 @@ futures.workspace = true humantime-serde.workspace = true index.workspace = true itertools.workspace = true +greptime-proto.workspace = true lazy_static = "1.4" log-store = { workspace = true } mito-codec.workspace = true diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 9b313f1fa8b3..b371e39b78d2 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -170,6 +170,19 @@ impl CacheStrategy { } } + /// Calls [CacheManager::evict_puffin_cache()]. + pub async fn evict_puffin_cache(&self, file_id: RegionFileId) { + match self { + CacheStrategy::EnableAll(cache_manager) => { + cache_manager.evict_puffin_cache(file_id).await + } + CacheStrategy::Compaction(cache_manager) => { + cache_manager.evict_puffin_cache(file_id).await + } + CacheStrategy::Disabled => {} + } + } + /// Calls [CacheManager::get_selector_result()]. /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled]. pub fn get_selector_result( @@ -374,6 +387,35 @@ impl CacheManager { } } + /// Evicts every puffin-related cache entry for the given file. + pub async fn evict_puffin_cache(&self, file_id: RegionFileId) { + if let Some(cache) = &self.bloom_filter_index_cache { + cache.invalidate_file(file_id.file_id()); + } + + if let Some(cache) = &self.inverted_index_cache { + cache.invalidate_file(file_id.file_id()); + } + + if let Some(cache) = &self.index_result_cache { + cache.invalidate_file(file_id.file_id()); + } + + if let Some(cache) = &self.puffin_metadata_cache { + cache.remove(&file_id.to_string()); + } + + if let Some(write_cache) = &self.write_cache { + write_cache + .remove(IndexKey::new( + file_id.region_id(), + file_id.file_id(), + FileType::Puffin, + )) + .await; + } + } + /// Gets result of for the selector. 
pub fn get_selector_result( &self, @@ -760,10 +802,16 @@ type SelectorResultCache = Cache>; mod tests { use std::sync::Arc; + use api::v1::index::{BloomFilterMeta, InvertedIndexMetas}; use datatypes::vectors::Int64Vector; + use puffin::file_metadata::FileMetadata; + use store_api::storage::ColumnId; use super::*; + use crate::cache::index::bloom_filter_index::Tag; + use crate::cache::index::result_cache::PredicateKey; use crate::cache::test_util::parquet_meta; + use crate::sst::parquet::row_selection::RowGroupSelection; #[tokio::test] async fn test_disable_cache() { @@ -852,4 +900,106 @@ mod tests { cache.put_selector_result(key, result); assert!(cache.get_selector_result(&key).is_some()); } + + #[tokio::test] + async fn test_evict_puffin_cache_clears_all_entries() { + use std::collections::{BTreeMap, HashMap}; + + let cache = CacheManager::builder() + .index_metadata_size(128) + .index_content_size(128) + .index_content_page_size(64) + .index_result_cache_size(128) + .puffin_metadata_size(128) + .build(); + let cache = Arc::new(cache); + + let region_id = RegionId::new(1, 1); + let region_file_id = RegionFileId::new(region_id, FileId::random()); + let column_id: ColumnId = 1; + + let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone(); + let inverted_cache = cache.inverted_index_cache().unwrap().clone(); + let result_cache = cache.index_result_cache().unwrap(); + let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone(); + + let bloom_key = (region_file_id.file_id(), column_id, Tag::Skipping); + bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default())); + inverted_cache.put_metadata( + region_file_id.file_id(), + Arc::new(InvertedIndexMetas::default()), + ); + let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new())); + let selection = Arc::new(RowGroupSelection::default()); + result_cache.put(predicate.clone(), region_file_id.file_id(), selection); + let file_id_str = region_file_id.to_string(); + let metadata = Arc::new(FileMetadata { + blobs: Vec::new(), + properties: HashMap::new(), + }); + puffin_metadata_cache.put_metadata(file_id_str.clone(), metadata); + + assert!(bloom_cache.get_metadata(bloom_key).is_some()); + assert!( + inverted_cache + .get_metadata(region_file_id.file_id()) + .is_some() + ); + assert!( + result_cache + .get(&predicate, region_file_id.file_id()) + .is_some() + ); + assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some()); + + cache.evict_puffin_cache(region_file_id).await; + + assert!(bloom_cache.get_metadata(bloom_key).is_none()); + assert!( + inverted_cache + .get_metadata(region_file_id.file_id()) + .is_none() + ); + assert!( + result_cache + .get(&predicate, region_file_id.file_id()) + .is_none() + ); + assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none()); + + // Refill caches and evict via CacheStrategy to ensure delegation works. 
+ bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default())); + inverted_cache.put_metadata( + region_file_id.file_id(), + Arc::new(InvertedIndexMetas::default()), + ); + result_cache.put( + predicate.clone(), + region_file_id.file_id(), + Arc::new(RowGroupSelection::default()), + ); + puffin_metadata_cache.put_metadata( + file_id_str.clone(), + Arc::new(FileMetadata { + blobs: Vec::new(), + properties: HashMap::new(), + }), + ); + + let strategy = CacheStrategy::EnableAll(cache.clone()); + strategy.evict_puffin_cache(region_file_id).await; + + assert!(bloom_cache.get_metadata(bloom_key).is_none()); + assert!( + inverted_cache + .get_metadata(region_file_id.file_id()) + .is_none() + ); + assert!( + result_cache + .get(&predicate, region_file_id.file_id()) + .is_none() + ); + assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none()); + } } diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index 0a803d7c8b2f..cf2477299436 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -112,6 +112,7 @@ where .with_label_values(&[INDEX_METADATA_TYPE]) .sub(size.into()); }) + .support_invalidation_closures() .build(); let index_cache = moka::sync::CacheBuilder::new(index_content_cap) .name(&format!("index_content_{}", index_type)) @@ -122,6 +123,7 @@ where .with_label_values(&[INDEX_CONTENT_TYPE]) .sub(size.into()); }) + .support_invalidation_closures() .build(); Self { index_metadata, @@ -219,6 +221,23 @@ where .add((self.weight_of_content)(&(key, page_key), &value).into()); self.index.insert((key, page_key), value); } + + /// Invalidates all cache entries whose keys satisfy `predicate`. + pub fn invalidate_if(&self, predicate: F) + where + F: Fn(&K) -> bool + Send + Sync + 'static, + { + let predicate = Arc::new(predicate); + let metadata_predicate = Arc::clone(&predicate); + + self.index_metadata + .invalidate_entries_if(move |key, _| metadata_predicate(key)) + .expect("cache should support invalidation closures"); + + self.index + .invalidate_entries_if(move |(key, _), _| predicate(key)) + .expect("cache should support invalidation closures"); + } } /// Prunes the size of the last page based on the indexes. diff --git a/src/mito2/src/cache/index/bloom_filter_index.rs b/src/mito2/src/cache/index/bloom_filter_index.rs index f4b9477b8369..9e8d864d7dfc 100644 --- a/src/mito2/src/cache/index/bloom_filter_index.rs +++ b/src/mito2/src/cache/index/bloom_filter_index.rs @@ -50,6 +50,11 @@ impl BloomFilterIndexCache { bloom_filter_index_content_weight, ) } + + /// Removes all cached entries for the given `file_id`. + pub fn invalidate_file(&self, file_id: FileId) { + self.invalidate_if(move |key| key.0 == file_id); + } } /// Calculates weight for bloom filter index metadata. diff --git a/src/mito2/src/cache/index/inverted_index.rs b/src/mito2/src/cache/index/inverted_index.rs index 5caf998a12b5..06a7a3f6d430 100644 --- a/src/mito2/src/cache/index/inverted_index.rs +++ b/src/mito2/src/cache/index/inverted_index.rs @@ -44,6 +44,11 @@ impl InvertedIndexCache { inverted_index_content_weight, ) } + + /// Removes all cached entries for the given `file_id`. + pub fn invalidate_file(&self, file_id: FileId) { + self.invalidate_if(move |key| *key == file_id); + } } /// Calculates weight for inverted index metadata. 
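The cache changes above depend on moka's predicate-based invalidation: `invalidate_entries_if` only succeeds on caches whose builder opted in via `support_invalidation_closures()`, which is why each index cache builder in this patch adds that call before `invalidate_if` and `invalidate_file` are introduced. A minimal standalone sketch of that pattern (not part of the patch; assumes the moka sync API and illustrative key/value types):

use moka::sync::Cache;

fn invalidate_matching_entries() {
    // Opt in to closure-based invalidation at build time.
    let cache: Cache<u64, String> = Cache::builder()
        .max_capacity(1024)
        .support_invalidation_closures()
        .build();
    cache.insert(1, "a".to_string());
    cache.insert(2, "b".to_string());
    // Lazily drops every entry whose key satisfies the predicate; this call
    // returns an error if `support_invalidation_closures()` was omitted.
    cache
        .invalidate_entries_if(|key, _value| *key == 1)
        .expect("cache built with invalidation closure support");
}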
diff --git a/src/mito2/src/cache/index/result_cache.rs b/src/mito2/src/cache/index/result_cache.rs index 5ae8425cc82c..1b14c9d981c0 100644 --- a/src/mito2/src/cache/index/result_cache.rs +++ b/src/mito2/src/cache/index/result_cache.rs @@ -63,6 +63,7 @@ impl IndexResultCache { .with_label_values(&[INDEX_RESULT_TYPE, to_str(cause)]) .inc(); }) + .support_invalidation_closures() .build(); Self { cache } } @@ -97,6 +98,13 @@ impl IndexResultCache { fn index_result_cache_weight(k: &(PredicateKey, FileId), v: &Arc) -> u32 { k.0.mem_usage() as u32 + v.mem_usage() as u32 } + + /// Removes cached results for the given file. + pub fn invalidate_file(&self, file_id: FileId) { + self.cache + .invalidate_entries_if(move |(_, cached_file_id), _| *cached_file_id == file_id) + .expect("cache should support invalidation closures"); + } } /// Key for different types of index predicates. diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index ec528de29550..edf070996020 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -19,6 +19,7 @@ use std::path::Path; use std::time::Duration; use common_base::readable_size::ReadableSize; +use common_stat::{get_total_cpu_cores, get_total_memory_readable}; use common_telemetry::warn; use serde::{Deserialize, Serialize}; use serde_with::serde_as; @@ -162,7 +163,7 @@ impl Default for MitoConfig { max_background_index_builds: divide_num_cpus(8), max_background_flushes: divide_num_cpus(2), max_background_compactions: divide_num_cpus(4), - max_background_purges: common_config::utils::get_cpus(), + max_background_purges: get_total_cpu_cores(), auto_flush_interval: Duration::from_secs(30 * 60), global_write_buffer_size: ReadableSize::gb(1), global_write_buffer_reject_size: ReadableSize::gb(2), @@ -188,7 +189,7 @@ impl Default for MitoConfig { }; // Adjust buffer and cache size according to system memory if we can. 
- if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(sys_memory) = get_total_memory_readable() { mito_config.adjust_buffer_and_cache_size(sys_memory); } @@ -227,11 +228,9 @@ impl MitoConfig { self.max_background_compactions = divide_num_cpus(4); } if self.max_background_purges == 0 { - warn!( - "Sanitize max background purges 0 to {}", - common_config::utils::get_cpus() - ); - self.max_background_purges = common_config::utils::get_cpus(); + let cpu_cores = get_total_cpu_cores(); + warn!("Sanitize max background purges 0 to {}", cpu_cores); + self.max_background_purges = cpu_cores; } if self.global_write_buffer_reject_size <= self.global_write_buffer_size { @@ -504,7 +503,7 @@ impl InvertedIndexConfig { pub fn mem_threshold_on_create(&self) -> Option { match self.mem_threshold_on_create { MemoryThreshold::Auto => { - if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(sys_memory) = get_total_memory_readable() { Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize) } else { Some(ReadableSize::mb(64).as_bytes() as usize) @@ -549,7 +548,7 @@ impl FulltextIndexConfig { pub fn mem_threshold_on_create(&self) -> usize { match self.mem_threshold_on_create { MemoryThreshold::Auto => { - if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(sys_memory) = get_total_memory_readable() { (sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as _ } else { ReadableSize::mb(64).as_bytes() as _ @@ -591,7 +590,7 @@ impl BloomFilterConfig { pub fn mem_threshold_on_create(&self) -> Option { match self.mem_threshold_on_create { MemoryThreshold::Auto => { - if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(sys_memory) = get_total_memory_readable() { Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize) } else { Some(ReadableSize::mb(64).as_bytes() as usize) @@ -606,7 +605,7 @@ impl BloomFilterConfig { /// Divide cpu num by a non-zero `divisor` and returns at least 1. 
fn divide_num_cpus(divisor: usize) -> usize { debug_assert!(divisor > 0); - let cores = common_config::utils::get_cpus(); + let cores = get_total_cpu_cores(); debug_assert!(cores > 0); cores.div_ceil(divisor) diff --git a/src/mito2/src/engine.rs b/src/mito2/src/engine.rs index c10c398681b6..d48f75fa91a8 100644 --- a/src/mito2/src/engine.rs +++ b/src/mito2/src/engine.rs @@ -67,6 +67,8 @@ mod sync_test; #[cfg(test)] mod truncate_test; +mod puffin_index; + use std::any::Any; use std::collections::HashMap; use std::sync::Arc; @@ -78,7 +80,7 @@ use common_base::Plugins; use common_error::ext::BoxedError; use common_meta::key::SchemaMetadataManagerRef; use common_recordbatch::SendableRecordBatchStream; -use common_telemetry::{info, tracing}; +use common_telemetry::{info, tracing, warn}; use common_wal::options::{WAL_OPTIONS_KEY, WalOptions}; use futures::future::{join_all, try_join_all}; use futures::stream::{self, Stream, StreamExt}; @@ -97,12 +99,14 @@ use store_api::region_engine::{ RegionStatistic, SetRegionRoleStateResponse, SettableRegionRoleState, SyncManifestResponse, }; use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest}; -use store_api::sst_entry::{ManifestSstEntry, StorageSstEntry}; -use store_api::storage::{RegionId, ScanRequest, SequenceNumber}; +use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry}; +use store_api::storage::{FileId, RegionId, ScanRequest, SequenceNumber}; use tokio::sync::{Semaphore, oneshot}; +use crate::access_layer::RegionFilePathFactory; use crate::cache::{CacheManagerRef, CacheStrategy}; use crate::config::MitoConfig; +use crate::engine::puffin_index::{IndexEntryContext, collect_index_entries_from_puffin}; use crate::error::{ InvalidRequestSnafu, JoinSnafu, MitoManifestInfoSnafu, RecvSnafu, RegionNotFoundSnafu, Result, SerdeJsonSnafu, SerializeColumnMetadataSnafu, @@ -117,7 +121,7 @@ use crate::read::stream::ScanBatchStream; use crate::region::MitoRegionRef; use crate::region::opener::PartitionExprFetcherRef; use crate::request::{RegionEditRequest, WorkerRequest}; -use crate::sst::file::FileMeta; +use crate::sst::file::{FileMeta, RegionFileId}; use crate::sst::file_ref::FileReferenceManagerRef; use crate::wal::entry_distributor::{ DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE, build_wal_entry_distributor_and_receivers, @@ -434,6 +438,89 @@ impl MitoEngine { results } + /// Lists metadata about all puffin index targets stored in the engine. 
+ pub async fn all_index_metas(&self) -> Vec { + let node_id = self.inner.workers.file_ref_manager().node_id(); + let cache_manager = self.inner.workers.cache_manager(); + let puffin_metadata_cache = cache_manager.puffin_metadata_cache().cloned(); + let bloom_filter_cache = cache_manager.bloom_filter_index_cache().cloned(); + let inverted_index_cache = cache_manager.inverted_index_cache().cloned(); + + let mut results = Vec::new(); + + for region in self.inner.workers.all_regions() { + let manifest_entries = region.manifest_sst_entries().await; + let access_layer = region.access_layer.clone(); + let table_dir = access_layer.table_dir().to_string(); + let path_type = access_layer.path_type(); + let object_store = access_layer.object_store().clone(); + let puffin_factory = access_layer.puffin_manager_factory().clone(); + let path_factory = RegionFilePathFactory::new(table_dir, path_type); + + let entry_futures = manifest_entries.into_iter().map(|entry| { + let object_store = object_store.clone(); + let path_factory = path_factory.clone(); + let puffin_factory = puffin_factory.clone(); + let puffin_metadata_cache = puffin_metadata_cache.clone(); + let bloom_filter_cache = bloom_filter_cache.clone(); + let inverted_index_cache = inverted_index_cache.clone(); + + async move { + let Some(index_file_path) = entry.index_file_path.as_ref() else { + return Vec::new(); + }; + + let file_id = match FileId::parse_str(&entry.file_id) { + Ok(file_id) => file_id, + Err(err) => { + warn!( + err; + "Failed to parse puffin index file id, table_dir: {}, file_id: {}", + entry.table_dir, + entry.file_id + ); + return Vec::new(); + } + }; + + let region_file_id = RegionFileId::new(entry.region_id, file_id); + let context = IndexEntryContext { + table_dir: &entry.table_dir, + index_file_path: index_file_path.as_str(), + region_id: entry.region_id, + table_id: entry.table_id, + region_number: entry.region_number, + region_group: entry.region_group, + region_sequence: entry.region_sequence, + file_id: &entry.file_id, + index_file_size: entry.index_file_size, + node_id, + }; + + let manager = puffin_factory + .build(object_store, path_factory) + .with_puffin_metadata_cache(puffin_metadata_cache); + + collect_index_entries_from_puffin( + manager, + region_file_id, + context, + bloom_filter_cache, + inverted_index_cache, + ) + .await + } + }); + + let mut meta_stream = stream::iter(entry_futures).buffer_unordered(8); // Parallelism is 8. + while let Some(mut metas) = meta_stream.next().await { + results.append(&mut metas); + } + } + + results + } + /// Lists all SSTs from the storage layer of all regions in the engine. 
pub fn all_ssts_from_storage(&self) -> impl Stream> { let node_id = self.inner.workers.file_ref_manager().node_id(); diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index 2a5d0fbf87df..f88b01be5816 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -787,9 +787,9 @@ async fn test_list_ssts() { assert_eq!( debug_format, r#" -ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2515, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2515, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } -ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2515, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# +ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", level: 0, file_path: "test/11_0000000001/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000001/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "", level: 0, file_path: "test/11_0000000002/.parquet", file_size: 2531, index_file_path: Some("test/11_0000000002/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true } +ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "", level: 0, file_path: "test/22_0000000042/.parquet", file_size: 2531, index_file_path: Some("test/22_0000000042/index/.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"# ); // list from storage @@ -819,3 +819,150 @@ StorageSstEntry { file_path: "test/22_0000000042/.parquet", file_size: StorageSstEntry { file_path: "test/22_0000000042/index/.puffin", file_size: None, last_modified_ms: None, node_id: None }"# ); } + 
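For context, the new `all_index_metas` API added to `MitoEngine` above yields one `PuffinIndexMetaEntry` per index blob found in the puffin files. A minimal consumption sketch (not part of the patch; it assumes an existing engine handle, the `mito2::engine::MitoEngine` path, and that the entry fields asserted in the test below are public):

use mito2::engine::MitoEngine;

async fn dump_index_metas(engine: &MitoEngine) {
    // One entry per index blob (inverted, bloom filter, fulltext) in each puffin file.
    for entry in engine.all_index_metas().await {
        println!(
            "region={:?} index_type={} target_key={} blob_size={}",
            entry.region_id, entry.index_type, entry.target_key, entry.blob_size
        );
    }
}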
+#[tokio::test] +async fn test_all_index_metas_list_all_types() { + use datatypes::schema::{ + FulltextAnalyzer, FulltextBackend, FulltextOptions, SkippingIndexOptions, SkippingIndexType, + }; + + let mut env = TestEnv::new().await; + let engine = env.create_engine(MitoConfig::default()).await; + + // One region with both fulltext backends and inverted index enabled, plus bloom skipping index + let region_id = RegionId::new(11, 1); + + let mut request = CreateRequestBuilder::new().tag_num(3).field_num(2).build(); + // inverted index on tag_0 + request.column_metadatas[0] + .column_schema + .set_inverted_index(true); + // fulltext bloom on tag_1 + let ft_bloom = FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 4, + 0.001, + ); + request.column_metadatas[1] + .column_schema + .set_fulltext_options(&ft_bloom) + .unwrap(); + // fulltext tantivy on tag_2 + let ft_tantivy = FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::Chinese, + true, + FulltextBackend::Tantivy, + 2, + 0.01, + ); + request.column_metadatas[2] + .column_schema + .set_fulltext_options(&ft_tantivy) + .unwrap(); + // bloom filter skipping index on field_1 (which is at index 3) + let skipping = SkippingIndexOptions::new_unchecked(2, 0.01, SkippingIndexType::BloomFilter); + request.column_metadatas[3] + .column_schema + .set_skipping_options(&skipping) + .unwrap(); + + // inverted index on field_1 + request.column_metadatas[4] + .column_schema + .set_inverted_index(true); + + engine + .handle_request(region_id, RegionRequest::Create(request.clone())) + .await + .unwrap(); + + // write some rows (schema: tag_0, tag_1, tag_2, field_0, field_1, ts) + let column_schemas = rows_schema(&request); + let rows_vec: Vec = (0..20) + .map(|ts| api::v1::Row { + values: vec![ + api::v1::Value { + value_data: Some(api::v1::value::ValueData::StringValue("x".to_string())), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::StringValue("y".to_string())), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::StringValue("z".to_string())), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::F64Value(ts as f64)), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::F64Value((20 - ts) as f64)), + }, + api::v1::Value { + value_data: Some(api::v1::value::ValueData::TimestampMillisecondValue( + ts as i64 * 1000, + )), + }, + ], + }) + .collect(); + let rows = api::v1::Rows { + schema: column_schemas.clone(), + rows: rows_vec, + }; + put_rows(&engine, region_id, rows).await; + + // flush to generate sst and indexes + engine + .handle_request( + region_id, + RegionRequest::Flush(RegionFlushRequest { + row_group_size: None, + }), + ) + .await + .unwrap(); + + fn bucket_size(size: u64) -> u64 { + if size < 512 { size } else { (size / 16) * 16 } + } + + let mut metas = engine.all_index_metas().await; + for entry in &mut metas { + entry.index_file_path = entry.index_file_path.replace(&entry.file_id, ""); + entry.file_id = "".to_string(); + entry.index_file_size = entry.index_file_size.map(bucket_size); + if entry.index_type == "fulltext_tantivy" { + entry.blob_size = bucket_size(entry.blob_size); + } + if let Some(meta_json) = entry.meta_json.as_mut() + && let Ok(mut value) = serde_json::from_str::(meta_json) + { + if let Some(inverted) = value.get_mut("inverted").and_then(|v| v.as_object_mut()) { + inverted.insert("base_offset".to_string(), serde_json::Value::from(0)); + } + *meta_json = value.to_string(); + } + } + 
metas.sort_by(|a, b| { + (a.index_type.as_str(), a.target_key.as_str()) + .cmp(&(b.index_type.as_str(), b.target_key.as_str())) + }); + + let debug_format = metas + .iter() + .map(|entry| format!("\n{:?}", entry)) + .collect::(); + + assert_eq!( + debug_format, + r#" +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "bloom_filter", target_type: "column", target_key: "3", target_json: "{\"column\":3}", blob_size: 751, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":640,\"row_count\":20,\"rows_per_segment\":2,\"segment_count\":10}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "fulltext_bloom", target_type: "column", target_key: "1", target_json: "{\"column\":1}", blob_size: 87, meta_json: Some("{\"bloom\":{\"bloom_filter_size\":64,\"row_count\":20,\"rows_per_segment\":4,\"segment_count\":5},\"fulltext\":{\"analyzer\":\"English\",\"case_sensitive\":false}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "fulltext_tantivy", target_type: "column", target_key: "2", target_json: "{\"column\":2}", blob_size: 1104, meta_json: Some("{\"fulltext\":{\"analyzer\":\"Chinese\",\"case_sensitive\":true}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "inverted", target_type: "column", target_key: "0", target_json: "{\"column\":0}", blob_size: 70, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":44,\"inverted_index_size\":70,\"null_bitmap_size\":8,\"relative_fst_offset\":26,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None } +PuffinIndexMetaEntry { table_dir: "test/", index_file_path: "test/11_0000000001/index/.puffin", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "", index_file_size: Some(6032), index_type: "inverted", target_type: "column", target_key: "4", target_json: "{\"column\":4}", blob_size: 515, meta_json: Some("{\"inverted\":{\"base_offset\":0,\"bitmap_type\":\"Roaring\",\"fst_size\":147,\"inverted_index_size\":515,\"null_bitmap_size\":8,\"relative_fst_offset\":368,\"relative_null_bitmap_offset\":0,\"segment_row_count\":1024,\"total_row_count\":20}}"), node_id: None }"# + ); +} diff --git a/src/mito2/src/engine/puffin_index.rs b/src/mito2/src/engine/puffin_index.rs new file mode 100644 index 000000000000..05529db59ba4 --- /dev/null +++ b/src/mito2/src/engine/puffin_index.rs @@ -0,0 +1,502 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::convert::TryFrom; + +use common_base::range_read::RangeReader; +use common_telemetry::warn; +use greptime_proto::v1::index::{BloomFilterMeta, InvertedIndexMeta, InvertedIndexMetas}; +use index::bitmap::BitmapType; +use index::bloom_filter::reader::{BloomFilterReader, BloomFilterReaderImpl}; +use index::fulltext_index::Config as FulltextConfig; +use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader}; +use index::target::IndexTarget; +use puffin::blob_metadata::BlobMetadata; +use puffin::puffin_manager::{PuffinManager, PuffinReader}; +use serde_json::{Map, Value, json}; +use store_api::sst_entry::PuffinIndexMetaEntry; +use store_api::storage::{ColumnId, RegionGroup, RegionId, RegionNumber, RegionSeq, TableId}; + +use crate::cache::index::bloom_filter_index::{ + BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader, Tag, +}; +use crate::cache::index::inverted_index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef}; +use crate::sst::file::RegionFileId; +use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE as BLOOM_BLOB_TYPE; +use crate::sst::index::fulltext_index::{ + INDEX_BLOB_TYPE_BLOOM as FULLTEXT_BLOOM_BLOB_TYPE, + INDEX_BLOB_TYPE_TANTIVY as FULLTEXT_TANTIVY_BLOB_TYPE, +}; +use crate::sst::index::inverted_index::INDEX_BLOB_TYPE as INVERTED_BLOB_TYPE; +use crate::sst::index::puffin_manager::{SstPuffinManager, SstPuffinReader}; + +const INDEX_TYPE_BLOOM: &str = "bloom_filter"; +const INDEX_TYPE_FULLTEXT_BLOOM: &str = "fulltext_bloom"; +const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "fulltext_tantivy"; +const INDEX_TYPE_INVERTED: &str = "inverted"; + +const TARGET_TYPE_UNKNOWN: &str = "unknown"; + +const TARGET_TYPE_COLUMN: &str = "column"; + +pub(crate) struct IndexEntryContext<'a> { + pub(crate) table_dir: &'a str, + pub(crate) index_file_path: &'a str, + pub(crate) region_id: RegionId, + pub(crate) table_id: TableId, + pub(crate) region_number: RegionNumber, + pub(crate) region_group: RegionGroup, + pub(crate) region_sequence: RegionSeq, + pub(crate) file_id: &'a str, + pub(crate) index_file_size: Option, + pub(crate) node_id: Option, +} + +/// Collect index metadata entries present in the SST puffin file. 
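+/// Produces one entry per bloom-filter and fulltext blob and one entry per
+/// inverted-index column. Read failures are logged and the affected blob is
+/// skipped instead of failing the whole listing.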
+pub(crate) async fn collect_index_entries_from_puffin( + manager: SstPuffinManager, + region_file_id: RegionFileId, + context: IndexEntryContext<'_>, + bloom_filter_cache: Option, + inverted_index_cache: Option, +) -> Vec { + let mut entries = Vec::new(); + + let reader = match manager.reader(®ion_file_id).await { + Ok(reader) => reader, + Err(err) => { + warn!( + err; + "Failed to open puffin index file, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return entries; + } + }; + + let file_metadata = match reader.metadata().await { + Ok(metadata) => metadata, + Err(err) => { + warn!( + err; + "Failed to read puffin file metadata, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return entries; + } + }; + + for blob in &file_metadata.blobs { + match BlobIndexTypeTargetKey::from_blob_type(&blob.blob_type) { + Some(BlobIndexTypeTargetKey::BloomFilter(target_key)) => { + let bloom_meta = try_read_bloom_meta( + &reader, + region_file_id, + blob.blob_type.as_str(), + target_key, + bloom_filter_cache.as_ref(), + Tag::Skipping, + &context, + ) + .await; + + let bloom_value = bloom_meta.as_ref().map(bloom_meta_value); + let (target_type, target_json) = decode_target_info(target_key); + let meta_json = build_meta_json(bloom_value, None, None); + let entry = build_index_entry( + &context, + INDEX_TYPE_BLOOM, + target_type, + target_key.to_string(), + target_json, + blob.length as u64, + meta_json, + ); + entries.push(entry); + } + Some(BlobIndexTypeTargetKey::FulltextBloom(target_key)) => { + let bloom_meta = try_read_bloom_meta( + &reader, + region_file_id, + blob.blob_type.as_str(), + target_key, + bloom_filter_cache.as_ref(), + Tag::Fulltext, + &context, + ) + .await; + + let bloom_value = bloom_meta.as_ref().map(bloom_meta_value); + let fulltext_value = Some(fulltext_meta_value(blob)); + let (target_type, target_json) = decode_target_info(target_key); + let meta_json = build_meta_json(bloom_value, fulltext_value, None); + let entry = build_index_entry( + &context, + INDEX_TYPE_FULLTEXT_BLOOM, + target_type, + target_key.to_string(), + target_json, + blob.length as u64, + meta_json, + ); + entries.push(entry); + } + Some(BlobIndexTypeTargetKey::FulltextTantivy(target_key)) => { + let fulltext_value = Some(fulltext_meta_value(blob)); + let (target_type, target_json) = decode_target_info(target_key); + let meta_json = build_meta_json(None, fulltext_value, None); + let entry = build_index_entry( + &context, + INDEX_TYPE_FULLTEXT_TANTIVY, + target_type, + target_key.to_string(), + target_json, + blob.length as u64, + meta_json, + ); + entries.push(entry); + } + Some(BlobIndexTypeTargetKey::Inverted) => { + let mut inverted_entries = collect_inverted_entries( + &reader, + region_file_id, + inverted_index_cache.as_ref(), + &context, + ) + .await; + entries.append(&mut inverted_entries); + } + None => {} + } + } + + entries +} + +async fn collect_inverted_entries( + reader: &SstPuffinReader, + region_file_id: RegionFileId, + cache: Option<&InvertedIndexCacheRef>, + context: &IndexEntryContext<'_>, +) -> Vec { + // Read the inverted index blob and surface its per-column metadata entries. 
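+    // Prefer the cached reader when both an inverted index cache and the blob
+    // size are available; otherwise fall back to reading the blob directly.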
+ let file_id = region_file_id.file_id(); + + let guard = match reader.blob(INVERTED_BLOB_TYPE).await { + Ok(guard) => guard, + Err(err) => { + warn!( + err; + "Failed to open inverted index blob, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return Vec::new(); + } + }; + + let blob_reader = match guard.reader().await { + Ok(reader) => reader, + Err(err) => { + warn!( + err; + "Failed to build inverted index blob reader, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return Vec::new(); + } + }; + + let blob_size = blob_reader + .metadata() + .await + .ok() + .map(|meta| meta.content_length); + let metas = if let (Some(cache), Some(blob_size)) = (cache, blob_size) { + let reader = CachedInvertedIndexBlobReader::new( + file_id, + blob_size, + InvertedIndexBlobReader::new(blob_reader), + cache.clone(), + ); + match reader.metadata().await { + Ok(metas) => metas, + Err(err) => { + warn!( + err; + "Failed to read inverted index metadata, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return Vec::new(); + } + } + } else { + let reader = InvertedIndexBlobReader::new(blob_reader); + match reader.metadata().await { + Ok(metas) => metas, + Err(err) => { + warn!( + err; + "Failed to read inverted index metadata, table_dir: {}, file_id: {}", + context.table_dir, + context.file_id + ); + return Vec::new(); + } + } + }; + + build_inverted_entries(context, metas.as_ref()) +} + +fn build_inverted_entries( + context: &IndexEntryContext<'_>, + metas: &InvertedIndexMetas, +) -> Vec { + let mut entries = Vec::new(); + for (name, meta) in &metas.metas { + let (target_type, target_json) = decode_target_info(name); + let inverted_value = inverted_meta_value(meta, metas); + let meta_json = build_meta_json(None, None, Some(inverted_value)); + let entry = build_index_entry( + context, + INDEX_TYPE_INVERTED, + target_type, + name.clone(), + target_json, + meta.inverted_index_size, + meta_json, + ); + entries.push(entry); + } + entries +} + +async fn try_read_bloom_meta( + reader: &SstPuffinReader, + region_file_id: RegionFileId, + blob_type: &str, + target_key: &str, + cache: Option<&BloomFilterIndexCacheRef>, + tag: Tag, + context: &IndexEntryContext<'_>, +) -> Option { + let column_id = decode_column_id(target_key); + + // Failures are logged but do not abort the overall metadata collection. 
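+    // The cached bloom-filter reader additionally needs the decoded column id
+    // and the blob size; when either is missing, fall back to the plain reader.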
+ match reader.blob(blob_type).await { + Ok(guard) => match guard.reader().await { + Ok(blob_reader) => { + let blob_size = blob_reader + .metadata() + .await + .ok() + .map(|meta| meta.content_length); + let bloom_reader = BloomFilterReaderImpl::new(blob_reader); + let result = match (cache, column_id, blob_size) { + (Some(cache), Some(column_id), Some(blob_size)) => { + CachedBloomFilterIndexBlobReader::new( + region_file_id.file_id(), + column_id, + tag, + blob_size, + bloom_reader, + cache.clone(), + ) + .metadata() + .await + } + _ => bloom_reader.metadata().await, + }; + + match result { + Ok(meta) => Some(meta), + Err(err) => { + warn!( + err; + "Failed to read index metadata, table_dir: {}, file_id: {}, blob: {}", + context.table_dir, + context.file_id, + blob_type + ); + None + } + } + } + Err(err) => { + warn!( + err; + "Failed to open index blob reader, table_dir: {}, file_id: {}, blob: {}", + context.table_dir, + context.file_id, + blob_type + ); + None + } + }, + Err(err) => { + warn!( + err; + "Failed to open index blob, table_dir: {}, file_id: {}, blob: {}", + context.table_dir, + context.file_id, + blob_type + ); + None + } + } +} + +fn decode_target_info(target_key: &str) -> (String, String) { + match IndexTarget::decode(target_key) { + Ok(IndexTarget::ColumnId(id)) => ( + TARGET_TYPE_COLUMN.to_string(), + json!({ "column": id }).to_string(), + ), + _ => ( + TARGET_TYPE_UNKNOWN.to_string(), + json!({ "error": "failed_to_decode" }).to_string(), + ), + } +} + +fn decode_column_id(target_key: &str) -> Option { + match IndexTarget::decode(target_key) { + Ok(IndexTarget::ColumnId(id)) => Some(id), + _ => None, + } +} + +fn bloom_meta_value(meta: &BloomFilterMeta) -> Value { + json!({ + "rows_per_segment": meta.rows_per_segment, + "segment_count": meta.segment_count, + "row_count": meta.row_count, + "bloom_filter_size": meta.bloom_filter_size, + }) +} + +fn fulltext_meta_value(blob: &BlobMetadata) -> Value { + let config = FulltextConfig::from_blob_metadata(blob).unwrap_or_default(); + json!({ + "analyzer": config.analyzer.to_str(), + "case_sensitive": config.case_sensitive, + }) +} + +fn inverted_meta_value(meta: &InvertedIndexMeta, metas: &InvertedIndexMetas) -> Value { + let bitmap_type = BitmapType::try_from(meta.bitmap_type) + .map(|bt| format!("{:?}", bt)) + .unwrap_or_else(|_| meta.bitmap_type.to_string()); + json!({ + "bitmap_type": bitmap_type, + "base_offset": meta.base_offset, + "inverted_index_size": meta.inverted_index_size, + "relative_fst_offset": meta.relative_fst_offset, + "fst_size": meta.fst_size, + "relative_null_bitmap_offset": meta.relative_null_bitmap_offset, + "null_bitmap_size": meta.null_bitmap_size, + "segment_row_count": metas.segment_row_count, + "total_row_count": metas.total_row_count, + }) +} + +fn build_meta_json( + bloom: Option, + fulltext: Option, + inverted: Option, +) -> Option { + let mut map = Map::new(); + if let Some(value) = bloom { + map.insert("bloom".to_string(), value); + } + if let Some(value) = fulltext { + map.insert("fulltext".to_string(), value); + } + if let Some(value) = inverted { + map.insert("inverted".to_string(), value); + } + if map.is_empty() { + None + } else { + Some(Value::Object(map).to_string()) + } +} + +enum BlobIndexTypeTargetKey<'a> { + BloomFilter(&'a str), + FulltextBloom(&'a str), + FulltextTantivy(&'a str), + Inverted, +} + +impl<'a> BlobIndexTypeTargetKey<'a> { + fn from_blob_type(blob_type: &'a str) -> Option { + if let Some(target_key) = Self::target_key_from_blob(blob_type, BLOOM_BLOB_TYPE) { + 
Some(BlobIndexTypeTargetKey::BloomFilter(target_key)) + } else if let Some(target_key) = + Self::target_key_from_blob(blob_type, FULLTEXT_BLOOM_BLOB_TYPE) + { + Some(BlobIndexTypeTargetKey::FulltextBloom(target_key)) + } else if let Some(target_key) = + Self::target_key_from_blob(blob_type, FULLTEXT_TANTIVY_BLOB_TYPE) + { + Some(BlobIndexTypeTargetKey::FulltextTantivy(target_key)) + } else if blob_type == INVERTED_BLOB_TYPE { + Some(BlobIndexTypeTargetKey::Inverted) + } else { + None + } + } + + fn target_key_from_blob(blob_type: &'a str, prefix: &str) -> Option<&'a str> { + // Blob types encode their target as "-". + blob_type + .strip_prefix(prefix) + .and_then(|suffix| suffix.strip_prefix('-')) + } +} + +fn build_index_entry( + context: &IndexEntryContext<'_>, + index_type: &str, + target_type: String, + target_key: String, + target_json: String, + blob_size: u64, + meta_json: Option, +) -> PuffinIndexMetaEntry { + PuffinIndexMetaEntry { + table_dir: context.table_dir.to_string(), + index_file_path: context.index_file_path.to_string(), + region_id: context.region_id, + table_id: context.table_id, + region_number: context.region_number, + region_group: context.region_group, + region_sequence: context.region_sequence, + file_id: context.file_id.to_string(), + index_file_size: context.index_file_size, + index_type: index_type.to_string(), + target_type, + target_key, + target_json, + blob_size, + meta_json, + node_id: context.node_id, + } +} diff --git a/src/mito2/src/manifest/manager.rs b/src/mito2/src/manifest/manager.rs index c2cd4877fe87..b65d9c840daa 100644 --- a/src/mito2/src/manifest/manager.rs +++ b/src/mito2/src/manifest/manager.rs @@ -923,6 +923,6 @@ mod test { // get manifest size again let manifest_size = manager.manifest_usage(); - assert_eq!(manifest_size, 1748); + assert_eq!(manifest_size, 1764); } } diff --git a/src/mito2/src/memtable.rs b/src/mito2/src/memtable.rs index 2744f24890dc..b4461e8b065a 100644 --- a/src/mito2/src/memtable.rs +++ b/src/mito2/src/memtable.rs @@ -28,7 +28,7 @@ use mito_codec::key_values::KeyValue; pub use mito_codec::key_values::KeyValues; use serde::{Deserialize, Serialize}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceNumber, SequenceRange}; use crate::config::MitoConfig; use crate::error::{Result, UnsupportedOperationSnafu}; @@ -186,7 +186,7 @@ pub trait Memtable: Send + Sync + fmt::Debug { &self, projection: Option<&[ColumnId]>, predicate: Option, - sequence: Option, + sequence: Option, ) -> Result; /// Returns the ranges in the memtable. @@ -197,7 +197,7 @@ pub trait Memtable: Send + Sync + fmt::Debug { &self, projection: Option<&[ColumnId]>, predicate: PredicateGroup, - sequence: Option, + sequence: Option, for_flush: bool, ) -> Result; diff --git a/src/mito2/src/memtable/builder.rs b/src/mito2/src/memtable/builder.rs index bd1c73e2d7b4..7e37077f3e08 100644 --- a/src/mito2/src/memtable/builder.rs +++ b/src/mito2/src/memtable/builder.rs @@ -47,7 +47,7 @@ impl FieldBuilder { pub(crate) fn push(&mut self, value: ValueRef) -> datatypes::error::Result<()> { match self { FieldBuilder::String(b) => { - if let Some(s) = value.as_string()? { + if let Some(s) = value.try_into_string()? 
{ b.append(s); } else { b.append_null(); diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs index c410a53943ec..d67e9f1424f3 100644 --- a/src/mito2/src/memtable/bulk.rs +++ b/src/mito2/src/memtable/bulk.rs @@ -30,7 +30,7 @@ use datatypes::arrow::datatypes::SchemaRef; use mito_codec::key_values::KeyValue; use rayon::prelude::*; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, FileId, RegionId, SequenceNumber}; +use store_api::storage::{ColumnId, FileId, RegionId, SequenceRange}; use tokio::sync::Semaphore; use crate::error::{Result, UnsupportedOperationSnafu}; @@ -323,7 +323,7 @@ impl Memtable for BulkMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - _sequence: Option, + _sequence: Option, ) -> Result { todo!() } @@ -332,7 +332,7 @@ impl Memtable for BulkMemtable { &self, projection: Option<&[ColumnId]>, predicate: PredicateGroup, - sequence: Option, + sequence: Option, for_flush: bool, ) -> Result { let mut ranges = BTreeMap::new(); @@ -602,7 +602,7 @@ impl BulkMemtable { struct BulkRangeIterBuilder { part: BulkPart, context: Arc, - sequence: Option, + sequence: Option, } impl IterBuilder for BulkRangeIterBuilder { @@ -641,7 +641,7 @@ struct EncodedBulkRangeIterBuilder { file_id: FileId, part: EncodedBulkPart, context: Arc, - sequence: Option, + sequence: Option, } impl IterBuilder for EncodedBulkRangeIterBuilder { @@ -1381,7 +1381,7 @@ mod tests { memtable.write_bulk(part).unwrap(); let predicate_group = PredicateGroup::new(&metadata, &[]).unwrap(); - let sequence_filter = Some(400u64); // Filters out rows with sequence > 400 + let sequence_filter = Some(SequenceRange::LtEq { max: 400 }); // Filters out rows with sequence > 400 let ranges = memtable .ranges(None, predicate_group, sequence_filter, false) .unwrap(); diff --git a/src/mito2/src/memtable/bulk/part.rs b/src/mito2/src/memtable/bulk/part.rs index d8b9d4b17644..4eb2655755ec 100644 --- a/src/mito2/src/memtable/bulk/part.rs +++ b/src/mito2/src/memtable/bulk/part.rs @@ -53,7 +53,7 @@ use snafu::{OptionExt, ResultExt, Snafu}; use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::{RegionMetadata, RegionMetadataRef}; use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME; -use store_api::storage::{FileId, SequenceNumber}; +use store_api::storage::{FileId, SequenceNumber, SequenceRange}; use table::predicate::Predicate; use crate::error::{ @@ -254,7 +254,7 @@ impl PrimaryKeyColumnBuilder { fn push_value_ref(&mut self, value: ValueRef) -> Result<()> { match self { PrimaryKeyColumnBuilder::StringDict(builder) => { - if let Some(s) = value.as_string().context(DataTypeMismatchSnafu)? { + if let Some(s) = value.try_into_string().context(DataTypeMismatchSnafu)? { // We know the value is a string. builder.append_value(s); } else { @@ -365,7 +365,7 @@ impl BulkPartConverter { .context(ColumnNotFoundSnafu { column: PRIMARY_KEY_COLUMN_NAME, })? - .as_binary() + .try_into_binary() .context(DataTypeMismatchSnafu)? { self.key_array_builder @@ -408,7 +408,12 @@ impl BulkPartConverter { // Updates statistics // Safety: timestamp of kv must be both present and a valid timestamp value. 
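// Both unwraps are covered by the safety note above: `try_into_timestamp` is
// fallible and the timestamp value itself is nullable.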
- let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + let ts = kv + .timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value(); self.min_ts = self.min_ts.min(ts); self.max_ts = self.max_ts.max(ts); self.max_sequence = self.max_sequence.max(kv.sequence()); @@ -564,7 +569,7 @@ impl EncodedBulkPart { pub(crate) fn read( &self, context: BulkIterContextRef, - sequence: Option, + sequence: Option, ) -> Result> { // use predicate to find row groups to read. let row_groups_to_read = context.row_groups_to_read(&self.metadata.parquet_metadata); diff --git a/src/mito2/src/memtable/bulk/part_reader.rs b/src/mito2/src/memtable/bulk/part_reader.rs index 49a3e6b1b70e..b14ff05dfef4 100644 --- a/src/mito2/src/memtable/bulk/part_reader.rs +++ b/src/mito2/src/memtable/bulk/part_reader.rs @@ -17,14 +17,14 @@ use std::ops::BitAnd; use std::sync::Arc; use bytes::Bytes; -use datatypes::arrow::array::{BooleanArray, Scalar, UInt64Array}; +use datatypes::arrow::array::BooleanArray; use datatypes::arrow::buffer::BooleanBuffer; use datatypes::arrow::record_batch::RecordBatch; use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::file::metadata::ParquetMetaData; use snafu::ResultExt; -use store_api::storage::SequenceNumber; +use store_api::storage::SequenceRange; use crate::error::{self, ComputeArrowSnafu, DecodeArrowRowGroupSnafu}; use crate::memtable::bulk::context::{BulkIterContext, BulkIterContextRef}; @@ -39,7 +39,7 @@ pub struct EncodedBulkPartIter { current_reader: Option, builder: MemtableRowGroupReaderBuilder, /// Sequence number filter. - sequence: Option>, + sequence: Option, } impl EncodedBulkPartIter { @@ -49,12 +49,10 @@ impl EncodedBulkPartIter { mut row_groups_to_read: VecDeque, parquet_meta: Arc, data: Bytes, - sequence: Option, + sequence: Option, ) -> error::Result { assert!(context.read_format().as_flat().is_some()); - let sequence = sequence.map(UInt64Array::new_scalar); - let projection_mask = ProjectionMask::roots( parquet_meta.file_metadata().schema_descr(), context.read_format().projection_indices().iter().copied(), @@ -121,7 +119,7 @@ pub struct BulkPartRecordBatchIter { /// Iterator context for filtering context: BulkIterContextRef, /// Sequence number filter. - sequence: Option>, + sequence: Option, } impl BulkPartRecordBatchIter { @@ -129,12 +127,10 @@ impl BulkPartRecordBatchIter { pub fn new( record_batch: RecordBatch, context: BulkIterContextRef, - sequence: Option, + sequence: Option, ) -> Self { assert!(context.read_format().as_flat().is_some()); - let sequence = sequence.map(UInt64Array::new_scalar); - Self { record_batch: Some(record_batch), context, @@ -185,7 +181,7 @@ impl Iterator for BulkPartRecordBatchIter { /// Panics if the format is not flat. fn apply_combined_filters( context: &BulkIterContext, - sequence: &Option>, + sequence: &Option, record_batch: RecordBatch, ) -> error::Result> { // Converts the format to the flat format first. 
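// The sequence predicate is built by `SequenceRange::filter` in the hunk below
// and combined with any existing row filter via a boolean AND.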
@@ -234,9 +230,9 @@ fn apply_combined_filters( if let Some(sequence) = sequence { let sequence_column = record_batch.column(sequence_column_index(record_batch.num_columns())); - let sequence_filter = - datatypes::arrow::compute::kernels::cmp::lt_eq(sequence_column, sequence) - .context(ComputeArrowSnafu)?; + let sequence_filter = sequence + .filter(&sequence_column) + .context(ComputeArrowSnafu)?; // Combine with existing filter using AND operation combined_filter = match combined_filter { None => Some(sequence_filter), @@ -385,7 +381,11 @@ mod tests { assert_eq!(6, result[0].num_columns(),); // Creates iter with sequence filter (only include sequences <= 2) - let iter = BulkPartRecordBatchIter::new(record_batch.clone(), context, Some(2)); + let iter = BulkPartRecordBatchIter::new( + record_batch.clone(), + context, + Some(SequenceRange::LtEq { max: 2 }), + ); let result: Vec<_> = iter.map(|rb| rb.unwrap()).collect(); assert_eq!(1, result.len()); let expect_sequence = Arc::new(UInt64Array::from(vec![1, 2])) as ArrayRef; diff --git a/src/mito2/src/memtable/partition_tree.rs b/src/mito2/src/memtable/partition_tree.rs index d20c51a13761..e404a5851ee6 100644 --- a/src/mito2/src/memtable/partition_tree.rs +++ b/src/mito2/src/memtable/partition_tree.rs @@ -28,11 +28,12 @@ use std::sync::Arc; use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering}; use common_base::readable_size::ReadableSize; +use common_stat::get_total_memory_readable; use mito_codec::key_values::KeyValue; use mito_codec::row_converter::{PrimaryKeyCodec, build_primary_key_codec}; use serde::{Deserialize, Serialize}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceRange}; use table::predicate::Predicate; use crate::error::{Result, UnsupportedOperationSnafu}; @@ -91,9 +92,9 @@ pub struct PartitionTreeConfig { impl Default for PartitionTreeConfig { fn default() -> Self { let mut fork_dictionary_bytes = ReadableSize::mb(512); - if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(total_memory) = get_total_memory_readable() { let adjust_dictionary_bytes = - std::cmp::min(sys_memory / DICTIONARY_SIZE_FACTOR, fork_dictionary_bytes); + std::cmp::min(total_memory / DICTIONARY_SIZE_FACTOR, fork_dictionary_bytes); if adjust_dictionary_bytes.0 > 0 { fork_dictionary_bytes = adjust_dictionary_bytes; } @@ -181,7 +182,7 @@ impl Memtable for PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - sequence: Option, + sequence: Option, ) -> Result { self.tree.read(projection, predicate, sequence, None) } @@ -190,7 +191,7 @@ impl Memtable for PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: PredicateGroup, - sequence: Option, + sequence: Option, _for_flush: bool, ) -> Result { let projection = projection.map(|ids| ids.to_vec()); @@ -314,7 +315,7 @@ impl PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - sequence: Option, + sequence: Option, ) -> Result { self.tree.read(projection, predicate, sequence, None) } @@ -361,7 +362,7 @@ struct PartitionTreeIterBuilder { tree: Arc, projection: Option>, predicate: Option, - sequence: Option, + sequence: Option, } impl IterBuilder for PartitionTreeIterBuilder { @@ -428,7 +429,13 @@ mod tests { let expected_ts = kvs .iter() - .map(|kv| kv.timestamp().as_timestamp().unwrap().unwrap().value()) + .map(|kv| { + kv.timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value() + }) 
.collect::>(); let iter = memtable.iter(None, None, None).unwrap(); diff --git a/src/mito2/src/memtable/partition_tree/tree.rs b/src/mito2/src/memtable/partition_tree/tree.rs index 0b903a261601..17977db56a1d 100644 --- a/src/mito2/src/memtable/partition_tree/tree.rs +++ b/src/mito2/src/memtable/partition_tree/tree.rs @@ -30,7 +30,7 @@ use mito_codec::row_converter::{PrimaryKeyCodec, SortField}; use snafu::{ResultExt, ensure}; use store_api::codec::PrimaryKeyEncoding; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceRange}; use table::predicate::Predicate; use crate::error::{ @@ -151,7 +151,12 @@ impl PartitionTree { for kv in kvs.iter() { self.verify_primary_key_length(&kv)?; // Safety: timestamp of kv must be both present and a valid timestamp value. - let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + let ts = kv + .timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value(); metrics.min_ts = metrics.min_ts.min(ts); metrics.max_ts = metrics.max_ts.max(ts); metrics.value_bytes += kv.fields().map(|v| v.data_size()).sum::(); @@ -196,7 +201,12 @@ impl PartitionTree { self.verify_primary_key_length(&kv)?; // Safety: timestamp of kv must be both present and a valid timestamp value. - let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + let ts = kv + .timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value(); metrics.min_ts = metrics.min_ts.min(ts); metrics.max_ts = metrics.max_ts.max(ts); metrics.value_bytes += kv.fields().map(|v| v.data_size()).sum::(); @@ -229,7 +239,7 @@ impl PartitionTree { &self, projection: Option<&[ColumnId]>, predicate: Option, - sequence: Option, + sequence: Option, mem_scan_metrics: Option, ) -> Result { let start = Instant::now(); @@ -465,7 +475,7 @@ struct TreeIterMetrics { struct TreeIter { /// Optional Sequence number of the current reader which limit results batch to lower than this sequence number. - sequence: Option, + sequence: Option, partitions: VecDeque, current_reader: Option, metrics: TreeIterMetrics, diff --git a/src/mito2/src/memtable/simple_bulk_memtable.rs b/src/mito2/src/memtable/simple_bulk_memtable.rs index 1270557ac69b..cd7d9bdf5c5f 100644 --- a/src/mito2/src/memtable/simple_bulk_memtable.rs +++ b/src/mito2/src/memtable/simple_bulk_memtable.rs @@ -27,7 +27,7 @@ use mito_codec::key_values::KeyValue; use rayon::prelude::*; use snafu::{OptionExt, ResultExt}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceRange}; use crate::flush::WriteBufferManagerRef; use crate::memtable::bulk::part::BulkPart; @@ -111,7 +111,12 @@ impl SimpleBulkMemtable { let size = series.push(ts, sequence, op_type, kv.fields()); stats.value_bytes += size; // safety: timestamp of kv must be both present and a valid timestamp value. 
- let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + let ts = kv + .timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value(); stats.min_ts = stats.min_ts.min(ts); stats.max_ts = stats.max_ts.max(ts); } @@ -218,7 +223,7 @@ impl Memtable for SimpleBulkMemtable { &self, projection: Option<&[ColumnId]>, _predicate: Option, - sequence: Option, + sequence: Option, ) -> error::Result { let iter = self.create_iter(projection, sequence)?.build(None)?; @@ -234,7 +239,7 @@ impl Memtable for SimpleBulkMemtable { &self, projection: Option<&[ColumnId]>, predicate: PredicateGroup, - sequence: Option, + sequence: Option, _for_flush: bool, ) -> error::Result { let start_time = Instant::now(); @@ -833,7 +838,9 @@ mod tests { .unwrap(); // Filter with sequence 0 should only return first write - let mut iter = memtable.iter(None, None, Some(0)).unwrap(); + let mut iter = memtable + .iter(None, None, Some(SequenceRange::LtEq { max: 0 })) + .unwrap(); let batch = iter.next().unwrap().unwrap(); assert_eq!(1, batch.num_rows()); assert_eq!(1.0, batch.fields()[0].data.get(0).as_f64_lossy().unwrap()); diff --git a/src/mito2/src/memtable/simple_bulk_memtable/test_only.rs b/src/mito2/src/memtable/simple_bulk_memtable/test_only.rs index f71385d78cfd..2f2f9a278d05 100644 --- a/src/mito2/src/memtable/simple_bulk_memtable/test_only.rs +++ b/src/mito2/src/memtable/simple_bulk_memtable/test_only.rs @@ -16,7 +16,7 @@ use std::collections::HashSet; use std::time::Instant; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceRange}; use crate::error; use crate::memtable::simple_bulk_memtable::{Iter, SimpleBulkMemtable}; @@ -33,7 +33,7 @@ impl SimpleBulkMemtable { pub(crate) fn create_iter( &self, projection: Option<&[ColumnId]>, - sequence: Option, + sequence: Option, ) -> error::Result { let mut series = self.series.write().unwrap(); @@ -59,7 +59,7 @@ pub(crate) struct BatchIterBuilderDeprecated { region_metadata: RegionMetadataRef, values: Option, projection: HashSet, - sequence: Option, + sequence: Option, dedup: bool, merge_mode: MergeMode, } diff --git a/src/mito2/src/memtable/time_partition.rs b/src/mito2/src/memtable/time_partition.rs index 53d4a8f74aca..6038d5cd2004 100644 --- a/src/mito2/src/memtable/time_partition.rs +++ b/src/mito2/src/memtable/time_partition.rs @@ -273,7 +273,7 @@ impl TimePartitions { let mut all_in_partition = true; for kv in kvs.iter() { // Safety: We checked the schema in the write request. - let ts = kv.timestamp().as_timestamp().unwrap().unwrap(); + let ts = kv.timestamp().try_into_timestamp().unwrap().unwrap(); if !part.contains_timestamp(ts) { all_in_partition = false; break; @@ -634,7 +634,7 @@ impl TimePartitions { for kv in kvs.iter() { let mut part_found = false; // Safety: We used the timestamp before. 
- let ts = kv.timestamp().as_timestamp().unwrap().unwrap(); + let ts = kv.timestamp().try_into_timestamp().unwrap().unwrap(); for part in parts { if part.contains_timestamp(ts) { parts_to_write diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index e1c292269fdc..60fe2f0bcdbf 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -39,7 +39,7 @@ use mito_codec::key_values::KeyValue; use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt}; use snafu::{OptionExt, ResultExt, ensure}; use store_api::metadata::RegionMetadataRef; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceRange}; use table::predicate::Predicate; use crate::error::{ @@ -197,7 +197,12 @@ impl TimeSeriesMemtable { stats.value_bytes += value_allocated; // safety: timestamp of kv must be both present and a valid timestamp value. - let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + let ts = kv + .timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value(); stats.min_ts = stats.min_ts.min(ts); stats.max_ts = stats.max_ts.max(ts); Ok(()) @@ -272,7 +277,7 @@ impl Memtable for TimeSeriesMemtable { &self, projection: Option<&[ColumnId]>, filters: Option, - sequence: Option, + sequence: Option, ) -> Result { let projection = if let Some(projection) = projection { projection.iter().copied().collect() @@ -299,7 +304,7 @@ impl Memtable for TimeSeriesMemtable { &self, projection: Option<&[ColumnId]>, predicate: PredicateGroup, - sequence: Option, + sequence: Option, _for_flush: bool, ) -> Result { let projection = if let Some(projection) = projection { @@ -463,7 +468,7 @@ impl SeriesSet { projection: HashSet, predicate: Option, dedup: bool, - sequence: Option, + sequence: Option, mem_scan_metrics: Option, ) -> Result { let primary_key_schema = primary_key_schema(&self.region_metadata); @@ -531,7 +536,7 @@ struct Iter { pk_datatypes: Vec, codec: Arc, dedup: bool, - sequence: Option, + sequence: Option, metrics: Metrics, mem_scan_metrics: Option, } @@ -547,7 +552,7 @@ impl Iter { pk_datatypes: Vec, codec: Arc, dedup: bool, - sequence: Option, + sequence: Option, mem_scan_metrics: Option, ) -> Result { let predicate = predicate @@ -896,7 +901,7 @@ impl ValueBuilder { }; self.timestamp - .push(ts.as_timestamp().unwrap().unwrap().value()); + .push(ts.try_into_timestamp().unwrap().unwrap().value()); self.sequence.push(sequence); self.op_type.push(op_type); let num_rows = self.timestamp.len(); @@ -1239,7 +1244,7 @@ struct TimeSeriesIterBuilder { projection: HashSet, predicate: Option, dedup: bool, - sequence: Option, + sequence: Option, merge_mode: MergeMode, } @@ -1650,10 +1655,13 @@ mod tests { memtable.write(&kvs).unwrap(); let mut expected_ts: HashMap = HashMap::new(); - for ts in kvs - .iter() - .map(|kv| kv.timestamp().as_timestamp().unwrap().unwrap().value()) - { + for ts in kvs.iter().map(|kv| { + kv.timestamp() + .try_into_timestamp() + .unwrap() + .unwrap() + .value() + }) { *expected_ts.entry(ts).or_default() += if dedup { 1 } else { 2 }; } diff --git a/src/mito2/src/read.rs b/src/mito2/src/read.rs index a10b2ba0213f..06d615452add 100644 --- a/src/mito2/src/read.rs +++ b/src/mito2/src/read.rs @@ -41,7 +41,7 @@ use async_trait::async_trait; use common_time::Timestamp; use datafusion_common::arrow::array::UInt8Array; use datatypes::arrow; -use datatypes::arrow::array::{Array, ArrayRef, UInt64Array}; +use datatypes::arrow::array::{Array, ArrayRef}; use 
datatypes::arrow::compute::SortOptions; use datatypes::arrow::record_batch::RecordBatch; use datatypes::arrow::row::{RowConverter, SortField}; @@ -60,7 +60,7 @@ use futures::stream::BoxStream; use mito_codec::row_converter::{CompositeValues, PrimaryKeyCodec}; use snafu::{OptionExt, ResultExt, ensure}; use store_api::metadata::RegionMetadata; -use store_api::storage::{ColumnId, SequenceNumber}; +use store_api::storage::{ColumnId, SequenceNumber, SequenceRange}; use crate::error::{ ComputeArrowSnafu, ComputeVectorSnafu, ConvertVectorSnafu, DecodeSnafu, InvalidBatchSnafu, @@ -361,17 +361,29 @@ impl Batch { } /// Filters rows by the given `sequence`. Only preserves rows with sequence less than or equal to `sequence`. - pub fn filter_by_sequence(&mut self, sequence: Option) -> Result<()> { - let seq = match (sequence, self.last_sequence()) { - (None, _) | (_, None) => return Ok(()), - (Some(sequence), Some(last_sequence)) if sequence >= last_sequence => return Ok(()), - (Some(sequence), Some(_)) => sequence, + pub fn filter_by_sequence(&mut self, sequence: Option) -> Result<()> { + let seq_range = match sequence { + None => return Ok(()), + Some(seq_range) => { + let (Some(first), Some(last)) = (self.first_sequence(), self.last_sequence()) + else { + return Ok(()); + }; + let is_subset = match seq_range { + SequenceRange::Gt { min } => min < first, + SequenceRange::LtEq { max } => max >= last, + SequenceRange::GtLtEq { min, max } => min < first && max >= last, + }; + if is_subset { + return Ok(()); + } + seq_range + } }; let seqs = self.sequences.as_arrow(); - let sequence = UInt64Array::new_scalar(seq); - let predicate = datafusion_common::arrow::compute::kernels::cmp::lt_eq(seqs, &sequence) - .context(ComputeArrowSnafu)?; + let predicate = seq_range.filter(seqs).context(ComputeArrowSnafu)?; + let predicate = BooleanVector::from(predicate); self.filter(&predicate)?; @@ -1292,7 +1304,9 @@ mod tests { &[OpType::Put, OpType::Put, OpType::Put, OpType::Put], &[21, 22, 23, 24], ); - batch.filter_by_sequence(Some(13)).unwrap(); + batch + .filter_by_sequence(Some(SequenceRange::LtEq { max: 13 })) + .unwrap(); let expect = new_batch( &[1, 2, 3], &[11, 12, 13], @@ -1309,7 +1323,9 @@ mod tests { &[21, 22, 23, 24], ); - batch.filter_by_sequence(Some(10)).unwrap(); + batch + .filter_by_sequence(Some(SequenceRange::LtEq { max: 10 })) + .unwrap(); assert!(batch.is_empty()); // None filter. 
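For reference, a minimal standalone sketch of the range semantics these tests exercise, assuming `SequenceRange` exposes the three variants used in this patch and a `filter` method producing a boolean mask over the sequence column (the real definition lives in `store_api` and is not part of this diff):

use arrow::array::{BooleanArray, UInt64Array};
use arrow::compute::and;
use arrow::compute::kernels::cmp::{gt, lt_eq};
use arrow::error::ArrowError;

// Sketch only: `Gt` is an exclusive lower bound, `LtEq` an inclusive upper
// bound, and `GtLtEq` combines both, matching the expectations asserted above.
enum SequenceRangeSketch {
    Gt { min: u64 },
    LtEq { max: u64 },
    GtLtEq { min: u64, max: u64 },
}

impl SequenceRangeSketch {
    // Builds a boolean mask selecting rows whose sequence falls in the range.
    fn filter(&self, sequences: &UInt64Array) -> Result<BooleanArray, ArrowError> {
        match *self {
            Self::Gt { min } => gt(sequences, &UInt64Array::new_scalar(min)),
            Self::LtEq { max } => lt_eq(sequences, &UInt64Array::new_scalar(max)),
            Self::GtLtEq { min, max } => and(
                &gt(sequences, &UInt64Array::new_scalar(min))?,
                &lt_eq(sequences, &UInt64Array::new_scalar(max))?,
            ),
        }
    }
}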
@@ -1325,13 +1341,95 @@ mod tests { // Filter a empty batch let mut batch = new_batch(&[], &[], &[], &[]); - batch.filter_by_sequence(Some(10)).unwrap(); + batch + .filter_by_sequence(Some(SequenceRange::LtEq { max: 10 })) + .unwrap(); assert!(batch.is_empty()); // Filter a empty batch with None let mut batch = new_batch(&[], &[], &[], &[]); batch.filter_by_sequence(None).unwrap(); assert!(batch.is_empty()); + + // Test From variant - exclusive lower bound + let mut batch = new_batch( + &[1, 2, 3, 4], + &[11, 12, 13, 14], + &[OpType::Put, OpType::Put, OpType::Put, OpType::Put], + &[21, 22, 23, 24], + ); + batch + .filter_by_sequence(Some(SequenceRange::Gt { min: 12 })) + .unwrap(); + let expect = new_batch(&[3, 4], &[13, 14], &[OpType::Put, OpType::Put], &[23, 24]); + assert_eq!(expect, batch); + + // Test From variant with no matches + let mut batch = new_batch( + &[1, 2, 3, 4], + &[11, 12, 13, 14], + &[OpType::Put, OpType::Delete, OpType::Put, OpType::Put], + &[21, 22, 23, 24], + ); + batch + .filter_by_sequence(Some(SequenceRange::Gt { min: 20 })) + .unwrap(); + assert!(batch.is_empty()); + + // Test Range variant - exclusive lower bound, inclusive upper bound + let mut batch = new_batch( + &[1, 2, 3, 4, 5], + &[11, 12, 13, 14, 15], + &[ + OpType::Put, + OpType::Put, + OpType::Put, + OpType::Put, + OpType::Put, + ], + &[21, 22, 23, 24, 25], + ); + batch + .filter_by_sequence(Some(SequenceRange::GtLtEq { min: 12, max: 14 })) + .unwrap(); + let expect = new_batch(&[3, 4], &[13, 14], &[OpType::Put, OpType::Put], &[23, 24]); + assert_eq!(expect, batch); + + // Test Range variant with mixed operations + let mut batch = new_batch( + &[1, 2, 3, 4, 5], + &[11, 12, 13, 14, 15], + &[ + OpType::Put, + OpType::Delete, + OpType::Put, + OpType::Delete, + OpType::Put, + ], + &[21, 22, 23, 24, 25], + ); + batch + .filter_by_sequence(Some(SequenceRange::GtLtEq { min: 11, max: 13 })) + .unwrap(); + let expect = new_batch( + &[2, 3], + &[12, 13], + &[OpType::Delete, OpType::Put], + &[22, 23], + ); + assert_eq!(expect, batch); + + // Test Range variant with no matches + let mut batch = new_batch( + &[1, 2, 3, 4], + &[11, 12, 13, 14], + &[OpType::Put, OpType::Put, OpType::Put, OpType::Put], + &[21, 22, 23, 24], + ); + batch + .filter_by_sequence(Some(SequenceRange::GtLtEq { min: 20, max: 25 })) + .unwrap(); + assert!(batch.is_empty()); } #[test] diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index cd660dab9f20..536c48e2488a 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -35,7 +35,9 @@ use smallvec::SmallVec; use snafu::ResultExt; use store_api::metadata::{RegionMetadata, RegionMetadataRef}; use store_api::region_engine::{PartitionRange, RegionScannerRef}; -use store_api::storage::{RegionId, ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector}; +use store_api::storage::{ + RegionId, ScanRequest, SequenceRange, TimeSeriesDistribution, TimeSeriesRowSelector, +}; use table::predicate::{Predicate, build_time_range_predicate}; use tokio::sync::{Semaphore, mpsc}; use tokio_stream::wrappers::ReceiverStream; @@ -438,7 +440,10 @@ impl ScanRegion { let ranges_in_memtable = m.ranges( Some(mapper.column_ids()), predicate.clone(), - self.request.sequence, + SequenceRange::new( + self.request.memtable_min_sequence, + self.request.memtable_max_sequence, + ), false, )?; mem_range_builders.extend(ranges_in_memtable.ranges.into_values().map(|v| { diff --git a/src/mito2/src/region/options.rs b/src/mito2/src/region/options.rs index 
363c756eb8b2..a56d25d6e577 100644 --- a/src/mito2/src/region/options.rs +++ b/src/mito2/src/region/options.rs @@ -20,6 +20,7 @@ use std::collections::HashMap; use std::time::Duration; use common_base::readable_size::ReadableSize; +use common_stat::get_total_memory_readable; use common_time::TimeToLive; use common_wal::options::{WAL_OPTIONS_KEY, WalOptions}; use serde::de::Error as _; @@ -354,9 +355,9 @@ pub struct PartitionTreeOptions { impl Default for PartitionTreeOptions { fn default() -> Self { let mut fork_dictionary_bytes = ReadableSize::mb(512); - if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { + if let Some(total_memory) = get_total_memory_readable() { let adjust_dictionary_bytes = std::cmp::min( - sys_memory / crate::memtable::partition_tree::DICTIONARY_SIZE_FACTOR, + total_memory / crate::memtable::partition_tree::DICTIONARY_SIZE_FACTOR, fork_dictionary_bytes, ); if adjust_dictionary_bytes.0 > 0 { diff --git a/src/mito2/src/remap_manifest.rs b/src/mito2/src/remap_manifest.rs index ed993b41b6df..6800a4bf4d88 100644 --- a/src/mito2/src/remap_manifest.rs +++ b/src/mito2/src/remap_manifest.rs @@ -22,7 +22,6 @@ use store_api::storage::RegionId; use crate::error; pub use crate::error::{Error, Result}; use crate::manifest::action::{RegionManifest, RemovedFilesRecord}; -use crate::sst::file::FileMeta; /// Remaps file references from old region manifests to new region manifests. pub struct RemapManifest { @@ -167,11 +166,13 @@ impl RemapManifest { Entry::Vacant(e) => { e.insert(file_meta_clone); } + #[cfg(debug_assertions)] Entry::Occupied(e) => { // File already exists - verify it's the same physical file - #[cfg(debug_assertions)] Self::verify_file_consistency(e.get(), &file_meta_clone)?; } + #[cfg(not(debug_assertions))] + Entry::Occupied(_) => {} } } @@ -180,7 +181,10 @@ impl RemapManifest { /// Verifies that two file metadata entries are consistent. 
#[cfg(debug_assertions)] - fn verify_file_consistency(existing: &FileMeta, new: &FileMeta) -> Result<()> { + fn verify_file_consistency( + existing: &crate::sst::file::FileMeta, + new: &crate::sst::file::FileMeta, + ) -> Result<()> { // When the same file appears from multiple overlapping old regions, // verify they are actually the same physical file with identical metadata diff --git a/src/mito2/src/sst/index/bloom_filter.rs b/src/mito2/src/sst/index/bloom_filter.rs index 7f454937f0e8..0896c3028ff5 100644 --- a/src/mito2/src/sst/index/bloom_filter.rs +++ b/src/mito2/src/sst/index/bloom_filter.rs @@ -15,4 +15,4 @@ pub(crate) mod applier; pub(crate) mod creator; -const INDEX_BLOB_TYPE: &str = "greptime-bloom-filter-v1"; +pub(crate) const INDEX_BLOB_TYPE: &str = "greptime-bloom-filter-v1"; diff --git a/src/mito2/src/sst/index/bloom_filter/applier.rs b/src/mito2/src/sst/index/bloom_filter/applier.rs index 4562f01cdf70..3fa387c8dc55 100644 --- a/src/mito2/src/sst/index/bloom_filter/applier.rs +++ b/src/mito2/src/sst/index/bloom_filter/applier.rs @@ -22,6 +22,7 @@ use common_base::range_read::RangeReader; use common_telemetry::warn; use index::bloom_filter::applier::{BloomFilterApplier, InListPredicate}; use index::bloom_filter::reader::{BloomFilterReader, BloomFilterReaderImpl}; +use index::target::IndexTarget; use object_store::ObjectStore; use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use puffin::puffin_manager::{PuffinManager, PuffinReader}; @@ -263,12 +264,14 @@ impl BloomFilterIndexApplier { file_cache.local_store(), WriteCachePathProvider::new(file_cache.clone()), ); + let blob_name = Self::column_blob_name(column_id); + let reader = puffin_manager .reader(&file_id) .await .context(PuffinBuildReaderSnafu)? .with_file_size_hint(file_size_hint) - .blob(&Self::column_blob_name(column_id)) + .blob(&blob_name) .await .context(PuffinReadBlobSnafu)? .reader() @@ -279,7 +282,7 @@ impl BloomFilterIndexApplier { // TODO(ruihang): use the same util with the code in creator fn column_blob_name(column_id: ColumnId) -> String { - format!("{INDEX_BLOB_TYPE}-{column_id}") + format!("{INDEX_BLOB_TYPE}-{}", IndexTarget::ColumnId(column_id)) } /// Creates a blob reader from the remote index file @@ -297,12 +300,14 @@ impl BloomFilterIndexApplier { ) .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); + let blob_name = Self::column_blob_name(column_id); + puffin_manager .reader(&file_id) .await .context(PuffinBuildReaderSnafu)? .with_file_size_hint(file_size_hint) - .blob(&Self::column_blob_name(column_id)) + .blob(&blob_name) .await .context(PuffinReadBlobSnafu)? 
.reader() diff --git a/src/mito2/src/sst/index/bloom_filter/creator.rs b/src/mito2/src/sst/index/bloom_filter/creator.rs index 387302aeb0b7..a48898902f1d 100644 --- a/src/mito2/src/sst/index/bloom_filter/creator.rs +++ b/src/mito2/src/sst/index/bloom_filter/creator.rs @@ -21,6 +21,7 @@ use datatypes::arrow::record_batch::RecordBatch; use datatypes::schema::SkippingIndexType; use datatypes::vectors::Helper; use index::bloom_filter::creator::BloomFilterCreator; +use index::target::IndexTarget; use mito_codec::index::{IndexValueCodec, IndexValuesCodec}; use mito_codec::row_converter::SortField; use puffin::puffin_manager::{PuffinWriter, PutOptions}; @@ -381,7 +382,8 @@ impl BloomFilterIndexer { ) -> Result { let (tx, rx) = tokio::io::duplex(PIPE_BUFFER_SIZE_FOR_SENDING_BLOB); - let blob_name = format!("{}-{}", INDEX_BLOB_TYPE, col_id); + let target_key = IndexTarget::ColumnId(*col_id); + let blob_name = format!("{INDEX_BLOB_TYPE}-{target_key}"); let (index_finish, puffin_add_blob) = futures::join!( creator.finish(tx.compat_write()), puffin_writer.put_blob( diff --git a/src/mito2/src/sst/index/fulltext_index.rs b/src/mito2/src/sst/index/fulltext_index.rs index 86d8a35b9d29..542a372bb01c 100644 --- a/src/mito2/src/sst/index/fulltext_index.rs +++ b/src/mito2/src/sst/index/fulltext_index.rs @@ -15,5 +15,5 @@ pub(crate) mod applier; pub(crate) mod creator; -const INDEX_BLOB_TYPE_TANTIVY: &str = "greptime-fulltext-index-v1"; -const INDEX_BLOB_TYPE_BLOOM: &str = "greptime-fulltext-index-bloom"; +pub(crate) const INDEX_BLOB_TYPE_TANTIVY: &str = "greptime-fulltext-index-v1"; +pub(crate) const INDEX_BLOB_TYPE_BLOOM: &str = "greptime-fulltext-index-bloom"; diff --git a/src/mito2/src/sst/index/fulltext_index/applier.rs b/src/mito2/src/sst/index/fulltext_index/applier.rs index c88fc611dbbc..6b68fc348da8 100644 --- a/src/mito2/src/sst/index/fulltext_index/applier.rs +++ b/src/mito2/src/sst/index/fulltext_index/applier.rs @@ -24,6 +24,7 @@ use index::bloom_filter::reader::BloomFilterReaderImpl; use index::fulltext_index::search::{FulltextIndexSearcher, RowId, TantivyFulltextIndexSearcher}; use index::fulltext_index::tokenizer::{ChineseTokenizer, EnglishTokenizer, Tokenizer}; use index::fulltext_index::{Analyzer, Config}; +use index::target::IndexTarget; use object_store::ObjectStore; use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use puffin::puffin_manager::{GuardWithMetadata, PuffinManager, PuffinReader}; @@ -171,7 +172,10 @@ impl FulltextIndexApplier { column_id: ColumnId, request: &FulltextRequest, ) -> Result>> { - let blob_key = format!("{INDEX_BLOB_TYPE_TANTIVY}-{column_id}"); + let blob_key = format!( + "{INDEX_BLOB_TYPE_TANTIVY}-{}", + IndexTarget::ColumnId(column_id) + ); let dir = self .index_source .dir(file_id, &blob_key, file_size_hint) @@ -283,7 +287,10 @@ impl FulltextIndexApplier { terms: &[FulltextTerm], output: &mut [(usize, Vec>)], ) -> Result { - let blob_key = format!("{INDEX_BLOB_TYPE_BLOOM}-{column_id}"); + let blob_key = format!( + "{INDEX_BLOB_TYPE_BLOOM}-{}", + IndexTarget::ColumnId(column_id) + ); let Some(reader) = self .index_source .blob(file_id, &blob_key, file_size_hint) diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index 5a7c92dd46de..15e8870441a2 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use std::sync::atomic::AtomicUsize; use common_telemetry::warn; -use 
datatypes::arrow::array::{Array, StringArray}; +use datatypes::arrow::array::{Array, LargeStringArray, StringArray}; use datatypes::arrow::datatypes::DataType; use datatypes::arrow::record_batch::RecordBatch; use datatypes::schema::{FulltextAnalyzer, FulltextBackend}; @@ -25,6 +25,7 @@ use index::fulltext_index::create::{ BloomFilterFulltextIndexCreator, FulltextIndexCreator, TantivyFulltextIndexCreator, }; use index::fulltext_index::{Analyzer, Config}; +use index::target::IndexTarget; use puffin::blob_metadata::CompressionCodec; use puffin::puffin_manager::PutOptions; use snafu::{ResultExt, ensure}; @@ -297,7 +298,7 @@ impl SingleCreator { for i in 0..batch.num_rows() { let data = data.get_ref(i); let text = data - .as_string() + .try_into_string() .context(DataTypeMismatchSnafu)? .unwrap_or_default(); self.inner.push_text(text).await?; @@ -321,12 +322,34 @@ impl SingleCreator { if let Some(column_array) = batch.column_by_name(&self.column_name) { // Convert Arrow array to string array. // TODO(yingwen): Use Utf8View later if possible. - let array = datatypes::arrow::compute::cast(column_array, &DataType::Utf8) - .context(ComputeArrowSnafu)?; - let string_array = array.as_any().downcast_ref::().unwrap(); - for text_opt in string_array.iter() { - let text = text_opt.unwrap_or_default(); - self.inner.push_text(text).await?; + match column_array.data_type() { + DataType::Utf8 => { + let string_array = column_array.as_any().downcast_ref::().unwrap(); + for text_opt in string_array.iter() { + let text = text_opt.unwrap_or_default(); + self.inner.push_text(text).await?; + } + } + DataType::LargeUtf8 => { + let large_string_array = column_array + .as_any() + .downcast_ref::() + .unwrap(); + for text_opt in large_string_array.iter() { + let text = text_opt.unwrap_or_default(); + self.inner.push_text(text).await?; + } + } + _ => { + // For other types, cast to Utf8 as before + let array = datatypes::arrow::compute::cast(column_array, &DataType::Utf8) + .context(ComputeArrowSnafu)?; + let string_array = array.as_any().downcast_ref::().unwrap(); + for text_opt in string_array.iter() { + let text = text_opt.unwrap_or_default(); + self.inner.push_text(text).await?; + } + } } } else { // If the column is not found in the batch, push empty text. 
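A note on the blob keys used above and in the creator hunk below: each index blob is named "<blob type constant>-<encoded target>", where the target is now the `Display` form of `IndexTarget::ColumnId(id)` rather than the bare column id. A minimal sketch of the round trip, using a placeholder encoded target since the actual `IndexTarget` encoding is defined in the `index` crate and not shown in this diff:

const FULLTEXT_BLOOM_PREFIX: &str = "greptime-fulltext-index-bloom";

// Mirrors the `format!("{INDEX_BLOB_TYPE_BLOOM}-{}", IndexTarget::ColumnId(id))`
// calls in the applier and creator.
fn blob_key(encoded_target: &str) -> String {
    format!("{FULLTEXT_BLOOM_PREFIX}-{encoded_target}")
}

// Mirrors `BlobIndexTypeTargetKey::target_key_from_blob` in puffin_index.rs:
// strip the type prefix, then the '-' separator, keeping the encoded target.
fn target_key(blob_type: &str) -> Option<&str> {
    blob_type
        .strip_prefix(FULLTEXT_BLOOM_PREFIX)
        .and_then(|suffix| suffix.strip_prefix('-'))
}

fn main() {
    // "t0" stands in for whatever `IndexTarget::ColumnId(0)` actually encodes to.
    let key = blob_key("t0");
    assert_eq!(target_key(&key), Some("t0"));
}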
@@ -385,16 +408,22 @@ impl AltFulltextCreator { ) -> Result { match self { Self::Tantivy(creator) => { - let key = format!("{INDEX_BLOB_TYPE_TANTIVY}-{}", column_id); + let blob_key = format!( + "{INDEX_BLOB_TYPE_TANTIVY}-{}", + IndexTarget::ColumnId(*column_id) + ); creator - .finish(puffin_writer, &key, put_options) + .finish(puffin_writer, &blob_key, put_options) .await .context(FulltextFinishSnafu) } Self::Bloom(creator) => { - let key = format!("{INDEX_BLOB_TYPE_BLOOM}-{}", column_id); + let blob_key = format!( + "{INDEX_BLOB_TYPE_BLOOM}-{}", + IndexTarget::ColumnId(*column_id) + ); creator - .finish(puffin_writer, &key, put_options) + .finish(puffin_writer, &blob_key, put_options) .await .context(FulltextFinishSnafu) } diff --git a/src/mito2/src/sst/index/inverted_index.rs b/src/mito2/src/sst/index/inverted_index.rs index 73dca4ac47f2..f7919ae24f54 100644 --- a/src/mito2/src/sst/index/inverted_index.rs +++ b/src/mito2/src/sst/index/inverted_index.rs @@ -15,4 +15,4 @@ pub(crate) mod applier; pub(crate) mod creator; -const INDEX_BLOB_TYPE: &str = "greptime-inverted-index-v1"; +pub(crate) const INDEX_BLOB_TYPE: &str = "greptime-inverted-index-v1"; diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder.rs b/src/mito2/src/sst/index/inverted_index/applier/builder.rs index 60690df3a88c..8bc5e8b6d184 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder.rs @@ -27,6 +27,7 @@ use datatypes::data_type::ConcreteDataType; use datatypes::value::Value; use index::inverted_index::search::index_apply::PredicatesIndexApplier; use index::inverted_index::search::predicate::Predicate; +use index::target::IndexTarget; use mito_codec::index::IndexValueCodec; use mito_codec::row_converter::SortField; use object_store::ObjectStore; @@ -139,8 +140,13 @@ impl<'a> InvertedIndexApplierBuilder<'a> { let predicates = self .output .iter() - .map(|(column_id, predicates)| (column_id.to_string(), predicates.clone())) - .collect(); + .map(|(column_id, predicates)| { + ( + format!("{}", IndexTarget::ColumnId(*column_id)), + predicates.clone(), + ) + }) + .collect::>(); let applier = PredicatesIndexApplier::try_from(predicates); Ok(Some( diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 608b11b0759f..b7019422f83c 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -24,6 +24,7 @@ use index::inverted_index::create::InvertedIndexCreator; use index::inverted_index::create::sort::external_sort::ExternalSorter; use index::inverted_index::create::sort_create::SortIndexCreator; use index::inverted_index::format::writer::InvertedIndexBlobWriter; +use index::target::IndexTarget; use mito_codec::index::{IndexValueCodec, IndexValuesCodec}; use mito_codec::row_converter::SortField; use puffin::puffin_manager::{PuffinWriter, PutOptions}; @@ -72,7 +73,7 @@ pub struct InvertedIndexer { /// The memory usage of the index creator. memory_usage: Arc, - /// Ids of indexed columns and their names (`to_string` of the column id). + /// Ids of indexed columns and their encoded target keys. indexed_column_ids: Vec<(ColumnId, String)>, /// Region metadata for column lookups. 
@@ -115,8 +116,8 @@ impl InvertedIndexer { let indexed_column_ids = indexed_column_ids .into_iter() .map(|col_id| { - let col_id_str = col_id.to_string(); - (col_id, col_id_str) + let target_key = format!("{}", IndexTarget::ColumnId(col_id)); + (col_id, target_key) }) .collect(); Self { @@ -181,7 +182,7 @@ impl InvertedIndexer { let column_indices = self.column_index_cache.as_ref().unwrap(); - for ((col_id, col_id_str), &column_index) in + for ((col_id, target_key), &column_index) in self.indexed_column_ids.iter().zip(column_indices.iter()) { if let Some(index) = column_index { @@ -197,7 +198,7 @@ impl InvertedIndexer { if value_ref.is_null() { self.index_creator - .push_with_name(col_id_str, None) + .push_with_name(target_key, None) .await .context(PushIndexValueSnafu)?; } else { @@ -208,7 +209,7 @@ impl InvertedIndexer { ) .context(EncodeSnafu)?; self.index_creator - .push_with_name(col_id_str, Some(&self.value_buf)) + .push_with_name(target_key, Some(&self.value_buf)) .await .context(PushIndexValueSnafu)?; } @@ -286,7 +287,7 @@ impl InvertedIndexer { let n = batch.num_rows(); guard.inc_row_count(n); - for (col_id, col_id_str) in &self.indexed_column_ids { + for (col_id, target_key) in &self.indexed_column_ids { match self.codec.pk_col_info(*col_id) { // pk Some(col_info) => { @@ -308,7 +309,7 @@ impl InvertedIndexer { .transpose()?; self.index_creator - .push_with_name_n(col_id_str, value, n) + .push_with_name_n(target_key, value, n) .await .context(PushIndexValueSnafu)?; } @@ -327,7 +328,7 @@ impl InvertedIndexer { let value = values.data.get_ref(i); if value.is_null() { self.index_creator - .push_with_name(col_id_str, None) + .push_with_name(target_key, None) .await .context(PushIndexValueSnafu)?; } else { @@ -338,7 +339,7 @@ impl InvertedIndexer { ) .context(EncodeSnafu)?; self.index_creator - .push_with_name(col_id_str, Some(&self.value_buf)) + .push_with_name(target_key, Some(&self.value_buf)) .await .context(PushIndexValueSnafu)?; } diff --git a/src/mito2/src/sst/parquet/flat_format.rs b/src/mito2/src/sst/parquet/flat_format.rs index 2dc30bb3641f..bcf1d8694c10 100644 --- a/src/mito2/src/sst/parquet/flat_format.rs +++ b/src/mito2/src/sst/parquet/flat_format.rs @@ -688,7 +688,7 @@ impl FlatConvertFormat { let values_array = values_vector.to_arrow_array(); // Only creates dictionary array for string types, otherwise take values by keys - if matches!(column_type, ConcreteDataType::String(_)) { + if column_type.is_string() { // Creates dictionary array using the same keys for string types // Note that the dictionary values may have nulls. 
let dict_array = DictionaryArray::new(keys.clone(), values_array); diff --git a/src/mito2/src/test_util/memtable_util.rs b/src/mito2/src/test_util/memtable_util.rs index 75efa0c6f5e9..2174ac7b9f21 100644 --- a/src/mito2/src/test_util/memtable_util.rs +++ b/src/mito2/src/test_util/memtable_util.rs @@ -30,7 +30,7 @@ use mito_codec::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt, SortFi use store_api::metadata::{ ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef, }; -use store_api::storage::{ColumnId, RegionId, SequenceNumber}; +use store_api::storage::{ColumnId, RegionId, SequenceNumber, SequenceRange}; use table::predicate::Predicate; use crate::error::Result; @@ -89,7 +89,7 @@ impl Memtable for EmptyMemtable { &self, _projection: Option<&[ColumnId]>, _filters: Option, - _sequence: Option, + _sequence: Option, ) -> Result { Ok(Box::new(std::iter::empty())) } @@ -98,7 +98,7 @@ impl Memtable for EmptyMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: PredicateGroup, - _sequence: Option, + _sequence: Option, _for_flush: bool, ) -> Result { Ok(MemtableRanges::default()) diff --git a/src/mito2/src/worker/handle_compaction.rs b/src/mito2/src/worker/handle_compaction.rs index b9aa7baa38ea..ffa0aa4468d4 100644 --- a/src/mito2/src/worker/handle_compaction.rs +++ b/src/mito2/src/worker/handle_compaction.rs @@ -35,6 +35,7 @@ impl RegionWorkerLoop { return; }; COMPACTION_REQUEST_COUNT.inc(); + let parallelism = req.parallelism.unwrap_or(1) as usize; if let Err(e) = self .compaction_scheduler .schedule_compaction( @@ -45,8 +46,7 @@ impl RegionWorkerLoop { sender, ®ion.manifest_ctx, self.schema_metadata_manager.clone(), - // TODO(yingwen): expose this to frontend - 1, + parallelism, ) .await { @@ -116,7 +116,7 @@ impl RegionWorkerLoop { OptionOutputTx::none(), ®ion.manifest_ctx, self.schema_metadata_manager.clone(), - 1, + 1, // Default for automatic compaction ) .await { diff --git a/src/operator/src/expr_helper.rs b/src/operator/src/expr_helper.rs index b3a503a49967..3fa9a0ae1f69 100644 --- a/src/operator/src/expr_helper.rs +++ b/src/operator/src/expr_helper.rs @@ -682,6 +682,40 @@ pub fn column_schemas_to_defs( .collect() } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RepartitionRequest { + pub catalog_name: String, + pub schema_name: String, + pub table_name: String, + pub from_exprs: Vec, + pub into_exprs: Vec, +} + +pub(crate) fn to_repartition_request( + alter_table: AlterTable, + query_ctx: &QueryContextRef, +) -> Result { + let (catalog_name, schema_name, table_name) = + table_idents_to_full_name(alter_table.table_name(), query_ctx) + .map_err(BoxedError::new) + .context(ExternalSnafu)?; + + let AlterTableOperation::Repartition { operation } = alter_table.alter_operation else { + return InvalidSqlSnafu { + err_msg: "expected REPARTITION operation", + } + .fail(); + }; + + Ok(RepartitionRequest { + catalog_name, + schema_name, + table_name, + from_exprs: operation.from_exprs, + into_exprs: operation.into_exprs, + }) +} + /// Converts a SQL alter table statement into a gRPC alter table expression. pub(crate) fn to_alter_table_expr( alter_table: AlterTable, @@ -764,6 +798,12 @@ pub(crate) fn to_alter_table_expr( AlterTableOperation::UnsetTableOptions { keys } => { AlterTableKind::UnsetTableOptions(UnsetTableOptions { keys }) } + AlterTableOperation::Repartition { .. } => { + return NotSupportedSnafu { + feat: "ALTER TABLE ... 
REPARTITION", + } + .fail(); + } AlterTableOperation::SetIndex { options } => { let option = match options { sql::statements::alter::SetIndexOperation::Fulltext { @@ -939,11 +979,10 @@ pub fn to_create_flow_task_expr( query_ctx: &QueryContextRef, ) -> Result { // retrieve sink table name - let sink_table_ref = - object_name_to_table_reference(create_flow.sink_table_name.clone().into(), true) - .with_context(|_| ConvertIdentifierSnafu { - ident: create_flow.sink_table_name.to_string(), - })?; + let sink_table_ref = object_name_to_table_reference(create_flow.sink_table_name.clone(), true) + .with_context(|_| ConvertIdentifierSnafu { + ident: create_flow.sink_table_name.to_string(), + })?; let catalog = sink_table_ref .catalog() .unwrap_or(query_ctx.current_catalog()) @@ -961,9 +1000,11 @@ pub fn to_create_flow_task_expr( let source_table_names = extract_tables_from_query(&create_flow.query) .map(|name| { - let reference = object_name_to_table_reference(name.clone().into(), true) - .with_context(|_| ConvertIdentifierSnafu { - ident: name.to_string(), + let reference = + object_name_to_table_reference(name.clone(), true).with_context(|_| { + ConvertIdentifierSnafu { + ident: name.to_string(), + } })?; let catalog = reference .catalog() @@ -1391,6 +1432,50 @@ SELECT max(c1), min(c2) FROM schema_2.table_2;"; assert!(modify_column_type.target_type_extension.is_none()); } + #[test] + fn test_to_repartition_request() { + let sql = r#" +ALTER TABLE metrics REPARTITION ( + device_id < 100 +) INTO ( + device_id < 100 AND area < 'South', + device_id < 100 AND area >= 'South' +);"#; + let stmt = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap() + .pop() + .unwrap(); + + let Statement::AlterTable(alter_table) = stmt else { + unreachable!() + }; + + let request = to_repartition_request(alter_table, &QueryContext::arc()).unwrap(); + assert_eq!("greptime", request.catalog_name); + assert_eq!("public", request.schema_name); + assert_eq!("metrics", request.table_name); + assert_eq!( + request + .from_exprs + .into_iter() + .map(|x| x.to_string()) + .collect::>(), + vec!["device_id < 100".to_string()] + ); + assert_eq!( + request + .into_exprs + .into_iter() + .map(|x| x.to_string()) + .collect::>(), + vec![ + "device_id < 100 AND area < 'South'".to_string(), + "device_id < 100 AND area >= 'South'".to_string() + ] + ); + } + fn new_test_table_names() -> Vec { vec![ TableName { diff --git a/src/operator/src/expr_helper/trigger.rs b/src/operator/src/expr_helper/trigger.rs index 2df91a0c07e0..95e9a162d571 100644 --- a/src/operator/src/expr_helper/trigger.rs +++ b/src/operator/src/expr_helper/trigger.rs @@ -6,10 +6,9 @@ use api::v1::{ use session::context::QueryContextRef; use snafu::{ResultExt, ensure}; use sql::ast::{ObjectName, ObjectNamePartExt}; -use sql::statements::create::trigger::{ChannelType, CreateTrigger, TriggerOn}; +use sql::statements::create::trigger::{ChannelType, CreateTrigger, DurationExpr, TriggerOn}; -use crate::error; -use crate::error::Result; +use crate::error::{Result, TooLargeDurationSnafu}; pub fn to_create_trigger_task_expr( create_trigger: CreateTrigger, @@ -19,17 +18,13 @@ pub fn to_create_trigger_task_expr( trigger_name, if_not_exists, trigger_on, + r#for, + keep_firing_for, labels, annotations, channels, } = create_trigger; - let TriggerOn { - query, - interval, - raw_interval_expr, - } = trigger_on; - let catalog_name = query_ctx.current_catalog().to_string(); let trigger_name = sanitize_trigger_name(trigger_name)?; @@ -49,11 
+44,35 @@ pub fn to_create_trigger_task_expr( }) .collect::>(); + let TriggerOn { + query, + query_interval, + } = trigger_on; + + let DurationExpr { + duration, + raw_expr: raw_interval_expr, + } = query_interval; + + let (r#for, for_raw_expr) = if let Some(f) = r#for { + let duration = f.duration.try_into().context(TooLargeDurationSnafu)?; + (Some(duration), f.raw_expr) + } else { + (None, String::new()) + }; + + let (keep_firing_for, keep_firing_for_raw_expr) = if let Some(k) = keep_firing_for { + let duration = k.duration.try_into().context(TooLargeDurationSnafu)?; + (Some(duration), k.raw_expr) + } else { + (None, String::new()) + }; + let sql = query.to_string(); let labels = labels.into_map(); let annotations = annotations.into_map(); - let interval = interval.try_into().context(error::TooLargeDurationSnafu)?; + let interval = duration.try_into().context(TooLargeDurationSnafu)?; Ok(PbCreateTriggerExpr { catalog_name, @@ -65,6 +84,10 @@ pub fn to_create_trigger_task_expr( annotations, interval: Some(interval), raw_interval_expr, + r#for, + for_raw_expr, + keep_firing_for, + keep_firing_for_raw_expr, }) } diff --git a/src/operator/src/request.rs b/src/operator/src/request.rs index 36f368a68181..1bca4618429f 100644 --- a/src/operator/src/request.rs +++ b/src/operator/src/request.rs @@ -109,6 +109,7 @@ impl Requester { .map(|partition| { RegionRequestBody::Compact(CompactRequest { region_id: partition.id.into(), + parallelism: request.parallelism, options: Some(request.compact_options), }) }) @@ -146,6 +147,7 @@ impl Requester { ) -> Result { let request = RegionRequestBody::Compact(CompactRequest { region_id: region_id.into(), + parallelism: 1, options: None, // todo(hl): maybe also support parameters in region compaction. }); diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index 47a89949855c..5dd39681b679 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -691,7 +691,7 @@ fn verify_time_related_format(with: &OptionMap) -> Result<()> { time_format.is_none() && date_format.is_none() && timestamp_format.is_none(), error::TimestampFormatNotSupportedSnafu { format: "".to_string(), - file_format: file_format.cloned().unwrap_or_default(), + file_format: file_format.unwrap_or_default(), } ); } @@ -742,7 +742,7 @@ fn to_copy_table_request(stmt: CopyTable, query_ctx: QueryContextRef) -> Result< let pattern = with .get(common_datasource::file_format::FILE_PATTERN) - .cloned(); + .map(|x| x.to_string()); Ok(CopyTableRequest { catalog_name, diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index 8bd454a71d09..3b626d13d0aa 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -66,7 +66,7 @@ use snafu::{OptionExt, ResultExt, ensure}; use sql::parser::{ParseOptions, ParserContext}; #[cfg(feature = "enterprise")] use sql::statements::alter::trigger::AlterTrigger; -use sql::statements::alter::{AlterDatabase, AlterTable}; +use sql::statements::alter::{AlterDatabase, AlterTable, AlterTableOperation}; #[cfg(feature = "enterprise")] use sql::statements::create::trigger::CreateTrigger; use sql::statements::create::{ @@ -87,10 +87,10 @@ use crate::error::{ ColumnNotFoundSnafu, ConvertSchemaSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, EmptyDdlExprSnafu, ExternalSnafu, ExtractTableNamesSnafu, FlowNotFoundSnafu, InvalidPartitionRuleSnafu, InvalidPartitionSnafu, InvalidSqlSnafu, InvalidTableNameSnafu, - InvalidViewNameSnafu, InvalidViewStmtSnafu, PartitionExprToPbSnafu, Result, 
SchemaInUseSnafu, - SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu, - TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu, - ViewAlreadyExistsSnafu, + InvalidViewNameSnafu, InvalidViewStmtSnafu, NotSupportedSnafu, PartitionExprToPbSnafu, Result, + SchemaInUseSnafu, SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu, + TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu, + UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu, }; use crate::expr_helper; use crate::statement::StatementExecutor; @@ -1194,6 +1194,17 @@ impl StatementExecutor { alter_table: AlterTable, query_context: QueryContextRef, ) -> Result { + if matches!( + alter_table.alter_operation(), + AlterTableOperation::Repartition { .. } + ) { + let _request = expr_helper::to_repartition_request(alter_table, &query_context)?; + return NotSupportedSnafu { + feat: "ALTER TABLE REPARTITION", + } + .fail(); + } + let expr = expr_helper::to_alter_table_expr(alter_table, &query_context)?; self.alter_table_inner(expr, query_context).await } @@ -1720,6 +1731,7 @@ pub fn create_table_info( region_numbers: vec![], options: table_options, created_on: Utc::now(), + updated_on: Utc::now(), partition_key_indices, column_ids: vec![], }; diff --git a/src/promql/src/extension_plan/empty_metric.rs b/src/promql/src/extension_plan/empty_metric.rs index 741a6b64bc97..5514cb1abb64 100644 --- a/src/promql/src/extension_plan/empty_metric.rs +++ b/src/promql/src/extension_plan/empty_metric.rs @@ -123,7 +123,7 @@ impl EmptyMetric { physical_planner.create_physical_expr(expr, &self.time_index_schema, session_state) }) .transpose()?; - let result_schema: SchemaRef = Arc::new(self.result_schema.as_ref().into()); + let result_schema: SchemaRef = self.result_schema.inner().clone(); let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(result_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -134,7 +134,7 @@ impl EmptyMetric { start: self.start, end: self.end, interval: self.interval, - time_index_schema: Arc::new(self.time_index_schema.as_ref().into()), + time_index_schema: self.time_index_schema.inner().clone(), result_schema, expr: physical_expr, properties, diff --git a/src/promql/src/extension_plan/histogram_fold.rs b/src/promql/src/extension_plan/histogram_fold.rs index 4742bf49d4a8..e80d4a76762d 100644 --- a/src/promql/src/extension_plan/histogram_fold.rs +++ b/src/promql/src/extension_plan/histogram_fold.rs @@ -181,7 +181,7 @@ impl HistogramFold { .index_of_column_by_name(None, &self.ts_column) .unwrap(); - let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), @@ -578,7 +578,7 @@ impl HistogramFoldStream { let le_str_val = le_array.get(cursor + bias); let le_str_val_ref = le_str_val.as_value_ref(); let le_str = le_str_val_ref - .as_string() + .try_into_string() .unwrap() .expect("le column should not be nullable"); let le = le_str.parse::().unwrap(); @@ -587,7 +587,7 @@ impl HistogramFoldStream { let counter = field_array .get(cursor + bias) .as_value_ref() - .as_f64() + .try_into_f64() .unwrap() .expect("field column should not be nullable"); counters.push(counter); @@ -805,14 +805,13 @@ mod test { async fn fold_overall() { let memory_exec = Arc::new(prepare_test_data()); let output_schema: SchemaRef = Arc::new( - 
(*HistogramFold::convert_schema( + HistogramFold::convert_schema( &Arc::new(memory_exec.schema().to_dfschema().unwrap()), "le", ) .unwrap() - .as_ref()) - .clone() - .into(), + .as_arrow() + .clone(), ); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), diff --git a/src/promql/src/extension_plan/range_manipulate.rs b/src/promql/src/extension_plan/range_manipulate.rs index 1e18e34cd196..9315c0620565 100644 --- a/src/promql/src/extension_plan/range_manipulate.rs +++ b/src/promql/src/extension_plan/range_manipulate.rs @@ -157,7 +157,7 @@ impl RangeManipulate { } pub fn to_execution_plan(&self, exec_input: Arc) -> Arc { - let output_schema: SchemaRef = SchemaRef::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = exec_input.properties(); let properties = PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), @@ -732,8 +732,8 @@ mod test { &field_columns, ) .unwrap() - .as_ref() - .into(), + .as_arrow() + .clone(), ); let properties = PlanProperties::new( EquivalenceProperties::new(manipulate_output_schema.clone()), diff --git a/src/promql/src/extension_plan/union_distinct_on.rs b/src/promql/src/extension_plan/union_distinct_on.rs index e5e80525b87f..795669a4e909 100644 --- a/src/promql/src/extension_plan/union_distinct_on.rs +++ b/src/promql/src/extension_plan/union_distinct_on.rs @@ -92,7 +92,7 @@ impl UnionDistinctOn { left_exec: Arc, right_exec: Arc, ) -> Arc { - let output_schema: SchemaRef = Arc::new(self.output_schema.as_ref().into()); + let output_schema: SchemaRef = self.output_schema.inner().clone(); let properties = Arc::new(PlanProperties::new( EquivalenceProperties::new(output_schema.clone()), Partitioning::UnknownPartitioning(1), diff --git a/src/puffin/src/puffin_manager/cache.rs b/src/puffin/src/puffin_manager/cache.rs index 049aa4d4a6ed..c2c5fb67f496 100644 --- a/src/puffin/src/puffin_manager/cache.rs +++ b/src/puffin/src/puffin_manager/cache.rs @@ -57,4 +57,9 @@ impl PuffinMetadataCache { pub fn put_metadata(&self, file_id: String, metadata: Arc) { self.cache.insert(file_id, metadata); } + + /// Removes the metadata of the given file from the cache, if present. 
+ pub fn remove(&self, file_id: &str) { + self.cache.invalidate(file_id); + } } diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 0c6c2e033f9a..344d7bd5fc10 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -54,6 +54,7 @@ meter-core.workspace = true meter-macros.workspace = true object-store.workspace = true once_cell.workspace = true +parking_lot.workspace = true partition.workspace = true prometheus.workspace = true promql.workspace = true diff --git a/src/query/src/dist_plan/analyzer.rs b/src/query/src/dist_plan/analyzer.rs index 9c216f74df1c..34e035644b64 100644 --- a/src/query/src/dist_plan/analyzer.rs +++ b/src/query/src/dist_plan/analyzer.rs @@ -544,13 +544,13 @@ impl PlanRewriter { return Ok((c.clone(), BTreeSet::new())); } let index = - plan.schema().index_of_column_by_name(None, &c).ok_or_else(|| { - datafusion_common::DataFusionError::Internal( - format!( - "PlanRewriter: maybe_set_partitions: column {c} not found in schema of plan: {plan}" - ), - ) - })?; + if let Some(c) = plan.schema().index_of_column_by_name(None, &c){ + c + } else { + // the `projection` field of `TableScan` doesn't contain the partition columns, + // this is similar to not having a alias, hence return empty alias set + return Ok((c.clone(), BTreeSet::new())) + }; let column = plan.schema().columns().get(index).cloned().ok_or_else(|| { datafusion_common::DataFusionError::Internal(format!( "PlanRewriter: maybe_set_partitions: column index {index} out of bounds in schema of plan: {plan}" diff --git a/src/query/src/dist_plan/analyzer/test.rs b/src/query/src/dist_plan/analyzer/test.rs index c9c874dbc2d9..e9528ccf0076 100644 --- a/src/query/src/dist_plan/analyzer/test.rs +++ b/src/query/src/dist_plan/analyzer/test.rs @@ -92,6 +92,7 @@ impl TestTable { next_column_id: 5, options: Default::default(), created_on: Default::default(), + updated_on: Default::default(), partition_key_indices: vec![0, 1], column_ids: vec![0, 1, 2, 3, 4], }; @@ -1629,3 +1630,32 @@ fn test_last_value_no_order_by() { .join("\n"); assert_eq!(expected, result.to_string()); } + +#[test] +fn test_table_scan_projection() { + init_default_ut_logging(); + let test_table = TestTable::table_with_name(0, "t".to_string()); + let table_provider = Arc::new(DfTableProviderAdapter::new(test_table)); + let table_source = Arc::new(DefaultTableSource::new(table_provider.clone() as _)); + let ctx = SessionContext::new(); + ctx.register_table(TableReference::bare("t"), table_provider.clone() as _) + .unwrap(); + + let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, Some(vec![3]), vec![]) + .unwrap() + .build() + .unwrap(); + + let config = ConfigOptions::default(); + let result = DistPlannerAnalyzer {} + .analyze(plan.clone(), &config) + .unwrap(); + let expected = [ + "Projection: t.ts", + " MergeScan [is_placeholder=false, remote_input=[", + "TableScan: t projection=[ts]", + "]]", + ] + .join("\n"); + assert_eq!(expected, result.to_string()); +} diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index d0e26a3f92d4..ba6a74203bb2 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use common_function::aggrs::aggr_wrapper::{StateMergeHelper, is_all_aggr_exprs_steppable}; use common_telemetry::debug; use datafusion::error::Result as DfResult; +use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode}; use 
promql::extension_plan::{ EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize, @@ -93,6 +94,12 @@ impl Categorizer { plan: &LogicalPlan, partition_cols: Option, ) -> DfResult { + // Subquery is treated separately in `inspect_plan_with_subquery`. To avoid rewrite the + // "maybe rewritten" plan, stop the check here. + if has_subquery(plan)? { + return Ok(Commutativity::NonCommutative); + } + let partition_cols = partition_cols.unwrap_or_default(); let comm = match plan { @@ -331,6 +338,24 @@ pub fn partial_commutative_transformer(plan: &LogicalPlan) -> Option DfResult { + let mut found = false; + plan.apply_expressions(|e| { + e.apply(|x| { + if matches!( + x, + Expr::Exists(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) + ) { + found = true; + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } + }) + })?; + Ok(found) +} + #[cfg(test)] mod test { use datafusion_expr::{LogicalPlanBuilder, Sort}; diff --git a/src/query/src/dist_plan/planner.rs b/src/query/src/dist_plan/planner.rs index 6c7eba6b1e72..cea8e5404549 100644 --- a/src/query/src/dist_plan/planner.rs +++ b/src/query/src/dist_plan/planner.rs @@ -163,7 +163,7 @@ impl ExtensionPlanner for DistExtensionPlanner { }; // TODO(ruihang): generate different execution plans for different variant merge operation - let schema = optimized_plan.schema().as_ref().into(); + let schema = optimized_plan.schema().as_arrow(); let query_ctx = session_state .config() .get_extension() @@ -173,7 +173,7 @@ impl ExtensionPlanner for DistExtensionPlanner { table_name, regions, input_plan.clone(), - &schema, + schema, self.region_query_handler.clone(), query_ctx, session_state.config().target_partitions(), diff --git a/src/query/src/dummy_catalog.rs b/src/query/src/dummy_catalog.rs index d81767a9044a..798ae5254991 100644 --- a/src/query/src/dummy_catalog.rs +++ b/src/query/src/dummy_catalog.rs @@ -256,7 +256,7 @@ impl DummyTableProvider { } pub fn with_sequence(&self, sequence: u64) { - self.scan_request.lock().unwrap().sequence = Some(sequence); + self.scan_request.lock().unwrap().memtable_max_sequence = Some(sequence); } /// Gets the scan request of the provider. 
@@ -287,7 +287,7 @@ impl DummyTableProviderFactory { let scan_request = query_ctx .as_ref() .map(|ctx| ScanRequest { - sequence: ctx.get_snapshot(region_id.as_u64()), + memtable_max_sequence: ctx.get_snapshot(region_id.as_u64()), sst_min_sequence: ctx.sst_min_sequence(region_id.as_u64()), ..Default::default() }) diff --git a/src/query/src/optimizer/windowed_sort.rs b/src/query/src/optimizer/windowed_sort.rs index 8de6fc831724..b2077d7c5d40 100644 --- a/src/query/src/optimizer/windowed_sort.rs +++ b/src/query/src/optimizer/windowed_sort.rs @@ -115,6 +115,7 @@ impl WindowedSortPhysicalRule { sort_exec.fetch(), scanner_info.partition_ranges.clone(), sort_input, + sort_exec.create_filter(), )) }; @@ -196,9 +197,9 @@ fn fetch_partition_range(input: Arc) -> DataFusionResult() { - for (expr, output_name) in projection.expr() { - if let Some(column_expr) = expr.as_any().downcast_ref::() { - alias_map.push((column_expr.name().to_string(), output_name.clone())); + for expr in projection.expr() { + if let Some(column_expr) = expr.expr.as_any().downcast_ref::() { + alias_map.push((column_expr.name().to_string(), expr.alias.clone())); } } // resolve alias properly diff --git a/src/query/src/part_sort.rs b/src/query/src/part_sort.rs index e9d70ec17a3a..1d59a8acf730 100644 --- a/src/query/src/part_sort.rs +++ b/src/query/src/part_sort.rs @@ -33,11 +33,14 @@ use datafusion::execution::{RecordBatchStream, TaskContext}; use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties, TopK, + TopKDynamicFilters, }; use datafusion_common::{DataFusionError, internal_err}; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_expr::expressions::{DynamicFilterPhysicalExpr, lit}; use futures::{Stream, StreamExt}; use itertools::Itertools; +use parking_lot::RwLock; use snafu::location; use store_api::region_engine::PartitionRange; @@ -58,6 +61,7 @@ pub struct PartSortExec { metrics: ExecutionPlanMetricsSet, partition_ranges: Vec>, properties: PlanProperties, + filter: Arc>, } impl PartSortExec { @@ -66,6 +70,7 @@ impl PartSortExec { limit: Option, partition_ranges: Vec>, input: Arc, + filter: Arc>, ) -> Self { let metrics = ExecutionPlanMetricsSet::new(); let properties = input.properties(); @@ -83,6 +88,7 @@ impl PartSortExec { metrics, partition_ranges, properties, + filter, } } @@ -166,6 +172,7 @@ impl ExecutionPlan for PartSortExec { self.limit, self.partition_ranges.clone(), new_input.clone(), + self.filter.clone(), ))) } @@ -239,6 +246,9 @@ impl PartSortStream { partition: usize, ) -> datafusion_common::Result { let buffer = if let Some(limit) = limit { + let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new( + DynamicFilterPhysicalExpr::new(vec![], lit(true)), + )))); PartSortBuffer::Top( TopK::try_new( partition, @@ -249,7 +259,7 @@ impl PartSortStream { context.session_config().batch_size(), context.runtime_env(), &sort.metrics, - None, + filter.clone(), )?, 0, ) @@ -497,6 +507,9 @@ impl PartSortStream { /// Internal method for sorting `Top` buffer (with limit). 
fn sort_top_buffer(&mut self) -> datafusion_common::Result { + let filter = Arc::new(RwLock::new(TopKDynamicFilters::new(Arc::new( + DynamicFilterPhysicalExpr::new(vec![], lit(true)), + )))); let new_top_buffer = TopK::try_new( self.partition, self.schema().clone(), @@ -506,7 +519,7 @@ impl PartSortStream { self.context.session_config().batch_size(), self.context.runtime_env(), &self.root_metrics, - None, + filter, )?; let PartSortBuffer::Top(top_k, _) = std::mem::replace(&mut self.buffer, PartSortBuffer::Top(new_top_buffer, 0)) @@ -675,6 +688,7 @@ mod test { use arrow::json::ArrayWriter; use arrow_schema::{DataType, Field, Schema, SortOptions, TimeUnit}; use common_time::Timestamp; + use datafusion::physical_plan::sorts::sort::SortExec; use datafusion_physical_expr::expressions::Column; use futures::StreamExt; use store_api::region_engine::PartitionRange; @@ -1033,16 +1047,19 @@ mod test { cols }) .collect_vec(); - let mock_input = MockInputExec::new(batches, schema.clone()); + let mock_input = Arc::new(MockInputExec::new(batches, schema.clone())); + let expr = PhysicalSortExpr { + expr: Arc::new(Column::new("ts", 0)), + options: opt, + }; + let sort_exec = SortExec::new([expr.clone()].into(), mock_input.clone()); let exec = PartSortExec::new( - PhysicalSortExpr { - expr: Arc::new(Column::new("ts", 0)), - options: opt, - }, + expr, limit, vec![ranges.clone()], - Arc::new(mock_input), + mock_input, + sort_exec.create_filter(), ); let exec_stream = exec.execute(0, Arc::new(TaskContext::default())).unwrap(); diff --git a/src/query/src/planner.rs b/src/query/src/planner.rs index e59be28cebc8..faba24a74271 100644 --- a/src/query/src/planner.rs +++ b/src/query/src/planner.rs @@ -14,6 +14,7 @@ use std::any::Any; use std::borrow::Cow; +use std::str::FromStr; use std::sync::Arc; use async_trait::async_trait; @@ -116,9 +117,10 @@ impl DfLogicalPlanner { // default to configuration value let options = self.session_state.config().options(); - let format = format.as_ref().unwrap_or(&options.explain.format); - - let format: ExplainFormat = format.parse()?; + let format = format + .map(|x| ExplainFormat::from_str(&x)) + .transpose()? + .unwrap_or_else(|| options.explain.format.clone()); Ok(LogicalPlan::Explain(Explain { verbose, @@ -208,8 +210,7 @@ impl DfLogicalPlanner { let Statement::Query(query) = stmt.into_owned() else { unreachable!("is_tql_cte should only be true for Query statements"); }; - let sqlparser_stmt = - datafusion::sql::sqlparser::ast::Statement::Query(Box::new(query.inner.into())); + let sqlparser_stmt = sqlparser::ast::Statement::Query(Box::new(query.inner)); sql_to_rel .sql_statement_to_plan_with_context(sqlparser_stmt, &mut planner_context) .context(PlanSqlSnafu)? @@ -261,7 +262,7 @@ impl DfLogicalPlanner { let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options); - Ok(sql_to_rel.sql_to_expr(sql.into(), schema, &mut PlannerContext::new())?) + Ok(sql_to_rel.sql_to_expr(sql, schema, &mut PlannerContext::new())?) 
} #[tracing::instrument(skip_all)] diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index b65cddc2e227..a1dc1b640a02 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -2464,6 +2464,7 @@ impl PromPlanner { window_frame: WindowFrame::new(Some(true)), null_treatment: None, distinct: false, + filter: None, }, })) }) diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 26dc8fe5f640..6fb0b1f1d338 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -90,7 +90,11 @@ opentelemetry-proto.workspace = true operator.workspace = true otel-arrow-rust.workspace = true parking_lot.workspace = true -pgwire = { version = "0.33", default-features = false, features = ["server-api-ring"] } +#pgwire = { version = "0.33", default-features = false, features = ["server-api-ring"] } +pgwire = { git = "https://github.com/sunng87/pgwire.git", rev = "658e37936da5039dfb8495af9cac7c511dbfaf90", default-features = false, features = [ + "server-api-ring", + "pg_ext_types", +] } pin-project = "1.0" pipeline.workspace = true postgres-types = { version = "0.2", features = ["with-chrono-0_4", "with-serde_json-1"] } diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index e373e0a78050..946e22ba5ba8 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -34,7 +34,7 @@ use common_time::timestamp::TimeUnit; use datatypes::data_type::DataType; use datatypes::prelude::ConcreteDataType; use datatypes::schema::SchemaRef; -use datatypes::types::json_type_value_to_serde_json; +use datatypes::types::jsonb_to_serde_json; use event::{LogState, LogValidatorRef}; use futures::FutureExt; use http::{HeaderValue, Method}; @@ -301,11 +301,11 @@ impl HttpRecordsOutput { let schema = &schemas[col_idx]; for row_idx in 0..recordbatch.num_rows() { let value = col.get(row_idx); - let value = if let ConcreteDataType::Json(json_type) = schema.data_type + // TODO(sunng87): is this duplicated with `map_json_type_to_string` in recordbatch? + let value = if let ConcreteDataType::Json(_json_type) = &schema.data_type && let datatypes::value::Value::Binary(bytes) = value { - json_type_value_to_serde_json(bytes.as_ref(), &json_type.format) - .context(ConvertSqlValueSnafu)? + jsonb_to_serde_json(bytes.as_ref()).context(ConvertSqlValueSnafu)? } else { serde_json::Value::try_from(col.get(row_idx)).context(ToJsonSnafu)? }; @@ -1084,7 +1084,7 @@ impl HttpServer { /// Route Prometheus [HTTP API]. 
/// /// [HTTP API]: https://prometheus.io/docs/prometheus/latest/querying/api/ - fn route_prometheus(prometheus_handler: PrometheusHandlerRef) -> Router { + pub fn route_prometheus(prometheus_handler: PrometheusHandlerRef) -> Router { Router::new() .route( "/format_query", diff --git a/src/servers/src/http/handler.rs b/src/servers/src/http/handler.rs index 69ba93cc5cde..ca56e5234e34 100644 --- a/src/servers/src/http/handler.rs +++ b/src/servers/src/http/handler.rs @@ -208,7 +208,7 @@ pub async fn sql_format( let mut parts: Vec = Vec::with_capacity(stmts.len()); for stmt in stmts { - let mut s = format!("{:#}", stmt); + let mut s = format!("{stmt}"); if !s.trim_end().ends_with(';') { s.push(';'); } diff --git a/src/servers/src/mysql/helper.rs b/src/servers/src/mysql/helper.rs index 4705c0138704..cf92741beaf1 100644 --- a/src/servers/src/mysql/helper.rs +++ b/src/servers/src/mysql/helper.rs @@ -201,9 +201,14 @@ pub fn convert_value(param: &ParamValue, t: &ConcreteDataType) -> Result value::to_null_scalar_value(t).context(error::ConvertScalarValueSnafu), ValueInner::Bytes(b) => match t { - ConcreteDataType::String(_) => Ok(ScalarValue::Utf8(Some( - String::from_utf8_lossy(b).to_string(), - ))), + ConcreteDataType::String(t) => { + let s = String::from_utf8_lossy(b).to_string(); + if t.is_large() { + Ok(ScalarValue::LargeUtf8(Some(s))) + } else { + Ok(ScalarValue::Utf8(Some(s))) + } + } ConcreteDataType::Binary(_) => Ok(ScalarValue::Binary(Some(b.to_vec()))), ConcreteDataType::Timestamp(ts_type) => covert_bytes_to_timestamp(b, ts_type), _ => error::PreparedStmtTypeMismatchSnafu { diff --git a/src/servers/src/mysql/writer.rs b/src/servers/src/mysql/writer.rs index 73545db34794..58cd3900a2e3 100644 --- a/src/servers/src/mysql/writer.rs +++ b/src/servers/src/mysql/writer.rs @@ -21,7 +21,7 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream}; use common_telemetry::{debug, error}; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::SchemaRef; -use datatypes::types::json_type_value_to_string; +use datatypes::types::jsonb_to_string; use futures::StreamExt; use itertools::Itertools; use opensrv_mysql::{ @@ -31,7 +31,7 @@ use session::context::QueryContextRef; use snafu::prelude::*; use tokio::io::AsyncWrite; -use crate::error::{self, ConvertSqlValueSnafu, Result}; +use crate::error::{self, ConvertSqlValueSnafu, Result, ToJsonSnafu}; use crate::metrics::*; /// Try to write multiple output to the writer if possible. @@ -212,10 +212,9 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { Value::Float32(v) => row_writer.write_col(v.0)?, Value::Float64(v) => row_writer.write_col(v.0)?, Value::String(v) => row_writer.write_col(v.as_utf8())?, - Value::Binary(v) => match column.data_type { - ConcreteDataType::Json(j) => { - let s = json_type_value_to_string(&v, &j.format) - .context(ConvertSqlValueSnafu)?; + Value::Binary(v) => match &column.data_type { + ConcreteDataType::Json(_j) => { + let s = jsonb_to_string(&v).context(ConvertSqlValueSnafu)?; row_writer.write_col(s)?; } _ => { @@ -248,6 +247,11 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { .map(|(k, v)| format!("{k}: {v}")) .join(", ") ))?, + Value::Json(inner) => { + let json_value = + serde_json::Value::try_from(*inner).context(ToJsonSnafu)?; + row_writer.write_col(json_value.to_string())? 
+ } Value::Time(v) => row_writer .write_col(v.to_timezone_aware_string(Some(&query_context.timezone())))?, Value::Decimal128(v) => row_writer.write_col(v.to_string())?, diff --git a/src/servers/src/postgres/handler.rs b/src/servers/src/postgres/handler.rs index f3c1fdeca962..9561b9605e72 100644 --- a/src/servers/src/postgres/handler.rs +++ b/src/servers/src/postgres/handler.rs @@ -173,7 +173,7 @@ where .map(move |row| { row.and_then(|row| { let mut encoder = DataRowEncoder::new(pg_schema_ref.clone()); - for (value, column) in row.iter().zip(schema.column_schemas()) { + for (value, column) in row.into_iter().zip(schema.column_schemas()) { encode_value(&query_ctx, value, &mut encoder, &column.data_type)?; } encoder.finish() diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index 204c349547b2..e78980f1d8a7 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -25,9 +25,10 @@ use common_time::{IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth}; use datafusion_common::ScalarValue; use datafusion_expr::LogicalPlan; use datatypes::arrow::datatypes::DataType as ArrowDataType; +use datatypes::json::JsonStructureSettings; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::Schema; -use datatypes::types::{IntervalType, TimestampType, json_type_value_to_string}; +use datatypes::types::{IntervalType, JsonFormat, TimestampType, jsonb_to_string}; use datatypes::value::{ListValue, StructValue}; use pgwire::api::Type; use pgwire::api::portal::{Format, Portal}; @@ -62,21 +63,35 @@ pub(super) fn schema_to_pg(origin: &Schema, field_formats: &Format) -> Result>>() } +/// this function will encode greptime's `StructValue` into PostgreSQL jsonb type +/// +/// Note that greptimedb has different types of StructValue for storing json data, +/// based on policy defined in `JsonStructureSettings`. But here the `StructValue` +/// should be fully structured. +/// +/// there are alternatives like records, arrays, etc. 
but there are also limitations: +/// records: there is no support for include keys +/// arrays: element in array must be the same type fn encode_struct( _query_ctx: &QueryContextRef, - _struct_value: &StructValue, - _builder: &mut DataRowEncoder, + struct_value: StructValue, + builder: &mut DataRowEncoder, ) -> PgWireResult<()> { - todo!("how to encode struct for postgres"); + let encoding_setting = JsonStructureSettings::Structured(None); + let json_value = encoding_setting + .decode(Value::Struct(struct_value)) + .map_err(|e| PgWireError::ApiError(Box::new(e)))?; + + builder.encode_field(&json_value) } fn encode_array( query_ctx: &QueryContextRef, - value_list: &ListValue, + value_list: ListValue, builder: &mut DataRowEncoder, ) -> PgWireResult<()> { - match value_list.datatype() { - &ConcreteDataType::Boolean(_) => { + match &value_list.datatype() { + ConcreteDataType::Boolean(_) => { let array = value_list .items() .iter() @@ -90,7 +105,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => { + ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => { let array = value_list .items() .iter() @@ -107,7 +122,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => { + ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => { let array = value_list .items() .iter() @@ -124,7 +139,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => { + ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => { let array = value_list .items() .iter() @@ -141,7 +156,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => { + ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => { let array = value_list .items() .iter() @@ -158,7 +173,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Float32(_) => { + ConcreteDataType::Float32(_) => { let array = value_list .items() .iter() @@ -172,7 +187,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Float64(_) => { + ConcreteDataType::Float64(_) => { let array = value_list .items() .iter() @@ -186,7 +201,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => { + ConcreteDataType::Binary(_) | ConcreteDataType::Vector(_) => { let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); match *bytea_output { @@ -240,7 +255,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Date(_) => { + ConcreteDataType::Date(_) => { let array = value_list .items() .iter() @@ -264,7 +279,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Timestamp(_) => { + ConcreteDataType::Timestamp(_) => { let array = value_list .items() .iter() @@ -290,7 +305,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Time(_) => { + ConcreteDataType::Time(_) => { let array = value_list .items() .iter() @@ -304,7 +319,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Interval(_) => { + ConcreteDataType::Interval(_) => { let array = value_list .items() .iter() @@ -320,7 +335,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } 
- &ConcreteDataType::Decimal128(_) => { + ConcreteDataType::Decimal128(_) => { let array = value_list .items() .iter() @@ -334,23 +349,42 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Json(j) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Binary(v) => { - let s = json_type_value_to_string(v, &j.format).map_err(convert_err)?; - Ok(Some(s)) - } - _ => Err(convert_err(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected json",), - })), - }) - .collect::>>>()?; - builder.encode_field(&array) - } + ConcreteDataType::Json(j) => match &j.format { + JsonFormat::Jsonb => { + let array = value_list + .take_items() + .into_iter() + .map(|v| match v { + Value::Null => Ok(None), + Value::Binary(v) => { + let s = jsonb_to_string(&v).map_err(convert_err)?; + Ok(Some(s)) + } + + _ => Err(convert_err(Error::Internal { + err_msg: format!("Invalid list item type, find {v:?}, expected json",), + })), + }) + .collect::>>>()?; + builder.encode_field(&array) + } + JsonFormat::Native(_) => { + let array = value_list + .take_items() + .into_iter() + .map(|v| match v { + Value::Null => Ok(None), + Value::Json(inner) => serde_json::Value::try_from(*inner) + .map(Some) + .map_err(|e| PgWireError::ApiError(Box::new(e))), + _ => Err(convert_err(Error::Internal { + err_msg: format!("Invalid list item type, find {v:?}, expected json",), + })), + }) + .collect::>>>()?; + builder.encode_field(&array) + } + }, _ => Err(convert_err(Error::Internal { err_msg: format!( "cannot write array type {:?} in postgres protocol: unimplemented", @@ -362,27 +396,27 @@ fn encode_array( pub(super) fn encode_value( query_ctx: &QueryContextRef, - value: &Value, + value: Value, builder: &mut DataRowEncoder, datatype: &ConcreteDataType, ) -> PgWireResult<()> { match value { Value::Null => builder.encode_field(&None::<&i8>), - Value::Boolean(v) => builder.encode_field(v), - Value::UInt8(v) => builder.encode_field(&(*v as i8)), - Value::UInt16(v) => builder.encode_field(&(*v as i16)), - Value::UInt32(v) => builder.encode_field(v), - Value::UInt64(v) => builder.encode_field(&(*v as i64)), - Value::Int8(v) => builder.encode_field(v), - Value::Int16(v) => builder.encode_field(v), - Value::Int32(v) => builder.encode_field(v), - Value::Int64(v) => builder.encode_field(v), + Value::Boolean(v) => builder.encode_field(&v), + Value::UInt8(v) => builder.encode_field(&(v as i8)), + Value::UInt16(v) => builder.encode_field(&(v as i16)), + Value::UInt32(v) => builder.encode_field(&v), + Value::UInt64(v) => builder.encode_field(&(v as i64)), + Value::Int8(v) => builder.encode_field(&v), + Value::Int16(v) => builder.encode_field(&v), + Value::Int32(v) => builder.encode_field(&v), + Value::Int64(v) => builder.encode_field(&v), Value::Float32(v) => builder.encode_field(&v.0), Value::Float64(v) => builder.encode_field(&v.0), Value::String(v) => builder.encode_field(&v.as_utf8()), Value::Binary(v) => match datatype { - ConcreteDataType::Json(j) => { - let s = json_type_value_to_string(v, &j.format).map_err(convert_err)?; + ConcreteDataType::Json(_j) => { + let s = jsonb_to_string(v.as_ref()).map_err(convert_err)?; builder.encode_field(&s) } _ => { @@ -425,11 +459,11 @@ pub(super) fn encode_value( })) } } - Value::IntervalYearMonth(v) => builder.encode_field(&PgInterval::from(*v)), - Value::IntervalDayTime(v) => builder.encode_field(&PgInterval::from(*v)), - Value::IntervalMonthDayNano(v) => 
builder.encode_field(&PgInterval::from(*v)), + Value::IntervalYearMonth(v) => builder.encode_field(&PgInterval::from(v)), + Value::IntervalDayTime(v) => builder.encode_field(&PgInterval::from(v)), + Value::IntervalMonthDayNano(v) => builder.encode_field(&PgInterval::from(v)), Value::Decimal128(v) => builder.encode_field(&v.to_string()), - Value::Duration(d) => match PgInterval::try_from(*d) { + Value::Duration(d) => match PgInterval::try_from(d) { Ok(i) => builder.encode_field(&i), Err(e) => Err(convert_err(Error::Internal { err_msg: e.to_string(), @@ -437,6 +471,11 @@ pub(super) fn encode_value( }, Value::List(values) => encode_array(query_ctx, values, builder), Value::Struct(values) => encode_struct(query_ctx, values, builder), + Value::Json(inner) => { + let json_value = serde_json::Value::try_from(*inner) + .map_err(|e| PgWireError::ApiError(Box::new(e)))?; + builder.encode_field(&json_value) + } } } @@ -476,9 +515,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { &ConcreteDataType::Decimal128(_) => Ok(Type::NUMERIC_ARRAY), &ConcreteDataType::Json(_) => Ok(Type::JSON_ARRAY), &ConcreteDataType::Duration(_) => Ok(Type::INTERVAL_ARRAY), - // TODO(sunng87) we may treat list/array as json directly so we can - // support deeply nested data structures - &ConcreteDataType::Struct(_) => Ok(Type::RECORD_ARRAY), + &ConcreteDataType::Struct(_) => Ok(Type::JSON_ARRAY), &ConcreteDataType::Dictionary(_) | &ConcreteDataType::Vector(_) | &ConcreteDataType::List(_) => server_error::UnsupportedDataTypeSnafu { @@ -493,7 +530,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { } .fail(), &ConcreteDataType::Duration(_) => Ok(Type::INTERVAL), - &ConcreteDataType::Struct(_) => Ok(Type::RECORD), + &ConcreteDataType::Struct(_) => Ok(Type::JSON), } } @@ -650,7 +687,13 @@ pub(super) fn parameters_to_scalar_values( let data = portal.parameter::(idx, &client_type)?; if let Some(server_type) = &server_type { match server_type { - ConcreteDataType::String(_) => ScalarValue::Utf8(data), + ConcreteDataType::String(t) => { + if t.is_large() { + ScalarValue::LargeUtf8(data) + } else { + ScalarValue::Utf8(data) + } + } _ => { return Err(invalid_parameter_error( "invalid_parameter_type", @@ -932,8 +975,13 @@ pub(super) fn parameters_to_scalar_values( let data = portal.parameter::>(idx, &client_type)?; if let Some(server_type) = &server_type { match server_type { - ConcreteDataType::String(_) => { - ScalarValue::Utf8(data.map(|d| String::from_utf8_lossy(&d).to_string())) + ConcreteDataType::String(t) => { + let s = data.map(|d| String::from_utf8_lossy(&d).to_string()); + if t.is_large() { + ScalarValue::LargeUtf8(s) + } else { + ScalarValue::Utf8(s) + } } ConcreteDataType::Binary(_) => ScalarValue::Binary(data), _ => { @@ -1353,7 +1401,7 @@ mod test { .build() .into(); let mut builder = DataRowEncoder::new(Arc::new(schema)); - for (value, datatype) in values.iter().zip(datatypes) { + for (value, datatype) in values.into_iter().zip(datatypes) { encode_value(&query_context, value, &mut builder, &datatype).unwrap(); } } diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index 8459bb375dae..fbfb8c480af7 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -21,6 +21,7 @@ common-error.workspace = true common-macro.workspace = true common-query.workspace = true common-sql.workspace = true +common-telemetry.workspace = true common-time.workspace = true datafusion.workspace = true datafusion-common.workspace = true diff --git a/src/sql/src/ast.rs b/src/sql/src/ast.rs index 
5d207fb5793b..122740987a0e 100644 --- a/src/sql/src/ast.rs +++ b/src/sql/src/ast.rs @@ -25,7 +25,10 @@ pub trait ObjectNamePartExt { impl ObjectNamePartExt for ObjectNamePart { fn to_string_unquoted(&self) -> String { - let ObjectNamePart::Identifier(ident) = self; + let ObjectNamePart::Identifier(ident) = self else { + // If it's not an ident, just return it as a string. + return self.to_string(); + }; ident.value.clone() } } diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index 1748d3b9de9c..4caad26656ce 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -20,10 +20,8 @@ use common_macro::stack_trace_debug; use datafusion_common::DataFusionError; use datatypes::prelude::{ConcreteDataType, Value}; use snafu::{Location, Snafu}; -use sqlparser::ast::Ident; use sqlparser::parser::ParserError; -use crate::ast::Expr; use crate::parsers::error::TQLError; pub type Result = std::result::Result; @@ -210,10 +208,9 @@ pub enum Error { location: Location, }, - #[snafu(display("Unrecognized table option key: {}, value: {}", key, value))] - InvalidTableOptionValue { - key: Ident, - value: Expr, + #[snafu(display("Invalid expr as option value, error: {error}"))] + InvalidExprAsOptionValue { + error: String, #[snafu(implicit)] location: Location, }, @@ -361,7 +358,7 @@ impl ErrorExt for Error { } InvalidColumnOption { .. } - | InvalidTableOptionValue { .. } + | InvalidExprAsOptionValue { .. } | InvalidDatabaseName { .. } | InvalidDatabaseOption { .. } | ColumnTypeMismatch { .. } diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 6e2a880348cd..6c2a7e11ab2d 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -14,15 +14,15 @@ use std::str::FromStr; -use snafu::ResultExt; -use sqlparser::ast::{Ident, ObjectNamePart, Query, Value}; +use snafu::{OptionExt, ResultExt}; +use sqlparser::ast::{Ident, Query, Value}; use sqlparser::dialect::Dialect; use sqlparser::keywords::Keyword; use sqlparser::parser::{Parser, ParserError, ParserOptions}; use sqlparser::tokenizer::{Token, TokenWithSpan}; use crate::ast::{Expr, ObjectName}; -use crate::error::{self, Result, SyntaxSnafu}; +use crate::error::{self, InvalidSqlSnafu, Result, SyntaxSnafu}; use crate::parsers::tql_parser; use crate::statements::kill::Kill; use crate::statements::statement::Statement; @@ -106,7 +106,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.parser.peek_token().to_string(), })?; - Ok(Self::canonicalize_object_name(raw_table_name)) + Self::canonicalize_object_name(raw_table_name) } pub fn parse_function(sql: &str, dialect: &dyn Dialect) -> Result { @@ -303,17 +303,20 @@ impl ParserContext<'_> { } /// Like [canonicalize_identifier] but for [ObjectName]. 
- pub fn canonicalize_object_name(object_name: ObjectName) -> ObjectName { + pub(crate) fn canonicalize_object_name(object_name: ObjectName) -> Result { object_name .0 .into_iter() .map(|x| { - let ObjectNamePart::Identifier(ident) = x; - ident + x.as_ident() + .cloned() + .map(Self::canonicalize_identifier) + .with_context(|| InvalidSqlSnafu { + msg: format!("not an ident: '{x}'"), + }) }) - .map(Self::canonicalize_identifier) - .collect::>() - .into() + .collect::>>() + .map(Into::into) } /// Simply a shortcut for sqlparser's same name method `parse_object_name`, diff --git a/src/sql/src/parsers/alter_parser.rs b/src/sql/src/parsers/alter_parser.rs index bbbcb1d2f6d8..0cab82efc9e0 100644 --- a/src/sql/src/parsers/alter_parser.rs +++ b/src/sql/src/parsers/alter_parser.rs @@ -20,7 +20,7 @@ use std::collections::HashMap; use common_query::AddColumnLocation; use datatypes::schema::COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE; use snafu::{ResultExt, ensure}; -use sqlparser::ast::Ident; +use sqlparser::ast::{Expr, Ident}; use sqlparser::keywords::Keyword; use sqlparser::parser::{Parser, ParserError}; use sqlparser::tokenizer::{Token, TokenWithSpan}; @@ -32,13 +32,14 @@ use crate::parsers::create_parser::INVERTED; use crate::parsers::utils::{ validate_column_fulltext_create_option, validate_column_skipping_index_create_option, }; +use crate::statements::OptionMap; use crate::statements::alter::{ AddColumn, AlterDatabase, AlterDatabaseOperation, AlterTable, AlterTableOperation, - DropDefaultsOperation, KeyValueOption, SetDefaultsOperation, SetIndexOperation, - UnsetIndexOperation, + DropDefaultsOperation, KeyValueOption, RepartitionOperation, SetDefaultsOperation, + SetIndexOperation, UnsetIndexOperation, }; use crate::statements::statement::Statement; -use crate::util::parse_option_string; +use crate::util::{OptionValue, parse_option_string}; impl ParserContext<'_> { pub(crate) fn parse_alter(&mut self) -> Result { @@ -67,7 +68,7 @@ impl ParserContext<'_> { .parser .parse_object_name(false) .context(error::SyntaxSnafu)?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = Self::canonicalize_object_name(database_name)?; match self.parser.peek_token().token { Token::Word(w) => { @@ -116,7 +117,7 @@ impl ParserContext<'_> { .parser .parse_object_name(false) .context(error::SyntaxSnafu)?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; let alter_operation = match self.parser.peek_token().token { Token::Word(w) => { @@ -124,6 +125,8 @@ impl ParserContext<'_> { self.parse_alter_table_modify()? } else if w.value.eq_ignore_ascii_case("UNSET") { self.parse_alter_table_unset()? + } else if w.value.eq_ignore_ascii_case("REPARTITION") { + self.parse_alter_table_repartition()? } else { match w.keyword { Keyword::ADD => self.parse_alter_table_add()?, @@ -142,7 +145,7 @@ impl ParserContext<'_> { let new_table_name_obj_raw = self.parse_object_name().context(error::SyntaxSnafu)?; let new_table_name_obj = - Self::canonicalize_object_name(new_table_name_obj_raw); + Self::canonicalize_object_name(new_table_name_obj_raw)?; let new_table_name = match &new_table_name_obj.0[..] 
{ [table] => table.to_string_unquoted(), _ => { @@ -166,7 +169,7 @@ impl ParserContext<'_> { AlterTableOperation::SetTableOptions { options } } _ => self.expected( - "ADD or DROP or MODIFY or RENAME or SET after ALTER TABLE", + "ADD or DROP or MODIFY or RENAME or SET or REPARTITION after ALTER TABLE", self.parser.peek_token(), )?, } @@ -189,6 +192,65 @@ impl ParserContext<'_> { Ok(AlterTableOperation::UnsetTableOptions { keys }) } + fn parse_alter_table_repartition(&mut self) -> Result { + let _ = self.parser.next_token(); + + let from_exprs = self.parse_repartition_expr_list()?; + self.parser + .expect_keyword(Keyword::INTO) + .context(error::SyntaxSnafu)?; + let into_exprs = self.parse_repartition_expr_list()?; + + if matches!(self.parser.peek_token().token, Token::Comma) { + return self.expected("end of REPARTITION clause", self.parser.peek_token()); + } + + Ok(AlterTableOperation::Repartition { + operation: RepartitionOperation::new(from_exprs, into_exprs), + }) + } + + fn parse_repartition_expr_list(&mut self) -> Result> { + self.parser + .expect_token(&Token::LParen) + .context(error::SyntaxSnafu)?; + + if matches!(self.parser.peek_token().token, Token::RParen) { + return self.expected( + "expression inside REPARTITION clause", + self.parser.peek_token(), + ); + } + + let mut exprs = Vec::new(); + loop { + let expr = self.parser.parse_expr().context(error::SyntaxSnafu)?; + exprs.push(expr); + + match self.parser.peek_token().token { + Token::Comma => { + self.parser.next_token(); + if matches!(self.parser.peek_token().token, Token::RParen) { + self.parser.next_token(); + break; + } + } + Token::RParen => { + self.parser.next_token(); + break; + } + _ => { + return self.expected( + "comma or right parenthesis after repartition expression", + self.parser.peek_token(), + ); + } + } + } + + Ok(exprs) + } + fn parse_alter_table_add(&mut self) -> Result { let _ = self.parser.next_token(); if let Some(constraint) = self @@ -394,7 +456,7 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; for key in options.keys() { ensure!( @@ -408,9 +470,10 @@ impl ParserContext<'_> { options.insert( COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE.to_string(), - "true".to_string(), + "true".to_string().into(), ); + let options = OptionMap::new(options).into_map(); Ok(AlterTableOperation::SetIndex { options: SetIndexOperation::Fulltext { column_name, @@ -426,9 +489,9 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; - for key in options.keys() { + for (key, _) in options.iter() { ensure!( validate_column_skipping_index_create_option(key), InvalidColumnOptionSnafu { @@ -438,6 +501,7 @@ impl ParserContext<'_> { ); } + let options = OptionMap::new(options).into_map(); Ok(AlterTableOperation::SetIndex { options: SetIndexOperation::Skipping { column_name, @@ -809,6 +873,70 @@ mod tests { } } + #[test] + fn test_parse_alter_table_repartition() { + let sql = r#" +ALTER TABLE t REPARTITION ( + device_id < 100 +) INTO ( + device_id < 100 AND area < 'South', + device_id < 100 AND area >= 'South', +);"#; + let mut result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, result.len()); + + let statement = result.remove(0); + assert_matches!(statement, Statement::AlterTable { .. 
}); + if let Statement::AlterTable(alter_table) = statement { + assert_matches!( + alter_table.alter_operation(), + AlterTableOperation::Repartition { .. } + ); + + if let AlterTableOperation::Repartition { operation } = alter_table.alter_operation() { + assert_eq!(operation.from_exprs.len(), 1); + assert_eq!(operation.from_exprs[0].to_string(), "device_id < 100"); + assert_eq!(operation.into_exprs.len(), 2); + assert_eq!( + operation.into_exprs[0].to_string(), + "device_id < 100 AND area < 'South'" + ); + assert_eq!( + operation.into_exprs[1].to_string(), + "device_id < 100 AND area >= 'South'" + ); + } + } + } + + #[test] + fn test_parse_alter_table_repartition_multiple() { + let sql = r#" +ALTER TABLE metrics REPARTITION +( + a < 10, + a >= 10 +) INTO ( + a < 20 +), +( + b < 20 +) INTO ( + b < 10, + b >= 10, +);"#; + + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap_err(); + assert_eq!( + result.output_msg(), + "Invalid SQL syntax: sql parser error: Expected end of REPARTITION clause, found: ," + ); + } + #[test] fn test_parse_alter_drop_column() { let sql = "ALTER TABLE my_metric_1 DROP a"; @@ -966,7 +1094,7 @@ mod tests { let err = result.output_msg(); assert_eq!( err, - "Invalid SQL syntax: sql parser error: Expected ADD or DROP or MODIFY or RENAME or SET after ALTER TABLE, found: table_t" + "Invalid SQL syntax: sql parser error: Expected ADD or DROP or MODIFY or RENAME or SET or REPARTITION after ALTER TABLE, found: table_t" ); let sql = "ALTER TABLE test_table RENAME table_t"; diff --git a/src/sql/src/parsers/alter_parser/trigger.rs b/src/sql/src/parsers/alter_parser/trigger.rs index 47fcedd6c425..73e9275acd8b 100644 --- a/src/sql/src/parsers/alter_parser/trigger.rs +++ b/src/sql/src/parsers/alter_parser/trigger.rs @@ -544,7 +544,7 @@ mod tests { use crate::parsers::alter_parser::trigger::{apply_label_change, apply_label_replacement}; use crate::statements::OptionMap; use crate::statements::alter::trigger::{LabelChange, LabelOperations}; - use crate::statements::create::trigger::TriggerOn; + use crate::statements::create::trigger::{DurationExpr, TriggerOn}; use crate::statements::statement::Statement; #[test] @@ -569,12 +569,12 @@ mod tests { }; let TriggerOn { query, - interval, - raw_interval_expr, + query_interval, } = alter.operation.trigger_on.unwrap(); + let DurationExpr { duration, raw_expr } = query_interval; assert_eq!(query.to_string(), "(SELECT * FROM test_table)"); - assert_eq!(raw_interval_expr, "'5 minute'::INTERVAL"); - assert_eq!(interval, Duration::from_secs(300)); + assert_eq!(raw_expr, "'5 minute'::INTERVAL"); + assert_eq!(duration, Duration::from_secs(300)); assert!(alter.operation.rename.is_none()); assert!(alter.operation.label_operations.is_none()); assert!(alter.operation.annotation_operations.is_none()); @@ -625,8 +625,8 @@ mod tests { }; assert_eq!(labels.len(), 2); - assert_eq!(labels.get("key1"), Some(&"value1".to_string())); - assert_eq!(labels.get("key2"), Some(&"VALUE2".to_string())); + assert_eq!(labels.get("key1"), Some("value1")); + assert_eq!(labels.get("key2"), Some("VALUE2")); // Passed case: multiple ADD/DROP/MODIFY LABELS. 
let sql = r#"test_trigger ADD LABELS (key1='value1') MODIFY LABELS (key2='value2') DROP LABELS ('key3')"#; diff --git a/src/sql/src/parsers/copy_parser.rs b/src/sql/src/parsers/copy_parser.rs index 185aa4dbcfac..892992d31033 100644 --- a/src/sql/src/parsers/copy_parser.rs +++ b/src/sql/src/parsers/copy_parser.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; - use snafu::ResultExt; use sqlparser::keywords::Keyword; use sqlparser::tokenizer::Token; @@ -21,6 +19,7 @@ use sqlparser::tokenizer::Token::Word; use crate::error::{self, Result}; use crate::parser::ParserContext; +use crate::statements::OptionMap; use crate::statements::copy::{ CopyDatabase, CopyDatabaseArgument, CopyQueryTo, CopyQueryToArgument, CopyTable, CopyTableArgument, @@ -28,9 +27,6 @@ use crate::statements::copy::{ use crate::statements::statement::Statement; use crate::util::parse_option_string; -pub type With = HashMap; -pub type Connection = HashMap; - // COPY tbl TO 'output.parquet'; impl ParserContext<'_> { pub(crate) fn parse_copy(&mut self) -> Result { @@ -73,8 +69,8 @@ impl ParserContext<'_> { let argument = CopyDatabaseArgument { database_name, - with: with.into(), - connection: connection.into(), + with, + connection, location, }; CopyDatabase::To(argument) @@ -92,8 +88,8 @@ impl ParserContext<'_> { let argument = CopyDatabaseArgument { database_name, - with: with.into(), - connection: connection.into(), + with, + connection, location, }; CopyDatabase::From(argument) @@ -108,14 +104,14 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; if self.parser.parse_keyword(Keyword::TO) { let (with, connection, location, limit) = self.parse_copy_parameters()?; Ok(CopyTable::To(CopyTableArgument { table_name, - with: with.into(), - connection: connection.into(), + with, + connection, location, limit, })) @@ -126,8 +122,8 @@ impl ParserContext<'_> { let (with, connection, location, limit) = self.parse_copy_parameters()?; Ok(CopyTable::From(CopyTableArgument { table_name, - with: with.into(), - connection: connection.into(), + with, + connection, location, limit, })) @@ -161,14 +157,14 @@ impl ParserContext<'_> { Ok(CopyQueryTo { query: Box::new(query), arg: CopyQueryToArgument { - with: with.into(), - connection: connection.into(), + with, + connection, location, }, }) } - fn parse_copy_parameters(&mut self) -> Result<(With, Connection, String, Option)> { + fn parse_copy_parameters(&mut self) -> Result<(OptionMap, OptionMap, String, Option)> { let location = self.parser .parse_literal_string() @@ -185,7 +181,8 @@ impl ParserContext<'_> { let with = options .into_iter() .map(parse_option_string) - .collect::>()?; + .collect::>>()?; + let with = OptionMap::new(with); let connection_options = self .parser @@ -195,7 +192,8 @@ impl ParserContext<'_> { let connection = connection_options .into_iter() .map(parse_option_string) - .collect::>()?; + .collect::>>()?; + let connection = OptionMap::new(connection); let limit = if self.parser.parse_keyword(Keyword::LIMIT) { Some( @@ -309,7 +307,7 @@ mod tests { struct Test<'a> { sql: &'a str, expected_pattern: Option, - expected_connection: HashMap, + expected_connection: HashMap<&'a str, &'a str>, } let tests = [ @@ -321,10 +319,7 @@ mod tests { Test { sql: "COPY catalog0.schema0.tbl FROM 
'tbl_file.parquet' WITH (PATTERN = 'demo.*') CONNECTION (FOO='Bar', ONE='two')", expected_pattern: Some("demo.*".into()), - expected_connection: [("foo", "Bar"), ("one", "two")] - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), + expected_connection: HashMap::from([("foo", "Bar"), ("one", "two")]), }, ]; @@ -346,10 +341,7 @@ mod tests { if let Some(expected_pattern) = test.expected_pattern { assert_eq!(copy_table.pattern().unwrap(), expected_pattern); } - assert_eq!( - copy_table.connection.clone(), - test.expected_connection.into() - ); + assert_eq!(copy_table.connection.to_str_map(), test.expected_connection); } _ => unreachable!(), } @@ -360,7 +352,7 @@ mod tests { fn test_parse_copy_table_to() { struct Test<'a> { sql: &'a str, - expected_connection: HashMap, + expected_connection: HashMap<&'a str, &'a str>, } let tests = [ @@ -370,17 +362,11 @@ mod tests { }, Test { sql: "COPY catalog0.schema0.tbl TO 'tbl_file.parquet' CONNECTION (FOO='Bar', ONE='two')", - expected_connection: [("foo", "Bar"), ("one", "two")] - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), + expected_connection: HashMap::from([("foo", "Bar"), ("one", "two")]), }, Test { sql: "COPY catalog0.schema0.tbl TO 'tbl_file.parquet' WITH (FORMAT = 'parquet') CONNECTION (FOO='Bar', ONE='two')", - expected_connection: [("foo", "Bar"), ("one", "two")] - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), + expected_connection: HashMap::from([("foo", "Bar"), ("one", "two")]), }, ]; @@ -399,10 +385,7 @@ mod tests { Statement::Copy(crate::statements::copy::Copy::CopyTable(CopyTable::To( copy_table, ))) => { - assert_eq!( - copy_table.connection.clone(), - test.expected_connection.into() - ); + assert_eq!(copy_table.connection.to_str_map(), test.expected_connection); } _ => unreachable!(), } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index 30a087088a97..157f5540719c 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod json; #[cfg(feature = "enterprise")] pub mod trigger; @@ -50,7 +51,7 @@ use crate::statements::create::{ use crate::statements::statement::Statement; use crate::statements::transform::type_alias::get_data_type_by_alias_name; use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type}; -use crate::util::{location_to_index, parse_option_string}; +use crate::util::{OptionValue, location_to_index, parse_option_string}; pub const ENGINE: &str = "ENGINE"; pub const MAXVALUE: &str = "MAXVALUE"; @@ -195,7 +196,7 @@ impl<'a> ParserContext<'a> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = Self::canonicalize_object_name(database_name)?; let options = self .parser @@ -203,7 +204,7 @@ impl<'a> ParserContext<'a> { .context(SyntaxSnafu)? 
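As the reworked COPY tests show, the WITH/CONNECTION options now land in an `OptionMap`, and option keys are compared case-insensitively: `CONNECTION (FOO='Bar', ONE='two')` yields the pairs `("foo", "Bar")` and `("one", "two")`, i.e. keys are lower-cased while values keep their original case. A tiny sketch of that normalization step, independent of sqlparser's `SqlOption` type:

```rust
use std::collections::HashMap;

fn normalize_options(opts: &[(&str, &str)]) -> HashMap<String, String> {
    opts.iter()
        // Keys are lower-cased; values are kept verbatim.
        .map(|(k, v)| (k.to_lowercase(), v.to_string()))
        .collect()
}

fn main() {
    let conn = normalize_options(&[("FOO", "Bar"), ("ONE", "two")]);
    assert_eq!(conn.get("foo").map(String::as_str), Some("Bar"));
    assert_eq!(conn.get("one").map(String::as_str), Some("two"));
}
```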
.into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; for key in options.keys() { ensure!( @@ -211,7 +212,7 @@ impl<'a> ParserContext<'a> { InvalidDatabaseOptionSnafu { key: key.clone() } ); } - if let Some(append_mode) = options.get("append_mode") + if let Some(append_mode) = options.get("append_mode").and_then(|x| x.as_string()) && append_mode == "true" && options.contains_key("merge_mode") { @@ -224,7 +225,7 @@ impl<'a> ParserContext<'a> { Ok(Statement::CreateDatabase(CreateDatabase { name: database_name, if_not_exists, - options: options.into(), + options: OptionMap::new(options), })) } @@ -450,11 +451,11 @@ impl<'a> ParserContext<'a> { .context(SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; for key in options.keys() { ensure!(validate_table_option(key), InvalidTableOptionSnafu { key }); } - Ok(options.into()) + Ok(OptionMap::new(options)) } /// "PARTITION ON COLUMNS (...)" clause @@ -662,9 +663,17 @@ impl<'a> ParserContext<'a> { } ); + let mut extensions = ColumnExtensions::default(); + let data_type = parser.parse_data_type().context(SyntaxSnafu)?; + // Must immediately parse the JSON datatype format because it is closely after the "JSON" + // datatype, like this: "JSON(format = ...)". + if matches!(data_type, DataType::JSON) { + let options = json::parse_json_datatype_options(parser)?; + extensions.json_datatype_options = Some(options); + } + let mut options = vec![]; - let mut extensions = ColumnExtensions::default(); loop { if parser.parse_keyword(Keyword::CONSTRAINT) { let name = Some(parser.parse_identifier().context(SyntaxSnafu)?); @@ -810,9 +819,9 @@ impl<'a> ParserContext<'a> { .context(error::SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; - for key in options.keys() { + for (key, _) in options.iter() { ensure!( validate_column_skipping_index_create_option(key), InvalidColumnOptionSnafu { @@ -822,7 +831,8 @@ impl<'a> ParserContext<'a> { ); } - column_extensions.skipping_index_options = Some(options.into()); + let options = OptionMap::new(options); + column_extensions.skipping_index_options = Some(options); is_index_declared |= true; } @@ -860,9 +870,9 @@ impl<'a> ParserContext<'a> { .context(error::SyntaxSnafu)? 
.into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; - for key in options.keys() { + for (key, _) in options.iter() { ensure!( validate_column_fulltext_create_option(key), InvalidColumnOptionSnafu { @@ -872,7 +882,8 @@ impl<'a> ParserContext<'a> { ); } - column_extensions.fulltext_index_options = Some(options.into()); + let options = OptionMap::new(options); + column_extensions.fulltext_index_options = Some(options); is_index_declared |= true; } @@ -1203,7 +1214,7 @@ mod tests { struct Test<'a> { sql: &'a str, expected_table_name: &'a str, - expected_options: HashMap, + expected_options: HashMap<&'a str, &'a str>, expected_engine: &'a str, expected_if_not_exist: bool, } @@ -1213,8 +1224,8 @@ mod tests { sql: "CREATE EXTERNAL TABLE city with(location='/var/data/city.csv',format='csv');", expected_table_name: "city", expected_options: HashMap::from([ - ("location".to_string(), "/var/data/city.csv".to_string()), - ("format".to_string(), "csv".to_string()), + ("location", "/var/data/city.csv"), + ("format", "csv"), ]), expected_engine: FILE_ENGINE, expected_if_not_exist: false, @@ -1223,8 +1234,8 @@ mod tests { sql: "CREATE EXTERNAL TABLE IF NOT EXISTS city ENGINE=foo with(location='/var/data/city.csv',format='csv');", expected_table_name: "city", expected_options: HashMap::from([ - ("location".to_string(), "/var/data/city.csv".to_string()), - ("format".to_string(), "csv".to_string()), + ("location", "/var/data/city.csv"), + ("format", "csv"), ]), expected_engine: "foo", expected_if_not_exist: true, @@ -1233,9 +1244,9 @@ mod tests { sql: "CREATE EXTERNAL TABLE IF NOT EXISTS city ENGINE=foo with(location='/var/data/city.csv',format='csv','compaction.type'='bar');", expected_table_name: "city", expected_options: HashMap::from([ - ("location".to_string(), "/var/data/city.csv".to_string()), - ("format".to_string(), "csv".to_string()), - ("compaction.type".to_string(), "bar".to_string()), + ("location", "/var/data/city.csv"), + ("format", "csv"), + ("compaction.type", "bar"), ]), expected_engine: "foo", expected_if_not_exist: true, @@ -1253,7 +1264,7 @@ mod tests { match &stmts[0] { Statement::CreateExternalTable(c) => { assert_eq!(c.name.to_string(), test.expected_table_name.to_string()); - assert_eq!(c.options, test.expected_options.into()); + assert_eq!(c.options.to_str_map(), test.expected_options); assert_eq!(c.if_not_exists, test.expected_if_not_exist); assert_eq!(c.engine, test.expected_engine); } @@ -1273,10 +1284,7 @@ mod tests { PRIMARY KEY(ts, host), ) with(location='/var/data/city.csv',format='csv');"; - let options = HashMap::from([ - ("location".to_string(), "/var/data/city.csv".to_string()), - ("format".to_string(), "csv".to_string()), - ]); + let options = HashMap::from([("location", "/var/data/city.csv"), ("format", "csv")]); let stmts = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) @@ -1285,7 +1293,7 @@ mod tests { match &stmts[0] { Statement::CreateExternalTable(c) => { assert_eq!(c.name.to_string(), "city"); - assert_eq!(c.options, options.into()); + assert_eq!(c.options.to_str_map(), options); let columns = &c.columns; assert_column_def(&columns[0].column_def, "host", "STRING"); @@ -2427,8 +2435,7 @@ non TIMESTAMP(6) TIME INDEX, let sql = "CREATE VIEW test AS DELETE from demo"; let result = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); - assert!(result.is_err()); - assert_matches!(result, Err(crate::error::Error::Syntax { .. 
})); + assert!(result.is_ok_and(|x| x.len() == 1)); } #[test] @@ -2723,7 +2730,7 @@ CREATE TABLE log ( assert!(result.is_ok()); assert!(extensions.vector_options.is_some()); let vector_options = extensions.vector_options.unwrap(); - assert_eq!(vector_options.get(VECTOR_OPT_DIM), Some(&"128".to_string())); + assert_eq!(vector_options.get(VECTOR_OPT_DIM), Some("128")); } #[test] @@ -2783,14 +2790,8 @@ CREATE TABLE log ( assert!(result.unwrap()); assert!(extensions.fulltext_index_options.is_some()); let fulltext_options = extensions.fulltext_index_options.unwrap(); - assert_eq!( - fulltext_options.get("analyzer"), - Some(&"English".to_string()) - ); - assert_eq!( - fulltext_options.get("case_sensitive"), - Some(&"true".to_string()) - ); + assert_eq!(fulltext_options.get("analyzer"), Some("English")); + assert_eq!(fulltext_options.get("case_sensitive"), Some("true")); } // Test fulltext index with invalid type (should fail) diff --git a/src/sql/src/parsers/create_parser/json.rs b/src/sql/src/parsers/create_parser/json.rs new file mode 100644 index 000000000000..1556205fef4b --- /dev/null +++ b/src/sql/src/parsers/create_parser/json.rs @@ -0,0 +1,138 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use snafu::ResultExt; +use sqlparser::parser::Parser; +use sqlparser::tokenizer::Token; + +use crate::error::{Result, SyntaxSnafu}; +use crate::statements::OptionMap; +use crate::util; + +pub(super) fn parse_json_datatype_options(parser: &mut Parser<'_>) -> Result { + if parser.consume_token(&Token::LParen) { + let result = parser + .parse_comma_separated0(Parser::parse_sql_option, Token::RParen) + .context(SyntaxSnafu) + .and_then(|options| { + options + .into_iter() + .map(util::parse_option_string) + .collect::>>() + })?; + parser.expect_token(&Token::RParen).context(SyntaxSnafu)?; + Ok(OptionMap::new(result)) + } else { + Ok(OptionMap::default()) + } +} + +#[cfg(test)] +mod tests { + use sqlparser::ast::DataType; + + use crate::dialect::GreptimeDbDialect; + use crate::parser::{ParseOptions, ParserContext}; + use crate::statements::OptionMap; + use crate::statements::create::{ + Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FORMAT, + JSON_OPT_UNSTRUCTURED_KEYS, + }; + use crate::statements::statement::Statement; + + #[test] + fn test_parse_json_datatype_options() { + fn parse(sql: &str) -> OptionMap { + let Statement::CreateTable(mut create_table) = ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap() + .remove(0) else { + unreachable!() + }; + + let Column { + column_def, + extensions, + } = create_table.columns.remove(0); + assert_eq!(column_def.name.to_string(), "my_json"); + assert_eq!(column_def.data_type, DataType::JSON); + assert!(column_def.options.is_empty()); + + assert!(extensions.json_datatype_options.is_some()); + extensions.json_datatype_options.unwrap() + } + + let sql = r#" +CREATE TABLE json_data ( + my_json JSON(format = "partial", 
unstructured_keys = ["k", "foo.bar", "a.b.c"]), + ts TIMESTAMP TIME INDEX, +)"#; + let options = parse(sql); + assert_eq!(options.len(), 2); + assert_eq!( + options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), + Some(JSON_FORMAT_PARTIAL) + ); + let expected = vec!["k", "foo.bar", "a.b.c"]; + assert_eq!( + options + .value(JSON_OPT_UNSTRUCTURED_KEYS) + .and_then(|x| x.as_list()), + Some(expected) + ); + + let sql = r#" +CREATE TABLE json_data ( + my_json JSON(format = "structured"), + ts TIMESTAMP TIME INDEX, +)"#; + let options = parse(sql); + assert_eq!(options.len(), 1); + assert_eq!( + options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), + Some(JSON_FORMAT_FULL_STRUCTURED) + ); + + let sql = r#" +CREATE TABLE json_data ( + my_json JSON(format = "raw"), + ts TIMESTAMP TIME INDEX, +)"#; + let options = parse(sql); + assert_eq!(options.len(), 1); + assert_eq!( + options.value(JSON_OPT_FORMAT).and_then(|x| x.as_string()), + Some(JSON_FORMAT_RAW) + ); + + let sql = r#" +CREATE TABLE json_data ( + my_json JSON(), + ts TIMESTAMP TIME INDEX, +)"#; + let options = parse(sql); + assert!(options.is_empty()); + + let sql = r#" +CREATE TABLE json_data ( + my_json JSON, + ts TIMESTAMP TIME INDEX, +)"#; + let options = parse(sql); + assert!(options.is_empty()); + } +} diff --git a/src/sql/src/parsers/create_parser/trigger.rs b/src/sql/src/parsers/create_parser/trigger.rs index 65ebc7fe1e6d..6b2b0c1eb741 100644 --- a/src/sql/src/parsers/create_parser/trigger.rs +++ b/src/sql/src/parsers/create_parser/trigger.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::time::Duration; use snafu::{OptionExt, ResultExt, ensure}; @@ -12,7 +11,7 @@ use crate::parser::ParserContext; use crate::parsers::utils::convert_month_day_nano_to_duration; use crate::statements::OptionMap; use crate::statements::create::trigger::{ - AlertManagerWebhook, ChannelType, CreateTrigger, NotifyChannel, TriggerOn, + AlertManagerWebhook, ChannelType, CreateTrigger, DurationExpr, NotifyChannel, TriggerOn, }; use crate::statements::statement::Statement; use crate::util::parse_option_string; @@ -25,6 +24,8 @@ pub const ANNOTATIONS: &str = "ANNOTATIONS"; pub const NOTIFY: &str = "NOTIFY"; pub const WEBHOOK: &str = "WEBHOOK"; pub const URL: &str = "URL"; +pub const FOR: &str = "FOR"; +pub const KEEP_FIRING_FOR: &str = "KEEP_FIRING_FOR"; const TIMEOUT: &str = "timeout"; @@ -38,13 +39,15 @@ impl<'a> ParserContext<'a> { /// ```sql /// -- CREATE TRIGGER /// [IF NOT EXISTS] - /// ON () - /// EVERY - /// [LABELS (=, ...)] - /// [ANNOTATIONS (=, ...)] - /// NOTIFY( - /// WEBHOOK URL '' [WITH (=, ...)], - /// WEBHOOK URL '' [WITH (=, ...)] + /// ON () + /// EVERY + /// [FOR ] + /// [KEEP_FIRING_FOR ] + /// [LABELS (=, ...)] + /// [ANNOTATIONS (=, ...)] + /// NOTIFY( + /// WEBHOOK URL '' [WITH (=, ...)], + /// WEBHOOK URL '' [WITH (=, ...)] /// ) /// ``` /// @@ -57,6 +60,8 @@ impl<'a> ParserContext<'a> { let mut may_labels = None; let mut may_annotations = None; let mut notify_channels = vec![]; + let mut r#for = None; + let mut keep_firing_for = None; loop { let next_token = self.parser.peek_token(); @@ -81,10 +86,18 @@ impl<'a> ParserContext<'a> { let channels = self.parse_trigger_notify(true)?; notify_channels.extend(channels); } + Token::Word(w) if w.value.eq_ignore_ascii_case(FOR) => { + self.parser.next_token(); + r#for.replace(self.parse_trigger_for(true)?); + } + Token::Word(w) if w.value.eq_ignore_ascii_case(KEEP_FIRING_FOR) => { + self.parser.next_token(); + 
keep_firing_for.replace(self.parse_trigger_keep_firing_for(true)?); + } Token::EOF => break, _ => { return self.expected( - "`ON` or `LABELS` or `ANNOTATIONS` or `NOTIFY` keyword", + "`ON` or `LABELS` or `ANNOTATIONS` or `NOTIFY` keyword or `FOR` or `KEEP_FIRING_FOR`", next_token, ); } @@ -104,6 +117,8 @@ impl<'a> ParserContext<'a> { trigger_name, if_not_exists, trigger_on, + r#for, + keep_firing_for, labels, annotations, channels: notify_channels, @@ -149,7 +164,7 @@ impl<'a> ParserContext<'a> { return self.expected("`EVERY` keyword", self.parser.peek_token()); } - let (month_day_nano, raw_interval_expr) = self.parse_interval_month_day_nano()?; + let (month_day_nano, raw_expr) = self.parse_interval_month_day_nano()?; // Trigger Interval (month_day_nano): the months field is prohibited, // as the length of a month is ambiguous. @@ -169,13 +184,90 @@ impl<'a> ParserContext<'a> { interval }; + let query_interval = DurationExpr { + duration: interval, + raw_expr, + }; + Ok(TriggerOn { query, - interval, - raw_interval_expr, + query_interval, }) } + pub(crate) fn parse_trigger_for( + &mut self, + is_first_keyword_matched: bool, + ) -> Result { + if !is_first_keyword_matched { + if let Token::Word(w) = self.parser.peek_token().token + && w.value.eq_ignore_ascii_case(FOR) + { + self.parser.next_token(); + } else { + return self.expected("`FOR` keyword", self.parser.peek_token()); + } + } + + let (month_day_nano, raw_expr) = self.parse_interval_month_day_nano()?; + + // Trigger Interval (month_day_nano): the months field is prohibited, + // as the length of a month is ambiguous. + ensure!( + month_day_nano.months == 0, + error::InvalidIntervalSnafu { + reason: "year and month is not supported in trigger FOR duration".to_string() + } + ); + + let duration = convert_month_day_nano_to_duration(month_day_nano)?; + + let duration = if duration < Duration::from_secs(1) { + Duration::from_secs(1) + } else { + duration + }; + + Ok(DurationExpr { duration, raw_expr }) + } + + pub(crate) fn parse_trigger_keep_firing_for( + &mut self, + is_first_keyword_matched: bool, + ) -> Result { + if !is_first_keyword_matched { + if let Token::Word(w) = self.parser.peek_token().token + && w.value.eq_ignore_ascii_case(KEEP_FIRING_FOR) + { + self.parser.next_token(); + } else { + return self.expected("`KEEP_FIRING_FOR` keyword", self.parser.peek_token()); + } + } + + let (month_day_nano, raw_expr) = self.parse_interval_month_day_nano()?; + + // Trigger Interval (month_day_nano): the months field is prohibited, + // as the length of a month is ambiguous. + ensure!( + month_day_nano.months == 0, + error::InvalidIntervalSnafu { + reason: "year and month is not supported in trigger KEEP_FIRING_FOR duration" + .to_string() + } + ); + + let duration = convert_month_day_nano_to_duration(month_day_nano)?; + + let duration = if duration < Duration::from_secs(1) { + Duration::from_secs(1) + } else { + duration + }; + + Ok(DurationExpr { duration, raw_expr }) + } + /// The SQL format as follows: /// /// ```sql @@ -210,11 +302,11 @@ impl<'a> ParserContext<'a> { .context(error::SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; self.parser .expect_token(&Token::RParen) .context(error::SyntaxSnafu)?; - Ok(options.into()) + Ok(OptionMap::new(options)) } /// The SQL format as follows: @@ -251,11 +343,11 @@ impl<'a> ParserContext<'a> { .context(error::SyntaxSnafu)? 
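Both `parse_trigger_for` and `parse_trigger_keep_firing_for` above apply the same policy to the parsed interval: a months component is rejected because a month has no fixed length, and anything shorter than one second is clamped up to one second. A standalone sketch of that policy (the month/day/nano split and names here are assumptions; the real code goes through `convert_month_day_nano_to_duration` and snafu errors):

```rust
use std::time::Duration;

fn trigger_duration(months: u32, days: u64, nanos: u64) -> Result<Duration, String> {
    if months != 0 {
        return Err("year and month is not supported in trigger durations".to_string());
    }
    let total = Duration::from_secs(days * 24 * 3600) + Duration::from_nanos(nanos);
    // Sub-second intervals are clamped to one second.
    Ok(total.max(Duration::from_secs(1)))
}

fn main() {
    // '1ms'::INTERVAL is accepted but clamped.
    assert_eq!(trigger_duration(0, 0, 1_000_000).unwrap(), Duration::from_secs(1));
    // '10 minute'::INTERVAL passes through unchanged.
    assert_eq!(
        trigger_duration(0, 0, 600_000_000_000).unwrap(),
        Duration::from_secs(600)
    );
    // '1 month'::INTERVAL is rejected.
    assert!(trigger_duration(1, 0, 0).is_err());
}
```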
.into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; self.parser .expect_token(&Token::RParen) .context(error::SyntaxSnafu)?; - Ok(options.into()) + Ok(OptionMap::new(options)) } /// The SQL format as follows: @@ -374,9 +466,9 @@ impl<'a> ParserContext<'a> { .context(error::SyntaxSnafu)? .into_iter() .map(parse_option_string) - .collect::>>()?; + .collect::>>()?; - for key in options.keys() { + for (key, _) in options.iter() { ensure!( validate_webhook_option(key), error::InvalidTriggerWebhookOptionSnafu { key: key.clone() } @@ -385,7 +477,7 @@ impl<'a> ParserContext<'a> { let webhook = AlertManagerWebhook { url, - options: options.into(), + options: OptionMap::new(options), }; Ok(NotifyChannel { @@ -436,6 +528,8 @@ IF NOT EXISTS cpu_monitor ON (SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 1) EVERY '5 minute'::INTERVAL LABELS (label_name=label_val) + FOR '1ms'::INTERVAL + KEEP_FIRING_FOR '10 minute'::INTERVAL ANNOTATIONS (annotation_name=annotation_val) NOTIFY( WEBHOOK alert_manager_1 URL 'http://127.0.0.1:9093' WITH (timeout='1m'), @@ -452,6 +546,8 @@ IF NOT EXISTS cpu_monitor ) LABELS (label_name=label_val) ANNOTATIONS (annotation_name=annotation_val) + KEEP_FIRING_FOR '10 minute'::INTERVAL + FOR '1ms'::INTERVAL ON (SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 1) EVERY '5 minute'::INTERVAL "# @@ -476,15 +572,14 @@ IF NOT EXISTS cpu_monitor ); let TriggerOn { query, - interval, - raw_interval_expr, + query_interval, } = &create_trigger.trigger_on; assert_eq!( query.to_string(), "(SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 1)" ); - assert_eq!(*interval, Duration::from_secs(300)); - assert_eq!(raw_interval_expr.clone(), "'5 minute'::INTERVAL"); + assert_eq!(query_interval.duration, Duration::from_secs(300)); + assert_eq!(query_interval.raw_expr.clone(), "'5 minute'::INTERVAL"); assert_eq!(create_trigger.labels.len(), 1); assert_eq!( create_trigger.labels.get("label_name").unwrap(), @@ -509,6 +604,13 @@ IF NOT EXISTS cpu_monitor assert_eq!(webhook2.url.to_string(), "'http://127.0.0.1:9094'"); assert_eq!(webhook2.options.len(), 1); assert_eq!(webhook2.options.get("timeout").unwrap(), "2m"); + + let r#for = create_trigger.r#for.as_ref().unwrap(); + assert_eq!(r#for.duration, Duration::from_secs(1)); + assert_eq!(r#for.raw_expr, "'1ms'::INTERVAL"); + let keep_firing_for = create_trigger.keep_firing_for.as_ref().unwrap(); + assert_eq!(keep_firing_for.duration, Duration::from_secs(600)); + assert_eq!(keep_firing_for.raw_expr, "'10 minute'::INTERVAL"); } #[test] @@ -518,12 +620,11 @@ IF NOT EXISTS cpu_monitor let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); let TriggerOn { query, - interval, - raw_interval_expr: raw_interval, + query_interval: interval, } = ctx.parse_trigger_on(false).unwrap(); assert_eq!(query.to_string(), "(SELECT * FROM cpu_usage)"); - assert_eq!(interval, Duration::from_secs(300)); - assert_eq!(raw_interval, "'5 minute'::INTERVAL"); + assert_eq!(interval.duration, Duration::from_secs(300)); + assert_eq!(interval.raw_expr, "'5 minute'::INTERVAL"); // Invalid, since missing `ON` keyword. 
let sql = "SELECT * FROM cpu_usage EVERY '5 minute'::INTERVAL"; @@ -559,7 +660,7 @@ IF NOT EXISTS cpu_monitor let sql = "ON (SELECT * FROM cpu_usage) EVERY '1ms'::INTERVAL"; let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); let trigger_on = ctx.parse_trigger_on(false).unwrap(); - assert_eq!(trigger_on.interval, Duration::from_secs(1)); + assert_eq!(trigger_on.query_interval.duration, Duration::from_secs(1)); } #[test] @@ -739,4 +840,66 @@ IF NOT EXISTS cpu_monitor assert!(validate_webhook_option(TIMEOUT)); assert!(!validate_webhook_option("invalid_option")); } + + #[test] + fn test_parse_trigger_for() { + // Normal. + let sql = "FOR '10 minute'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + let expr = ctx.parse_trigger_for(false).unwrap(); + assert_eq!(expr.duration, Duration::from_secs(600)); + assert_eq!(expr.raw_expr, "'10 minute'::INTERVAL"); + + // Invalid, missing FOR keyword. + let sql = "'10 minute'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_for(false).is_err()); + + // Invalid, year not allowed. + let sql = "FOR '1 year'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_for(false).is_err()); + + // Invalid, month not allowed. + let sql = "FOR '1 month'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_for(false).is_err()); + + // Valid, interval less than 1 second is clamped. + let sql = "FOR '1ms'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + let expr = ctx.parse_trigger_for(false).unwrap(); + assert_eq!(expr.duration, Duration::from_secs(1)); + } + + #[test] + fn test_parse_trigger_keep_firing_for() { + // Normal. + let sql = "KEEP_FIRING_FOR '10 minute'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + let expr = ctx.parse_trigger_keep_firing_for(false).unwrap(); + assert_eq!(expr.duration, Duration::from_secs(600)); + assert_eq!(expr.raw_expr, "'10 minute'::INTERVAL"); + + // Invalid, missing KEEP_FIRING_FOR keyword. + let sql = "'10 minute'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_keep_firing_for(false).is_err()); + + // Invalid, year not allowed. + let sql = "KEEP_FIRING_FOR '1 year'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_keep_firing_for(false).is_err()); + + // Invalid, month not allowed. + let sql = "KEEP_FIRING_FOR '1 month'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + assert!(ctx.parse_trigger_keep_firing_for(false).is_err()); + + // Valid, interval less than 1 second is clamped. 
+ let sql = "KEEP_FIRING_FOR '1ms'::INTERVAL"; + let mut ctx = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap(); + let expr = ctx.parse_trigger_keep_firing_for(false).unwrap(); + assert_eq!(expr.duration, Duration::from_secs(1)); + } } diff --git a/src/sql/src/parsers/cursor_parser.rs b/src/sql/src/parsers/cursor_parser.rs index 5d6deee12ca4..eb907b8d7633 100644 --- a/src/sql/src/parsers/cursor_parser.rs +++ b/src/sql/src/parsers/cursor_parser.rs @@ -51,7 +51,7 @@ impl ParserContext<'_> { let query_stmt = self.parse_query()?; match query_stmt { Statement::Query(query) => Ok(Statement::DeclareCursor(DeclareCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, query, })), _ => error::InvalidSqlSnafu { @@ -78,7 +78,7 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)?; Ok(Statement::FetchCursor(FetchCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, fetch_size, })) } @@ -91,7 +91,7 @@ impl ParserContext<'_> { .context(error::SyntaxSnafu)?; Ok(Statement::CloseCursor(CloseCursor { - cursor_name: ParserContext::canonicalize_object_name(cursor_name), + cursor_name: ParserContext::canonicalize_object_name(cursor_name)?, })) } } diff --git a/src/sql/src/parsers/describe_parser.rs b/src/sql/src/parsers/describe_parser.rs index 849dca468bcf..9a418cba3173 100644 --- a/src/sql/src/parsers/describe_parser.rs +++ b/src/sql/src/parsers/describe_parser.rs @@ -36,7 +36,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_idents = Self::canonicalize_object_name(raw_table_idents); + let table_idents = Self::canonicalize_object_name(raw_table_idents)?; ensure!( !table_idents.0.is_empty(), InvalidTableNameSnafu { diff --git a/src/sql/src/parsers/drop_parser.rs b/src/sql/src/parsers/drop_parser.rs index 8f3872d95771..39de64e94559 100644 --- a/src/sql/src/parsers/drop_parser.rs +++ b/src/sql/src/parsers/drop_parser.rs @@ -58,7 +58,7 @@ impl ParserContext<'_> { expected: "a trigger name", actual: self.peek_token_as_string(), })?; - let trigger_ident = Self::canonicalize_object_name(raw_trigger_ident); + let trigger_ident = Self::canonicalize_object_name(raw_trigger_ident)?; ensure!( !trigger_ident.0.is_empty(), error::InvalidTriggerNameSnafu { @@ -82,7 +82,7 @@ impl ParserContext<'_> { expected: "a view name", actual: self.peek_token_as_string(), })?; - let view_ident = Self::canonicalize_object_name(raw_view_ident); + let view_ident = Self::canonicalize_object_name(raw_view_ident)?; ensure!( !view_ident.0.is_empty(), InvalidTableNameSnafu { @@ -106,7 +106,7 @@ impl ParserContext<'_> { expected: "a flow name", actual: self.peek_token_as_string(), })?; - let flow_ident = Self::canonicalize_object_name(raw_flow_ident); + let flow_ident = Self::canonicalize_object_name(raw_flow_ident)?; ensure!( !flow_ident.0.is_empty(), InvalidFlowNameSnafu { @@ -129,7 +129,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_ident = Self::canonicalize_object_name(raw_table_ident); + let table_ident = Self::canonicalize_object_name(raw_table_ident)?; ensure!( !table_ident.0.is_empty(), InvalidTableNameSnafu { @@ -155,7 +155,7 @@ impl ParserContext<'_> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(database_name); + let database_name = 
Self::canonicalize_object_name(database_name)?; Ok(Statement::DropDatabase(DropDatabase::new( database_name, diff --git a/src/sql/src/parsers/explain_parser.rs b/src/sql/src/parsers/explain_parser.rs index c595efc86d82..720c9209a4d1 100644 --- a/src/sql/src/parsers/explain_parser.rs +++ b/src/sql/src/parsers/explain_parser.rs @@ -73,6 +73,7 @@ mod tests { projection: vec![sqlparser::ast::SelectItem::Wildcard( WildcardAdditionalOptions::default(), )], + exclude: None, into: None, from: vec![sqlparser::ast::TableWithJoins { relation: sqlparser::ast::TableFactor::Table { @@ -112,9 +113,8 @@ mod tests { with: None, body: Box::new(sqlparser::ast::SetExpr::Select(Box::new(select))), order_by: None, - limit: None, - limit_by: vec![], - offset: None, + limit_clause: None, + pipe_operators: vec![], fetch: None, locks: vec![], for_clause: None, diff --git a/src/sql/src/parsers/set_var_parser.rs b/src/sql/src/parsers/set_var_parser.rs index d04a46670401..8290f00af8a1 100644 --- a/src/sql/src/parsers/set_var_parser.rs +++ b/src/sql/src/parsers/set_var_parser.rs @@ -13,7 +13,7 @@ // limitations under the License. use snafu::ResultExt; -use sqlparser::ast::Statement as SpStatement; +use sqlparser::ast::{Set, Statement as SpStatement}; use crate::ast::{Ident, ObjectName}; use crate::error::{self, Result}; @@ -27,21 +27,27 @@ impl ParserContext<'_> { let _ = self.parser.next_token(); let spstatement = self.parser.parse_set().context(error::SyntaxSnafu)?; match spstatement { - SpStatement::SetVariable { - variables, - value, - hivevar, - .. - } if !hivevar => Ok(Statement::SetVariables(SetVariables { - variable: (*variables)[0].clone(), - value, - })), + SpStatement::Set(set) => match set { + Set::SingleAssignment { + scope: _, + hivevar, + variable, + values, + } if !hivevar => Ok(Statement::SetVariables(SetVariables { + variable, + value: values, + })), - SpStatement::SetTimeZone { value, .. 
} => Ok(Statement::SetVariables(SetVariables { - variable: ObjectName::from(vec![Ident::new("TIMEZONE")]), - value: vec![value], - })), + Set::SetTimeZone { local: _, value } => Ok(Statement::SetVariables(SetVariables { + variable: ObjectName::from(vec![Ident::new("TIMEZONE")]), + value: vec![value], + })), + set => error::UnsupportedSnafu { + keyword: set.to_string(), + } + .fail(), + }, unexp => error::UnsupportedSnafu { keyword: unexp.to_string(), } diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index 005653450c1c..e2e5fc50ac1a 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -148,7 +148,7 @@ impl ParserContext<'_> { expected: "a database name", actual: self.peek_token_as_string(), })?; - let database_name = Self::canonicalize_object_name(raw_database_name); + let database_name = Self::canonicalize_object_name(raw_database_name)?; ensure!( !database_name.0.is_empty(), InvalidDatabaseNameSnafu { @@ -168,7 +168,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_name = Self::canonicalize_object_name(raw_table_name); + let table_name = Self::canonicalize_object_name(raw_table_name)?; ensure!( !table_name.0.is_empty(), InvalidTableNameSnafu { @@ -197,7 +197,7 @@ impl ParserContext<'_> { expected: "a flow name", actual: self.peek_token_as_string(), })?; - let flow_name = Self::canonicalize_object_name(raw_flow_name); + let flow_name = Self::canonicalize_object_name(raw_flow_name)?; ensure!( !flow_name.0.is_empty(), InvalidFlowNameSnafu { @@ -214,7 +214,7 @@ impl ParserContext<'_> { expected: "a view name", actual: self.peek_token_as_string(), })?; - let view_name = Self::canonicalize_object_name(raw_view_name); + let view_name = Self::canonicalize_object_name(raw_view_name)?; ensure!( !view_name.0.is_empty(), InvalidTableNameSnafu { @@ -241,7 +241,7 @@ impl ParserContext<'_> { ); // Safety: already checked above - Ok(Self::canonicalize_object_name(table_name).0[0].to_string_unquoted()) + Ok(Self::canonicalize_object_name(table_name)?.0[0].to_string_unquoted()) } fn parse_db_name(&mut self) -> Result> { @@ -262,7 +262,7 @@ impl ParserContext<'_> { // Safety: already checked above Ok(Some( - Self::canonicalize_object_name(db_name).0[0].to_string_unquoted(), + Self::canonicalize_object_name(db_name)?.0[0].to_string_unquoted(), )) } diff --git a/src/sql/src/parsers/show_parser/trigger.rs b/src/sql/src/parsers/show_parser/trigger.rs index 9fecadd089f3..10054ba4a406 100644 --- a/src/sql/src/parsers/show_parser/trigger.rs +++ b/src/sql/src/parsers/show_parser/trigger.rs @@ -20,7 +20,7 @@ impl ParserContext<'_> { actual: self.peek_token_as_string(), })?; - let trigger_name = Self::canonicalize_object_name(trigger_name); + let trigger_name = Self::canonicalize_object_name(trigger_name)?; ensure!( !trigger_name.0.is_empty(), diff --git a/src/sql/src/parsers/truncate_parser.rs b/src/sql/src/parsers/truncate_parser.rs index e7dd9a8c7437..1fe04865751f 100644 --- a/src/sql/src/parsers/truncate_parser.rs +++ b/src/sql/src/parsers/truncate_parser.rs @@ -33,7 +33,7 @@ impl ParserContext<'_> { expected: "a table name", actual: self.peek_token_as_string(), })?; - let table_ident = Self::canonicalize_object_name(raw_table_ident); + let table_ident = Self::canonicalize_object_name(raw_table_ident)?; ensure!( !table_ident.0.is_empty(), diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index 15ef50ab71f6..593801808294 100644 --- 
a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -75,7 +75,7 @@ pub fn parser_expr_to_scalar_value_literal( // 1. convert parser expr to logical expr let empty_df_schema = DFSchema::empty(); let logical_expr = SqlToRel::new(&StubContextProvider::default()) - .sql_to_expr(expr.into(), &empty_df_schema, &mut Default::default()) + .sql_to_expr(expr, &empty_df_schema, &mut Default::default()) .context(ConvertToLogicalExpressionSnafu)?; struct FindNow { diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 051368f12cb0..b48e208043de 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -679,6 +679,7 @@ mod tests { vector_options: None, skipping_index_options: None, inverted_index_options: None, + json_datatype_options: None, }, }; diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index 6507701f49ba..ae5bce08c80b 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -102,6 +102,10 @@ pub enum AlterTableOperation { SetDefaults { defaults: Vec, }, + /// `REPARTITION (...) INTO (...)` + Repartition { + operation: RepartitionOperation, + }, } #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] @@ -114,6 +118,38 @@ pub struct SetDefaultsOperation { pub default_constraint: Expr, } +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub struct RepartitionOperation { + pub from_exprs: Vec, + pub into_exprs: Vec, +} + +impl RepartitionOperation { + pub fn new(from_exprs: Vec, into_exprs: Vec) -> Self { + Self { + from_exprs, + into_exprs, + } + } +} + +impl Display for RepartitionOperation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let from = self + .from_exprs + .iter() + .map(|expr| expr.to_string()) + .join(", "); + let into = self + .into_exprs + .iter() + .map(|expr| expr.to_string()) + .join(", "); + + write!(f, "({from}) INTO ({into})") + } +} + #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum SetIndexOperation { /// `MODIFY COLUMN SET FULLTEXT INDEX [WITH ]` @@ -196,6 +232,9 @@ impl Display for AlterTableOperation { let keys = keys.iter().map(|k| format!("'{k}'")).join(","); write!(f, "UNSET {keys}") } + AlterTableOperation::Repartition { operation } => { + write!(f, "REPARTITION {operation}") + } AlterTableOperation::SetIndex { options } => match options { SetIndexOperation::Fulltext { column_name, diff --git a/src/sql/src/statements/copy.rs b/src/sql/src/statements/copy.rs index 8c18a836f9d5..7aa099c53c42 100644 --- a/src/sql/src/statements/copy.rs +++ b/src/sql/src/statements/copy.rs @@ -151,20 +151,14 @@ impl CopyTableArgument { pub fn format(&self) -> Option { self.with .get(common_datasource::file_format::FORMAT_TYPE) - .cloned() + .map(|v| v.to_string()) .or_else(|| Some("PARQUET".to_string())) } pub fn pattern(&self) -> Option { self.with .get(common_datasource::file_format::FILE_PATTERN) - .cloned() - } - - pub fn timestamp_pattern(&self) -> Option { - self.with - .get(common_datasource::file_format::TIMESTAMP_FORMAT) - .cloned() + .map(|v| v.to_string()) } } diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index c47c01543c16..9d945e7c8d78 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
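`RepartitionOperation`'s `Display` above renders as `(<from exprs>) INTO (<into exprs>)`, and the `AlterTableOperation` arm prefixes that with `REPARTITION`. A minimal stand-in (string expressions instead of sqlparser `Expr`s) showing the resulting text form:

```rust
use std::fmt::{self, Display, Formatter};

struct Repartition {
    from_exprs: Vec<String>,
    into_exprs: Vec<String>,
}

impl Display for Repartition {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "({}) INTO ({})",
            self.from_exprs.join(", "),
            self.into_exprs.join(", ")
        )
    }
}

fn main() {
    let op = Repartition {
        from_exprs: vec!["device_id < 100".to_string()],
        into_exprs: vec![
            "device_id < 100 AND area < 'South'".to_string(),
            "device_id < 100 AND area >= 'South'".to_string(),
        ],
    };
    assert_eq!(
        format!("REPARTITION {op}"),
        "REPARTITION (device_id < 100) INTO (device_id < 100 AND area < 'South', device_id < 100 AND area >= 'South')"
    );
}
```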
-use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; +use datatypes::json::JsonStructureSettings; use datatypes::schema::{FulltextOptions, SkippingIndexOptions}; use itertools::Itertools; use serde::Serialize; @@ -25,7 +26,8 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; use crate::error::{ - InvalidFlowQuerySnafu, Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu, + InvalidFlowQuerySnafu, InvalidSqlSnafu, Result, SetFulltextOptionSnafu, + SetSkippingIndexOptionSnafu, }; use crate::statements::OptionMap; use crate::statements::statement::Statement; @@ -36,6 +38,12 @@ const COMMA_SEP: &str = ", "; const INDENT: usize = 2; pub const VECTOR_OPT_DIM: &str = "dim"; +pub const JSON_OPT_UNSTRUCTURED_KEYS: &str = "unstructured_keys"; +pub const JSON_OPT_FORMAT: &str = "format"; +pub const JSON_FORMAT_FULL_STRUCTURED: &str = "structured"; +pub const JSON_FORMAT_RAW: &str = "raw"; +pub const JSON_FORMAT_PARTIAL: &str = "partial"; + macro_rules! format_indent { ($fmt: expr, $arg: expr) => { format!($fmt, format_args!("{: >1$}", "", INDENT), $arg) @@ -124,6 +132,7 @@ pub struct ColumnExtensions { /// /// Inverted index doesn't have options at present. There won't be any options in that map. pub inverted_index_options: Option, + pub json_datatype_options: Option, } impl Column { @@ -209,6 +218,39 @@ impl ColumnExtensions { options.try_into().context(SetSkippingIndexOptionSnafu)?, )) } + + pub fn build_json_structure_settings(&self) -> Result> { + let Some(options) = self.json_datatype_options.as_ref() else { + return Ok(None); + }; + + let unstructured_keys = options + .value(JSON_OPT_UNSTRUCTURED_KEYS) + .and_then(|v| { + v.as_list().map(|x| { + x.into_iter() + .map(|x| x.to_string()) + .collect::>() + }) + }) + .unwrap_or_default(); + + options + .get(JSON_OPT_FORMAT) + .map(|format| match format { + JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(None)), + JSON_FORMAT_PARTIAL => Ok(JsonStructureSettings::PartialUnstructuredByKey { + fields: None, + unstructured_keys, + }), + JSON_FORMAT_RAW => Ok(JsonStructureSettings::UnstructuredRaw), + _ => InvalidSqlSnafu { + msg: format!("unknown JSON datatype 'format': {format}"), + } + .fail(), + }) + .transpose() + } } /// Partition on columns or values. diff --git a/src/sql/src/statements/create/trigger.rs b/src/sql/src/statements/create/trigger.rs index fb135d9696e5..606c5882d104 100644 --- a/src/sql/src/statements/create/trigger.rs +++ b/src/sql/src/statements/create/trigger.rs @@ -16,6 +16,8 @@ pub struct CreateTrigger { pub trigger_name: ObjectName, pub if_not_exists: bool, pub trigger_on: TriggerOn, + pub r#for: Option, + pub keep_firing_for: Option, pub labels: OptionMap, pub annotations: OptionMap, pub channels: Vec, @@ -30,6 +32,14 @@ impl Display for CreateTrigger { writeln!(f, "{}", self.trigger_name)?; writeln!(f, " {}", self.trigger_on)?; + if let Some(r#for) = &self.r#for { + writeln!(f, " FOR {}", r#for)?; + } + + if let Some(keep_firing_for) = &self.keep_firing_for { + writeln!(f, " KEEP_FIRING_FOR {}", keep_firing_for)?; + } + if !self.labels.is_empty() { let labels = self.labels.kv_pairs(); writeln!(f, " LABELS ({})", format_list_comma!(labels))?; @@ -73,33 +83,51 @@ impl Display for NotifyChannel { } } +/// Represents a duration expression with its parsed `Duration` and the original +/// raw string. 
And the struct implements `Visit` and `VisitMut` traits as no-op. #[derive(Debug, PartialEq, Eq, Clone, Serialize)] -pub struct TriggerOn { - pub query: Box, - pub interval: Duration, - pub raw_interval_expr: String, +pub struct DurationExpr { + pub duration: Duration, + pub raw_expr: String, } -impl Display for TriggerOn { +impl Display for DurationExpr { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "ON {} EVERY {}", self.query, self.raw_interval_expr) + if self.raw_expr.is_empty() { + // Fallback to display duration if raw_expr is empty. + // Display in seconds since we limit the min-duration to 1 second + // in SQL parser. + write!(f, "{} seconds", self.duration.as_secs()) + } else { + write!(f, "{}", self.raw_expr) + } } } -impl Visit for TriggerOn { - fn visit(&self, visitor: &mut V) -> ControlFlow { - Visit::visit(&self.query, visitor)?; +impl Visit for DurationExpr { + fn visit(&self, _visitor: &mut V) -> ControlFlow { ControlFlow::Continue(()) } } -impl VisitMut for TriggerOn { - fn visit(&mut self, visitor: &mut V) -> ControlFlow { - VisitMut::visit(&mut self.query, visitor)?; +impl VisitMut for DurationExpr { + fn visit(&mut self, _visitor: &mut V) -> ControlFlow { ControlFlow::Continue(()) } } +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] +pub struct TriggerOn { + pub query: Box, + pub query_interval: DurationExpr, +} + +impl Display for TriggerOn { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "ON {} EVERY {}", self.query, self.query_interval) + } +} + #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub enum ChannelType { /// Alert manager webhook options. @@ -114,6 +142,8 @@ pub struct AlertManagerWebhook { #[cfg(test)] mod tests { + use std::time::Duration; + use crate::dialect::GreptimeDbDialect; use crate::parser::{ParseOptions, ParserContext}; use crate::statements::statement::Statement; @@ -122,6 +152,8 @@ mod tests { fn test_display_create_trigger() { let sql = r#"CREATE TRIGGER IF NOT EXISTS cpu_monitor ON (SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 2) EVERY '1day 5 minute'::INTERVAL +FOR '5 minute'::INTERVAL +KEEP_FIRING_FOR '10 minute'::INTERVAL LABELS (label_name=label_val) ANNOTATIONS (annotation_name=annotation_val) NOTIFY @@ -141,6 +173,8 @@ WEBHOOK alert_manager2 URL 'http://127.0.0.1:9093' WITH (timeout='1m') let formatted = format!("{}", trigger); let expected = r#"CREATE TRIGGER IF NOT EXISTS cpu_monitor ON (SELECT host AS host_label, cpu, memory FROM machine_monitor WHERE cpu > 2) EVERY '1day 5 minute'::INTERVAL + FOR '5 minute'::INTERVAL + KEEP_FIRING_FOR '10 minute'::INTERVAL LABELS (label_name = 'label_val') ANNOTATIONS (annotation_name = 'annotation_val') NOTIFY( @@ -149,4 +183,19 @@ WEBHOOK alert_manager2 URL 'http://127.0.0.1:9093' WITH (timeout='1m') )"#; assert_eq!(expected, formatted); } + + #[test] + fn test_duration_expr_display() { + let duration_expr = super::DurationExpr { + duration: Duration::from_secs(300), + raw_expr: "'5 minute'::INTERVAL".to_string(), + }; + assert_eq!(duration_expr.to_string(), "'5 minute'::INTERVAL"); + + let duration_expr_no_raw = super::DurationExpr { + duration: Duration::from_secs(600), + raw_expr: "".to_string(), + }; + assert_eq!(duration_expr_no_raw.to_string(), "600 seconds"); + } } diff --git a/src/sql/src/statements/option_map.rs b/src/sql/src/statements/option_map.rs index 864bb3b3f9a4..f67b0dc72ab8 100644 --- a/src/sql/src/statements/option_map.rs +++ 
b/src/sql/src/statements/option_map.rs @@ -19,28 +19,46 @@ use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString}; use serde::Serialize; use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut}; +use crate::util::OptionValue; + const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"]; /// Options hashmap. #[derive(Clone, Debug, Default, Serialize)] pub struct OptionMap { - options: BTreeMap, + options: BTreeMap, #[serde(skip_serializing)] secrets: BTreeMap, } impl OptionMap { + pub fn new>(options: I) -> Self { + let (secrets, options): (Vec<_>, Vec<_>) = options + .into_iter() + .partition(|(k, _)| REDACTED_OPTIONS.contains(&k.as_str())); + Self { + options: options.into_iter().collect(), + secrets: secrets + .into_iter() + .filter_map(|(k, v)| { + v.as_string() + .map(|v| (k, SecretString::new(Box::new(v.to_string())))) + }) + .collect(), + } + } + pub fn insert(&mut self, k: String, v: String) { if REDACTED_OPTIONS.contains(&k.as_str()) { self.secrets.insert(k, SecretString::new(Box::new(v))); } else { - self.options.insert(k, v); + self.options.insert(k, v.into()); } } - pub fn get(&self, k: &str) -> Option<&String> { + pub fn get(&self, k: &str) -> Option<&str> { if let Some(value) = self.options.get(k) { - Some(value) + value.as_string() } else if let Some(value) = self.secrets.get(k) { Some(value.expose_secret()) } else { @@ -48,6 +66,10 @@ impl OptionMap { } } + pub fn value(&self, k: &str) -> Option<&OptionValue> { + self.options.get(k) + } + pub fn is_empty(&self) -> bool { self.options.is_empty() && self.secrets.is_empty() } @@ -58,7 +80,11 @@ impl OptionMap { pub fn to_str_map(&self) -> HashMap<&str, &str> { let mut map = HashMap::with_capacity(self.len()); - map.extend(self.options.iter().map(|(k, v)| (k.as_str(), v.as_str()))); + map.extend( + self.options + .iter() + .filter_map(|(k, v)| v.as_string().map(|v| (k.as_str(), v))), + ); map.extend( self.secrets .iter() @@ -69,7 +95,11 @@ impl OptionMap { pub fn into_map(self) -> HashMap { let mut map = HashMap::with_capacity(self.len()); - map.extend(self.options); + map.extend( + self.options + .into_iter() + .filter_map(|(k, v)| v.as_string().map(|v| (k, v.to_string()))), + ); map.extend( self.secrets .into_iter() @@ -80,7 +110,11 @@ impl OptionMap { pub fn kv_pairs(&self) -> Vec { let mut result = Vec::with_capacity(self.options.len() + self.secrets.len()); - for (k, v) in self.options.iter() { + for (k, v) in self + .options + .iter() + .filter_map(|(k, v)| v.as_string().map(|v| (k, v))) + { if k.contains(".") { result.push(format!("'{k}' = '{}'", v.escape_debug())); } else { diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs index d0096baa7152..f723409a6bab 100644 --- a/src/sql/src/statements/statement.rs +++ b/src/sql/src/statements/statement.rs @@ -310,6 +310,6 @@ impl TryFrom<&Statement> for DfStatement { .fail(); } }; - Ok(DfStatement::Statement(Box::new(s.into()))) + Ok(DfStatement::Statement(Box::new(s))) } } diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index 7de02ca1fa30..f71dfcc8d753 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -15,10 +15,15 @@ use std::collections::HashSet; use std::fmt::{Display, Formatter}; -use sqlparser::ast::{Expr, ObjectName, SetExpr, SqlOption, TableFactor, Value, ValueWithSpan}; +use serde::Serialize; +use snafu::ensure; +use sqlparser::ast::{ + Array, Expr, Ident, ObjectName, SetExpr, SqlOption, TableFactor, Value, ValueWithSpan, +}; +use sqlparser_derive::{Visit, VisitMut}; use 
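`OptionMap::new` above splits the incoming pairs by key: anything listed in `REDACTED_OPTIONS` (`access_key_id`, `secret_access_key`) goes to the secrets side as a `SecretString`, everything else keeps its `OptionValue`. A simplified sketch of that partition, using plain `String`s in place of `OptionValue`/`SecretString`:

```rust
use std::collections::BTreeMap;

const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"];

fn split_options(
    pairs: Vec<(String, String)>,
) -> (BTreeMap<String, String>, BTreeMap<String, String>) {
    let (secrets, options): (Vec<_>, Vec<_>) = pairs
        .into_iter()
        .partition(|(k, _)| REDACTED_OPTIONS.contains(&k.as_str()));
    (options.into_iter().collect(), secrets.into_iter().collect())
}

fn main() {
    let (options, secrets) = split_options(vec![
        ("format".to_string(), "parquet".to_string()),
        ("secret_access_key".to_string(), "hunter2".to_string()),
    ]);
    assert_eq!(options.get("format").map(String::as_str), Some("parquet"));
    // The secret never lands in the plain options map.
    assert!(!options.contains_key("secret_access_key"));
    assert_eq!(
        secrets.get("secret_access_key").map(String::as_str),
        Some("hunter2")
    );
}
```

One nuance of the real constructor visible in the diff: a redacted key whose value is not string-like is dropped by the `filter_map` rather than stored.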
crate::ast::ObjectNamePartExt; -use crate::error::{InvalidSqlSnafu, InvalidTableOptionValueSnafu, Result}; +use crate::error::{InvalidExprAsOptionValueSnafu, InvalidSqlSnafu, Result}; use crate::statements::create::SqlOrTql; /// Format an [ObjectName] without any quote of its idents. @@ -42,29 +47,98 @@ pub fn format_raw_object_name(name: &ObjectName) -> String { format!("{}", Inner { name }) } -pub fn parse_option_string(option: SqlOption) -> Result<(String, String)> { +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Visit, VisitMut)] +pub struct OptionValue(Expr); + +impl OptionValue { + fn try_new(expr: Expr) -> Result { + ensure!( + matches!(expr, Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_)), + InvalidExprAsOptionValueSnafu { + error: format!("{expr} not accepted") + } + ); + Ok(Self(expr)) + } + + fn expr_as_string(expr: &Expr) -> Option<&str> { + match expr { + Expr::Value(ValueWithSpan { value, .. }) => match value { + Value::SingleQuotedString(s) + | Value::DoubleQuotedString(s) + | Value::TripleSingleQuotedString(s) + | Value::TripleDoubleQuotedString(s) + | Value::SingleQuotedByteStringLiteral(s) + | Value::DoubleQuotedByteStringLiteral(s) + | Value::TripleSingleQuotedByteStringLiteral(s) + | Value::TripleDoubleQuotedByteStringLiteral(s) + | Value::SingleQuotedRawStringLiteral(s) + | Value::DoubleQuotedRawStringLiteral(s) + | Value::TripleSingleQuotedRawStringLiteral(s) + | Value::TripleDoubleQuotedRawStringLiteral(s) + | Value::EscapedStringLiteral(s) + | Value::UnicodeStringLiteral(s) + | Value::NationalStringLiteral(s) + | Value::HexStringLiteral(s) => Some(s), + Value::DollarQuotedString(s) => Some(&s.value), + Value::Number(s, _) => Some(s), + _ => None, + }, + Expr::Identifier(ident) => Some(&ident.value), + _ => None, + } + } + + pub fn as_string(&self) -> Option<&str> { + Self::expr_as_string(&self.0) + } + + pub fn as_list(&self) -> Option> { + let expr = &self.0; + match expr { + Expr::Value(_) | Expr::Identifier(_) => self.as_string().map(|s| vec![s]), + Expr::Array(array) => array + .elem + .iter() + .map(Self::expr_as_string) + .collect::>>(), + _ => None, + } + } +} + +impl From for OptionValue { + fn from(value: String) -> Self { + Self(Expr::Identifier(Ident::new(value))) + } +} + +impl From<&str> for OptionValue { + fn from(value: &str) -> Self { + Self(Expr::Identifier(Ident::new(value))) + } +} + +impl From> for OptionValue { + fn from(value: Vec<&str>) -> Self { + Self(Expr::Array(Array { + elem: value + .into_iter() + .map(|x| Expr::Identifier(Ident::new(x))) + .collect(), + named: false, + })) + } +} + +pub fn parse_option_string(option: SqlOption) -> Result<(String, OptionValue)> { let SqlOption::KeyValue { key, value } = option else { return InvalidSqlSnafu { msg: "Expecting a key-value pair in the option", } .fail(); }; - let v = match value { - Expr::Value(ValueWithSpan { - value: Value::SingleQuotedString(v), - .. - }) - | Expr::Value(ValueWithSpan { - value: Value::DoubleQuotedString(v), - .. - }) => v, - Expr::Identifier(v) => v.value, - Expr::Value(ValueWithSpan { - value: Value::Number(v, _), - .. 
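`OptionValue::as_list` above is what lets an option like `unstructured_keys = ["k", "foo.bar"]` (see the JSON column options earlier in this diff) be read back as a list: a scalar degrades to a one-element list, arrays are converted element by element, and the whole call yields `None` if any element is not string-like. A self-contained sketch of that behavior on a toy value type:

```rust
#[derive(Debug)]
enum Value {
    Str(String),
    Number(String),
    Array(Vec<Value>),
    Other,
}

fn value_as_str(v: &Value) -> Option<&str> {
    match v {
        Value::Str(s) | Value::Number(s) => Some(s),
        _ => None,
    }
}

fn as_list(v: &Value) -> Option<Vec<&str>> {
    match v {
        // A scalar behaves like a one-element list.
        Value::Str(_) | Value::Number(_) => value_as_str(v).map(|s| vec![s]),
        // Arrays convert element by element; one bad element fails the whole call.
        Value::Array(elems) => elems.iter().map(value_as_str).collect(),
        _ => None,
    }
}

fn main() {
    let keys = Value::Array(vec![
        Value::Str("k".to_string()),
        Value::Str("foo.bar".to_string()),
    ]);
    assert_eq!(as_list(&keys), Some(vec!["k", "foo.bar"]));
    assert_eq!(as_list(&Value::Str("raw".to_string())), Some(vec!["raw"]));
    assert_eq!(as_list(&Value::Array(vec![Value::Other])), None);
}
```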
- }) => v.clone(), - value => return InvalidTableOptionValueSnafu { key, value }.fail(), - }; + let v = OptionValue::try_new(value)?; let k = key.value.to_lowercase(); Ok((k, v)) } @@ -120,7 +194,7 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet {} + _ => {} }; } diff --git a/src/standalone/Cargo.toml b/src/standalone/Cargo.toml index 035b1dc06a54..d94975f270c6 100644 --- a/src/standalone/Cargo.toml +++ b/src/standalone/Cargo.toml @@ -19,6 +19,7 @@ common-meta.workspace = true common-options.workspace = true common-procedure.workspace = true common-query.workspace = true +common-stat.workspace = true common-telemetry.workspace = true common-time.workspace = true common-version.workspace = true @@ -27,6 +28,7 @@ datanode.workspace = true file-engine.workspace = true flow.workspace = true frontend.workspace = true +hostname.workspace = true log-store.workspace = true mito2.workspace = true query.workspace = true diff --git a/src/standalone/src/information_extension.rs b/src/standalone/src/information_extension.rs index 347955cfab85..b15ab74a9897 100644 --- a/src/standalone/src/information_extension.rs +++ b/src/standalone/src/information_extension.rs @@ -75,10 +75,12 @@ impl InformationExtension for StandaloneInformationExtension { // Use `self.start_time_ms` instead. // It's not precise but enough. start_time_ms: self.start_time_ms, - cpus: common_config::utils::get_cpus() as u32, - memory_bytes: common_config::utils::get_sys_total_memory() + cpus: common_stat::get_total_cpu_millicores() as u32, + memory_bytes: common_stat::get_total_memory_bytes() as u64, + hostname: hostname::get() .unwrap_or_default() - .as_bytes(), + .to_string_lossy() + .to_string(), }; Ok(vec![node_info]) } diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index 009cb4eeaff0..23927bdca944 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -338,9 +338,18 @@ fn make_region_compact(compact: CompactRequest) -> Result, } impl Default for RegionCompactRequest { @@ -1339,6 +1349,7 @@ impl Default for RegionCompactRequest { Self { // Default to regular compaction. options: compact_request::Options::Regular(Default::default()), + parallelism: None, } } } diff --git a/src/store-api/src/sst_entry.rs b/src/store-api/src/sst_entry.rs index a680e438bc2e..34e37de7eac5 100644 --- a/src/store-api/src/sst_entry.rs +++ b/src/store-api/src/sst_entry.rs @@ -249,6 +249,115 @@ impl StorageSstEntry { } } +/// An entry describing puffin index metadata for inspection. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PuffinIndexMetaEntry { + /// The table directory this index belongs to. + pub table_dir: String, + /// The full path of the index file in object store. + pub index_file_path: String, + /// The region id referencing the index file. + pub region_id: RegionId, + /// The table id referencing the index file. + pub table_id: TableId, + /// The region number referencing the index file. + pub region_number: RegionNumber, + /// The region group referencing the index file. + pub region_group: RegionGroup, + /// The region sequence referencing the index file. + pub region_sequence: RegionSeq, + /// Engine-specific file identifier (string form). + pub file_id: String, + /// Size of the index file in object store (if available). + pub index_file_size: Option, + /// Logical index type (`bloom_filter`, `fulltext_bloom`, `fulltext_tantivy`, `inverted`). + pub index_type: String, + /// Target type (`column`, ...). 
+ pub target_type: String, + /// Encoded target key string. + pub target_key: String, + /// Structured JSON describing the target. + pub target_json: String, + /// Size of the blob storing this target. + pub blob_size: u64, + /// Structured JSON describing index-specific metadata (if available). + pub meta_json: Option, + /// Node id associated with the index file (if known). + pub node_id: Option, +} + +impl PuffinIndexMetaEntry { + /// Returns the schema describing puffin index metadata entries. + pub fn schema() -> SchemaRef { + use datatypes::prelude::ConcreteDataType as Ty; + Arc::new(Schema::new(vec![ + ColumnSchema::new("table_dir", Ty::string_datatype(), false), + ColumnSchema::new("index_file_path", Ty::string_datatype(), false), + ColumnSchema::new("region_id", Ty::uint64_datatype(), false), + ColumnSchema::new("table_id", Ty::uint32_datatype(), false), + ColumnSchema::new("region_number", Ty::uint32_datatype(), false), + ColumnSchema::new("region_group", Ty::uint8_datatype(), false), + ColumnSchema::new("region_sequence", Ty::uint32_datatype(), false), + ColumnSchema::new("file_id", Ty::string_datatype(), false), + ColumnSchema::new("index_file_size", Ty::uint64_datatype(), true), + ColumnSchema::new("index_type", Ty::string_datatype(), false), + ColumnSchema::new("target_type", Ty::string_datatype(), false), + ColumnSchema::new("target_key", Ty::string_datatype(), false), + ColumnSchema::new("target_json", Ty::string_datatype(), false), + ColumnSchema::new("blob_size", Ty::uint64_datatype(), false), + ColumnSchema::new("meta_json", Ty::string_datatype(), true), + ColumnSchema::new("node_id", Ty::uint64_datatype(), true), + ])) + } + + /// Converts a list of puffin index metadata entries to a record batch. + pub fn to_record_batch(entries: &[Self]) -> std::result::Result { + let schema = Self::schema(); + let table_dirs = entries.iter().map(|e| e.table_dir.as_str()); + let index_file_paths = entries.iter().map(|e| e.index_file_path.as_str()); + let region_ids = entries.iter().map(|e| e.region_id.as_u64()); + let table_ids = entries.iter().map(|e| e.table_id); + let region_numbers = entries.iter().map(|e| e.region_number); + let region_groups = entries.iter().map(|e| e.region_group); + let region_sequences = entries.iter().map(|e| e.region_sequence); + let file_ids = entries.iter().map(|e| e.file_id.as_str()); + let index_file_sizes = entries.iter().map(|e| e.index_file_size); + let index_types = entries.iter().map(|e| e.index_type.as_str()); + let target_types = entries.iter().map(|e| e.target_type.as_str()); + let target_keys = entries.iter().map(|e| e.target_key.as_str()); + let target_jsons = entries.iter().map(|e| e.target_json.as_str()); + let blob_sizes = entries.iter().map(|e| e.blob_size); + let meta_jsons = entries.iter().map(|e| e.meta_json.as_deref()); + let node_ids = entries.iter().map(|e| e.node_id); + + let columns: Vec = vec![ + Arc::new(StringArray::from_iter_values(table_dirs)), + Arc::new(StringArray::from_iter_values(index_file_paths)), + Arc::new(UInt64Array::from_iter_values(region_ids)), + Arc::new(UInt32Array::from_iter_values(table_ids)), + Arc::new(UInt32Array::from_iter_values(region_numbers)), + Arc::new(UInt8Array::from_iter_values(region_groups)), + Arc::new(UInt32Array::from_iter_values(region_sequences)), + Arc::new(StringArray::from_iter_values(file_ids)), + Arc::new(UInt64Array::from_iter(index_file_sizes)), + Arc::new(StringArray::from_iter_values(index_types)), + Arc::new(StringArray::from_iter_values(target_types)), + 
Arc::new(StringArray::from_iter_values(target_keys)), + Arc::new(StringArray::from_iter_values(target_jsons)), + Arc::new(UInt64Array::from_iter_values(blob_sizes)), + Arc::new(StringArray::from_iter(meta_jsons)), + Arc::new(UInt64Array::from_iter(node_ids)), + ]; + + DfRecordBatch::try_new(schema.arrow_schema().clone(), columns) + } + + /// Reserved internal inspect table name for puffin index metadata. + pub fn reserved_table_name_for_inspection() -> &'static str { + "__inspect/__mito/__puffin_index_meta" + } +} + fn build_plan_helper( scan_request: ScanRequest, table_name: &str, @@ -577,6 +686,188 @@ mod tests { assert!(node_ids.is_null(1)); } + #[test] + fn test_puffin_index_meta_to_record_batch() { + let entries = vec![ + PuffinIndexMetaEntry { + table_dir: "table1".to_string(), + index_file_path: "index1".to_string(), + region_id: RegionId::with_group_and_seq(10, 0, 20), + table_id: 10, + region_number: 20, + region_group: 0, + region_sequence: 20, + file_id: "file1".to_string(), + index_file_size: Some(1024), + index_type: "bloom_filter".to_string(), + target_type: "column".to_string(), + target_key: "1".to_string(), + target_json: "{\"column\":1}".to_string(), + blob_size: 256, + meta_json: Some("{\"bloom\":{}}".to_string()), + node_id: Some(42), + }, + PuffinIndexMetaEntry { + table_dir: "table2".to_string(), + index_file_path: "index2".to_string(), + region_id: RegionId::with_group_and_seq(11, 0, 21), + table_id: 11, + region_number: 21, + region_group: 0, + region_sequence: 21, + file_id: "file2".to_string(), + index_file_size: None, + index_type: "inverted".to_string(), + target_type: "unknown".to_string(), + target_key: "legacy".to_string(), + target_json: "{}".to_string(), + blob_size: 0, + meta_json: None, + node_id: None, + }, + ]; + + let schema = PuffinIndexMetaEntry::schema(); + let batch = PuffinIndexMetaEntry::to_record_batch(&entries).unwrap(); + + assert_eq!(schema.arrow_schema().fields().len(), batch.num_columns()); + assert_eq!(2, batch.num_rows()); + + let table_dirs = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("table1", table_dirs.value(0)); + assert_eq!("table2", table_dirs.value(1)); + + let index_file_paths = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("index1", index_file_paths.value(0)); + assert_eq!("index2", index_file_paths.value(1)); + + let region_ids = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!( + RegionId::with_group_and_seq(10, 0, 20).as_u64(), + region_ids.value(0) + ); + assert_eq!( + RegionId::with_group_and_seq(11, 0, 21).as_u64(), + region_ids.value(1) + ); + + let table_ids = batch + .column(3) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(10, table_ids.value(0)); + assert_eq!(11, table_ids.value(1)); + + let region_numbers = batch + .column(4) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(20, region_numbers.value(0)); + assert_eq!(21, region_numbers.value(1)); + + let region_groups = batch + .column(5) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(0, region_groups.value(0)); + assert_eq!(0, region_groups.value(1)); + + let region_sequences = batch + .column(6) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(20, region_sequences.value(0)); + assert_eq!(21, region_sequences.value(1)); + + let file_ids = batch + .column(7) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("file1", file_ids.value(0)); + assert_eq!("file2", file_ids.value(1)); + + let index_file_sizes = batch + .column(8) + 
.as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(1024, index_file_sizes.value(0)); + assert!(index_file_sizes.is_null(1)); + + let index_types = batch + .column(9) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("bloom_filter", index_types.value(0)); + assert_eq!("inverted", index_types.value(1)); + + let target_types = batch + .column(10) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("column", target_types.value(0)); + assert_eq!("unknown", target_types.value(1)); + + let target_keys = batch + .column(11) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("1", target_keys.value(0)); + assert_eq!("legacy", target_keys.value(1)); + + let target_json = batch + .column(12) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("{\"column\":1}", target_json.value(0)); + assert_eq!("{}", target_json.value(1)); + + let blob_sizes = batch + .column(13) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(256, blob_sizes.value(0)); + assert_eq!(0, blob_sizes.value(1)); + + let meta_jsons = batch + .column(14) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!("{\"bloom\":{}}", meta_jsons.value(0)); + assert!(meta_jsons.is_null(1)); + + let node_ids = batch + .column(15) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(42, node_ids.value(0)); + assert!(node_ids.is_null(1)); + } + #[test] fn test_manifest_build_plan() { // Note: filter must reference a column in the projected schema diff --git a/src/store-api/src/storage.rs b/src/store-api/src/storage.rs index a8f872cd70b2..1df7a0aff624 100644 --- a/src/store-api/src/storage.rs +++ b/src/store-api/src/storage.rs @@ -28,4 +28,4 @@ pub use datatypes::schema::{ pub use self::descriptors::*; pub use self::file::{FileId, ParseIdError}; pub use self::requests::{ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector}; -pub use self::types::SequenceNumber; +pub use self::types::{SequenceNumber, SequenceRange}; diff --git a/src/store-api/src/storage/requests.rs b/src/store-api/src/storage/requests.rs index 513a98b148bf..5e9fae321512 100644 --- a/src/store-api/src/storage/requests.rs +++ b/src/store-api/src/storage/requests.rs @@ -55,9 +55,12 @@ pub struct ScanRequest { /// Optional hint to select rows from time-series. pub series_row_selector: Option, /// Optional constraint on the sequence number of the rows to read. - /// If set, only rows with a sequence number lesser or equal to this value + /// If set, only rows with a sequence number **lesser or equal** to this value /// will be returned. - pub sequence: Option, + pub memtable_max_sequence: Option, + /// Optional constraint on the minimal sequence number in the memtable. + /// If set, only the memtables that contain sequences **greater than** this value will be scanned + pub memtable_min_sequence: Option, /// Optional constraint on the minimal sequence number in the SST files. /// If set, only the SST files that contain sequences greater than this value will be scanned. pub sst_min_sequence: Option, @@ -121,7 +124,7 @@ impl Display for ScanRequest { series_row_selector )?; } - if let Some(sequence) = &self.sequence { + if let Some(sequence) = &self.memtable_max_sequence { write!(f, "{}sequence: {}", delimiter.as_str(), sequence)?; } if let Some(sst_min_sequence) = &self.sst_min_sequence { diff --git a/src/store-api/src/storage/types.rs b/src/store-api/src/storage/types.rs index ff1162d4013f..dbe5a377af0b 100644 --- a/src/store-api/src/storage/types.rs +++ b/src/store-api/src/storage/types.rs @@ -14,6 +14,63 @@ //! Common types. 
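Editor's note: the storage/types.rs hunk below introduces SequenceRange, which turns memtable/SST sequence bounds into an Arrow boolean mask. A minimal usage sketch (not part of the patch; it relies only on the re-export added to src/store-api/src/storage.rs earlier in this diff, and the literal values are made up):

use datatypes::arrow::array::UInt64Array;
use store_api::storage::SequenceRange;

fn sequence_range_sketch() -> Result<(), Box<dyn std::error::Error>> {
    // Sequence numbers of five rows.
    let seqs = UInt64Array::from(vec![5u64, 10, 11, 20, 21]);
    // Keep rows with sequence > 10 and <= 20 (exclusive lower bound, inclusive upper bound).
    let range = SequenceRange::new(Some(10), Some(20)).expect("at least one bound is set");
    let mask = range.filter(&seqs)?;
    let kept: Vec<bool> = mask.iter().map(|v| v.unwrap_or(false)).collect();
    assert_eq!(kept, vec![false, false, true, true, false]);
    Ok(())
}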
+use datatypes::arrow::array::{BooleanArray, Datum, UInt64Array};
+
 /// Represents a sequence number of data in storage. The offset of logstore can be used
 /// as a sequence number.
 pub type SequenceNumber = u64;
+
+/// A range of sequence numbers.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SequenceRange {
+    Gt {
+        /// Exclusive lower bound
+        min: SequenceNumber,
+    },
+    LtEq {
+        /// Inclusive upper bound
+        max: SequenceNumber,
+    },
+    GtLtEq {
+        /// Exclusive lower bound
+        min: SequenceNumber,
+        /// Inclusive upper bound
+        max: SequenceNumber,
+    },
+}
+
+impl SequenceRange {
+    pub fn new(min: Option<SequenceNumber>, max: Option<SequenceNumber>) -> Option<Self> {
+        match (min, max) {
+            (Some(min), Some(max)) => Some(SequenceRange::GtLtEq { min, max }),
+            (Some(min), None) => Some(SequenceRange::Gt { min }),
+            (None, Some(max)) => Some(SequenceRange::LtEq { max }),
+            (None, None) => None,
+        }
+    }
+
+    pub fn filter(
+        &self,
+        seqs: &dyn Datum,
+    ) -> Result<BooleanArray, datafusion_common::arrow::error::ArrowError> {
+        match self {
+            SequenceRange::Gt { min } => {
+                let min = UInt64Array::new_scalar(*min);
+                let pred = datafusion_common::arrow::compute::kernels::cmp::gt(seqs, &min)?;
+                Ok(pred)
+            }
+            SequenceRange::LtEq { max } => {
+                let max = UInt64Array::new_scalar(*max);
+                let pred = datafusion_common::arrow::compute::kernels::cmp::lt_eq(seqs, &max)?;
+                Ok(pred)
+            }
+            SequenceRange::GtLtEq { min, max } => {
+                let min = UInt64Array::new_scalar(*min);
+                let max = UInt64Array::new_scalar(*max);
+                let pred_min = datafusion_common::arrow::compute::kernels::cmp::gt(seqs, &min)?;
+                let pred_max = datafusion_common::arrow::compute::kernels::cmp::lt_eq(seqs, &max)?;
+                datafusion_common::arrow::compute::kernels::boolean::and(&pred_min, &pred_max)
+            }
+        }
+    }
+}
diff --git a/src/table/src/error.rs b/src/table/src/error.rs
index 431b5a4db449..7fd04b26bf13 100644
--- a/src/table/src/error.rs
+++ b/src/table/src/error.rs
@@ -150,9 +150,6 @@ pub enum Error {
         location: Location,
     },
 
-    #[snafu(display("Table options value is not valid, key: `{}`, value: `{}`", key, value))]
-    InvalidTableOptionValue { key: String, value: String },
-
     #[snafu(display("Invalid column option, column name: {}, error: {}", column_name, msg))]
     InvalidColumnOption {
         column_name: String,
@@ -228,8 +225,7 @@ impl ErrorExt for Error {
             Error::Unsupported { .. } => StatusCode::Unsupported,
             Error::ParseTableOption { .. } => StatusCode::InvalidArguments,
             Error::MissingTimeIndexColumn { .. } => StatusCode::IllegalState,
-            Error::InvalidTableOptionValue { .. }
-            | Error::SetSkippingOptions { .. }
+            Error::SetSkippingOptions { .. }
             | Error::UnsetSkippingOptions { .. }
            | Error::InvalidTableName { ..
} => StatusCode::InvalidArguments, } diff --git a/src/table/src/metadata.rs b/src/table/src/metadata.rs index 07d646ff21fc..b8537fc93e9d 100644 --- a/src/table/src/metadata.rs +++ b/src/table/src/metadata.rs @@ -26,7 +26,7 @@ use datatypes::schema::{ SkippingIndexOptions, }; use derive_builder::Builder; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use snafu::{OptionExt, ResultExt, ensure}; use store_api::metric_engine_consts::PHYSICAL_TABLE_METADATA_KEY; use store_api::mito_engine_options::{COMPACTION_TYPE, COMPACTION_TYPE_TWCS}; @@ -143,6 +143,8 @@ pub struct TableMeta { pub options: TableOptions, #[builder(default = "Utc::now()")] pub created_on: DateTime, + #[builder(default = "self.default_updated_on()")] + pub updated_on: DateTime, #[builder(default = "Vec::new()")] pub partition_key_indices: Vec, #[builder(default = "Vec::new()")] @@ -162,6 +164,7 @@ impl TableMetaBuilder { next_column_id: None, options: None, created_on: None, + updated_on: None, partition_key_indices: None, column_ids: None, } @@ -181,6 +184,10 @@ impl TableMetaBuilder { } } + fn default_updated_on(&self) -> DateTime { + self.created_on.unwrap_or_default() + } + pub fn new_external_table() -> Self { Self { schema: None, @@ -191,6 +198,7 @@ impl TableMetaBuilder { next_column_id: Some(0), options: None, created_on: None, + updated_on: None, partition_key_indices: None, column_ids: None, } @@ -243,7 +251,7 @@ impl TableMeta { table_name: &str, alter_kind: &AlterKind, ) -> Result { - match alter_kind { + let mut builder = match alter_kind { AlterKind::AddColumns { columns } => self.add_columns(table_name, columns), AlterKind::DropColumns { names } => self.remove_columns(table_name, names), AlterKind::ModifyColumnTypes { columns } => { @@ -257,7 +265,9 @@ impl TableMeta { AlterKind::UnsetIndexes { options } => self.unset_indexes(table_name, options), AlterKind::DropDefaults { names } => self.drop_defaults(table_name, names), AlterKind::SetDefaults { defaults } => self.set_defaults(table_name, defaults), - } + }?; + let _ = builder.updated_on(Utc::now()); + Ok(builder) } /// Creates a [TableMetaBuilder] with modified table options. @@ -1145,7 +1155,7 @@ impl From for TableIdent { } /// Struct used to serialize and deserialize [`TableMeta`]. -#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Default)] pub struct RawTableMeta { pub schema: RawSchema, /// The indices of columns in primary key. Note that the index of timestamp column @@ -1162,6 +1172,7 @@ pub struct RawTableMeta { pub region_numbers: Vec, pub options: TableOptions, pub created_on: DateTime, + pub updated_on: DateTime, /// Order doesn't matter to this array. 
#[serde(default)] pub partition_key_indices: Vec, @@ -1171,6 +1182,47 @@ pub struct RawTableMeta { pub column_ids: Vec, } +impl<'de> Deserialize<'de> for RawTableMeta { + fn deserialize( + deserializer: D, + ) -> std::result::Result>::Error> + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct Helper { + schema: RawSchema, + primary_key_indices: Vec, + value_indices: Vec, + engine: String, + next_column_id: u32, + region_numbers: Vec, + options: TableOptions, + created_on: DateTime, + updated_on: Option>, + #[serde(default)] + partition_key_indices: Vec, + #[serde(default)] + column_ids: Vec, + } + + let h = Helper::deserialize(deserializer)?; + Ok(RawTableMeta { + schema: h.schema, + primary_key_indices: h.primary_key_indices, + value_indices: h.value_indices, + engine: h.engine, + next_column_id: h.next_column_id, + region_numbers: h.region_numbers, + options: h.options, + created_on: h.created_on, + updated_on: h.updated_on.unwrap_or(h.created_on), + partition_key_indices: h.partition_key_indices, + column_ids: h.column_ids, + }) + } +} + impl From for RawTableMeta { fn from(meta: TableMeta) -> RawTableMeta { RawTableMeta { @@ -1182,6 +1234,7 @@ impl From for RawTableMeta { region_numbers: meta.region_numbers, options: meta.options, created_on: meta.created_on, + updated_on: meta.updated_on, partition_key_indices: meta.partition_key_indices, column_ids: meta.column_ids, } @@ -1201,6 +1254,7 @@ impl TryFrom for TableMeta { next_column_id: raw.next_column_id, options: raw.options, created_on: raw.created_on, + updated_on: raw.updated_on, partition_key_indices: raw.partition_key_indices, column_ids: raw.column_ids, }) diff --git a/src/table/src/requests.rs b/src/table/src/requests.rs index 2418c4a2c5a2..6cdf9454805e 100644 --- a/src/table/src/requests.rs +++ b/src/table/src/requests.rs @@ -401,6 +401,7 @@ pub struct CompactTableRequest { pub schema_name: String, pub table_name: String, pub compact_options: compact_request::Options, + pub parallelism: u32, } impl Default for CompactTableRequest { @@ -410,6 +411,7 @@ impl Default for CompactTableRequest { schema_name: Default::default(), table_name: Default::default(), compact_options: compact_request::Options::Regular(Default::default()), + parallelism: 1, } } } diff --git a/src/table/src/table/numbers.rs b/src/table/src/table/numbers.rs index 05f221bce0f4..9b1ed125c014 100644 --- a/src/table/src/table/numbers.rs +++ b/src/table/src/table/numbers.rs @@ -80,6 +80,7 @@ impl NumbersTable { next_column_id: 1, options: Default::default(), created_on: Default::default(), + updated_on: Default::default(), partition_key_indices: vec![], column_ids: vec![], }; diff --git a/tests-fuzz/src/generator/create_expr.rs b/tests-fuzz/src/generator/create_expr.rs index e9c43955ebeb..c216e2c287c8 100644 --- a/tests-fuzz/src/generator/create_expr.rs +++ b/tests-fuzz/src/generator/create_expr.rs @@ -511,7 +511,7 @@ mod tests { .unwrap(); let logical_table_serialized = serde_json::to_string(&logical_table_expr).unwrap(); - let logical_table_expected = 
r#"{"table_name":{"value":"impedit","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"totam","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"cumque","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"natus","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"molestias","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"expedita"}},"primary_keys":[4,2,3,6,5]}"#; + let logical_table_expected = r#"{"table_name":{"value":"impedit","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"totam","quote_style":null},"column_type":{"String":{"size_type":"Utf8"}},"options":["PrimaryKey"]},{"name":{"value":"cumque","quote_style":null},"column_type":{"String":{"size_type":"Utf8"}},"options":["PrimaryKey"]},{"name":{"value":"natus","quote_style":null},"column_type":{"String":{"size_type":"Utf8"}},"options":["PrimaryKey"]},{"name":{"value":"molestias","quote_style":null},"column_type":{"String":{"size_type":"Utf8"}},"options":["PrimaryKey"]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":{"size_type":"Utf8"}},"options":["PrimaryKey"]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"expedita"}},"primary_keys":[4,2,3,6,5]}"#; assert_eq!(logical_table_expected, logical_table_serialized); } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 538392e4371a..6d333780c145 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -662,7 +662,7 @@ pub async fn test_http_sql_slow_query(store_type: StorageType) { let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "sql_api").await; let client = TestClient::new(app).await; - let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; let encoded_slow_query = encode(slow_query); let query_params = format!("/v1/sql?sql={encoded_slow_query}"); @@ -1152,12 +1152,12 @@ pub async fn test_prom_http_api(store_type: StorageType) { // query `__name__` without match[] // create a physical table and a logical table let res = client - .get("/v1/sql?sql=create table physical_table (`ts` timestamp time index, message string) with ('physical_metric_table' = 'true');") + .get("/v1/sql?sql=create table physical_table (`ts` timestamp time index, `message` string) with ('physical_metric_table' = 'true');") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); let res = client - .get("/v1/sql?sql=create table logic_table (`ts` timestamp time index, message string) with ('on_physical_table' = 'physical_table');") + .get("/v1/sql?sql=create table logic_table 
(`ts` timestamp time index, `message` string) with ('on_physical_table' = 'physical_table');") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); @@ -5295,7 +5295,7 @@ pub async fn test_log_query(store_type: StorageType) { // prepare data with SQL API let res = client - .get("/v1/sql?sql=create table logs (`ts` timestamp time index, message string);") + .get("/v1/sql?sql=create table logs (`ts` timestamp time index, `message` string);") .send() .await; assert_eq!(res.status(), StatusCode::OK, "{:?}", res.text().await); diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/sql.rs index bacafc5380c2..c28347076a47 100644 --- a/tests-integration/tests/sql.rs +++ b/tests-integration/tests/sql.rs @@ -81,6 +81,7 @@ macro_rules! sql_tests { test_postgres_array_types, test_mysql_prepare_stmt_insert_timestamp, test_declare_fetch_close_cursor, + test_alter_update_on, ); )* }; @@ -520,6 +521,70 @@ pub async fn test_postgres_auth(store_type: StorageType) { guard.remove_all().await; } +pub async fn test_alter_update_on(store_type: StorageType) { + let (mut guard, fe_pg_server) = setup_pg_server(store_type, "test_postgres_crud").await; + let addr = fe_pg_server.bind_addr().unwrap().to_string(); + + let pool = PgPoolOptions::new() + .max_connections(2) + .connect(&format!("postgres://{addr}/public")) + .await + .unwrap(); + + sqlx::query( + "create table demo(i bigint, ts timestamp time index, d date, dt datetime, b blob)", + ) + .execute(&pool) + .await + .unwrap(); + + let row_before_alter = sqlx::query( + "SELECT * + FROM information_schema.tables WHERE table_name = $1;", + ) + .bind("demo") + .fetch_all(&pool) + .await + .unwrap(); + + assert_eq!(row_before_alter.len(), 1); + let before_row = &row_before_alter[0]; + + let created_on: NaiveDateTime = before_row.get("create_time"); + let updated_on_before: NaiveDateTime = before_row.get("update_time"); + assert_eq!(created_on, updated_on_before); + + std::thread::sleep(std::time::Duration::from_millis(1100)); + + sqlx::query("alter table demo add column j json;") + .execute(&pool) + .await + .unwrap(); + + let row_after_alter = sqlx::query( + "SELECT * + FROM information_schema.tables WHERE table_name = $1;", + ) + .bind("demo") + .fetch_all(&pool) + .await + .unwrap(); + + assert_eq!(row_after_alter.len(), 1); + let after_row = &row_after_alter[0]; + + let updated_on_after: NaiveDateTime = after_row.get("update_time"); + assert_ne!(updated_on_before, updated_on_after); + + let _ = sqlx::query("delete from demo") + .execute(&pool) + .await + .unwrap(); + + let _ = fe_pg_server.shutdown().await; + guard.remove_all().await; +} + pub async fn test_postgres_crud(store_type: StorageType) { let (mut guard, fe_pg_server) = setup_pg_server(store_type, "test_postgres_crud").await; let addr = fe_pg_server.bind_addr().unwrap().to_string(); @@ -643,7 +708,7 @@ pub async fn test_mysql_slow_query(store_type: StorageType) { .unwrap(); // The slow query will run at least longer than 1s. - let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; // Simulate a slow query. 
sqlx::query(slow_query).fetch_all(&pool).await.unwrap(); @@ -758,7 +823,7 @@ pub async fn test_postgres_slow_query(store_type: StorageType) { .await .unwrap(); - let slow_query = "WITH RECURSIVE slow_cte AS (SELECT 1 AS n, md5(CAST(random() AS STRING)) AS hash UNION ALL SELECT n + 1, md5(concat(hash, n)) FROM slow_cte WHERE n < 4500) SELECT COUNT(*) FROM slow_cte"; + let slow_query = "SELECT count(*) FROM generate_series(1, 1000000000)"; let _ = sqlx::query(slow_query).fetch_all(&pool).await.unwrap(); // Wait for the slow query to be recorded. diff --git a/tests/cases/distributed/information_schema/cluster_info.result b/tests/cases/distributed/information_schema/cluster_info.result index 76875e564102..63d02f435526 100644 --- a/tests/cases/distributed/information_schema/cluster_info.result +++ b/tests/cases/distributed/information_schema/cluster_info.result @@ -4,21 +4,22 @@ Affected Rows: 0 DESC TABLE CLUSTER_INFO; -+--------------+----------------------+-----+------+---------+---------------+ -| Column | Type | Key | Null | Default | Semantic Type | -+--------------+----------------------+-----+------+---------+---------------+ -| peer_id | Int64 | | NO | | FIELD | -| peer_type | String | | NO | | FIELD | -| peer_addr | String | | YES | | FIELD | -| cpus | UInt32 | | NO | | FIELD | -| memory_bytes | UInt64 | | NO | | FIELD | -| version | String | | NO | | FIELD | -| git_commit | String | | NO | | FIELD | -| start_time | TimestampMillisecond | | YES | | FIELD | -| uptime | String | | YES | | FIELD | -| active_time | String | | YES | | FIELD | -| node_status | String | | YES | | FIELD | -+--------------+----------------------+-----+------+---------+---------------+ ++----------------------+----------------------+-----+------+---------+---------------+ +| Column | Type | Key | Null | Default | Semantic Type | ++----------------------+----------------------+-----+------+---------+---------------+ +| peer_id | Int64 | | NO | | FIELD | +| peer_type | String | | NO | | FIELD | +| peer_addr | String | | YES | | FIELD | +| peer_hostname | String | | YES | | FIELD | +| total_cpu_millicores | UInt32 | | NO | | FIELD | +| total_memory_bytes | UInt64 | | NO | | FIELD | +| version | String | | NO | | FIELD | +| git_commit | String | | NO | | FIELD | +| start_time | TimestampMillisecond | | YES | | FIELD | +| uptime | String | | YES | | FIELD | +| active_time | String | | YES | | FIELD | +| node_status | String | | YES | | FIELD | ++----------------------+----------------------+-----+------+---------+---------------+ -- SQLNESS REPLACE version node_version -- SQLNESS REPLACE (\s\d+\.\d+(?:\.\d+)+\s) Version @@ -86,18 +87,18 @@ SELECT peer_id, node_status FROM CLUSTER_INFO WHERE PEER_TYPE = 'DATANODE' ORDER | 2 | {"workloads"PLACEHOLDER,"leader_regions"PLACEHOLDER,"follower_regions"PLACEHOLDER} | +---------+------------------------------------------------------------------+ -SELECT peer_type, cpus!=0, memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type; - -+-----------+-------------------------------+---------------------------------------+ -| peer_type | cluster_info.cpus != Int64(0) | cluster_info.memory_bytes != Int64(0) | -+-----------+-------------------------------+---------------------------------------+ -| DATANODE | true | true | -| DATANODE | true | true | -| DATANODE | true | true | -| FLOWNODE | true | true | -| FRONTEND | true | true | -| METASRV | true | true | -+-----------+-------------------------------+---------------------------------------+ +SELECT peer_type, total_cpu_millicores!=0, 
total_memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type; + ++-----------+-----------------------------------------------+---------------------------------------------+ +| peer_type | cluster_info.total_cpu_millicores != Int64(0) | cluster_info.total_memory_bytes != Int64(0) | ++-----------+-----------------------------------------------+---------------------------------------------+ +| DATANODE | true | true | +| DATANODE | true | true | +| DATANODE | true | true | +| FLOWNODE | true | true | +| FRONTEND | true | true | +| METASRV | true | true | ++-----------+-----------------------------------------------+---------------------------------------------+ USE PUBLIC; diff --git a/tests/cases/distributed/information_schema/cluster_info.sql b/tests/cases/distributed/information_schema/cluster_info.sql index 1bd9490bc1f4..61822b94515e 100644 --- a/tests/cases/distributed/information_schema/cluster_info.sql +++ b/tests/cases/distributed/information_schema/cluster_info.sql @@ -50,6 +50,6 @@ SELECT peer_id, peer_type, peer_addr, version, git_commit, start_time, uptime, a -- SQLNESS REPLACE (:\s*(\".*?\"|\[.*?\]|\{.*?\}|[0-9]+|true|false|null)) PLACEHOLDER SELECT peer_id, node_status FROM CLUSTER_INFO WHERE PEER_TYPE = 'DATANODE' ORDER BY peer_id; -SELECT peer_type, cpus!=0, memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type; +SELECT peer_type, total_cpu_millicores!=0, total_memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type; USE PUBLIC; diff --git a/tests/cases/standalone/common/aggregate/corr.result b/tests/cases/standalone/common/aggregate/corr.result index 7099c7a85ed0..919639bc7b5c 100644 --- a/tests/cases/standalone/common/aggregate/corr.result +++ b/tests/cases/standalone/common/aggregate/corr.result @@ -10,13 +10,12 @@ SELECT corr(NULL,NULL); +-----------------+ -- Single value returns NULL --- FIXME(dennis): datafusion returns 0.0 here, should be NULL SELECT corr(1,1); +-------------------------+ | corr(Int64(1),Int64(1)) | +-------------------------+ -| 0.0 | +| | +-------------------------+ -- Test with table diff --git a/tests/cases/standalone/common/aggregate/corr.sql b/tests/cases/standalone/common/aggregate/corr.sql index d22715337a78..8c859fdddbec 100644 --- a/tests/cases/standalone/common/aggregate/corr.sql +++ b/tests/cases/standalone/common/aggregate/corr.sql @@ -5,7 +5,6 @@ SELECT corr(NULL,NULL); -- Single value returns NULL --- FIXME(dennis): datafusion returns 0.0 here, should be NULL SELECT corr(1,1); -- Test with table diff --git a/tests/cases/standalone/common/aggregate/stddev.result b/tests/cases/standalone/common/aggregate/stddev.result index 4cabcd313a1d..78948c67d8f4 100644 --- a/tests/cases/standalone/common/aggregate/stddev.result +++ b/tests/cases/standalone/common/aggregate/stddev.result @@ -10,55 +10,55 @@ Affected Rows: 6 SELECT stddev_samp(1); -+------------------+ -| stddev(Int64(1)) | -+------------------+ -| | -+------------------+ ++-----------------------+ +| stddev_samp(Int64(1)) | ++-----------------------+ +| | ++-----------------------+ SELECT var_samp(1); -+---------------+ -| var(Int64(1)) | -+---------------+ -| | -+---------------+ ++--------------------+ +| var_samp(Int64(1)) | ++--------------------+ +| | ++--------------------+ -- stddev_samp SELECT round(stddev_samp(val), 1) FROM stddev_test; -+-----------------------------------------+ -| round(stddev(stddev_test.val),Int64(1)) | -+-----------------------------------------+ -| 478.8 | -+-----------------------------------------+ ++----------------------------------------------+ +| 
round(stddev_samp(stddev_test.val),Int64(1)) | ++----------------------------------------------+ +| 478.8 | ++----------------------------------------------+ SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL; -+-----------------------------------------+ -| round(stddev(stddev_test.val),Int64(1)) | -+-----------------------------------------+ -| 478.8 | -+-----------------------------------------+ ++----------------------------------------------+ +| round(stddev_samp(stddev_test.val),Int64(1)) | ++----------------------------------------------+ +| 478.8 | ++----------------------------------------------+ SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp; -+-----+----------------------+-----------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+-----------------------------------------+----------------------+ -| 1 | 85 | 0.7 | 42 | -| 2 | 1042 | 677.4 | 42 | -| 3 | | | | -+-----+----------------------+-----------------------------------------+----------------------+ ++-----+----------------------+----------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(stddev_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+----------------------------------------------+----------------------+ +| 1 | 85 | 0.7 | 42 | +| 2 | 1042 | 677.4 | 42 | +| 3 | | | | ++-----+----------------------+----------------------------------------------+----------------------+ SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp; -+-----+----------------------+-----------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+-----------------------------------------+----------------------+ -| 1 | 85 | 0.7 | 42 | -| 2 | 1042 | 677.4 | 42 | -+-----+----------------------+-----------------------------------------+----------------------+ ++-----+----------------------+----------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(stddev_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+----------------------------------------------+----------------------+ +| 1 | 85 | 0.7 | 42 | +| 2 | 1042 | 677.4 | 42 | ++-----+----------------------+----------------------------------------------+----------------------+ -- stddev_pop SELECT round(stddev_pop(val), 1) FROM stddev_test; @@ -99,38 +99,38 @@ SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE -- var_samp SELECT round(var_samp(val), 1) FROM stddev_test; -+--------------------------------------+ -| round(var(stddev_test.val),Int64(1)) | -+--------------------------------------+ -| 229281.6 | -+--------------------------------------+ ++-------------------------------------------+ +| round(var_samp(stddev_test.val),Int64(1)) | ++-------------------------------------------+ +| 229281.6 | ++-------------------------------------------+ SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL; -+--------------------------------------+ -| round(var(stddev_test.val),Int64(1)) | -+--------------------------------------+ -| 229281.6 | -+--------------------------------------+ 
++-------------------------------------------+ +| round(var_samp(stddev_test.val),Int64(1)) | ++-------------------------------------------+ +| 229281.6 | ++-------------------------------------------+ SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp; -+-----+----------------------+--------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+--------------------------------------+----------------------+ -| 1 | 85 | 0.5 | 42 | -| 2 | 1042 | 458882.0 | 42 | -| 3 | | | | -+-----+----------------------+--------------------------------------+----------------------+ ++-----+----------------------+-------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(var_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+-------------------------------------------+----------------------+ +| 1 | 85 | 0.5 | 42 | +| 2 | 1042 | 458882.0 | 42 | +| 3 | | | | ++-----+----------------------+-------------------------------------------+----------------------+ SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp; -+-----+----------------------+--------------------------------------+----------------------+ -| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) | -+-----+----------------------+--------------------------------------+----------------------+ -| 1 | 85 | 0.5 | 42 | -| 2 | 1042 | 458882.0 | 42 | -+-----+----------------------+--------------------------------------+----------------------+ ++-----+----------------------+-------------------------------------------+----------------------+ +| grp | sum(stddev_test.val) | round(var_samp(stddev_test.val),Int64(1)) | min(stddev_test.val) | ++-----+----------------------+-------------------------------------------+----------------------+ +| 1 | 85 | 0.5 | 42 | +| 2 | 1042 | 458882.0 | 42 | ++-----+----------------------+-------------------------------------------+----------------------+ -- var_pop SELECT round(var_pop(val), 1) FROM stddev_test; diff --git a/tests/cases/standalone/common/aggregate/string_agg.result b/tests/cases/standalone/common/aggregate/string_agg.result index 851d0d774430..a4e98dee18ed 100644 --- a/tests/cases/standalone/common/aggregate/string_agg.result +++ b/tests/cases/standalone/common/aggregate/string_agg.result @@ -33,12 +33,23 @@ Affected Rows: 9 SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1 ++---+---------------------------------+ +| g | string_agg(strings.x,Utf8("|")) | ++---+---------------------------------+ +| 1 | a|b | +| 2 | i|j | +| 3 | p | +| 4 | x|y|z | ++---+---------------------------------+ -- test agg on empty set SELECT STRING_AGG(x,',') FROM strings WHERE g > 100; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0 ++---------------------------------+ +| string_agg(strings.x,Utf8(",")) | ++---------------------------------+ +| | ++---------------------------------+ -- string_agg can be used instead of group_concat SELECT string_agg('a', ','); @@ -59,35 +70,75 @@ SELECT string_agg('a', ','); SELECT g, 
string_agg(x, ',') FROM strings GROUP BY g ORDER BY g; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1 ++---+---------------------------------+ +| g | string_agg(strings.x,Utf8(",")) | ++---+---------------------------------+ +| 1 | a,b | +| 2 | i,j | +| 3 | p | +| 4 | x,y,z | ++---+---------------------------------+ -- Test ORDER BY -- Single group SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0 ++--------------------------------------------------------------------+---------------------------------------------------------------------+ +| string_agg(strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] | ++--------------------------------------------------------------------+---------------------------------------------------------------------+ +| abijpxyz | a|b|i|j|p|x|y|z | ++--------------------------------------------------------------------+---------------------------------------------------------------------+ SELECT STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0 ++----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| string_agg(strings.x,Utf8("")) ORDER BY [strings.x DESC NULLS FIRST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x DESC NULLS FIRST] | ++----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| zyxpjiba | z|y|x|p|j|i|b|a | ++----------------------------------------------------------------------+-----------------------------------------------------------------------+ -- Grouped with ORDER BY SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1 ++---+--------------------------------------------------------------------+---------------------------------------------------------------------+ +| g | string_agg(strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] | ++---+--------------------------------------------------------------------+---------------------------------------------------------------------+ +| 1 | ab | a|b | +| 2 | ij | i|j | +| 3 | p | p | +| 4 | xyz | x|y|z | ++---+--------------------------------------------------------------------+---------------------------------------------------------------------+ SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1 ++---+----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| g | string_agg(strings.x,Utf8("")) ORDER BY 
[strings.x DESC NULLS FIRST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x DESC NULLS FIRST] | ++---+----------------------------------------------------------------------+-----------------------------------------------------------------------+ +| 1 | ba | b|a | +| 2 | ji | j|i | +| 3 | p | p | +| 4 | zyx | z|y|x | ++---+----------------------------------------------------------------------+-----------------------------------------------------------------------+ -- Test with DISTINCT SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0 ++-----------------------------------------------------------------------------+------------------------------------------------------------------------------+ +| string_agg(DISTINCT strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(DISTINCT strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] | ++-----------------------------------------------------------------------------+------------------------------------------------------------------------------+ +| abijpxyz | a|b|i|j|p|x|y|z | ++-----------------------------------------------------------------------------+------------------------------------------------------------------------------+ SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g; -Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1 ++---+-----------------------------------------------------------------------------+ +| g | string_agg(DISTINCT strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | ++---+-----------------------------------------------------------------------------+ +| 1 | ab | +| 2 | ij | +| 3 | p | +| 4 | xyz | ++---+-----------------------------------------------------------------------------+ -- cleanup DROP TABLE strings; diff --git a/tests/cases/standalone/common/alter/change_col_skipping_options.result b/tests/cases/standalone/common/alter/change_col_skipping_options.result index b3b90e2359c7..1b2895bd09d5 100644 --- a/tests/cases/standalone/common/alter/change_col_skipping_options.result +++ b/tests/cases/standalone/common/alter/change_col_skipping_options.result @@ -323,7 +323,7 @@ Error: 1002(Unexpected), Invalid skipping index option: Invalid false positive r ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = -0.01); -Error: 1004(InvalidArguments), Unrecognized table option key: false_positive_rate, value: -0.01 +Error: 1004(InvalidArguments), Invalid expr as option value, error: -0.01 not accepted ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 2); diff --git a/tests/cases/standalone/common/alter/repartition.result b/tests/cases/standalone/common/alter/repartition.result new file mode 100644 index 000000000000..e31832463180 --- /dev/null +++ b/tests/cases/standalone/common/alter/repartition.result @@ -0,0 +1,48 @@ +CREATE TABLE alter_repartition_table( + device_id INT, + area STRING, + ty STRING, + ts TIMESTAMP TIME INDEX, + PRIMARY KEY(device_id) +) PARTITION ON COLUMNS (device_id, area) ( + device_id < 100, + device_id >= 100 AND device_id < 200, + device_id >= 200 +); + +Affected Rows: 0 + +-- valid grammar, currently not implemented 
+ALTER TABLE alter_repartition_table REPARTITION ( + device_id < 100 +) INTO ( + device_id < 100 AND area < 'South', + device_id < 100 AND area >= 'South' +); + +Error: 1001(Unsupported), Not supported: ALTER TABLE REPARTITION + +-- invalid: empty source clause +ALTER TABLE alter_repartition_table REPARTITION () INTO ( + device_id < 100 +); + +Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected expression inside REPARTITION clause, found: ) + +-- invalid: more than one INTO clause +ALTER TABLE alter_repartition_table REPARTITION ( + device_id < 100 +) INTO ( + device_id < 50 +), ( + device_id >= 50 +) INTO ( + device_id >= 50 +); + +Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected end of REPARTITION clause, found: , + +DROP TABLE alter_repartition_table; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/alter/repartition.sql b/tests/cases/standalone/common/alter/repartition.sql new file mode 100644 index 000000000000..64010b222a61 --- /dev/null +++ b/tests/cases/standalone/common/alter/repartition.sql @@ -0,0 +1,37 @@ +CREATE TABLE alter_repartition_table( + device_id INT, + area STRING, + ty STRING, + ts TIMESTAMP TIME INDEX, + PRIMARY KEY(device_id) +) PARTITION ON COLUMNS (device_id, area) ( + device_id < 100, + device_id >= 100 AND device_id < 200, + device_id >= 200 +); + +-- valid grammar, currently not implemented +ALTER TABLE alter_repartition_table REPARTITION ( + device_id < 100 +) INTO ( + device_id < 100 AND area < 'South', + device_id < 100 AND area >= 'South' +); + +-- invalid: empty source clause +ALTER TABLE alter_repartition_table REPARTITION () INTO ( + device_id < 100 +); + +-- invalid: more than one INTO clause +ALTER TABLE alter_repartition_table REPARTITION ( + device_id < 100 +) INTO ( + device_id < 50 +), ( + device_id >= 50 +) INTO ( + device_id >= 50 +); + +DROP TABLE alter_repartition_table; diff --git a/tests/cases/standalone/common/create/create_metric_table.result b/tests/cases/standalone/common/create/create_metric_table.result index 2895105a9e6c..55f1525a533b 100644 --- a/tests/cases/standalone/common/create/create_metric_table.result +++ b/tests/cases/standalone/common/create/create_metric_table.result @@ -36,12 +36,12 @@ Affected Rows: 0 -- create logical table with different data type on field column CREATE TABLE t3 (ts timestamp time index, val string, host string, primary key (host)) engine=metric with ("on_physical_table" = "phy"); -Error: 1004(InvalidArguments), Column type mismatch. Expect Float64(Float64Type), got String(StringType) +Error: 1004(InvalidArguments), Column type mismatch. Expect Float64(Float64Type), got String(StringType { size_type: Utf8 }) -- create logical table with different data type on tag column CREATE TABLE t4 (ts timestamp time index, val double, host double, primary key (host)) engine=metric with ("on_physical_table" = "phy"); -Error: 1004(InvalidArguments), Column type mismatch. Expect String(StringType), got Float64(Float64Type) +Error: 1004(InvalidArguments), Column type mismatch. 
Expect String(StringType { size_type: Utf8 }), got Float64(Float64Type) -- create logical table with different column name on field column CREATE TABLE t5 (ts timestamp time index, valval double, host string primary key) engine = metric with ("on_physical_table" = "phy"); diff --git a/tests/cases/standalone/common/error/incorrect_sql.result b/tests/cases/standalone/common/error/incorrect_sql.result index 5069376ed694..d8511ad6df55 100644 --- a/tests/cases/standalone/common/error/incorrect_sql.result +++ b/tests/cases/standalone/common/error/incorrect_sql.result @@ -25,7 +25,7 @@ Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution err -- No matching function signature SELECT cos(0, 1, 2, 3); -Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Failed to coerce arguments to satisfy a call to 'cos' function: coercion from [Int64, Int64, Int64, Int64] to the signature Uniform(1, [Float64, Float32]) failed No function matches the given name and argument types 'cos(Int64, Int64, Int64, Int64)'. You might need to add explicit type casts. +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Failed to coerce arguments to satisfy a call to 'cos' function: coercion from Int64, Int64, Int64, Int64 to the signature Uniform(1, [Float64, Float32]) failed No function matches the given name and argument types 'cos(Int64, Int64, Int64, Int64)'. You might need to add explicit type casts. Candidate functions: cos(Float64/Float32) diff --git a/tests/cases/standalone/common/function/arithmetic.result b/tests/cases/standalone/common/function/arithmetic.result index 01d2c7e06296..c1f6dcc387fe 100644 --- a/tests/cases/standalone/common/function/arithmetic.result +++ b/tests/cases/standalone/common/function/arithmetic.result @@ -28,27 +28,27 @@ Error: 3001(EngineExecuteQuery), Divide by zero error SELECT POW (2, 5); -+--------------------------+ -| power(Int64(2),Int64(5)) | -+--------------------------+ -| 32 | -+--------------------------+ ++------------------------+ +| pow(Int64(2),Int64(5)) | ++------------------------+ +| 32 | ++------------------------+ SELECT POW (1.01, 365); -+---------------------------------+ -| power(Float64(1.01),Int64(365)) | -+---------------------------------+ -| 37.78343433288728 | -+---------------------------------+ ++-------------------------------+ +| pow(Float64(1.01),Int64(365)) | ++-------------------------------+ +| 37.78343433288728 | ++-------------------------------+ SELECT POW (0.99, 365); -+---------------------------------+ -| power(Float64(0.99),Int64(365)) | -+---------------------------------+ -| 0.025517964452291125 | -+---------------------------------+ ++-------------------------------+ +| pow(Float64(0.99),Int64(365)) | ++-------------------------------+ +| 0.025517964452291125 | ++-------------------------------+ SELECT CLAMP(10, 0, 1); diff --git a/tests/cases/standalone/common/insert/insert_invalid.result b/tests/cases/standalone/common/insert/insert_invalid.result index af74e727785f..88123a3af694 100644 --- a/tests/cases/standalone/common/insert/insert_invalid.result +++ b/tests/cases/standalone/common/insert/insert_invalid.result @@ -8,7 +8,7 @@ Affected Rows: 1 INSERT INTO strings VALUES (3, 4); -Error: 2000(InvalidSyntax), Failed to parse value: Fail to parse number 3, invalid column type: String(StringType) +Error: 2000(InvalidSyntax), Failed to parse value: Fail to parse number 3, invalid column type: String(StringType { size_type: Utf8 }) SELECT * FROM strings WHERE i = 'â‚('; diff --git 
a/tests/cases/standalone/common/order/limit.result b/tests/cases/standalone/common/order/limit.result index 059cc9706aed..e830a3ea4b78 100644 --- a/tests/cases/standalone/common/order/limit.result +++ b/tests/cases/standalone/common/order/limit.result @@ -49,7 +49,7 @@ Error: 1001(Unsupported), This feature is not implemented: Unsupported LIMIT exp SELECT a FROM test LIMIT row_number() OVER (); -Error: 3001(EngineExecuteQuery), This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: Nullary, volatility: Immutable } } }), params: WindowFunctionParams { args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, null_treatment: None, distinct: false } }), data_type: Int64 })) +Error: 3001(EngineExecuteQuery), This feature is not implemented: Unsupported LIMIT expression: Some(Cast(Cast { expr: WindowFunction(WindowFunction { fun: WindowUDF(WindowUDF { inner: RowNumber { signature: Signature { type_signature: Nullary, volatility: Immutable } } }), params: WindowFunctionParams { args: [], partition_by: [], order_by: [], window_frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }, filter: None, null_treatment: None, distinct: false } }), data_type: Int64 })) CREATE TABLE test2 (a STRING, ts TIMESTAMP TIME INDEX); diff --git a/tests/cases/standalone/common/range/by.result b/tests/cases/standalone/common/range/by.result index 0f876eb02473..0fd95ee2f331 100644 --- a/tests/cases/standalone/common/range/by.result +++ b/tests/cases/standalone/common/range/by.result @@ -52,12 +52,12 @@ SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts; SELECT ts, length(host)::INT64 + 2, max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)::INT64 + 2) ORDER BY ts; -+---------------------+------------------------------------------------------------------+------------------------+ -| ts | arrow_cast(character_length(host.host),Utf8("Int64")) + Int64(2) | max(host.val) RANGE 5s | -+---------------------+------------------------------------------------------------------+------------------------+ -| 1970-01-01T00:00:00 | 7 | 3 | -| 1970-01-01T00:00:20 | 7 | 5 | -+---------------------+------------------------------------------------------------------+------------------------+ ++---------------------+--------------------------------------------------------+------------------------+ +| ts | arrow_cast(length(host.host),Utf8("Int64")) + Int64(2) | max(host.val) RANGE 5s | ++---------------------+--------------------------------------------------------+------------------------+ +| 1970-01-01T00:00:00 | 7 | 3 | +| 1970-01-01T00:00:20 | 7 | 5 | ++---------------------+--------------------------------------------------------+------------------------+ -- Test error -- project non-aggregation key diff --git a/tests/cases/standalone/common/range/calculate.result b/tests/cases/standalone/common/range/calculate.result index 6ac21a935265..f27cbef398e1 100644 --- a/tests/cases/standalone/common/range/calculate.result +++ b/tests/cases/standalone/common/range/calculate.result @@ -188,22 +188,22 @@ SELECT ts, host, floor(cos(ceil(sin(min(val) RANGE '5s')))) FROM host ALIGN '5s' SELECT ts, host, gcd(CAST(max(floor(val::DOUBLE)) RANGE '10s' FILL PREV as INT64) * 4, max(val * 4) 
RANGE '10s' FILL PREV) * length(host) + 1 FROM host ALIGN '5s' ORDER BY host, ts; -+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ts | host | gcd(arrow_cast(max(floor(host.val)) RANGE 10s FILL PREV,Utf8("Int64")) * Int64(4),max(host.val * Int64(4)) RANGE 10s FILL PREV) * character_length(host.host) + Int64(1) | -+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 1969-12-31T23:59:55 | host1 | 1 | -| 1970-01-01T00:00:00 | host1 | 1 | -| 1970-01-01T00:00:05 | host1 | 21 | -| 1970-01-01T00:00:10 | host1 | 21 | -| 1970-01-01T00:00:15 | host1 | 41 | -| 1970-01-01T00:00:20 | host1 | 41 | -| 1969-12-31T23:59:55 | host2 | 61 | -| 1970-01-01T00:00:00 | host2 | 61 | -| 1970-01-01T00:00:05 | host2 | 81 | -| 1970-01-01T00:00:10 | host2 | 81 | -| 1970-01-01T00:00:15 | host2 | 101 | -| 1970-01-01T00:00:20 | host2 | 101 | -+---------------------+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ts | host | gcd(arrow_cast(max(floor(host.val)) RANGE 10s FILL PREV,Utf8("Int64")) * Int64(4),max(host.val * Int64(4)) RANGE 10s FILL PREV) * length(host.host) + Int64(1) | ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 1969-12-31T23:59:55 | host1 | 1 | +| 1970-01-01T00:00:00 | host1 | 1 | +| 1970-01-01T00:00:05 | host1 | 21 | +| 1970-01-01T00:00:10 | host1 | 21 | +| 1970-01-01T00:00:15 | host1 | 41 | +| 1970-01-01T00:00:20 | host1 | 41 | +| 1969-12-31T23:59:55 | host2 | 61 | +| 1970-01-01T00:00:00 | host2 | 61 | +| 1970-01-01T00:00:05 | host2 | 81 | +| 1970-01-01T00:00:10 | host2 | 81 | +| 1970-01-01T00:00:15 | host2 | 101 | +| 1970-01-01T00:00:20 | host2 | 101 | ++---------------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE host; diff --git a/tests/cases/standalone/common/show/show_databases_tables.result b/tests/cases/standalone/common/show/show_databases_tables.result index bcebad2b7920..97e858856982 100644 --- a/tests/cases/standalone/common/show/show_databases_tables.result +++ b/tests/cases/standalone/common/show/show_databases_tables.result @@ -123,42 +123,42 @@ SHOW TABLE STATUS; +++++++++++++++++++ |Name|Engine|Version|Row_format|Rows|Avg_row_length|Data_length|Max_data_length|Index_length|Data_free|Auto_increment|Create_time|Update_time|Check_time|Collation|Checksum|Create_options|Comment| +++++++++++++++++++ -|build_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|character_sets||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|check_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|cluster_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|collation_character_set_applicability||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| 
-|collations||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|column_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|column_statistics||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|columns||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|engines||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|events||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|files||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|flows||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|global_status||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|key_column_usage||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|optimizer_trace||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|parameters||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|partitions||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|procedure_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|process_list||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|profiling||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|referential_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|region_peers||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|region_statistics||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|routines||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|runtime_metrics||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|schema_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|schemata||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|session_status||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|ssts_manifest||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|ssts_storage||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|table_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|table_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|triggers||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| -|views||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| +|build_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|character_sets||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|check_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|cluster_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|collation_character_set_applicability||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|collations||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|column_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|column_statistics||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|columns||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|engines||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|events||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|files||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|flows||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|global_status||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|key_column_usage||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|optimizer_trace||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|parameters||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|partitions||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|procedure_info||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|process_list||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|profiling||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| 
+|referential_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|region_peers||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|region_statistics||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|routines||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|runtime_metrics||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|schema_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|schemata||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|session_status||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|ssts_manifest||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|ssts_storage||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|table_constraints||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|table_privileges||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|triggers||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +|views||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +++++++++++++++++++ -- SQLNESS REPLACE (\s[\-0-9T:\.]{15,}) DATETIME @@ -168,7 +168,7 @@ SHOW TABLE STATUS LIKE 'tables'; +++++++++++++++++++ |Name|Engine|Version|Row_format|Rows|Avg_row_length|Data_length|Max_data_length|Index_length|Data_free|Auto_increment|Create_time|Update_time|Check_time|Collation|Checksum|Create_options|Comment| +++++++++++++++++++ -|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| +|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +++++++++++++++++++ -- SQLNESS REPLACE (\s[\-0-9T:\.]{15,}) DATETIME @@ -178,7 +178,7 @@ SHOW TABLE STATUS WHERE Name = 'tables'; +++++++++++++++++++ |Name|Engine|Version|Row_format|Rows|Avg_row_length|Data_length|Max_data_length|Index_length|Data_free|Auto_increment|Create_time|Update_time|Check_time|Collation|Checksum|Create_options|Comment| +++++++++++++++++++ -|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| +|tables||11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +++++++++++++++++++ -- SQLNESS REPLACE (\s[\-0-9T:\.]{15,}) DATETIME @@ -188,7 +188,7 @@ SHOW TABLE STATUS from public; +++++++++++++++++++ |Name|Engine|Version|Row_format|Rows|Avg_row_length|Data_length|Max_data_length|Index_length|Data_free|Auto_increment|Create_time|Update_time|Check_time|Collation|Checksum|Create_options|Comment| +++++++++++++++++++ -|numbers|test_engine|11|Fixed|0|0|0|0|0|0|0|DATETIME|||utf8_bin|0||| +|numbers|test_engine|11|Fixed|0|0|0|0|0|0|0|DATETIME|DATETIME||utf8_bin|0||| +++++++++++++++++++ USE public; diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index 8d24c33b77bc..7da0520b3eb7 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -14,43 +14,43 @@ order by table_schema, table_name; +++++++++++++++++++++++++ |table_catalog|table_schema|table_name|table_type|table_id|data_length|max_data_length|index_length|max_index_length|avg_row_length|engine|version|row_format|table_rows|data_free|auto_increment|create_time|update_time|check_time|table_collation|checksum|create_options|table_comment|temporary| +++++++++++++++++++++++++ -|greptime|information_schema|build_info|LOCALTEMPORARY|8|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|character_sets|LOCALTEMPORARY|9|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| 
-|greptime|information_schema|check_constraints|LOCALTEMPORARY|12|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|cluster_info|LOCALTEMPORARY|31|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|collation_character_set_applicability|LOCALTEMPORARY|11|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|collations|LOCALTEMPORARY|10|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|column_privileges|LOCALTEMPORARY|6|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|column_statistics|LOCALTEMPORARY|7|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|columns|LOCALTEMPORARY|4|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|engines|LOCALTEMPORARY|5|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|events|LOCALTEMPORARY|13|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|files|LOCALTEMPORARY|14|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|flows|LOCALTEMPORARY|33|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|global_status|LOCALTEMPORARY|25|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|key_column_usage|LOCALTEMPORARY|16|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|17|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|parameters|LOCALTEMPORARY|18|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|partitions|LOCALTEMPORARY|28|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|procedure_info|LOCALTEMPORARY|34|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|process_list|LOCALTEMPORARY|36|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|profiling|LOCALTEMPORARY|19|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|referential_constraints|LOCALTEMPORARY|20|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|region_peers|LOCALTEMPORARY|29|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|region_statistics|LOCALTEMPORARY|35|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|routines|LOCALTEMPORARY|21|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|runtime_metrics|LOCALTEMPORARY|27|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|schema_privileges|LOCALTEMPORARY|22|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|schemata|LOCALTEMPORARY|15|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|session_status|LOCALTEMPORARY|26|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|ssts_manifest|LOCALTEMPORARY|37|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|ssts_storage|LOCALTEMPORARY|38|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|table_constraints|LOCALTEMPORARY|30|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| 
-|greptime|information_schema|table_privileges|LOCALTEMPORARY|23|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|tables|LOCALTEMPORARY|3|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|triggers|LOCALTEMPORARY|24|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|information_schema|views|LOCALTEMPORARY|32|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| -|greptime|public|numbers|LOCALTEMPORARY|2|0|0|0|0|0|test_engine|11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y| +|greptime|information_schema|build_info|LOCALTEMPORARY|8|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|character_sets|LOCALTEMPORARY|9|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|check_constraints|LOCALTEMPORARY|12|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|cluster_info|LOCALTEMPORARY|31|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|collation_character_set_applicability|LOCALTEMPORARY|11|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|collations|LOCALTEMPORARY|10|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|column_privileges|LOCALTEMPORARY|6|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|column_statistics|LOCALTEMPORARY|7|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|columns|LOCALTEMPORARY|4|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|engines|LOCALTEMPORARY|5|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|events|LOCALTEMPORARY|13|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|files|LOCALTEMPORARY|14|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|flows|LOCALTEMPORARY|33|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|global_status|LOCALTEMPORARY|25|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|key_column_usage|LOCALTEMPORARY|16|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|17|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|parameters|LOCALTEMPORARY|18|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|partitions|LOCALTEMPORARY|28|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|procedure_info|LOCALTEMPORARY|34|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|process_list|LOCALTEMPORARY|36|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|profiling|LOCALTEMPORARY|19|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|referential_constraints|LOCALTEMPORARY|20|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|region_peers|LOCALTEMPORARY|29|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|region_statistics|LOCALTEMPORARY|35|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| 
+|greptime|information_schema|routines|LOCALTEMPORARY|21|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|runtime_metrics|LOCALTEMPORARY|27|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|schema_privileges|LOCALTEMPORARY|22|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|schemata|LOCALTEMPORARY|15|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|session_status|LOCALTEMPORARY|26|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|ssts_manifest|LOCALTEMPORARY|37|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|ssts_storage|LOCALTEMPORARY|38|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|table_constraints|LOCALTEMPORARY|30|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|table_privileges|LOCALTEMPORARY|23|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|tables|LOCALTEMPORARY|3|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|triggers|LOCALTEMPORARY|24|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|information_schema|views|LOCALTEMPORARY|32|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +|greptime|public|numbers|LOCALTEMPORARY|2|0|0|0|0|0|test_engine|11|Fixed|0|0|0|DATETIME|DATETIME||utf8_bin|0|||Y| +++++++++++++++++++++++++ select * from information_schema.columns order by table_schema, table_name, column_name; @@ -71,17 +71,18 @@ select * from information_schema.columns order by table_schema, table_name, colu | greptime | information_schema | check_constraints | constraint_catalog | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | check_constraints | constraint_name | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | check_constraints | constraint_schema | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | active_time | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | cluster_info | cpus | 4 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | -| greptime | information_schema | cluster_info | git_commit | 7 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | memory_bytes | 5 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | -| greptime | information_schema | cluster_info | node_status | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | active_time | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | git_commit | 8 | 2147483647 | 2147483647 | | 
| | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | cluster_info | node_status | 12 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | cluster_info | peer_addr | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | peer_hostname | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | | greptime | information_schema | cluster_info | peer_id | 1 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | | greptime | information_schema | cluster_info | peer_type | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | cluster_info | start_time | 8 | | | | | 3 | | | | | select,insert | | TimestampMillisecond | timestamp(3) | FIELD | | Yes | timestamp(3) | | | -| greptime | information_schema | cluster_info | uptime | 9 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | cluster_info | version | 6 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | cluster_info | start_time | 9 | | | | | 3 | | | | | select,insert | | TimestampMillisecond | timestamp(3) | FIELD | | Yes | timestamp(3) | | | +| greptime | information_schema | cluster_info | total_cpu_millicores | 5 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | +| greptime | information_schema | cluster_info | total_memory_bytes | 6 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | No | bigint unsigned | | | +| greptime | information_schema | cluster_info | uptime | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | cluster_info | version | 7 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collation_character_set_applicability | character_set_name | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collation_character_set_applicability | collation_name | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | collations | character_set_name | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | diff --git a/tests/cases/standalone/common/system/pg_catalog.result b/tests/cases/standalone/common/system/pg_catalog.result index d6530a78d294..0aa7f1cc7e06 100644 --- a/tests/cases/standalone/common/system/pg_catalog.result +++ b/tests/cases/standalone/common/system/pg_catalog.result @@ -712,10 +712,10 @@ select * from pg_catalog.pg_type order by oid; -- SQLNESS REPLACE (\d+\s*) OID select * from pg_catalog.pg_database where datname = 'public'; 
-+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ -| oid | datname | datdba | encoding | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | datacl | -+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ -+-----+---------+--------+----------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------+ ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ +| oid | datname | datdba | encoding | datlocprovider | datcollate | datctype | datistemplate | datallowconn | datconnlimit | datlastsysoid | datfrozenxid | datminmxid | dattablespace | daticulocale | daticurules | datacl | ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ ++-----+---------+--------+----------+----------------+------------+----------+---------------+--------------+--------------+---------------+--------------+------------+---------------+--------------+-------------+--------+ -- \d -- SQLNESS PROTOCOL POSTGRES diff --git a/tests/cases/standalone/common/tql-explain-analyze/explain.result b/tests/cases/standalone/common/tql-explain-analyze/explain.result index 33cd57b3279d..846086bf67c6 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/explain.result +++ b/tests/cases/standalone/common/tql-explain-analyze/explain.result @@ -184,6 +184,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | physical_plan after OutputRequirements_| MergeScanExec: REDACTED |_|_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after LimitPushPastWindows_| SAME TEXT AS ABOVE_| | physical_plan after LimitPushdown_| SAME TEXT AS ABOVE_| | physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after EnsureCooperative_| CooperativeExec_| @@ -321,6 +322,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test AS series; | physical_plan after OutputRequirements_| MergeScanExec: REDACTED |_|_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| +| physical_plan after LimitPushPastWindows_| SAME TEXT AS ABOVE_| | physical_plan after LimitPushdown_| SAME TEXT AS ABOVE_| | physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after EnsureCooperative_| CooperativeExec_| diff --git a/tests/cases/standalone/common/tql/tql-cte.result b/tests/cases/standalone/common/tql/tql-cte.result index 76e4b511dbd8..7127f79d9f89 100644 --- a/tests/cases/standalone/common/tql/tql-cte.result +++ b/tests/cases/standalone/common/tql/tql-cte.result @@ -775,8 +775,8 @@ LIMIT 5; | | SubqueryAlias: l | | | TableScan: labels | | | ]] | -| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | -| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED +| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, avg_value@1 ASC 
NULLS LAST], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED | | ProjectionExec: expr=[ts@0 as ts, cpu@1 as avg_value, host@2 as host] | | | CoalesceBatchesExec: target_batch_size=8192 | | | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(date_trunc(Utf8("second"),t.ts)@2, date_trunc(Utf8("second"),l.ts)@2)], projection=[ts@0, cpu@1, host@4] | @@ -861,8 +861,8 @@ LIMIT 5; | | SubqueryAlias: l | | | TableScan: labels | | | ]] | -| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | -| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, host@2 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED +| physical_plan | SortPreservingMergeExec: [ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], fetch=5 | +| | SortExec: TopK(fetch=5), expr=[ts@0 ASC NULLS LAST, avg_value@1 ASC NULLS LAST], preserve_REDACTED | | ProjectionExec: expr=[ts@1 as ts, cpu@0 as avg_value, host@2 as host] | | | CoalesceBatchesExec: target_batch_size=8192 | | | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(date_trunc(Utf8("second"),t.ts)@2, date_trunc(Utf8("second"),l.ts)@2)], projection=[cpu@0, ts@1, host@4] | diff --git a/tests/cases/standalone/common/types/string/bigstring.result b/tests/cases/standalone/common/types/string/bigstring.result index 725f2dd659bb..a749e7538b9b 100644 --- a/tests/cases/standalone/common/types/string/bigstring.result +++ b/tests/cases/standalone/common/types/string/bigstring.result @@ -23,14 +23,14 @@ Affected Rows: 1 SELECT LENGTH(a) FROM test ORDER BY 1; -+--------------------------+ -| character_length(test.a) | -+--------------------------+ -| 10 | -| 100 | -| 1000 | -| 10000 | -+--------------------------+ ++----------------+ +| length(test.a) | ++----------------+ +| 10 | +| 100 | +| 1000 | +| 10000 | ++----------------+ DROP TABLE test; diff --git a/tests/cases/standalone/common/types/string/scan_big_varchar.result b/tests/cases/standalone/common/types/string/scan_big_varchar.result index 4d9261d1162a..374c8662bdb9 100644 --- a/tests/cases/standalone/common/types/string/scan_big_varchar.result +++ b/tests/cases/standalone/common/types/string/scan_big_varchar.result @@ -33,11 +33,11 @@ Affected Rows: 1 -- verify that the append worked SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 1 | 1 | 10000 | 10000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 1 | 1 | 10000 | 10000 | ++----------+-------------------+-------------------------+-------------------------+ -- we create a total of 16K entries in the big table -- the total size of this table is 16K*10K = 160MB @@ -48,11 +48,11 @@ Affected Rows: 1 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ 
-| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2 | 2 | 10000 | 20000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 2 | 2 | 10000 | 20000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 23 FROM bigtable; @@ -60,11 +60,11 @@ Affected Rows: 2 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 4 | 4 | 10000 | 40000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 4 | 4 | 10000 | 40000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 31 FROM bigtable; @@ -72,11 +72,11 @@ Affected Rows: 4 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 8 | 8 | 10000 | 80000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 8 | 8 | 10000 | 80000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 37 FROM bigtable; @@ -84,11 +84,11 @@ Affected Rows: 8 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 16 | 16 | 10000 | 160000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | 
++----------+-------------------+-------------------------+-------------------------+ +| 16 | 16 | 10000 | 160000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 41 FROM bigtable; @@ -96,11 +96,11 @@ Affected Rows: 16 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 32 | 32 | 10000 | 320000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 32 | 32 | 10000 | 320000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 47 FROM bigtable; @@ -108,11 +108,11 @@ Affected Rows: 32 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 64 | 64 | 10000 | 640000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 64 | 64 | 10000 | 640000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 51 FROM bigtable; @@ -120,11 +120,11 @@ Affected Rows: 64 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 128 | 128 | 10000 | 1280000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 128 | 128 | 10000 | 1280000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 53 FROM bigtable; @@ -132,11 +132,11 @@ Affected Rows: 128 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | 
max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 256 | 256 | 10000 | 2560000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 256 | 256 | 10000 | 2560000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 57 FROM bigtable; @@ -144,11 +144,11 @@ Affected Rows: 256 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 512 | 512 | 10000 | 5120000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 512 | 512 | 10000 | 5120000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 61 FROM bigtable; @@ -156,11 +156,11 @@ Affected Rows: 512 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 1024 | 1024 | 10000 | 10240000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 1024 | 1024 | 10000 | 10240000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable; @@ -168,20 +168,20 @@ Affected Rows: 1024 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2048 | 2048 | 10000 | 20480000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | 
++----------+-------------------+-------------------------+-------------------------+ +| 2048 | 2048 | 10000 | 20480000 | ++----------+-------------------+-------------------------+-------------------------+ -- SQLNESS ARG restart=true SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 2048 | 2048 | 10000 | 20480000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 2048 | 2048 | 10000 | 20480000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable; @@ -189,11 +189,11 @@ Affected Rows: 2048 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 4096 | 4096 | 10000 | 40960000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 4096 | 4096 | 10000 | 40960000 | ++----------+-------------------+-------------------------+-------------------------+ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable; @@ -201,11 +201,11 @@ Affected Rows: 4096 SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 8192 | 8192 | 10000 | 81920000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ ++----------+-------------------+-------------------------+-------------------------+ +| count(*) | count(bigtable.a) | max(length(bigtable.a)) | sum(length(bigtable.a)) | ++----------+-------------------+-------------------------+-------------------------+ +| 8192 | 8192 | 10000 | 81920000 | ++----------+-------------------+-------------------------+-------------------------+ DROP TABLE test; diff --git a/tests/cases/standalone/common/types/string/unicode.result b/tests/cases/standalone/common/types/string/unicode.result index 5580093c2260..c9cd0283c029 100644 --- a/tests/cases/standalone/common/types/string/unicode.result +++ b/tests/cases/standalone/common/types/string/unicode.result @@ -58,12 +58,12 @@ SELECT substr('🦤🦆f', 1, 
2); -- length on emojis SELECT length(s) FROM emojis ORDER BY id; -+----------------------------+ -| character_length(emojis.s) | -+----------------------------+ -| 1 | -| 3 | -+----------------------------+ ++------------------+ +| length(emojis.s) | ++------------------+ +| 1 | +| 3 | ++------------------+ DROP TABLE emojis; diff --git a/tests/cases/standalone/common/view/create.result b/tests/cases/standalone/common/view/create.result index 743108541153..005753612975 100644 --- a/tests/cases/standalone/common/view/create.result +++ b/tests/cases/standalone/common/view/create.result @@ -9,7 +9,7 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected: AS, CREATE VIEW test_view as DELETE FROM public.numbers; -Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Expected: SELECT, VALUES, or a subquery in the query body, found: DELETE at Line: 1, Column: 26 +Error: 1001(Unsupported), Failed to plan SQL: This feature is not implemented: Query DELETE FROM public.numbers not implemented yet, --- Table already exists --- CREATE VIEW test_table as SELECT * FROM public.numbers; @@ -87,55 +87,55 @@ SELECT * FROM INFORMATION_SCHEMA.TABLES ORDER BY TABLE_NAME, TABLE_TYPE; +++++++++++++++++++++++++ |table_catalog|table_schema|table_name|table_type|table_id|data_length|max_data_length|index_length|max_index_length|avg_row_length|engine|version|row_format|table_rows|data_free|auto_increment|create_time|update_time|check_time|table_collation|checksum|create_options|table_comment|temporary| +++++++++++++++++++++++++ -|greptime|information_schema|build_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|character_sets|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|check_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|cluster_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|collation_character_set_applicability|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|collations|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|column_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|column_statistics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|columns|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|engines|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|events|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|files|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|flows|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|global_status|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|information_schema|key_column_usage|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| -|greptime|public|numbers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID|test_engine|ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y| 
-|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|parameters|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|partitions|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|procedure_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|process_list|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|profiling|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|referential_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|region_peers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|region_statistics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|routines|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|runtime_metrics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|schema_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|schemata|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|session_status|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|ssts_manifest|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|ssts_storage|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|table_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|table_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|tables|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|public|test_table|BASETABLE|ID|ID|ID|ID|ID|ID|mito|ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||N|
-|greptime|public|test_view|VIEW|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||N|
-|greptime|information_schema|triggers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
-|greptime|information_schema|views|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
+|greptime|information_schema|build_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|character_sets|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|check_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|cluster_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|collation_character_set_applicability|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|collations|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|column_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|column_statistics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|columns|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|engines|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|events|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|files|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|flows|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|global_status|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|key_column_usage|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|public|numbers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID|test_engine|ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|parameters|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|partitions|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|procedure_info|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|process_list|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|profiling|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|referential_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|region_peers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|region_statistics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|routines|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|runtime_metrics|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|schema_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|schemata|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|session_status|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|ssts_manifest|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|ssts_storage|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|table_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|table_privileges|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|tables|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|public|test_table|BASETABLE|ID|ID|ID|ID|ID|ID|mito|ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||N|
+|greptime|public|test_view|VIEW|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||N|
+|greptime|information_schema|triggers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+|greptime|information_schema|views|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|DATETIME||utf8_bin|ID|||Y|
+++++++++++++++++++++++++
-- SQLNESS REPLACE (\s\d+\s) ID
SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'VIEW';
-+---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+-------------+------------+-----------------+----------+----------------+---------------+-----------+
-| table_catalog | table_schema | table_name | table_type | table_id | data_length | max_data_length | index_length | max_index_length | avg_row_length | engine | version | row_format | table_rows | data_free | auto_increment | create_time | update_time | check_time | table_collation | checksum | create_options | table_comment | temporary |
-+---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+-------------+------------+-----------------+----------+----------------+---------------+-----------+
-| greptime | public | test_view | VIEW |ID |ID |ID |ID |ID |ID | |ID | Fixed |ID |ID |ID | 1970-01-01T00:00:00 | | | utf8_bin |ID | | | N |
-+---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+-------------+------------+-----------------+----------+----------------+---------------+-----------+
++---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+---------------------+------------+-----------------+----------+----------------+---------------+-----------+
+| table_catalog | table_schema | table_name | table_type | table_id | data_length | max_data_length | index_length | max_index_length | avg_row_length | engine | version | row_format | table_rows | data_free | auto_increment | create_time | update_time | check_time | table_collation | checksum | create_options | table_comment | temporary |
++---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+---------------------+------------+-----------------+----------+----------------+---------------+-----------+
+| greptime | public | test_view | VIEW |ID |ID |ID |ID |ID |ID | |ID | Fixed |ID |ID |ID | 1970-01-01T00:00:00 | 1970-01-01T00:00:00 | | utf8_bin |ID | | | N |
++---------------+--------------+------------+------------+----------+-------------+-----------------+--------------+------------------+----------------+--------+---------+------------+------------+-----------+----------------+---------------------+---------------------+------------+-----------------+----------+----------------+---------------+-----------+
SHOW COLUMNS FROM test_view;
diff --git a/tests/cases/standalone/information_schema/cluster_info.result b/tests/cases/standalone/information_schema/cluster_info.result
index 674d687cac79..85429840284a 100644
--- a/tests/cases/standalone/information_schema/cluster_info.result
+++ b/tests/cases/standalone/information_schema/cluster_info.result
@@ -4,21 +4,22 @@ Affected Rows: 0
DESC TABLE CLUSTER_INFO;
-+--------------+----------------------+-----+------+---------+---------------+
-| Column | Type | Key | Null | Default | Semantic Type |
-+--------------+----------------------+-----+------+---------+---------------+
-| peer_id | Int64 | | NO | | FIELD |
-| peer_type | String | | NO | | FIELD |
-| peer_addr | String | | YES | | FIELD |
-| cpus | UInt32 | | NO | | FIELD |
-| memory_bytes | UInt64 | | NO | | FIELD |
-| version | String | | NO | | FIELD |
-| git_commit | String | | NO | | FIELD |
-| start_time | TimestampMillisecond | | YES | | FIELD |
-| uptime | String | | YES | | FIELD |
-| active_time | String | | YES | | FIELD |
-| node_status | String | | YES | | FIELD |
-+--------------+----------------------+-----+------+---------+---------------+
++----------------------+----------------------+-----+------+---------+---------------+
+| Column | Type | Key | Null | Default | Semantic Type |
++----------------------+----------------------+-----+------+---------+---------------+
+| peer_id | Int64 | | NO | | FIELD |
+| peer_type | String | | NO | | FIELD |
+| peer_addr | String | | YES | | FIELD |
+| peer_hostname | String | | YES | | FIELD |
+| total_cpu_millicores | UInt32 | | NO | | FIELD |
+| total_memory_bytes | UInt64 | | NO | | FIELD |
+| version | String | | NO | | FIELD |
+| git_commit | String | | NO | | FIELD |
+| start_time | TimestampMillisecond | | YES | | FIELD |
+| uptime | String | | YES | | FIELD |
+| active_time | String | | YES | | FIELD |
+| node_status | String | | YES | | FIELD |
++----------------------+----------------------+-----+------+---------+---------------+
-- SQLNESS REPLACE version node_version
-- SQLNESS REPLACE (\d+\.\d+(?:\.\d+)+) Version
@@ -60,13 +61,13 @@ SELECT peer_id, peer_type, peer_addr, version, git_commit, start_time, uptime, a
++
++
-SELECT peer_type, cpus!=0, memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type;
+SELECT peer_type, total_cpu_millicores!=0, total_memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type;
-+------------+-------------------------------+---------------------------------------+
-| peer_type | cluster_info.cpus != Int64(0) | cluster_info.memory_bytes != Int64(0) |
-+------------+-------------------------------+---------------------------------------+
-| STANDALONE | true | true |
-+------------+-------------------------------+---------------------------------------+
++------------+-----------------------------------------------+---------------------------------------------+
+| peer_type | cluster_info.total_cpu_millicores != Int64(0) | cluster_info.total_memory_bytes != Int64(0) |
++------------+-----------------------------------------------+---------------------------------------------+
+| STANDALONE | true | true |
++------------+-----------------------------------------------+---------------------------------------------+
USE PUBLIC;
diff --git a/tests/cases/standalone/information_schema/cluster_info.sql b/tests/cases/standalone/information_schema/cluster_info.sql
index 70fc16dd2050..7b134f118aee 100644
--- a/tests/cases/standalone/information_schema/cluster_info.sql
+++ b/tests/cases/standalone/information_schema/cluster_info.sql
@@ -30,6 +30,6 @@ SELECT peer_id, peer_type, peer_addr, version, git_commit, start_time, uptime, a
SELECT peer_id, peer_type, peer_addr, version, git_commit, start_time, uptime, active_time FROM CLUSTER_INFO WHERE PEER_ID > 0;
-SELECT peer_type, cpus!=0, memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type;
+SELECT peer_type, total_cpu_millicores!=0, total_memory_bytes!=0 FROM CLUSTER_INFO ORDER BY peer_type;
USE PUBLIC;