diff --git a/Cargo.lock b/Cargo.lock index f43b0600334d1..76b2fe3b76f00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1362,9 +1362,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64" @@ -1515,6 +1515,56 @@ dependencies = [ "piper", ] +[[package]] +name = "bollard" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97ccca1260af6a459d75994ad5acc1651bcabcbdbc41467cc9786519ab854c30" +dependencies = [ + "base64 0.22.1", + "bollard-stubs", + "bytes", + "futures-core", + "futures-util", + "hex", + "home", + "http 1.2.0", + "http-body-util", + "hyper 1.8.1", + "hyper-named-pipe", + "hyper-rustls", + "hyper-util", + "hyperlocal", + "log", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "serde", + "serde_derive", + "serde_json", + "serde_repr", + "serde_urlencoded", + "thiserror 2.0.17", + "tokio", + "tokio-util", + "tower-service", + "url", + "winapi", +] + +[[package]] +name = "bollard-stubs" +version = "1.47.1-rc.27.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da" +dependencies = [ + "serde", + "serde_repr", + "serde_with", +] + [[package]] name = "bon" version = "3.7.1" @@ -2125,11 +2175,21 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_affinity" @@ -2268,6 +2328,31 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags 2.9.4", + "crossterm_winapi", + "mio", + "parking_lot", + "rustix 0.38.44", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -2722,13 +2807,34 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", ] [[package]] @@ -2739,7 +2845,7 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.0", "windows-sys 0.61.1", ] @@ -2758,6 +2864,17 @@ dependencies = [ "const-random", ] +[[package]] +name = "docker_credential" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d89dfcba45b4afad7450a99b39e751590463e45c04728cf555d36bb66940de8" +dependencies = [ + "base64 0.21.7", + "serde", + "serde_json", +] + [[package]] name = "document-features" version = "0.2.11" @@ -3098,6 +3215,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + [[package]] name = "event-listener" version = "2.5.3" @@ -3911,6 +4039,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper 1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-openssl" version = "0.10.2" @@ -4027,6 +4170,21 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "hyperlocal" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" +dependencies = [ + "hex", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "iana-time-zone" version = "0.1.47" @@ -5062,6 +5220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", + "log", "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -5190,7 +5349,7 @@ dependencies = [ "axum", "clap", "csv", - "dirs", + "dirs 6.0.0", "hyper 1.8.1", "indicatif", "maplit", @@ -5203,7 +5362,7 @@ dependencies = [ "openssl-probe", "reqwest 0.12.23", "rpassword", - "security-framework", + "security-framework 2.10.0", "semver", "serde", "serde-aux", @@ -6001,6 +6160,35 @@ dependencies = [ "zip", ] +[[package]] +name = "mz-deploy" +version = "0.1.0" +dependencies = [ + "async-stream", + "async-trait", + "chrono", + "clap", + "crossterm", + "dirs 5.0.1", + "futures", + "mz-build-info", + "mz-ore", + "mz-sql-parser", + "openssl", + "owo-colors", + "postgres-openssl", + "serde", + "serde_json", + "sha2", + "tempfile", + "testcontainers", + "thiserror 2.0.17", + "tokio", + "tokio-postgres", + "toml", + "workspace-hack", +] + [[package]] name = "mz-durable-cache" version = "0.0.0" @@ -8321,7 +8509,7 @@ dependencies = [ "openssl-probe", "openssl-sys", "schannel", - "security-framework", + "security-framework 2.10.0", "security-framework-sys", 
"tempfile", ] @@ -8877,6 +9065,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "owo-colors" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" + [[package]] name = "p256" version = "0.13.2" @@ -9000,6 +9194,31 @@ dependencies = [ "zstd", ] +[[package]] +name = "parse-display" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914a1c2265c98e2446911282c6ac86d8524f495792c38c5bd884f80499c7538a" +dependencies = [ + "parse-display-derive", + "regex", + "regex-syntax", +] + +[[package]] +name = "parse-display-derive" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ae7800a4c974efd12df917266338e79a7a74415173caf7e70aa0a0707345281" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "regex-syntax", + "structmeta", + "syn 2.0.106", +] + [[package]] name = "parse-zoneinfo" version = "0.3.0" @@ -9352,8 +9571,8 @@ checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "postgres" -version = "0.19.8" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +version = "0.19.12" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "bytes", "fallible-iterator", @@ -9365,8 +9584,8 @@ dependencies = [ [[package]] name = "postgres-openssl" -version = "0.5.0" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +version = "0.5.2" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "openssl", "tokio", @@ -9376,8 +9595,8 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.7" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +version = "0.6.9" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "base64 0.22.1", "byteorder", @@ -9386,7 +9605,7 @@ dependencies = [ "hmac", "md-5", "memchr", - "rand 0.8.5", + "rand 0.9.2", "sha2", "stringprep", ] @@ -9394,7 +9613,7 @@ dependencies = [ [[package]] name = "postgres-replication" version = "0.6.7" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "byteorder", "bytes", @@ -9408,14 +9627,14 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.7" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +version = "0.2.11" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "bytes", "chrono", "fallible-iterator", "postgres-protocol", - "serde", + "serde_core", "serde_json", "uuid", ] @@ -10103,6 +10322,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] 
name = "redox_syscall" version = "0.4.1" @@ -10121,6 +10349,17 @@ dependencies = [ "bitflags 2.9.4", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "redox_users" version = "0.5.0" @@ -10231,7 +10470,7 @@ version = "0.11.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -10250,7 +10489,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -10579,19 +10818,41 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", + "ring", "rustls-pki-types", "rustls-webpki", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework 3.5.1", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64 0.21.5", + "base64 0.21.7", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", ] [[package]] @@ -10740,7 +11001,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6" dependencies = [ "bitflags 1.3.2", - "core-foundation", + "core-foundation 0.9.3", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags 2.9.4", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -10748,9 +11022,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", @@ -11155,6 +11429,27 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "signal-hook" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.6" @@ -11444,6 +11739,29 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "structmeta" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e1575d8d40908d70f6fd05537266b90ae71b15dbbe7a8b7dffa2b759306d329" +dependencies = [ + "proc-macro2", + "quote", + "structmeta-derive", + "syn 2.0.106", +] + +[[package]] +name = "structmeta-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "strum" version = "0.27.2" @@ -11567,7 +11885,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ "bitflags 1.3.2", - "core-foundation", + "core-foundation 0.9.3", "system-configuration-sys 0.5.0", ] @@ -11578,7 +11896,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.4", - "core-foundation", + "core-foundation 0.9.3", "system-configuration-sys 0.6.0", ] @@ -11696,6 +12014,35 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" +[[package]] +name = "testcontainers" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a4f01f39bb10fc2a5ab23eb0d888b1e2bb168c157f61a1b98e6c501c639c74" +dependencies = [ + "async-trait", + "bollard", + "bollard-stubs", + "bytes", + "docker_credential", + "either", + "etcetera", + "futures", + "log", + "memchr", + "parse-display", + "pin-project-lite", + "serde", + "serde_json", + "serde_with", + "thiserror 2.0.17", + "tokio", + "tokio-stream", + "tokio-tar", + "tokio-util", + "url", +] + [[package]] name = "testing_table" version = "0.3.0" @@ -12044,8 +12391,8 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.11" -source = "git+https://github.com/MaterializeInc/rust-postgres#c4b473b478b3adfbf8667d2fbe895d8423f1290b" +version = "0.7.15" +source = "git+https://github.com/MaterializeInc/rust-postgres#6d48e0e1aeabf98fa447f45eaea91af64df8bab0" dependencies = [ "async-trait", "byteorder", @@ -12056,13 +12403,13 @@ dependencies = [ "log", "parking_lot", "percent-encoding", - "phf 0.11.3", + "phf 0.13.1", "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.8.5", + "rand 0.9.2", "serde", - "socket2 0.5.10", + "socket2 0.6.0", "tokio", "tokio-util", "whoami", @@ -12090,6 +12437,21 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "tokio-tar" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75" +dependencies = [ + "filetime", + "futures-core", + "libc", + "redox_syscall 0.3.5", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "tokio-test" version = "0.4.4" @@ -13473,7 +13835,7 @@ dependencies = [ "rustix 
1.0.7", "schemars", "scopeguard", - "security-framework", + "security-framework 2.10.0", "semver", "serde", "serde_core", diff --git a/Cargo.toml b/Cargo.toml index 5ae0e0cdbb082..5257f1260970f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ members = [ "src/mysql-util", "src/mz", "src/mz-debug", + "src/mz-deploy", "src/npm", "src/orchestrator", "src/orchestrator-kubernetes", @@ -181,6 +182,7 @@ default-members = [ "src/mysql-util", "src/mz", "src/mz-debug", + "src/mz-deploy", "src/npm", "src/orchestrator", "src/orchestrator-kubernetes", diff --git a/src/mz-deploy/Cargo.toml b/src/mz-deploy/Cargo.toml new file mode 100644 index 0000000000000..80d1621359416 --- /dev/null +++ b/src/mz-deploy/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "mz-deploy" +description = "Deployment tool for Materialize." +version = "0.1.0" +edition.workspace = true +rust-version.workspace = true +publish = false + +[lints] +workspace = true + +[dependencies] +async-stream = "0.3" +async-trait = "0.1" +futures = "0.3" +mz-build-info = { path = "../build-info" } +mz-ore = { path = "../ore", default-features = false, features = ["async"] } +mz-sql-parser = { path = "../sql-parser", default-features = false } +thiserror = "2.0" +clap = { version = "4.5.23", features = ["derive", "env"] } +crossterm = "0.28" +owo-colors = "4.0" +tokio = { version = "1", features = ["full"] } +tokio-postgres = { version = "0.7", features = ["with-chrono-0_4"] } +postgres-openssl = { git = "https://github.com/MaterializeInc/rust-postgres" } +openssl = "0.10" +toml = "0.8" +serde = { version = "1.0", features = ["derive"] } +dirs = "5.0" +serde_json = "1.0.145" +sha2 = "0.10" +chrono = "0.4" +workspace-hack = { version = "0.0.0", path = "../workspace-hack", optional = true } + +[dev-dependencies] +tempfile = "3.0" +testcontainers = "0.23" + +[package.metadata.cargo-udeps.ignore] +normal = ["workspace-hack"] + +[features] +default = ["workspace-hack", "docker-typecheck"] +docker-typecheck = [] diff --git a/src/mz-deploy/ci/Dockerfile b/src/mz-deploy/ci/Dockerfile new file mode 100644 index 0000000000000..cbfbd38675e6a --- /dev/null +++ b/src/mz-deploy/ci/Dockerfile @@ -0,0 +1,7 @@ +MZFROM ubuntu-base + +COPY mz-deploy /usr/local/bin/ + +WORKDIR /workdir + +ENTRYPOINT ["mz-deploy"] diff --git a/src/mz-deploy/ci/mzbuild.yml b/src/mz-deploy/ci/mzbuild.yml new file mode 100644 index 0000000000000..a83c84362028d --- /dev/null +++ b/src/mz-deploy/ci/mzbuild.yml @@ -0,0 +1,4 @@ +name: mz-deploy +pre-image: + - type: cargo-build + bin: mz-deploy diff --git a/src/mz-deploy/src/bin/mz-deploy/main.rs b/src/mz-deploy/src/bin/mz-deploy/main.rs new file mode 100644 index 0000000000000..ffdb22220696b --- /dev/null +++ b/src/mz-deploy/src/bin/mz-deploy/main.rs @@ -0,0 +1,440 @@ +use clap::CommandFactory; +use clap::{Parser, Subcommand}; +use mz_build_info::{BuildInfo, build_info}; +use mz_deploy::cli; +use mz_deploy::cli::CliError; +use mz_deploy::client::ConnectionError; +use mz_deploy::client::config::ProfilesConfig; +use mz_deploy::utils::log; +use std::path::PathBuf; +use std::sync::LazyLock; + +const BUILD_INFO: BuildInfo = build_info!(); +static VERSION: LazyLock = LazyLock::new(|| BUILD_INFO.human_version(None)); + +/// Materialize deployment tool +#[derive(Parser, Debug)] +#[command(name = "mz-deploy", version = VERSION.as_str())] +#[command(about = "Safe, testable deployments for Materialize")] +struct Args { + /// Path to the project root directory containing database schemas + #[arg(short, long, default_value = ".", global = true)] + 
directory: PathBuf, + + /// Enable verbose output for debugging + #[arg(short, long, global = true)] + verbose: bool, + + /// Database connection profile to use (from profiles.toml) + #[arg(short, long, global = true)] + profile: Option, + + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand, Debug)] +enum Command { + /// Compile and validate SQL without connecting to database + /// + /// Parses all SQL files, validates dependencies, and optionally type-checks SQL + /// against a local Materialize Docker container. This is useful for local development + /// and CI/CD pipelines to catch errors before deployment. + #[command(visible_alias = "build")] + Compile { + /// Skip SQL type checking (faster but less thorough validation) + #[arg(long)] + skip_typecheck: bool, + + /// Materialize Docker image to use for type checking + #[arg(long, value_name = "IMAGE")] + docker_image: Option, + }, + + /// Create tables that don't exist in the database + /// + /// Queries the database first and only creates tables that don't already exist. + /// Tracks the deployment under a deploy ID (default: random 7-char hex). + /// Only tables that are actually created are recorded in deployment metadata. + /// + /// Example: + /// mz-deploy create-tables # Use random deploy ID + /// mz-deploy create-tables --name abc123 # Use custom deploy ID + CreateTables { + /// Deploy ID for this table deployment (default: random 7-char hex) + /// + /// The deploy ID will be used to track this table deployment separately. + /// Must contain only alphanumeric characters, hyphens, and underscores. + #[arg(long, value_name = "DEPLOY_ID")] + deploy_id: Option, + + /// Allow deployment with uncommitted git changes + #[arg(long)] + allow_dirty: bool, + + /// Print SQL statements without executing them + /// + /// Runs the full compilation and validation pipeline but prints the SQL + /// that would be executed instead of actually running it. Useful for + /// reviewing changes before deployment. + #[arg(long)] + dry_run: bool, + }, + + /// Promote a staging deployment to production + /// + /// Performs an atomic schema swap between staging and production. Before promoting, + /// verifies that all staging clusters are fully hydrated and caught up (unless + /// --skip-ready is specified). This ensures zero-downtime deployments. + /// + /// The promotion will fail if: + /// - Any cluster is still hydrating (objects not yet materialized) + /// - Any cluster has lag exceeding --allowed-lag threshold + /// - Any cluster has no replicas or all replicas are OOM-looping + /// - Production schemas were modified after staging was created (unless --force) + /// + /// Example: + /// mz-deploy apply abc123 # Promote staging deployment + /// mz-deploy apply abc123 --skip-ready # Skip hydration check + /// mz-deploy apply abc123 --allowed-lag 600 # Allow up to 10 min lag + Apply { + /// Staging deployment ID to promote to production + /// + /// The deployment ID was assigned when running 'mz-deploy stage'. You can + /// find active deployments with 'mz-deploy deployments'. + #[arg(value_name = "DEPLOY_ID")] + deploy_id: String, + + /// Skip conflict detection when promoting + /// + /// Normally, apply checks if production schemas were modified after the + /// staging deployment was created. This flag bypasses that safety check, + /// which may overwrite recent production changes. 
+ #[arg(long)] + force: bool, + + /// Skip the readiness check before promoting + /// + /// By default, apply verifies all staging clusters are hydrated and caught + /// up before promoting. Use this flag to skip that check and promote + /// immediately, which may result in stale data being served briefly. + #[arg(long)] + skip_ready: bool, + + /// Maximum lag threshold in seconds for readiness check + /// + /// During the readiness check, clusters with wallclock lag exceeding this + /// threshold are marked as "lagging" and will block promotion. Lag measures + /// how far behind real-time the materialized data is. Default: 300 (5 min). + #[arg(long, value_name = "SECONDS", default_value = "300")] + allowed_lag: i64, + }, + + /// Create a staging deployment for testing changes + /// + /// Deploys schemas and objects to staging with suffixed names (e.g., 'public_abc123'). + /// This allows testing changes in isolation before promoting to production. + /// Staging deployments can be listed with 'deployments' and promoted with 'apply'. + /// + /// Example: + /// mz-deploy stage # Use random deploy ID + /// mz-deploy stage --name abc123 # Use custom deploy ID + Stage { + /// Deploy ID for this staging deployment (default: random 7-char hex) + /// + /// The deploy ID will be used as a suffix for schemas and clusters. + /// Must contain only alphanumeric characters, hyphens, and underscores. + #[arg(long, value_name = "DEPLOY_ID")] + deploy_id: Option, + + /// Allow staging with uncommitted git changes + #[arg(long)] + allow_dirty: bool, + + /// Skip automatic rollback on failure (leaves resources for debugging) + #[arg(long)] + no_rollback: bool, + + /// Print SQL statements without executing them + /// + /// Runs the full compilation and validation pipeline but prints the SQL + /// that would be executed instead of actually running it. Useful for + /// reviewing changes before deployment. + #[arg(long)] + dry_run: bool, + }, + + /// Test database connection and display environment information + /// + /// Connects to Materialize using the specified profile and displays version, + /// environment ID, and current role. Useful for verifying connectivity and + /// configuration before running deployments. + Debug, + + /// Show detailed information about a specific deployment + /// + /// Displays comprehensive information about a deployment including metadata + /// (who deployed, when, git commit) and all objects with their hashes. + /// Use `mz-deploy history` to find deployment IDs. + /// + /// Example: + /// mz-deploy describe abc123 + #[command(visible_alias = "show")] + Describe { + /// Deployment ID to describe + #[arg(value_name = "DEPLOY_ID")] + deploy_id: String, + }, + + /// Generate types.lock file with external dependency schemas + /// + /// Queries the database for schema information about external dependencies + /// (tables/views not managed by this project but referenced in SQL). This + /// creates a types.lock file used for offline type checking. + #[command(name = "gen-data-contracts")] + GenDataContracts, + + /// Run SQL unit tests defined in test files + /// + /// Executes all test files in the project against a temporary Materialize + /// Docker container. Tests validate SQL logic without affecting production. + Test, + + /// Clean up a staging deployment by dropping all resources + /// + /// Removes staging schemas, clusters, and deployment tracking records for + /// the specified deploy ID. This is the equivalent of 'git branch -D' for + /// staging deployments. 
+ /// + /// Example: + /// mz-deploy abort abc123 + Abort { + /// Staging deploy ID to remove + #[arg(value_name = "DEPLOY_ID")] + deploy_id: String, + }, + + /// List all active staging deployments + /// + /// Shows staging environments that have been deployed but not yet promoted, + /// similar to 'git branch'. For each deployment, displays: + /// - Deployment ID and who created it + /// - Git commit (if available) + /// - Cluster readiness status (ready, hydrating, lagging, or failing) + /// - Schemas included in the deployment + /// + /// Example: + /// mz-deploy deployments # List with default lag threshold + /// mz-deploy deployments --allowed-lag 60 # Stricter lag threshold + #[command(visible_alias = "branches")] + Deployments { + /// Maximum lag threshold in seconds for cluster status + /// + /// Clusters with wallclock lag exceeding this threshold are shown as + /// "lagging" instead of "ready". Lag measures how far behind real-time + /// the materialized data is. Default: 300 (5 min). + #[arg(long, value_name = "SECONDS", default_value = "300")] + allowed_lag: i64, + }, + + /// Show history of promoted deployments + /// + /// Displays a chronological log of deployments that have been promoted to + /// production, similar to 'git log'. Each entry shows the deploy ID, + /// who promoted it, when, and which schemas were included. + /// + /// Example: + /// mz-deploy history --limit 10 + #[command(visible_alias = "log")] + History { + /// Maximum number of deployments to show (default: unlimited) + #[arg(short, long, value_name = "N")] + limit: Option, + }, + + /// Wait for staging deployment clusters to be hydrated and ready + /// + /// Monitors cluster hydration status with a live dashboard showing progress for + /// each cluster. A cluster is considered "ready" when: + /// - All objects are fully hydrated (materialized) + /// - Wallclock lag is within the --allowed-lag threshold + /// - At least one healthy replica exists (not OOM-looping) + /// + /// Status indicators: + /// - ready: Fully hydrated and caught up + /// - hydrating: Objects still being materialized + /// - lagging: Hydrated but lag exceeds threshold + /// - failing: No replicas or all replicas OOM-looping + /// + /// Examples: + /// mz-deploy ready abc123 # Wait with live tracking + /// mz-deploy ready abc123 --snapshot # Check once and exit + /// mz-deploy ready abc123 --timeout 300 # Wait up to 5 minutes + /// mz-deploy ready abc123 --allowed-lag 60 # Require lag under 1 min + Ready { + /// Staging deployment ID to monitor + #[arg(value_name = "DEPLOY_ID")] + name: String, + + /// Check status once and exit instead of continuous monitoring + /// + /// Takes a point-in-time snapshot of cluster status and exits immediately. + /// Returns success (exit 0) only if all clusters are ready. + #[arg(long)] + snapshot: bool, + + /// Maximum time to wait in seconds before timing out + /// + /// If clusters don't become ready within this duration, the command exits + /// with an error. By default, waits indefinitely. + #[arg(long, value_name = "SECONDS")] + timeout: Option, + + /// Maximum lag threshold in seconds for "ready" status + /// + /// Clusters with wallclock lag exceeding this threshold are marked as + /// "lagging" and not considered ready. Lag measures how far behind + /// real-time the materialized data is. Default: 300 (5 min). 
+ #[arg(long, value_name = "SECONDS", default_value = "300")] + allowed_lag: i64, + }, +} + +#[tokio::main] +async fn main() { + let args = Args::parse(); + log::set_verbose(args.verbose); + + if let Err(e) = run(args).await { + cli::display_error(&e); + } +} + +async fn run(args: Args) -> Result<(), CliError> { + match args.command { + Some(Command::Compile { + skip_typecheck, + docker_image, + }) => { + let compile_args = cli::commands::compile::CompileArgs { + typecheck: !skip_typecheck, + docker_image, + }; + cli::commands::compile::run(&args.directory, compile_args) + .await + .map(|_| ()) + } + Some(Command::CreateTables { + deploy_id, + allow_dirty, + dry_run, + }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + + cli::commands::create_tables::run( + &profile, + &args.directory, + deploy_id.as_deref(), + allow_dirty, + dry_run, + ) + .await?; + if !dry_run { + cli::commands::gen_data_contracts::run(&profile, &args.directory).await + } else { + Ok(()) + } + } + Some(Command::Apply { + deploy_id, + force, + skip_ready, + allowed_lag, + }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + if !skip_ready { + cli::commands::ready::run(&profile, &deploy_id, true, None, allowed_lag).await?; + } + cli::commands::apply::run(&profile, &deploy_id, force).await + } + Some(Command::Stage { + deploy_id, + allow_dirty, + no_rollback, + dry_run, + }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + + cli::commands::stage::run( + &profile, + deploy_id.as_deref(), + &args.directory, + allow_dirty, + no_rollback, + dry_run, + ) + .await + } + Some(Command::Debug) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + + cli::commands::debug::run(&profile).await + } + Some(Command::Describe { deploy_id }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + + cli::commands::describe::run(&profile, &deploy_id).await + } + Some(Command::GenDataContracts) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + cli::commands::gen_data_contracts::run(&profile, &args.directory).await + } + Some(Command::Test) => cli::commands::test::run(&args.directory).await, + Some(Command::Abort { deploy_id }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + cli::commands::abort::run(&profile, &deploy_id).await + } + Some(Command::Deployments { allowed_lag }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + cli::commands::deployments::run(&profile, allowed_lag).await + } + Some(Command::History { limit }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + 
cli::commands::history::run(&profile, limit).await + } + Some(Command::Ready { + name, + snapshot, + timeout, + allowed_lag, + }) => { + let profile = + ProfilesConfig::load_profile(Some(&args.directory), args.profile.as_deref()) + .map_err(|e| CliError::Connection(ConnectionError::Config(e)))?; + + cli::commands::ready::run(&profile, &name, snapshot, timeout, allowed_lag).await + } + None => { + Args::command().print_help().unwrap(); + Ok(()) + } + } +} diff --git a/src/mz-deploy/src/cli.rs b/src/mz-deploy/src/cli.rs new file mode 100644 index 0000000000000..6b5637c204a5e --- /dev/null +++ b/src/mz-deploy/src/cli.rs @@ -0,0 +1,27 @@ +//! CLI-specific functionality and error types. + +pub mod commands; +mod error; +pub mod helpers; + +pub use error::CliError; + +/// Display a CLI error and exit with status code 1. +/// +/// Formats the error using colored output with rustc-style formatting, +/// including any hints provided by the error's `hint()` method. +pub fn display_error(error: &CliError) { + use owo_colors::OwoColorize; + + eprintln!("{}: {}", "error".bright_red().bold(), error); + + if let Some(hint) = error.hint() { + eprintln!( + " {} {}", + "=".bright_blue().bold(), + format!("help: {}", hint).bold() + ); + } + + std::process::exit(1); +} diff --git a/src/mz-deploy/src/cli/commands.rs b/src/mz-deploy/src/cli/commands.rs new file mode 100644 index 0000000000000..dd569bf3030d9 --- /dev/null +++ b/src/mz-deploy/src/cli/commands.rs @@ -0,0 +1,17 @@ +//! Command implementations for mz-deploy CLI. +//! +//! Each command is implemented in its own module with a consistent +//! `run()` function signature that returns `Result`. + +pub mod abort; +pub mod apply; +pub mod compile; +pub mod create_tables; +pub mod debug; +pub mod deployments; +pub mod describe; +pub mod gen_data_contracts; +pub mod history; +pub mod ready; +pub mod stage; +pub mod test; diff --git a/src/mz-deploy/src/cli/commands/abort.rs b/src/mz-deploy/src/cli/commands/abort.rs new file mode 100644 index 0000000000000..7088e705ad40e --- /dev/null +++ b/src/mz-deploy/src/cli/commands/abort.rs @@ -0,0 +1,105 @@ +//! Abort command - cleanup a staged deployment. + +use crate::cli::CliError; +use crate::client::{Client, Profile}; +use crate::verbose; + +/// Abort a staged deployment by dropping schemas, clusters, and deployment records. 
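+/// Aborting never touches production objects; only the suffixed staging schemas,
+/// clusters, and the deployment's tracking records are removed.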
+/// +/// This command: +/// - Validates that the staging deployment exists and hasn't been promoted +/// - Drops all staging schemas (with _ suffix) +/// - Drops all staging clusters (with _ suffix) +/// - Deletes deployment tracking records +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `deploy_id` - Staging deployment ID to abort +/// +/// # Returns +/// Ok(()) if abort succeeds +/// +/// # Errors +/// Returns `CliError::StagingEnvironmentNotFound` if the deployment doesn't exist +/// Returns `CliError::StagingAlreadyPromoted` if the deployment was already promoted +/// Returns `CliError::Connection` for database errors +pub async fn run(profile: &Profile, deploy_id: &str) -> Result<(), CliError> { + println!("Aborting staged deployment: {}", deploy_id); + + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + let metadata = client.get_deployment_metadata(deploy_id).await?; + + match metadata { + Some(meta) if meta.promoted_at.is_some() => { + return Err(CliError::StagingAlreadyPromoted { + name: deploy_id.to_string(), + }); + } + Some(_) => { + // Good to proceed + } + None => { + return Err(CliError::StagingEnvironmentNotFound { + name: deploy_id.to_string(), + }); + } + } + + // Get staging schemas and clusters + let staging_schemas = client.get_staging_schemas(deploy_id).await?; + + let staging_clusters = client.get_staging_clusters(deploy_id).await?; + + verbose!("Dropping staging resources:"); + verbose!(" Schemas: {}", staging_schemas.len()); + verbose!(" Clusters: {}", staging_clusters.len()); + verbose!(); + + // Drop staging schemas + if !staging_schemas.is_empty() { + verbose!("Dropping staging schemas..."); + client.drop_staging_schemas(&staging_schemas).await?; + for (database, schema) in &staging_schemas { + verbose!(" Dropped {}.{}", database, schema); + } + } + + // Drop staging clusters + if !staging_clusters.is_empty() { + verbose!("Dropping staging clusters..."); + client.drop_staging_clusters(&staging_clusters).await?; + for cluster in &staging_clusters { + verbose!(" Dropped {}", cluster); + } + } + + // Delete deployment records + verbose!("Deleting deployment records..."); + + // Clean up cluster tracking records + client + .delete_deployment_clusters(deploy_id) + .await + .map_err(|source| CliError::DeploymentStateWriteFailed { source })?; + + // Clean up pending statements (for sinks) + client + .delete_pending_statements(deploy_id) + .await + .map_err(|source| CliError::DeploymentStateWriteFailed { source })?; + + // Clean up apply state schemas if they exist (from interrupted apply) + client + .delete_apply_state_schemas(deploy_id) + .await + .map_err(|source| CliError::DeploymentStateWriteFailed { source })?; + + client.delete_deployment(deploy_id).await?; + + println!("Successfully aborted deployment '{}'", deploy_id); + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/apply.rs b/src/mz-deploy/src/cli/commands/apply.rs new file mode 100644 index 0000000000000..7c5e8fae04428 --- /dev/null +++ b/src/mz-deploy/src/cli/commands/apply.rs @@ -0,0 +1,578 @@ +//! Apply command - promote staging deployment to production via ALTER SWAP. + +use crate::cli::CliError; +use crate::client::{ApplyState, Client, DeploymentKind, Profile}; +use crate::project::object_id::ObjectId; +use crate::{project, verbose}; +use owo_colors::OwoColorize; +use std::collections::BTreeSet; + +/// Promote a staging deployment to production using ALTER SWAP. 
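+/// Staging schemas and clusters (suffixed with the deploy ID) are renamed into place,
+/// while the old production objects pick up the suffix so they can be dropped afterwards.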
+/// +/// This command implements a resumable promotion flow: +/// 1. Check for existing apply state (for resume scenarios) +/// 2. Create apply state schemas if starting fresh +/// 3. Execute atomic swap (schemas, clusters, and state schemas in one transaction) +/// 4. Execute pending sinks (created after swap since they write to external systems) +/// 5. Clean up old resources and state tracking +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `deploy_id` - Staging deployment ID +/// * `force` - Force promotion despite conflicts +/// +/// # Returns +/// Ok(()) if promotion succeeds +/// +/// # Errors +/// Returns `CliError::StagingEnvironmentNotFound` if deployment doesn't exist +/// Returns `CliError::StagingAlreadyPromoted` if already promoted +/// Returns `CliError::DeploymentConflict` if conflicts detected (without --force) +/// Returns `CliError::Connection` for database errors +pub async fn run(profile: &Profile, deploy_id: &str, force: bool) -> Result<(), CliError> { + println!("Deploying '{}' to production", deploy_id); + + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + project::deployment_snapshot::initialize_deployment_table(&client).await?; + + // Validate deployment exists and is not promoted + let metadata = client.get_deployment_metadata(deploy_id).await?; + + match metadata { + Some(meta) if meta.promoted_at.is_some() => { + return Err(CliError::StagingAlreadyPromoted { + name: deploy_id.to_string(), + }); + } + Some(_) => { + // Good to proceed + } + None => { + return Err(CliError::StagingEnvironmentNotFound { + name: deploy_id.to_string(), + }); + } + } + + // Check apply state for resume scenarios + let apply_state = client.get_apply_state(deploy_id).await?; + verbose!("Apply state: {:?}", apply_state); + + // Load staging deployment state to identify what's deployed in staging + let staging_snapshot = + project::deployment_snapshot::load_from_database(&client, Some(deploy_id)).await?; + + verbose!( + "Found {} objects in staging deployment", + staging_snapshot.objects.len() + ); + + // Only check conflicts and gather resources if we haven't swapped yet + let (staging_schemas, staging_clusters, staging_suffix) = if apply_state != ApplyState::PostSwap + { + gather_resources_and_check_conflicts(&client, deploy_id, force).await? 
+ } else { + // Post-swap: we don't need these for sink execution + verbose!("Resuming post-swap: skipping conflict check and resource gathering"); + (BTreeSet::new(), BTreeSet::new(), format!("_{}", deploy_id)) + }; + + // Execute based on current state + match apply_state { + ApplyState::NotStarted => { + // Fresh apply: create state schemas and execute swap + verbose!("Creating apply state schemas..."); + client.create_apply_state_schemas(deploy_id).await?; + + verbose!("Executing atomic swap..."); + execute_atomic_swap(&client, deploy_id, &staging_schemas, &staging_clusters).await?; + } + ApplyState::PreSwap => { + // Resume: state schemas exist but swap didn't complete + verbose!("Resuming from pre-swap state..."); + execute_atomic_swap(&client, deploy_id, &staging_schemas, &staging_clusters).await?; + } + ApplyState::PostSwap => { + // Resume: swap completed, continue to sinks + verbose!("Resuming from post-swap state..."); + } + } + + // Execute pending sinks (skip any already executed) + execute_pending_sinks(&client, deploy_id).await?; + + // Repoint existing sinks that depend on objects being dropped + // This must happen before drop_old_resources to prevent CASCADE from dropping sinks + if !staging_schemas.is_empty() { + repoint_dependent_sinks(&client, &staging_schemas, &staging_suffix).await?; + } + + // Update promoted_at timestamp + verbose!("\nUpdating deployment table..."); + client + .update_promoted_at(deploy_id) + .await + .map_err(|source| CliError::DeploymentStateWriteFailed { source })?; + + // Drop old production resources (now have staging suffix after swap) + // Only do this if we have the resource info (i.e., we did the swap in this run) + if !staging_schemas.is_empty() || !staging_clusters.is_empty() { + println!("\nDropping old production objects..."); + drop_old_resources( + &client, + &staging_schemas, + &staging_clusters, + &staging_suffix, + ) + .await; + } + + // Clean up apply state and pending statements + verbose!("Cleaning up apply state..."); + client.delete_apply_state_schemas(deploy_id).await?; + client.delete_pending_statements(deploy_id).await?; + client + .delete_deployment_clusters(deploy_id) + .await + .map_err(|source| CliError::DeploymentStateWriteFailed { source })?; + + println!("Deployment completed successfully!"); + println!("Staging deployment '{}' is now in production", deploy_id); + + Ok(()) +} + +/// Gather staging resources and check for deployment conflicts. +/// +/// Returns the staging schemas, clusters, and suffix for the swap operation. 
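+/// Schemas are returned as `(database, schema)` pairs that already carry the staging
+/// suffix (`_<deploy_id>`); clusters are returned under their production names.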
+async fn gather_resources_and_check_conflicts( + client: &Client, + deploy_id: &str, + force: bool, +) -> Result<(BTreeSet<(String, String)>, BTreeSet, String), CliError> { + verbose!("Checking for deployment conflicts..."); + let conflicts = client.check_deployment_conflicts(deploy_id).await?; + + if !conflicts.is_empty() { + if force { + // With --force, show warning but continue + eprintln!( + "\n{}: deployment conflicts detected, but continuing due to --force flag", + "warning".yellow().bold() + ); + for conflict in &conflicts { + eprintln!( + " - {}.{} (last promoted by '{}' deployment)", + conflict.database, conflict.schema, conflict.deploy_id + ); + } + eprintln!(); + } else { + // Without --force, return error + return Err(CliError::DeploymentConflict { conflicts }); + } + } else { + verbose!("No conflicts detected"); + } + + // Get schemas and clusters from deployment tables + let staging_suffix = format!("_{}", deploy_id); + let mut staging_schemas = BTreeSet::new(); + let mut staging_clusters = BTreeSet::new(); + + // Get schemas from deploy.deployments table for this deployment + let deployment_records = client.get_schema_deployments(Some(deploy_id)).await?; + for record in deployment_records { + // Skip sink-only schemas - they don't need swapping + // Sinks are created after the swap via pending_statements + if record.kind == DeploymentKind::Sinks { + verbose!( + "Skipping sink-only schema {}.{} (no swap needed)", + record.database, + record.schema + ); + continue; + } + + let staging_schema = format!("{}{}", record.schema, staging_suffix); + + // Verify staging schema still exists + if client + .schema_exists(&record.database, &staging_schema) + .await? + { + staging_schemas.insert((record.database.clone(), staging_schema)); + } else { + eprintln!( + "Warning: Staging schema {}.{} not found", + record.database, staging_schema + ); + } + } + + // Validate that all clusters in the deployment still exist + client.validate_deployment_clusters(deploy_id).await?; + + // Get clusters from deploy.clusters table + let cluster_names = client.get_deployment_clusters(deploy_id).await?; + for cluster_name in cluster_names { + let staging_cluster = format!("{}{}", cluster_name, staging_suffix); + + // Verify staging cluster still exists + if client.cluster_exists(&staging_cluster).await? { + staging_clusters.insert(cluster_name); + } else { + eprintln!("Warning: Staging cluster {} not found", staging_cluster); + } + } + + verbose!("\nSchemas to swap:"); + for (database, schema) in &staging_schemas { + let prod_schema = schema.trim_end_matches(&staging_suffix); + verbose!(" - {}.{} <-> {}", database, schema, prod_schema); + } + + if !staging_clusters.is_empty() { + verbose!("\nClusters to swap:"); + for cluster in &staging_clusters { + let staging_cluster = format!("{}{}", cluster, staging_suffix); + verbose!(" - {} <-> {}", staging_cluster, cluster); + } + } + + Ok((staging_schemas, staging_clusters, staging_suffix)) +} + +/// Execute the atomic swap of schemas, clusters, and state schemas. 
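+/// All swaps run inside a single `BEGIN ... COMMIT` transaction; if any statement
+/// fails, the transaction is rolled back and production is left unchanged.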
+/// +/// This transaction includes: +/// - Swapping user schemas (production <-> staging) +/// - Swapping clusters (production <-> staging) +/// - Swapping apply state schemas (pre <-> post, which moves the 'swapped=true' comment to _pre) +async fn execute_atomic_swap( + client: &Client, + deploy_id: &str, + staging_schemas: &BTreeSet<(String, String)>, + staging_clusters: &BTreeSet, +) -> Result<(), CliError> { + let staging_suffix = format!("_{}", deploy_id); + + // Begin transaction for atomic swap + client + .execute("BEGIN", &[]) + .await + .map_err(|e| CliError::SqlExecutionFailed { + statement: "BEGIN".to_string(), + source: e, + })?; + + // Swap schemas + for (database, staging_schema) in staging_schemas { + let prod_schema = staging_schema.trim_end_matches(&staging_suffix); + // Note: second schema name is NOT fully qualified (same database) + let swap_sql = format!( + "ALTER SCHEMA \"{}\".\"{}\" SWAP WITH \"{}\";", + database, prod_schema, staging_schema + ); + + verbose!(" {}", swap_sql); + if let Err(e) = client.execute(&swap_sql, &[]).await { + let _ = client.execute("ROLLBACK", &[]).await; + return Err(CliError::SqlExecutionFailed { + statement: swap_sql, + source: e, + }); + } + } + + // Swap clusters + for cluster in staging_clusters { + let staging_cluster = format!("{}{}", cluster, staging_suffix); + let swap_sql = format!( + "ALTER CLUSTER \"{}\" SWAP WITH \"{}\";", + cluster, staging_cluster + ); + + verbose!(" {}", swap_sql); + if let Err(e) = client.execute(&swap_sql, &[]).await { + let _ = client.execute("ROLLBACK", &[]).await; + return Err(CliError::SqlExecutionFailed { + statement: swap_sql, + source: e, + }); + } + } + + // Swap the apply state schemas - this atomically marks the swap as complete + // After this swap, apply__pre will have comment 'swapped=true' (it was _post before) + let pre_schema = format!("apply_{}_pre", deploy_id); + let post_schema = format!("apply_{}_post", deploy_id); + // Note: second schema name is NOT fully qualified (same database: _mz_deploy) + let state_swap_sql = format!( + "ALTER SCHEMA _mz_deploy.\"{}\" SWAP WITH \"{}\";", + pre_schema, post_schema + ); + + verbose!(" {}", state_swap_sql); + if let Err(e) = client.execute(&state_swap_sql, &[]).await { + let _ = client.execute("ROLLBACK", &[]).await; + return Err(CliError::SqlExecutionFailed { + statement: state_swap_sql, + source: e, + }); + } + + // Commit transaction + client + .execute("COMMIT", &[]) + .await + .map_err(|e| CliError::SqlExecutionFailed { + statement: "COMMIT".to_string(), + source: e, + })?; + + verbose!("Swap completed successfully"); + Ok(()) +} + +/// Execute pending sink statements (created after swap). +/// +/// Sinks are created in production after the swap because they immediately +/// start writing to external systems. Like tables, sinks are only created if +/// they don't already exist - the hash is ignored. 
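+/// Each successfully created sink is marked as executed in the deployment metadata,
+/// so a resumed apply skips sinks that were already created.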
+async fn execute_pending_sinks(client: &Client, deploy_id: &str) -> Result<(), CliError> { + let pending = client.get_pending_statements(deploy_id).await?; + + if pending.is_empty() { + verbose!("No pending sinks to execute"); + return Ok(()); + } + + // Build set of sink ObjectIds from pending statements + let sink_ids: BTreeSet = pending + .iter() + .map(|stmt| ObjectId { + database: stmt.database.clone(), + schema: stmt.schema.clone(), + object: stmt.object.clone(), + }) + .collect(); + + // Check which sinks already exist (like tables, skip existing ones) + let existing_sinks = client.check_sinks_exist(&sink_ids).await?; + + // Filter to only sinks that don't exist + let sinks_to_create: Vec<_> = pending + .iter() + .filter(|stmt| { + let obj_id = ObjectId { + database: stmt.database.clone(), + schema: stmt.schema.clone(), + object: stmt.object.clone(), + }; + !existing_sinks.contains(&obj_id) + }) + .collect(); + + // Log skipped sinks + if !existing_sinks.is_empty() { + println!("\nSinks that already exist (skipping):"); + let mut existing_list: Vec<_> = existing_sinks.iter().collect(); + existing_list.sort_by_key(|obj| (&obj.database, &obj.schema, &obj.object)); + for sink_id in existing_list { + println!( + " - {}.{}.{}", + sink_id.database, sink_id.schema, sink_id.object + ); + } + } + + // If all sinks exist, exit early + if sinks_to_create.is_empty() { + if !existing_sinks.is_empty() { + println!( + "\nAll {} sink(s) already exist. Nothing to create.", + sink_ids.len() + ); + } + return Ok(()); + } + + println!("\nCreating {} sink(s)...", sinks_to_create.len()); + + for stmt in sinks_to_create { + verbose!( + "Creating sink {}.{}.{}...", + stmt.database, + stmt.schema, + stmt.object + ); + + // Execute the sink creation statement + if let Err(e) = client.execute(&stmt.statement_sql, &[]).await { + // Log the error - the statement will remain unexecuted for retry + eprintln!( + "Error creating sink {}.{}.{}: {}", + stmt.database, stmt.schema, stmt.object, e + ); + return Err(CliError::SqlExecutionFailed { + statement: stmt.statement_sql.clone(), + source: e, + }); + } + + // Mark the statement as executed + client + .mark_statement_executed(deploy_id, stmt.sequence_num) + .await?; + + println!(" ✓ {}.{}.{}", stmt.database, stmt.schema, stmt.object); + } + + Ok(()) +} + +/// Repoint sinks that depend on objects in schemas about to be dropped. +/// +/// After the swap, old production objects are in schemas with the staging suffix. +/// Before dropping those schemas, we need to ALTER SINK any sinks that depend +/// on those objects to point to the new production objects instead. +/// +/// This prevents sinks from being transitively dropped by CASCADE when the +/// old schemas are dropped. 
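+/// Repointing uses `ALTER SINK ... SET FROM ...` against the object of the same name
+/// in the new production schema; if that object is missing, the apply fails with
+/// `SinkRepointFailed`.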
+async fn repoint_dependent_sinks( + client: &Client, + staging_schemas: &BTreeSet<(String, String)>, + staging_suffix: &str, +) -> Result<(), CliError> { + // Build list of old schema names (database, old_schema_with_suffix) + // After swap, old production schemas have the staging suffix + let old_schemas: Vec<(String, String)> = staging_schemas + .iter() + .map(|(db, staging_schema)| { + let prod_schema = staging_schema.trim_end_matches(staging_suffix); + let old_schema = format!("{}{}", prod_schema, staging_suffix); + (db.clone(), old_schema) + }) + .collect(); + + // Find sinks depending on objects in old schemas + let dependent_sinks = client + .find_sinks_depending_on_schemas(&old_schemas) + .await + .map_err(CliError::Connection)?; + + if dependent_sinks.is_empty() { + verbose!("No sinks depend on objects in schemas being dropped"); + return Ok(()); + } + + println!( + "\nRepointing {} sink(s) to new upstream objects...", + dependent_sinks.len() + ); + + for sink in dependent_sinks { + // Compute new schema name (strip suffix to get production schema name) + let new_schema = sink.dependency_schema.trim_end_matches(staging_suffix); + + // Check if replacement object exists in new schema + let replacement_exists = client + .object_exists(&sink.dependency_database, new_schema, &sink.dependency_name) + .await + .map_err(CliError::Connection)?; + + if !replacement_exists { + return Err(CliError::SinkRepointFailed { + sink: format!( + "{}.{}.{}", + sink.sink_database, sink.sink_schema, sink.sink_name + ), + reason: format!( + "replacement object {}.{}.{} does not exist", + sink.dependency_database, new_schema, sink.dependency_name + ), + }); + } + + // Execute ALTER SINK ... SET FROM + let alter_sql = format!( + r#"ALTER SINK "{}"."{}"."{}". SET FROM "{}"."{}"."{}""#, + sink.sink_database, + sink.sink_schema, + sink.sink_name, + sink.dependency_database, + new_schema, + sink.dependency_name + ); + + verbose!(" {}", alter_sql); + if let Err(e) = client.execute(&alter_sql, &[]).await { + return Err(CliError::SinkRepointFailed { + sink: format!( + "{}.{}.{}", + sink.sink_database, sink.sink_schema, sink.sink_name + ), + reason: e.to_string(), + }); + } + + println!( + " {} {}.{}.{} -> {}.{}.{}", + "✓".green(), + sink.sink_database, + sink.sink_schema, + sink.sink_name, + sink.dependency_database, + new_schema, + sink.dependency_name + ); + } + + Ok(()) +} + +/// Drop old production resources after the swap. +/// +/// After the swap, old production objects now have the staging suffix. +/// This function drops them to clean up. 
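+/// Drop failures are reported as warnings rather than errors, since the swap and
+/// promotion have already completed at this point.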
+async fn drop_old_resources(
+    client: &Client,
+    staging_schemas: &BTreeSet<(String, String)>,
+    staging_clusters: &BTreeSet<String>,
+    staging_suffix: &str,
+) {
+    // Drop schemas
+    for (database, staging_schema) in staging_schemas {
+        let prod_schema = staging_schema.trim_end_matches(staging_suffix);
+        // After swap, the old production schema is now named with the staging suffix
+        let old_schema = format!("{}{}", prod_schema, staging_suffix);
+        let drop_sql = format!(
+            "DROP SCHEMA IF EXISTS \"{}\".\"{}\" CASCADE;",
+            database, old_schema
+        );
+
+        verbose!(" {}", drop_sql);
+        if let Err(e) = client.execute(&drop_sql, &[]).await {
+            eprintln!(
+                "warning: failed to drop old schema {}.{}: {}",
+                database, old_schema, e
+            );
+        }
+    }
+
+    // Drop clusters
+    for cluster in staging_clusters {
+        // After swap, the old production cluster is now named with the staging suffix
+        let old_cluster = format!("{}{}", cluster, staging_suffix);
+        let drop_sql = format!("DROP CLUSTER IF EXISTS \"{}\" CASCADE;", old_cluster);
+
+        verbose!(" {}", drop_sql);
+        if let Err(e) = client.execute(&drop_sql, &[]).await {
+            eprintln!("warning: failed to drop old cluster {}: {}", old_cluster, e);
+        }
+    }
+}
diff --git a/src/mz-deploy/src/cli/commands/compile.rs b/src/mz-deploy/src/cli/commands/compile.rs
new file mode 100644
index 0000000000000..f62b24269d849
--- /dev/null
+++ b/src/mz-deploy/src/cli/commands/compile.rs
@@ -0,0 +1,331 @@
+//! Compile command - validate project and show deployment plan.
+
+use crate::cli::CliError;
+use crate::project::object_id::ObjectId;
+use crate::utils::progress;
+use crate::{project, verbose};
+use std::path::Path;
+use std::time::{Duration, Instant};
+
+/// Arguments for the compile command
+#[derive(Debug, Clone)]
+pub struct CompileArgs {
+    /// Enable type checking with Docker
+    pub typecheck: bool,
+    /// Docker image to use for type checking
+    pub docker_image: Option<String>,
+}
+
+impl Default for CompileArgs {
+    fn default() -> Self {
+        Self {
+            typecheck: true,
+            docker_image: None,
+        }
+    }
+}
+
+/// Compile and validate the project, showing the deployment plan.
+/// +/// This command: +/// - Loads and parses SQL files from the project directory +/// - Validates the project structure and dependencies +/// - Performs optional type checking with Docker +/// - Displays the deployment plan including dependencies and SQL statements +/// +/// # Arguments +/// * `directory` - Project root directory +/// * `args` - Compile command arguments +/// +/// # Returns +/// Compiled planned project ready for deployment +/// +/// # Errors +/// Returns `CliError::Project` if compilation or validation fails +pub async fn run( + directory: &Path, + args: CompileArgs, +) -> Result { + let start_time = Instant::now(); + + println!("Loading project from: {}", directory.display()); + + // Stage 1: Parse and validate SQL files + progress::stage_start("Parsing SQL files"); + let parse_start = Instant::now(); + let planned_project = project::plan(directory)?; + let parse_duration = parse_start.elapsed(); + + // Count objects and schemas + let object_count: usize = planned_project + .databases + .iter() + .flat_map(|db| &db.schemas) + .map(|schema| schema.objects.len()) + .sum(); + let schema_count: usize = planned_project + .databases + .iter() + .map(|db| db.schemas.len()) + .sum(); + + progress::stage_success( + &format!("Found {} objects in {} schemas", object_count, schema_count), + parse_duration, + ); + + // Stage 2: Validate project structure + progress::stage_start("Validating project structure"); + let validate_start = Instant::now(); + + // Topological sort validates the project (detects cycles) + let sorted = planned_project.topological_sort()?; + let validate_duration = validate_start.elapsed(); + + progress::stage_success( + &format!("All {} objects validated", sorted.len()), + validate_duration, + ); + + // Stage 3: Build dependency graph + progress::stage_start("Building dependency graph"); + let deps_start = Instant::now(); + + // Count internal dependencies (excluding external) + let internal_dep_count: usize = planned_project + .dependency_graph + .values() + .map(|deps| { + deps.iter() + .filter(|dep| !planned_project.external_dependencies.contains(dep)) + .count() + }) + .sum(); + + let deps_duration = deps_start.elapsed(); + progress::stage_success( + &format!("Resolved {} dependencies", internal_dep_count), + deps_duration, + ); + + // Show additional info + if !planned_project.external_dependencies.is_empty() { + progress::info(&format!( + "{} external dependencies detected", + planned_project.external_dependencies.len() + )); + } + if !planned_project.cluster_dependencies.is_empty() { + progress::info(&format!( + "{} clusters required", + planned_project.cluster_dependencies.len() + )); + } + + // Type checking with Docker if enabled + if args.typecheck { + let typecheck_duration = + typecheck_with_docker(directory, &planned_project, args.docker_image, object_count) + .await?; + + if let Some(duration) = typecheck_duration { + progress::stage_success(&format!("{} objects passed", object_count), duration); + } + } + + // Show verbose details if requested + if crate::utils::log::verbose_enabled() { + print_verbose_details(&planned_project, &sorted); + } + + // Final summary + let total_duration = start_time.elapsed(); + progress::summary("Project successfully compiled", total_duration); + + Ok(planned_project) +} + +/// Perform type checking using Docker +async fn typecheck_with_docker( + directory: &Path, + planned_project: &project::planned::Project, + docker_image: Option, + _object_count: usize, +) -> Result, CliError> { + use 
crate::types::{TypeCheckError, typecheck_with_client}; + use crate::utils::docker_runtime::DockerRuntime; + + progress::stage_start("Type checking with Docker"); + let typecheck_start = Instant::now(); + + // Load types.lock if it exists + let types = crate::types::load_types_lock(directory).unwrap_or_else(|_| { + println!("No types.lock found, assuming no external dependencies"); + println!("See gen-data-contracts for more information"); + crate::types::Types { + version: 1, + objects: std::collections::BTreeMap::new(), + } + }); + + // Create Docker runtime + let mut runtime = DockerRuntime::new(); + if let Some(image) = docker_image { + runtime = runtime.with_image(image); + } + + // Get connected client with staged dependencies + let mut client = match runtime.get_client(&types).await { + Ok(client) => client, + Err(TypeCheckError::ContainerStartFailed(e)) => { + // Docker not available, warn but don't fail + progress::info(&format!("Docker not available: {}", e)); + progress::info("Type checking skipped. Install Docker to enable type checking."); + return Ok(None); + } + Err(e) => { + return Err(e.into()); + } + }; + + // Run type checking + match typecheck_with_client(&mut client, planned_project, directory).await { + Ok(()) => { + let duration = typecheck_start.elapsed(); + Ok(Some(duration)) + } + Err(e) => { + // Real type checking errors + Err(e.into()) + } + } +} + +/// Print verbose details about the project (only shown with VERBOSE env var) +fn print_verbose_details(planned_project: &project::planned::Project, sorted: &[ObjectId]) { + // Display external dependencies + if !planned_project.external_dependencies.is_empty() { + verbose!("\nExternal Dependencies (not defined in this project):"); + let mut external: Vec<_> = planned_project.external_dependencies.iter().collect(); + external.sort(); + for dep in external { + verbose!(" - {}", dep); + } + } + + // Display cluster dependencies + if !planned_project.cluster_dependencies.is_empty() { + verbose!("\nCluster Dependencies:"); + let mut clusters: Vec<_> = planned_project.cluster_dependencies.iter().collect(); + clusters.sort_by_key(|c| &c.name); + for cluster in clusters { + verbose!(" - {}", cluster.name); + } + } + + // Display dependency graph + verbose!("\nDependency Graph:"); + for (object_id, deps) in &planned_project.dependency_graph { + if !deps.is_empty() { + verbose!(" {} depends on:", object_id); + for dep in deps { + // Mark external dependencies + if planned_project.external_dependencies.contains(dep) { + verbose!(" - {} (external)", dep); + } else { + verbose!(" - {}", dep); + } + } + } + } + + // Display deployment order + verbose!("\nDeployment order:"); + for (idx, object_id) in sorted.iter().enumerate() { + verbose!(" {}. {}", idx + 1, object_id); + } + + // Display module statements + let mod_stmts = planned_project.iter_mod_statements(); + if !mod_stmts.is_empty() { + verbose!("\nModule Setup Statements:"); + for (idx, mod_stmt) in mod_stmts.iter().enumerate() { + match mod_stmt { + project::ModStatement::Database { + database, + statement, + } => { + verbose!(" {}. Database {}: {}", idx + 1, database, statement); + } + project::ModStatement::Schema { + database, + schema, + statement, + } => { + verbose!( + " {}. 
Schema {}.{}: {}", + idx + 1, + database, + schema, + statement + ); + } + } + } + } + + // Display full SQL + verbose!("\nSQL Deployment Plan (fully qualified)"); + + for (idx, mod_stmt) in mod_stmts.iter().enumerate() { + match mod_stmt { + project::ModStatement::Database { + database, + statement, + } => { + verbose!("-- Module Setup {}: Database {}", idx + 1, database); + verbose!("{};", statement); + verbose!(); + } + project::ModStatement::Schema { + database, + schema, + statement, + } => { + verbose!( + "-- Module Setup {}: Schema {}.{}", + idx + 1, + database, + schema + ); + verbose!("{};", statement); + verbose!(); + } + } + } + + // Print objects in deployment order + if let Ok(objects) = planned_project.get_sorted_objects() { + for (idx, (object_id, typed_obj)) in objects.iter().enumerate() { + verbose!("-- Step {}: {}", idx + 1, object_id); + verbose!("{};", typed_obj.stmt); + + // Print indexes for this object + for index in &typed_obj.indexes { + verbose!("{};", index); + } + + // Print grants for this object + for grant in &typed_obj.grants { + verbose!("{};", grant); + } + + // Print comments for this object + for comment in &typed_obj.comments { + verbose!("{};", comment); + } + + verbose!(); + } + } +} diff --git a/src/mz-deploy/src/cli/commands/create_tables.rs b/src/mz-deploy/src/cli/commands/create_tables.rs new file mode 100644 index 0000000000000..fa43060d20250 --- /dev/null +++ b/src/mz-deploy/src/cli/commands/create_tables.rs @@ -0,0 +1,262 @@ +//! Create tables command - create tables that don't exist in the database. + +use crate::cli::{CliError, helpers}; +use crate::client::{Client, Profile}; +use crate::project::ast::Statement; +use crate::utils::git; +use crate::{project, verbose}; +use chrono::Utc; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; + +/// Create tables that don't exist in the database. 
+/// +/// This command: +/// - Queries the database to find which tables already exist +/// - Creates only tables that don't exist (no IF NOT EXISTS needed) +/// - Creates schemas if they don't exist +/// - Deploys only CREATE TABLE and CREATE TABLE FROM SOURCE statements +/// - Deploys associated indexes, grants, and comments +/// - Tracks deployment under a deploy ID +/// - Only records tables that were actually created +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `directory` - Project root directory +/// * `deploy_id` - Optional deploy ID (defaults to random 7-char hex) +/// * `allow_dirty` - Allow deploying with uncommitted changes +/// * `dry_run` - If true, print SQL instead of executing +/// +/// # Returns +/// Ok(()) if deployment succeeds +/// +/// # Errors +/// Returns various `CliError` variants for different failure modes +pub async fn run( + profile: &Profile, + directory: &Path, + deploy_id: Option<&str>, + allow_dirty: bool, + dry_run: bool, +) -> Result<(), CliError> { + // Check for uncommitted changes before proceeding + if !allow_dirty && git::is_dirty(directory) { + return Err(CliError::GitDirty); + } + + // Determine deploy ID (use provided name or random 7-char hex) + let deploy_id = match deploy_id { + Some(name) => name.to_string(), + None => helpers::generate_random_env_name(), + }; + + if dry_run { + println!("-- DRY RUN: The following SQL would be executed --\n"); + } else { + println!("Creating tables in deployment: {}", deploy_id); + } + + // Compile the project first (skip type checking since we're deploying) + let compile_args = super::compile::CompileArgs { + typecheck: false, // Skip type checking for create-tables + docker_image: None, + }; + let planned_project = super::compile::run(directory, compile_args).await?; + + // Connect to the database + let mut client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + (client).validate_privileges(&planned_project).await?; + client.validate_cluster_isolation(&planned_project).await?; + client.validate_sources_exist(&planned_project).await?; + verbose!("Validation successful"); + + project::deployment_snapshot::initialize_deployment_table(&client).await?; + + // Validate deployment doesn't already exist + let existing_metadata = client.get_deployment_metadata(&deploy_id).await?; + if existing_metadata.is_some() { + return Err(CliError::InvalidEnvironmentName { + name: format!("deployment '{}' already exists", deploy_id), + }); + } + + // Filter to only table objects (CreateTable and CreateTableFromSource) + let table_object_ids: BTreeSet = planned_project + .iter_objects() + .filter(|obj| { + matches!( + obj.typed_object.stmt, + Statement::CreateTable(_) | Statement::CreateTableFromSource(_) + ) + }) + .map(|obj| obj.id.clone()) + .collect(); + + if table_object_ids.is_empty() { + println!("No tables found in project"); + return Ok(()); + } + + // Get sorted table objects (respecting dependencies) + let table_objects = planned_project.get_sorted_objects_filtered(&table_object_ids)?; + + println!("Found {} table(s) in project", table_objects.len()); + + // Query which tables already exist + let existing_tables = client.check_tables_exist(&table_object_ids).await?; + + // Filter to only tables that don't exist + let tables_to_create: Vec<_> = table_objects + .into_iter() + .filter(|(obj_id, _)| !existing_tables.contains(obj_id)) + .collect(); + + // Show what's being skipped + if !existing_tables.is_empty() { + 
println!("\nTables that already exist (skipping):"); + let mut existing_list: Vec<_> = existing_tables.iter().collect(); + existing_list.sort_by_key(|obj| (&obj.database, &obj.schema, &obj.object)); + for table_id in existing_list { + println!( + " - {}.{}.{}", + table_id.database, table_id.schema, table_id.object + ); + } + } + + // If all tables exist, exit early + if tables_to_create.is_empty() { + println!( + "\nAll {} table(s) already exist. Nothing to create.", + table_object_ids.len() + ); + return Ok(()); + } + + println!("\nCreating {} new table(s)...", tables_to_create.len()); + + // Collect all schemas that contain tables to create + let mut table_schemas = BTreeMap::new(); + for (object_id, _) in &tables_to_create { + table_schemas.insert( + (object_id.database.clone(), object_id.schema.clone()), + crate::client::DeploymentKind::Tables, + ); + } + + // Create executor with dry-run mode + let executor = helpers::DeploymentExecutor::with_dry_run(&client, dry_run); + + // Create schemas and execute their mod statements + if !table_schemas.is_empty() { + if !dry_run { + println!("Preparing schemas..."); + } else { + println!("-- Create schemas --"); + } + + for ((database, schema), _kind) in &table_schemas { + verbose!("Creating schema {}.{} if not exists", database, schema); + let create_schema_sql = format!("CREATE SCHEMA IF NOT EXISTS {}.{}", database, schema); + executor.execute_sql(&create_schema_sql).await?; + } + + // Execute schema mod statements for schemas that contain tables + for mod_stmt in planned_project.iter_mod_statements() { + match mod_stmt { + project::ModStatement::Database { + database, + statement, + } => { + // Check if any schema in this database contains tables + let has_tables = table_schemas.keys().any(|(db, _)| db == database); + if has_tables { + verbose!("Applying database setup for: {}", database); + executor.execute_sql(statement).await?; + } + } + project::ModStatement::Schema { + database, + schema, + statement, + } => { + if table_schemas.contains_key(&(database.to_string(), schema.to_string())) { + verbose!("Applying schema setup for: {}.{}", database, schema); + executor.execute_sql(statement).await?; + } + } + } + } + } + + if dry_run { + println!("-- Create tables --"); + } + + // Execute table statements (only for tables that don't exist) + let mut success_count = 0; + + for (idx, (object_id, typed_obj)) in tables_to_create.iter().enumerate() { + verbose!( + "Creating {}/{}: {}", + idx + 1, + tables_to_create.len(), + object_id + ); + + // Execute the table statement along with indexes, grants, and comments + executor.execute_object(typed_obj).await?; + + if !dry_run { + println!( + " ✓ {}.{}.{}", + object_id.database, object_id.schema, object_id.object + ); + } + success_count += 1; + } + + // Skip deployment tracking in dry-run mode + if !dry_run { + // Build snapshot for deployment tracking - only include tables that were created + let mut snapshot_objects = BTreeMap::new(); + for (object_id, typed_obj) in &tables_to_create { + let hash = project::deployment_snapshot::compute_typed_hash(typed_obj); + snapshot_objects.insert(object_id.clone(), hash); + } + + let new_snapshot = project::deployment_snapshot::DeploymentSnapshot { + objects: snapshot_objects, + schemas: table_schemas.clone(), // Already contains only schemas with tables + }; + + // Collect deployment metadata + let metadata = helpers::collect_deployment_metadata(&client, directory).await; + + // Write deployment state to database (promoted deployment) + let now = Utc::now(); + 
project::deployment_snapshot::write_to_database( + &client, + &new_snapshot, + &deploy_id, + &metadata, + Some(now), + ) + .await?; + + println!("\n✓ Successfully created {} new table(s)", success_count); + if !existing_tables.is_empty() { + println!( + " Skipped {} table(s) that already existed", + existing_tables.len() + ); + } + } else { + println!("-- End of dry run ({} statement(s)) --", success_count); + } + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/debug.rs b/src/mz-deploy/src/cli/commands/debug.rs new file mode 100644 index 0000000000000..65ff3c6e2db53 --- /dev/null +++ b/src/mz-deploy/src/cli/commands/debug.rs @@ -0,0 +1,56 @@ +//! Debug command - test database connection. + +use crate::cli::CliError; +use crate::client::{Client, Profile}; +use crossterm::style::Stylize; +use owo_colors::OwoColorize; + +/// Test database connection with the specified profile. +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// +/// # Returns +/// Ok(()) if connection succeeds +/// +/// # Errors +/// Returns `CliError::Connection` if connection fails +pub async fn run(profile: &Profile) -> Result<(), CliError> { + let profile_display = profile.name.as_str(); + println!("{}: {}", "Profile".green(), profile_display.cyan()); + + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + let row = client + .query_one( + r#" + SELECT + mz_version() AS version, + mz_environment_id() AS environment_id, + current_role() as role"#, + &[], + ) + .await?; + + let version: String = row.get("version"); + let environment_id: String = row.get("environment_id"); + let role: String = row.get("role"); + + let row = client.query_one("show cluster", &[]).await?; + let cluster: String = row.get("cluster"); + + println!( + "{} {}:{}", + "Connected to".green(), + profile.host.to_string().cyan(), + profile.port.to_string().cyan() + ); + println!(" {}: {}", "Environment".dimmed(), environment_id); + println!(" {}: {}", "Cluster".dimmed(), cluster); + println!(" {}: {}", "Version".dimmed(), version); + println!(" {}: {}", "Role".dimmed(), role.yellow()); + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/deployments.rs b/src/mz-deploy/src/cli/commands/deployments.rs new file mode 100644 index 0000000000000..d5eb08a5d8cf9 --- /dev/null +++ b/src/mz-deploy/src/cli/commands/deployments.rs @@ -0,0 +1,123 @@ +//! Deployments command - list active staging deployments. + +use crate::cli::CliError; +use crate::client::{Client, Profile}; +use chrono::Utc; +use owo_colors::OwoColorize; + +/// List all active staging deployments. +/// +/// This command: +/// - Queries all deployments where promoted_at IS NULL (staging only) +/// - Groups results by environment name +/// - Displays schemas in each staging environment with deployment metadata +/// +/// Similar to `git branch` - shows active development branches. 
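+///
+/// Example output (values are illustrative):
+///
+/// ```text
+///  ● dev by alice (2 hours ago) [objects]
+///     commit: 0123abc
+///     clusters: all ready
+///     materialize.analytics
+/// ```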
+/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `allowed_lag_secs` - Maximum allowed lag in seconds before marking as "lagging" +/// +/// # Returns +/// Ok(()) if listing succeeds +/// +/// # Errors +/// Returns `CliError::Connection` for database errors +pub async fn run(profile: &Profile, allowed_lag_secs: i64) -> Result<(), CliError> { + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + client.create_deployments().await?; + let deployments = client.list_staging_deployments().await?; + + if deployments.is_empty() { + println!("No active staging deployments."); + println!(); + println!("To create a staging deployment, run:"); + println!(" {} {} {}", "mz-deploy".cyan(), "stage".cyan(), ".".cyan()); + return Ok(()); + } + + println!("Active staging deployments:"); + println!(); + + let mut env_names: Vec<_> = deployments.keys().collect(); + env_names.sort(); + + for env_name in env_names { + let deployment = &deployments[env_name]; + + // Format timestamp + let now = Utc::now(); + let duration = now.signed_duration_since(deployment.deployed_at); + let timestamp = if duration.num_seconds() < 0 { + "recently".to_string() + } else { + let hours = duration.num_hours(); + if hours < 1 { + let minutes = duration.num_minutes(); + format!("{} minutes ago", minutes) + } else if hours < 24 { + format!("{} hours ago", hours) + } else { + let days = hours / 24; + format!("{} days ago", days) + } + }; + + println!( + " {} {} by {} {} [{}]", + "●".green(), + env_name.cyan().bold(), + deployment.deployed_by.yellow(), + format!("({})", timestamp).dimmed(), + deployment.kind.to_string().dimmed() + ); + + // Display commit if available + if let Some(commit_sha) = &deployment.git_commit { + println!(" commit: {}", commit_sha.dimmed()); + } + + // Get hydration status for this deployment + match client + .get_deployment_hydration_status_with_lag(env_name, allowed_lag_secs) + .await + { + Ok(hydration_status) if !hydration_status.is_empty() => { + use crate::client::ClusterDeploymentStatus; + let mut ready_count = 0i64; + #[allow(clippy::as_conversions)] + let total_clusters = hydration_status.len() as i64; + + for ctx in &hydration_status { + if matches!(ctx.status, ClusterDeploymentStatus::Ready) { + ready_count += 1; + } + } + + let text = if ready_count == total_clusters { + "clusters: all ready".to_string() + } else { + format!("clusters: {} of {} ready", ready_count, total_clusters) + }; + println!(" {}\n", text.blue()); + } + Ok(_) => { + // Empty hydration status - deployment has no clusters + // Don't display anything + } + Err(_) => { + // Error getting hydration status - don't block display, just skip + } + } + + for (database, schema) in &deployment.schemas { + println!(" {}.{}", database.dimmed(), schema); + } + println!(); + } + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/describe.rs b/src/mz-deploy/src/cli/commands/describe.rs new file mode 100644 index 0000000000000..314b71f362554 --- /dev/null +++ b/src/mz-deploy/src/cli/commands/describe.rs @@ -0,0 +1,104 @@ +//! Describe command - show detailed information about a specific deployment. + +use crate::cli::CliError; +use crate::client::{Client, DeploymentKind, Profile}; +use chrono::{DateTime, Local}; +use owo_colors::OwoColorize; + +/// Show detailed information about a specific deployment. +/// +/// This command: +/// - Queries deployment metadata (when deployed, by whom, git commit, etc.) 
+/// - Lists all objects included in the deployment with their hashes +/// +/// Use `mz-deploy history` to see a list of deployment IDs, then use this +/// command to drill into a specific deployment's details. +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `deploy_id` - The deployment ID to describe +/// +/// # Returns +/// Ok(()) if the deployment is found and displayed +/// +/// # Errors +/// Returns `CliError::Connection` for database errors +/// Returns `CliError::Message` if deployment is not found +pub async fn run(profile: &Profile, deploy_id: &str) -> Result<(), CliError> { + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + client.create_deployments().await?; + + // Get deployment metadata + let details = client.get_deployment_details(deploy_id).await?; + let Some(details) = details else { + return Err(CliError::Message(format!( + "Deployment '{}' not found", + deploy_id + ))); + }; + + // Get deployment objects + let snapshot = client.get_deployment_objects(Some(deploy_id)).await?; + + // Display deployment header + println!( + "{} {} [{}]", + "deployment".yellow().bold(), + deploy_id.cyan(), + details.kind.to_string().dimmed() + ); + + if let Some(commit_sha) = &details.git_commit { + println!("{}: {}", "Commit".dimmed(), commit_sha); + } + + println!( + "{}: {}", + "Deployed by".dimmed(), + details.deployed_by.yellow() + ); + + let deployed_datetime: DateTime = details.deployed_at.with_timezone(&Local); + let deployed_str = deployed_datetime + .format("%a %b %d %H:%M:%S %Y %z") + .to_string(); + println!("{}: {}", "Deployed at".dimmed(), deployed_str); + + if let Some(promoted) = details.promoted_at { + if details.kind == DeploymentKind::Objects { + let promoted_datetime: DateTime = promoted.with_timezone(&Local); + let promoted_str = promoted_datetime + .format("%a %b %d %H:%M:%S %Y %z") + .to_string(); + println!("{}: {}", "Promoted at".dimmed(), promoted_str); + } + } else { + println!("{}: {}", "Status".dimmed(), "staging".yellow()); + } + + println!(); + + // Display schemas + println!("{} ({}):", "Schemas".bold(), details.schemas.len()); + for (database, schema) in &details.schemas { + println!(" {}.{}", database.dimmed(), schema); + } + println!(); + + // Display objects + println!("{} ({}):", "Objects".bold(), snapshot.objects.len()); + for (object_id, hash) in &snapshot.objects { + println!( + " {}.{}.{} {}", + object_id.database.dimmed(), + object_id.schema.dimmed(), + object_id.object, + hash.chars().take(12).collect::().dimmed() + ); + } + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/gen_data_contracts.rs b/src/mz-deploy/src/cli/commands/gen_data_contracts.rs new file mode 100644 index 0000000000000..798cf77f53bed --- /dev/null +++ b/src/mz-deploy/src/cli/commands/gen_data_contracts.rs @@ -0,0 +1,62 @@ +//! Generate data contracts command - creates types.lock for external dependencies. + +use crate::cli::CliError; +use crate::client::{Client, Profile}; +use crate::project; +use std::path::Path; + +/// Generate data contracts (types.lock) for external dependencies. 
+/// +/// This command: +/// - Loads and parses the project +/// - Connects to the database +/// - Queries schema information for external dependencies +/// - Writes types.lock file with type information +/// +/// This is useful for: +/// - CI/CD pipelines that need to validate data contracts +/// - External tooling that validates schemas +/// - Developers who want type information without full compile validation +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `directory` - Project root directory +/// +/// # Returns +/// Ok(()) if types.lock is successfully generated +/// +/// # Errors +/// Returns `CliError::Project` if project loading fails +/// Returns `CliError::Connection` if database connection fails +pub async fn run(profile: &Profile, directory: &Path) -> Result<(), CliError> { + println!("Generating data contracts for external dependencies..."); + + // Connect to the database + let mut client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + // Load and plan the project + let planned_project = project::plan(directory)?; + + if planned_project.external_dependencies.is_empty() { + println!("No external dependencies found - types.lock not needed"); + return Ok(()); + } + + println!( + "Found {} external dependencies", + planned_project.external_dependencies.len() + ); + + // Query external types and write types.lock + let types = client.query_external_types(&planned_project).await?; + types.write_types_lock(directory)?; + + println!( + "Successfully generated types.lock with {} object schemas", + types.objects.len() + ); + + Ok(()) +} diff --git a/src/mz-deploy/src/cli/commands/history.rs b/src/mz-deploy/src/cli/commands/history.rs new file mode 100644 index 0000000000000..500d9b7460c9f --- /dev/null +++ b/src/mz-deploy/src/cli/commands/history.rs @@ -0,0 +1,113 @@ +//! History command - show deployment history in chronological order. + +use crate::cli::CliError; +use crate::client::{Client, Profile}; +use chrono::{DateTime, Local}; +use owo_colors::OwoColorize; +use std::io::Write; +use std::process::{Command, Stdio}; + +/// Show deployment history in chronological order (promoted deployments only). +/// +/// This command: +/// - Queries all promoted deployments (promoted_at IS NOT NULL) ordered by promoted_at DESC +/// - Groups deployments by environment and promotion time +/// - Lists all schemas included in each deployment +/// +/// Similar to `git log` - shows historical production deployment activity with +/// each deployment showing the "commit message" (schemas changed). 
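+///
+/// Example output (values are illustrative):
+///
+/// ```text
+/// deployment a1b2c3d [objects]
+/// Commit: 0123abcdef
+/// Promoted by: alice
+/// Date: Mon Jan 06 12:00:00 2025 +0000
+///
+///   materialize.analytics
+/// ```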
+/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `limit` - Optional limit on number of deployments to show +/// +/// # Returns +/// Ok(()) if listing succeeds +/// +/// # Errors +/// Returns `CliError::Connection` for database errors +pub async fn run(profile: &Profile, limit: Option) -> Result<(), CliError> { + // Connect to database + let client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + client.create_deployments().await?; + let history = client.list_deployment_history(limit).await?; + + if history.is_empty() { + println!("No deployment history found."); + println!(); + println!("To create and promote a deployment, run:"); + println!(" {} {} {}", "mz-deploy".cyan(), "stage".cyan(), ".".cyan()); + println!( + " {} {} {}", + "mz-deploy".cyan(), + "apply".cyan(), + "--staging-env ".cyan() + ); + return Ok(()); + } + + // Build output string first (while we still have async context) + let mut output = String::new(); + output.push_str("Deployment history (promoted):\n\n"); + + for entry in history { + // Convert UTC to local time for display + let datetime: DateTime = entry.promoted_at.with_timezone(&Local); + let date_str = datetime.format("%a %b %d %H:%M:%S %Y %z").to_string(); + + // Display deployment header (like a git commit) + output.push_str(&format!( + "{} {} [{}]\n", + "deployment".yellow().bold(), + entry.deploy_id.cyan(), + entry.kind.to_string().dimmed() + )); + if let Some(commit_sha) = &entry.git_commit { + output.push_str(&format!("{}: {}\n", "Commit".dimmed(), commit_sha)); + } + output.push_str(&format!( + "{}: {}\n", + "Promoted by".dimmed(), + entry.deployed_by.yellow() + )); + output.push_str(&format!("{}: {}\n", "Date".dimmed(), date_str)); + output.push('\n'); + + // List all schemas in this deployment (like files in a git commit) + for (database, schema) in &entry.schemas { + output.push_str(&format!(" {}.{}\n", database.dimmed(), schema)); + } + output.push('\n'); + } + + // Display with pager (spawned after async work is complete) + display_with_pager(&output); + + Ok(()) +} + +/// Display output through a pager (less) if available, otherwise print directly. +fn display_with_pager(content: &str) { + // Try to spawn less with flags: + // -R: interpret ANSI color codes + // -F: exit immediately if content fits on one screen + // -X: don't clear screen on exit + if let Ok(mut child) = Command::new("less") + .args(["-RFX"]) + .stdin(Stdio::piped()) + .spawn() + { + if let Some(mut stdin) = child.stdin.take() { + // Write content to less, ignore errors (e.g., broken pipe if user quits early) + let _ = stdin.write_all(content.as_bytes()); + } + // Wait for less to exit + let _ = child.wait(); + } else { + // Fallback: print directly if less isn't available + print!("{}", content); + } +} diff --git a/src/mz-deploy/src/cli/commands/ready.rs b/src/mz-deploy/src/cli/commands/ready.rs new file mode 100644 index 0000000000000..eb057571faacf --- /dev/null +++ b/src/mz-deploy/src/cli/commands/ready.rs @@ -0,0 +1,450 @@ +//! Ready command - wait for staging deployment cluster hydration. 
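+//!
+//! Example snapshot output (values and layout are illustrative):
+//!
+//! ```text
+//!  Deployment: dev
+//!
+//!  quickstart
+//!   [████████████████████████████████████████] ✓ ready 12/12 objects
+//!    └ lag: 0s
+//! ```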
+ +use crate::cli::CliError; +use crate::client::{ + Client, ClusterDeploymentStatus, ClusterStatusContext, FailureReason, HydrationStatusUpdate, + Profile, +}; +use crossterm::{ + cursor::{Hide, MoveToColumn, MoveUp, Show}, + execute, + style::Stylize, + terminal::{Clear, ClearType}, +}; +use futures::StreamExt; +use owo_colors::OwoColorize; +use std::collections::BTreeMap; +use std::io::{self, Write}; +use std::pin::pin; +use std::time::{Duration, Instant}; + +/// Wait for a staging deployment to become ready by monitoring hydration status. +/// +/// This command: +/// - Validates the staging deployment exists and hasn't been promoted +/// - Subscribes to cluster hydration status +/// - Shows a live dashboard tracking hydration, lag, and health for each cluster +/// - Exits when all clusters are ready or timeout is reached +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `deploy_id` - Staging deployment ID +/// * `snapshot` - If true, check once and exit; if false, track continuously +/// * `timeout` - Optional timeout in seconds +/// * `allowed_lag_secs` - Maximum allowed lag in seconds before marking as "lagging" +/// +/// # Returns +/// Ok(()) if deployment becomes ready +/// +/// # Errors +/// Returns `CliError::StagingEnvironmentNotFound` if deployment doesn't exist +/// Returns `CliError::StagingAlreadyPromoted` if already promoted +/// Returns `CliError::ReadyTimeout` if timeout is reached +pub async fn run( + profile: &Profile, + deploy_id: &str, + snapshot: bool, + timeout: Option, + allowed_lag_secs: i64, +) -> Result<(), CliError> { + // Connect to database + let mut client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + // Validate staging deployment exists and is not promoted + let metadata = client.get_deployment_metadata(deploy_id).await?; + match metadata { + Some(meta) if meta.promoted_at.is_some() => { + return Err(CliError::StagingAlreadyPromoted { + name: deploy_id.to_string(), + }); + } + Some(_) => {} + None => { + return Err(CliError::StagingEnvironmentNotFound { + name: deploy_id.to_string(), + }); + } + } + + if snapshot { + // Snapshot mode: query once and display status + run_snapshot(deploy_id, &client, allowed_lag_secs).await + } else { + // Continuous mode: subscribe and track with live dashboard + run_continuous(deploy_id, &mut client, timeout, allowed_lag_secs).await + } +} + +/// Run in snapshot mode: query hydration status once and display. +async fn run_snapshot( + deploy_id: &str, + client: &crate::client::Client, + allowed_lag_secs: i64, +) -> Result<(), CliError> { + let statuses = client + .get_deployment_hydration_status_with_lag(deploy_id, allowed_lag_secs) + .await?; + + if statuses.is_empty() { + println!("No clusters found in staging deployment '{}'", deploy_id); + return Ok(()); + } + + println!(); + println!("{}", format!(" Deployment: {}", deploy_id).cyan().bold()); + println!(); + + let mut has_failures = false; + let mut has_non_ready = false; + + for ctx in &statuses { + print_cluster_status(ctx, allowed_lag_secs); + + match &ctx.status { + ClusterDeploymentStatus::Failing { .. 
} => has_failures = true, + ClusterDeploymentStatus::Ready => {} + _ => has_non_ready = true, + } + } + + println!(); + print_summary(&statuses); + println!(); + + if has_failures { + Err(CliError::DeploymentFailing { + name: deploy_id.to_string(), + }) + } else if has_non_ready { + Err(CliError::ClustersHydrating) + } else { + println!("{}", " All clusters are ready!".green().bold()); + Ok(()) + } +} + +/// Print status for a single cluster with visual formatting. +fn print_cluster_status(ctx: &ClusterStatusContext, allowed_lag_secs: i64) { + let (status_icon, status_label, status_color) = match &ctx.status { + ClusterDeploymentStatus::Ready => ("✓", "ready", "green"), + ClusterDeploymentStatus::Hydrating { .. } => ("◐", "hydrating", "yellow"), + ClusterDeploymentStatus::Lagging { .. } => ("⚠", "lagging", "yellow"), + ClusterDeploymentStatus::Failing { .. } => ("✗", "failing", "red"), + }; + + // Cluster name header + println!(" {}", ctx.cluster_name.as_str().bold()); + + // Progress bar + let bar = render_progress_bar(ctx.hydrated_count, ctx.total_count, 40); + let progress_str = format!("{}/{} objects", ctx.hydrated_count, ctx.total_count); + + print!(" {} ", bar); + match status_color { + "green" => print!("{} {}", status_icon.green(), status_label.green()), + "yellow" => print!("{} {}", status_icon.yellow(), status_label.yellow()), + "red" => print!("{} {}", status_icon.red(), status_label.red()), + _ => print!("{} {}", status_icon, status_label), + } + println!(" {}", progress_str.dimmed()); + + // Additional context based on status + match &ctx.status { + ClusterDeploymentStatus::Ready => { + println!( + " {} lag: {}s", + "└".dimmed(), + ctx.max_lag_secs.to_string().green() + ); + } + ClusterDeploymentStatus::Hydrating { hydrated, total } => { + #[allow(clippy::as_conversions)] + let pct = if *total > 0 { + (*hydrated as f64 / *total as f64 * 100.0) as u8 + } else { + 0 + }; + println!(" {} {}% complete", "└".dimmed(), pct.to_string().yellow()); + } + ClusterDeploymentStatus::Lagging { max_lag_secs } => { + println!( + " {} lag: {}s (threshold: {}s)", + "└".dimmed(), + max_lag_secs.to_string().yellow().bold(), + allowed_lag_secs + ); + } + ClusterDeploymentStatus::Failing { reason } => { + println!(" {} {}", "└".dimmed(), reason.to_string().red()); + } + } + println!(); +} + +/// Render a Unicode progress bar. +#[allow(clippy::as_conversions)] +fn render_progress_bar(current: i64, total: i64, width: usize) -> String { + if total == 0 { + return format!("[{}]", "░".repeat(width).dimmed()); + } + + let filled = ((current as f64 / total as f64) * width as f64) as usize; + let empty = width.saturating_sub(filled); + + format!( + "[{}{}]", + "█".repeat(filled).cyan(), + "░".repeat(empty).dimmed() + ) +} + +/// Print summary footer with counts. +fn print_summary(statuses: &[ClusterStatusContext]) { + let mut ready = 0; + let mut hydrating = 0; + let mut lagging = 0; + let mut failing = 0; + + for ctx in statuses { + match ctx.status { + ClusterDeploymentStatus::Ready => ready += 1, + ClusterDeploymentStatus::Hydrating { .. } => hydrating += 1, + ClusterDeploymentStatus::Lagging { .. } => lagging += 1, + ClusterDeploymentStatus::Failing { .. 
} => failing += 1, + } + } + + print!(" "); + let mut parts = Vec::new(); + if ready > 0 { + parts.push(format!("{} ready", ready).green().to_string()); + } + if hydrating > 0 { + parts.push(format!("{} hydrating", hydrating).yellow().to_string()); + } + if lagging > 0 { + parts.push(format!("{} lagging", lagging).yellow().to_string()); + } + if failing > 0 { + parts.push(format!("{} failing", failing).red().to_string()); + } + println!("{}", parts.join(" · ")); +} + +/// Run in continuous mode: subscribe to hydration updates and show live dashboard. +async fn run_continuous( + deploy_id: &str, + client: &mut crate::client::Client, + timeout: Option, + allowed_lag_secs: i64, +) -> Result<(), CliError> { + // Get initial hydration status + let initial_statuses = client + .get_deployment_hydration_status_with_lag(deploy_id, allowed_lag_secs) + .await?; + + if initial_statuses.is_empty() { + println!("No clusters found in staging deployment '{}'", deploy_id); + return Ok(()); + } + + let start_time = Instant::now(); + + // Subscribe to hydration updates and monitor + let monitor_future = monitor_hydration_live( + deploy_id, + client, + initial_statuses, + start_time, + allowed_lag_secs, + ); + + if let Some(secs) = timeout { + match tokio::time::timeout(Duration::from_secs(secs), monitor_future).await { + Ok(result) => result, + Err(_) => Err(CliError::ReadyTimeout { + name: deploy_id.to_string(), + seconds: secs, + }), + } + } else { + monitor_future.await + } +} + +/// Monitor hydration status via SUBSCRIBE and update live dashboard. +async fn monitor_hydration_live( + deploy_id: &str, + client: &mut crate::client::Client, + initial_statuses: Vec, + start_time: Instant, + allowed_lag_secs: i64, +) -> Result<(), CliError> { + let mut stdout = io::stdout(); + let num_clusters = initial_statuses.len(); + + // Build initial state map + let mut cluster_states: BTreeMap = initial_statuses + .into_iter() + .map(|ctx| (ctx.cluster_name.clone(), ctx)) + .collect(); + + // Calculate lines per render (header + per-cluster lines + summary) + // Header: 3 lines, per cluster: 4 lines, summary: 2 lines + let lines_per_render = 3 + (num_clusters * 4) + 2; + + // Initial render + render_dashboard( + &mut stdout, + deploy_id, + &cluster_states, + start_time, + false, + allowed_lag_secs, + )?; + + let stream = client.subscribe_deployment_hydration(deploy_id, allowed_lag_secs); + let mut stream = pin!(stream); + + // Hide cursor during updates + execute!(stdout, Hide).ok(); + + while let Some(result) = stream.next().await { + let update = result.map_err(CliError::Connection)?; + + let status = update_to_status(&update); + + cluster_states.insert( + update.cluster_name.clone(), + ClusterStatusContext { + cluster_name: update.cluster_name, + cluster_id: update.cluster_id, + status, + hydrated_count: update.hydrated_count, + total_count: update.total_count, + max_lag_secs: update.max_lag_secs, + total_replicas: update.total_replicas, + problematic_replicas: update.problematic_replicas, + }, + ); + + // Move cursor up and clear, then re-render + #[allow(clippy::as_conversions)] + let lines = lines_per_render as u16; + execute!(stdout, MoveUp(lines), MoveToColumn(0)).ok(); + for _ in 0..lines_per_render { + execute!(stdout, Clear(ClearType::CurrentLine)).ok(); + println!(); + } + execute!(stdout, MoveUp(lines), MoveToColumn(0)).ok(); + + render_dashboard( + &mut stdout, + deploy_id, + &cluster_states, + start_time, + false, + allowed_lag_secs, + )?; + + let all_ready = cluster_states + .values() + .all(|ctx| 
matches!(ctx.status, ClusterDeploymentStatus::Ready)); + + if all_ready { + execute!(stdout, Show).ok(); + println!(); + println!("{}", " All clusters are ready!".green().bold()); + return Ok(()); + } + } + + execute!(stdout, Show).ok(); + Ok(()) +} + +/// Convert a HydrationStatusUpdate to a ClusterDeploymentStatus. +fn update_to_status(update: &HydrationStatusUpdate) -> ClusterDeploymentStatus { + match update.status { + ClusterDeploymentStatus::Ready => ClusterDeploymentStatus::Ready, + ClusterDeploymentStatus::Hydrating { .. } => ClusterDeploymentStatus::Hydrating { + hydrated: update.hydrated_count, + total: update.total_count, + }, + ClusterDeploymentStatus::Lagging { .. } => ClusterDeploymentStatus::Lagging { + max_lag_secs: update.max_lag_secs, + }, + ClusterDeploymentStatus::Failing { .. } => { + let reason = match update.failure_reason { + Some(FailureReason::NoReplicas) => FailureReason::NoReplicas, + Some(FailureReason::AllReplicasProblematic { .. }) => { + FailureReason::AllReplicasProblematic { + problematic: update.problematic_replicas, + total: update.total_replicas, + } + } + None => FailureReason::NoReplicas, + }; + ClusterDeploymentStatus::Failing { reason } + } + } +} + +/// Render the live dashboard. +fn render_dashboard( + stdout: &mut io::Stdout, + deploy_id: &str, + cluster_states: &BTreeMap, + start_time: Instant, + _is_update: bool, + allowed_lag_secs: i64, +) -> Result<(), CliError> { + let elapsed = start_time.elapsed(); + let elapsed_str = format_duration(elapsed); + + // Header + println!(); + println!( + "{}", + format!(" mz-deploy ready · deployment: {}", deploy_id) + .cyan() + .bold() + ); + println!(" {} {}", "elapsed:".dimmed(), elapsed_str.dimmed()); + println!(); + + // Sort clusters by name for consistent ordering + let mut cluster_names: Vec<_> = cluster_states.keys().collect(); + cluster_names.sort(); + + // Render each cluster + for name in cluster_names { + if let Some(ctx) = cluster_states.get(name) { + print_cluster_status(ctx, allowed_lag_secs); + } + } + + // Summary + let statuses: Vec<_> = cluster_states.values().cloned().collect(); + print_summary(&statuses); + + stdout.flush().ok(); + Ok(()) +} + +/// Format a duration as human-readable string. +fn format_duration(duration: Duration) -> String { + let secs = duration.as_secs(); + if secs < 60 { + format!("{}s", secs) + } else if secs < 3600 { + let mins = secs / 60; + let secs = secs % 60; + format!("{}m {}s", mins, secs) + } else { + let hours = secs / 3600; + let mins = (secs % 3600) / 60; + format!("{}h {}m", hours, mins) + } +} diff --git a/src/mz-deploy/src/cli/commands/stage.rs b/src/mz-deploy/src/cli/commands/stage.rs new file mode 100644 index 0000000000000..1eff878d6d9dc --- /dev/null +++ b/src/mz-deploy/src/cli/commands/stage.rs @@ -0,0 +1,774 @@ +//! Stage command - deploy to staging environment with renamed schemas and clusters. + +use crate::cli::{CliError, helpers}; +use crate::client::{Client, ClusterConfig, DeploymentKind, PendingStatement, Profile}; +use crate::project::ast::Statement; +use crate::project::changeset::ChangeSet; +use crate::project::object_id::ObjectId; +use crate::project::planned::extract_external_indexes; +use crate::project::typed::FullyQualifiedName; +use crate::project::{self, normalize::NormalizingVisitor}; +use crate::utils::{git, progress}; +use crate::verbose; +use mz_sql_parser::ast::Ident; +use std::collections::BTreeSet; +use std::path::Path; +use std::time::Instant; + +/// Deploy project to staging environment with renamed schemas and clusters. 
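+///
+/// The staging flow pairs with `apply`: stage first, then promote, as suggested
+/// by the CLI hints printed elsewhere in this crate (environment name `dev` is
+/// illustrative):
+///
+/// ```text
+/// mz-deploy stage .
+/// mz-deploy apply --staging-env dev
+/// ```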
+/// +/// This command implements blue/green deployment by creating staging versions of all +/// schemas and clusters with a suffix (e.g., `public_dev` for staging env "dev"). +/// Objects are deployed to these staging resources, allowing testing without +/// affecting production. Later, `apply --staging-env` can atomically swap staging +/// and production using ALTER SWAP. +/// +/// This command: +/// - Compiles the project (using `compile::run`) +/// - Determines staging environment name (from --name or git SHA) +/// - Creates staging schemas (schema_) for all schemas in the project +/// - Creates staging clusters (cluster_) by cloning production cluster configs +/// - Deploys all objects to staging environment with transformed names +/// - Records deployment metadata for conflict detection +/// +/// # Arguments +/// * `profile` - Database profile containing connection information +/// * `stage_name` - Optional staging environment name (defaults to random 7-char hex) +/// * `directory` - Project root directory +/// * `allow_dirty` - Allow deploying with uncommitted changes +/// * `no_rollback` - Skip automatic rollback on failure (for debugging) +/// * `dry_run` - If true, print SQL instead of executing +/// +/// # Returns +/// Ok(()) if staging deployment succeeds +/// +/// # Errors +/// Returns `CliError::GitDirty` if repository has uncommitted changes and allow_dirty is false +/// Returns `CliError::Connection` for database errors +/// Returns `CliError::Project` for project compilation errors +pub async fn run( + profile: &Profile, + stage_name: Option<&str>, + directory: &Path, + allow_dirty: bool, + no_rollback: bool, + dry_run: bool, +) -> Result<(), CliError> { + let start_time = Instant::now(); + + // Check for uncommitted changes before proceeding + if !allow_dirty && git::is_dirty(directory) { + return Err(CliError::GitDirty); + } + + let stage_name = match stage_name { + Some(name) => name.to_string(), + None => helpers::generate_random_env_name(), + }; + + if dry_run { + println!("-- DRY RUN: The following SQL would be executed --\n"); + } else { + println!("Deploying to staging environment: {}", stage_name); + } + + // Run compile to validate and get the project (skip type checking for staging deployment) + let compile_args = super::compile::CompileArgs { + typecheck: false, // Skip type checking for staging deployment + docker_image: None, + }; + let planned_project = super::compile::run(directory, compile_args).await?; + + let staging_suffix = format!("_{}", stage_name); + + // Connect to the database + let mut client = Client::connect_with_profile(profile.clone()) + .await + .map_err(CliError::Connection)?; + + // Stage 1: Analyze project changes + progress::stage_start("Analyzing project changes"); + let analyze_start = Instant::now(); + + // Initialize deployment tracking infrastructure + project::deployment_snapshot::initialize_deployment_table(&client).await?; + + // Validate deployment doesn't already exist + let existing_metadata = client.get_deployment_metadata(&stage_name).await?; + + if existing_metadata.is_some() { + return Err(CliError::InvalidEnvironmentName { + name: format!("deployment '{}' already exists", stage_name), + }); + } + + // Build new snapshot from current planned project + let new_snapshot = project::deployment_snapshot::build_snapshot_from_planned(&planned_project)?; + + // Load PRODUCTION deployment state for comparison (environment=None) + // Stage always compares against production, not against previous staging deployments + let 
production_snapshot = + project::deployment_snapshot::load_from_database(&client, None).await?; + + let change_set = if !production_snapshot.objects.is_empty() { + Some(ChangeSet::from_deployment_snapshot_comparison( + &production_snapshot, + &new_snapshot, + &planned_project, + )) + } else { + None + }; + + let objects = if let Some(ref cs) = change_set { + if cs.is_empty() { + progress::info("No changes detected compared to production, skipping deployment"); + return Ok(()); + } + + verbose!("{}", cs); + planned_project.get_sorted_objects_filtered(&cs.objects_to_deploy)? + } else { + verbose!("Full deployment: no production deployment found"); + planned_project.get_sorted_objects()? + }; + + // Separate tables, sinks, and other objects + // - Tables: filter out (use create-tables command for those) + // - Sinks: store for deferred execution during apply (they write to external systems) + // - Other objects: deploy to staging + let objects_before_filter = objects.len(); + let mut sinks: Vec<_> = Vec::new(); + let objects: Vec<_> = objects + .into_iter() + .filter(|(object_id, typed_obj)| { + match &typed_obj.stmt { + Statement::CreateTable(_) | Statement::CreateTableFromSource(_) => false, + Statement::CreateSink(_) => { + // Collect sinks for deferred execution + sinks.push((object_id.clone(), *typed_obj)); + false + } + _ => true, + } + }) + .collect(); + + let table_count = objects_before_filter - objects.len() - sinks.len(); + if table_count > 0 { + verbose!( + "Skipped {} table(s) - use 'mz-deploy create-tables' for those", + table_count + ); + } + if !sinks.is_empty() { + verbose!( + "Found {} sink(s) - will be created during apply after swap", + sinks.len() + ); + } + + // Validate remaining objects don't depend on missing tables + let object_ids: BTreeSet<_> = objects.iter().map(|(id, _)| id.clone()).collect(); + client + .validate_table_dependencies(&planned_project, &object_ids) + .await?; + + let analyze_duration = analyze_start.elapsed(); + progress::stage_success( + &format!( + "Ready to deploy {} view(s)/materialized view(s)", + objects.len() + ), + analyze_duration, + ); + + // Collect schemas and clusters from objects that are actually being deployed + let mut schema_set = BTreeSet::new(); + let mut cluster_set = BTreeSet::new(); + + for (object_id, typed_obj) in &objects { + schema_set.insert((object_id.database.clone(), object_id.schema.clone())); + cluster_set.extend(typed_obj.clusters()); + } + + // Also include clusters from the changeset if available + if let Some(ref cs) = change_set { + for cluster in &cs.dirty_clusters { + cluster_set.insert(cluster.name.clone()); + } + } else { + // For full deployment, include all project clusters + for cluster in &planned_project.cluster_dependencies { + cluster_set.insert(cluster.name.clone()); + } + } + + // Stage 2: Validate project before writing any metadata or creating resources + // This ensures databases, schemas, clusters, and external dependencies exist + progress::stage_start("Validating project"); + let validate_start = Instant::now(); + client.validate_project(&planned_project, directory).await?; + client.validate_cluster_isolation(&planned_project).await?; + client.validate_privileges(&planned_project).await?; + client + .validate_sink_connections_exist(&planned_project) + .await?; + let validate_duration = validate_start.elapsed(); + progress::stage_success("All validations passed", validate_duration); + + // Skip metadata recording in dry-run mode + if !dry_run { + // Stage 3: Collect deployment metadata and 
write to database BEFORE creating resources + // This allows abort logic to clean up even if resource creation fails + progress::stage_start("Recording deployment metadata"); + let metadata_start = Instant::now(); + let metadata = helpers::collect_deployment_metadata(&client, directory).await; + + // Build a snapshot containing all objects that will be deployed + // Objects go to schemas with kind=Objects, sinks go to schemas with kind=Sinks + let mut staging_snapshot = project::deployment_snapshot::DeploymentSnapshot::default(); + + // Add regular objects (views, MVs) - schemas get kind=Objects + for (object_id, typed_obj) in &objects { + let hash = project::deployment_snapshot::compute_typed_hash(typed_obj); + staging_snapshot.objects.insert(object_id.clone(), hash); + + // Track which schema this object belongs to (kind=Objects for regular objects) + staging_snapshot.schemas.insert( + (object_id.database.clone(), object_id.schema.clone()), + DeploymentKind::Objects, + ); + } + + // Add sinks - schemas get kind=Sinks (only if not already marked as Objects) + // If a schema has both regular objects AND sinks, it stays as Objects + for (object_id, typed_obj) in &sinks { + let hash = project::deployment_snapshot::compute_typed_hash(typed_obj); + staging_snapshot.objects.insert(object_id.clone(), hash); + + // Only mark as Sinks if the schema doesn't already have regular objects + staging_snapshot + .schemas + .entry((object_id.database.clone(), object_id.schema.clone())) + .or_insert(DeploymentKind::Sinks); + } + + // Write deployment state to database BEFORE creating resources + // (environment=stage_name for staging, promoted_at=None) + project::deployment_snapshot::write_to_database( + &client, + &staging_snapshot, + &stage_name, + &metadata, + None, + ) + .await?; + + // Store pending statements for sinks (to be executed during apply after swap) + if !sinks.is_empty() { + let pending_statements: Vec = sinks + .iter() + .enumerate() + .map(|(idx, (object_id, typed_obj))| { + // Create original FQN (without staging suffix) + let original_item_name = mz_sql_parser::ast::UnresolvedItemName(vec![ + Ident::new(&object_id.database).expect("valid database"), + Ident::new(&object_id.schema).expect("valid schema"), + Ident::new(&object_id.object).expect("valid object"), + ]); + let original_fqn = FullyQualifiedName::from(original_item_name); + + // Use fully_qualifying visitor - sinks are created in production schemas + // (no staging suffix needed since they're created after the swap) + let visitor = NormalizingVisitor::fully_qualifying(&original_fqn); + + // Normalize the sink statement for production + // Note: cluster is not transformed since FullyQualifyingTransformer doesn't + // implement ClusterTransformer - the sink will use the production cluster as-is + let stmt = typed_obj + .stmt + .clone() + .normalize_name_with(&visitor, &original_fqn.to_item_name()) + .normalize_dependencies_with(&visitor); + + let hash = project::deployment_snapshot::compute_typed_hash(typed_obj); + + #[allow(clippy::as_conversions)] + PendingStatement { + deploy_id: stage_name.clone(), + sequence_num: idx as i32, + database: object_id.database.clone(), + schema: object_id.schema.clone(), + object: object_id.object.clone(), + object_hash: hash, + statement_sql: stmt.to_string(), + statement_kind: "sink".to_string(), + executed_at: None, + } + }) + .collect(); + + client + .insert_pending_statements(&pending_statements) + .await?; + verbose!( + "Stored {} pending sink statement(s)", + pending_statements.len() + ); 
+ } + + let metadata_duration = metadata_start.elapsed(); + progress::stage_success("Deployment metadata recorded", metadata_duration); + } + + // Perform resource creation with automatic rollback on failure + let result = create_resources_with_rollback( + &client, + &stage_name, + &staging_suffix, + &schema_set, + &cluster_set, + &planned_project, + &objects, + no_rollback, + dry_run, + ) + .await; + + match result { + Ok(success_count) => { + if dry_run { + println!("-- End of dry run ({} object(s)) --", success_count); + } else { + let total_duration = start_time.elapsed(); + progress::summary( + &format!( + "Successfully deployed to {} objects to '{}' staging environment", + success_count, stage_name + ), + total_duration, + ); + } + Ok(()) + } + Err(e) => Err(e), + } +} + +/// Create staging resources (schemas, clusters, objects) with automatic rollback on failure. +/// +/// This function performs all resource creation and automatically triggers rollback +/// on failure unless the no_rollback flag is set. +#[allow(clippy::too_many_arguments)] +async fn create_resources_with_rollback<'a>( + client: &crate::client::Client, + stage_name: &str, + staging_suffix: &str, + schema_set: &BTreeSet<(String, String)>, + cluster_set: &BTreeSet, + planned_project: &'a project::planned::Project, + objects: &'a [(ObjectId, &'a project::typed::DatabaseObject)], + no_rollback: bool, + dry_run: bool, +) -> Result { + // Create executor with dry-run mode + let executor = helpers::DeploymentExecutor::with_dry_run(client, dry_run); + + // Wrap resource creation in a closure that we can call and handle errors from + let create_result = async { + // Stage 4: Create staging schemas + if !dry_run { + progress::stage_start("Creating staging schemas"); + } else { + println!("-- Create staging schemas --"); + } + let schema_start = Instant::now(); + for (database, schema) in schema_set { + let staging_schema = format!("{}{}", schema, staging_suffix); + let create_schema_sql = format!( + "CREATE SCHEMA IF NOT EXISTS {}.{}", + database, staging_schema + ); + executor.execute_sql(&create_schema_sql).await?; + verbose!(" Created schema {}.{}", database, staging_schema); + } + if !dry_run { + let schema_duration = schema_start.elapsed(); + progress::stage_success( + &format!("Created {} staging schema(s)", schema_set.len()), + schema_duration, + ); + + // Create production schemas if they don't exist (needed for swap) + progress::info("Creating production schemas if not exists"); + for (database, schema) in schema_set { + client.create_schema(database, schema).await?; + verbose!(" Ensured schema {}.{} exists", database, schema); + } + } + + // Execute schema mod_statements for staging schemas + if !dry_run { + progress::stage_start("Applying schema setup statements"); + } else { + println!("-- Apply schema setup statements --"); + } + let mod_start = Instant::now(); + for mod_stmt in planned_project.iter_mod_statements() { + match mod_stmt { + project::ModStatement::Database { + database, + statement, + } => { + // Check if any schema in this database is in our schema_set + let has_schema = schema_set.iter().any(|(db, _)| db == database); + if has_schema { + verbose!("Applying database setup for: {}", database); + executor.execute_sql(statement).await?; + } + } + project::ModStatement::Schema { + database, + schema, + statement, + } => { + if schema_set.contains(&(database.to_string(), schema.to_string())) { + // Transform schema name to staging version + let staging_schema = format!("{}{}", schema, staging_suffix); + 
let transformed_stmt = statement.to_string().replace(
+                            &format!("{}.{}", database, schema),
+                            &format!("{}.{}", database, staging_schema),
+                        );
+
+                        verbose!("Applying schema setup for: {}.{}", database, staging_schema);
+                        executor.execute_sql(&transformed_stmt).await?;
+                    }
+                }
+            }
+        }
+        if !dry_run {
+            let mod_duration = mod_start.elapsed();
+            progress::stage_success("Schema setup statements applied", mod_duration);
+
+            // Write cluster mappings to deploy.clusters table BEFORE creating clusters
+            // This allows abort logic to clean up even if cluster creation fails
+            let cluster_names: Vec<String> = cluster_set.iter().cloned().collect();
+            client
+                .insert_deployment_clusters(stage_name, &cluster_names)
+                .await?;
+            verbose!("Cluster mappings recorded");
+        }
+
+        // Stage 5: Create staging clusters (by cloning production cluster configs)
+        if !dry_run {
+            progress::stage_start("Creating staging clusters");
+        } else {
+            println!("-- Create staging clusters --");
+        }
+        let cluster_start = Instant::now();
+        let mut created_clusters = 0;
+        for prod_cluster in cluster_set {
+            let staging_cluster = format!("{}{}", prod_cluster, staging_suffix);
+
+            if dry_run {
+                // In dry-run mode, just print the CREATE CLUSTER statement.
+                // We can't check if the cluster exists or get the prod config without side effects,
+                // so the size is shown as a placeholder referencing the production cluster.
+                let create_cluster_sql = format!(
+                    "CREATE CLUSTER {} (SIZE = '<same as {}>')",
+                    staging_cluster, prod_cluster
+                );
+                executor.execute_sql(&create_cluster_sql).await?;
+                created_clusters += 1;
+                continue;
+            }
+
+            // Check if staging cluster already exists
+            let cluster_exists = client.cluster_exists(&staging_cluster).await?;
+
+            if cluster_exists {
+                verbose!(" Cluster '{}' already exists, skipping", staging_cluster);
+                continue;
+            }
+
+            // Get production cluster configuration (handles both managed and unmanaged)
+            let config = client.get_cluster_config(prod_cluster).await?;
+
+            let config = match config {
+                Some(config) => config,
+                None => {
+                    return Err(CliError::ClusterNotFound {
+                        name: prod_cluster.clone(),
+                    });
+                }
+            };
+
+            // Create staging cluster with same configuration
+            client
+                .create_cluster_with_config(&staging_cluster, &config)
+                .await?;
+            created_clusters += 1;
+
+            // Log details based on cluster type
+            match &config {
+                ClusterConfig::Managed { options, grants } => {
+                    verbose!(
+                        " Created managed cluster '{}' (size: {}, replication_factor: {}, {} grant(s), cloned from '{}')",
+                        staging_cluster,
+                        options.size,
+                        options.replication_factor,
+                        grants.len(),
+                        prod_cluster
+                    );
+                }
+                ClusterConfig::Unmanaged { replicas, grants } => {
+                    verbose!(
+                        " Created unmanaged cluster '{}' with {} replica(s), {} grant(s) (cloned from '{}')",
+                        staging_cluster,
+                        replicas.len(),
+                        grants.len(),
+                        prod_cluster
+                    );
+                    for replica in replicas {
+                        verbose!(
+                            " - {} (size: {}{})",
+                            replica.name,
+                            replica.size,
+                            replica
+                                .availability_zone
+                                .as_ref()
+                                .map(|az| format!(", az: {}", az))
+                                .unwrap_or_default()
+                        );
+                    }
+                }
+            }
+        }
+
+        if !dry_run {
+            let cluster_duration = cluster_start.elapsed();
+            progress::stage_success(
+                &format!("Created {} cluster(s)", created_clusters),
+                cluster_duration,
+            );
+        }
+
+        // Stage 6: Deploy objects using staging transformer
+        if !dry_run {
+            progress::stage_start("Deploying objects to staging");
+        } else {
+            println!("-- Deploy objects to staging --");
+        }
+        let deploy_start = Instant::now();
+
+        // Collect ObjectIds from objects being deployed for the staging transformer
+        let objects_to_deploy_set: BTreeSet<_> =
+            objects.iter().map(|(oid, _)| oid.clone()).collect();
+
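+        // As an illustration (the suffix shown here is hypothetical): with a staging suffix
+        // of "_abc1234", a view defined as materialize.public.orders is created as
+        // materialize.public_abc1234.orders, and its IN CLUSTER reference gains the same
+        // suffix. External dependencies and objects outside `objects_to_deploy_set` keep
+        // their production names.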
+ // Deploy external indexes + let mut external_indexes: Vec<_> = planned_project + .iter_objects() + .filter(|object| !objects_to_deploy_set.contains(&object.id)) + .flat_map(extract_external_indexes) + .filter_map(|(cluster, index)| cluster_set.contains(&cluster.name).then_some(index)) + .collect(); + + // Transform cluster names in external indexes for staging + crate::project::normalize::transform_cluster_names_for_staging( + &mut external_indexes, + staging_suffix, + ); + for index in external_indexes { + verbose!("Creating external index {}", index); + executor.execute_sql(&index).await?; + } + + let mut success_count = 0; + for (idx, (object_id, typed_obj)) in objects.iter().enumerate() { + verbose!( + "Applying {}/{}: {}{} (to schema {}{})", + idx + 1, + objects.len(), + &object_id.object, + staging_suffix, + &object_id.schema, + staging_suffix + ); + + // Create original FQN (without staging suffix) + let original_item_name = mz_sql_parser::ast::UnresolvedItemName(vec![ + Ident::new(&object_id.database).expect("valid database"), + Ident::new(&object_id.schema).expect("valid schema"), + Ident::new(&object_id.object).expect("valid object"), + ]); + let original_fqn = FullyQualifiedName::from(original_item_name); + + // Create staging visitor (it will apply the suffix during normalization) + // External dependencies and objects not being deployed are NOT transformed + let visitor = NormalizingVisitor::staging( + &original_fqn, + staging_suffix.to_string(), + &planned_project.external_dependencies, + Some(&objects_to_deploy_set), + ); + + // Normalize and deploy main statement + // The visitor will transform all names and clusters to include the staging suffix + let stmt = typed_obj + .stmt + .clone() + .normalize_name_with(&visitor, &original_fqn.to_item_name()) + .normalize_dependencies_with(&visitor) + .normalize_cluster_with(&visitor); + + executor.execute_sql(&stmt).await?; + + // Deploy indexes, grants, and comments (normalize them with staging transformer) + let mut indexes = typed_obj.indexes.clone(); + let mut grants = typed_obj.grants.clone(); + let mut comments = typed_obj.comments.clone(); + + // Normalize references to use staging suffix + visitor.normalize_index_references(&mut indexes); + visitor.normalize_index_clusters(&mut indexes); + visitor.normalize_grant_references(&mut grants); + visitor.normalize_comment_references(&mut comments); + + for index in &indexes { + executor.execute_sql(index).await?; + } + + for grant in &grants { + executor.execute_sql(grant).await?; + } + + for comment in &comments { + executor.execute_sql(comment).await?; + } + + success_count += 1; + } + + if !dry_run { + let deploy_duration = deploy_start.elapsed(); + progress::stage_success( + &format!("Deployed {} view(s)/materialized view(s)", success_count), + deploy_duration, + ); + } + + // Return success count + Ok::(success_count) + } + .await; + + // Handle result with rollback on failure (skip rollback in dry-run mode) + match create_result { + Ok(count) => Ok(count), + Err(e) => { + if dry_run || no_rollback { + if !dry_run { + progress::error( + "Deployment failed (skipping rollback due to --no-rollback flag)", + ); + } + return Err(e); + } + + progress::error("Deployment failed, rolling back..."); + let (schemas, clusters) = rollback_staging_resources(client, stage_name).await; + + if schemas > 0 || clusters > 0 { + progress::info(&format!( + "Rolled back: {} schema(s), {} cluster(s)", + schemas, clusters + )); + } + + Err(e) + } + } +} + +/// Rollback staging resources on 
deployment failure. +/// +/// This function performs best-effort cleanup of staging resources created during +/// a failed deployment. It mirrors the abort command logic but uses a best-effort +/// approach where cleanup failures are logged rather than returning errors. +/// +/// # Arguments +/// * `client` - Database client +/// * `environment` - Staging environment name +/// +/// # Returns +/// Number of schemas and clusters that were cleaned up (for summary message) +async fn rollback_staging_resources( + client: &crate::client::Client, + environment: &str, +) -> (usize, usize) { + // Get staging resources using pattern matching (same as abort command) + let staging_schemas = match client.get_staging_schemas(environment).await { + Ok(schemas) => schemas, + Err(e) => { + verbose!("Warning: Failed to query staging schemas: {}", e); + vec![] + } + }; + + let staging_clusters = match client.get_staging_clusters(environment).await { + Ok(clusters) => clusters, + Err(e) => { + verbose!("Warning: Failed to query staging clusters: {}", e); + vec![] + } + }; + + let schema_count = staging_schemas.len(); + let cluster_count = staging_clusters.len(); + + // Drop staging schemas (best-effort) + if !staging_schemas.is_empty() { + verbose!("Dropping staging schemas..."); + match client.drop_staging_schemas(&staging_schemas).await { + Ok(()) => { + for (database, schema) in &staging_schemas { + verbose!(" Dropped {}.{}", database, schema); + } + } + Err(e) => { + verbose!("Warning: Failed to drop some schemas: {}", e); + } + } + } + + // Drop staging clusters (best-effort) + if !staging_clusters.is_empty() { + verbose!("Dropping staging clusters..."); + match client.drop_staging_clusters(&staging_clusters).await { + Ok(()) => { + for cluster in &staging_clusters { + verbose!(" Dropped {}", cluster); + } + } + Err(e) => { + verbose!("Warning: Failed to drop some clusters: {}", e); + } + } + } + + // Delete deployment records (best-effort) + verbose!("Deleting deployment records..."); + if let Err(e) = client.delete_deployment_clusters(environment).await { + verbose!("Warning: Failed to delete cluster records: {}", e); + } + + if let Err(e) = client.delete_pending_statements(environment).await { + verbose!("Warning: Failed to delete pending statements: {}", e); + } + + if let Err(e) = client.delete_deployment(environment).await { + verbose!("Warning: Failed to delete deployment records: {}", e); + } + + (schema_count, cluster_count) +} diff --git a/src/mz-deploy/src/cli/commands/test.rs b/src/mz-deploy/src/cli/commands/test.rs new file mode 100644 index 0000000000000..01793ea1e176a --- /dev/null +++ b/src/mz-deploy/src/cli/commands/test.rs @@ -0,0 +1,389 @@ +//! Test command - run unit tests against the database. + +use crate::cli::CliError; +use crate::project::{self, typed}; +use crate::types::{self, TypeCheckError, Types}; +use crate::unit_test; +use crate::utils::docker_runtime::DockerRuntime; +use mz_sql_parser::ast::Ident; +use owo_colors::OwoColorize; +use std::collections::BTreeSet; +use std::path::Path; + +/// Run unit tests against the database. +/// +/// This command: +/// - Loads the project from the filesystem +/// - Connects to the database +/// - Finds all test files in the `test/` directory +/// - Parses test files (`.mztest` format) +/// - For each test: +/// - Locates the target view in the project +/// - Desugars the test into SQL statements +/// - Executes setup statements (CREATE TEMP TABLE, etc.) 
+/// - Runs the test query (a query that returns rows only on failure) +/// - Reports pass/fail with detailed output +/// - Cleans up after each test with DISCARD ALL +/// +/// # Test file format +/// +/// Tests use a custom format: +/// ```text +/// # test_name +/// target_view +/// +/// field1 field2 field3 +/// ------ +/// value1 value2 value3 +/// value4 value5 value6 +/// ``` +/// +/// The test passes if the query returns no rows. Rows are returned when: +/// - Expected rows are MISSING from the actual results +/// - Unexpected rows appear in actual results +/// +/// # Arguments +/// * `directory` - Project root directory +/// +/// # Returns +/// Ok(()) if all tests pass +/// +/// # Errors +/// Returns `CliError::Project` if project loading fails +/// Returns `CliError::Connection` if database connection fails +/// Returns error if tests fail (exits with code 1) +pub async fn run(directory: &Path) -> Result<(), CliError> { + // Load the project (tests are loaded during compilation) + let planned_project = project::plan(directory)?; + + // Tests use their own mocks, so don't pre-create tables from types.lock + let empty_types = Types::default(); + + // Create Docker runtime and get connected client + let runtime = DockerRuntime::new(); + if planned_project.tests.is_empty() { + println!("No tests found in {}", directory.display()); + return Ok(()); + } + + // Load types for validation + // 1. Load types.lock (external dependencies) - ok if missing + let mut combined_types = types::load_types_lock(directory).unwrap_or_default(); + + // 2. Load types.cache (internal views) or trigger typecheck if stale/missing + let internal_types = + load_or_generate_types_cache(directory, &planned_project, &runtime).await?; + combined_types.merge(&internal_types); + + println!( + "{}\n", + format!("Running tests from {}:", directory.display()).bold() + ); + + let mut passed_tests = 0; + let mut failed_tests = 0; + let mut validation_failed = 0; + + // Run each test from the compiled project + for (object_id, test) in &planned_project.tests { + // Get dependencies for this object from the project's dependency graph + let dependencies = planned_project + .dependency_graph + .get(object_id) + .cloned() + .unwrap_or_else(BTreeSet::new); + + // Validate test before running + if let Err(e) = + unit_test::validate_unit_test(test, object_id, &combined_types, &dependencies) + { + println!( + "{} {} ... 
{}", + "test".cyan(), + test.name.cyan(), + "VALIDATION FAILED".red().bold() + ); + // Print the inner error which has the detailed display + match &e { + unit_test::TestValidationError::UnmockedDependency(inner) => eprintln!("{}", inner), + unit_test::TestValidationError::MockSchemaMismatch(inner) => eprintln!("{}", inner), + unit_test::TestValidationError::ExpectedSchemaMismatch(inner) => { + eprintln!("{}", inner) + } + unit_test::TestValidationError::InvalidAtTime(inner) => { + eprintln!("{}", inner) + } + unit_test::TestValidationError::TypesCacheUnavailable { reason } => { + eprintln!( + "{}: types cache unavailable: {}", + "error".bright_red().bold(), + reason + ); + } + } + validation_failed += 1; + continue; + } + + let client = match runtime.get_client(&empty_types).await { + Ok(client) => client, + Err(TypeCheckError::ContainerStartFailed(e)) => { + return Err(CliError::Message(format!( + "Docker not available for running tests: {}", + e + ))); + } + Err(e) => { + return Err(CliError::Message(format!( + "Failed to start test environment: {}", + e + ))); + } + }; + + // Validate at_time if present by attempting to cast it to mz_timestamp + if let Some(at_time) = &test.at_time { + let validation_query = format!("SELECT {}::mz_timestamp", at_time); + if let Err(e) = client.query(&validation_query, &[]).await { + println!( + "{} {} ... {}", + "test".cyan(), + test.name.cyan(), + "VALIDATION FAILED".red().bold() + ); + let error = unit_test::InvalidAtTimeError { + test_name: test.name.clone(), + at_time_value: at_time.clone(), + db_error: e.to_string(), + }; + eprintln!("{}", error); + validation_failed += 1; + continue; + } + } + + print!("{} {} ... ", "test".cyan(), test.name.cyan()); + + // Find the target object in the project + let target_obj = match planned_project.find_object(object_id) { + Some(obj) => obj, + None => { + println!("{}", "FAILED".red().bold()); + eprintln!( + " {}: target object '{}' not found in project", + "error".red().bold(), + object_id + ); + failed_tests += 1; + continue; + } + }; + + // Convert planned::ObjectId to typed::FullyQualifiedName for unit test processing + // Note: Ident::new() only fails for invalid SQL identifiers, but ObjectIds + // are created from successfully parsed SQL files, so identifiers are always valid. 
+ let typed_fqn = + typed::FullyQualifiedName::from(mz_sql_parser::ast::UnresolvedItemName(vec![ + Ident::new(&object_id.database) + .expect("database name from parsed SQL should be valid identifier"), + Ident::new(&object_id.schema) + .expect("schema name from parsed SQL should be valid identifier"), + Ident::new(&object_id.object) + .expect("object name from parsed SQL should be valid identifier"), + ])); + + // Desugar the test + let sql_statements = + unit_test::desugar_unit_test(test, &target_obj.typed_object.stmt, &typed_fqn); + + // Execute all SQL statements except the last one (which is the test query) + let mut execution_failed = false; + for sql in &sql_statements[..sql_statements.len() - 1] { + if let Err(e) = client.execute(sql, &[]).await { + println!("{}", "FAILED".red().bold()); + eprintln!(" {}: failed to execute SQL: {:?}", "error".red().bold(), e); + eprintln!(" statement: {}", sql); + execution_failed = true; + failed_tests += 1; + break; + } + } + + if execution_failed { + continue; + } + + // Execute the test query (last statement) + let test_query = &sql_statements[sql_statements.len() - 1]; + match client.query(test_query, &[]).await { + Ok(rows) => { + if rows.is_empty() { + println!("{}", "ok".green().bold()); + passed_tests += 1; + } else { + println!("{}", "FAILED".red().bold()); + eprintln!(" {}:", "Test assertion failed".yellow().bold()); + + // Print column headers + if let Some(first_row) = rows.first() { + let columns: Vec = first_row + .columns() + .iter() + .map(|col| col.name().to_string()) + .collect(); + let header = columns.join(" | "); + eprintln!(" {}", header.bold().cyan()); + eprintln!(" {}", "-".repeat(header.len()).cyan()); + } + + // Print rows + for row in &rows { + let mut values: Vec = Vec::new(); + for i in 0..row.len() { + let value_str = if let Ok(v) = row.try_get::<_, String>(i) { + v + } else if let Ok(v) = row.try_get::<_, i64>(i) { + v.to_string() + } else if let Ok(v) = row.try_get::<_, i32>(i) { + v.to_string() + } else if let Ok(v) = row.try_get::<_, f64>(i) { + v.to_string() + } else if let Ok(v) = row.try_get::<_, bool>(i) { + v.to_string() + } else { + "".to_string() + }; + + // Color the status column (first column) differently + if i == 0 { + let colored = match value_str.as_str() { + "MISSING" => value_str.red().bold().to_string(), + "UNEXPECTED" => value_str.yellow().bold().to_string(), + _ => value_str, + }; + values.push(colored); + } else { + values.push(value_str); + } + } + eprintln!(" {}", values.join(" | ")); + } + + failed_tests += 1; + } + } + Err(e) => { + println!("{}", "FAILED".red().bold()); + eprintln!( + " {}: failed to execute test query: {}", + "error".red().bold(), + e + ); + failed_tests += 1; + } + } + + // Clean up with DISCARD ALL + if let Err(e) = client.execute("DISCARD ALL", &[]).await { + eprintln!("warning: failed to execute DISCARD ALL: {}", e); + } + } + + // Print test summary + print!("\n{}: ", "test result".bold()); + let total_failed = failed_tests + validation_failed; + if total_failed == 0 { + print!("{}. ", "ok".green().bold()); + } else { + print!("{}. 
", "FAILED".red().bold()); + } + print!("{}; ", format!("{} passed", passed_tests).green()); + if failed_tests > 0 { + print!("{}; ", format!("{} failed", failed_tests).red()); + } else { + print!("{} failed; ", failed_tests); + } + if validation_failed > 0 { + println!( + "{}", + format!("{} validation errors", validation_failed).red() + ); + } else { + println!("{} validation errors", validation_failed); + } + + if total_failed > 0 { + return Err(CliError::TestsFailed { + failed: total_failed, + passed: passed_tests, + }); + } + + Ok(()) +} + +/// Load types.cache or generate it by running type checking if stale/missing. +async fn load_or_generate_types_cache( + directory: &Path, + planned_project: &project::planned::Project, + runtime: &DockerRuntime, +) -> Result { + // Check if types.cache exists and is up-to-date + if !types::is_types_cache_stale(directory) { + if let Ok(cached) = types::load_types_cache(directory) { + println!( + "{}", + "Using cached types from .mz-deploy/types.cache".dimmed() + ); + return Ok(cached); + } + } + + // Types cache is stale or missing - regenerate by type checking + println!( + "{}", + "Types cache stale or missing, running type check...".yellow() + ); + + // Load types.lock for external dependencies (used in Docker runtime) + let external_types = types::load_types_lock(directory).unwrap_or_default(); + + // Get a client from the Docker runtime with external dependencies staged + let mut client = match runtime.get_client(&external_types).await { + Ok(client) => client, + Err(TypeCheckError::ContainerStartFailed(e)) => { + // Docker not available - warn and return empty types + // Tests will still run but validation will be limited + eprintln!( + "{}: Docker not available for type checking: {}", + "warning".yellow().bold(), + e + ); + eprintln!( + "{}", + "Test validation will be limited without types.cache".yellow() + ); + return Ok(Types::default()); + } + Err(e) => { + return Err(CliError::Message(format!( + "Failed to start type check environment: {}", + e + ))); + } + }; + + // Run type checking (this will also generate types.cache) + match types::typecheck_with_client(&mut client, planned_project, directory).await { + Ok(()) => { + // Type checking succeeded and wrote types.cache - load it + types::load_types_cache(directory).map_err(|e| { + CliError::Message(format!("Failed to load generated types.cache: {}", e)) + }) + } + Err(e) => { + // Type check failed - return error + Err(CliError::TypeCheckFailed(e)) + } + } +} diff --git a/src/mz-deploy/src/cli/error.rs b/src/mz-deploy/src/cli/error.rs new file mode 100644 index 0000000000000..3c6ad02c96634 --- /dev/null +++ b/src/mz-deploy/src/cli/error.rs @@ -0,0 +1,320 @@ +//! Error types for CLI operations. +//! +//! This module provides error types for high-level CLI commands that wrap +//! lower-level errors from the client and project modules. + +use crate::client::{ConflictRecord, ConnectionError, DatabaseValidationError}; +use crate::project::deployment_snapshot::DeploymentSnapshotError; +use crate::project::error::{DependencyError, ProjectError}; +use crate::types::{TypeCheckError, TypesError}; +use crate::unit_test::TestValidationError; +use chrono::{DateTime, Local}; +use owo_colors::OwoColorize; +use thiserror::Error; + +/// Top-level error type for CLI operations. +/// +/// This wraps errors from project loading, database operations, and +/// adds CLI-specific error variants. 
+#[derive(Debug, Error)] +pub enum CliError { + /// Error during project compilation/loading + #[error(transparent)] + Project(#[from] ProjectError), + + /// Database connection error + #[error(transparent)] + Connection(#[from] ConnectionError), + + /// Deployment snapshot operation error + #[error(transparent)] + DeploymentSnapshot(#[from] DeploymentSnapshotError), + + /// Dependency analysis error + #[error(transparent)] + Dependency(#[from] DependencyError), + + /// Validation error (missing databases, schemas, clusters) + #[error(transparent)] + Validation(DatabaseValidationError), + + /// Types lock file error + #[error(transparent)] + Types(#[from] TypesError), + + /// Deployment conflict detected - schemas were updated after deployment started + #[error("deployment conflict: {count} schema{plural} updated since deployment started", + count = conflicts.len(), + plural = if conflicts.len() == 1 { "" } else { "s" })] + DeploymentConflict { conflicts: Vec }, + + /// Staging environment not found + #[error("staging environment '{name}' not found")] + StagingEnvironmentNotFound { name: String }, + + /// Staging environment already promoted + #[error("staging environment '{name}' has already been promoted")] + StagingAlreadyPromoted { name: String }, + + /// Failed to determine git SHA + #[error("failed to determine git SHA for staging environment name")] + GitShaFailed, + + /// Git repository has uncommitted changes + #[error("git repository has uncommitted changes")] + GitDirty, + + /// No schemas to deploy + #[error("no schemas found to deploy")] + NoSchemas, + + /// Attempting to overwrite production objects without proper safety flags + #[error("refusing to overwrite production objects")] + ProductionOverwriteNotAllowed { + objects: Vec<(String, String, String)>, // (database, schema, object) + }, + + /// Failed to create deployment table + #[error("failed to create deployment tracking table: {source}")] + DeploymentTableCreationFailed { source: ConnectionError }, + + /// Failed to execute SQL during deployment + #[error("failed to execute SQL statement: {source}")] + SqlExecutionFailed { + statement: String, + source: ConnectionError, + }, + + /// Failed to repoint a sink to a new upstream object + #[error("failed to repoint sink {sink}: {reason}")] + SinkRepointFailed { sink: String, reason: String }, + + /// Failed to write deployment state + #[error("failed to write deployment state to tracking table: {source}")] + DeploymentStateWriteFailed { source: ConnectionError }, + + /// Invalid staging environment name + #[error("invalid staging environment name: '{name}'")] + InvalidEnvironmentName { name: String }, + + /// Schema does not exist in database + #[error("schema '{schema}' does not exist in database '{database}'")] + SchemaNotFound { database: String, schema: String }, + + /// Cluster does not exist + #[error("cluster '{name}' does not exist")] + ClusterNotFound { name: String }, + + /// Tests failed during execution + #[error("{failed} test{plural} failed, {passed} passed", + plural = if *failed == 1 { "" } else { "s" })] + TestsFailed { failed: usize, passed: usize }, + + /// Test validation failed (schema mismatch, missing mocks) + #[error(transparent)] + TestValidationFailed(#[from] TestValidationError), + + /// Type check failed + #[error(transparent)] + TypeCheckFailed(#[from] TypeCheckError), + + /// Timeout waiting for deployment to be ready + #[error("timeout waiting for deployment '{name}' to be ready after {seconds} seconds")] + ReadyTimeout { name: String, 
seconds: u64 }, + + /// I/O error + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + /// Clusters are not yet hydrated + #[error("some clusters are still hydrating")] + ClustersHydrating, + + /// Deployment is failing due to cluster health issues + #[error("deployment '{name}' is failing due to cluster health issues")] + DeploymentFailing { name: String }, + + /// Generic error message + #[error("{0}")] + Message(String), +} + +impl CliError { + /// Get contextual hint for resolving this error. + /// + /// Returns `None` for errors that wrap other error types (they provide their own hints). + pub fn hint(&self) -> Option { + match self { + Self::Project(_) | Self::Connection(_) => None, + Self::DeploymentConflict { conflicts } => { + let conflict_list = conflicts + .iter() + .map(|c| { + let promoted_datetime: DateTime = c.promoted_at.into(); + let promoted_str = promoted_datetime + .format("%a %b %d %H:%M:%S %Y %z") + .to_string(); + format!(" - {}.{} (last promoted by '{}' at {})", + c.database.yellow(), + c.schema.yellow(), + c.deploy_id, + promoted_str) + }) + .collect::>() + .join("\n"); + Some(format!( + "the following schemas were updated in production after your deployment started:\n{}\n\n\ + Rebase your deployment by running:\n \ + {} {} {}\n \ + {} {} {} --name \n\n\ + Or use {} to force the deployment (may overwrite recent changes)", + conflict_list, + "mz-deploy".cyan(), + "abort".cyan(), + "--name ".cyan(), + "mz-deploy".cyan(), + "stage".cyan(), + ".".cyan(), + "--force".yellow().bold() + )) + } + Self::StagingEnvironmentNotFound { name } => Some(format!( + "verify the staging environment name '{}' is correct, or deploy to staging first using:\n \ + {} {} {} --name {}", + name.yellow(), + "mz-deploy".cyan(), + "stage".cyan(), + ".".cyan(), + name.cyan() + )), + Self::StagingAlreadyPromoted { .. } => Some( + "this staging environment has already been applied to production.\n\ + Deploy a new staging environment to make changes" + .to_string(), + ), + Self::GitShaFailed => Some( + "either run mz-deploy from inside a git repository, or provide a staging environment name using:\n \ + mz-deploy stage . --name " + .to_string(), + ), + Self::GitDirty => Some( + "commit or stash your changes before deploying, or use the --allow-dirty flag to deploy anyway" + .to_string(), + ), + Self::NoSchemas => Some( + "create at least one schema directory under your project directory (e.g., materialize/public/)" + .to_string(), + ), + Self::ProductionOverwriteNotAllowed { objects } => { + let object_list = objects + .iter() + .take(5) + .map(|(db, schema, obj)| { + format!(" - {}.{}.{}", db.yellow(), schema.yellow(), obj.yellow()) + }) + .collect::>() + .join("\n"); + let more = if objects.len() > 5 { + format!("\n ... and {} more", objects.len() - 5) + } else { + String::new() + }; + Some(format!( + "the following objects already exist in production:\n{}{}\n\n\ + To update existing objects, use blue/green deployment:\n \ + {} {} {}\n \ + {} {} {} ", + object_list, + more, + "mz-deploy".cyan(), + "stage".cyan(), + ".".cyan(), + "mz-deploy".cyan(), + "apply".cyan(), + "--staging-env".cyan() + )) + } + Self::DeploymentTableCreationFailed { .. } => Some( + "ensure your database user has CREATE privileges on the database" + .to_string(), + ), + Self::SqlExecutionFailed { statement, .. } => Some(format!( + "SQL statement:\n {}", + statement.lines().take(5).collect::>().join("\n ") + )), + Self::SinkRepointFailed { sink, .. 
} => Some(format!( + "the sink '{}' could not be repointed to the new upstream object.\n\ + This may happen if:\n \ + - The new object has an incompatible schema (e.g., Avro schema mismatch)\n \ + - The replacement object doesn't exist in the new schema\n\n\ + To proceed, you may need to manually drop and recreate the sink", + sink.yellow() + )), + Self::DeploymentStateWriteFailed { .. } => Some( + "the SQL was applied successfully, but deployment tracking failed.\n\ + The next deployment may re-apply some objects" + .to_string(), + ), + Self::InvalidEnvironmentName { .. } => Some( + "environment names must contain only alphanumeric characters, hyphens, and underscores" + .to_string(), + ), + Self::SchemaNotFound { database, schema } => Some(format!( + "create the schema first, or check that you're connected to the correct database.\n \ + CREATE SCHEMA {}.{}", + database.cyan(), + schema.cyan() + )), + Self::ClusterNotFound { name } => Some(format!( + "create the cluster first:\n \ + CREATE CLUSTER {} SIZE = '{}' REPLICATION FACTOR = 1", + name.cyan(), + "M.1-small".cyan() + )), + Self::TestsFailed { .. } => Some( + "review the test output above for details on which assertions failed" + .to_string(), + ), + Self::TestValidationFailed(_) => Some( + "review the validation error above and update your test to match the schema.\n\ + Run 'mz-deploy compile' to regenerate types.cache if needed" + .to_string(), + ), + Self::TypeCheckFailed(_) => Some( + "review the type checking errors above and fix any SQL syntax or dependency issues" + .to_string(), + ), + Self::ReadyTimeout { .. } => Some( + "deployment is taking longer than expected to hydrate. You can:\n \ + - Increase timeout with --timeout flag\n \ + - Check cluster replica status with: mz-deploy ready " + .to_string(), + ), + Self::ClustersHydrating => Some("check cluster replica status with: mz-deploy ready ".to_string()), + Self::DeploymentFailing { .. } => Some( + "one or more clusters are not ready. Check for:\n \ + - Missing replicas (cluster has no replicas configured)\n \ + - OOM-looping replicas (3+ OOM kills in 24 hours)\n\n\ + Use 'mz-deploy ready ' for details" + .to_string(), + ), + Self::Validation(_) | Self::Types(_) | Self::DeploymentSnapshot(_) | Self::Dependency(_) => { + // These errors provide their own context via transparent wrapping + None + } + Self::Io(_) | Self::Message(_) => None, + } + } +} + +impl From for CliError { + fn from(error: DatabaseValidationError) -> Self { + CliError::Validation(error) + } +} + +impl From for CliError { + fn from(msg: String) -> Self { + CliError::Message(msg) + } +} diff --git a/src/mz-deploy/src/cli/helpers.rs b/src/mz-deploy/src/cli/helpers.rs new file mode 100644 index 0000000000000..064fc31ee9164 --- /dev/null +++ b/src/mz-deploy/src/cli/helpers.rs @@ -0,0 +1,156 @@ +//! Shared helper functions for CLI commands. +//! +//! This module contains common functionality used across multiple commands +//! to reduce code duplication and ensure consistent behavior. + +use crate::cli::CliError; +use crate::client::Client; +use crate::project::{self, typed}; +use crate::utils::git::get_git_commit; +use std::path::Path; + +/// Collect deployment metadata (user and git commit). +/// +/// This function retrieves the current database user and git commit hash +/// for recording deployment provenance. If the current user cannot be +/// determined, it defaults to "unknown". 
+/// +/// # Arguments +/// * `client` - Database client for querying current user +/// * `directory` - Project directory for determining git commit +/// +/// # Returns +/// Deployment metadata containing user and optional git commit +pub async fn collect_deployment_metadata( + client: &Client, + directory: &Path, +) -> project::deployment_snapshot::DeploymentMetadata { + let deployed_by = client.get_current_user().await.unwrap_or_else(|e| { + eprintln!("warning: failed to get current user: {}", e); + "unknown".to_string() + }); + + let git_commit = get_git_commit(directory); + + project::deployment_snapshot::DeploymentMetadata { + deployed_by, + git_commit, + } +} + +/// Generate a random 7-character hex environment name. +/// +/// Uses SHA256 hash of current timestamp to generate a unique identifier +/// for deployments when no explicit name is provided. +/// +/// # Returns +/// A 7-character lowercase hex string (e.g., "a3f7b2c") +pub fn generate_random_env_name() -> String { + use sha2::{Digest, Sha256}; + use std::time::SystemTime; + + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("system time before Unix epoch") + .as_nanos(); + + let mut hasher = Sha256::new(); + hasher.update(now.to_le_bytes()); + let hash = hasher.finalize(); + + // Take first 4 bytes of hash and format as 7-char hex + format!( + "{:07x}", + u32::from_le_bytes([hash[0], hash[1], hash[2], hash[3]]) & 0xFFFFFFF + ) +} + +/// Helper for executing database object deployments. +/// +/// This struct consolidates the pattern of executing a database object's +/// SQL statements (main statement + indexes + grants + comments) with +/// consistent error handling. Supports dry-run mode where SQL is printed +/// instead of executed. +pub struct DeploymentExecutor<'a> { + client: &'a Client, + dry_run: bool, +} + +impl<'a> DeploymentExecutor<'a> { + /// Create a new deployment executor that executes SQL. + pub fn new(client: &'a Client) -> Self { + Self { + client, + dry_run: false, + } + } + + /// Create a deployment executor with configurable dry-run mode. + pub fn with_dry_run(client: &'a Client, dry_run: bool) -> Self { + Self { client, dry_run } + } + + /// Returns true if this executor is in dry-run mode. + pub fn is_dry_run(&self) -> bool { + self.dry_run + } + + /// Execute all SQL statements for a database object. + /// + /// This executes the main CREATE statement, followed by any indexes, + /// grants, and comments associated with the object. + /// + /// # Arguments + /// * `typed_obj` - The typed database object to deploy + /// + /// # Returns + /// Ok(()) if all statements execute successfully + /// + /// # Errors + /// Returns `CliError::SqlExecutionFailed` if any statement fails + pub async fn execute_object(&self, typed_obj: &typed::DatabaseObject) -> Result<(), CliError> { + // Execute main statement + self.execute_sql(&typed_obj.stmt).await?; + + // Execute indexes + for index in &typed_obj.indexes { + self.execute_sql(index).await?; + } + + // Execute grants + for grant in &typed_obj.grants { + self.execute_sql(grant).await?; + } + + // Execute comments + for comment in &typed_obj.comments { + self.execute_sql(comment).await?; + } + + Ok(()) + } + + /// Execute (or print in dry-run mode) a single SQL statement. 
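+    ///
+    /// For example (a sketch; the statement is illustrative), in dry-run mode the SQL is
+    /// printed instead of executed:
+    ///
+    /// ```ignore
+    /// let executor = DeploymentExecutor::with_dry_run(&client, true);
+    /// let sql = "CREATE SCHEMA IF NOT EXISTS materialize.public".to_string();
+    /// executor.execute_sql(&sql).await?;
+    /// ```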
+ /// + /// # Arguments + /// * `stmt` - Any type that can be converted to SQL string (via ToString) + /// + /// # Errors + /// Returns `CliError::SqlExecutionFailed` with statement context (only in non-dry-run mode) + pub async fn execute_sql(&self, stmt: &impl ToString) -> Result<(), CliError> { + let sql = stmt.to_string(); + + if self.dry_run { + println!("{};", sql); + println!(); + } else { + self.client.execute(&sql, &[]).await.map_err(|source| { + CliError::SqlExecutionFailed { + statement: sql, + source, + } + })?; + } + Ok(()) + } +} diff --git a/src/mz-deploy/src/client.rs b/src/mz-deploy/src/client.rs new file mode 100644 index 0000000000000..86992bb897d0c --- /dev/null +++ b/src/mz-deploy/src/client.rs @@ -0,0 +1,20 @@ +pub mod config; +mod connection; +mod deployment_ops; +mod errors; +mod introspection; +mod models; +mod validation; + +pub use config::Profile; +pub use connection::Client; +pub use deployment_ops::{ + ClusterDeploymentStatus, ClusterStatusContext, DEFAULT_ALLOWED_LAG_SECS, FailureReason, + HydrationStatusUpdate, +}; +pub use errors::{ConnectionError, DatabaseValidationError, format_relative_path}; +pub use models::{ + ApplyState, Cluster, ClusterConfig, ClusterGrant, ClusterOptions, ClusterReplica, + ConflictRecord, DeploymentDetails, DeploymentHistoryEntry, DeploymentKind, DeploymentMetadata, + DeploymentObjectRecord, PendingStatement, SchemaDeploymentRecord, StagingDeployment, +}; diff --git a/src/mz-deploy/src/client/config.rs b/src/mz-deploy/src/client/config.rs new file mode 100644 index 0000000000000..fdfa400c3d3bd --- /dev/null +++ b/src/mz-deploy/src/client/config.rs @@ -0,0 +1,193 @@ +use serde::Deserialize; +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ConfigError { + #[error( + "profiles configuration file not found. Searched:\n - {project_path}\n - {global_path}\n\nCreate a profiles.toml file in one of these locations with connection details." + )] + ProfilesNotFound { + project_path: String, + global_path: String, + }, + #[error("failed to read profiles configuration from {path}: {source}")] + ReadError { + path: String, + source: std::io::Error, + }, + #[error("failed to parse profiles configuration from {path}: {source}")] + ParseError { + path: String, + source: toml::de::Error, + }, + #[error("no default profile found. Add a profile named 'default' to your profiles.toml")] + NoDefaultProfile, + #[error("profile '{name}' not found in configuration")] + ProfileNotFound { name: String }, + #[error("environment variable '{var}' not found for profile '{profile}'")] + EnvVarNotFound { var: String, profile: String }, +} + +#[derive(Debug, Clone)] +pub struct Profile { + pub name: String, + pub host: String, + pub port: u16, + pub username: Option, + pub password: Option, +} + +#[derive(Debug, Deserialize, Clone)] +struct ProfileData { + pub host: String, + #[serde(default = "default_port")] + pub port: u16, + #[serde(alias = "user")] + pub username: Option, + pub password: Option, +} + +fn default_port() -> u16 { + 6875 +} + +#[derive(Debug)] +pub struct ProfilesConfig { + profiles: BTreeMap, + source_path: PathBuf, +} + +impl ProfilesConfig { + /// Load profiles configuration, checking project directory first, then global directory + /// + /// # Arguments + /// * `project_directory` - Optional project directory to search for profiles.toml. + /// If None, uses current working directory. 
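+    ///
+    /// # Example
+    ///
+    /// A sketch (the project path is illustrative):
+    ///
+    /// ```ignore
+    /// use std::path::Path;
+    ///
+    /// let config = ProfilesConfig::load(Some(Path::new("./my-project")))?;
+    /// let profile = config.expand_env_vars(config.get_default_profile()?)?;
+    /// println!("using profile '{}' at {}:{}", profile.name, profile.host, profile.port);
+    /// ```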
+ pub fn load(project_directory: Option<&Path>) -> Result { + let project_path = project_directory + .map(|dir| dir.join(".mz/profiles.toml")) + .unwrap_or_else(|| PathBuf::from(".mz/profiles.toml")); + + let global_path = dirs::home_dir() + .map(|home| home.join(".mz/profiles.toml")) + .unwrap_or_else(|| PathBuf::from("~/.mz/profiles.toml")); + + // Try project directory first + let (path, content) = if project_path.exists() { + let content = + fs::read_to_string(&project_path).map_err(|source| ConfigError::ReadError { + path: project_path.display().to_string(), + source, + })?; + (project_path, content) + } else if global_path.exists() { + let content = + fs::read_to_string(&global_path).map_err(|source| ConfigError::ReadError { + path: global_path.display().to_string(), + source, + })?; + (global_path, content) + } else { + return Err(ConfigError::ProfilesNotFound { + project_path: project_path.display().to_string(), + global_path: global_path.display().to_string(), + }); + }; + + let profiles_data: BTreeMap = + toml::from_str(&content).map_err(|source| ConfigError::ParseError { + path: path.display().to_string(), + source, + })?; + + // Convert ProfileData to Profile by adding the name field + let mut profiles = BTreeMap::new(); + for (name, data) in profiles_data { + profiles.insert( + name.clone(), + Profile { + name: name.clone(), + host: data.host, + port: data.port, + username: data.username, + password: data.password, + }, + ); + } + + // Validate that a default profile exists + if !profiles.contains_key("default") { + return Err(ConfigError::NoDefaultProfile); + } + + Ok(ProfilesConfig { + profiles, + source_path: path, + }) + } + + /// Get a profile by name + pub fn get_profile(&self, name: &str) -> Result { + self.profiles + .get(name) + .cloned() + .ok_or_else(|| ConfigError::ProfileNotFound { + name: name.to_string(), + }) + } + + /// Get the default profile + pub fn get_default_profile(&self) -> Result { + self.get_profile("default") + } + + /// Expand environment variables in a profile's password field + /// Supports ${VAR_NAME} syntax + pub fn expand_env_vars(&self, mut profile: Profile) -> Result { + if let Some(password) = &profile.password + && password.starts_with("${") + && password.ends_with("}") + { + let var_name = &password[2..password.len() - 1]; + let env_value = std::env::var(var_name).map_err(|_| ConfigError::EnvVarNotFound { + var: var_name.to_string(), + profile: profile.name.clone(), + })?; + profile.password = Some(env_value); + } + + // Also check for environment variable override + // Format: MZ_PROFILE_{PROFILE_NAME}_PASSWORD + let env_var_name = format!("MZ_PROFILE_{}_PASSWORD", profile.name.to_uppercase()); + if let Ok(password) = std::env::var(&env_var_name) { + profile.password = Some(password); + } + + Ok(profile) + } + + pub fn source_path(&self) -> &PathBuf { + &self.source_path + } + + /// Convenience method to load profiles and get a specific profile in one call + /// + /// # Arguments + /// * `project_directory` - Optional project directory to search for profiles.toml + /// * `profile_name` - Optional profile name. If None, uses "default" + pub fn load_profile( + project_directory: Option<&Path>, + profile_name: Option<&str>, + ) -> Result { + let config = Self::load(project_directory)?; + let profile = if let Some(name) = profile_name { + config.get_profile(name)? + } else { + config.get_default_profile()? 
+ }; + config.expand_env_vars(profile) + } +} diff --git a/src/mz-deploy/src/client/connection.rs b/src/mz-deploy/src/client/connection.rs new file mode 100644 index 0000000000000..a478d807c6ddb --- /dev/null +++ b/src/mz-deploy/src/client/connection.rs @@ -0,0 +1,1103 @@ +//! Database client for mz-deploy. +//! +//! This module provides the main `Client` struct for interacting with Materialize. +//! The client handles connection management and delegates specialized operations +//! to submodules: +//! +//! - `errors` - Error types for client operations +//! - `deployment_ops` - Deployment tracking and management +//! - `introspection` - Database metadata queries +//! - `validation` - Project validation against the database + +use crate::client::config::{Profile, ProfilesConfig}; +use crate::client::deployment_ops::{ + self, ClusterDeploymentStatus, ClusterStatusContext, DEFAULT_ALLOWED_LAG_SECS, FailureReason, + HydrationStatusUpdate, +}; +use crate::client::errors::{ConnectionError, DatabaseValidationError}; +use crate::client::introspection; +use crate::client::models::{ + ApplyState, Cluster, ClusterConfig, ClusterOptions, ConflictRecord, DeploymentDetails, + DeploymentHistoryEntry, DeploymentMetadata, DeploymentObjectRecord, PendingStatement, + SchemaDeploymentRecord, StagingDeployment, +}; +use crate::client::validation; +use crate::project::deployment_snapshot::DeploymentSnapshot; +use crate::project::object_id::ObjectId; +use crate::project::planned; +use crate::types::{ColumnType, Types}; +use crate::utils::sql_utils::quote_identifier; +use async_stream::try_stream; +use futures::Stream; +use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode}; +use postgres_openssl::MakeTlsConnector; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::Path; +use tokio_postgres::types::ToSql; +use tokio_postgres::{Client as PgClient, NoTls, Row, ToStatement}; + +/// Database client for interacting with Materialize. +/// +/// The `Client` struct provides methods for: +/// - Connecting to the database +/// - Schema and cluster management +/// - Deployment tracking +/// - Database introspection +/// - Project validation +pub struct Client { + client: PgClient, + profile: Profile, +} + +impl Client { + // ========================================================================= + // Connection Methods + // ========================================================================= + + /// Connect to the database using a named profile. + /// + /// Note: This method searches for profiles.toml in the current working directory. + /// For project-specific configuration, use `ProfilesConfig::load_profile()` with + /// a project directory and then `connect_with_profile()`. + pub async fn connect(profile_name: Option<&str>) -> Result { + // Load profiles configuration (searches in CWD for backwards compatibility) + let config = ProfilesConfig::load(None)?; + + // Get the requested profile or default + let profile = if let Some(name) = profile_name { + config.get_profile(name)? + } else { + config.get_default_profile()? + }; + + // Expand environment variables + let profile = config.expand_env_vars(profile)?; + + // Connect to the database + Self::connect_with_profile(profile).await + } + + /// Connect to the database using a Profile directly. + /// + /// Tries TLS connection first (required for Materialize Cloud), then falls back + /// to NoTls for local connections (e.g., localhost, Docker). 
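+    ///
+    /// # Example
+    ///
+    /// A minimal sketch against a locally running Materialize (values are illustrative):
+    ///
+    /// ```ignore
+    /// let profile = Profile {
+    ///     name: "default".to_string(),
+    ///     host: "localhost".to_string(),
+    ///     port: 6875,
+    ///     username: Some("materialize".to_string()),
+    ///     password: None,
+    /// };
+    /// let client = Client::connect_with_profile(profile).await?;
+    /// ```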
+ pub async fn connect_with_profile(profile: Profile) -> Result { + // Build connection string + // Values with special characters need to be quoted with single quotes, + // and single quotes/backslashes within values need to be escaped + let mut conn_str = format!("host={} port={}", profile.host, profile.port); + + if let Some(ref username) = profile.username { + conn_str.push_str(&format!(" user='{}'", escape_conn_string_value(username))); + } + + if let Some(ref password) = profile.password { + conn_str.push_str(&format!( + " password='{}'", + escape_conn_string_value(password) + )); + } + + // Determine if this is likely a cloud connection (not localhost) + let is_local = profile.host == "localhost" + || profile.host == "127.0.0.1" + || profile.host.starts_with("192.168.") + || profile.host.starts_with("10.") + || profile.host.starts_with("172."); + + let client = if is_local { + // Local connection - use NoTls + let (client, connection) = + tokio_postgres::connect(&conn_str, NoTls) + .await + .map_err(|source| ConnectionError::Connect { + host: profile.host.clone(), + port: profile.port, + source, + })?; + + // Spawn the connection handler + mz_ore::task::spawn(|| "mz-deploy-connection", async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + client + } else { + // Cloud connection - use TLS + let mut builder = SslConnector::builder(SslMethod::tls()).map_err(|e| { + ConnectionError::Message(format!("Failed to create TLS builder: {}", e)) + })?; + + // Load CA certificates - try platform-specific paths + // macOS: Homebrew OpenSSL or system certificates + // Linux: Standard system paths + let ca_paths = [ + "/etc/ssl/cert.pem", // macOS system + "/opt/homebrew/etc/openssl@3/cert.pem", // macOS Homebrew ARM + "/usr/local/etc/openssl@3/cert.pem", // macOS Homebrew Intel + "/opt/homebrew/etc/openssl/cert.pem", // macOS Homebrew ARM (older) + "/usr/local/etc/openssl/cert.pem", // macOS Homebrew Intel (older) + "/etc/ssl/certs/ca-certificates.crt", // Debian/Ubuntu + "/etc/pki/tls/certs/ca-bundle.crt", // RHEL/CentOS + "/etc/ssl/ca-bundle.pem", // OpenSUSE + ]; + + let mut ca_loaded = false; + for path in &ca_paths { + if std::path::Path::new(path).exists() { + if builder.set_ca_file(path).is_ok() { + ca_loaded = true; + break; + } + } + } + + if !ca_loaded { + // Fall back to default paths as last resort + let _ = builder.set_default_verify_paths(); + } + + builder.set_verify(SslVerifyMode::PEER); + + let connector = MakeTlsConnector::new(builder.build()); + + let (client, connection) = tokio_postgres::connect(&conn_str, connector) + .await + .map_err(|source| ConnectionError::Connect { + host: profile.host.clone(), + port: profile.port, + source, + })?; + + // Spawn the connection handler + mz_ore::task::spawn(|| "mz-deploy-connection", async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + client + }; + + Ok(Client { client, profile }) + } + + /// Get the profile used for this connection. + pub fn profile(&self) -> &Profile { + &self.profile + } + + /// Get a reference to the underlying tokio-postgres client. + pub fn postgres_client(&self) -> &PgClient { + &self.client + } + + // ========================================================================= + // Basic Query Methods + // ========================================================================= + + /// Execute a SQL statement that doesn't return rows. 
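+    ///
+    /// # Example
+    ///
+    /// A sketch (table name and parameter are illustrative):
+    ///
+    /// ```ignore
+    /// let rows_changed = client
+    ///     .execute("DELETE FROM public.orders WHERE id = $1", &[&42_i64])
+    ///     .await?;
+    /// ```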
+ pub async fn execute( + &self, + statement: &T, + params: &[&(dyn ToSql + Sync)], + ) -> Result + where + T: ?Sized + ToStatement, + { + self.client + .execute(statement, params) + .await + .map_err(ConnectionError::Query) + } + + /// Execute a SQL query and return the resulting rows. + pub async fn query_one( + &self, + statement: &T, + params: &[&(dyn ToSql + Sync)], + ) -> Result + where + T: ?Sized + ToStatement, + { + self.client + .query_one(statement, params) + .await + .map_err(ConnectionError::Query) + } + + /// Execute a SQL query and return the resulting rows. + pub async fn query( + &self, + statement: &T, + params: &[&(dyn ToSql + Sync)], + ) -> Result, ConnectionError> + where + T: ?Sized + ToStatement, + { + self.client + .query(statement, params) + .await + .map_err(ConnectionError::Query) + } + + /// Query SHOW COLUMNS for all external dependencies and return their schemas as a Types object. + pub async fn query_external_types( + &mut self, + project: &planned::Project, + ) -> Result { + let mut objects = BTreeMap::new(); + let oids = project + .external_dependencies + .iter() + .cloned() + .chain(project.get_tables()); + + for oid in oids { + let quoted_db = quote_identifier(&oid.database); + let quoted_schema = quote_identifier(&oid.schema); + let quoted_object = quote_identifier(&oid.object); + + let rows = self + .client + .query( + &format!( + "SHOW COLUMNS FROM {}.{}.{}", + quoted_db, quoted_schema, quoted_object + ), + &[], + ) + .await?; + + let mut columns = BTreeMap::new(); + for row in rows { + let name: String = row.get("name"); + let type_str: String = row.get("type"); + let nullable: bool = row.get("nullable"); + + let column_type = ColumnType { + r#type: type_str, + nullable, + }; + + columns.insert(name, column_type); + } + + objects.insert(oid.to_string(), columns); + } + + Ok(Types { + version: 1, + objects, + }) + } + + /// Query types for internal project views from the database. + /// + /// This is used after type checking to capture the column schemas of all views + /// defined in the project. These types are cached in `.mz-deploy/types.cache` + /// and used by the test command to validate unit tests. + /// + /// Note: This should be called after the views have been created in the database + /// (either as permanent or temporary views during type checking). 
+ /// + /// # Arguments + /// * `object_ids` - The object IDs to query types for + /// * `flatten` - If true, query using flattened FQN names (for temporary views) + /// + /// # Returns + /// A Types struct containing the column schemas for all queried objects + pub async fn query_internal_types( + &mut self, + object_ids: &[&ObjectId], + flatten: bool, + ) -> Result { + let mut objects = BTreeMap::new(); + + for oid in object_ids { + // Build the object name (flattened for temp views, or regular FQN) + let object_ref = if flatten { + // For temporary views, the name is a single flattened identifier + format!("\"{}.{}.{}\"", oid.database, oid.schema, oid.object) + } else { + let quoted_db = quote_identifier(&oid.database); + let quoted_schema = quote_identifier(&oid.schema); + let quoted_object = quote_identifier(&oid.object); + format!("{}.{}.{}", quoted_db, quoted_schema, quoted_object) + }; + + let rows = self + .client + .query(&format!("SHOW COLUMNS FROM {}", object_ref), &[]) + .await?; + + let mut columns = BTreeMap::new(); + for row in rows { + let name: String = row.get("name"); + let type_str: String = row.get("type"); + let nullable: bool = row.get("nullable"); + + let column_type = ColumnType { + r#type: type_str, + nullable, + }; + + columns.insert(name, column_type); + } + + // Always store with regular FQN key (not flattened) + objects.insert(oid.to_string(), columns); + } + + Ok(Types { + version: 1, + objects, + }) + } + + // ========================================================================= + // Schema Operations + // ========================================================================= + + /// Create a schema in the specified database (idempotent). + pub async fn create_schema(&self, database: &str, schema: &str) -> Result<(), ConnectionError> { + let sql = format!( + "CREATE SCHEMA IF NOT EXISTS {}.{}", + quote_identifier(database), + quote_identifier(schema) + ); + + self.client.execute(&sql, &[]).await.map_err(|e| { + ConnectionError::SchemaCreationFailed { + database: database.to_string(), + schema: schema.to_string(), + source: Box::new(e), + } + })?; + + Ok(()) + } + + /// Check if a schema exists in the specified database. + pub async fn schema_exists( + &self, + database: &str, + schema: &str, + ) -> Result { + introspection::schema_exists(&self.client, database, schema).await + } + + // ========================================================================= + // Cluster Operations + // ========================================================================= + + /// Create a cluster with the specified configuration. + pub async fn create_cluster( + &self, + name: &str, + options: &ClusterOptions, + ) -> Result<(), ConnectionError> { + let sql = format!( + "CREATE CLUSTER {} (SIZE = '{}', REPLICATION FACTOR = {})", + quote_identifier(name), + options.size, + options.replication_factor + ); + + self.client.execute(&sql, &[]).await.map_err(|e| { + if e.to_string().contains("already exists") { + ConnectionError::ClusterAlreadyExists { + name: name.to_string(), + } + } else { + ConnectionError::ClusterCreationFailed { + name: name.to_string(), + source: Box::new(e), + } + } + })?; + + Ok(()) + } + + /// Check if a cluster exists. + pub async fn cluster_exists(&self, name: &str) -> Result { + introspection::cluster_exists(&self.client, name).await + } + + /// Get a cluster by name. + pub async fn get_cluster(&self, name: &str) -> Result, ConnectionError> { + introspection::get_cluster(&self.client, name).await + } + + /// List all clusters. 
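+    ///
+    /// # Example
+    ///
+    /// A sketch:
+    ///
+    /// ```ignore
+    /// let clusters = client.list_clusters().await?;
+    /// println!("found {} cluster(s)", clusters.len());
+    /// ```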
+ pub async fn list_clusters(&self) -> Result, ConnectionError> { + introspection::list_clusters(&self.client).await + } + + /// Get cluster configuration including replicas and grants. + /// + /// This fetches all information needed to clone a cluster's configuration: + /// - For managed clusters: size and replication factor + /// - For unmanaged clusters: replica configurations + /// - For both: privilege grants + pub async fn get_cluster_config( + &self, + name: &str, + ) -> Result, ConnectionError> { + introspection::get_cluster_config(&self.client, name).await + } + + /// Create a cluster with the specified configuration (managed or unmanaged). + /// + /// For managed clusters, creates a cluster with SIZE and REPLICATION FACTOR. + /// For unmanaged clusters, creates an empty cluster and then adds replicas. + /// In both cases, applies the privilege grants from the configuration. + pub async fn create_cluster_with_config( + &self, + name: &str, + config: &ClusterConfig, + ) -> Result<(), ConnectionError> { + match config { + ClusterConfig::Managed { options, grants } => { + // Create managed cluster + self.create_cluster(name, options).await?; + + // Apply grants + for grant in grants { + let sql = format!( + "GRANT {} ON CLUSTER {} TO {}", + grant.privilege_type, + quote_identifier(name), + quote_identifier(&grant.grantee) + ); + self.client.execute(&sql, &[]).await.map_err(|e| { + ConnectionError::Message(format!( + "Failed to grant {} to {} on cluster '{}': {}", + grant.privilege_type, grant.grantee, name, e + )) + })?; + } + + Ok(()) + } + ClusterConfig::Unmanaged { replicas, grants } => { + // Create empty unmanaged cluster + let create_cluster_sql = + format!("CREATE CLUSTER {} REPLICAS ()", quote_identifier(name)); + + self.client + .execute(&create_cluster_sql, &[]) + .await + .map_err(|e| { + if e.to_string().contains("already exists") { + ConnectionError::ClusterAlreadyExists { + name: name.to_string(), + } + } else { + ConnectionError::ClusterCreationFailed { + name: name.to_string(), + source: Box::new(e), + } + } + })?; + + // Create each replica + for replica in replicas { + let mut options_parts = vec![format!("SIZE = '{}'", replica.size)]; + + if let Some(ref az) = replica.availability_zone { + options_parts.push(format!("AVAILABILITY ZONE '{}'", az)); + } + + let create_replica_sql = format!( + "CREATE CLUSTER REPLICA {}.{} ({})", + quote_identifier(name), + quote_identifier(&replica.name), + options_parts.join(", ") + ); + + self.client + .execute(&create_replica_sql, &[]) + .await + .map_err(|e| ConnectionError::ClusterCreationFailed { + name: format!("{}.{}", name, replica.name), + source: Box::new(e), + })?; + } + + // Apply grants + for grant in grants { + let sql = format!( + "GRANT {} ON CLUSTER {} TO {}", + grant.privilege_type, + quote_identifier(name), + quote_identifier(&grant.grantee) + ); + self.client.execute(&sql, &[]).await.map_err(|e| { + ConnectionError::Message(format!( + "Failed to grant {} to {} on cluster '{}': {}", + grant.privilege_type, grant.grantee, name, e + )) + })?; + } + + Ok(()) + } + } + } + + // ========================================================================= + // Deployment Tracking Operations + // ========================================================================= + + /// Create the deployment tracking schemas/tables for staging deployments. 
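Taken together, `get_cluster_config` and `create_cluster_with_config` can clone an existing cluster's setup onto a staging cluster; a sketch under assumed cluster names, with `client` a connected `Client`:

    // Copy the size/replica layout and privilege grants of an existing cluster.
    if let Some(config) = client.get_cluster_config("prod_compute").await? {
        client
            .create_cluster_with_config("prod_compute_staging", &config)
            .await?;
    }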
+    pub async fn create_deployments(&self) -> Result<(), ConnectionError> {
+        deployment_ops::create_deployments(&self.client).await
+    }
+
+    /// Insert schema deployment records (insert-only, no DELETE).
+    pub async fn insert_schema_deployments(
+        &self,
+        deployments: &[SchemaDeploymentRecord],
+    ) -> Result<(), ConnectionError> {
+        deployment_ops::insert_schema_deployments(&self.client, deployments).await
+    }
+
+    /// Append deployment object records (insert-only, never update or delete).
+    pub async fn append_deployment_objects(
+        &self,
+        objects: &[DeploymentObjectRecord],
+    ) -> Result<(), ConnectionError> {
+        deployment_ops::append_deployment_objects(&self.client, objects).await
+    }
+
+    /// Insert cluster records for a staging deployment.
+    pub async fn insert_deployment_clusters(
+        &self,
+        deploy_id: &str,
+        clusters: &[String],
+    ) -> Result<(), ConnectionError> {
+        deployment_ops::insert_deployment_clusters(&self.client, deploy_id, clusters).await
+    }
+
+    /// Get cluster names for a staging deployment.
+    pub async fn get_deployment_clusters(
+        &self,
+        deploy_id: &str,
+    ) -> Result<Vec<String>, ConnectionError> {
+        deployment_ops::get_deployment_clusters(&self.client, deploy_id).await
+    }
+
+    /// Validate that all cluster IDs in a deployment still exist in the catalog.
+    pub async fn validate_deployment_clusters(
+        &self,
+        deploy_id: &str,
+    ) -> Result<(), ConnectionError> {
+        deployment_ops::validate_deployment_clusters(&self.client, deploy_id).await
+    }
+
+    /// Get detailed hydration and health status for clusters in a staging deployment.
+    ///
+    /// Uses the default allowed lag threshold of 5 minutes.
+    pub async fn get_deployment_hydration_status(
+        &self,
+        deploy_id: &str,
+    ) -> Result<Vec<ClusterStatusContext>, ConnectionError> {
+        deployment_ops::get_deployment_hydration_status(
+            &self.client,
+            deploy_id,
+            DEFAULT_ALLOWED_LAG_SECS,
+        )
+        .await
+    }
+
+    /// Get detailed hydration and health status with custom lag threshold.
+    ///
+    /// # Arguments
+    /// * `deploy_id` - Staging deployment ID
+    /// * `allowed_lag_secs` - Maximum allowed lag in seconds before marking as "lagging"
+    pub async fn get_deployment_hydration_status_with_lag(
+        &self,
+        deploy_id: &str,
+        allowed_lag_secs: i64,
+    ) -> Result<Vec<ClusterStatusContext>, ConnectionError> {
+        deployment_ops::get_deployment_hydration_status(&self.client, deploy_id, allowed_lag_secs)
+            .await
+    }
+
+    /// Subscribe to hydration status changes for a staging deployment.
+    ///
+    /// Returns a stream of typed `HydrationStatusUpdate` structs. The stream automatically:
+    /// - Iterates through cursor results
+    /// - Parses rows into typed structs
+    /// - Filters out retractions (mz_diff == -1)
+    ///
+    /// The subscription includes hydration progress, wallclock lag, and replica health.
+    pub fn subscribe_deployment_hydration(
+        &mut self,
+        deploy_id: &str,
+        allowed_lag_secs: i64,
+    ) -> impl Stream<Item = Result<HydrationStatusUpdate, ConnectionError>> + '_ {
+        let deploy_id = deploy_id.to_string();
+
+        try_stream!
{ + let txn = self.client.transaction().await?; + let pattern = format!("%_{}", deploy_id); + + let subscribe_sql = format!( + r#" + DECLARE c CURSOR FOR SUBSCRIBE ( + WITH + -- Detect problematic replicas: 3+ OOM kills in 24h (subscribe-friendly) + problematic_replicas AS ( + SELECT replica_id + FROM mz_internal.mz_cluster_replica_status_history + WHERE occurred_at + INTERVAL '24 hours' > mz_now() + AND reason = 'oom-killed' + GROUP BY replica_id + HAVING COUNT(*) >= 3 + ), + + -- Cluster health: count total vs problematic replicas + cluster_health AS ( + SELECT + c.name AS cluster_name, + c.id AS cluster_id, + COUNT(r.id) AS total_replicas, + COUNT(pr.replica_id) AS problematic_replicas + FROM mz_clusters c + LEFT JOIN mz_cluster_replicas r ON c.id = r.cluster_id + LEFT JOIN problematic_replicas pr ON r.id = pr.replica_id + WHERE c.name LIKE $1 + GROUP BY c.name, c.id + ), + + -- Hydration counts per cluster (best replica) + hydration_counts AS ( + SELECT + c.name AS cluster_name, + r.id AS replica_id, + COUNT(*) FILTER (WHERE mhs.hydrated) AS hydrated, + COUNT(*) AS total + FROM mz_clusters c + JOIN mz_cluster_replicas r ON c.id = r.cluster_id + LEFT JOIN mz_internal.mz_hydration_statuses mhs ON mhs.replica_id = r.id + WHERE c.name LIKE $1 + GROUP BY c.name, r.id + ), + + hydration_best AS ( + SELECT cluster_name, MAX(hydrated) AS hydrated, MAX(total) AS total + FROM hydration_counts + GROUP BY cluster_name + ), + + -- Max lag per cluster using mz_wallclock_global_lag + cluster_lag AS ( + SELECT + c.name AS cluster_name, + MAX(EXTRACT(EPOCH FROM wgl.lag)) AS max_lag_secs + FROM mz_clusters c + JOIN mz_cluster_replicas r ON c.id = r.cluster_id + JOIN mz_internal.mz_hydration_statuses mhs ON mhs.replica_id = r.id + JOIN mz_internal.mz_wallclock_global_lag wgl ON wgl.object_id = mhs.object_id + WHERE c.name LIKE $1 + GROUP BY c.name + ) + + SELECT + ch.cluster_name, + ch.cluster_id, + CASE + WHEN ch.total_replicas = 0 THEN 'failing' + WHEN ch.total_replicas = ch.problematic_replicas THEN 'failing' + WHEN COALESCE(hb.hydrated, 0) < COALESCE(hb.total, 0) THEN 'hydrating' + WHEN COALESCE(cl.max_lag_secs, 0) > {allowed_lag_secs} THEN 'lagging' + ELSE 'ready' + END AS status, + CASE + WHEN ch.total_replicas = 0 THEN 'no_replicas' + WHEN ch.total_replicas = ch.problematic_replicas THEN 'all_replicas_problematic' + ELSE NULL + END AS failure_reason, + COALESCE(hb.hydrated, 0) AS hydrated_count, + COALESCE(hb.total, 0) AS total_count, + COALESCE(cl.max_lag_secs, 0)::bigint AS max_lag_secs, + ch.total_replicas, + ch.problematic_replicas + FROM cluster_health ch + LEFT JOIN hydration_best hb ON ch.cluster_name = hb.cluster_name + LEFT JOIN cluster_lag cl ON ch.cluster_name = cl.cluster_name + ) + "#, + allowed_lag_secs = allowed_lag_secs + ); + + txn.execute(&subscribe_sql, &[&pattern]).await?; + + loop { + let rows = txn.query("FETCH ALL c", &[]).await?; + + if rows.is_empty() { + continue; + } + + for row in rows { + let mz_diff: i64 = row.get(1); + + // Skip retractions + if mz_diff == -1 { + continue; + } + + let status_str: String = row.get(4); + let failure_reason_str: Option = row.get(5); + let hydrated_count: i64 = row.get(6); + let total_count: i64 = row.get(7); + let max_lag_secs: i64 = row.get(8); + let total_replicas: i64 = row.get(9); + let problematic_replicas: i64 = row.get(10); + + let failure_reason = failure_reason_str.as_deref().map(|s| match s { + "no_replicas" => FailureReason::NoReplicas, + "all_replicas_problematic" => FailureReason::AllReplicasProblematic { + problematic: 
problematic_replicas, + total: total_replicas, + }, + _ => FailureReason::NoReplicas, // default fallback + }); + + let status = match status_str.as_str() { + "ready" => ClusterDeploymentStatus::Ready, + "hydrating" => ClusterDeploymentStatus::Hydrating { + hydrated: hydrated_count, + total: total_count, + }, + "lagging" => ClusterDeploymentStatus::Lagging { max_lag_secs }, + "failing" => ClusterDeploymentStatus::Failing { + reason: failure_reason.clone().unwrap_or(FailureReason::NoReplicas), + }, + // Default to ready for unknown status + _ => ClusterDeploymentStatus::Ready, + }; + + let update = HydrationStatusUpdate { + cluster_name: row.get(2), + cluster_id: row.get(3), + status, + failure_reason, + hydrated_count, + total_count, + max_lag_secs, + total_replicas, + problematic_replicas, + }; + + yield update; + } + } + } + } + + /// Delete cluster records for a staging deployment. + pub async fn delete_deployment_clusters(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::delete_deployment_clusters(&self.client, deploy_id).await + } + + /// Update promoted_at timestamp for a staging deployment. + pub async fn update_promoted_at(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::update_promoted_at(&self.client, deploy_id).await + } + + /// Delete all deployment records for a specific deployment. + pub async fn delete_deployment(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::delete_deployment(&self.client, deploy_id).await + } + + /// Check if the deployment tracking table exists. + pub async fn deployment_table_exists(&self) -> Result { + deployment_ops::deployment_table_exists(&self.client).await + } + + /// Get schema deployment records from the database for a specific deployment. + pub async fn get_schema_deployments( + &self, + deploy_id: Option<&str>, + ) -> Result, ConnectionError> { + deployment_ops::get_schema_deployments(&self.client, deploy_id).await + } + + /// Get deployment object records from the database for a specific deployment. + pub async fn get_deployment_objects( + &self, + deploy_id: Option<&str>, + ) -> Result { + deployment_ops::get_deployment_objects(&self.client, deploy_id).await + } + + /// Get metadata about a deployment for validation. + pub async fn get_deployment_metadata( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + deployment_ops::get_deployment_metadata(&self.client, deploy_id).await + } + + /// Get detailed information about a specific deployment. + pub async fn get_deployment_details( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + deployment_ops::get_deployment_details(&self.client, deploy_id).await + } + + /// List all staging deployments (promoted_at IS NULL), grouped by deploy_id. + pub async fn list_staging_deployments( + &self, + ) -> Result, ConnectionError> { + deployment_ops::list_staging_deployments(&self.client).await + } + + /// List deployment history in chronological order (promoted deployments only). + pub async fn list_deployment_history( + &self, + limit: Option, + ) -> Result, ConnectionError> { + deployment_ops::list_deployment_history(&self.client, limit).await + } + + /// Check for deployment conflicts (schemas updated after deployment started). 
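One way to consume `subscribe_deployment_hydration` is to pin the stream and react to each update; a sketch assuming a mutable `client` (the method takes `&mut self`), a made-up deploy id, and the `futures` crate this package already depends on:

    use futures::{pin_mut, StreamExt};

    let updates = client.subscribe_deployment_hydration("20240101120000", 300);
    pin_mut!(updates);
    while let Some(update) = updates.next().await {
        // Query errors propagate out of the stream as ConnectionError.
        let update = update?;
        if matches!(update.status, ClusterDeploymentStatus::Ready) {
            println!("{} is ready", update.cluster_name);
        }
    }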
+ pub async fn check_deployment_conflicts( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + deployment_ops::check_deployment_conflicts(&self.client, deploy_id).await + } + + // ========================================================================= + // Apply State Operations + // ========================================================================= + + /// Create apply state schemas with comments for tracking apply progress. + pub async fn create_apply_state_schemas(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::create_apply_state_schemas(&self.client, deploy_id).await + } + + /// Get the current apply state for a deployment. + pub async fn get_apply_state(&self, deploy_id: &str) -> Result { + deployment_ops::get_apply_state(&self.client, deploy_id).await + } + + /// Delete apply state schemas after successful completion. + pub async fn delete_apply_state_schemas(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::delete_apply_state_schemas(&self.client, deploy_id).await + } + + // ========================================================================= + // Pending Statements Operations + // ========================================================================= + + /// Insert pending statements for deferred execution (e.g., sinks). + pub async fn insert_pending_statements( + &self, + statements: &[PendingStatement], + ) -> Result<(), ConnectionError> { + deployment_ops::insert_pending_statements(&self.client, statements).await + } + + /// Get pending statements for a deployment that haven't been executed yet. + pub async fn get_pending_statements( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + deployment_ops::get_pending_statements(&self.client, deploy_id).await + } + + /// Mark a pending statement as executed. + pub async fn mark_statement_executed( + &self, + deploy_id: &str, + sequence_num: i32, + ) -> Result<(), ConnectionError> { + deployment_ops::mark_statement_executed(&self.client, deploy_id, sequence_num).await + } + + /// Delete all pending statements for a deployment. + pub async fn delete_pending_statements(&self, deploy_id: &str) -> Result<(), ConnectionError> { + deployment_ops::delete_pending_statements(&self.client, deploy_id).await + } + + // ========================================================================= + // Introspection Operations + // ========================================================================= + + /// Get the current Materialize user/role. + pub async fn get_current_user(&self) -> Result { + introspection::get_current_user(&self.client).await + } + + /// Check which objects from a set exist in the production database. + pub async fn check_objects_exist( + &self, + objects: &BTreeSet, + ) -> Result, ConnectionError> { + introspection::check_objects_exist(&self.client, objects).await + } + + /// Check which tables from the given set exist in the database. + pub async fn check_tables_exist( + &self, + tables: &BTreeSet, + ) -> Result, ConnectionError> { + introspection::check_tables_exist(&self.client, tables).await + } + + /// Check which sinks from the given set exist in the database. + pub async fn check_sinks_exist( + &self, + sinks: &BTreeSet, + ) -> Result, ConnectionError> { + introspection::check_sinks_exist(&self.client, sinks).await + } + + /// Find sinks that depend on objects in the specified schemas. + /// + /// Used during apply to identify sinks that need to be repointed + /// before old schemas are dropped with CASCADE. 
+ pub async fn find_sinks_depending_on_schemas( + &self, + schemas: &[(String, String)], + ) -> Result, ConnectionError> { + introspection::find_sinks_depending_on_schemas(&self.client, schemas).await + } + + /// Check if an object (MV, table, source) exists in the specified schema. + pub async fn object_exists( + &self, + database: &str, + schema: &str, + object: &str, + ) -> Result { + introspection::object_exists(&self.client, database, schema, object).await + } + + /// Get staging schema names for a specific deployment. + pub async fn get_staging_schemas( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + introspection::get_staging_schemas(&self.client, deploy_id).await + } + + /// Get staging cluster names for a specific deployment. + pub async fn get_staging_clusters( + &self, + deploy_id: &str, + ) -> Result, ConnectionError> { + introspection::get_staging_clusters(&self.client, deploy_id).await + } + + /// Drop all objects in a schema. + pub async fn drop_schema_objects( + &self, + database: &str, + schema: &str, + ) -> Result, ConnectionError> { + introspection::drop_schema_objects(&self.client, database, schema).await + } + + /// Drop specific objects by their ObjectIds. + pub async fn drop_objects( + &self, + objects: &BTreeSet, + ) -> Result, ConnectionError> { + introspection::drop_objects(&self.client, objects).await + } + + /// Drop staging schemas by name. + pub async fn drop_staging_schemas( + &self, + schemas: &[(String, String)], + ) -> Result<(), ConnectionError> { + introspection::drop_staging_schemas(&self.client, schemas).await + } + + /// Drop staging clusters by name. + pub async fn drop_staging_clusters(&self, clusters: &[String]) -> Result<(), ConnectionError> { + introspection::drop_staging_clusters(&self.client, clusters).await + } + + // ========================================================================= + // Validation Operations + // ========================================================================= + + /// Validate that all required databases, schemas, and external dependencies exist. + pub async fn validate_project( + &mut self, + planned_project: &planned::Project, + project_root: &Path, + ) -> Result<(), DatabaseValidationError> { + validation::validate_project_impl(&self.client, planned_project, project_root).await + } + + /// Validate that sources and sinks don't share clusters with indexes or materialized views. + pub async fn validate_cluster_isolation( + &mut self, + planned_project: &planned::Project, + ) -> Result<(), DatabaseValidationError> { + validation::validate_cluster_isolation_impl(&self.client, planned_project).await + } + + /// Validate that the user has sufficient privileges to deploy the project. + pub async fn validate_privileges( + &mut self, + planned_project: &planned::Project, + ) -> Result<(), DatabaseValidationError> { + validation::validate_privileges_impl(&self.client, planned_project).await + } + + /// Validate that all sources referenced by CREATE TABLE FROM SOURCE statements exist. + pub async fn validate_sources_exist( + &mut self, + planned_project: &planned::Project, + ) -> Result<(), DatabaseValidationError> { + validation::validate_sources_exist_impl(&self.client, planned_project).await + } + + /// Validate that all connections referenced by CREATE SINK statements exist. + /// + /// Sinks reference connections (Kafka, Iceberg) that are not managed by mz-deploy. + /// This validates they exist before attempting deployment. 
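The validation helpers above are meant to run as a pre-deploy gate; a sketch chaining several of them over a planned project (`project`, `project_root`, and a mutable `client` are assumed to be in scope):

    // Fail fast before any DDL is issued.
    client.validate_project(&project, project_root).await?;
    client.validate_cluster_isolation(&project).await?;
    client.validate_privileges(&project).await?;
    client.validate_sources_exist(&project).await?;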
+ pub async fn validate_sink_connections_exist( + &mut self, + planned_project: &planned::Project, + ) -> Result<(), DatabaseValidationError> { + validation::validate_sink_connections_exist_impl(&self.client, planned_project).await + } + + /// Validate that all tables referenced by objects to be deployed exist in the database. + pub async fn validate_table_dependencies( + &mut self, + planned_project: &planned::Project, + objects_to_deploy: &BTreeSet, + ) -> Result<(), DatabaseValidationError> { + validation::validate_table_dependencies_impl( + &self.client, + planned_project, + objects_to_deploy, + ) + .await + } +} + +/// Escape a value for use in a libpq connection string. +/// +/// In connection strings, values containing special characters must be quoted +/// with single quotes, and any single quotes or backslashes within the value +/// must be escaped with a backslash. +fn escape_conn_string_value(value: &str) -> String { + value.replace('\\', "\\\\").replace('\'', "\\'") +} diff --git a/src/mz-deploy/src/client/deployment_ops.rs b/src/mz-deploy/src/client/deployment_ops.rs new file mode 100644 index 0000000000000..eed6f0be603f8 --- /dev/null +++ b/src/mz-deploy/src/client/deployment_ops.rs @@ -0,0 +1,1319 @@ +//! Deployment tracking operations. +//! +//! This module contains methods for managing deployment records in the database, +//! including creating tracking tables, inserting/querying deployment records, +//! and managing deployment lifecycle (staging, promotion, abort). + +use crate::client::errors::ConnectionError; +use crate::client::models::{ + ApplyState, ConflictRecord, DeploymentDetails, DeploymentHistoryEntry, DeploymentKind, + DeploymentMetadata, DeploymentObjectRecord, PendingStatement, SchemaDeploymentRecord, + StagingDeployment, +}; +use crate::project::deployment_snapshot::DeploymentSnapshot; +use crate::project::object_id::ObjectId; +use chrono::{DateTime, Utc}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt; +use tokio_postgres::Client as PgClient; +use tokio_postgres::types::ToSql; + +/// Reason why a cluster deployment is failing. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FailureReason { + /// Cluster has no replicas configured. + NoReplicas, + /// All replicas are experiencing repeated OOM kills (3+ in 24h). + AllReplicasProblematic { problematic: i64, total: i64 }, +} + +impl fmt::Display for FailureReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FailureReason::NoReplicas => write!(f, "no replicas configured"), + FailureReason::AllReplicasProblematic { problematic, total } => { + write!( + f, + "all {} of {} replicas OOM-looping (3+ crashes in 24h)", + problematic, total + ) + } + } + } +} + +/// Status of a cluster in a staging deployment. +#[derive(Debug, Clone, PartialEq)] +pub enum ClusterDeploymentStatus { + /// Cluster is fully hydrated and lag is within threshold. + Ready, + /// Cluster is still hydrating. + Hydrating { hydrated: i64, total: i64 }, + /// Cluster is hydrated but lag exceeds threshold. + Lagging { max_lag_secs: i64 }, + /// Cluster is in a failing state. + Failing { reason: FailureReason }, +} + +/// Full status context for a cluster in a staging deployment. +#[derive(Debug, Clone)] +pub struct ClusterStatusContext { + /// Cluster name (with deployment suffix). + pub cluster_name: String, + /// Cluster ID. + pub cluster_id: String, + /// Overall status. + pub status: ClusterDeploymentStatus, + /// Number of hydrated objects. 
+ pub hydrated_count: i64, + /// Total number of objects. + pub total_count: i64, + /// Maximum lag in seconds across all objects. + pub max_lag_secs: i64, + /// Total number of replicas. + pub total_replicas: i64, + /// Number of problematic (OOM-looping) replicas. + pub problematic_replicas: i64, +} + +/// A hydration status update from the SUBSCRIBE stream. +/// +/// This represents a single update from the streaming subscription +/// to cluster hydration status. Retractions (mz_diff == -1) are +/// filtered out before yielding these updates. +#[derive(Debug, Clone)] +pub struct HydrationStatusUpdate { + /// Cluster name (with deployment suffix). + pub cluster_name: String, + /// Cluster ID. + pub cluster_id: String, + /// Overall status. + pub status: ClusterDeploymentStatus, + /// Reason for failure, if status is Failing. + pub failure_reason: Option, + /// Number of hydrated objects. + pub hydrated_count: i64, + /// Total number of objects. + pub total_count: i64, + /// Maximum lag in seconds across all objects. + pub max_lag_secs: i64, + /// Total number of replicas. + pub total_replicas: i64, + /// Number of problematic (OOM-looping) replicas. + pub problematic_replicas: i64, +} + +/// Create the deployment tracking database and tables. +/// +/// This creates: +/// - `_mz_deploy` database +/// - `_mz_deploy.public.deployments` table for tracking deployment metadata +/// - `_mz_deploy.public.objects` table for tracking deployed objects and their hashes +/// - `_mz_deploy.public.clusters` table for tracking clusters used by deployments +/// - `_mz_deploy.public.pending_statements` table for deferred statements (sinks) +/// - `_mz_deploy.public.production` view for querying current production state +pub async fn create_deployments(client: &PgClient) -> Result<(), ConnectionError> { + client + .execute("CREATE DATABASE IF NOT EXISTS _mz_deploy;", &[]) + .await + .map_err(ConnectionError::Query)?; + + client + .execute( + r#"CREATE TABLE IF NOT EXISTS _mz_deploy.public.deployments ( + deploy_id TEXT NOT NULL, + deployed_at TIMESTAMPTZ NOT NULL, + promoted_at TIMESTAMPTZ, + database TEXT NOT NULL, + schema TEXT NOT NULL, + deployed_by TEXT NOT NULL, + commit TEXT, + kind TEXT NOT NULL + ) WITH ( + PARTITION BY (deploy_id, deployed_at, promoted_at) + );"#, + &[], + ) + .await + .map_err(ConnectionError::Query)?; + + client + .execute( + r#"CREATE TABLE IF NOT EXISTS _mz_deploy.public.objects ( + deploy_id TEXT NOT NULL, + database TEXT NOT NULL, + schema TEXT NOT NULL, + object TEXT NOT NULL, + hash TEXT NOT NULL + ) WITH ( + PARTITION BY (deploy_id, database, schema) + );"#, + &[], + ) + .await + .map_err(ConnectionError::Query)?; + + client + .execute( + r#"CREATE TABLE IF NOT EXISTS _mz_deploy.public.clusters ( + deploy_id TEXT NOT NULL, + cluster_id TEXT NOT NULL + ) WITH ( + PARTITION BY (deploy_id) + );"#, + &[], + ) + .await + .map_err(ConnectionError::Query)?; + + client + .execute( + r#"CREATE TABLE IF NOT EXISTS _mz_deploy.public.pending_statements ( + deploy_id TEXT NOT NULL, + sequence_num INT NOT NULL, + database TEXT NOT NULL, + schema TEXT NOT NULL, + object TEXT NOT NULL, + object_hash TEXT NOT NULL, + statement_sql TEXT NOT NULL, + statement_kind TEXT NOT NULL, + executed_at TIMESTAMPTZ + ) WITH ( + PARTITION BY (deploy_id) + );"#, + &[], + ) + .await + .map_err(ConnectionError::Query)?; + + client + .execute( + r#" + CREATE VIEW IF NOT EXISTS _mz_deploy.public.production AS + WITH candidates AS ( + SELECT DISTINCT ON (database, schema) database, schema, deploy_id, 
promoted_at, commit, kind + FROM _mz_deploy.public.deployments + WHERE promoted_at IS NOT NULL + ORDER BY database, schema, promoted_at DESC + ) + + SELECT c.database, c.schema, c.deploy_id, c.promoted_at, c.commit, c.kind + FROM candidates c + JOIN mz_schemas s ON c.schema = s.name + JOIN mz_databases d ON c.database = d.name; + "#, + &[], + ) + .await + .map_err(ConnectionError::Query)?; + + Ok(()) +} + +/// Insert schema deployment records (insert-only, no DELETE). +pub async fn insert_schema_deployments( + client: &PgClient, + deployments: &[SchemaDeploymentRecord], +) -> Result<(), ConnectionError> { + if deployments.is_empty() { + return Ok(()); + } + + let insert_sql = r#" + INSERT INTO _mz_deploy.public.deployments + (deploy_id, database, schema, deployed_at, deployed_by, promoted_at, commit, kind) + VALUES + ($1, $2, $3, $4, $5, $6, $7, $8) + "#; + + for deployment in deployments { + let kind_str = deployment.kind.to_string(); + client + .execute( + insert_sql, + &[ + &deployment.deploy_id, + &deployment.database, + &deployment.schema, + &deployment.deployed_at, + &deployment.deployed_by, + &deployment.promoted_at, + &deployment.git_commit, + &kind_str, + ], + ) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} + +/// Append deployment object records (insert-only, never update or delete). +pub async fn append_deployment_objects( + client: &PgClient, + objects: &[DeploymentObjectRecord], +) -> Result<(), ConnectionError> { + if objects.is_empty() { + return Ok(()); + } + + let insert_sql = r#" + INSERT INTO _mz_deploy.public.objects + (deploy_id, database, schema, object, hash) + VALUES + ($1, $2, $3, $4, $5) + "#; + + for obj in objects { + client + .execute( + insert_sql, + &[ + &obj.deploy_id, + &obj.database, + &obj.schema, + &obj.object, + &obj.object_hash, + ], + ) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} + +/// Insert cluster records for a staging deployment. +/// +/// Accepts cluster names and resolves them to cluster IDs internally. +/// Fails if any cluster names cannot be resolved (cluster doesn't exist). +pub async fn insert_deployment_clusters( + client: &PgClient, + deploy_id: &str, + clusters: &[String], +) -> Result<(), ConnectionError> { + if clusters.is_empty() { + return Ok(()); + } + + // Step 1: Query mz_catalog to get cluster IDs for the given names + let placeholders: Vec = (1..=clusters.len()).map(|i| format!("${}", i)).collect(); + let placeholders_str = placeholders.join(", "); + + let select_sql = format!( + "SELECT name, id FROM mz_catalog.mz_clusters WHERE name IN ({})", + placeholders_str + ); + + #[allow(clippy::as_conversions)] + let params: Vec<&(dyn ToSql + Sync)> = + clusters.iter().map(|c| c as &(dyn ToSql + Sync)).collect(); + + let rows = client.query(&select_sql, ¶ms).await?; + + // Verify all clusters were found + if rows.len() != clusters.len() { + let found_names: BTreeSet = rows.iter().map(|row| row.get("name")).collect(); + let missing: Vec<&str> = clusters + .iter() + .filter(|name| !found_names.contains(*name)) + .map(|s| s.as_str()) + .collect(); + + return Err(ConnectionError::IntrospectionFailed { + object_type: "cluster".to_string(), + source: format!( + "Failed to resolve cluster names to IDs. 
The following clusters do not exist: {}", + missing.join(", ") + ) + .into(), + }); + } + + // Step 2: Insert the cluster IDs into _mz_deploy.public.clusters + let insert_sql = r#" + INSERT INTO _mz_deploy.public.clusters (deploy_id, cluster_id) + VALUES ($1, $2) + "#; + + for row in rows { + let cluster_id: String = row.get("id"); + client + .execute(insert_sql, &[&deploy_id, &cluster_id]) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} + +/// Get cluster names for a staging deployment. +/// +/// Returns cluster names by resolving cluster IDs via JOIN with mz_catalog.mz_clusters. +/// If a cluster ID exists in _mz_deploy.public.clusters but the cluster was deleted from the catalog, +/// that cluster will be silently omitted from results. +pub async fn get_deployment_clusters( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT c.name + FROM _mz_deploy.public.clusters dc + JOIN mz_catalog.mz_clusters c ON dc.cluster_id = c.id + WHERE dc.deploy_id = $1 + ORDER BY c.name + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows.iter().map(|row| row.get("name")).collect()) +} + +/// Validate that all cluster IDs in a deployment still exist in the catalog. +/// +/// Returns an error if any cluster IDs in _mz_deploy.public.clusters cannot be resolved +/// to clusters in mz_catalog.mz_clusters (i.e., clusters were deleted). +pub async fn validate_deployment_clusters( + client: &PgClient, + deploy_id: &str, +) -> Result<(), ConnectionError> { + let query = r#" + SELECT dc.cluster_id + FROM _mz_deploy.public.clusters dc + LEFT JOIN mz_catalog.mz_clusters c ON dc.cluster_id = c.id + WHERE dc.deploy_id = $1 AND c.id IS NULL + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + if !rows.is_empty() { + let missing_ids: Vec = rows.iter().map(|row| row.get("cluster_id")).collect(); + return Err(ConnectionError::IntrospectionFailed { + object_type: "cluster".to_string(), + source: format!( + "Deployment '{}' references {} cluster(s) that no longer exist: {}. \ + These clusters may have been deleted. Run 'mz-deploy abort {}' to clean up.", + deploy_id, + missing_ids.len(), + missing_ids.join(", "), + deploy_id + ) + .into(), + }); + } + + Ok(()) +} + +/// Delete cluster records for a staging deployment. +pub async fn delete_deployment_clusters( + client: &PgClient, + deploy_id: &str, +) -> Result<(), ConnectionError> { + client + .execute( + "DELETE FROM _mz_deploy.public.clusters WHERE deploy_id = $1", + &[&deploy_id], + ) + .await + .map_err(ConnectionError::Query)?; + Ok(()) +} + +/// Update promoted_at timestamp for a staging deployment. +pub async fn update_promoted_at(client: &PgClient, deploy_id: &str) -> Result<(), ConnectionError> { + let update_sql = r#" + UPDATE _mz_deploy.public.deployments + SET promoted_at = NOW() + WHERE deploy_id = $1 + "#; + + client + .execute(update_sql, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + Ok(()) +} + +/// Delete all deployment records for a specific deployment. 
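A round trip through the cluster-tracking helpers in this module looks like the following sketch (deploy id and cluster name are made up; `pg` is a `tokio_postgres::Client`):

    // Record the clusters used by a staging deployment, then read and validate them.
    insert_deployment_clusters(&pg, "20240101120000", &["orders_compute_20240101120000".to_string()]).await?;
    let names = get_deployment_clusters(&pg, "20240101120000").await?;
    validate_deployment_clusters(&pg, "20240101120000").await?;
    println!("tracked clusters: {}", names.join(", "));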
+pub async fn delete_deployment(client: &PgClient, deploy_id: &str) -> Result<(), ConnectionError> { + client + .execute( + "DELETE FROM _mz_deploy.public.deployments WHERE deploy_id = $1", + &[&deploy_id], + ) + .await + .map_err(ConnectionError::Query)?; + client + .execute( + "DELETE FROM _mz_deploy.public.objects WHERE deploy_id = $1", + &[&deploy_id], + ) + .await + .map_err(ConnectionError::Query)?; + Ok(()) +} + +/// Get schema deployment records from the database for a specific deployment. +pub async fn get_schema_deployments( + client: &PgClient, + deploy_id: Option<&str>, +) -> Result, ConnectionError> { + let query = if deploy_id.is_none() { + r#" + SELECT deploy_id, database, schema, + promoted_at as deployed_at, + '' as deployed_by, + promoted_at, + commit, + kind + FROM _mz_deploy.public.production + ORDER BY database, schema + "# + } else { + r#" + SELECT deploy_id, database, schema, + deployed_at, + deployed_by, + promoted_at, + commit, + kind + FROM _mz_deploy.public.deployments + WHERE deploy_id = $1 + ORDER BY database, schema + "# + }; + + let rows = if deploy_id.is_none() { + client + .query(query, &[]) + .await + .map_err(ConnectionError::Query)? + } else { + client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)? + }; + + let mut records = Vec::new(); + for row in rows { + let deploy_id: String = row.get("deploy_id"); + let database: String = row.get("database"); + let schema: String = row.get("schema"); + let deployed_at: DateTime = row.get("deployed_at"); + let deployed_by: String = row.get("deployed_by"); + let promoted_at: Option> = row.get("promoted_at"); + let git_commit: Option = row.get("commit"); + let kind_str: String = row.get("kind"); + + let kind = kind_str.parse().map_err(|e| { + ConnectionError::Message(format!("Failed to parse deployment kind: {}", e)) + })?; + + records.push(SchemaDeploymentRecord { + deploy_id, + database, + schema, + deployed_at, + deployed_by, + promoted_at, + git_commit, + kind, + }); + } + + Ok(records) +} + +/// Get deployment object records from the database for a specific deployment. +pub async fn get_deployment_objects( + client: &PgClient, + deploy_id: Option<&str>, +) -> Result { + let query = if deploy_id.is_none() { + r#" + SELECT o.database, o.schema, o.object, o.hash + FROM _mz_deploy.public.objects o + JOIN _mz_deploy.public.production p + ON o.database = p.database AND o.schema = p.schema + WHERE o.deploy_id = p.deploy_id + "# + } else { + r#" + SELECT database, schema, object, hash + FROM _mz_deploy.public.objects + WHERE deploy_id = $1 + "# + }; + + let rows = if deploy_id.is_none() { + client + .query(query, &[]) + .await + .map_err(ConnectionError::Query)? + } else { + client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)? + }; + + let mut objects = BTreeMap::new(); + let mut schemas = BTreeMap::new(); + for row in rows { + let database: String = row.get("database"); + let schema: String = row.get("schema"); + let object: String = row.get("object"); + let object_hash: String = row.get("hash"); + + let object_id = ObjectId { + database: database.clone(), + schema: schema.clone(), + object, + }; + objects.insert(object_id, object_hash); + // Default to Objects kind for snapshots loaded from DB (used for comparison only) + schemas + .entry((database, schema)) + .or_insert(DeploymentKind::Objects); + } + + Ok(DeploymentSnapshot { objects, schemas }) +} + +/// Get metadata about a deployment for validation. 
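These snapshot queries make it easy to diff a staging deployment against production; a sketch with a made-up deploy id (`pg` is a `tokio_postgres::Client`):

    // Count objects whose hash differs from (or is absent in) production.
    let prod = get_deployment_objects(&pg, None).await?;
    let staged = get_deployment_objects(&pg, Some("20240101120000")).await?;
    let mut changed = 0;
    for (id, hash) in &staged.objects {
        if prod.objects.get(id) != Some(hash) {
            changed += 1;
        }
    }
    println!("{changed} object(s) changed");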
+pub async fn get_deployment_metadata( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT deploy_id, + promoted_at, + database, + schema + FROM _mz_deploy.public.deployments + WHERE deploy_id = $1 + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + if rows.is_empty() { + return Ok(None); + } + + let first_row = &rows[0]; + let deploy_id: String = first_row.get("deploy_id"); + let promoted_at: Option> = first_row.get("promoted_at"); + + let mut schemas = Vec::new(); + for row in rows { + let database: String = row.get("database"); + let schema: String = row.get("schema"); + schemas.push((database, schema)); + } + + Ok(Some(DeploymentMetadata { + deploy_id, + promoted_at, + schemas, + })) +} + +/// Get detailed information about a specific deployment. +/// +/// Returns deployment details if the deployment exists, or None if not found. +pub async fn get_deployment_details( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT deploy_id, + deployed_at, + promoted_at, + deployed_by, + commit, + kind, + database, + schema + FROM _mz_deploy.public.deployments + WHERE deploy_id = $1 + ORDER BY database, schema + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + if rows.is_empty() { + return Ok(None); + } + + let first_row = &rows[0]; + let deployed_at: DateTime = first_row.get("deployed_at"); + let promoted_at: Option> = first_row.get("promoted_at"); + let deployed_by: String = first_row.get("deployed_by"); + let git_commit: Option = first_row.get("commit"); + let kind_str: String = first_row.get("kind"); + let kind: DeploymentKind = kind_str.parse().map_err(ConnectionError::Message)?; + + let mut schemas = Vec::new(); + for row in rows { + let database: String = row.get("database"); + let schema: String = row.get("schema"); + schemas.push((database, schema)); + } + + Ok(Some(DeploymentDetails { + deployed_at, + promoted_at, + deployed_by, + git_commit, + kind, + schemas, + })) +} + +/// List all staging deployments (promoted_at IS NULL), grouped by deploy_id. +/// +/// Returns a map from deploy_id to staging deployment details. 
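A small sketch of reading back one deployment's record via `get_deployment_details` (deploy id made up, `pg` a `tokio_postgres::Client`):

    if let Some(details) = get_deployment_details(&pg, "20240101120000").await? {
        match details.promoted_at {
            Some(ts) => println!("promoted at {ts}"),
            None => println!("still staging"),
        }
    }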
+pub async fn list_staging_deployments( + client: &PgClient, +) -> Result, ConnectionError> { + let query = r#" + SELECT deploy_id, + deployed_at, + deployed_by, + commit, + kind, + database, + schema + FROM _mz_deploy.public.deployments + WHERE promoted_at IS NULL + ORDER BY deploy_id, database, schema + "#; + + let rows = client + .query(query, &[]) + .await + .map_err(ConnectionError::Query)?; + + let mut deployments: BTreeMap = BTreeMap::new(); + + for row in rows { + let deploy_id: String = row.get("deploy_id"); + let deployed_at: DateTime = row.get("deployed_at"); + let deployed_by: String = row.get("deployed_by"); + let git_commit: Option = row.get("commit"); + let kind_str: String = row.get("kind"); + let database: String = row.get("database"); + let schema: String = row.get("schema"); + + deployments + .entry(deploy_id) + .or_insert_with(|| { + // Parse kind - default to Objects if parsing fails (shouldn't happen) + let kind = kind_str.parse().unwrap_or(DeploymentKind::Objects); + StagingDeployment { + deployed_at, + deployed_by: deployed_by.clone(), + git_commit: git_commit.clone(), + kind, + schemas: Vec::new(), + } + }) + .schemas + .push((database, schema)); + } + + Ok(deployments) +} + +/// List deployment history in chronological order (promoted deployments only). +/// +/// Returns a vector of deployment history entries ordered by promotion time. +pub async fn list_deployment_history( + client: &PgClient, + limit: Option, +) -> Result, ConnectionError> { + // We need to limit unique deployments, not individual schema rows + // First get distinct deployments, then join with schemas + let query = if let Some(limit) = limit { + format!( + r#" + WITH unique_deployments AS ( + SELECT DISTINCT deploy_id, promoted_at, deployed_by, commit, kind + FROM _mz_deploy.public.deployments + WHERE promoted_at IS NOT NULL + ORDER BY promoted_at DESC + LIMIT {} + ) + SELECT d.deploy_id, + d.promoted_at, + d.deployed_by, + d.commit, + d.kind, + d.database, + d.schema + FROM _mz_deploy.public.deployments d + JOIN unique_deployments u + ON d.deploy_id = u.deploy_id + AND d.promoted_at = u.promoted_at + AND d.deployed_by = u.deployed_by + ORDER BY d.promoted_at DESC, d.database, d.schema + "#, + limit + ) + } else { + r#" + SELECT deploy_id, + promoted_at, + deployed_by, + commit, + kind, + database, + schema + FROM _mz_deploy.public.deployments + WHERE promoted_at IS NOT NULL + ORDER BY promoted_at DESC, database, schema + "# + .to_string() + }; + + let rows = client + .query(&query, &[]) + .await + .map_err(ConnectionError::Query)?; + + // Group by (deploy_id, promoted_at, deployed_by, commit, kind) + let mut deployments: Vec = Vec::new(); + let mut current_deploy_id: Option = None; + + for row in rows { + let deploy_id: String = row.get("deploy_id"); + let promoted_at: DateTime = row.get("promoted_at"); + let deployed_by: String = row.get("deployed_by"); + let git_commit: Option = row.get("commit"); + let kind_str: String = row.get("kind"); + let database: String = row.get("database"); + let schema: String = row.get("schema"); + + // Check if this is a new deployment or same as current + if current_deploy_id.as_ref() != Some(&deploy_id) { + // Parse kind - default to Objects if parsing fails (shouldn't happen) + let kind = kind_str.parse().unwrap_or(DeploymentKind::Objects); + // Start a new deployment group + deployments.push(DeploymentHistoryEntry { + deploy_id: deploy_id.clone(), + promoted_at, + deployed_by, + git_commit, + kind, + schemas: vec![(database, schema)], + }); + 
current_deploy_id = Some(deploy_id); + } else { + // Add schema to current deployment + if let Some(last) = deployments.last_mut() { + last.schemas.push((database, schema)); + } + } + } + + Ok(deployments) +} + +/// Check for deployment conflicts (schemas updated after deployment started). +pub async fn check_deployment_conflicts( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT p.database, p.schema, p.deploy_id, p.promoted_at + FROM _mz_deploy.public.production p + JOIN _mz_deploy.public.deployments d USING (database, schema) + WHERE d.deploy_id = $1 AND p.promoted_at > d.deployed_at + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + let conflicts = rows + .iter() + .map(|row| ConflictRecord { + database: row.get("database"), + schema: row.get("schema"), + deploy_id: row.get("deploy_id"), + promoted_at: row.get("promoted_at"), + }) + .collect(); + + Ok(conflicts) +} + +/// Check if the deployment tracking table exists. +pub async fn deployment_table_exists(client: &PgClient) -> Result { + let query = r#" + SELECT EXISTS( + SELECT 1 + FROM mz_catalog.mz_tables t + JOIN mz_catalog.mz_schemas s ON t.schema_id = s.id + JOIN mz_catalog.mz_databases d ON s.database_id = d.id + WHERE t.name = 'deployments' + AND s.name = 'public' + AND d.name = '_mz_deploy' + ) + "#; + + let row = client + .query_one(query, &[]) + .await + .map_err(ConnectionError::Query)?; + + Ok(row.get(0)) +} + +/// Default allowed lag threshold in seconds (5 minutes). +pub const DEFAULT_ALLOWED_LAG_SECS: i64 = 300; + +/// Get detailed hydration and health status for clusters in a staging deployment. +/// +/// This function checks: +/// - Hydration progress for each cluster +/// - Wallclock lag to determine if data is fresh +/// - Replica health (detecting OOM-looping replicas) +/// +/// # Arguments +/// * `client` - Database client +/// * `deploy_id` - Staging deployment ID +/// * `allowed_lag_secs` - Maximum allowed lag in seconds before marking as "lagging" +/// +/// # Returns +/// A vector of `ClusterStatusContext` with full status details for each cluster. 
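Callers typically poll this status until every cluster is ready before promoting; a sketch reusing the default lag threshold defined above (`pg` a `tokio_postgres::Client`, deploy id made up):

    let statuses =
        get_deployment_hydration_status(&pg, "20240101120000", DEFAULT_ALLOWED_LAG_SECS).await?;
    // Count clusters that are not yet Ready (hydrating, lagging, or failing).
    let not_ready = statuses
        .iter()
        .filter(|s| s.status != ClusterDeploymentStatus::Ready)
        .count();
    if not_ready > 0 {
        println!("still waiting on {not_ready} cluster(s)");
    }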
+pub async fn get_deployment_hydration_status( + client: &PgClient, + deploy_id: &str, + allowed_lag_secs: i64, +) -> Result, ConnectionError> { + let pattern = format!("%_{}", deploy_id); + + let query = format!( + r#" + WITH + -- Detect problematic replicas: 3+ OOM kills in 24h (subscribe-friendly) + problematic_replicas AS ( + SELECT replica_id + FROM mz_internal.mz_cluster_replica_status_history + WHERE occurred_at + INTERVAL '24 hours' > mz_now() + AND reason = 'oom-killed' + GROUP BY replica_id + HAVING COUNT(*) >= 3 + ), + + -- Cluster health: count total vs problematic replicas + cluster_health AS ( + SELECT + c.name AS cluster_name, + c.id AS cluster_id, + COUNT(r.id) AS total_replicas, + COUNT(pr.replica_id) AS problematic_replicas + FROM mz_clusters c + LEFT JOIN mz_cluster_replicas r ON c.id = r.cluster_id + LEFT JOIN problematic_replicas pr ON r.id = pr.replica_id + WHERE c.name LIKE $1 + GROUP BY c.name, c.id + ), + + -- Hydration counts per cluster (best replica) + hydration_counts AS ( + SELECT + c.name AS cluster_name, + r.id AS replica_id, + COUNT(*) FILTER (WHERE mhs.hydrated) AS hydrated, + COUNT(*) AS total + FROM mz_clusters c + JOIN mz_cluster_replicas r ON c.id = r.cluster_id + LEFT JOIN mz_internal.mz_hydration_statuses mhs ON mhs.replica_id = r.id + WHERE c.name LIKE $1 + GROUP BY c.name, r.id + ), + + hydration_best AS ( + SELECT cluster_name, MAX(hydrated) AS hydrated, MAX(total) AS total + FROM hydration_counts + GROUP BY cluster_name + ), + + -- Max lag per cluster using mz_wallclock_global_lag + cluster_lag AS ( + SELECT + c.name AS cluster_name, + MAX(EXTRACT(EPOCH FROM wgl.lag)) AS max_lag_secs + FROM mz_clusters c + JOIN mz_cluster_replicas r ON c.id = r.cluster_id + JOIN mz_internal.mz_hydration_statuses mhs ON mhs.replica_id = r.id + JOIN mz_internal.mz_wallclock_global_lag wgl ON wgl.object_id = mhs.object_id + WHERE c.name LIKE $1 + GROUP BY c.name + ) + + SELECT + ch.cluster_name, + ch.cluster_id, + CASE + WHEN ch.total_replicas = 0 THEN 'failing' + WHEN ch.total_replicas = ch.problematic_replicas THEN 'failing' + WHEN COALESCE(hb.hydrated, 0) < COALESCE(hb.total, 0) THEN 'hydrating' + WHEN COALESCE(cl.max_lag_secs, 0) > {allowed_lag_secs} THEN 'lagging' + ELSE 'ready' + END AS status, + CASE + WHEN ch.total_replicas = 0 THEN 'no_replicas' + WHEN ch.total_replicas = ch.problematic_replicas THEN 'all_replicas_problematic' + ELSE NULL + END AS failure_reason, + COALESCE(hb.hydrated, 0) AS hydrated_count, + COALESCE(hb.total, 0) AS total_count, + COALESCE(cl.max_lag_secs, 0)::bigint AS max_lag_secs, + ch.total_replicas, + ch.problematic_replicas + FROM cluster_health ch + LEFT JOIN hydration_best hb ON ch.cluster_name = hb.cluster_name + LEFT JOIN cluster_lag cl ON ch.cluster_name = cl.cluster_name + "#, + allowed_lag_secs = allowed_lag_secs + ); + + let rows = client + .query(&query, &[&pattern]) + .await + .map_err(ConnectionError::Query)?; + + let mut results = Vec::new(); + for row in rows { + let cluster_name: String = row.get("cluster_name"); + let cluster_id: String = row.get("cluster_id"); + let status_str: String = row.get("status"); + let failure_reason: Option = row.get("failure_reason"); + let hydrated_count: i64 = row.get("hydrated_count"); + let total_count: i64 = row.get("total_count"); + let max_lag_secs: i64 = row.get("max_lag_secs"); + let total_replicas: i64 = row.get("total_replicas"); + let problematic_replicas: i64 = row.get("problematic_replicas"); + + let status = match status_str.as_str() { + "ready" => 
ClusterDeploymentStatus::Ready, + "hydrating" => ClusterDeploymentStatus::Hydrating { + hydrated: hydrated_count, + total: total_count, + }, + "lagging" => ClusterDeploymentStatus::Lagging { max_lag_secs }, + "failing" => { + let reason = match failure_reason.as_deref() { + Some("no_replicas") => FailureReason::NoReplicas, + Some("all_replicas_problematic") => FailureReason::AllReplicasProblematic { + problematic: problematic_replicas, + total: total_replicas, + }, + _ => FailureReason::NoReplicas, // fallback + }; + ClusterDeploymentStatus::Failing { reason } + } + _ => ClusterDeploymentStatus::Ready, // fallback + }; + + results.push(ClusterStatusContext { + cluster_name, + cluster_id, + status, + hydrated_count, + total_count, + max_lag_secs, + total_replicas, + problematic_replicas, + }); + } + + Ok(results) +} + +// ============================================================================= +// Apply State Management +// ============================================================================= + +/// Create apply state schemas with comments for tracking apply progress. +/// +/// Creates two schemas in `_mz_deploy`: +/// - `apply__pre` with comment 'swapped=false' +/// - `apply__post` with comment 'swapped=true' +/// +/// The schemas are created first (if they don't exist), then comments are set +/// (if they don't have comments). During the swap transaction, the schemas +/// exchange names, which effectively moves the 'swapped=true' comment to the +/// `_pre` schema. +pub async fn create_apply_state_schemas( + client: &PgClient, + deploy_id: &str, +) -> Result<(), ConnectionError> { + let pre_schema = format!("apply_{}_pre", deploy_id); + let post_schema = format!("apply_{}_post", deploy_id); + + // Create _pre schema if it doesn't exist + let create_pre = format!("CREATE SCHEMA IF NOT EXISTS _mz_deploy.{}", pre_schema); + client + .execute(&create_pre, &[]) + .await + .map_err(ConnectionError::Query)?; + + // Create _post schema if it doesn't exist + let create_post = format!("CREATE SCHEMA IF NOT EXISTS _mz_deploy.{}", post_schema); + client + .execute(&create_post, &[]) + .await + .map_err(ConnectionError::Query)?; + + // Query to check if a schema has a comment (using mz_internal.mz_comments) + let comment_check_query = r#" + SELECT c.comment + FROM mz_catalog.mz_schemas s + JOIN mz_catalog.mz_databases d ON s.database_id = d.id + LEFT JOIN mz_internal.mz_comments c ON s.id = c.id + WHERE s.name = $1 AND d.name = '_mz_deploy' + "#; + + // Set comment on _pre schema if it doesn't have one + let rows = client + .query(comment_check_query, &[&pre_schema]) + .await + .map_err(ConnectionError::Query)?; + + if !rows.is_empty() { + let comment: Option = rows[0].get("comment"); + if comment.is_none() { + let comment_pre = format!( + "COMMENT ON SCHEMA _mz_deploy.{} IS 'swapped=false'", + pre_schema + ); + client + .execute(&comment_pre, &[]) + .await + .map_err(ConnectionError::Query)?; + } + } + + // Set comment on _post schema if it doesn't have one + let rows = client + .query(comment_check_query, &[&post_schema]) + .await + .map_err(ConnectionError::Query)?; + + if !rows.is_empty() { + let comment: Option = rows[0].get("comment"); + if comment.is_none() { + let comment_post = format!( + "COMMENT ON SCHEMA _mz_deploy.{} IS 'swapped=true'", + post_schema + ); + client + .execute(&comment_post, &[]) + .await + .map_err(ConnectionError::Query)?; + } + } + + Ok(()) +} + +/// Get the current apply state for a deployment. 
+/// +/// Checks for the existence of `_mz_deploy.apply__pre` schema +/// and its comment to determine the state: +/// - Schema doesn't exist → NotStarted +/// - Schema exists with comment 'swapped=false' → PreSwap +/// - Schema exists with comment 'swapped=true' → PostSwap +pub async fn get_apply_state( + client: &PgClient, + deploy_id: &str, +) -> Result { + let pre_schema = format!("apply_{}_pre", deploy_id); + + // Query schema existence and comment using mz_internal.mz_comments + let query = r#" + SELECT c.comment + FROM mz_catalog.mz_schemas s + JOIN mz_catalog.mz_databases d ON s.database_id = d.id + LEFT JOIN mz_internal.mz_comments c ON s.id = c.id + WHERE s.name = $1 AND d.name = '_mz_deploy' + "#; + + let rows = client + .query(query, &[&pre_schema]) + .await + .map_err(ConnectionError::Query)?; + + if rows.is_empty() { + return Ok(ApplyState::NotStarted); + } + + let comment: Option = rows[0].get("comment"); + match comment.as_deref() { + Some("swapped=false") => Ok(ApplyState::PreSwap), + Some("swapped=true") => Ok(ApplyState::PostSwap), + _ => { + // Unexpected comment or no comment - treat as not started + Ok(ApplyState::NotStarted) + } + } +} + +/// Delete apply state schemas after successful completion. +pub async fn delete_apply_state_schemas( + client: &PgClient, + deploy_id: &str, +) -> Result<(), ConnectionError> { + let pre_schema = format!("apply_{}_pre", deploy_id); + let post_schema = format!("apply_{}_post", deploy_id); + + // Drop schemas if they exist + let drop_pre = format!("DROP SCHEMA IF EXISTS _mz_deploy.{}", pre_schema); + client + .execute(&drop_pre, &[]) + .await + .map_err(ConnectionError::Query)?; + + let drop_post = format!("DROP SCHEMA IF EXISTS _mz_deploy.{}", post_schema); + client + .execute(&drop_post, &[]) + .await + .map_err(ConnectionError::Query)?; + + Ok(()) +} + +// ============================================================================= +// Pending Statements Management +// ============================================================================= + +/// Insert pending statements for deferred execution (e.g., sinks). +pub async fn insert_pending_statements( + client: &PgClient, + statements: &[PendingStatement], +) -> Result<(), ConnectionError> { + if statements.is_empty() { + return Ok(()); + } + + let insert_sql = r#" + INSERT INTO _mz_deploy.public.pending_statements + (deploy_id, sequence_num, database, schema, object, object_hash, statement_sql, statement_kind, executed_at) + VALUES + ($1, $2, $3, $4, $5, $6, $7, $8, $9) + "#; + + for stmt in statements { + client + .execute( + insert_sql, + &[ + &stmt.deploy_id, + &stmt.sequence_num, + &stmt.database, + &stmt.schema, + &stmt.object, + &stmt.object_hash, + &stmt.statement_sql, + &stmt.statement_kind, + &stmt.executed_at, + ], + ) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} + +/// Get pending statements for a deployment that haven't been executed yet. 
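The pending-statement helpers below support a fetch / execute / mark loop for deferred sinks; a sketch with a made-up deploy id (`pg` a `tokio_postgres::Client`):

    for stmt in get_pending_statements(&pg, "20240101120000").await? {
        // Run the deferred DDL, then record that it has been executed.
        pg.execute(stmt.statement_sql.as_str(), &[]).await?;
        mark_statement_executed(&pg, "20240101120000", stmt.sequence_num).await?;
    }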
+pub async fn get_pending_statements( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT deploy_id, sequence_num, database, schema, object, object_hash, + statement_sql, statement_kind, executed_at + FROM _mz_deploy.public.pending_statements + WHERE deploy_id = $1 AND executed_at IS NULL + ORDER BY sequence_num + "#; + + let rows = client + .query(query, &[&deploy_id]) + .await + .map_err(ConnectionError::Query)?; + + let mut statements = Vec::new(); + for row in rows { + statements.push(PendingStatement { + deploy_id: row.get("deploy_id"), + sequence_num: row.get("sequence_num"), + database: row.get("database"), + schema: row.get("schema"), + object: row.get("object"), + object_hash: row.get("object_hash"), + statement_sql: row.get("statement_sql"), + statement_kind: row.get("statement_kind"), + executed_at: row.get("executed_at"), + }); + } + + Ok(statements) +} + +/// Mark a pending statement as executed. +pub async fn mark_statement_executed( + client: &PgClient, + deploy_id: &str, + sequence_num: i32, +) -> Result<(), ConnectionError> { + let update_sql = r#" + UPDATE _mz_deploy.public.pending_statements + SET executed_at = NOW() + WHERE deploy_id = $1 AND sequence_num = $2 + "#; + + client + .execute(update_sql, &[&deploy_id, &sequence_num]) + .await + .map_err(ConnectionError::Query)?; + + Ok(()) +} + +/// Delete all pending statements for a deployment. +pub async fn delete_pending_statements( + client: &PgClient, + deploy_id: &str, +) -> Result<(), ConnectionError> { + client + .execute( + "DELETE FROM _mz_deploy.public.pending_statements WHERE deploy_id = $1", + &[&deploy_id], + ) + .await + .map_err(ConnectionError::Query)?; + + Ok(()) +} diff --git a/src/mz-deploy/src/client/errors.rs b/src/mz-deploy/src/client/errors.rs new file mode 100644 index 0000000000000..ce6ea6018ff54 --- /dev/null +++ b/src/mz-deploy/src/client/errors.rs @@ -0,0 +1,615 @@ +//! Error types for the client module. +//! +//! This module contains all error types used by the database client, +//! including connection errors and validation errors. + +use crate::client::config::ConfigError; +use crate::project::object_id::ObjectId; +use owo_colors::OwoColorize; +use std::fmt; +use std::path::PathBuf; +use thiserror::Error; + +/// Errors that can occur during database operations. 
+#[derive(Debug, Error)] +pub enum ConnectionError { + #[error("configuration error: {0}")] + Config(#[from] ConfigError), + + #[error("failed to connect to {host}:{port}: {source}")] + Connect { + host: String, + port: u16, + source: tokio_postgres::Error, + }, + + #[error("{}", format_query_error(.0))] + Query(tokio_postgres::Error), + + #[error("dependency error: {0}")] + Dependency(#[from] crate::project::error::DependencyError), + + #[error("failed to create schema '{database}.{schema}': {source}")] + SchemaCreationFailed { + database: String, + schema: String, + source: Box, + }, + + #[error("failed to create cluster '{name}': {source}")] + ClusterCreationFailed { + name: String, + source: Box, + }, + + #[error("cluster '{name}' already exists")] + ClusterAlreadyExists { name: String }, + + #[error("introspection failed for {object_type}: {source}")] + IntrospectionFailed { + object_type: String, + source: Box, + }, + + #[error("cluster '{name}' not found")] + ClusterNotFound { name: String }, + + #[error("deployment '{deploy_id}' already exists")] + DeploymentAlreadyExists { deploy_id: String }, + + #[error("deployment '{deploy_id}' not found")] + DeploymentNotFound { deploy_id: String }, + + #[error("deployment '{deploy_id}' has already been promoted to production")] + DeploymentAlreadyPromoted { deploy_id: String }, + + #[error("unsupported statement type: {0}")] + UnsupportedStatementType(String), + + #[error("{0}")] + Message(String), +} + +fn format_query_error(error: &tokio_postgres::Error) -> String { + if let Some(db_error) = error.as_db_error() { + let mut parts = vec![format!("database error: {}", db_error.message())]; + + if let Some(detail) = db_error.detail() { + parts.push(format!(" Detail: {}", detail)); + } + + if let Some(hint) = db_error.hint() { + parts.push(format!(" Hint: {}", hint)); + } + + parts.push(format!(" Code: {:?}", db_error.code())); + parts.join("\n") + } else { + format!("query error: {}", error) + } +} + +impl From for ConnectionError { + fn from(error: tokio_postgres::Error) -> Self { + ConnectionError::Query(error) + } +} + +/// Errors that can occur during project validation against the database. 
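Editorial note, not part of this change: a small sketch showing why the `From<tokio_postgres::Error>` conversion above is useful. With it, ad-hoc queries can use `?` and still surface the detailed message, hint, and code produced by `format_query_error`; the function below is illustrative only.

    use crate::client::errors::ConnectionError;

    async fn ping(client: &tokio_postgres::Client) -> Result<(), ConnectionError> {
        // `?` maps tokio_postgres::Error into ConnectionError::Query via the From impl.
        client.query_one("SELECT 1", &[]).await?;
        Ok(())
    }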
+#[derive(Debug)] +pub enum DatabaseValidationError { + MissingDatabases(Vec), + MissingSchemas(Vec<(String, String)>), + MissingClusters(Vec), + CompilationFailed { + file_path: PathBuf, + object_name: ObjectId, + missing_dependencies: Vec, + }, + Multiple { + databases: Vec, + schemas: Vec<(String, String)>, + clusters: Vec, + compilation_errors: Vec, + }, + ClusterConflict { + cluster_name: String, + compute_objects: Vec, + storage_objects: Vec, + }, + InsufficientPrivileges { + missing_database_usage: Vec, + missing_createcluster: bool, + }, + MissingSources(Vec), + MissingConnections(Vec), + MissingTableDependencies { + objects_needing_tables: Vec<(ObjectId, Vec)>, + }, + QueryError(tokio_postgres::Error), +} + +impl fmt::Display for DatabaseValidationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DatabaseValidationError::MissingDatabases(dbs) => { + write!(f, "Missing databases: {}", dbs.join(", ")) + } + DatabaseValidationError::MissingSchemas(schemas) => { + let schema_list: Vec = schemas + .iter() + .map(|(db, schema)| format!("{}.{}", db, schema)) + .collect(); + write!(f, "Missing schemas: {}", schema_list.join(", ")) + } + DatabaseValidationError::MissingClusters(clusters) => { + write!(f, "Missing clusters: {}", clusters.join(", ")) + } + DatabaseValidationError::CompilationFailed { + file_path, + object_name, + missing_dependencies, + } => { + let relative_path = format_relative_path(file_path); + + writeln!( + f, + "{}: failed to compile '{}': missing external dependencies", + "error".bright_red().bold(), + object_name + )?; + writeln!(f, " {} {}", "-->".bright_blue().bold(), relative_path)?; + writeln!(f)?; + writeln!(f, " Missing dependencies:")?; + for dep in missing_dependencies { + writeln!(f, " - {}", dep)?; + } + Ok(()) + } + DatabaseValidationError::Multiple { + databases, + schemas, + clusters, + compilation_errors, + } => { + let mut has_errors = false; + + writeln!(f, "Missing dependencies")?; + if !databases.is_empty() { + writeln!(f, "Missing databases: {}", databases.join(", "))?; + has_errors = true; + } + + if !schemas.is_empty() { + let schema_list: Vec = schemas + .iter() + .map(|(db, schema)| format!("{}.{}", db, schema)) + .collect(); + writeln!(f, "Missing schemas: {}", schema_list.join(", "))?; + has_errors = true; + } + + if !clusters.is_empty() { + writeln!(f, "Missing clusters: {}", clusters.join(", "))?; + has_errors = true; + } + + if !compilation_errors.is_empty() { + if has_errors { + writeln!(f)?; + } + for (idx, err) in compilation_errors.iter().enumerate() { + if idx > 0 { + writeln!(f)?; + } + write!(f, "{}", err)?; + } + } + + Ok(()) + } + DatabaseValidationError::ClusterConflict { + cluster_name, + compute_objects, + storage_objects, + } => { + writeln!( + f, + "{}: cluster '{}' contains both storage and computation objects", + "error".bright_red().bold(), + cluster_name + )?; + writeln!(f)?; + writeln!(f, " Computation objects (indexes, materialized views):")?; + for obj in compute_objects { + writeln!(f, " - {}", obj)?; + } + writeln!(f)?; + writeln!(f, " Storage objects (sources, sinks):")?; + for obj in storage_objects { + writeln!(f, " - {}", obj)?; + } + writeln!(f)?; + writeln!( + f, + " {} Move sources/sinks to a separate cluster to avoid accidental recreation", + "help:".bright_cyan().bold() + )?; + Ok(()) + } + DatabaseValidationError::InsufficientPrivileges { + missing_database_usage, + missing_createcluster, + } => { + writeln!( + f, + "{}: insufficient privileges to deploy this project", + 
"error".bright_red().bold() + )?; + writeln!(f)?; + + if !missing_database_usage.is_empty() { + writeln!(f, " Missing USAGE privilege on databases:")?; + for db in missing_database_usage { + writeln!(f, " - {}", db)?; + } + writeln!(f)?; + } + + if *missing_createcluster { + writeln!(f, " Missing CREATECLUSTER system privilege")?; + writeln!(f)?; + } + + writeln!( + f, + " {} Ask your administrator to grant the required privileges:", + "help:".bright_cyan().bold() + )?; + writeln!(f)?; + + if !missing_database_usage.is_empty() { + for db in missing_database_usage { + writeln!(f, " GRANT USAGE ON DATABASE {} TO ;", db)?; + } + } + + if *missing_createcluster { + writeln!(f, " GRANT CREATECLUSTER ON SYSTEM TO ;")?; + } + + Ok(()) + } + DatabaseValidationError::MissingSources(sources) => { + writeln!( + f, + "{}: The following sources are referenced but do not exist:", + "error".bright_red().bold() + )?; + for source in sources { + writeln!( + f, + " - {}.{}.{}", + source.database, source.schema, source.object + )?; + } + writeln!(f)?; + writeln!( + f, + "Please ensure all sources are created before running this command." + )?; + Ok(()) + } + DatabaseValidationError::MissingConnections(connections) => { + writeln!( + f, + "{}: The following connections are referenced but do not exist:", + "error".bright_red().bold() + )?; + for conn in connections { + writeln!(f, " - {}.{}.{}", conn.database, conn.schema, conn.object)?; + } + writeln!(f)?; + writeln!( + f, + "{} Connections are not managed by mz-deploy and must be created separately.", + "help:".bright_cyan().bold() + )?; + Ok(()) + } + DatabaseValidationError::MissingTableDependencies { + objects_needing_tables, + } => { + writeln!( + f, + "{}: Objects depend on tables that don't exist in the database", + "error".bright_red().bold() + )?; + writeln!(f)?; + for (object, missing_tables) in objects_needing_tables { + writeln!( + f, + " {} {}.{}.{} depends on:", + "×".bright_red(), + object.database, + object.schema, + object.object + )?; + for table in missing_tables { + writeln!( + f, + " - {}.{}.{}", + table.database, table.schema, table.object + )?; + } + } + writeln!(f)?; + writeln!( + f, + "{} Run 'mz-deploy create-tables' to create the required tables first", + "help:".bright_cyan().bold() + )?; + Ok(()) + } + DatabaseValidationError::QueryError(e) => { + write!(f, "Database query failed: {}", e) + } + } + } +} + +impl std::error::Error for DatabaseValidationError {} + +/// Extract last 3 path components for display (database/schema/file.sql). +/// +/// This helper is used in error formatting to show relative paths +/// that are more readable than full absolute paths. 
+pub fn format_relative_path(path: &std::path::Path) -> String { + let path_components: Vec<_> = path.components().collect(); + let len = path_components.len(); + if len >= 3 { + format!( + "{}/{}/{}", + path_components[len - 3].as_os_str().to_string_lossy(), + path_components[len - 2].as_os_str().to_string_lossy(), + path_components[len - 1].as_os_str().to_string_lossy() + ) + } else { + path.display().to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_missing_table_dependencies_error_display() { + let error = DatabaseValidationError::MissingTableDependencies { + objects_needing_tables: vec![ + ( + ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "my_view".to_string(), + ), + vec![ + ObjectId::new( + "materialize".to_string(), + "tables".to_string(), + "users".to_string(), + ), + ObjectId::new( + "materialize".to_string(), + "tables".to_string(), + "orders".to_string(), + ), + ], + ), + ( + ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "another_view".to_string(), + ), + vec![ObjectId::new( + "materialize".to_string(), + "tables".to_string(), + "products".to_string(), + )], + ), + ], + }; + + let error_string = format!("{}", error); + + // Check that error message contains key elements + assert!(error_string.contains("error")); + assert!(error_string.contains("Objects depend on tables that don't exist")); + assert!(error_string.contains("materialize.public.my_view")); + assert!(error_string.contains("materialize.tables.users")); + assert!(error_string.contains("materialize.tables.orders")); + assert!(error_string.contains("materialize.public.another_view")); + assert!(error_string.contains("materialize.tables.products")); + assert!(error_string.contains("help")); + assert!(error_string.contains("mz-deploy create-tables")); + } + + #[test] + fn test_format_relative_path() { + let path = PathBuf::from("/home/user/project/database/schema/file.sql"); + assert_eq!(format_relative_path(&path), "database/schema/file.sql"); + + let short_path = PathBuf::from("file.sql"); + assert_eq!(format_relative_path(&short_path), "file.sql"); + } + + #[test] + fn test_format_relative_path_exactly_three_components() { + let path = PathBuf::from("database/schema/file.sql"); + assert_eq!(format_relative_path(&path), "database/schema/file.sql"); + } + + #[test] + fn test_format_relative_path_two_components() { + let path = PathBuf::from("schema/file.sql"); + assert_eq!(format_relative_path(&path), "schema/file.sql"); + } + + #[test] + fn test_missing_databases_error_display() { + let error = + DatabaseValidationError::MissingDatabases(vec!["db1".to_string(), "db2".to_string()]); + let error_string = format!("{}", error); + assert!(error_string.contains("Missing databases")); + assert!(error_string.contains("db1")); + assert!(error_string.contains("db2")); + } + + #[test] + fn test_missing_schemas_error_display() { + let error = DatabaseValidationError::MissingSchemas(vec![ + ("db1".to_string(), "schema1".to_string()), + ("db2".to_string(), "schema2".to_string()), + ]); + let error_string = format!("{}", error); + assert!(error_string.contains("Missing schemas")); + assert!(error_string.contains("db1.schema1")); + assert!(error_string.contains("db2.schema2")); + } + + #[test] + fn test_missing_clusters_error_display() { + let error = DatabaseValidationError::MissingClusters(vec![ + "cluster1".to_string(), + "cluster2".to_string(), + ]); + let error_string = format!("{}", error); + 
assert!(error_string.contains("Missing clusters")); + assert!(error_string.contains("cluster1")); + assert!(error_string.contains("cluster2")); + } + + #[test] + fn test_cluster_conflict_error_display() { + let error = DatabaseValidationError::ClusterConflict { + cluster_name: "shared_cluster".to_string(), + compute_objects: vec!["my_index".to_string(), "my_mv".to_string()], + storage_objects: vec!["my_source".to_string()], + }; + let error_string = format!("{}", error); + assert!(error_string.contains("shared_cluster")); + assert!(error_string.contains("storage and computation objects")); + assert!(error_string.contains("my_index")); + assert!(error_string.contains("my_mv")); + assert!(error_string.contains("my_source")); + assert!(error_string.contains("help")); + } + + #[test] + fn test_insufficient_privileges_error_display() { + let error = DatabaseValidationError::InsufficientPrivileges { + missing_database_usage: vec!["db1".to_string(), "db2".to_string()], + missing_createcluster: true, + }; + let error_string = format!("{}", error); + assert!(error_string.contains("insufficient privileges")); + assert!(error_string.contains("db1")); + assert!(error_string.contains("db2")); + assert!(error_string.contains("CREATECLUSTER")); + assert!(error_string.contains("GRANT")); + } + + #[test] + fn test_insufficient_privileges_only_database() { + let error = DatabaseValidationError::InsufficientPrivileges { + missing_database_usage: vec!["db1".to_string()], + missing_createcluster: false, + }; + let error_string = format!("{}", error); + assert!(error_string.contains("db1")); + assert!(!error_string.contains("CREATECLUSTER ON SYSTEM")); + } + + #[test] + fn test_missing_sources_error_display() { + let error = DatabaseValidationError::MissingSources(vec![ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "kafka_source".to_string(), + )]); + let error_string = format!("{}", error); + assert!(error_string.contains("sources are referenced but do not exist")); + assert!(error_string.contains("materialize.public.kafka_source")); + } + + #[test] + fn test_multiple_validation_errors_display() { + let error = DatabaseValidationError::Multiple { + databases: vec!["missing_db".to_string()], + schemas: vec![("db".to_string(), "missing_schema".to_string())], + clusters: vec!["missing_cluster".to_string()], + compilation_errors: vec![], + }; + let error_string = format!("{}", error); + assert!(error_string.contains("missing_db")); + assert!(error_string.contains("db.missing_schema")); + assert!(error_string.contains("missing_cluster")); + } + + #[test] + fn test_connection_error_display() { + let error = ConnectionError::Message("test error message".to_string()); + let error_string = format!("{}", error); + assert_eq!(error_string, "test error message"); + } + + #[test] + fn test_connection_error_cluster_not_found() { + let error = ConnectionError::ClusterNotFound { + name: "missing_cluster".to_string(), + }; + let error_string = format!("{}", error); + assert!(error_string.contains("missing_cluster")); + assert!(error_string.contains("not found")); + } + + #[test] + fn test_connection_error_deployment_already_exists() { + let error = ConnectionError::DeploymentAlreadyExists { + deploy_id: "staging_123".to_string(), + }; + let error_string = format!("{}", error); + assert!(error_string.contains("staging_123")); + assert!(error_string.contains("already exists")); + } + + #[test] + fn test_connection_error_deployment_not_found() { + let error = ConnectionError::DeploymentNotFound { + 
deploy_id: "nonexistent".to_string(), + }; + let error_string = format!("{}", error); + assert!(error_string.contains("nonexistent")); + assert!(error_string.contains("not found")); + } + + #[test] + fn test_connection_error_deployment_already_promoted() { + let error = ConnectionError::DeploymentAlreadyPromoted { + deploy_id: "prod_deploy".to_string(), + }; + let error_string = format!("{}", error); + assert!(error_string.contains("prod_deploy")); + assert!(error_string.contains("already been promoted")); + } + + #[test] + fn test_database_validation_error_is_error_trait() { + // Verify that DatabaseValidationError implements std::error::Error + let error: Box = + Box::new(DatabaseValidationError::MissingDatabases(vec![])); + assert!(error.to_string().contains("Missing databases")); + } +} diff --git a/src/mz-deploy/src/client/introspection.rs b/src/mz-deploy/src/client/introspection.rs new file mode 100644 index 0000000000000..c27b103decec0 --- /dev/null +++ b/src/mz-deploy/src/client/introspection.rs @@ -0,0 +1,735 @@ +//! Database introspection operations. +//! +//! This module contains methods for querying database metadata, +//! such as checking for existence of schemas, clusters, and objects. + +use crate::client::errors::ConnectionError; +use crate::client::models::{Cluster, ClusterConfig, ClusterGrant, ClusterOptions, ClusterReplica}; +use crate::project::object_id::ObjectId; +use crate::utils::sql_utils::quote_identifier; +use std::collections::BTreeSet; +use tokio_postgres::Client as PgClient; +use tokio_postgres::types::ToSql; + +/// A sink that depends on an object in a schema being dropped. +/// +/// Used during apply to identify sinks that need to be repointed to new +/// upstream objects before the old schemas are dropped with CASCADE. +#[derive(Debug, Clone)] +pub struct DependentSink { + pub sink_database: String, + pub sink_schema: String, + pub sink_name: String, + pub dependency_database: String, + pub dependency_schema: String, + pub dependency_name: String, + pub dependency_type: String, +} + +/// Check if a schema exists in the specified database. +pub async fn schema_exists( + client: &PgClient, + database: &str, + schema: &str, +) -> Result { + let query = r#" + SELECT EXISTS( + SELECT 1 + FROM mz_catalog.mz_schemas s + JOIN mz_catalog.mz_databases d ON s.database_id = d.id + WHERE s.name = $1 AND d.name = $2 + ) AS exists + "#; + + let row = client + .query_one(query, &[&schema, &database]) + .await + .map_err(ConnectionError::Query)?; + + Ok(row.get("exists")) +} + +/// Check if a cluster exists. +pub async fn cluster_exists(client: &PgClient, name: &str) -> Result { + let query = r#" + SELECT EXISTS( + SELECT 1 FROM mz_catalog.mz_clusters WHERE name = $1 + ) AS exists + "#; + + let row = client + .query_one(query, &[&name]) + .await + .map_err(ConnectionError::Query)?; + + Ok(row.get("exists")) +} + +/// Get a cluster by name. 
+pub async fn get_cluster( + client: &PgClient, + name: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT + id, + name, + size, + replication_factor + FROM mz_catalog.mz_clusters + WHERE name = $1 + "#; + + let rows = client + .query(query, &[&name]) + .await + .map_err(ConnectionError::Query)?; + + if rows.is_empty() { + return Ok(None); + } + + let row = &rows[0]; + let replication_factor: Option = row + .try_get("replication_factor") + .or_else(|_| { + row.try_get::<_, Option>("replication_factor") + .map(|v| v.map(i64::from)) + }) + .or_else(|_| { + row.try_get::<_, Option>("replication_factor") + .map(|v| v.map(i64::from)) + }) + .unwrap_or(None); + + Ok(Some(Cluster { + id: row.get("id"), + name: row.get("name"), + size: row.get("size"), + replication_factor, + })) +} + +/// List all clusters. +pub async fn list_clusters(client: &PgClient) -> Result, ConnectionError> { + let query = r#" + SELECT + id, + name, + size, + replication_factor + FROM mz_catalog.mz_clusters + ORDER BY name + "#; + + let rows = client + .query(query, &[]) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows + .iter() + .map(|row| Cluster { + id: row.get("id"), + name: row.get("name"), + size: row.get("size"), + replication_factor: row.get("replication_factor"), + }) + .collect()) +} + +/// Get cluster configuration including replicas and grants. +/// +/// This fetches all information needed to clone a cluster's configuration: +/// - For managed clusters: size and replication factor +/// - For unmanaged clusters: replica configurations +/// - For both: privilege grants +pub async fn get_cluster_config( + client: &PgClient, + name: &str, +) -> Result, ConnectionError> { + // Query 1: Get cluster info and replicas with LEFT JOIN + let cluster_query = r#" + SELECT + c.id, + c.name, + c.managed, + c.size, + c.replication_factor, + r.name AS replica_name, + r.size AS replica_size, + r.availability_zone + FROM mz_catalog.mz_clusters c + LEFT JOIN mz_catalog.mz_cluster_replicas r ON c.id = r.cluster_id + WHERE c.name = $1 + ORDER BY r.name + "#; + + let cluster_rows = client + .query(cluster_query, &[&name]) + .await + .map_err(ConnectionError::Query)?; + + if cluster_rows.is_empty() { + return Ok(None); + } + + // Extract cluster-level info from first row + let first_row = &cluster_rows[0]; + let managed: bool = first_row.get("managed"); + let size: Option = first_row.get("size"); + let replication_factor: Option = first_row + .try_get("replication_factor") + .or_else(|_| { + first_row + .try_get::<_, Option>("replication_factor") + .map(|v| v.map(i64::from)) + }) + .or_else(|_| { + first_row + .try_get::<_, Option>("replication_factor") + .map(|v| v.map(i64::from)) + }) + .unwrap_or(None); + + // Query 2: Get grants + let grants_query = r#" + WITH cluster_privilege AS ( + SELECT mz_internal.mz_aclexplode(privileges).* + FROM mz_clusters + WHERE name = $1 + ) + SELECT + grantee.name AS grantee, + c.privilege_type + FROM cluster_privilege AS c + JOIN mz_roles AS grantee ON c.grantee = grantee.id + WHERE grantee.name NOT IN ('none', 'mz_system', 'mz_support') + "#; + + let grant_rows = client + .query(grants_query, &[&name]) + .await + .map_err(ConnectionError::Query)?; + + let grants: Vec = grant_rows + .iter() + .map(|row| ClusterGrant { + grantee: row.get("grantee"), + privilege_type: row.get("privilege_type"), + }) + .collect(); + + if managed { + // Managed cluster + let size = size.ok_or_else(|| { + ConnectionError::Message(format!( + "Managed cluster '{}' has no size (unexpected)", + name 
+ )) + })?; + + let replication_factor = replication_factor.unwrap_or(1).try_into().map_err(|_| { + ConnectionError::Message(format!("Invalid replication_factor for cluster '{}'", name)) + })?; + + Ok(Some(ClusterConfig::Managed { + options: ClusterOptions { + size, + replication_factor, + }, + grants, + })) + } else { + // Unmanaged cluster - collect replicas + let mut replicas = Vec::new(); + for row in &cluster_rows { + let replica_name: Option = row.get("replica_name"); + if let Some(replica_name) = replica_name { + replicas.push(ClusterReplica { + name: replica_name, + size: row.get("replica_size"), + availability_zone: row.get("availability_zone"), + }); + } + } + + Ok(Some(ClusterConfig::Unmanaged { replicas, grants })) + } +} + +/// Get the current Materialize user/role. +pub async fn get_current_user(client: &PgClient) -> Result { + let row = client + .query_one("SELECT current_user()", &[]) + .await + .map_err(ConnectionError::Query)?; + + Ok(row.get(0)) +} + +/// Check which objects from a set exist in the production database. +/// +/// Returns fully-qualified names of objects that exist. +pub async fn check_objects_exist( + client: &PgClient, + objects: &BTreeSet, +) -> Result, ConnectionError> { + let fqns: Vec = objects.iter().map(|o| o.to_string()).collect(); + if fqns.is_empty() { + return Ok(Vec::new()); + } + + let placeholders: Vec = (1..=fqns.len()).map(|i| format!("${}", i)).collect(); + let placeholders_str = placeholders.join(", "); + + let query = format!( + r#" + SELECT d.name || '.' || s.name || '.' || mo.name as fqn + FROM mz_objects mo + JOIN mz_schemas s ON mo.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name || '.' || s.name || '.' || mo.name IN ({}) + AND mo.type IN ('table', 'view', 'materialized-view', 'source', 'sink') + ORDER BY fqn + "#, + placeholders_str + ); + + let mut params: Vec<&(dyn ToSql + Sync)> = Vec::new(); + for fqn in &fqns { + params.push(fqn); + } + + let rows = client + .query(&query, ¶ms) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows.iter().map(|row| row.get("fqn")).collect()) +} + +/// Check which tables from the given set exist in the database. +/// +/// Returns a HashSet of ObjectIds for tables that already exist. +pub async fn check_tables_exist( + client: &PgClient, + tables: &BTreeSet, +) -> Result, ConnectionError> { + let fqns: Vec = tables.iter().map(|o| o.to_string()).collect(); + if fqns.is_empty() { + return Ok(BTreeSet::new()); + } + + let placeholders: Vec = (1..=fqns.len()).map(|i| format!("${}", i)).collect(); + let placeholders_str = placeholders.join(", "); + + let query = format!( + r#" + SELECT d.name || '.' || s.name || '.' || t.name as fqn + FROM mz_tables t + JOIN mz_schemas s ON t.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name || '.' || s.name || '.' || t.name IN ({}) + ORDER BY fqn + "#, + placeholders_str + ); + + let mut params: Vec<&(dyn ToSql + Sync)> = Vec::new(); + for fqn in &fqns { + params.push(fqn); + } + + let rows = client + .query(&query, ¶ms) + .await + .map_err(ConnectionError::Query)?; + + // Convert FQN strings back to ObjectIds + let mut existing = BTreeSet::new(); + for row in rows { + let fqn: String = row.get("fqn"); + // Find the matching ObjectId from the input set + if let Some(obj_id) = tables.iter().find(|o| o.to_string() == fqn) { + existing.insert(obj_id.clone()); + } + } + + Ok(existing) +} + +/// Check which sinks from the given set exist in the database. 
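Editorial note, not part of this change: a sketch of turning a `ClusterConfig` returned by `get_cluster_config` back into DDL when cloning a cluster for staging. The managed/unmanaged split mirrors the enum above; the exact `CREATE CLUSTER (SIZE = ..., REPLICATION FACTOR = ...)` and `REPLICAS (...)` syntax is Materialize's documented form but should be treated as an assumption to verify, and the grants handling is omitted.

    use crate::client::models::ClusterConfig;
    use crate::utils::sql_utils::quote_identifier;

    fn clone_cluster_sql(staging_name: &str, config: &ClusterConfig) -> String {
        match config {
            // Managed clusters are fully described by size and replication factor.
            ClusterConfig::Managed { options, .. } => format!(
                "CREATE CLUSTER {} (SIZE = '{}', REPLICATION FACTOR = {})",
                quote_identifier(staging_name),
                options.size,
                options.replication_factor
            ),
            // Unmanaged clusters need their replicas spelled out explicitly.
            ClusterConfig::Unmanaged { replicas, .. } => {
                let replica_defs: Vec<String> = replicas
                    .iter()
                    .map(|r| format!("{} (SIZE = '{}')", quote_identifier(&r.name), r.size))
                    .collect();
                format!(
                    "CREATE CLUSTER {} REPLICAS ({})",
                    quote_identifier(staging_name),
                    replica_defs.join(", ")
                )
            }
        }
    }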
+/// +/// Returns a BTreeSet of ObjectIds for sinks that already exist. +/// Used during apply to skip creating sinks that already exist (like tables). +pub async fn check_sinks_exist( + client: &PgClient, + sinks: &BTreeSet, +) -> Result, ConnectionError> { + let fqns: Vec = sinks.iter().map(|o| o.to_string()).collect(); + if fqns.is_empty() { + return Ok(BTreeSet::new()); + } + + let placeholders: Vec = (1..=fqns.len()).map(|i| format!("${}", i)).collect(); + let placeholders_str = placeholders.join(", "); + + let query = format!( + r#" + SELECT d.name || '.' || s.name || '.' || k.name as fqn + FROM mz_sinks k + JOIN mz_schemas s ON k.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name || '.' || s.name || '.' || k.name IN ({}) + ORDER BY fqn + "#, + placeholders_str + ); + + let mut params: Vec<&(dyn ToSql + Sync)> = Vec::new(); + for fqn in &fqns { + params.push(fqn); + } + + let rows = client + .query(&query, ¶ms) + .await + .map_err(ConnectionError::Query)?; + + // Convert FQN strings back to ObjectIds + let mut existing = BTreeSet::new(); + for row in rows { + let fqn: String = row.get("fqn"); + // Find the matching ObjectId from the input set + if let Some(obj_id) = sinks.iter().find(|o| o.to_string() == fqn) { + existing.insert(obj_id.clone()); + } + } + + Ok(existing) +} + +/// Find sinks that depend on objects in the specified schemas. +/// +/// This is used during apply to identify sinks that need to be repointed +/// before old schemas are dropped with CASCADE. Only returns sinks whose +/// upstream object (FROM clause) is in one of the specified schemas. +pub async fn find_sinks_depending_on_schemas( + client: &PgClient, + schemas: &[(String, String)], +) -> Result, ConnectionError> { + if schemas.is_empty() { + return Ok(Vec::new()); + } + + // Build WHERE clause for (database, schema) pairs + let mut conditions = Vec::new(); + let mut param_idx = 1; + + for _ in schemas { + conditions.push(format!( + "(dep_db.name = ${} AND dep_schema.name = ${})", + param_idx, + param_idx + 1 + )); + param_idx += 2; + } + + let where_clause = conditions.join(" OR "); + + let query = format!( + r#" + SELECT + sink_db.name as sink_database, + sink_schema.name as sink_schema, + sinks.name as sink_name, + dep_db.name as dependency_database, + dep_schema.name as dependency_schema, + dep_obj.name as dependency_name, + dep_obj.type as dependency_type + FROM mz_sinks sinks + JOIN mz_schemas sink_schema ON sinks.schema_id = sink_schema.id + JOIN mz_databases sink_db ON sink_schema.database_id = sink_db.id + JOIN mz_object_dependencies deps ON sinks.id = deps.object_id + JOIN mz_objects dep_obj ON deps.referenced_object_id = dep_obj.id + JOIN mz_schemas dep_schema ON dep_obj.schema_id = dep_schema.id + JOIN mz_databases dep_db ON dep_schema.database_id = dep_db.id + WHERE ({}) + AND dep_obj.type IN ('materialized-view', 'table', 'source') + ORDER BY sink_db.name, sink_schema.name, sinks.name + "#, + where_clause + ); + + // Build params vector with references to the schema tuples + let mut params: Vec<&(dyn ToSql + Sync)> = Vec::new(); + for (database, schema) in schemas { + params.push(database); + params.push(schema); + } + + let rows = client + .query(&query, ¶ms) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows + .iter() + .map(|row| DependentSink { + sink_database: row.get("sink_database"), + sink_schema: row.get("sink_schema"), + sink_name: row.get("sink_name"), + dependency_database: row.get("dependency_database"), + dependency_schema: 
row.get("dependency_schema"), + dependency_name: row.get("dependency_name"), + dependency_type: row.get("dependency_type"), + }) + .collect()) +} + +/// Check if an object (MV, table, source) exists in the specified schema. +/// +/// Used to verify that a replacement object exists before repointing a sink. +pub async fn object_exists( + client: &PgClient, + database: &str, + schema: &str, + object: &str, +) -> Result { + let query = r#" + SELECT EXISTS( + SELECT 1 FROM mz_objects o + JOIN mz_schemas s ON o.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name = $1 AND s.name = $2 AND o.name = $3 + AND o.type IN ('materialized-view', 'table', 'source') + ) AS exists + "#; + + let row = client + .query_one(query, &[&database, &schema, &object]) + .await + .map_err(ConnectionError::Query)?; + + Ok(row.get("exists")) +} + +/// Get staging schema names for a specific deployment. +pub async fn get_staging_schemas( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let suffix = format!("_{}", deploy_id); + let pattern = format!("%{}", suffix); + + let query = r#" + SELECT d.name as database, s.name as schema + FROM mz_schemas s + JOIN mz_databases d ON s.database_id = d.id + WHERE s.name LIKE $1 + "#; + + let rows = client + .query(query, &[&pattern]) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows + .iter() + .map(|row| { + let database: String = row.get("database"); + let schema: String = row.get("schema"); + (database, schema) + }) + .collect()) +} + +/// Get staging cluster names for a specific deployment. +pub async fn get_staging_clusters( + client: &PgClient, + deploy_id: &str, +) -> Result, ConnectionError> { + let suffix = format!("_{}", deploy_id); + let pattern = format!("%{}", suffix); + + let query = r#" + SELECT name + FROM mz_clusters + WHERE name LIKE $1 + "#; + + let rows = client + .query(query, &[&pattern]) + .await + .map_err(ConnectionError::Query)?; + + Ok(rows.iter().map(|row| row.get("name")).collect()) +} + +/// Drop all objects in a schema. +/// +/// Returns the fully-qualified names of dropped objects. +pub async fn drop_schema_objects( + client: &PgClient, + database: &str, + schema: &str, +) -> Result, ConnectionError> { + let query = r#" + SELECT mo.name, mo.type + FROM mz_objects mo + JOIN mz_schemas s ON mo.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name = $1 AND s.name = $2 + AND mo.type IN ('table', 'view', 'materialized-view', 'source', 'sink') + ORDER BY mo.id DESC + "#; + + let rows = client + .query(query, &[&database, &schema]) + .await + .map_err(ConnectionError::Query)?; + + let mut dropped = Vec::new(); + for row in rows { + let name: String = row.get("name"); + let obj_type: String = row.get("type"); + + let fqn = format!( + "{}.{}.{}", + quote_identifier(database), + quote_identifier(schema), + quote_identifier(&name) + ); + let drop_type = match obj_type.as_str() { + "table" => "TABLE", + "view" => "VIEW", + "materialized-view" => "MATERIALIZED VIEW", + "source" => "SOURCE", + "sink" => "SINK", + _ => continue, + }; + + let drop_sql = format!("DROP {} IF EXISTS {} CASCADE", drop_type, fqn); + client + .execute(&drop_sql, &[]) + .await + .map_err(ConnectionError::Query)?; + + dropped.push(fqn); + } + + Ok(dropped) +} + +/// Drop specific objects by their ObjectIds. +/// +/// Returns the fully-qualified names of dropped objects. 
+pub async fn drop_objects( + client: &PgClient, + objects: &BTreeSet, +) -> Result, ConnectionError> { + let mut dropped = Vec::new(); + + if objects.is_empty() { + return Ok(dropped); + } + + let placeholders: Vec = (1..=objects.len()).map(|i| format!("${}", i)).collect(); + let placeholders_str = placeholders.join(", "); + + let query = format!( + r#" + SELECT mo.name, s.name as schema_name, d.name as database_name, mo.type + FROM mz_objects mo + JOIN mz_schemas s ON mo.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE d.name || '.' || s.name || '.' || mo.name IN ({}) + AND mo.type IN ('table', 'view', 'materialized-view', 'source', 'sink') + ORDER BY mo.id DESC + "#, + placeholders_str + ); + + let mut params: Vec<&(dyn ToSql + Sync)> = Vec::new(); + let fqns: Vec<_> = objects.iter().map(|object| object.to_string()).collect(); + for fqn in &fqns { + params.push(fqn); + } + + let rows = client + .query(&query, ¶ms) + .await + .map_err(ConnectionError::Query)?; + + for row in rows { + let name: String = row.get("name"); + let schema: String = row.get("schema_name"); + let database: String = row.get("database_name"); + let obj_type: String = row.get("type"); + + let fqn = format!( + "{}.{}.{}", + quote_identifier(&database), + quote_identifier(&schema), + quote_identifier(&name) + ); + let drop_type = match obj_type.as_str() { + "table" => "TABLE", + "view" => "VIEW", + "materialized-view" => "MATERIALIZED VIEW", + "source" => "SOURCE", + "sink" => "SINK", + _ => continue, + }; + + let drop_sql = format!("DROP {} IF EXISTS {} CASCADE", drop_type, fqn); + client + .execute(&drop_sql, &[]) + .await + .map_err(ConnectionError::Query)?; + + dropped.push(fqn); + } + + Ok(dropped) +} + +/// Drop staging schemas by name. +pub async fn drop_staging_schemas( + client: &PgClient, + schemas: &[(String, String)], +) -> Result<(), ConnectionError> { + for (database, schema) in schemas { + let drop_sql = format!( + "DROP SCHEMA IF EXISTS {}.{} CASCADE", + quote_identifier(database), + quote_identifier(schema) + ); + client + .execute(&drop_sql, &[]) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} + +/// Drop staging clusters by name. +pub async fn drop_staging_clusters( + client: &PgClient, + clusters: &[String], +) -> Result<(), ConnectionError> { + for cluster in clusters { + let drop_sql = format!( + "DROP CLUSTER IF EXISTS {} CASCADE", + quote_identifier(cluster) + ); + client + .execute(&drop_sql, &[]) + .await + .map_err(ConnectionError::Query)?; + } + + Ok(()) +} diff --git a/src/mz-deploy/src/client/models.rs b/src/mz-deploy/src/client/models.rs new file mode 100644 index 0000000000000..d45112898e992 --- /dev/null +++ b/src/mz-deploy/src/client/models.rs @@ -0,0 +1,574 @@ +//! Domain models for Materialize catalog objects. +//! +//! These types represent objects in the Materialize system catalog and provide +//! a type-safe interface over raw database rows. + +use chrono::{DateTime, Utc}; +use std::fmt; +use std::str::FromStr; + +/// The type of deployment - either tables-only or full objects. 
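Editorial note, not part of this change: a sketch of an abort path that combines the staging lookups and drop helpers above. The function name and overall flow are illustrative; the helpers and the `_<deploy_id>` suffix convention are the ones introduced in this diff.

    use crate::client::errors::ConnectionError;
    use tokio_postgres::Client as PgClient;

    async fn abort_deployment(client: &PgClient, deploy_id: &str) -> Result<(), ConnectionError> {
        // Everything staged for this deploy carries a `_<deploy_id>` suffix, so it
        // can be located and removed without consulting project files.
        let schemas = get_staging_schemas(client, deploy_id).await?;
        drop_staging_schemas(client, &schemas).await?;

        let clusters = get_staging_clusters(client, deploy_id).await?;
        drop_staging_clusters(client, &clusters).await?;
        Ok(())
    }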
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DeploymentKind { + /// Table creation deployment (create-tables command) + Tables, + /// Full object deployment (stage, apply commands) + Objects, + /// Contains sinks + Sinks, +} + +impl fmt::Display for DeploymentKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DeploymentKind::Tables => write!(f, "tables"), + DeploymentKind::Objects => write!(f, "objects"), + DeploymentKind::Sinks => write!(f, "sinks"), + } + } +} + +impl FromStr for DeploymentKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "tables" => Ok(DeploymentKind::Tables), + "objects" => Ok(DeploymentKind::Objects), + "sinks" => Ok(DeploymentKind::Sinks), + _ => Err(format!("Invalid deployment kind: {}", s)), + } + } +} + +/// A compute cluster in Materialize. +/// +/// Clusters provide the compute resources for materialized views, indexes, and sinks. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Cluster { + /// Materialize's unique identifier for the cluster + pub id: String, + /// Cluster name (e.g., "quickstart") + pub name: String, + /// Cluster size (e.g., "M.1-large"), None for unmanaged clusters + pub size: Option, + /// Number of replicas for fault tolerance (stored as i64 to handle postgres uint4 type) + pub replication_factor: Option, +} + +/// Options for creating a new cluster. +/// +/// Only size and replication factor are configurable - all other settings +/// use Materialize defaults. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ClusterOptions { + /// Cluster size (e.g., "M.1-large", "M.1-small") + pub size: String, + /// Number of replicas (default: 1) + pub replication_factor: u32, +} + +impl ClusterOptions { + /// Create cluster options from a production cluster configuration. + pub fn from_cluster(cluster: &Cluster) -> Result { + let size = cluster.size.clone().ok_or_else(|| { + format!( + "Cluster '{}' has no size (unmanaged cluster?)", + cluster.name + ) + })?; + + let replication_factor = cluster + .replication_factor + .unwrap_or(1) + .try_into() + .map_err(|_| format!("Invalid replication_factor for cluster '{}'", cluster.name))?; + + Ok(Self { + size, + replication_factor, + }) + } +} + +/// Configuration for a cluster replica (used for unmanaged clusters). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ClusterReplica { + /// Replica name (e.g., "r1", "r2") + pub name: String, + /// Replica size (e.g., "25cc") + pub size: String, + /// Optional availability zone + pub availability_zone: Option, +} + +/// A privilege grant on a cluster. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ClusterGrant { + /// Role name that receives the grant + pub grantee: String, + /// Privilege type (e.g., "USAGE", "CREATE") + pub privilege_type: String, +} + +/// Configuration for creating a cluster (managed or unmanaged). +/// +/// This captures all the information needed to clone a cluster's configuration +/// including its replicas (for unmanaged clusters) and privilege grants. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ClusterConfig { + /// Managed cluster with SIZE and REPLICATION FACTOR + Managed { + /// Cluster options (size, replication factor) + options: ClusterOptions, + /// Privilege grants on the cluster + grants: Vec, + }, + /// Unmanaged cluster with explicit replicas + Unmanaged { + /// Replica configurations + replicas: Vec, + /// Privilege grants on the cluster + grants: Vec, + }, +} + +impl ClusterConfig { + /// Get the grants for this cluster configuration. 
+    pub fn grants(&self) -> &[ClusterGrant] {
+        match self {
+            ClusterConfig::Managed { grants, .. } => grants,
+            ClusterConfig::Unmanaged { grants, .. } => grants,
+        }
+    }
+}
+
+/// A schema deployment record tracking when and how a schema was deployed.
+///
+/// Stored in the `deploy.deployments` table. Schemas are deployed
+/// atomically - all objects in a dirty schema are redeployed together.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SchemaDeploymentRecord {
+    /// Deploy ID (e.g., "" for direct deploy, "staging" for staged deploy)
+    pub deploy_id: String,
+    /// Database name (e.g., "materialize")
+    pub database: String,
+    /// Schema name (e.g., "public")
+    pub schema: String,
+    /// When this schema was deployed
+    pub deployed_at: DateTime<Utc>,
+    /// Which Materialize user/role deployed this schema
+    pub deployed_by: String,
+    /// When this schema was promoted to production (NULL for staging, set on promotion)
+    pub promoted_at: Option<DateTime<Utc>>,
+    /// Git commit hash if available
+    pub git_commit: Option<String>,
+    /// Type of deployment (tables or objects)
+    pub kind: DeploymentKind,
+}
+
+/// An object deployment record tracking object-level deployment history.
+///
+/// Stored in the `deploy.objects` table (append-only).
+/// Each row records that an object with a specific hash was deployed
+/// to a deployment at a point in time.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DeploymentObjectRecord {
+    /// Deploy ID (e.g., "" for direct deploy, "staging" for staged deploy)
+    pub deploy_id: String,
+    /// Database name (e.g., "materialize")
+    pub database: String,
+    /// Schema name (e.g., "public")
+    pub schema: String,
+    /// Object name (e.g., "my_view")
+    pub object: String,
+    /// Hash of the HIR DatabaseObject (semantic content hash)
+    pub object_hash: String,
+    /// When this object was deployed
+    pub deployed_at: DateTime<Utc>,
+}
+
+/// Metadata about a deployment.
+///
+/// Used for validation before operations like apply or abort.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DeploymentMetadata {
+    /// Deploy ID
+    pub deploy_id: String,
+    /// When this deployment was promoted (NULL if not promoted)
+    pub promoted_at: Option<DateTime<Utc>>,
+    /// List of (database, schema) tuples in this deployment
+    pub schemas: Vec<(String, String)>,
+}
+
+/// A conflict record indicating a schema was updated after deployment started.
+///
+/// Used for git-merge-style conflict detection when promoting deployments.
+/// Returned by conflict detection queries that check if production schemas
+/// were modified since the staging deployment began.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ConflictRecord {
+    /// Database name containing the conflicting schema
+    pub database: String,
+    /// Schema name that has a conflict
+    pub schema: String,
+    /// Deploy ID that last promoted this schema
+    pub deploy_id: String,
+    /// When the schema was last promoted to production
+    pub promoted_at: DateTime<Utc>,
+}
+
+/// Details about a specific deployment.
+///
+/// Returned by `get_deployment_details()` for the describe command.
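Editorial note, not part of this change: a sketch of the git-merge-style check that `ConflictRecord` is designed for. The comparison against the staging deployment's start time mirrors the doc comment above; the reporting format and error type are illustrative only.

    use crate::client::models::ConflictRecord;

    fn report_conflicts(conflicts: &[ConflictRecord]) -> Result<(), String> {
        if conflicts.is_empty() {
            return Ok(());
        }
        // Each record names a production schema that was promoted by some other
        // deploy after this deployment was staged, i.e. a potential lost update.
        let mut msg = String::from("schemas promoted since this deployment was staged:\n");
        for c in conflicts {
            msg.push_str(&format!(
                "  - {}.{} (by deploy '{}' at {})\n",
                c.database, c.schema, c.deploy_id, c.promoted_at
            ));
        }
        Err(msg)
    }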
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeploymentDetails { + /// When this deployment was created + pub deployed_at: DateTime, + /// When this deployment was promoted (None if still staging) + pub promoted_at: Option>, + /// Which Materialize user/role deployed this + pub deployed_by: String, + /// Git commit hash if available + pub git_commit: Option, + /// Type of deployment (tables or objects) + pub kind: DeploymentKind, + /// List of (database, schema) tuples in this deployment + pub schemas: Vec<(String, String)>, +} + +/// Summary of a staging deployment. +/// +/// Used by `list_staging_deployments()` for the deployments command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StagingDeployment { + /// When this deployment was created + pub deployed_at: DateTime, + /// Which Materialize user/role deployed this + pub deployed_by: String, + /// Git commit hash if available + pub git_commit: Option, + /// Type of deployment (tables or objects) + pub kind: DeploymentKind, + /// List of (database, schema) tuples in this deployment + pub schemas: Vec<(String, String)>, +} + +/// A promoted deployment in history. +/// +/// Returned by `list_deployment_history()` for the history command. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeploymentHistoryEntry { + /// Deploy ID for this deployment + pub deploy_id: String, + /// When this deployment was promoted + pub promoted_at: DateTime, + /// Which Materialize user/role deployed this + pub deployed_by: String, + /// Git commit hash if available + pub git_commit: Option, + /// Type of deployment (tables or objects) + pub kind: DeploymentKind, + /// List of (database, schema) tuples in this deployment + pub schemas: Vec<(String, String)>, +} + +/// State of an apply operation for resumable apply. +/// +/// This is determined by checking the existence and comments of the +/// `_mz_deploy.apply__pre` and `_mz_deploy.apply__post` schemas. +/// Comments are set when creating the schemas; the swap transaction exchanges which +/// schema has which comment. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ApplyState { + /// No apply state schemas exist - fresh apply or completed. + NotStarted, + /// State schemas exist but swap hasn't happened yet. + /// The `_pre` schema has comment 'swapped=false'. + PreSwap, + /// Swap has completed. + /// After the swap, `_pre` schema has comment 'swapped=true' (it was `_post` before). + PostSwap, +} + +/// A pending statement to be executed after the swap. +/// +/// Used for deferred execution of statements like sinks that cannot +/// be created in staging (they write to external systems immediately). +/// Stored in `_mz_deploy.public.pending_statements` table. 
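Editorial note, not part of this change: a sketch of how a deferred sink statement might be recorded at stage time using the struct defined below. The field names come from this diff; the database, schema, object name, and hash values are placeholders, not real data.

    use crate::client::models::PendingStatement;

    fn pending_sink(deploy_id: &str, seq: i32, create_sql: &str) -> PendingStatement {
        PendingStatement {
            deploy_id: deploy_id.to_string(),
            sequence_num: seq,
            database: "materialize".to_string(),
            schema: "public".to_string(),
            object: "my_sink".to_string(),
            object_hash: "<hash of the planned sink>".to_string(),
            statement_sql: create_sql.to_string(),
            // Sinks are the motivating case: they start writing to external
            // systems as soon as they are created, so they cannot run in staging.
            statement_kind: "sink".to_string(),
            executed_at: None,
        }
    }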
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PendingStatement { + /// Deploy ID this statement belongs to + pub deploy_id: String, + /// Sequence number for ordering execution + pub sequence_num: i32, + /// Database containing the object + pub database: String, + /// Schema containing the object + pub schema: String, + /// Object name + pub object: String, + /// Hash of the object definition + pub object_hash: String, + /// SQL statement to execute + pub statement_sql: String, + /// Kind of statement (e.g., "sink") + pub statement_kind: String, + /// When this statement was executed (None if not yet executed) + pub executed_at: Option>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deployment_kind_display() { + assert_eq!(DeploymentKind::Tables.to_string(), "tables"); + assert_eq!(DeploymentKind::Objects.to_string(), "objects"); + } + + #[test] + fn test_deployment_kind_from_str_valid() { + assert_eq!( + "tables".parse::().unwrap(), + DeploymentKind::Tables + ); + assert_eq!( + "objects".parse::().unwrap(), + DeploymentKind::Objects + ); + } + + #[test] + fn test_deployment_kind_from_str_invalid() { + let result = "invalid".parse::(); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid deployment kind: invalid"); + } + + #[test] + fn test_deployment_kind_roundtrip() { + // Verify that Display and FromStr are consistent + for kind in [DeploymentKind::Tables, DeploymentKind::Objects] { + let s = kind.to_string(); + let parsed: DeploymentKind = s.parse().unwrap(); + assert_eq!(kind, parsed); + } + } + + #[test] + fn test_cluster_options_from_cluster_success() { + let cluster = Cluster { + id: "u1".to_string(), + name: "quickstart".to_string(), + size: Some("25cc".to_string()), + replication_factor: Some(2), + }; + + let options = ClusterOptions::from_cluster(&cluster).unwrap(); + assert_eq!(options.size, "25cc"); + assert_eq!(options.replication_factor, 2); + } + + #[test] + fn test_cluster_options_from_cluster_default_replication() { + let cluster = Cluster { + id: "u1".to_string(), + name: "quickstart".to_string(), + size: Some("25cc".to_string()), + replication_factor: None, // Should default to 1 + }; + + let options = ClusterOptions::from_cluster(&cluster).unwrap(); + assert_eq!(options.size, "25cc"); + assert_eq!(options.replication_factor, 1); + } + + #[test] + fn test_cluster_options_from_cluster_no_size() { + let cluster = Cluster { + id: "u1".to_string(), + name: "unmanaged".to_string(), + size: None, // Unmanaged cluster + replication_factor: Some(1), + }; + + let result = ClusterOptions::from_cluster(&cluster); + assert!(result.is_err()); + let err_msg = result.unwrap_err(); + assert!(err_msg.contains("unmanaged")); + assert!(err_msg.contains("has no size")); + } + + #[test] + fn test_cluster_options_from_cluster_negative_replication() { + let cluster = Cluster { + id: "u1".to_string(), + name: "test".to_string(), + size: Some("25cc".to_string()), + replication_factor: Some(-1), // Invalid negative value + }; + + let result = ClusterOptions::from_cluster(&cluster); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Invalid replication_factor")); + } + + #[test] + fn test_cluster_equality() { + let cluster1 = Cluster { + id: "u1".to_string(), + name: "test".to_string(), + size: Some("25cc".to_string()), + replication_factor: Some(1), + }; + + let cluster2 = Cluster { + id: "u1".to_string(), + name: "test".to_string(), + size: Some("25cc".to_string()), + replication_factor: Some(1), + }; + + let cluster3 = Cluster { 
+ id: "u2".to_string(), // Different ID + name: "test".to_string(), + size: Some("25cc".to_string()), + replication_factor: Some(1), + }; + + assert_eq!(cluster1, cluster2); + assert_ne!(cluster1, cluster3); + } + + #[test] + fn test_cluster_options_equality() { + let opts1 = ClusterOptions { + size: "25cc".to_string(), + replication_factor: 2, + }; + + let opts2 = ClusterOptions { + size: "25cc".to_string(), + replication_factor: 2, + }; + + let opts3 = ClusterOptions { + size: "50cc".to_string(), + replication_factor: 2, + }; + + assert_eq!(opts1, opts2); + assert_ne!(opts1, opts3); + } + + #[test] + fn test_cluster_replica_equality() { + let r1 = ClusterReplica { + name: "r1".to_string(), + size: "25cc".to_string(), + availability_zone: Some("use1-az1".to_string()), + }; + + let r2 = ClusterReplica { + name: "r1".to_string(), + size: "25cc".to_string(), + availability_zone: Some("use1-az1".to_string()), + }; + + let r3 = ClusterReplica { + name: "r2".to_string(), + size: "25cc".to_string(), + availability_zone: None, + }; + + assert_eq!(r1, r2); + assert_ne!(r1, r3); + } + + #[test] + fn test_cluster_grant_equality() { + let g1 = ClusterGrant { + grantee: "reader".to_string(), + privilege_type: "USAGE".to_string(), + }; + + let g2 = ClusterGrant { + grantee: "reader".to_string(), + privilege_type: "USAGE".to_string(), + }; + + let g3 = ClusterGrant { + grantee: "writer".to_string(), + privilege_type: "CREATE".to_string(), + }; + + assert_eq!(g1, g2); + assert_ne!(g1, g3); + } + + #[test] + fn test_cluster_config_managed() { + let config = ClusterConfig::Managed { + options: ClusterOptions { + size: "25cc".to_string(), + replication_factor: 2, + }, + grants: vec![ClusterGrant { + grantee: "reader".to_string(), + privilege_type: "USAGE".to_string(), + }], + }; + + assert_eq!(config.grants().len(), 1); + assert_eq!(config.grants()[0].grantee, "reader"); + } + + #[test] + fn test_cluster_config_unmanaged() { + let config = ClusterConfig::Unmanaged { + replicas: vec![ + ClusterReplica { + name: "r1".to_string(), + size: "25cc".to_string(), + availability_zone: None, + }, + ClusterReplica { + name: "r2".to_string(), + size: "50cc".to_string(), + availability_zone: Some("use1-az1".to_string()), + }, + ], + grants: vec![], + }; + + if let ClusterConfig::Unmanaged { replicas, grants } = &config { + assert_eq!(replicas.len(), 2); + assert_eq!(replicas[0].name, "r1"); + assert_eq!(replicas[1].availability_zone, Some("use1-az1".to_string())); + assert!(grants.is_empty()); + } else { + panic!("Expected Unmanaged config"); + } + } + + #[test] + fn test_cluster_config_unmanaged_empty_replicas() { + // Unmanaged clusters with 0 replicas are valid + let config = ClusterConfig::Unmanaged { + replicas: vec![], + grants: vec![ClusterGrant { + grantee: "admin".to_string(), + privilege_type: "CREATE".to_string(), + }], + }; + + if let ClusterConfig::Unmanaged { replicas, grants } = &config { + assert!(replicas.is_empty()); + assert_eq!(grants.len(), 1); + } else { + panic!("Expected Unmanaged config"); + } + } +} diff --git a/src/mz-deploy/src/client/validation.rs b/src/mz-deploy/src/client/validation.rs new file mode 100644 index 0000000000000..8988254925af5 --- /dev/null +++ b/src/mz-deploy/src/client/validation.rs @@ -0,0 +1,513 @@ +//! Database validation operations. +//! +//! This module contains methods for validating projects against the database, +//! including checking for required databases, schemas, clusters, and privileges. 
+
+use crate::client::errors::DatabaseValidationError;
+use crate::project::ast::Statement;
+use crate::project::object_id::ObjectId;
+use crate::project::planned;
+use mz_sql_parser::ast::CreateSinkConnection;
+use std::collections::{BTreeMap, BTreeSet};
+use std::path::Path;
+use std::path::PathBuf;
+use tokio_postgres::Client as PgClient;
+use tokio_postgres::types::ToSql;
+
+/// Internal helper to query which sources exist on the given clusters using IN clause.
+pub(crate) async fn query_sources_by_cluster(
+    client: &PgClient,
+    cluster_names: &BTreeSet<String>,
+) -> Result<BTreeMap<String, Vec<String>>, DatabaseValidationError> {
+    if cluster_names.is_empty() {
+        return Ok(BTreeMap::new());
+    }
+
+    // Build IN clause with placeholders
+    let placeholders: Vec<String> = (1..=cluster_names.len())
+        .map(|i| format!("${}", i))
+        .collect();
+    let in_clause = placeholders.join(", ");
+
+    let query = format!(
+        r#"
+        SELECT
+            c.name as cluster_name,
+            d.name || '.' || s.name || '.' || mo.name as fqn
+        FROM mz_catalog.mz_sources src
+        JOIN mz_catalog.mz_objects mo ON src.id = mo.id
+        JOIN mz_catalog.mz_schemas s ON mo.schema_id = s.id
+        JOIN mz_catalog.mz_databases d ON s.database_id = d.id
+        JOIN mz_catalog.mz_clusters c ON src.cluster_id = c.id
+        WHERE mo.id LIKE 'u%' AND c.name IN ({})
+        "#,
+        in_clause
+    );
+
+    #[allow(clippy::as_conversions)]
+    let params: Vec<&(dyn ToSql + Sync)> = cluster_names
+        .iter()
+        .map(|s| s as &(dyn ToSql + Sync))
+        .collect();
+
+    let rows = client
+        .query(&query, &params)
+        .await
+        .map_err(DatabaseValidationError::QueryError)?;
+
+    let mut result: BTreeMap<String, Vec<String>> = BTreeMap::new();
+    for row in rows {
+        let cluster_name: String = row.get("cluster_name");
+        let fqn: String = row.get("fqn");
+        result
+            .entry(cluster_name)
+            .or_insert_with(Vec::new)
+            .push(fqn);
+    }
+
+    Ok(result)
+}
+
+/// Internal implementation of validate_project.
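Editorial note, not part of this change: a small sketch of the shape of the map `query_sources_by_cluster` returns and how a caller might inspect it. It assumes the sketch lives in this module; the printing is illustrative only.

    use crate::client::errors::DatabaseValidationError;
    use std::collections::BTreeSet;
    use tokio_postgres::Client as PgClient;

    async fn print_sources_on(
        client: &PgClient,
        clusters: &[&str],
    ) -> Result<(), DatabaseValidationError> {
        let names: BTreeSet<String> = clusters.iter().map(|c| c.to_string()).collect();
        // Map of cluster name -> fully qualified source names ("db.schema.source").
        let by_cluster = query_sources_by_cluster(client, &names).await?;
        for (cluster, sources) in &by_cluster {
            println!("{}: {} source(s): {}", cluster, sources.len(), sources.join(", "));
        }
        Ok(())
    }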
+pub(crate) async fn validate_project_impl( + client: &PgClient, + planned_project: &planned::Project, + project_root: &Path, +) -> Result<(), DatabaseValidationError> { + let mut missing_databases = Vec::new(); + let mut missing_schemas = Vec::new(); + let mut missing_clusters = Vec::new(); + + // Collect all required databases + let mut required_databases = BTreeSet::new(); + for db in &planned_project.databases { + required_databases.insert(db.name.clone()); + } + for ext_dep in &planned_project.external_dependencies { + required_databases.insert(ext_dep.database.clone()); + } + + // Collect schemas - split into project schemas (we can create) vs external schemas (must exist) + let mut external_schemas = BTreeSet::new(); + for ext_dep in &planned_project.external_dependencies { + external_schemas.insert((ext_dep.database.clone(), ext_dep.schema.clone())); + } + + // Check databases exist + for database in &required_databases { + let query = "SELECT name FROM mz_databases WHERE name = $1"; + let rows = client + .query(query, &[database]) + .await + .map_err(DatabaseValidationError::QueryError)?; + if rows.is_empty() { + missing_databases.push(database.clone()); + } + } + + // Check only external dependency schemas exist (project schemas will be created if needed) + for (database, schema) in &external_schemas { + let query = r#" + SELECT s.name FROM mz_schemas s + JOIN mz_databases d ON s.database_id = d.id + WHERE s.name = $1 AND d.name = $2"#; + let rows = client + .query(query, &[schema, database]) + .await + .map_err(DatabaseValidationError::QueryError)?; + if rows.is_empty() { + missing_schemas.push((database.clone(), schema.clone())); + } + } + + // Check clusters exist + for cluster in &planned_project.cluster_dependencies { + let query = "SELECT name FROM mz_clusters WHERE name = $1"; + let rows = client + .query(query, &[&cluster.name]) + .await + .map_err(DatabaseValidationError::QueryError)?; + if rows.is_empty() { + missing_clusters.push(cluster.name.clone()); + } + } + + // Build ObjectId to file path mapping by reconstructing paths from ObjectIds + // Path format: ///.sql + let mut object_paths: BTreeMap = BTreeMap::new(); + for db in &planned_project.databases { + for schema in &db.schemas { + for obj in &schema.objects { + let file_path = project_root + .join(&obj.id.database) + .join(&obj.id.schema) + .join(format!("{}.sql", obj.id.object)); + object_paths.insert(obj.id.clone(), file_path); + } + } + } + + // Check external dependencies and group missing ones by file + let mut missing_external_deps = BTreeSet::new(); + for ext_dep in &planned_project.external_dependencies { + let query = r#" + SELECT mo.name + FROM mz_objects mo + JOIN mz_schemas s ON mo.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE mo.name = $1 AND s.name = $2 AND d.name = $3 + "#; + + let rows = client + .query( + query, + &[&ext_dep.object, &ext_dep.schema, &ext_dep.database], + ) + .await + .map_err(DatabaseValidationError::QueryError)?; + if rows.is_empty() { + missing_external_deps.insert(ext_dep.clone()); + } + } + + // Group missing dependencies by the files that reference them + let mut file_missing_deps: BTreeMap)> = BTreeMap::new(); + + for db in &planned_project.databases { + for schema in &db.schemas { + for obj in &schema.objects { + let mut missing_for_this_object = Vec::new(); + + for dep in &obj.dependencies { + if missing_external_deps.contains(dep) { + missing_for_this_object.push(dep.clone()); + } + } + + if !missing_for_this_object.is_empty() + && let 
Some(file_path) = object_paths.get(&obj.id) + { + file_missing_deps + .insert(file_path.clone(), (obj.id.clone(), missing_for_this_object)); + } + } + } + } + + // Create compilation error for each file with missing dependencies + let mut compilation_errors = Vec::new(); + for (file_path, (object_id, missing_deps)) in file_missing_deps { + compilation_errors.push(DatabaseValidationError::CompilationFailed { + file_path, + object_name: object_id, + missing_dependencies: missing_deps, + }); + } + + // Return results + if !missing_databases.is_empty() + || !missing_schemas.is_empty() + || !missing_clusters.is_empty() + || !compilation_errors.is_empty() + { + Err(DatabaseValidationError::Multiple { + databases: missing_databases, + schemas: missing_schemas, + clusters: missing_clusters, + compilation_errors, + }) + } else { + Ok(()) + } +} + +/// Internal implementation of validate_cluster_isolation. +pub(crate) async fn validate_cluster_isolation_impl( + client: &PgClient, + planned_project: &planned::Project, +) -> Result<(), DatabaseValidationError> { + // Get all clusters used by the project + let mut all_clusters: BTreeSet = BTreeSet::new(); + for cluster in &planned_project.cluster_dependencies { + all_clusters.insert(cluster.name.clone()); + } + + // Query sources from the database for these clusters + let sources_by_cluster = query_sources_by_cluster(client, &all_clusters).await?; + + // Validate cluster isolation using the project's validation method + planned_project + .validate_cluster_isolation(&sources_by_cluster) + .map_err(|(cluster_name, compute_objects, storage_objects)| { + DatabaseValidationError::ClusterConflict { + cluster_name, + compute_objects, + storage_objects, + } + }) +} + +/// Internal implementation of validate_privileges. 
+pub(crate) async fn validate_privileges_impl(
+    client: &PgClient,
+    planned_project: &planned::Project,
+) -> Result<(), DatabaseValidationError> {
+    // Check if user is a superuser
+    let row = client
+        .query_one("SELECT mz_is_superuser()", &[])
+        .await
+        .map_err(DatabaseValidationError::QueryError)?;
+    let is_superuser: bool = row.get(0);
+
+    if is_superuser {
+        return Ok(()); // Superuser has all privileges
+    }
+
+    // Collect all required databases from the project
+    let mut priv_required_databases = BTreeSet::new();
+    for db in &planned_project.databases {
+        priv_required_databases.insert(db.name.clone());
+    }
+
+    // Check USAGE privileges on databases using the provided query
+    let missing_usage = if !priv_required_databases.is_empty() {
+        // Build IN clause with placeholders
+        let placeholders: Vec<String> = (1..=priv_required_databases.len())
+            .map(|i| format!("${}", i))
+            .collect();
+        let in_clause = placeholders.join(", ");
+
+        let query = format!(
+            r#"
+            SELECT name
+            FROM mz_internal.mz_show_my_database_privileges
+            WHERE name IN ({})
+            GROUP BY name
+            HAVING NOT BOOL_OR(privilege_type = 'USAGE')
+            "#,
+            in_clause
+        );
+
+        #[allow(clippy::as_conversions)]
+        let params: Vec<&(dyn ToSql + Sync)> = priv_required_databases
+            .iter()
+            .map(|s| s as &(dyn ToSql + Sync))
+            .collect();
+
+        let rows = client
+            .query(&query, &params)
+            .await
+            .map_err(DatabaseValidationError::QueryError)?;
+
+        rows.iter()
+            .map(|row| row.get::<_, String>("name"))
+            .collect::<Vec<_>>()
+    } else {
+        Vec::new()
+    };
+
+    // Check CREATECLUSTER privilege if project has cluster dependencies
+    let missing_createcluster = if !planned_project.cluster_dependencies.is_empty() {
+        let query = r#"
+            SELECT EXISTS (
+                SELECT * FROM mz_internal.mz_show_my_system_privileges
+                WHERE privilege_type = 'CREATECLUSTER'
+            )
+        "#;
+
+        let row = client
+            .query_one(query, &[])
+            .await
+            .map_err(DatabaseValidationError::QueryError)?;
+
+        let has_createcluster: bool = row.get(0);
+        !has_createcluster
+    } else {
+        false
+    };
+
+    // Return error if missing any privileges
+    if !missing_usage.is_empty() || missing_createcluster {
+        return Err(DatabaseValidationError::InsufficientPrivileges {
+            missing_database_usage: missing_usage,
+            missing_createcluster,
+        });
+    }
+
+    Ok(())
+}
+
+/// Internal implementation of validate_sources_exist.
+pub(crate) async fn validate_sources_exist_impl( + client: &PgClient, + planned_project: &planned::Project, +) -> Result<(), DatabaseValidationError> { + let mut missing_sources = Vec::new(); + + // Collect all source references from CREATE TABLE FROM SOURCE statements + for obj in planned_project.iter_objects() { + if let Statement::CreateTableFromSource(ref stmt) = obj.typed_object.stmt { + // Extract the source ObjectId from the statement + let source_id = + ObjectId::from_raw_item_name(&stmt.source, &obj.id.database, &obj.id.schema); + + // Check if source exists in the database + let query = r#" + SELECT s.name + FROM mz_sources s + JOIN mz_schemas sch ON s.schema_id = sch.id + JOIN mz_databases d ON sch.database_id = d.id + WHERE s.name = $1 AND sch.name = $2 AND d.name = $3"#; + + let rows = client + .query( + query, + &[&source_id.object, &source_id.schema, &source_id.database], + ) + .await + .map_err(DatabaseValidationError::QueryError)?; + + if rows.is_empty() { + missing_sources.push(source_id); + } + } + } + + if !missing_sources.is_empty() { + return Err(DatabaseValidationError::MissingSources(missing_sources)); + } + + Ok(()) +} + +/// Internal implementation of validate_sink_connections_exist. +/// +/// Validates that all connections referenced by sinks exist in the database. +/// Sinks reference connections (Kafka, Iceberg) that are not managed by mz-deploy. +pub(crate) async fn validate_sink_connections_exist_impl( + client: &PgClient, + planned_project: &planned::Project, +) -> Result<(), DatabaseValidationError> { + let mut missing_connections = Vec::new(); + let mut checked = BTreeSet::new(); // Avoid duplicate checks + + // Collect all connection references from CREATE SINK statements + for obj in planned_project.iter_objects() { + if let Statement::CreateSink(ref stmt) = obj.typed_object.stmt { + // Extract connection ObjectId(s) based on sink type + let connection_ids = match &stmt.connection { + CreateSinkConnection::Kafka { connection, .. } => { + vec![ObjectId::from_raw_item_name( + connection, + &obj.id.database, + &obj.id.schema, + )] + } + CreateSinkConnection::Iceberg { + connection, + aws_connection, + .. + } => { + vec![ + ObjectId::from_raw_item_name(connection, &obj.id.database, &obj.id.schema), + ObjectId::from_raw_item_name( + aws_connection, + &obj.id.database, + &obj.id.schema, + ), + ] + } + }; + + // Check each connection exists + for conn_id in connection_ids { + if checked.contains(&conn_id) { + continue; + } + checked.insert(conn_id.clone()); + + let query = r#" + SELECT c.name + FROM mz_connections c + JOIN mz_schemas s ON c.schema_id = s.id + JOIN mz_databases d ON s.database_id = d.id + WHERE c.name = $1 AND s.name = $2 AND d.name = $3"#; + + let rows = client + .query( + query, + &[&conn_id.object, &conn_id.schema, &conn_id.database], + ) + .await + .map_err(DatabaseValidationError::QueryError)?; + + if rows.is_empty() { + missing_connections.push(conn_id); + } + } + } + } + + if !missing_connections.is_empty() { + return Err(DatabaseValidationError::MissingConnections( + missing_connections, + )); + } + + Ok(()) +} + +/// Internal implementation of validate_table_dependencies. 
+pub(crate) async fn validate_table_dependencies_impl(
+    client: &PgClient,
+    planned_project: &planned::Project,
+    objects_to_deploy: &BTreeSet<ObjectId>,
+) -> Result<(), DatabaseValidationError> {
+    let mut objects_needing_tables = Vec::new();
+
+    // Build a set of all table IDs in the project
+    let project_tables: BTreeSet<ObjectId> = planned_project.get_tables().collect();
+
+    // For each object to be deployed, check if it depends on tables
+    for object_id in objects_to_deploy {
+        // Find the object in the planned project
+        if let Some(obj) = planned_project.find_object(object_id) {
+            let mut missing_tables = Vec::new();
+
+            // Check each dependency
+            for dep_id in &obj.dependencies {
+                // Is this dependency a table?
+                if project_tables.contains(dep_id) {
+                    // Check if the table exists in the database
+                    let query = r#"
+                        SELECT t.name
+                        FROM mz_tables t
+                        JOIN mz_schemas s ON t.schema_id = s.id
+                        JOIN mz_databases d ON s.database_id = d.id
+                        WHERE t.name = $1 AND s.name = $2 AND d.name = $3"#;
+
+                    let rows = client
+                        .query(query, &[&dep_id.object, &dep_id.schema, &dep_id.database])
+                        .await
+                        .map_err(DatabaseValidationError::QueryError)?;
+
+                    if rows.is_empty() {
+                        missing_tables.push(dep_id.clone());
+                    }
+                }
+            }
+
+            if !missing_tables.is_empty() {
+                objects_needing_tables.push((object_id.clone(), missing_tables));
+            }
+        }
+    }
+
+    if !objects_needing_tables.is_empty() {
+        return Err(DatabaseValidationError::MissingTableDependencies {
+            objects_needing_tables,
+        });
+    }
+
+    Ok(())
+}
diff --git a/src/mz-deploy/src/lib.rs b/src/mz-deploy/src/lib.rs
new file mode 100644
index 0000000000000..9dbd9b6fa5310
--- /dev/null
+++ b/src/mz-deploy/src/lib.rs
@@ -0,0 +1,10 @@
+//! mz-deploy library
+//!
+//! This library provides core functionality for the mz-deploy CLI tool.
+
+pub mod cli;
+pub mod client;
+pub mod project;
+pub mod types;
+pub mod unit_test;
+pub mod utils;
diff --git a/src/mz-deploy/src/project.rs b/src/mz-deploy/src/project.rs
new file mode 100644
index 0000000000000..fb19e06f40308
--- /dev/null
+++ b/src/mz-deploy/src/project.rs
@@ -0,0 +1,23 @@
+use std::path::Path;
+
+pub mod ast;
+pub mod changeset;
+pub mod deployment_snapshot;
+pub mod error;
+pub mod normalize;
+pub mod object_id;
+mod parser;
+pub mod planned;
+pub mod raw;
+pub mod typed;
+
+// Re-export commonly used types
+pub use planned::ModStatement;
+
+/// Load, validate, and convert a project to a planned deployment representation.
+pub fn plan<P: AsRef<Path>>(root: P) -> Result<planned::Project, error::ProjectError> {
+    let raw_project = raw::load_project(root)?;
+    let typed_project = typed::Project::try_from(raw_project)?;
+    let planned_project = planned::Project::from(typed_project);
+    Ok(planned_project)
+}
diff --git a/src/mz-deploy/src/project/ast.rs b/src/mz-deploy/src/project/ast.rs
new file mode 100644
index 0000000000000..156720446f389
--- /dev/null
+++ b/src/mz-deploy/src/project/ast.rs
@@ -0,0 +1,268 @@
+//! Abstract Syntax Tree (AST) types shared across different project representations.
+//!
+//! This module contains core AST types that are used by multiple layers (HIR, MIR, etc.)
+//! without creating circular dependencies between those modules.
+
+use mz_sql_parser::ast::*;
+
+/// A structured identifier for database objects supporting partial qualification.
+///
+/// Represents a database object identifier that may be partially or fully qualified:
+/// - Unqualified: `object`
+/// - Schema-qualified: `schema.object`
+/// - Fully-qualified: `database.schema.object`
+///
+/// This type is used internally for matching and validating object references across
+/// SQL statements where references may have different levels of qualification.
+#[derive(Debug)]
+pub struct DatabaseIdent {
+    pub database: Option<String>,
+    pub schema: Option<String>,
+    pub object: String,
+}
+
+impl From<UnresolvedItemName> for DatabaseIdent {
+    fn from(value: UnresolvedItemName) -> Self {
+        match value.0.as_slice() {
+            [object] => Self {
+                database: None,
+                schema: None,
+                object: object.to_string(),
+            },
+            [schema, object] => Self {
+                database: None,
+                schema: Some(schema.to_string()),
+                object: object.to_string(),
+            },
+            [database, schema, object] => Self {
+                database: Some(database.to_string()),
+                schema: Some(schema.to_string()),
+                object: object.to_string(),
+            },
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl DatabaseIdent {
+    /// Checks if this identifier matches another identifier with flexible qualification matching.
+    ///
+    /// This method performs a partial match where an identifier with fewer qualification
+    /// levels can match an identifier with more levels, as long as the specified parts match.
+    ///
+    /// # Matching Rules
+    ///
+    /// - Object names must always match exactly
+    /// - If this ident has a schema, it must match the other's schema (if present)
+    /// - If this ident has a database, it must match the other's database (if present)
+    /// - Missing qualifiers in either ident are treated as wildcards
+    ///
+    /// # Examples
+    ///
+    /// ```text
+    /// "table" matches "schema.table" ✓
+    /// "schema.table" matches "db.schema.table" ✓
+    /// "schema.table" matches "table" ✗ (schema specified but not in other)
+    /// "schema1.table" matches "schema2.table" ✗ (schema mismatch)
+    /// "db.schema.table" matches "db.schema.table" ✓
+    /// ```
+    pub(crate) fn matches(&self, other: &DatabaseIdent) -> bool {
+        if self.object != other.object {
+            return false;
+        }
+
+        // If we have a schema specified, it must match
+        if let Some(ref our_schema) = self.schema
+            && let Some(ref their_schema) = other.schema
+            && our_schema != their_schema
+        {
+            return false;
+        }
+
+        // If we have a database specified, it must match
+        if let Some(ref our_db) = self.database
+            && let Some(ref their_db) = other.database
+            && our_db != their_db
+        {
+            return false;
+        }
+
+        true
+    }
+}
+
+/// A Materialize cluster reference.
+///
+/// Clusters in Materialize are non-namespaced objects that can be referenced
+/// by indexes and materialized views via `IN CLUSTER <name>` clauses.
+///
+/// This struct provides type safety for cluster references and allows for
+/// future extensibility (e.g., tracking cluster size, replicas, etc.).
+#[derive(Debug, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
+pub struct Cluster {
+    pub name: String,
+}
+
+impl Cluster {
+    /// Creates a new cluster reference with the given name.
+    pub fn new(name: String) -> Self {
+        Self { name }
+    }
+}
+
+impl std::fmt::Display for Cluster {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.name)
+    }
+}
+
+/// A validated SQL statement representing a database object.
+///
+/// This enum wraps all supported CREATE statements from Materialize's SQL dialect.
+/// Each variant contains the parsed AST node with the `Raw` resolution state.
+#[derive(Debug, Clone, Hash)]
+pub enum Statement {
+    /// CREATE SINK statement
+    CreateSink(CreateSinkStatement<Raw>),
+    /// CREATE VIEW statement
+    CreateView(CreateViewStatement<Raw>),
+    /// CREATE MATERIALIZED VIEW statement
+    CreateMaterializedView(CreateMaterializedViewStatement<Raw>),
+    /// CREATE TABLE statement
+    CreateTable(CreateTableStatement<Raw>),
+    /// CREATE TABLE ... FROM SOURCE statement
+    CreateTableFromSource(CreateTableFromSourceStatement<Raw>),
+}
+
+impl Statement {
+    /// Extracts the database identifier from the statement.
+    ///
+    /// Returns the object name (potentially qualified with schema/database)
+    /// declared in the CREATE statement.
+    pub fn ident(&self) -> DatabaseIdent {
+        match self {
+            Statement::CreateSink(s) => s
+                .name
+                .clone()
+                .expect("CREATE SINK statement should have a name")
+                .into(),
+            Statement::CreateView(v) => v.definition.name.clone().into(),
+            Statement::CreateMaterializedView(m) => m.name.clone().into(),
+            Statement::CreateTable(t) => t.name.clone().into(),
+            Statement::CreateTableFromSource(t) => t.name.clone().into(),
+        }
+    }
+}
+
+impl std::fmt::Display for Statement {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Statement::CreateSink(s) => write!(f, "{}", s),
+            Statement::CreateView(s) => write!(f, "{}", s),
+            Statement::CreateMaterializedView(s) => write!(f, "{}", s),
+            Statement::CreateTable(s) => write!(f, "{}", s),
+            Statement::CreateTableFromSource(s) => write!(f, "{}", s),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::BTreeSet;
+
+    #[test]
+    fn test_cluster_creation() {
+        let cluster = Cluster::new("quickstart".to_string());
+        assert_eq!(cluster.name, "quickstart");
+    }
+
+    #[test]
+    fn test_cluster_equality() {
+        let c1 = Cluster::new("quickstart".to_string());
+        let c2 = Cluster::new("quickstart".to_string());
+        let c3 = Cluster::new("prod".to_string());
+
+        assert_eq!(c1, c2);
+        assert_ne!(c1, c3);
+        assert_ne!(c2, c3);
+    }
+
+    #[test]
+    fn test_cluster_clone() {
+        let c1 = Cluster::new("quickstart".to_string());
+        let c2 = c1.clone();
+
+        assert_eq!(c1, c2);
+        assert_eq!(c1.name, c2.name);
+    }
+
+    #[test]
+    fn test_cluster_hash_consistency() {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        let c1 = Cluster::new("quickstart".to_string());
+        let c2 = Cluster::new("quickstart".to_string());
+
+        let mut hasher1 = DefaultHasher::new();
+        c1.hash(&mut hasher1);
+        let hash1 = hasher1.finish();
+
+        let mut hasher2 = DefaultHasher::new();
+        c2.hash(&mut hasher2);
+        let hash2 = hasher2.finish();
+
+        assert_eq!(hash1, hash2, "Equal clusters should have equal hashes");
+    }
+
+    #[test]
+    fn test_cluster_in_hashset() {
+        let mut clusters = BTreeSet::new();
+
+        assert!(clusters.insert(Cluster::new("quickstart".to_string())));
+        assert!(!clusters.insert(Cluster::new("quickstart".to_string()))); // duplicate
+        assert!(clusters.insert(Cluster::new("prod".to_string())));
+
+        assert_eq!(clusters.len(), 2);
+        assert!(clusters.contains(&Cluster::new("quickstart".to_string())));
+        assert!(clusters.contains(&Cluster::new("prod".to_string())));
+        assert!(!clusters.contains(&Cluster::new("staging".to_string())));
+    }
+
+    #[test]
+    fn test_database_ident_matches() {
+        // Object name only
+        let ident1 = DatabaseIdent {
+            database: None,
+            schema: None,
+            object: "table".to_string(),
+        };
+
+        let ident2 = DatabaseIdent {
+            database: Some("db".to_string()),
+            schema: Some("public".to_string()),
+            object: "table".to_string(),
+        };
+
assert!(ident1.matches(&ident2)); + + // Schema qualified + let ident3 = DatabaseIdent { + database: None, + schema: Some("public".to_string()), + object: "table".to_string(), + }; + + assert!(ident3.matches(&ident2)); + + // Schema mismatch + let ident4 = DatabaseIdent { + database: None, + schema: Some("private".to_string()), + object: "table".to_string(), + }; + + assert!(!ident4.matches(&ident2)); + } +} diff --git a/src/mz-deploy/src/project/changeset.rs b/src/mz-deploy/src/project/changeset.rs new file mode 100644 index 0000000000000..1e11afff436a6 --- /dev/null +++ b/src/mz-deploy/src/project/changeset.rs @@ -0,0 +1,1164 @@ +//! Change detection for incremental deployment. +//! +//! This module implements a Dirty Propagation Algorithm to determine +//! which database objects, schemas, and clusters need redeployment after changes. +//! +//! ## Algorithm Overview +//! +//! The algorithm computes three result sets via fixed-point iteration: +//! - `DirtyStmt(object)` - All objects that must be reprocessed +//! - `DirtyCluster(cluster)` - All clusters that must be refreshed +//! - `DirtySchema(database, schema)` - All schemas containing dirty objects +//! +//! ## Propagation Rules +//! +//! ### Rule Category 1 — Statement Dirtiness +//! ```datalog +//! DirtyStmt(O) :- ChangedStmt(O) # Changed objects are dirty +//! DirtyStmt(O) :- StmtUsesCluster(O, C), DirtyCluster(C) # Objects on dirty statement clusters are dirty +//! DirtyStmt(O) :- DependsOn(O, P), DirtyStmt(P) # Downstream dependents are dirty +//! DirtyStmt(O) :- DirtySchema(Db, Sch), ObjectInSchema(O, Db, Sch) # Objects in dirty schemas are dirty +//! ``` +//! +//! **Key Insight:** Index clusters do NOT cause objects to be marked dirty. Indexes are physical +//! optimizations that can be managed independently without redeploying the object's statement. +//! If object A's index uses a dirty cluster, object A is NOT marked for redeployment. +//! +//! ### Rule Category 2 — Cluster Dirtiness +//! ```datalog +//! DirtyCluster(C) :- ChangedStmt(O), StmtUsesCluster(O, C), NOT IsSink(O) # Clusters of changed statements are dirty (excluding sinks) +//! DirtyCluster(C) :- ChangedStmt(O), IndexUsesCluster(O, _, C), NOT IsSink(O) # Clusters of changed indexes are dirty (excluding sinks) +//! ``` +//! +//! **Note:** Clusters are only marked dirty when the STATEMENT itself changes, +//! not when the object is dirty for other reasons (dependencies, schema propagation, etc.). +//! **Sinks are excluded** because they write to external systems and are created after the swap. +//! +//! ### Rule Category 3 — Schema Dirtiness +//! ```datalog +//! DirtySchema(Db, Sch) :- DirtyStmt(O), ObjectInSchema(O, Db, Sch), NOT IsSink(O) # Dirty objects make their schemas dirty (excluding sinks) +//! ``` +//! +//! **Key Property:** All dirty objects (except sinks) contribute to schema dirtiness, which triggers +//! schema-level atomic redeployment. Sinks are excluded because they are created after the swap +//! during apply and shouldn't cause other objects to be redeployed. + +use super::ast::{Cluster, Statement}; +use super::deployment_snapshot::DeploymentSnapshot; +use super::planned::{self, Project}; +use crate::project::object_id::ObjectId; +use crate::verbose; +use owo_colors::OwoColorize; +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt::{Display, Formatter}; + +/// Represents the set of changes between two project states. +/// +/// Used to determine which objects need redeployment based on snapshot comparison. 
+#[derive(Debug, Clone)]
+pub struct ChangeSet {
+    /// Objects that exist in changed files
+    pub changed_objects: BTreeSet<ObjectId>,
+
+    /// Schemas where ANY file changed (entire schema is dirty)
+    pub dirty_schemas: BTreeSet<(String, String)>,
+
+    /// Clusters used by objects in dirty schemas
+    pub dirty_clusters: BTreeSet<Cluster>,
+
+    /// All objects that need redeployment (includes transitive dependencies)
+    pub objects_to_deploy: BTreeSet<ObjectId>,
+}
+
+impl ChangeSet {
+    /// Create a ChangeSet by comparing old and new deployment snapshots using Datalog.
+    ///
+    /// This method uses Datalog fixed-point computation to determine the transitive
+    /// closure of all objects, clusters, and schemas affected by changes.
+    ///
+    /// # Arguments
+    /// * `old_snapshot` - Previous deployment snapshot
+    /// * `new_snapshot` - Current deployment snapshot
+    /// * `project` - MIR project with dependency information
+    ///
+    /// # Returns
+    /// A ChangeSet identifying all objects requiring redeployment
+    pub fn from_deployment_snapshot_comparison(
+        old_snapshot: &DeploymentSnapshot,
+        new_snapshot: &DeploymentSnapshot,
+        project: &Project,
+    ) -> Self {
+        // Step 1: Find changed objects by comparing hashes
+        let changed_objects = find_changed_objects(old_snapshot, new_snapshot);
+
+        // Step 2: Extract base facts from project
+        let base_facts = extract_base_facts(project);
+
+        // Step 3: Run Datalog fixed-point computation
+        let (dirty_stmts, dirty_clusters, dirty_schemas) =
+            compute_dirty_datalog(&changed_objects, &base_facts);
+
+        ChangeSet {
+            changed_objects: changed_objects.into_iter().collect(),
+            dirty_schemas: dirty_schemas.into_iter().collect(),
+            dirty_clusters: dirty_clusters.into_iter().collect(),
+            objects_to_deploy: dirty_stmts.into_iter().collect(),
+        }
+    }
+
+    /// Check if any changes were detected.
+    pub fn is_empty(&self) -> bool {
+        self.objects_to_deploy.is_empty()
+    }
+
+    /// Get the number of objects that need deployment.
+    pub fn deployment_count(&self) -> usize {
+        self.objects_to_deploy.len()
+    }
+}
+
+impl Display for ChangeSet {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        writeln!(
+            f,
+            "Incremental deployment: {} objects need redeployment",
+            self.deployment_count()
+        )?;
+
+        if !self.changed_objects.is_empty() {
+            writeln!(f, "Changed objects:")?;
+            for obj in &self.changed_objects {
+                writeln!(f, "  - {}.{}.{}", obj.database, obj.schema, obj.object)?;
+            }
+        }
+
+        if !self.dirty_schemas.is_empty() {
+            writeln!(f, "Dirty schemas:")?;
+            for (db, schema) in &self.dirty_schemas {
+                writeln!(f, "  - {}.{}", db, schema)?;
+            }
+        }
+
+        if !self.dirty_clusters.is_empty() {
+            writeln!(f, "Dirty clusters:")?;
+            for cluster in &self.dirty_clusters {
+                writeln!(f, "  - {}", cluster.name)?;
+            }
+        }
+
+        if !self.objects_to_deploy.is_empty() {
+            writeln!(f, "Objects to deploy:")?;
+            for obj in &self.objects_to_deploy {
+                writeln!(f, "  - {}.{}.{}", obj.database, obj.schema, obj.object)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+//
+// BASE FACT EXTRACTION
+//
+
+/// Base facts extracted from the project for Datalog computation.
+#[derive(Debug)] +struct BaseFacts { + /// ObjectInSchema(object, database, schema) + object_in_schema: Vec<(ObjectId, String, String)>, + + /// DependsOn(child, parent) - child depends on parent + depends_on: Vec<(ObjectId, ObjectId)>, + + /// StmtUsesCluster(object, cluster_name) + stmt_uses_cluster: Vec<(ObjectId, String)>, + + /// IndexUsesCluster(object, index_name, cluster_name) + index_uses_cluster: Vec<(ObjectId, String, String)>, + + /// IsSink(object) - objects that are sinks (should not propagate dirtiness to clusters/schemas) + is_sink: BTreeSet, +} + +/// Find changed objects by comparing snapshot hashes. +fn find_changed_objects( + old_snapshot: &DeploymentSnapshot, + new_snapshot: &DeploymentSnapshot, +) -> BTreeSet { + verbose!( + "{} {}", + "▶".cyan(), + "Comparing deployment snapshots...".cyan().bold() + ); + let mut changed = BTreeSet::new(); + + // Objects with different hashes or newly added + for (object_id, new_hash) in &new_snapshot.objects { + match old_snapshot.objects.get(object_id) { + Some(old_hash) if old_hash != new_hash => { + verbose!( + " ├─ {}: {} ({} {} → {})", + "Changed".green(), + object_id.to_string().cyan(), + "hash".dimmed(), + old_hash[..8].to_string().dimmed(), + new_hash[..8].to_string().dimmed() + ); + changed.insert(object_id.clone()); + } + None => { + verbose!( + " ├─ {}: {} ({} {})", + "New".green(), + object_id.to_string().cyan(), + "hash".dimmed(), + new_hash[..8].to_string().dimmed() + ); + changed.insert(object_id.clone()); + } + _ => {} + } + } + + // Deleted objects + for object_id in old_snapshot.objects.keys() { + if !new_snapshot.objects.contains_key(object_id) { + verbose!(" ├─ {}: {}", "Deleted".red(), object_id.to_string().cyan()); + changed.insert(object_id.clone()); + } + } + + verbose!( + " └─ Found {} changed object(s)", + changed.len().to_string().bold() + ); + changed +} + +/// Extract all base facts from the project for Datalog computation. 
+fn extract_base_facts(project: &Project) -> BaseFacts { + verbose!( + "{} {}", + "▶".cyan(), + "Extracting base facts from project...".cyan().bold() + ); + let mut object_in_schema = Vec::new(); + let mut depends_on = Vec::new(); + let mut stmt_uses_cluster = Vec::new(); + let mut index_uses_cluster = Vec::new(); + let mut is_sink = BTreeSet::new(); + + // Extract facts from each object in the project + for db in &project.databases { + for schema in &db.schemas { + for obj in &schema.objects { + let obj_id = obj.id.clone(); + + // ObjectInSchema fact + object_in_schema.push((obj_id.clone(), db.name.clone(), schema.name.clone())); + + // IsSink fact - sinks should not propagate dirtiness to clusters/schemas + if matches!(obj.typed_object.stmt, Statement::CreateSink(_)) { + verbose!(" ├─ {}: {}", "IsSink".yellow(), obj_id.to_string().cyan()); + is_sink.insert(obj_id.clone()); + } + + // DependsOn facts from dependency graph + if let Some(deps) = project.dependency_graph.get(&obj_id) { + for parent in deps { + depends_on.push((obj_id.clone(), parent.clone())); + } + } + + // Extract cluster usage from statement + let (_, clusters) = + planned::extract_dependencies(&obj.typed_object.stmt, &db.name, &schema.name); + + // StmtUsesCluster facts + for cluster in clusters { + stmt_uses_cluster.push((obj_id.clone(), cluster.name.clone())); + } + + // IndexUsesCluster facts - extract from indexes + for index in &obj.typed_object.indexes { + // Extract cluster directly from CreateIndexStatement + if let Some(cluster_name) = &index.in_cluster { + let index_name = index + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| "unnamed_index".to_string()); + + // Convert cluster name to string + let cluster_str = cluster_name.to_string(); + + index_uses_cluster.push((obj_id.clone(), index_name, cluster_str)); + } + } + } + } + } + + verbose!( + " └─ Base facts: {} objects, {} dependencies, {} stmt→cluster, {} index→cluster, {} sinks", + object_in_schema.len().to_string().bold(), + depends_on.len().to_string().bold(), + stmt_uses_cluster.len().to_string().bold(), + index_uses_cluster.len().to_string().bold(), + is_sink.len().to_string().bold() + ); + + BaseFacts { + object_in_schema, + depends_on, + stmt_uses_cluster, + index_uses_cluster, + is_sink, + } +} + +/// Pre-computed indexes for efficient Datalog rule evaluation. 
+struct DatalogIndexes {
+    /// Object -> clusters used by the statement
+    stmt_to_clusters: BTreeMap<ObjectId, Vec<String>>,
+    /// Object -> clusters used by indexes on that object
+    index_to_clusters: BTreeMap<ObjectId, Vec<String>>,
+    /// Parent -> list of dependent children (reverse of depends_on)
+    dependents: BTreeMap<ObjectId, Vec<ObjectId>>,
+    /// Object -> (database, schema) it belongs to
+    object_to_schema: BTreeMap<ObjectId, (String, String)>,
+}
+
+impl DatalogIndexes {
+    fn from_base_facts(facts: &BaseFacts) -> Self {
+        // stmt_to_clusters: group by object
+        let mut stmt_to_clusters: BTreeMap<ObjectId, Vec<String>> = BTreeMap::new();
+        for (obj, cluster) in &facts.stmt_uses_cluster {
+            stmt_to_clusters
+                .entry(obj.clone())
+                .or_default()
+                .push(cluster.clone());
+        }
+
+        // index_to_clusters: group by object (ignoring index name)
+        let mut index_to_clusters: BTreeMap<ObjectId, Vec<String>> = BTreeMap::new();
+        for (obj, _index_name, cluster) in &facts.index_uses_cluster {
+            index_to_clusters
+                .entry(obj.clone())
+                .or_default()
+                .push(cluster.clone());
+        }
+
+        // dependents: reverse the depends_on relation (parent -> children)
+        let mut dependents: BTreeMap<ObjectId, Vec<ObjectId>> = BTreeMap::new();
+        for (child, parent) in &facts.depends_on {
+            dependents
+                .entry(parent.clone())
+                .or_default()
+                .push(child.clone());
+        }
+
+        // object_to_schema: direct mapping
+        let object_to_schema = facts
+            .object_in_schema
+            .iter()
+            .map(|(obj, db, sch)| (obj.clone(), (db.clone(), sch.clone())))
+            .collect();
+
+        DatalogIndexes {
+            stmt_to_clusters,
+            index_to_clusters,
+            dependents,
+            object_to_schema,
+        }
+    }
+}
+
+/// Compute dirty objects, clusters, and schemas using fixed-point iteration.
+///
+/// Implements the Datalog rules defined at the top of this module.
+///
+/// **Important:** Sinks are special - they do NOT propagate dirtiness to clusters or schemas.
+/// Sinks write to external systems and are created after the swap during apply, so they
+/// shouldn't cause other objects to be redeployed.
+fn compute_dirty_datalog( + changed_stmts: &BTreeSet, + base_facts: &BaseFacts, +) -> ( + BTreeSet, + BTreeSet, + BTreeSet<(String, String)>, +) { + verbose!( + "{} {}", + "▶".cyan(), + "Starting fixed-point computation...".cyan().bold() + ); + verbose!( + " ├─ Initial changed statements: [{}]", + changed_stmts + .iter() + .map(|o| o.to_string().cyan().to_string()) + .collect::>() + .join(", ") + ); + verbose!( + " └─ Known sinks: [{}]", + base_facts + .is_sink + .iter() + .map(|o| o.to_string().yellow().to_string()) + .collect::>() + .join(", ") + ); + + let indexes = DatalogIndexes::from_base_facts(base_facts); + + // Initialize result sets + let mut dirty_stmts: BTreeSet = changed_stmts.clone(); + let mut dirty_clusters: BTreeSet = BTreeSet::new(); + let mut dirty_schemas: BTreeSet<(String, String)> = BTreeSet::new(); + + // Fixed-point iteration: apply rules until no changes + let mut iteration = 0; + loop { + iteration += 1; + let prev_sizes = (dirty_stmts.len(), dirty_clusters.len(), dirty_schemas.len()); + verbose!( + "\n{} {} (stmts={}, clusters={}, schemas={})", + "▶".cyan(), + format!("Iteration {}", iteration).cyan().bold(), + dirty_stmts.len().to_string().bold(), + dirty_clusters.len().to_string().bold(), + dirty_schemas.len().to_string().bold() + ); + + // --- Cluster dirtiness rules (only from changed statements, excluding sinks) --- + // Rule 1: DirtyCluster(C) :- ChangedStmt(O), StmtUsesCluster(O, C), NOT IsSink(O) + // Rule 2: DirtyCluster(C) :- ChangedStmt(O), IndexUsesCluster(O, _, C), NOT IsSink(O) + for obj in changed_stmts { + // Sinks should NOT make clusters dirty + if base_facts.is_sink.contains(obj) { + verbose!( + " ├─ {}: {} is a sink, not marking clusters dirty", + "SKIP".yellow().bold(), + obj.to_string().cyan() + ); + continue; + } + if let Some(clusters) = indexes.stmt_to_clusters.get(obj) { + for cluster in clusters { + if dirty_clusters.insert(cluster.clone()) { + verbose!( + " ├─ {}: DirtyCluster({}) ← ChangedStmt({}) uses cluster", + "Rule 1".bold(), + cluster.magenta(), + obj.to_string().cyan() + ); + } + } + } + if let Some(clusters) = indexes.index_to_clusters.get(obj) { + for cluster in clusters { + if dirty_clusters.insert(cluster.clone()) { + verbose!( + " ├─ {}: DirtyCluster({}) ← ChangedStmt({}) has index on cluster", + "Rule 2".bold(), + cluster.magenta(), + obj.to_string().cyan() + ); + } + } + } + } + + // --- Statement dirtiness rules --- + // Rule 3: DirtyStmt(O) :- StmtUsesCluster(O, C), DirtyCluster(C) + for (obj, clusters) in &indexes.stmt_to_clusters { + for cluster in clusters { + if dirty_clusters.contains(cluster) && dirty_stmts.insert(obj.clone()) { + verbose!( + " ├─ {}: DirtyStmt({}) ← uses DirtyCluster({})", + "Rule 3".bold(), + obj.to_string().cyan(), + cluster.magenta() + ); + break; + } + } + } + + // Rule 4: DirtyStmt(O) :- DependsOn(O, P), DirtyStmt(P) + let current_dirty: Vec<_> = dirty_stmts.iter().cloned().collect(); + for dirty_obj in current_dirty { + if let Some(children) = indexes.dependents.get(&dirty_obj) { + for child in children { + if dirty_stmts.insert(child.clone()) { + verbose!( + " ├─ {}: DirtyStmt({}) ← depends on DirtyStmt({})", + "Rule 4".bold(), + child.to_string().cyan(), + dirty_obj.to_string().cyan() + ); + } + } + } + } + + // --- Schema dirtiness rules (excluding sinks) --- + // Rule 5: DirtySchema(Db, Sch) :- DirtyStmt(O), ObjectInSchema(O, Db, Sch), NOT IsSink(O) + for obj in &dirty_stmts { + // Sinks should NOT make schemas dirty + if base_facts.is_sink.contains(obj) { + verbose!( + " ├─ {}: {} is 
a sink, not marking schema dirty", + "SKIP".yellow().bold(), + obj.to_string().cyan() + ); + continue; + } + if let Some((db, sch)) = indexes.object_to_schema.get(obj) { + if dirty_schemas.insert((db.clone(), sch.clone())) { + verbose!( + " ├─ {}: DirtySchema({}) ← DirtyStmt({}) in schema", + "Rule 5".bold(), + format!("{}.{}", db, sch).blue(), + obj.to_string().cyan() + ); + } + } + } + + // Rule 6: DirtyStmt(O) :- DirtySchema(Db, Sch), ObjectInSchema(O, Db, Sch) + for (obj, (db, sch)) in &indexes.object_to_schema { + if dirty_schemas.contains(&(db.clone(), sch.clone())) { + if dirty_stmts.insert(obj.clone()) { + verbose!( + " ├─ {}: DirtyStmt({}) ← in DirtySchema({})", + "Rule 6".bold(), + obj.to_string().cyan(), + format!("{}.{}", db, sch).blue() + ); + } + } + } + + // Fixed point reached when no sets grew + if (dirty_stmts.len(), dirty_clusters.len(), dirty_schemas.len()) == prev_sizes { + verbose!( + "\n{} Fixed point reached after {} iteration(s)", + "✓".green(), + iteration.to_string().bold() + ); + break; + } + } + + // Log final results + verbose!("{} {}", "▶".cyan(), "Final Results".cyan().bold()); + verbose!( + " ├─ Dirty statements ({}): [{}]", + dirty_stmts.len().to_string().bold(), + dirty_stmts + .iter() + .map(|o| o.to_string().cyan().to_string()) + .collect::>() + .join(", ") + ); + verbose!( + " ├─ Dirty clusters ({}): [{}]", + dirty_clusters.len().to_string().bold(), + dirty_clusters + .iter() + .map(|c| c.magenta().to_string()) + .collect::>() + .join(", ") + ); + verbose!( + " └─ Dirty schemas ({}): [{}]", + dirty_schemas.len().to_string().bold(), + dirty_schemas + .iter() + .map(|(db, sch)| format!("{}.{}", db, sch).blue().to_string()) + .collect::>() + .join(", ") + ); + + // Convert cluster names to Cluster structs + let dirty_cluster_structs = dirty_clusters.into_iter().map(Cluster::new).collect(); + + (dirty_stmts, dirty_cluster_structs, dirty_schemas) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_object_file_path() { + let path = "materialize/public/users.sql"; + let parts: Vec<&str> = path.split('/').collect(); + + match parts.as_slice() { + [db, schema, file] if file.ends_with(".sql") => { + assert_eq!(*db, "materialize"); + assert_eq!(*schema, "public"); + assert_eq!(file.strip_suffix(".sql").unwrap(), "users"); + } + _ => panic!("Path didn't match expected pattern"), + } + } + + #[test] + fn test_parse_schema_mod_file_path() { + let path = "materialize/public.sql"; + let parts: Vec<&str> = path.split('/').collect(); + + match parts.as_slice() { + [db, schema_file] if schema_file.ends_with(".sql") => { + assert_eq!(*db, "materialize"); + assert_eq!(schema_file.strip_suffix(".sql").unwrap(), "public"); + } + _ => panic!("Path didn't match expected pattern"), + } + } + + #[test] + fn test_parse_database_mod_file_path() { + let path = "materialize.sql"; + let parts: Vec<&str> = path.split('/').collect(); + + match parts.as_slice() { + [db_file] if db_file.ends_with(".sql") => { + assert_eq!(db_file.strip_suffix(".sql").unwrap(), "materialize"); + } + _ => panic!("Path didn't match expected pattern"), + } + } + + #[test] + fn test_schema_propagation_all_objects_in_dirty_schema_are_dirty() { + // Test that when one object in a schema becomes dirty, + // ALL objects in that schema become dirty (schema-level atomicity) + + // Create base facts for a schema with 3 objects + let obj1 = ObjectId::new("db".to_string(), "schema".to_string(), "table1".to_string()); + let obj2 = ObjectId::new("db".to_string(), "schema".to_string(), 
"table2".to_string()); + let obj3 = ObjectId::new("db".to_string(), "schema".to_string(), "view1".to_string()); + + let base_facts = BaseFacts { + object_in_schema: vec![ + (obj1.clone(), "db".to_string(), "schema".to_string()), + (obj2.clone(), "db".to_string(), "schema".to_string()), + (obj3.clone(), "db".to_string(), "schema".to_string()), + ], + depends_on: vec![], + stmt_uses_cluster: vec![], + index_uses_cluster: vec![], + is_sink: BTreeSet::new(), + }; + + // Only obj1 is changed + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(obj1.clone()); + + // Run Datalog computation + let (dirty_stmts, _dirty_clusters, dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + // Verify schema is dirty + assert!( + dirty_schemas.contains(&("db".to_string(), "schema".to_string())), + "Schema should be marked dirty" + ); + + // CRITICAL: All objects in the dirty schema should be dirty + assert!( + dirty_stmts.contains(&obj1), + "obj1 (changed) should be dirty" + ); + assert!( + dirty_stmts.contains(&obj2), + "obj2 (same schema as changed obj1) should be dirty" + ); + assert!( + dirty_stmts.contains(&obj3), + "obj3 (same schema as changed obj1) should be dirty" + ); + + println!("Dirty objects: {:?}", dirty_stmts); + println!("Dirty schemas: {:?}", dirty_schemas); + } + + #[test] + fn test_index_cluster_does_not_dirty_parent_object_cluster() { + // Critical test: If an index uses a dirty cluster, the index should be redeployed, + // but the parent object and its cluster should NOT be marked dirty. + // + // Scenario: + // - winning_bids MV on "staging" cluster + // - Index on winning_bids using "quickstart" cluster + // - some_other_obj on "quickstart" cluster changes + // + // Expected: + // - quickstart cluster becomes dirty ✓ + // - Index needs redeployment ✓ + // - winning_bids needs redeployment (to deploy its index) ✓ + // - BUT staging cluster should NOT be dirty ✗ (current bug) + + let mv = ObjectId::new( + "db".to_string(), + "schema".to_string(), + "winning_bids".to_string(), + ); + let other = ObjectId::new( + "db".to_string(), + "schema".to_string(), + "other_obj".to_string(), + ); + + let base_facts = BaseFacts { + object_in_schema: vec![ + (mv.clone(), "db".to_string(), "schema".to_string()), + (other.clone(), "db".to_string(), "schema".to_string()), + ], + depends_on: vec![], + stmt_uses_cluster: vec![ + (mv.clone(), "staging".to_string()), + (other.clone(), "quickstart".to_string()), + ], + index_uses_cluster: vec![( + mv.clone(), + "idx_item".to_string(), + "quickstart".to_string(), + )], + is_sink: BTreeSet::new(), + }; + + // Only other_obj is changed + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(other.clone()); + + // Run Datalog computation + let (dirty_stmts, dirty_clusters, _dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + println!("Dirty stmts: {:?}", dirty_stmts); + println!("Dirty clusters: {:?}", dirty_clusters); + + // Verify quickstart cluster is dirty + assert!( + dirty_clusters.iter().any(|c| c.name == "quickstart"), + "quickstart cluster should be dirty because other_obj changed" + ); + + // Verify winning_bids needs redeployment (because its index uses dirty cluster) + assert!( + dirty_stmts.contains(&mv), + "winning_bids should be redeployed (its index uses dirty quickstart cluster)" + ); + + // CRITICAL: staging cluster should NOT be dirty + // The MV's statement uses staging, but the MV is only dirty because of its index, + // not because its statement changed. 
Therefore staging should not be marked dirty. + assert!( + !dirty_clusters.iter().any(|c| c.name == "staging"), + "staging cluster should NOT be dirty - winning_bids is only dirty due to its index, not its statement" + ); + } + + #[test] + fn test_index_cluster_does_not_dirty_schema() { + // Scenario: + // - table1 and table2 in the same schema + // - table1 has index on cluster "index_cluster" + // - some_other_obj uses "index_cluster" and changes + // + // Expected (NEW BEHAVIOR): + // - index_cluster becomes dirty ✓ + // - table1 should NOT be dirty (indexes don't cause redeployment) ✓ + // - schema should NOT be dirty ✓ + // - table2 should NOT be dirty ✓ + // + // This ensures that objects are only redeployed when their statement changes, + // not when their index clusters become dirty. + + let table1 = ObjectId::new("db".to_string(), "schema".to_string(), "table1".to_string()); + let table2 = ObjectId::new("db".to_string(), "schema".to_string(), "table2".to_string()); + let other = ObjectId::new( + "db".to_string(), + "other_schema".to_string(), + "other_obj".to_string(), + ); + + let base_facts = BaseFacts { + object_in_schema: vec![ + (table1.clone(), "db".to_string(), "schema".to_string()), + (table2.clone(), "db".to_string(), "schema".to_string()), + (other.clone(), "db".to_string(), "other_schema".to_string()), + ], + depends_on: vec![], + stmt_uses_cluster: vec![(other.clone(), "index_cluster".to_string())], + index_uses_cluster: vec![( + table1.clone(), + "idx1".to_string(), + "index_cluster".to_string(), + )], + is_sink: BTreeSet::new(), + }; + + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(other.clone()); + + let (dirty_stmts, dirty_clusters, dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + // index_cluster should be dirty + assert!(dirty_clusters.iter().any(|c| c.name == "index_cluster")); + + // table1 should NOT be dirty (indexes don't cause object redeployment) + assert!( + !dirty_stmts.contains(&table1), + "table1 should NOT be dirty - indexes don't cause redeployment" + ); + + // Schema should NOT be dirty + assert!( + !dirty_schemas.contains(&("db".to_string(), "schema".to_string())), + "schema should NOT be dirty" + ); + + // And table2 should NOT be dirty + assert!(!dirty_stmts.contains(&table2), "table2 should NOT be dirty"); + } + + #[test] + fn test_schema_propagation_does_not_dirty_index_clusters() { + // Scenario from real deployment: + // - flip_activities and flippers in materialize.public schema + // - flip_activities has index on "quickstart" cluster + // - winning_bids in materialize.internal schema has index on "quickstart" + // - When flippers changes: + // - materialize.public schema becomes dirty + // - flip_activities becomes dirty (schema propagation) + // - BUT quickstart cluster should NOT become dirty + // - winning_bids should NOT be redeployed + + let flippers = ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "flippers".to_string(), + ); + let flip_activities = ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "flip_activities".to_string(), + ); + let winning_bids = ObjectId::new( + "materialize".to_string(), + "internal".to_string(), + "winning_bids".to_string(), + ); + + let base_facts = BaseFacts { + object_in_schema: vec![ + ( + flippers.clone(), + "materialize".to_string(), + "public".to_string(), + ), + ( + flip_activities.clone(), + "materialize".to_string(), + "public".to_string(), + ), + ( + winning_bids.clone(), + "materialize".to_string(), + 
"internal".to_string(), + ), + ], + depends_on: vec![], + stmt_uses_cluster: vec![], + index_uses_cluster: vec![ + ( + flip_activities.clone(), + "idx_flipper".to_string(), + "quickstart".to_string(), + ), + ( + winning_bids.clone(), + "idx_item".to_string(), + "quickstart".to_string(), + ), + ], + is_sink: BTreeSet::new(), + }; + + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(flippers.clone()); + + let (dirty_stmts, dirty_clusters, dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + // materialize.public schema should be dirty + assert!(dirty_schemas.contains(&("materialize".to_string(), "public".to_string()))); + + // flip_activities should be dirty due to schema propagation + assert!( + dirty_stmts.contains(&flip_activities), + "flip_activities should be dirty due to schema propagation" + ); + + // CRITICAL: quickstart cluster should NOT be dirty + // flip_activities is dirty due to schema propagation, not because its statement changed + assert!( + !dirty_clusters.iter().any(|c| c.name == "quickstart"), + "quickstart cluster should NOT be dirty - flip_activities is dirty due to schema propagation, not statement change" + ); + + // winning_bids should NOT be dirty + assert!( + !dirty_stmts.contains(&winning_bids), + "winning_bids should NOT be dirty - quickstart cluster is not dirty" + ); + } + + #[test] + fn test_dependency_propagation_with_index_cluster_conflict() { + // Real-world bug scenario: + // - winning_bids changes (has index on quickstart) + // - flip_activities depends on winning_bids (also has index on quickstart) + // - flippers depends on flip_activities + // + // What happens: + // 1. winning_bids changes → quickstart becomes dirty + // 2. flip_activities becomes dirty (index on dirty quickstart) + // 3. BUT flip_activities also depends on winning_bids! + // 4. So flip_activities should ALSO be schema-propagating + // 5. Which should make materialize.public schema dirty + // 6. 
Which should make flippers dirty + // + // The bug was: step 4 was skipped because flip_activities was already dirty + + let winning_bids = ObjectId::new( + "materialize".to_string(), + "internal".to_string(), + "winning_bids".to_string(), + ); + let flip_activities = ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "flip_activities".to_string(), + ); + let flippers = ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "flippers".to_string(), + ); + + let base_facts = BaseFacts { + object_in_schema: vec![ + ( + winning_bids.clone(), + "materialize".to_string(), + "internal".to_string(), + ), + ( + flip_activities.clone(), + "materialize".to_string(), + "public".to_string(), + ), + ( + flippers.clone(), + "materialize".to_string(), + "public".to_string(), + ), + ], + depends_on: vec![ + (flip_activities.clone(), winning_bids.clone()), // flip_activities depends on winning_bids + (flippers.clone(), flip_activities.clone()), // flippers depends on flip_activities + ], + stmt_uses_cluster: vec![(winning_bids.clone(), "staging".to_string())], + index_uses_cluster: vec![ + ( + winning_bids.clone(), + "idx_item".to_string(), + "quickstart".to_string(), + ), + ( + flip_activities.clone(), + "idx_flipper".to_string(), + "quickstart".to_string(), + ), + ], + is_sink: BTreeSet::new(), + }; + + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(winning_bids.clone()); + + let (dirty_stmts, dirty_clusters, dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + println!("Dirty stmts: {:?}", dirty_stmts); + println!("Dirty schemas: {:?}", dirty_schemas); + + // winning_bids should be dirty (changed) + assert!( + dirty_stmts.contains(&winning_bids), + "winning_bids should be dirty" + ); + + // materialize.internal schema should be dirty + assert!( + dirty_schemas.contains(&("materialize".to_string(), "internal".to_string())), + "materialize.internal schema should be dirty" + ); + + // quickstart cluster should be dirty (winning_bids has index on it) + assert!( + dirty_clusters.iter().any(|c| c.name == "quickstart"), + "quickstart cluster should be dirty" + ); + + // flip_activities should be dirty (depends on winning_bids) + assert!( + dirty_stmts.contains(&flip_activities), + "flip_activities should be dirty - depends on winning_bids" + ); + + // CRITICAL: materialize.public schema should be dirty + // flip_activities is dirty due to both: + // 1. Its index is on dirty quickstart (index-only dirty) + // 2. 
It depends on winning_bids (schema-propagating dirty) + // The second reason should make materialize.public schema dirty + assert!( + dirty_schemas.contains(&("materialize".to_string(), "public".to_string())), + "materialize.public schema should be dirty - flip_activities depends on winning_bids" + ); + + // flippers should be dirty (materialize.public schema is dirty) + assert!( + dirty_stmts.contains(&flippers), + "flippers should be dirty - its schema (materialize.public) is dirty" + ); + } + + #[test] + fn test_index_cluster_does_not_cause_unnecessary_redeployment() { + // Real-world scenario from auction_house project: + // - materialize.foo.b changes (has default index in quickstart) + // - materialize.internal.winning_bids has MV in staging cluster + index in quickstart + // - materialize.public.flip_activities depends on winning_bids + // + // Expected: + // - Only foo.b should be dirty + // - materialize.foo schema should be dirty + // - quickstart cluster should be dirty + // - staging cluster should NOT be dirty (no objects using it changed) + // - materialize.internal schema should NOT be dirty + // - winning_bids should NOT be dirty (index in dirty cluster doesn't cause redeployment) + // - flip_activities should NOT be dirty (winning_bids isn't dirty) + + let foo_b = ObjectId::new( + "materialize".to_string(), + "foo".to_string(), + "b".to_string(), + ); + let winning_bids = ObjectId::new( + "materialize".to_string(), + "internal".to_string(), + "winning_bids".to_string(), + ); + let flip_activities = ObjectId::new( + "materialize".to_string(), + "public".to_string(), + "flip_activities".to_string(), + ); + + let base_facts = BaseFacts { + object_in_schema: vec![ + (foo_b.clone(), "materialize".to_string(), "foo".to_string()), + ( + winning_bids.clone(), + "materialize".to_string(), + "internal".to_string(), + ), + ( + flip_activities.clone(), + "materialize".to_string(), + "public".to_string(), + ), + ], + depends_on: vec![(flip_activities.clone(), winning_bids.clone())], + // foo.b has default index in quickstart + // winning_bids has MV in staging, index in quickstart + stmt_uses_cluster: vec![(winning_bids.clone(), "staging".to_string())], + index_uses_cluster: vec![ + ( + foo_b.clone(), + "default_idx".to_string(), + "quickstart".to_string(), + ), + ( + winning_bids.clone(), + "idx1".to_string(), + "quickstart".to_string(), + ), + ], + is_sink: BTreeSet::new(), + }; + + let mut changed_stmts = BTreeSet::new(); + changed_stmts.insert(foo_b.clone()); + + let (dirty_stmts, dirty_clusters, dirty_schemas) = + compute_dirty_datalog(&changed_stmts, &base_facts); + + // Only foo.b should be dirty + assert!(dirty_stmts.contains(&foo_b), "foo.b should be dirty"); + assert_eq!( + dirty_stmts.len(), + 1, + "only foo.b should be dirty, got: {:?}", + dirty_stmts + ); + + // materialize.foo schema should be dirty + assert!( + dirty_schemas.contains(&("materialize".to_string(), "foo".to_string())), + "materialize.foo schema should be dirty" + ); + + // quickstart cluster should be dirty (foo.b has index on it) + assert!( + dirty_clusters.iter().any(|c| c.name == "quickstart"), + "quickstart cluster should be dirty" + ); + + // staging cluster should NOT be dirty (no changed objects use it) + assert!( + !dirty_clusters.iter().any(|c| c.name == "staging"), + "staging cluster should NOT be dirty" + ); + + // materialize.internal schema should NOT be dirty + assert!( + !dirty_schemas.contains(&("materialize".to_string(), "internal".to_string())), + "materialize.internal schema should NOT be 
dirty" + ); + + // winning_bids should NOT be dirty (even though it has index in quickstart) + assert!( + !dirty_stmts.contains(&winning_bids), + "winning_bids should NOT be dirty - index cluster doesn't cause redeployment" + ); + + // flip_activities should NOT be dirty (winning_bids isn't dirty) + assert!( + !dirty_stmts.contains(&flip_activities), + "flip_activities should NOT be dirty - winning_bids isn't dirty" + ); + } +} diff --git a/src/mz-deploy/src/project/deployment_snapshot.rs b/src/mz-deploy/src/project/deployment_snapshot.rs new file mode 100644 index 0000000000000..5f1cf8c3ec625 --- /dev/null +++ b/src/mz-deploy/src/project/deployment_snapshot.rs @@ -0,0 +1,284 @@ +//! Deployment snapshot tracking. +//! +//! This module provides functionality for capturing and comparing deployment state snapshots. +//! Instead of hashing raw files, we hash the normalized typed representation +//! objects, so formatting and comment changes don't trigger unnecessary redeployments. +//! +//! A deployment snapshot captures the state of all deployed objects with their content hashes, +//! enabling change detection (like git diff but for database objects) and supporting +//! blue/green deployment workflows. + +use std::collections::BTreeMap; +use std::hash::{Hash, Hasher}; + +use chrono::{DateTime, Utc}; +use sha2::{Digest, Sha256}; + +use crate::client::{ + Client, ConnectionError, DeploymentKind, DeploymentObjectRecord, SchemaDeploymentRecord, +}; +use crate::project::object_id::ObjectId; +use crate::project::{planned, typed}; + +/// A wrapper that bridges `std::hash::Hasher` to `sha2::Digest`. +/// +/// This allows us to use the `Hash` trait on AST nodes while using SHA256 for stability. +struct Sha256Hasher { + digest: Sha256, +} + +impl Sha256Hasher { + fn new() -> Self { + Self { + digest: Sha256::new(), + } + } + + fn finalize(self) -> String { + let result = self.digest.finalize(); + format!("sha256:{:x}", result) + } +} + +impl Hasher for Sha256Hasher { + fn write(&mut self, bytes: &[u8]) { + self.digest.update(bytes); + } + + fn finish(&self) -> u64 { + // This is never called when using Hash trait, but required by trait + // We use finalize() instead to get the full hash + panic!("Sha256Hasher::finish() should not be called, use finalize() instead"); + } +} + +/// Represents a point-in-time snapshot of deployment state. +/// +/// Maps object IDs to their content hashes, where the hash is computed from +/// the normalized typed representation (not raw file contents). +/// Also tracks which schemas were deployed as atomic units. +#[derive(Debug, Clone)] +pub struct DeploymentSnapshot { + /// Map of ObjectId to content hash + pub objects: BTreeMap, + /// Map of (database, schema) to deployment kind + /// - Objects: Regular schemas containing views/MVs that need swapping + /// - Sinks: Schemas containing only sinks (no swap needed, sinks created after swap) + /// - Tables: Schemas containing only tables + pub schemas: BTreeMap<(String, String), DeploymentKind>, +} + +/// Metadata collected during deployment. +#[derive(Debug, Clone)] +pub struct DeploymentMetadata { + /// Materialize user/role that performed the deployment + pub deployed_by: String, + /// Git commit hash if the project is in a git repository + pub git_commit: Option, +} + +/// Error types for deployment snapshot operations. 
+#[derive(Debug, thiserror::Error)] +pub enum DeploymentSnapshotError { + #[error("failed to connect to database: {0}")] + Connection(#[from] ConnectionError), + + #[error("failed to build snapshot from planned representation: {0}")] + PlannedAccess(String), + + #[error("invalid object FQN: {0}")] + InvalidFqn(String), + + #[error("deployment '{environment}' already exists")] + DeploymentAlreadyExists { environment: String }, + + #[error("deployment '{environment}' not found")] + DeploymentNotFound { environment: String }, + + #[error("deployment '{environment}' has already been promoted")] + DeploymentAlreadyPromoted { environment: String }, +} + +impl Default for DeploymentSnapshot { + fn default() -> Self { + Self { + objects: BTreeMap::new(), + schemas: BTreeMap::new(), + } + } +} + +/// Compute a deterministic hash of a typed DatabaseObject. +/// The hash includes: +/// - The main CREATE statement +/// - All indexes +/// +/// Uses SHA256 for stable, deterministic hashing across platforms and Rust versions. +pub fn compute_typed_hash(db_obj: &typed::DatabaseObject) -> String { + let mut hasher = Sha256Hasher::new(); + + // Hash the main statement directly using its Hash implementation + db_obj.stmt.hash(&mut hasher); + + let mut indexes = db_obj.indexes.clone(); + + // Ensure hash is stable by sorting indexes deterministically + indexes.sort_by(|a, b| { + a.in_cluster + .cmp(&b.in_cluster) + .then(a.on_name.cmp(&b.on_name)) + .then(a.name.cmp(&b.name)) + .then_with(|| { + let key_a = a.key_parts.as_ref().map(|ks| { + ks.iter() + .map(|e| e.to_string()) + .collect::>() + .join(",") + }); + let key_b = b.key_parts.as_ref().map(|ks| { + ks.iter() + .map(|e| e.to_string()) + .collect::>() + .join(",") + }); + + key_a.cmp(&key_b) + }) + }); + + // Hash all indexes directly using their Hash implementation + for index in &indexes { + index.hash(&mut hasher); + } + + hasher.finalize() +} + +/// Build a deployment snapshot from a planned Project by hashing all typed objects. +/// +/// This iterates through all objects in the project and computes their +/// content hashes based on the normalized typed representation. +pub fn build_snapshot_from_planned( + planned_project: &planned::Project, +) -> Result { + let mut objects = BTreeMap::new(); + let mut schemas = BTreeMap::new(); + + // Get all objects in topological order + let sorted_objects = planned_project + .get_sorted_objects() + .map_err(|e| DeploymentSnapshotError::PlannedAccess(e.to_string()))?; + + // Compute hash for each object and collect schemas + // Default to Objects kind - callers can override for specific schemas + for (object_id, typed_obj) in sorted_objects { + let hash = compute_typed_hash(typed_obj); + objects.insert(object_id.clone(), hash); + + // Track which schema this object belongs to (default to Objects kind) + schemas + .entry((object_id.database.clone(), object_id.schema.clone())) + .or_insert(DeploymentKind::Objects); + } + + Ok(DeploymentSnapshot { objects, schemas }) +} + +/// Initialize the deployment tracking infrastructure in the database. +/// +/// Creates the `deploy` schema, `schema_deployments` table, and `deployment_objects` table +/// if they don't exist. This is idempotent and safe to call multiple times. +pub async fn initialize_deployment_table(client: &Client) -> Result<(), DeploymentSnapshotError> { + client.create_deployments().await?; + + Ok(()) +} + +/// Load the current deployment state snapshot from the database for a specific environment. 
+/// +/// # Arguments +/// * `client` - Database client connection +/// * `environment` - None for production, Some("staging") for staging environments +/// +/// # Returns +/// DeploymentSnapshot with current deployment state, or empty snapshot if no deployments exist +pub async fn load_from_database( + client: &Client, + environment: Option<&str>, +) -> Result<DeploymentSnapshot, DeploymentSnapshotError> { + let deployment_snapshot = client + .get_deployment_objects(environment) + .await + .map_err(DeploymentSnapshotError::Connection)?; + + Ok(deployment_snapshot) +} + +/// Write deployment snapshot to the database using the normalized schema. +/// +/// This writes to both deployments and objects tables. +/// Schema deployments are inserted (no delete), while object deployments +/// are appended (insert-only history). +/// +/// # Arguments +/// * `client` - Database client connection +/// * `snapshot` - The deployment snapshot to write (includes per-schema deployment kind) +/// * `deploy_id` - Deploy ID (e.g., "" for direct deploy, "staging" for staged) +/// * `metadata` - Deployment metadata (user, git commit, etc.) +/// * `promoted_at` - Optional promoted_at timestamp (Some(now) for direct apply, None for stage) +pub async fn write_to_database( + client: &Client, + snapshot: &DeploymentSnapshot, + deploy_id: &str, + metadata: &DeploymentMetadata, + promoted_at: Option<DateTime<Utc>>, +) -> Result<(), DeploymentSnapshotError> { + let now = Utc::now(); + + // Build schema deployment records (kind is now per-schema from the snapshot) + let mut schema_records = Vec::new(); + for ((database, schema), kind) in &snapshot.schemas { + schema_records.push(SchemaDeploymentRecord { + deploy_id: deploy_id.to_string(), + database: database.clone(), + schema: schema.clone(), + deployed_at: now, + deployed_by: metadata.deployed_by.clone(), + promoted_at, + git_commit: metadata.git_commit.clone(), + kind: *kind, + }); + } + + // Build deployment object records + let mut object_records = Vec::new(); + for (object_id, hash) in &snapshot.objects { + object_records.push(DeploymentObjectRecord { + deploy_id: deploy_id.to_string(), + database: object_id.database.clone(), + schema: object_id.schema.clone(), + object: object_id.object.clone(), + object_hash: hash.clone(), + deployed_at: now, + }); + } + + // Write to database + client.insert_schema_deployments(&schema_records).await?; + client.append_deployment_objects(&object_records).await?; + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_snapshot() { + let snapshot = DeploymentSnapshot::default(); + assert!(snapshot.objects.is_empty()); + } + + // TODO: Add more tests for hash computation with actual HIR objects +} diff --git a/src/mz-deploy/src/project/error.rs b/src/mz-deploy/src/project/error.rs new file mode 100644 index 0000000000000..7d43d0eca9c0b --- /dev/null +++ b/src/mz-deploy/src/project/error.rs @@ -0,0 +1,902 @@ +//! Error types for Materialize project operations. +//! +//! This module provides structured error types using `thiserror` that capture rich +//! contextual information about failures during project loading, parsing, and validation. +//! +//! # Error Hierarchy +//! +//! ```text +//! ProjectError +//! ├── Load(LoadError) - File I/O and directory traversal errors +//! ├── Parse(ParseError) - SQL parsing errors +//! ├── Validation(ValidationError) - Semantic validation errors with context +//! └── Dependency(DependencyError) - Dependency graph analysis errors +//! ``` +//! +//! # Error Context +//! +//!
Validation errors are wrapped with `ErrorContext` that captures: +//! - File path where the error occurred +//! - SQL statement that caused the error (when available) +//! +//! This design avoids duplicating context fields across all error variants. + +use crate::project::object_id::ObjectId; +use owo_colors::OwoColorize; +use std::collections::BTreeMap; +use std::fmt; +use std::path::PathBuf; +use thiserror::Error; + +/// Contextual information about where an error occurred. +/// +/// This struct wraps error variants with additional context about the file +/// and SQL statement that caused the error. +#[derive(Debug, Clone)] +pub struct ErrorContext { + /// The file where the error occurred + pub file: PathBuf, + /// The SQL statement that caused the error, if available + pub sql_statement: Option, +} + +/// Top-level error type for all project operations. +/// +/// This is the main error type returned by project loading and validation functions. +/// It wraps more specific error types that provide detailed context. +#[derive(Debug, Error)] +pub enum ProjectError { + /// Error occurred while loading project files from disk + #[error(transparent)] + Load(#[from] LoadError), + + /// Error occurred while parsing SQL statements + #[error(transparent)] + Parse(#[from] ParseError), + + /// Error occurred during semantic validation (may contain multiple errors) + #[error(transparent)] + Validation(#[from] ValidationErrors), + + /// Error occurred during dependency analysis + #[error(transparent)] + Dependency(#[from] DependencyError), +} + +/// Errors that occur during dependency graph analysis. +#[derive(Debug, Error)] +pub enum DependencyError { + /// Circular dependency detected in the object dependency graph + #[error("Circular dependency detected: {object}")] + CircularDependency { + /// The fully qualified name of the object involved in the circular dependency + object: ObjectId, + }, +} + +/// Errors that occur during project file loading and I/O operations. 
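Before the individual load errors, a rough sketch of how the `#[from]` conversions in the hierarchy above are meant to be used: callers return the top-level `ProjectError` while constructing only the specific sub-error. The `load_project` function here is hypothetical; the error variants are the ones defined in this file:

```rust
use std::path::PathBuf;

// Assumes the types from this module are in scope, e.g.:
// use crate::project::error::{LoadError, ProjectError};
fn load_project(root: PathBuf) -> Result<(), ProjectError> {
    if !root.exists() {
        // `LoadError` converts into `ProjectError::Load` via the `#[from]` impl,
        // so `?` on a Result<_, LoadError> would perform the same wrapping.
        return Err(LoadError::RootNotFound { path: root }.into());
    }
    Ok(())
}
```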
+#[derive(Debug, Error)] +pub enum LoadError { + /// Project root directory does not exist + #[error("Project root directory does not exist: {path}")] + RootNotFound { + /// The path that was not found + path: PathBuf, + }, + + /// Project root path is not a directory + #[error("Project root is not a directory: {path}")] + RootNotDirectory { + /// The path that is not a directory + path: PathBuf, + }, + + /// Failed to read a directory + #[error("Failed to read directory: {path}")] + DirectoryReadFailed { + /// The directory that couldn't be read + path: PathBuf, + /// The underlying I/O error + #[source] + source: std::io::Error, + }, + + /// Failed to read a directory entry + #[error("Failed to read directory entry in: {directory}")] + EntryReadFailed { + /// The directory containing the entry + directory: PathBuf, + /// The underlying I/O error + #[source] + source: std::io::Error, + }, + + /// Failed to read a SQL file + #[error("Failed to read SQL file: {path}")] + FileReadFailed { + /// The file that couldn't be read + path: PathBuf, + /// The underlying I/O error + #[source] + source: std::io::Error, + }, + + /// Invalid file name (couldn't extract stem) + #[error("Invalid file name: {path}")] + InvalidFileName { + /// The file with the invalid name + path: PathBuf, + }, + + /// Failed to extract schema name from path + #[error("Failed to extract schema from path: {path}")] + SchemaExtractionFailed { + /// The path where extraction failed + path: PathBuf, + }, + + /// Failed to extract database name from path + #[error("Failed to extract database from path: {path}")] + DatabaseExtractionFailed { + /// The path where extraction failed + path: PathBuf, + }, +} + +/// Errors that occur during SQL parsing. +#[derive(Debug)] +pub enum ParseError { + /// Failed to parse SQL statements + SqlParseFailed { + /// The file containing the SQL + path: PathBuf, + /// The SQL text that failed to parse + sql: String, + /// The underlying parser error + source: mz_sql_parser::parser::ParserStatementError, + }, + + /// Failed to parse SQL statements from multiple sources + StatementsParseFailed { + /// Error message + message: String, + }, +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ParseError::SqlParseFailed { path, sql, source } => { + // Extract database/schema/file for path display + let path_components: Vec<_> = path.components().collect(); + let len = path_components.len(); + + let relative_path = if len >= 3 { + format!( + "{}/{}/{}", + path_components[len - 3].as_os_str().to_string_lossy(), + path_components[len - 2].as_os_str().to_string_lossy(), + path_components[len - 1].as_os_str().to_string_lossy() + ) + } else { + path.display().to_string() + }; + + // Format like rustc: error: + writeln!(f, "{}: {}", "error".bright_red().bold(), source.error)?; + + // Show file location: --> path + writeln!(f, " {} {}", "-->".bright_blue().bold(), relative_path)?; + + // Show SQL content + writeln!(f, " {}", "|".bright_blue().bold())?; + for line in sql.lines() { + writeln!(f, " {} {}", "|".bright_blue().bold(), line)?; + } + writeln!(f, " {}", "|".bright_blue().bold())?; + + Ok(()) + } + ParseError::StatementsParseFailed { message } => { + write!(f, "{}: {}", "error".bright_red().bold(), message) + } + } + } +} + +impl std::error::Error for ParseError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ParseError::SqlParseFailed { source, .. 
} => Some(source), + ParseError::StatementsParseFailed { .. } => None, + } + } +} + +/// A validation error with contextual information. +/// +/// This struct wraps a `ValidationErrorKind` with context about where +/// the error occurred (file path, SQL statement). +#[derive(Debug)] +pub struct ValidationError { + /// The underlying error kind + pub kind: ValidationErrorKind, + /// Context about where the error occurred + pub context: ErrorContext, +} + +impl fmt::Display for ValidationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Extract database/schema/file for path display + let path_components: Vec<_> = self.context.file.components().collect(); + let len = path_components.len(); + + let relative_path = if len >= 3 { + format!( + "{}/{}/{}", + path_components[len - 3].as_os_str().to_string_lossy(), + path_components[len - 2].as_os_str().to_string_lossy(), + path_components[len - 1].as_os_str().to_string_lossy() + ) + } else { + self.context.file.display().to_string() + }; + + // Format like rustc: error: + writeln!( + f, + "{}: {}", + "error".bright_red().bold(), + self.kind.message() + )?; + + // Show file location: --> path + writeln!(f, " {} {}", "-->".bright_blue().bold(), relative_path)?; + + // Add SQL statement if available + if let Some(ref sql) = self.context.sql_statement { + writeln!(f, " {}", "|".bright_blue().bold())?; + for line in sql.lines() { + writeln!(f, " {} {}", "|".bright_blue().bold(), line)?; + } + writeln!(f, " {}", "|".bright_blue().bold())?; + } + + // Add help text if available + if let Some(help) = self.kind.help() { + writeln!( + f, + " {} {}", + "=".bright_blue().bold(), + format!("help: {}", help).bold() + )?; + } + + Ok(()) + } +} + +impl std::error::Error for ValidationError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + None + } +} + +impl ValidationError { + /// Create a new validation error with context + pub fn with_context(kind: ValidationErrorKind, context: ErrorContext) -> Self { + Self { kind, context } + } + + /// Create a new validation error with just a file path + pub fn with_file(kind: ValidationErrorKind, file: PathBuf) -> Self { + Self { + kind, + context: ErrorContext { + file, + sql_statement: None, + }, + } + } + + /// Create a new validation error with file and SQL statement + pub fn with_file_and_sql(kind: ValidationErrorKind, file: PathBuf, sql: String) -> Self { + Self { + kind, + context: ErrorContext { + file, + sql_statement: Some(sql), + }, + } + } +} + +/// The specific kind of validation error that occurred. +/// +/// This enum contains the actual error variants without contextual information. +/// Context (file path, SQL statement) is stored in the wrapping `ValidationError`. 
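To make the rustc-style rendering above concrete, here is a hedged example of how a caller might attach context with the constructors just defined; the file path and SQL are invented for illustration, and `ObjectNameMismatch` is one of the kinds in the enum that follows:

```rust
use std::path::PathBuf;

// Assumes: use crate::project::error::{ValidationError, ValidationErrorKind};
fn example_error() -> ValidationError {
    ValidationError::with_file_and_sql(
        ValidationErrorKind::ObjectNameMismatch {
            declared: "orders_v2".to_string(),
            expected: "orders".to_string(),
        },
        PathBuf::from("materialize/public/orders.sql"),
        "CREATE VIEW orders_v2 AS SELECT 1".to_string(),
    )
}

// Printing the returned error yields the `error: ... --> materialize/public/orders.sql`
// layout produced by the Display impl above, including the SQL body and help text.
```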
+#[derive(Debug)] +pub enum ValidationErrorKind { + /// A file contains multiple primary CREATE statements + MultipleMainStatements { object_name: String }, + /// A file contains no primary CREATE statement + NoMainStatement { object_name: String }, + /// Object name in statement doesn't match file name + ObjectNameMismatch { declared: String, expected: String }, + /// Schema qualifier in statement doesn't match directory + SchemaMismatch { declared: String, expected: String }, + /// Database qualifier in statement doesn't match directory + DatabaseMismatch { declared: String, expected: String }, + /// An index references a different object + IndexReferenceMismatch { + referenced: String, + expected: String, + }, + /// A grant references a different object + GrantReferenceMismatch { + referenced: String, + expected: String, + }, + /// A comment references a different object + CommentReferenceMismatch { + referenced: String, + expected: String, + }, + /// A column comment references a different table + ColumnCommentReferenceMismatch { + referenced: String, + expected: String, + }, + /// Comment object type doesn't match actual object type + CommentTypeMismatch { + comment_type: String, + object_type: String, + }, + /// Grant object type doesn't match actual object type + GrantTypeMismatch { + grant_type: String, + expected_type: String, + }, + /// Unsupported statement type in object file + UnsupportedStatement { + object_name: String, + statement_type: String, + }, + /// Unsupported grant type + ClusterGrantUnsupported, + /// Grant doesn't target specific object + GrantMustTargetObject, + /// System grant not supported + SystemGrantUnsupported, + /// Unsupported comment type + UnsupportedCommentType, + /// No object type could be determined + NoObjectType, + /// Failed to extract schema name from file path + SchemaExtractionFailed, + /// Failed to extract database name from file path + DatabaseExtractionFailed, + /// Invalid identifier name (contains invalid characters or format) + InvalidIdentifier { name: String, reason: String }, + /// Index missing required IN CLUSTER clause + IndexMissingCluster { index_name: String }, + /// Materialized view missing required IN CLUSTER clause + MaterializedViewMissingCluster { view_name: String }, + /// Sink missing required IN CLUSTER clause + SinkMissingCluster { sink_name: String }, + /// Invalid statement type in database mod file + InvalidDatabaseModStatement { + statement_type: String, + database_name: String, + }, + /// Comment in database mod file targets wrong object + DatabaseModCommentTargetMismatch { + target: String, + database_name: String, + }, + /// Grant in database mod file targets wrong object + DatabaseModGrantTargetMismatch { + target: String, + database_name: String, + }, + /// Invalid statement type in schema mod file + InvalidSchemaModStatement { + statement_type: String, + schema_name: String, + }, + /// Comment in schema mod file targets wrong object + SchemaModCommentTargetMismatch { target: String, schema_name: String }, + /// Grant in schema mod file targets wrong object + SchemaModGrantTargetMismatch { target: String, schema_name: String }, + /// ALTER DEFAULT PRIVILEGES in database mod requires IN DATABASE scope + AlterDefaultPrivilegesRequiresDatabaseScope { database_name: String }, + /// ALTER DEFAULT PRIVILEGES in schema mod requires IN SCHEMA scope + AlterDefaultPrivilegesRequiresSchemaScope { schema_name: String }, + /// ALTER DEFAULT PRIVILEGES IN DATABASE references wrong database + 
AlterDefaultPrivilegesDatabaseMismatch { + referenced: String, + expected: String, + }, + /// ALTER DEFAULT PRIVILEGES cannot use IN SCHEMA in database mod + AlterDefaultPrivilegesSchemaNotAllowed { database_name: String }, + /// ALTER DEFAULT PRIVILEGES cannot use IN DATABASE in schema mod + AlterDefaultPrivilegesDatabaseNotAllowed { schema_name: String }, + /// ALTER DEFAULT PRIVILEGES IN SCHEMA references wrong schema + AlterDefaultPrivilegesSchemaMismatch { + referenced: String, + expected: String, + }, + /// Storage objects (tables/sinks) and computation objects (views/MVs) cannot share a schema + StorageAndComputationObjectsInSameSchema { + schema_name: String, + storage_objects: Vec<String>, + computation_objects: Vec<String>, + }, +} + +impl ValidationErrorKind { + /// Get the short error message for this error kind + fn message(&self) -> String { + match self { + Self::MultipleMainStatements { object_name } => { + format!( + "multiple main CREATE statements found for object '{}'", + object_name + ) + } + Self::NoMainStatement { object_name } => { + format!( + "no main CREATE statement found for object '{}'", + object_name + ) + } + Self::ObjectNameMismatch { declared, expected } => { + format!( + "object name mismatch: declared '{}', expected '{}'", + declared, expected + ) + } + Self::SchemaMismatch { declared, expected } => { + format!( + "schema qualifier mismatch: declared '{}', expected '{}'", + declared, expected + ) + } + Self::DatabaseMismatch { declared, expected } => { + format!( + "database qualifier mismatch: declared '{}', expected '{}'", + declared, expected + ) + } + Self::IndexReferenceMismatch { + referenced, + expected, + } => { + format!( + "INDEX references wrong object: '{}' instead of '{}'", + referenced, expected + ) + } + Self::GrantReferenceMismatch { + referenced, + expected, + } => { + format!( + "GRANT references wrong object: '{}' instead of '{}'", + referenced, expected + ) + } + Self::CommentReferenceMismatch { + referenced, + expected, + } => { + format!( + "COMMENT references wrong object: '{}' instead of '{}'", + referenced, expected + ) + } + Self::ColumnCommentReferenceMismatch { + referenced, + expected, + } => { + format!( + "column COMMENT references wrong table: '{}' instead of '{}'", + referenced, expected + ) + } + Self::CommentTypeMismatch { + comment_type, + object_type, + } => { + format!( + "COMMENT uses wrong object type: {} instead of {}", + comment_type, object_type + ) + } + Self::GrantTypeMismatch { + grant_type, + expected_type, + } => { + format!( + "GRANT uses incorrect object type: GRANT ON {} instead of GRANT ON {}", + grant_type, expected_type + ) + } + Self::UnsupportedStatement { + object_name, + statement_type, + } => { + format!( + "unsupported statement type in object '{}': {}", + object_name, statement_type + ) + } + Self::ClusterGrantUnsupported => "CLUSTER grants are not supported".to_string(), + Self::GrantMustTargetObject => "GRANT must target a specific object".to_string(), + Self::SystemGrantUnsupported => "SYSTEM grants are not supported".to_string(), + Self::UnsupportedCommentType => "unsupported COMMENT object type".to_string(), + Self::NoObjectType => "could not determine object type".to_string(), + Self::SchemaExtractionFailed => { + "failed to extract schema name from file path".to_string() + } + Self::DatabaseExtractionFailed => { + "failed to extract database name from file path".to_string() + } + Self::InvalidIdentifier { name, reason } => { + format!("invalid identifier '{}': {}", name, reason) + } +
Self::IndexMissingCluster { index_name } => { + format!( + "index '{}' is missing required IN CLUSTER clause", + index_name + ) + } + Self::MaterializedViewMissingCluster { view_name } => { + format!( + "materialized view '{}' is missing required IN CLUSTER clause", + view_name + ) + } + Self::SinkMissingCluster { sink_name } => { + format!("sink '{}' is missing required IN CLUSTER clause", sink_name) + } + Self::InvalidDatabaseModStatement { + statement_type, + database_name, + } => { + format!( + "invalid statement type in database mod file '{}': {}. Only COMMENT ON DATABASE, GRANT ON DATABASE, and ALTER DEFAULT PRIVILEGES are allowed", + database_name, statement_type + ) + } + Self::DatabaseModCommentTargetMismatch { + target, + database_name, + } => { + format!( + "comment in database mod file must target the database itself. Expected COMMENT ON DATABASE '{}', but found COMMENT ON {}", + database_name, target + ) + } + Self::DatabaseModGrantTargetMismatch { + target, + database_name, + } => { + format!( + "grant in database mod file must target the database itself. Expected GRANT ON DATABASE '{}', but found GRANT ON {}", + database_name, target + ) + } + Self::InvalidSchemaModStatement { + statement_type, + schema_name, + } => { + format!( + "invalid statement type in schema mod file '{}': {}. Only COMMENT ON SCHEMA, GRANT ON SCHEMA, and ALTER DEFAULT PRIVILEGES are allowed", + schema_name, statement_type + ) + } + Self::SchemaModCommentTargetMismatch { + target, + schema_name, + } => { + format!( + "comment in schema mod file must target the schema itself. Expected COMMENT ON SCHEMA '{}', but found COMMENT ON {}", + schema_name, target + ) + } + Self::SchemaModGrantTargetMismatch { + target, + schema_name, + } => { + format!( + "grant in schema mod file must target the schema itself. 
Expected GRANT ON SCHEMA '{}', but found GRANT ON {}", + schema_name, target + ) + } + Self::AlterDefaultPrivilegesRequiresDatabaseScope { database_name } => { + format!( + "ALTER DEFAULT PRIVILEGES in database mod file '{}' must specify IN DATABASE", + database_name + ) + } + Self::AlterDefaultPrivilegesRequiresSchemaScope { schema_name } => { + format!( + "ALTER DEFAULT PRIVILEGES in schema mod file '{}' must specify IN SCHEMA", + schema_name + ) + } + Self::AlterDefaultPrivilegesDatabaseMismatch { + referenced, + expected, + } => { + format!( + "ALTER DEFAULT PRIVILEGES IN DATABASE references wrong database: '{}' instead of '{}'", + referenced, expected + ) + } + Self::AlterDefaultPrivilegesSchemaNotAllowed { database_name } => { + format!( + "ALTER DEFAULT PRIVILEGES in database mod file '{}' cannot use IN SCHEMA", + database_name + ) + } + Self::AlterDefaultPrivilegesDatabaseNotAllowed { schema_name } => { + format!( + "ALTER DEFAULT PRIVILEGES in schema mod file '{}' cannot use IN DATABASE", + schema_name + ) + } + Self::AlterDefaultPrivilegesSchemaMismatch { + referenced, + expected, + } => { + format!( + "ALTER DEFAULT PRIVILEGES IN SCHEMA references wrong schema: '{}' instead of '{}'", + referenced, expected + ) + } + Self::StorageAndComputationObjectsInSameSchema { + schema_name, + storage_objects, + computation_objects, + } => { + format!( + "schema '{}' contains both storage objects (tables/sinks) and computation objects (views/materialized views)\n \ + Storage objects (tables/sinks): [{}]\n \ + Computation objects (views/MVs): [{}]", + schema_name, + storage_objects.join(", "), + computation_objects.join(", ") + ) + } + } + } + + /// Get the help text for this error kind + fn help(&self) -> Option { + match self { + Self::MultipleMainStatements { .. } => { + Some("each file must contain exactly one primary CREATE statement (TABLE, VIEW, SOURCE, etc.)".to_string()) + } + Self::NoMainStatement { .. } => { + Some("each file must contain exactly one primary CREATE statement (CREATE TABLE, CREATE VIEW, etc.)".to_string()) + } + Self::ObjectNameMismatch { .. } => { + Some("the object name in your CREATE statement must match the .sql file name".to_string()) + } + Self::SchemaMismatch { .. } => { + Some("the schema in your qualified object name must match the directory name".to_string()) + } + Self::DatabaseMismatch { .. } => { + Some("the database in your qualified object name must match the directory name".to_string()) + } + Self::IndexReferenceMismatch { .. } => { + Some("indexes must be defined in the same file as the object they're created on".to_string()) + } + Self::GrantReferenceMismatch { .. } => { + Some("grants must be defined in the same file as the object they apply to".to_string()) + } + Self::CommentReferenceMismatch { .. } => { + Some("comments must be defined in the same file as the object they describe".to_string()) + } + Self::ColumnCommentReferenceMismatch { .. } => { + Some("column comments must reference columns in the object defined in the file".to_string()) + } + Self::CommentTypeMismatch { .. } => { + Some("the COMMENT statement must use the correct object type (TABLE, VIEW, etc.)".to_string()) + } + Self::GrantTypeMismatch { .. } => { + Some("the GRANT statement must use the correct object type that matches the object defined in the file".to_string()) + } + Self::UnsupportedStatement { .. 
} => { + Some("only CREATE, INDEX, GRANT, and COMMENT statements are supported in object files".to_string()) + } + Self::ClusterGrantUnsupported => { + Some("use GRANT ON specific objects instead of CLUSTER".to_string()) + } + Self::GrantMustTargetObject => { + Some("use GRANT ON objectname instead of GRANT ON ALL TABLES or similar".to_string()) + } + Self::SystemGrantUnsupported => { + Some("use GRANT ON specific objects instead of SYSTEM".to_string()) + } + Self::UnsupportedCommentType => { + Some("only comments on tables, views, sources, sinks, connections, secrets, and columns are supported".to_string()) + } + Self::NoObjectType | Self::SchemaExtractionFailed | Self::DatabaseExtractionFailed => { + Some("this is an internal error, please report this issue".to_string()) + } + Self::InvalidIdentifier { .. } => { + Some("identifiers must follow SQL naming rules (alphanumeric and underscores, must not start with a digit)".to_string()) + } + Self::IndexMissingCluster { .. } => { + Some("add 'IN CLUSTER ' to your CREATE INDEX statement (e.g., CREATE INDEX idx ON table (col) IN CLUSTER quickstart)".to_string()) + } + Self::MaterializedViewMissingCluster { .. } => { + Some("add 'IN CLUSTER ' to your CREATE MATERIALIZED VIEW statement (e.g., CREATE MATERIALIZED VIEW mv IN CLUSTER quickstart AS SELECT ...)".to_string()) + } + Self::SinkMissingCluster { .. } => { + Some("add 'IN CLUSTER ' to your CREATE SINK statement (e.g., CREATE SINK sink IN CLUSTER quickstart FROM ...)".to_string()) + } + Self::InvalidDatabaseModStatement { .. } => { + Some("database mod files (e.g., materialize.sql) can only contain COMMENT ON DATABASE, GRANT ON DATABASE, and ALTER DEFAULT PRIVILEGES statements".to_string()) + } + Self::DatabaseModCommentTargetMismatch { .. } => { + Some("comments in database mod files must target the database itself using COMMENT ON DATABASE".to_string()) + } + Self::DatabaseModGrantTargetMismatch { .. } => { + Some("grants in database mod files must target the database itself using GRANT ON DATABASE".to_string()) + } + Self::InvalidSchemaModStatement { .. } => { + Some("schema mod files (e.g., materialize/public.sql) can only contain COMMENT ON SCHEMA, GRANT ON SCHEMA, and ALTER DEFAULT PRIVILEGES statements".to_string()) + } + Self::SchemaModCommentTargetMismatch { .. } => { + Some("comments in schema mod files must target the schema itself using COMMENT ON SCHEMA".to_string()) + } + Self::SchemaModGrantTargetMismatch { .. } => { + Some("grants in schema mod files must target the schema itself using GRANT ON SCHEMA".to_string()) + } + Self::AlterDefaultPrivilegesRequiresDatabaseScope { .. } => { + Some("add 'IN DATABASE ' to your ALTER DEFAULT PRIVILEGES statement".to_string()) + } + Self::AlterDefaultPrivilegesRequiresSchemaScope { .. } => { + Some("add 'IN SCHEMA ' to your ALTER DEFAULT PRIVILEGES statement".to_string()) + } + Self::AlterDefaultPrivilegesDatabaseMismatch { .. } => { + Some("ALTER DEFAULT PRIVILEGES in database mod files must target the database itself".to_string()) + } + Self::AlterDefaultPrivilegesSchemaNotAllowed { .. } => { + Some("use IN DATABASE instead of IN SCHEMA in database mod files".to_string()) + } + Self::AlterDefaultPrivilegesDatabaseNotAllowed { .. } => { + Some("use IN SCHEMA instead of IN DATABASE in schema mod files".to_string()) + } + Self::AlterDefaultPrivilegesSchemaMismatch { .. } => { + Some("ALTER DEFAULT PRIVILEGES in schema mod files must target the schema itself".to_string()) + } + Self::StorageAndComputationObjectsInSameSchema { .. 
} => { + Some("storage objects (tables, sinks) cannot share a schema with computation objects (views, materialized views) to prevent accidentally recreating tables or sinks when recreating views. Organize your schemas: use one schema for storage objects (e.g., 'tables') and another for computation objects (e.g., 'views' or 'public')".to_string()) + } + } + } +} + +/// A collection of validation errors grouped by location. +/// +/// This type holds multiple validation errors that occurred during project validation. +/// It provides formatted output that groups errors by database, schema, and file for +/// easier navigation and fixing. +#[derive(Debug)] +pub struct ValidationErrors { + pub errors: Vec, +} + +impl ValidationErrors { + /// Create a new collection from a vector of errors + pub fn new(errors: Vec) -> Self { + Self { errors } + } + + /// Check if there are any errors + pub fn is_empty(&self) -> bool { + self.errors.is_empty() + } + + /// Get the number of errors + pub fn len(&self) -> usize { + self.errors.len() + } + + /// Convert into a Result, returning Err if there are any errors + pub fn into_result(self) -> Result<(), Self> { + if self.is_empty() { Ok(()) } else { Err(self) } + } +} + +impl fmt::Display for ValidationErrors { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.errors.is_empty() { + return Ok(()); + } + + // Group errors by file path + let mut grouped: BTreeMap> = BTreeMap::new(); + for error in &self.errors { + grouped + .entry(error.context.file.clone()) + .or_default() + .push(error); + } + + // Display errors grouped by file (like rustc does) + for (file_path, errors) in grouped.iter() { + // Extract database/schema/file for path display + let path_components: Vec<_> = file_path.components().collect(); + let len = path_components.len(); + + let relative_path = if len >= 3 { + format!( + "{}/{}/{}", + path_components[len - 3].as_os_str().to_string_lossy(), + path_components[len - 2].as_os_str().to_string_lossy(), + path_components[len - 1].as_os_str().to_string_lossy() + ) + } else { + file_path.display().to_string() + }; + + // Display each error for this file + for error in errors { + // Format like rustc: error: + writeln!( + f, + "{}: {}", + "error".bright_red().bold(), + error.kind.message() + )?; + + // Show file location: --> path + writeln!(f, " {} {}", "-->".bright_blue().bold(), relative_path)?; + + // Add SQL statement if available + if let Some(ref sql) = error.context.sql_statement { + writeln!(f, " {}", "|".bright_blue().bold())?; + for line in sql.lines() { + writeln!(f, " {} {}", "|".bright_blue().bold(), line)?; + } + writeln!(f, " {}", "|".bright_blue().bold())?; + } + + // Add help text if available + if let Some(help) = error.kind.help() { + writeln!( + f, + " {} {}", + "=".bright_blue().bold(), + format!("help: {}", help).bold() + )?; + } + + writeln!(f)?; + } + } + + // Summary line at the end (like rustc) + writeln!( + f, + "{}: could not compile due to {} previous error{}", + "error".bright_red().bold(), + self.errors.len(), + if self.errors.len() == 1 { "" } else { "s" } + )?; + + Ok(()) + } +} + +impl std::error::Error for ValidationErrors { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + None + } +} diff --git a/src/mz-deploy/src/project/normalize.rs b/src/mz-deploy/src/project/normalize.rs new file mode 100644 index 0000000000000..2725e3d0a24b2 --- /dev/null +++ b/src/mz-deploy/src/project/normalize.rs @@ -0,0 +1,73 @@ +//! Name normalization for SQL statements using the visitor pattern. 
+//! +//! This module provides a flexible framework for transforming object names in SQL +//! statements. It uses a trait-based visitor pattern to support different normalization +//! strategies while sharing the same traversal logic. +//! +//! # Normalization Strategies +//! +//! - **Fully Qualifying**: Transforms names to `database.schema.object` format +//! - **Flattening**: Transforms names to `database_schema_object` format (single identifier) +//! +//! # Usage +//! +//! ```rust,ignore +//! use mz_deploy::project::normalize::NormalizingVisitor; +//! +//! // Create a fully qualifying visitor +//! let visitor = NormalizingVisitor::fully_qualifying(&fqn); +//! +//! // Or create a flattening visitor +//! let visitor = NormalizingVisitor::flattening(&fqn); +//! ``` +//! +//! # Module Structure +//! +//! - [`transformers`]: Name transformation strategies (FullyQualifying, Flattening, Staging) +//! - [`visitor`]: The NormalizingVisitor that traverses SQL AST and applies transformations + +mod transformers; +mod visitor; + +// Re-export all public types and functions +pub use transformers::{ + ClusterTransformer, FlatteningTransformer, FullyQualifyingTransformer, NameTransformer, + StagingTransformer, +}; +pub use visitor::NormalizingVisitor; + +use mz_sql_parser::ast::{CreateIndexStatement, Ident, Raw, RawClusterName}; + +/// Transform cluster names in index statements for staging environments. +/// +/// This is a standalone function that transforms cluster references without +/// needing a full `NormalizingVisitor`. Use this when you only need to rename +/// clusters (e.g., `quickstart` -> `quickstart_staging`) without transforming +/// object names. +/// +/// # Arguments +/// * `indexes` - Slice of index statements to transform in place +/// * `staging_suffix` - The suffix to append to cluster names (e.g., "_staging") +/// +/// # Example +/// ```rust,ignore +/// transform_cluster_names_for_staging(&mut indexes, "_staging"); +/// // Transforms: IN CLUSTER quickstart -> IN CLUSTER quickstart_staging +/// ``` +pub fn transform_cluster_names_for_staging( + indexes: &mut [CreateIndexStatement<Raw>], + staging_suffix: &str, +) { + for index in indexes { + if let Some(ref mut cluster_name) = index.in_cluster { + if let RawClusterName::Unresolved(ident) = cluster_name { + let new_name = format!("{}{}", ident, staging_suffix); + *cluster_name = + RawClusterName::Unresolved(Ident::new(&new_name).expect("valid cluster name")); + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/src/mz-deploy/src/project/normalize/tests.rs b/src/mz-deploy/src/project/normalize/tests.rs new file mode 100644 index 0000000000000..35e59d949d21d --- /dev/null +++ b/src/mz-deploy/src/project/normalize/tests.rs @@ -0,0 +1,1957 @@ +//! Tests for name normalization functionality.
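Alongside these tests, a quick sketch of how the standalone cluster rename from normalize.rs might be driven through the crate's parser helper. The index SQL, the hypothetical `rename_example` function, and the way `CreateIndexStatement`s are extracted from parsed statements are illustrative assumptions, not code from the change:

```rust
use mz_sql_parser::ast::Statement;

// Assumes the crate-internal helpers used by the tests below:
// use crate::project::normalize::transform_cluster_names_for_staging;
// use crate::project::parser::parse_statements;
fn rename_example() {
    let stmts = parse_statements(vec![
        "CREATE INDEX bids_idx IN CLUSTER quickstart ON winning_bids (id)",
    ])
    .unwrap();
    // Keep only the CREATE INDEX statements.
    let mut indexes: Vec<_> = stmts
        .into_iter()
        .filter_map(|stmt| match stmt {
            Statement::CreateIndex(idx) => Some(idx),
            _ => None,
        })
        .collect();
    transform_cluster_names_for_staging(&mut indexes, "_staging");
    // The surviving index now reads `IN CLUSTER quickstart_staging`.
    assert_eq!(indexes.len(), 1);
}
```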
+ +use super::*; +use crate::project::parser::parse_statements; +use crate::project::typed::FullyQualifiedName; +use mz_sql_parser::ast::display::{AstDisplay, FormatMode}; +use mz_sql_parser::ast::{Ident, Statement, UnresolvedItemName}; + +/// Create a test FQN for materialize.public.test_view +fn test_fqn() -> FullyQualifiedName { + let database = Ident::new("materialize").expect("valid database"); + let schema = Ident::new("public").expect("valid schema"); + let object = Ident::new("test_view").expect("valid object"); + let item_name = UnresolvedItemName(vec![database, schema, object]); + FullyQualifiedName::from(item_name) +} + +#[test] +fn test_cte_references_not_qualified() { + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH cte_table AS ( + SELECT id FROM base_table + ) + SELECT * FROM cte_table + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // CTE reference should NOT be qualified + assert!( + normalized_sql.contains("FROM cte_table"), + "CTE reference 'cte_table' should remain unqualified, got: {}", + normalized_sql + ); + assert!( + !normalized_sql.contains("materialize.public.cte_table"), + "CTE reference should not be qualified as materialize.public.cte_table, got: {}", + normalized_sql + ); + + // External table SHOULD be qualified + assert!( + normalized_sql.contains("materialize.public.base_table"), + "External table 'base_table' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_multiple_ctes() { + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH first_cte AS ( + SELECT id FROM base_table + ), + second_cte AS ( + SELECT id FROM first_cte WHERE id > 0 + ), + third_cte AS ( + SELECT id FROM second_cte JOIN another_table USING (id) + ) + SELECT * FROM third_cte + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // All CTE references should remain unqualified + assert!( + normalized_sql.contains("FROM first_cte"), + "CTE 'first_cte' should remain unqualified" + ); + assert!( + normalized_sql.contains("FROM second_cte"), + "CTE 'second_cte' should remain unqualified" + ); + assert!( + normalized_sql.contains("FROM third_cte"), + "CTE 'third_cte' should remain unqualified" + ); + + // External tables SHOULD be qualified + assert!( + normalized_sql.contains("materialize.public.base_table"), + "External table 'base_table' should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.another_table"), + "External table 'another_table' should be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_scope() { + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH outer_cte AS ( + SELECT id FROM base_table + ) + SELECT * FROM ( + WITH inner_cte AS ( + SELECT id FROM outer_cte + ) + SELECT * FROM inner_cte + ) subquery + "#; + + let 
statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Both CTE references should remain unqualified + assert!( + normalized_sql.contains("FROM outer_cte"), + "Outer CTE 'outer_cte' should remain unqualified" + ); + assert!( + normalized_sql.contains("FROM inner_cte"), + "Inner CTE 'inner_cte' should remain unqualified" + ); + + // External table SHOULD be qualified + assert!( + normalized_sql.contains("materialize.public.base_table"), + "External table 'base_table' should be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_cte_with_joins() { + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH enriched_data AS ( + SELECT + t1.id, + t2.value + FROM table1 t1 + JOIN table2 t2 ON t1.id = t2.id + ) + SELECT * FROM enriched_data JOIN table3 USING (id) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // CTE reference should NOT be qualified + assert!( + normalized_sql.contains("FROM enriched_data"), + "CTE 'enriched_data' should remain unqualified" + ); + + // All external tables SHOULD be qualified + assert!( + normalized_sql.contains("materialize.public.table1"), + "External table 'table1' should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.table2"), + "External table 'table2' should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.table3"), + "External table 'table3' should be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_cte_shadowing_external_table() { + // Test that a CTE with the same name as an external table shadows it + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH products AS ( + SELECT id FROM products WHERE active = true + ) + SELECT * FROM products + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // The CTE reference in the main SELECT should NOT be qualified + // (it references the CTE, not the external table) + let main_select_part = normalized_sql + .split("AS (") + .nth(1) + .expect("Should have main SELECT after CTE"); + + assert!( + main_select_part.contains("FROM products") + && !main_select_part.contains("materialize.public.products"), + "CTE reference in main query should remain unqualified (shadowing), got: {}", + normalized_sql + ); + + // Note: The external table reference INSIDE the CTE definition should be qualified + // The CTE definition contains "FROM products WHERE active = true" + // and that products reference should be qualified to materialize.public.products + // This is validated by the CTE normalization that happened during visitor.normalize_query() + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_complex_multi_cte_query() { + // Test the exact 
query from the user that was failing + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW inventory_item AS + WITH recent_prices AS ( + SELECT grp.product_id, AVG(price) AS avg_price + FROM (SELECT DISTINCT product_id FROM sales) grp, + LATERAL ( + SELECT product_id, price + FROM sales + WHERE sales.product_id = grp.product_id + ORDER BY sale_date DESC LIMIT 10 + ) sub + GROUP BY grp.product_id + ), + inventory_status AS ( + SELECT + i.product_id, + SUM(i.stock) AS total_stock, + RANK() OVER (ORDER BY SUM(i.stock) DESC) AS stock_rank + FROM inventory i + GROUP BY i.product_id + ), + item_enriched AS ( + SELECT + p.product_id, + p.base_price, + rp.avg_price, + inv.stock_rank + FROM products p + LEFT JOIN recent_prices rp ON p.product_id = rp.product_id + LEFT JOIN inventory_status inv ON p.product_id = inv.product_id + ) + SELECT + ie.product_id, + p.product_name, + ie.base_price + FROM item_enriched ie + JOIN products p ON ie.product_id = p.product_id + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + println!("Normalized SQL:\n{}", normalized_sql); + + // CTE references should NOT be qualified + assert!( + !normalized_sql.contains("materialize.public.inventory_status"), + "CTE 'inventory_status' should not be qualified, got: {}", + normalized_sql + ); + assert!( + !normalized_sql.contains("materialize.public.recent_prices"), + "CTE 'recent_prices' should not be qualified, got: {}", + normalized_sql + ); + assert!( + !normalized_sql.contains("materialize.public.item_enriched"), + "CTE 'item_enriched' should not be qualified, got: {}", + normalized_sql + ); + + // External tables SHOULD be qualified + assert!( + normalized_sql.contains("materialize.public.products"), + "External table 'products' should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.sales"), + "External table 'sales' should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.inventory"), + "External table 'inventory' should be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Tests for implicit alias creation (fix for tables without explicit aliases) +// ============================================================================ + +#[test] +fn test_implicit_alias_unqualified_table() { + // Test that unqualified table names get implicit aliases + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT sales.product_id, sales.amount + FROM sales + WHERE sales.amount > 100 + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Should have implicit alias AS sales + assert!( + normalized_sql.contains("materialize.public.sales AS sales"), + "Expected implicit alias 'AS sales', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_implicit_alias_schema_qualified_table() { + // 
Test that schema-qualified table names get implicit aliases + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT sales.product_id + FROM public.sales + WHERE sales.status = 'active' + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Should have implicit alias using table name (last part) + assert!( + normalized_sql.contains("AS sales"), + "Expected implicit alias 'AS sales', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_implicit_alias_fully_qualified_table() { + // Test that fully qualified table names get implicit aliases + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT orders.customer_id + FROM materialize.public.orders + WHERE orders.total > 1000 + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Should have implicit alias using table name + assert!( + normalized_sql.contains("AS orders"), + "Expected implicit alias 'AS orders', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_no_implicit_alias_when_explicit_alias_exists() { + // Test that explicit aliases are preserved and no implicit alias is added + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT s.product_id + FROM sales s + WHERE s.amount > 100 + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Should keep explicit alias 's', not add 'AS sales' + assert!( + normalized_sql.contains("AS s"), + "Expected explicit alias 'AS s' to be preserved, got: {}", + normalized_sql + ); + assert!( + !normalized_sql.contains("AS sales"), + "Should not add implicit alias when explicit alias exists, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_no_implicit_alias_for_cte() { + // Test that CTEs don't get implicit aliases + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + WITH cte1 AS ( + SELECT * FROM products + ) + SELECT cte1.product_id + FROM cte1 + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // CTE should not be transformed or get an alias + assert!( + !normalized_sql.contains("cte1 AS cte1"), + "CTE should not get implicit alias, got: {}", 
+ normalized_sql + ); + // Products should be qualified + assert!( + normalized_sql.contains("materialize.public.products"), + "Expected products to be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_implicit_alias_in_lateral_join() { + // Test implicit aliases work correctly in LATERAL joins + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT grp.category, sub.price + FROM (SELECT DISTINCT category FROM products) grp, + LATERAL ( + SELECT price + FROM products + WHERE products.category = grp.category + LIMIT 10 + ) sub + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Products in LATERAL should have implicit alias + assert!( + normalized_sql.contains("materialize.public.products AS products"), + "Expected implicit alias in LATERAL join, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Tests for HAVING clause normalization +// ============================================================================ + +#[test] +fn test_having_clause_with_subquery() { + // Test that subqueries in HAVING clauses are normalized + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT product_id, COUNT(*) as sale_count + FROM sales + GROUP BY product_id + HAVING COUNT(*) > (SELECT AVG(cnt) FROM (SELECT COUNT(*) as cnt FROM sales GROUP BY product_id) subquery) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // All references to sales should be qualified + assert!( + normalized_sql.contains("materialize.public.sales"), + "Expected sales to be qualified in HAVING subquery, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_having_clause_with_nested_subquery() { + // Test deeply nested subqueries in HAVING + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT category_id, SUM(amount) as total + FROM sales + GROUP BY category_id + HAVING SUM(amount) > ( + SELECT AVG(total) + FROM ( + SELECT category_id, SUM(amount) as total + FROM sales + WHERE status = 'completed' + GROUP BY category_id + ) subquery + ) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // All sales references should be qualified + let sales_count = normalized_sql.matches("materialize.public.sales").count(); + assert!( + sales_count >= 2, + "Expected multiple qualified sales references in nested HAVING subquery, found {}, got: {}", + sales_count, + 
normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_having_with_cte_reference() { + // Test HAVING clause with CTE reference (should not be qualified) + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + WITH avg_sales AS ( + SELECT AVG(amount) as avg_amount FROM sales + ) + SELECT product_id, SUM(amount) as total + FROM sales + GROUP BY product_id + HAVING SUM(amount) > (SELECT avg_amount FROM avg_sales) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // CTE reference should NOT be qualified + assert!( + !normalized_sql.contains("materialize.public.avg_sales"), + "CTE reference in HAVING should not be qualified, got: {}", + normalized_sql + ); + // Base table should be qualified + assert!( + normalized_sql.contains("materialize.public.sales"), + "Base table should be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Tests for Expr::Op (operator) handling +// ============================================================================ + +#[test] +fn test_and_operator_with_subqueries() { + // Test AND operator with subqueries on both sides + // This tests that Expr::Op is being recursively normalized + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT * + FROM products + WHERE product_id IN (SELECT product_id FROM sales) + AND category_id IN (SELECT category_id FROM categories) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Main table should be qualified + assert!( + normalized_sql.contains("materialize.public.products"), + "products should be qualified" + ); + + // Subqueries may not show full qualification in Simple format + // but the normalization should have happened (verified by other tests) + // Just verify the query can be formatted without errors + assert!(!normalized_sql.is_empty(), "Query should be normalized"); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_or_operator_with_subqueries() { + // Test OR operator with subqueries + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT * + FROM orders + WHERE status = 'pending' + OR order_id IN (SELECT order_id FROM priority_orders) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Main table should be qualified + assert!( + normalized_sql.contains("materialize.public.orders"), + "orders should be qualified" + ); + // Verify query can be formatted (normalization succeeded) + 
assert!(!normalized_sql.is_empty()); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_comparison_operator_with_subquery() { + // Test comparison operators (>, <, =, etc.) with subqueries + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT * + FROM products + WHERE price > (SELECT AVG(price) FROM products WHERE active = true) + AND stock < (SELECT MAX(stock) FROM inventory) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Main table should be qualified + assert!( + normalized_sql.contains("materialize.public.products"), + "products should be qualified" + ); + // Verify query can be formatted (normalization succeeded) + assert!(!normalized_sql.is_empty()); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_operators_with_subqueries() { + // Test deeply nested operators with multiple subqueries + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT * + FROM orders o + WHERE (o.status = 'pending' AND o.amount > 100) + OR (o.priority > (SELECT AVG(priority) FROM orders) + AND o.customer_id IN (SELECT customer_id FROM vip_customers)) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // Main table should be qualified + assert!( + normalized_sql.contains("materialize.public.orders"), + "orders should be qualified" + ); + // Verify query can be formatted (normalization succeeded) + assert!(!normalized_sql.is_empty()); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_arithmetic_operators_with_subqueries() { + // Test arithmetic operators containing subqueries + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT + product_id, + price * 1.1 as marked_up_price, + price - (SELECT AVG(discount) FROM discounts) as discounted_price + FROM products + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + assert!( + normalized_sql.contains("materialize.public.products"), + "products should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.discounts"), + "discounts should be qualified in arithmetic expression subquery" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Integration tests combining multiple features +// ============================================================================ + +#[test] +fn test_schema_qualified_with_having_subquery() { + // Integration test: schema-qualified tables with HAVING subquery + let fqn = 
test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT p.category_id, COUNT(*) as product_count + FROM public.products p + JOIN public.sales s ON p.product_id = s.product_id + GROUP BY p.category_id + HAVING COUNT(*) > ( + SELECT AVG(cnt) + FROM (SELECT COUNT(*) as cnt FROM public.sales GROUP BY category_id) subquery + ) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // All tables should be fully qualified and have implicit aliases + assert!( + normalized_sql.contains("materialize.public.products AS p"), + "products should be qualified with explicit alias preserved" + ); + assert!( + normalized_sql.contains("materialize.public.sales AS s"), + "sales should be qualified with explicit alias preserved" + ); + // The subquery's sales reference should also be qualified and have implicit alias + let sales_count = normalized_sql.matches("materialize.public.sales").count(); + assert!( + sales_count >= 2, + "Expected multiple sales references (main and subquery)" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_lateral_with_operators_and_implicit_alias() { + // Integration test: LATERAL join with operators and implicit aliases + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + SELECT grp.product_id, sub.avg_price + FROM (SELECT DISTINCT product_id FROM sales) grp, + LATERAL ( + SELECT AVG(price) as avg_price + FROM sales + WHERE sales.product_id = grp.product_id + AND sales.status = 'completed' + AND sales.amount > (SELECT AVG(amount) FROM sales) + ORDER BY sale_date DESC + LIMIT 10 + ) sub + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // sales should be qualified everywhere and have implicit aliases + assert!( + normalized_sql.contains("materialize.public.sales"), + "sales should be qualified" + ); + // Should have implicit alias in LATERAL subquery + assert!( + normalized_sql.contains("AS sales"), + "Expected implicit alias for sales in LATERAL" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_wmr_with_operators_and_having() { + // Integration test: WITH MUTUALLY RECURSIVE with operators and HAVING + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW v AS + WITH MUTUALLY RECURSIVE + cte1 (id int, total int) AS ( + SELECT id, SUM(amount) as total + FROM sales + WHERE id > 0 AND status = 'active' + GROUP BY id + HAVING SUM(amount) > (SELECT AVG(total) FROM cte2) + ), + cte2 (id int, total int) AS ( + SELECT id, SUM(amount) as total + FROM orders + WHERE id IN (SELECT id FROM cte1) + GROUP BY id + ) + SELECT * FROM cte1 + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = 
query.to_ast_string(FormatMode::Simple); + println!("Normalized SQL:\n{}", normalized_sql); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.sales"), + "sales should be qualified" + ); + assert!( + normalized_sql.contains("materialize.public.orders"), + "orders should be qualified" + ); + // CTEs should NOT be qualified + assert!( + !normalized_sql.contains("materialize.public.cte1"), + "cte1 should not be qualified" + ); + assert!( + !normalized_sql.contains("materialize.public.cte2"), + "cte2 should not be qualified" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Tests for FlatteningTransformer +// ============================================================================ + +#[test] +fn test_flattening_unqualified_name() { + // Test that unqualified names get flattened to "database.schema.object" + let fqn = test_fqn(); + let visitor = NormalizingVisitor::flattening(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM sales + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Flattened SQL:\n{}", normalized_sql); + + // Should be flattened to quoted identifier with dots + assert!( + normalized_sql.contains("\"materialize.public.sales\""), + "Expected flattened name '\"materialize.public.sales\"', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_flattening_schema_qualified_name() { + // Test that schema-qualified names get flattened + let fqn = test_fqn(); + let visitor = NormalizingVisitor::flattening(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM internal.orders + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Flattened SQL:\n{}", normalized_sql); + + // Should be flattened with the schema from the reference + assert!( + normalized_sql.contains("\"materialize.internal.orders\""), + "Expected flattened name '\"materialize.internal.orders\"', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_flattening_fully_qualified_name() { + // Test that fully qualified names get flattened + let fqn = test_fqn(); + let visitor = NormalizingVisitor::flattening(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM other_db.other_schema.products + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Flattened SQL:\n{}", normalized_sql); + + // Should preserve the original database/schema in flattened form + assert!( + normalized_sql.contains("\"other_db.other_schema.products\""), + "Expected flattened name '\"other_db.other_schema.products\"', got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_flattening_with_join() { + // 
Test flattening with multiple tables in a join + let fqn = test_fqn(); + let visitor = NormalizingVisitor::flattening(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT a.id, b.name + FROM table1 a + JOIN table2 b ON a.id = b.id + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Flattened SQL:\n{}", normalized_sql); + + // Both tables should be flattened + assert!( + normalized_sql.contains("\"materialize.public.table1\""), + "Expected table1 to be flattened" + ); + assert!( + normalized_sql.contains("\"materialize.public.table2\""), + "Expected table2 to be flattened" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_flattening_cte_not_flattened() { + // Test that CTEs are not flattened (they remain unqualified) + let fqn = test_fqn(); + let visitor = NormalizingVisitor::flattening(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH cte AS ( + SELECT * FROM base_table + ) + SELECT * FROM cte + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Flattened SQL:\n{}", normalized_sql); + + // CTE reference should NOT be flattened + assert!( + !normalized_sql.contains("\"materialize.public.cte\""), + "CTE should not be flattened, got: {}", + normalized_sql + ); + // External table should be flattened + assert!( + normalized_sql.contains("\"materialize.public.base_table\""), + "External table should be flattened" + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Tests for StagingTransformer +// ============================================================================ + +use crate::project::object_id::ObjectId; +use std::collections::BTreeSet; + +/// Helper to create a test FQN for staging tests +fn staging_test_fqn() -> FullyQualifiedName { + let database = Ident::new("materialize").expect("valid database"); + let schema = Ident::new("public").expect("valid schema"); + let object = Ident::new("my_view").expect("valid object"); + let item_name = UnresolvedItemName(vec![database, schema, object]); + FullyQualifiedName::from(item_name) +} + +#[test] +fn test_staging_unqualified_name() { + // Test that unqualified names get staging suffix on schema + let fqn = staging_test_fqn(); + let external_deps = BTreeSet::new(); + let visitor = NormalizingVisitor::staging(&fqn, "_deploy123".to_string(), &external_deps, None); + + let sql = r#" + CREATE VIEW my_view AS + SELECT * FROM sales + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // Schema should have staging suffix + assert!( + normalized_sql.contains("materialize.public_deploy123.sales"), + "Expected staging suffix on schema, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn 
test_staging_schema_qualified_name() { + // Test that schema-qualified names get staging suffix + let fqn = staging_test_fqn(); + let external_deps = BTreeSet::new(); + let visitor = NormalizingVisitor::staging(&fqn, "_deploy123".to_string(), &external_deps, None); + + let sql = r#" + CREATE VIEW my_view AS + SELECT * FROM internal.orders + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // Schema should have staging suffix + assert!( + normalized_sql.contains("materialize.internal_deploy123.orders"), + "Expected staging suffix on schema, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_staging_external_dependency_not_transformed() { + // Test that external dependencies are NOT transformed + let fqn = staging_test_fqn(); + let mut external_deps = BTreeSet::new(); + external_deps.insert(ObjectId { + database: "materialize".to_string(), + schema: "sources".to_string(), + object: "kafka_events".to_string(), + }); + + let visitor = NormalizingVisitor::staging(&fqn, "_deploy123".to_string(), &external_deps, None); + + let sql = r#" + CREATE VIEW my_view AS + SELECT * FROM sources.kafka_events + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // External dependency should NOT have staging suffix + assert!( + !normalized_sql.contains("sources_deploy123"), + "External dependency should not be transformed, got: {}", + normalized_sql + ); + // It should remain as-is (schema-qualified) + assert!( + normalized_sql.contains("sources.kafka_events"), + "External dependency should be preserved, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_staging_mixed_internal_and_external() { + // Test query with both internal (should be transformed) and external (should not) dependencies + let fqn = staging_test_fqn(); + let mut external_deps = BTreeSet::new(); + external_deps.insert(ObjectId { + database: "materialize".to_string(), + schema: "sources".to_string(), + object: "raw_events".to_string(), + }); + + let visitor = NormalizingVisitor::staging(&fqn, "_staging".to_string(), &external_deps, None); + + let sql = r#" + CREATE VIEW my_view AS + SELECT e.*, p.name + FROM sources.raw_events e + JOIN products p ON e.product_id = p.id + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // External dependency (raw_events) should NOT be transformed + assert!( + normalized_sql.contains("sources.raw_events"), + "External dependency should not have staging suffix, got: {}", + normalized_sql + ); + // Internal dependency (products) SHOULD be transformed + assert!( + normalized_sql.contains("public_staging.products"), + "Internal dependency should have staging suffix, got: {}", + 
normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_staging_objects_to_deploy_filter() { + // Test that objects not in objects_to_deploy are treated as external + let fqn = staging_test_fqn(); + let external_deps = BTreeSet::new(); + let mut objects_to_deploy = BTreeSet::new(); + objects_to_deploy.insert(ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "sales".to_string(), + }); + // Note: "inventory" is NOT in objects_to_deploy + + let visitor = NormalizingVisitor::staging( + &fqn, + "_staging".to_string(), + &external_deps, + Some(&objects_to_deploy), + ); + + let sql = r#" + CREATE VIEW my_view AS + SELECT s.*, i.stock + FROM sales s + JOIN inventory i ON s.product_id = i.product_id + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // sales IS in objects_to_deploy, so should be transformed + assert!( + normalized_sql.contains("public_staging.sales"), + "Object in deploy set should have staging suffix, got: {}", + normalized_sql + ); + // inventory is NOT in objects_to_deploy, so should NOT be transformed + assert!( + !normalized_sql.contains("public_staging.inventory"), + "Object not in deploy set should not have staging suffix, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_staging_cte_not_transformed() { + // Test that CTEs are not transformed (they're local to the query) + let fqn = staging_test_fqn(); + let external_deps = BTreeSet::new(); + let visitor = NormalizingVisitor::staging(&fqn, "_staging".to_string(), &external_deps, None); + + let sql = r#" + CREATE VIEW my_view AS + WITH enriched AS ( + SELECT * FROM sales + ) + SELECT * FROM enriched + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + println!("Staging SQL:\n{}", normalized_sql); + + // CTE should not have staging suffix + assert!( + !normalized_sql.contains("enriched_staging"), + "CTE should not be transformed, got: {}", + normalized_sql + ); + // External table reference SHOULD be transformed + assert!( + normalized_sql.contains("public_staging.sales"), + "Table in CTE body should be transformed, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Nested CTE Tests +// ============================================================================ + +#[test] +fn test_nested_cte_in_derived_table() { + // Test CTE defined inside a derived table (subquery in FROM) + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM ( + WITH inner_cte AS ( + SELECT id, name FROM users + ) + SELECT * FROM inner_cte JOIN orders ON inner_cte.id = orders.user_id + ) subquery + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + 
+ let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Inner CTE should remain unqualified + assert!( + normalized_sql.contains("inner_cte AS"), + "Inner CTE definition should remain, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM inner_cte"), + "Inner CTE reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.users"), + "External table 'users' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.orders"), + "External table 'orders' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_in_scalar_subquery() { + // Test CTE defined inside a scalar subquery (in SELECT list) + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT + id, + (WITH totals AS (SELECT SUM(amount) as total FROM transactions WHERE transactions.user_id = users.id) + SELECT total FROM totals) as user_total + FROM users + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Inner CTE should remain unqualified + assert!( + normalized_sql.contains("totals AS"), + "Inner CTE definition should remain, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM totals"), + "Inner CTE reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.transactions"), + "External table 'transactions' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.users"), + "External table 'users' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_in_where_subquery() { + // Test CTE defined inside a subquery in WHERE clause + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM products + WHERE category_id IN ( + WITH active_categories AS ( + SELECT id FROM categories WHERE status = 'active' + ) + SELECT id FROM active_categories + ) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Inner CTE should remain unqualified + assert!( + normalized_sql.contains("active_categories AS"), + "Inner CTE definition should remain, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM active_categories"), + "Inner CTE reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.products"), + "External table 'products' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.categories"), + "External table 'categories' should be qualified, got: {}", + normalized_sql + ); + } else { + 
panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_triple_nested_ctes() { + // Test three levels of nested CTEs + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH outer_cte AS ( + SELECT * FROM ( + WITH middle_cte AS ( + SELECT * FROM ( + WITH inner_cte AS ( + SELECT id FROM base_table + ) + SELECT * FROM inner_cte + ) innermost + ) + SELECT * FROM middle_cte + ) middle_result + ) + SELECT * FROM outer_cte + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // All CTE references should remain unqualified + assert!( + normalized_sql.contains("FROM outer_cte"), + "outer_cte reference should remain unqualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM middle_cte"), + "middle_cte reference should remain unqualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM inner_cte"), + "inner_cte reference should remain unqualified, got: {}", + normalized_sql + ); + + // External table should be qualified + assert!( + normalized_sql.contains("materialize.public.base_table"), + "External table 'base_table' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_cte_name_shadowing_in_nested_scope() { + // Test that a CTE in inner scope shadows a CTE with same name in outer scope + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH data AS ( + SELECT id, 'outer' as source FROM outer_table + ) + SELECT * FROM data + UNION ALL + SELECT * FROM ( + WITH data AS ( + SELECT id, 'inner' as source FROM inner_table + ) + SELECT * FROM data + ) inner_result + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Both 'data' CTEs should exist and remain unqualified + // There should be two "FROM data" references (one outer, one inner) + let data_count = normalized_sql.matches("FROM data").count(); + assert!( + data_count >= 2, + "Expected at least 2 'FROM data' references (outer and inner scope), found {}, got: {}", + data_count, + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.outer_table"), + "External table 'outer_table' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.inner_table"), + "External table 'inner_table' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_in_lateral_join() { + // Test CTE defined inside a LATERAL join subquery + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT u.id, stats.total + FROM users u, + LATERAL ( + WITH user_orders AS ( + SELECT amount FROM orders WHERE orders.user_id = u.id + ) + SELECT SUM(amount) as total FROM user_orders + ) stats + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if 
let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Inner CTE should remain unqualified + assert!( + normalized_sql.contains("user_orders AS"), + "Inner CTE definition should remain, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM user_orders"), + "Inner CTE reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.users"), + "External table 'users' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.orders"), + "External table 'orders' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_parallel_nested_ctes_in_union() { + // Test multiple independent CTEs in different branches of a UNION + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM ( + WITH left_cte AS (SELECT id FROM left_table) + SELECT * FROM left_cte + ) left_branch + UNION ALL + SELECT * FROM ( + WITH right_cte AS (SELECT id FROM right_table) + SELECT * FROM right_cte + ) right_branch + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Both CTEs should remain unqualified + assert!( + normalized_sql.contains("FROM left_cte"), + "left_cte reference should remain unqualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM right_cte"), + "right_cte reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.left_table"), + "External table 'left_table' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.right_table"), + "External table 'right_table' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_outer_cte_visible_in_nested_subquery() { + // Test that outer CTE is visible in nested subqueries (without redefining) + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH main_data AS ( + SELECT id, value FROM source_table + ) + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM main_data WHERE value > 10 + ) inner_sub + ) outer_sub + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // The CTE reference in deeply nested subquery should remain unqualified + assert!( + normalized_sql.contains("FROM main_data"), + "main_data reference in nested subquery should remain unqualified, got: {}", + normalized_sql + ); + + // External table should be qualified + assert!( + normalized_sql.contains("materialize.public.source_table"), + "External table 'source_table' should be qualified, got: {}", + normalized_sql + ); + } 
else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_with_join_to_outer_cte() { + // Test nested CTE that joins with outer CTE + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + WITH outer_data AS ( + SELECT id, category FROM categories + ) + SELECT * FROM ( + WITH inner_data AS ( + SELECT product_id, price FROM products + ) + SELECT i.product_id, i.price, o.category + FROM inner_data i + JOIN outer_data o ON i.product_id = o.id + ) result + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Both CTE references should remain unqualified + assert!( + normalized_sql.contains("FROM inner_data"), + "inner_data reference should remain unqualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("outer_data o") || normalized_sql.contains("outer_data AS o"), + "outer_data reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.categories"), + "External table 'categories' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.products"), + "External table 'products' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_cte_in_exists_subquery() { + // Test CTE defined inside an EXISTS subquery + let fqn = test_fqn(); + let visitor = NormalizingVisitor::fully_qualifying(&fqn); + + let sql = r#" + CREATE VIEW test_view AS + SELECT * FROM main_table m + WHERE EXISTS ( + WITH related AS ( + SELECT id FROM related_table WHERE status = 'active' + ) + SELECT 1 FROM related WHERE related.id = m.related_id + ) + "#; + + let statements = parse_statements(vec![sql]).unwrap(); + if let Statement::CreateView(view) = &statements[0] { + let mut query = view.definition.query.clone(); + visitor.normalize_query(&mut query); + + let normalized_sql = query.to_ast_string(FormatMode::Simple); + + // Inner CTE should remain unqualified + assert!( + normalized_sql.contains("related AS"), + "Inner CTE definition should remain, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("FROM related"), + "Inner CTE reference should remain unqualified, got: {}", + normalized_sql + ); + + // External tables should be qualified + assert!( + normalized_sql.contains("materialize.public.main_table"), + "External table 'main_table' should be qualified, got: {}", + normalized_sql + ); + assert!( + normalized_sql.contains("materialize.public.related_table"), + "External table 'related_table' should be qualified, got: {}", + normalized_sql + ); + } else { + panic!("Expected CreateView statement"); + } +} diff --git a/src/mz-deploy/src/project/normalize/transformers.rs b/src/mz-deploy/src/project/normalize/transformers.rs new file mode 100644 index 0000000000000..fc04e4c981c6f --- /dev/null +++ b/src/mz-deploy/src/project/normalize/transformers.rs @@ -0,0 +1,283 @@ +//! Name transformation strategies for SQL AST normalization. +//! +//! This module provides different strategies for transforming object names in SQL statements. +//! 
Each transformer implements the `NameTransformer` trait, allowing the `NormalizingVisitor` +//! to apply different transformation strategies using the same traversal logic. + +use super::super::typed::FullyQualifiedName; +use crate::project::object_id::ObjectId; +use mz_sql_parser::ast::*; + +/// Trait for transforming object names in SQL AST nodes. +/// +/// Implementations of this trait define how names should be transformed +/// (e.g., fully qualified, flattened, etc.). +pub trait NameTransformer { + /// Transform a name using the implementing strategy. + /// + /// Takes an `UnresolvedItemName` and returns a transformed version according + /// to the strategy. The input may be partially qualified (1, 2, or 3 parts). + fn transform_name(&self, name: &UnresolvedItemName) -> UnresolvedItemName; + + /// Get the database name from the transformer's FQN context. + fn database_name(&self) -> &str; +} + +/// Transforms names to be fully qualified (`database.schema.object`). +/// +/// This is the default normalization strategy that ensures all object references +/// use the 3-part qualified format. +pub struct FullyQualifyingTransformer<'a> { + pub(crate) fqn: &'a FullyQualifiedName, +} + +impl<'a> NameTransformer for FullyQualifyingTransformer<'a> { + fn transform_name(&self, name: &UnresolvedItemName) -> UnresolvedItemName { + match name.0.len() { + 1 => { + // Unqualified: object only + // Convert to database.schema.object + let object = name.0[0].clone(); + let database = Ident::new(self.fqn.database()).expect("valid database identifier"); + let schema = Ident::new(self.fqn.schema()).expect("valid schema identifier"); + UnresolvedItemName(vec![database, schema, object]) + } + 2 => { + // Schema-qualified: schema.object + // Prepend database to make database.schema.object + let schema = name.0[0].clone(); + let object = name.0[1].clone(); + let database = Ident::new(self.fqn.database()).expect("valid database identifier"); + UnresolvedItemName(vec![database, schema, object]) + } + _ => { + // Already fully qualified or invalid - return as-is + name.clone() + } + } + } + + fn database_name(&self) -> &str { + self.fqn.database() + } +} + +/// Transforms names to be flattened into a single identifier (`"database.schema.object"`). +/// +/// This strategy creates a single unqualified identifier by joining the +/// database, schema, and object names with dots and emitting the result as one +/// quoted identifier. Useful for temporary objects that need unqualified names.
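+///
+/// A minimal sketch of the intended behavior (the FQN context and table name are
+/// hypothetical and this is illustrative rather than a doctest):
+///
+/// ```rust,ignore
+/// // With an FQN context of materialize.public.my_view, an unqualified
+/// // reference to `sales` is rewritten to one identifier containing dots,
+/// // which formats as the quoted name "materialize.public.sales".
+/// let flattener = FlatteningTransformer { fqn: &fqn };
+/// let name = UnresolvedItemName(vec![Ident::new("sales").expect("valid identifier")]);
+/// let flat = flattener.transform_name(&name);
+/// // flat now holds the single identifier: "materialize.public.sales"
+/// ```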
+pub struct FlatteningTransformer<'a> { + pub(crate) fqn: &'a FullyQualifiedName, +} + +impl<'a> NameTransformer for FlatteningTransformer<'a> { + fn transform_name(&self, name: &UnresolvedItemName) -> UnresolvedItemName { + // First, fully qualify the name to ensure we have all parts + let fully_qualified = match name.0.len() { + 1 => { + // Unqualified: object only - use FQN context + vec![ + self.fqn.database().to_string(), + self.fqn.schema().to_string(), + name.0[0].to_string(), + ] + } + 2 => { + // Schema-qualified: schema.object - use FQN database + vec![ + self.fqn.database().to_string(), + name.0[0].to_string(), + name.0[1].to_string(), + ] + } + 3 => { + // Already fully qualified + vec![ + name.0[0].to_string(), + name.0[1].to_string(), + name.0[2].to_string(), + ] + } + _ => { + // Invalid - return as-is + return name.clone(); + } + }; + + // Flatten to single identifier: "database.schema.object" + let flattened = fully_qualified.join("."); + let flattened_ident = Ident::new(&flattened).expect("valid flattened identifier"); + UnresolvedItemName(vec![flattened_ident]) + } + + fn database_name(&self) -> &str { + self.fqn.database() + } +} + +/// Transforms names for staging environments by appending a suffix to schema names. +/// +/// This strategy is used to create isolated staging environments where all objects +/// are deployed to schema names with a suffix (e.g., `public_staging`), and all +/// clusters are renamed with the same suffix (e.g., `quickstart_staging`). +/// +/// External dependencies (objects not defined in the project) are NOT transformed. +/// Objects not being deployed in this staging run are also treated as external. +pub struct StagingTransformer<'a> { + fqn: &'a FullyQualifiedName, + staging_suffix: String, + external_dependencies: &'a std::collections::BTreeSet, + objects_to_deploy: Option<&'a std::collections::BTreeSet>, +} + +impl<'a> StagingTransformer<'a> { + /// Create a new staging transformer with the given suffix. 
+ /// + /// # Arguments + /// * `fqn` - The fully qualified name context + /// * `staging_suffix` - The suffix to append (e.g., "_staging") + /// * `external_dependencies` - Set of external dependencies that should NOT be transformed + /// * `objects_to_deploy` - Optional set of objects being deployed; objects not in this set are treated as external + pub fn new( + fqn: &'a FullyQualifiedName, + staging_suffix: String, + external_dependencies: &'a std::collections::BTreeSet, + objects_to_deploy: Option<&'a std::collections::BTreeSet>, + ) -> Self { + Self { + fqn, + staging_suffix, + external_dependencies, + objects_to_deploy, + } + } + + /// Check if a name refers to an external dependency or an object not being deployed + pub(crate) fn is_external(&self, name: &UnresolvedItemName) -> bool { + use ObjectId; + + // Try to construct an ObjectId from the name + let object_id = match name.0.len() { + 1 => { + // Unqualified: use default database and schema + ObjectId { + database: self.fqn.database().to_string(), + schema: self.fqn.schema().to_string(), + object: name.0[0].to_string(), + } + } + 2 => { + // Schema-qualified: use default database + ObjectId { + database: self.fqn.database().to_string(), + schema: name.0[0].to_string(), + object: name.0[1].to_string(), + } + } + 3 => { + // Fully qualified + ObjectId { + database: name.0[0].to_string(), + schema: name.0[1].to_string(), + object: name.0[2].to_string(), + } + } + _ => return false, // Invalid name, not external + }; + + // Check if it's in the external dependencies + if self.external_dependencies.contains(&object_id) { + return true; + } + + // If objects_to_deploy is specified, check if this object is NOT in that set + // If not being deployed, treat as external + if let Some(objects_to_deploy) = self.objects_to_deploy + && !objects_to_deploy.contains(&object_id) + { + return true; + } + + false + } +} + +impl<'a> NameTransformer for StagingTransformer<'a> { + fn transform_name(&self, name: &UnresolvedItemName) -> UnresolvedItemName { + // Check if this is an external dependency - if so, don't transform it + if self.is_external(name) { + return name.clone(); + } + + match name.0.len() { + 1 => { + // Unqualified: object only + // Add staging suffix to schema: database.schema_staging.object + let object = name.0[0].clone(); + let database = Ident::new(self.fqn.database()).expect("valid database identifier"); + let staging_schema = format!("{}{}", self.fqn.schema(), self.staging_suffix); + let schema = Ident::new(&staging_schema).expect("valid schema identifier"); + UnresolvedItemName(vec![database, schema, object]) + } + 2 => { + // Schema-qualified: schema.object + // Add staging suffix to schema: database.schema_staging.object + let schema_name = format!("{}{}", name.0[0], self.staging_suffix); + let schema = Ident::new(&schema_name).expect("valid schema identifier"); + let object = name.0[1].clone(); + let database = Ident::new(self.fqn.database()).expect("valid database identifier"); + UnresolvedItemName(vec![database, schema, object]) + } + 3 => { + // Fully qualified: database.schema.object + // Add staging suffix to schema: database.schema_staging.object + let database = name.0[0].clone(); + let schema_name = format!("{}{}", name.0[1], self.staging_suffix); + let schema = Ident::new(&schema_name).expect("valid schema identifier"); + let object = name.0[2].clone(); + UnresolvedItemName(vec![database, schema, object]) + } + _ => { + // Invalid - return as-is + name.clone() + } + } + } + + fn database_name(&self) -> &str { + 
self.fqn.database() + } +} + +/// Extension trait for transformers that also transform cluster names. +/// +/// This trait allows transformers to modify cluster references in addition to +/// object names. It's used by the StagingTransformer to rename clusters for +/// staging environments. +pub trait ClusterTransformer: NameTransformer { + /// Transform a cluster name according to the strategy. + fn transform_cluster(&self, cluster_name: &Ident) -> Ident; + + /// Get the original cluster name from a transformed name. + /// + /// This is used to look up production cluster configurations when creating + /// staging clusters. + fn get_original_cluster_name(&self, staged_name: &str) -> String; +} + +impl<'a> ClusterTransformer for StagingTransformer<'a> { + fn transform_cluster(&self, cluster_name: &Ident) -> Ident { + // Transform: quickstart → quickstart_staging + let staging_name = format!("{}{}", cluster_name, self.staging_suffix); + Ident::new(&staging_name).expect("valid cluster identifier") + } + + fn get_original_cluster_name(&self, staged_name: &str) -> String { + // Reverse transform: quickstart_staging → quickstart + staged_name + .strip_suffix(&self.staging_suffix) + .unwrap_or(staged_name) + .to_string() + } +} diff --git a/src/mz-deploy/src/project/normalize/visitor.rs b/src/mz-deploy/src/project/normalize/visitor.rs new file mode 100644 index 0000000000000..40878e9e65677 --- /dev/null +++ b/src/mz-deploy/src/project/normalize/visitor.rs @@ -0,0 +1,505 @@ +//! The NormalizingVisitor for traversing SQL AST and applying name transformations. +//! +//! This module contains the `NormalizingVisitor` struct which traverses SQL statements +//! and applies name transformations using a configurable strategy (via the `NameTransformer` trait). + +use super::super::typed::FullyQualifiedName; +use super::transformers::{ + ClusterTransformer, FlatteningTransformer, FullyQualifyingTransformer, NameTransformer, + StagingTransformer, +}; +use crate::project::object_id::ObjectId; +use mz_sql_parser::ast::*; + +/// Visitor that traverses SQL AST and transforms names using a given strategy. +/// +/// This struct is generic over the `NameTransformer` trait, allowing different +/// transformation strategies to reuse the same traversal logic. +pub struct NormalizingVisitor<T> { + transformer: T, + cte_scope: std::cell::RefCell<Vec<std::collections::BTreeSet<String>>>, +} + +impl<T: NameTransformer> NormalizingVisitor<T> { + /// Create a new visitor with the given transformer. + pub fn new(transformer: T) -> Self { + Self { + transformer, + cte_scope: std::cell::RefCell::new(Vec::new()), + } + } + + /// Check if a name is a CTE currently in scope. + fn is_cte_in_scope(&self, name: &str) -> bool { + self.cte_scope + .borrow() + .iter() + .any(|scope| scope.contains(name)) + } + + /// Push a new CTE scope onto the stack. + fn push_cte_scope(&self, cte_names: std::collections::BTreeSet<String>) { + self.cte_scope.borrow_mut().push(cte_names); + } + + /// Pop the current CTE scope from the stack. + fn pop_cte_scope(&self) { + self.cte_scope.borrow_mut().pop(); + } + + /// Get a reference to the transformer. + pub fn transformer(&self) -> &T { + &self.transformer + } + + /// Normalize a RawItemName to be transformed according to the strategy. + /// + /// Converts partially qualified or unqualified object references using + /// the current file's FQN context. + /// + /// CTEs (Common Table Expressions) are not transformed - they remain as-is.
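+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch (not a doctest; the names and FQN context are
+    /// hypothetical). CTE scopes are populated by `normalize_query`, so a
+    /// reference that matches a CTE currently in scope is left untouched:
+    ///
+    /// ```rust,ignore
+    /// let visitor = NormalizingVisitor::fully_qualifying(&fqn);
+    /// // `sales` is not a CTE in scope, so it becomes materialize.public.sales.
+    /// visitor.normalize_raw_item_name(&mut raw_sales_name);
+    /// // A single-part name matching a CTE introduced by an enclosing WITH
+    /// // clause would be returned unchanged.
+    /// ```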
+ pub fn normalize_raw_item_name(&self, name: &mut RawItemName) { + let unresolved = name.name_mut(); + + // Check if this is a CTE reference (unqualified single identifier) + // CTEs can only be referenced by their unqualified name + if unresolved.0.len() == 1 { + let name_str = unresolved.0[0].to_string(); + if self.is_cte_in_scope(&name_str) { + // This is a CTE reference - don't transform it + crate::verbose!("Skipping transform of CTE reference: {}", name_str); + return; + } + crate::verbose!("Transforming non-CTE reference: {}", name_str); + } + + *unresolved = self.transformer.transform_name(unresolved); + } + + /// Normalize an UnresolvedItemName to be transformed according to the strategy. + /// + /// Similar to normalize_raw_item_name, but works directly with UnresolvedItemName. + pub fn normalize_unresolved_item_name(&self, name: &mut UnresolvedItemName) { + *name = self.transformer.transform_name(name); + } + + /// Normalize an UnresolvedSchemaName to be fully qualified (`database.schema`). + /// + /// Converts unqualified schema names (e.g., `public`) to fully qualified + /// names (e.g., `materialize.public`) using the current file's FQN context. + pub fn normalize_unresolved_schema_name(&self, name: &mut UnresolvedSchemaName) { + match name.0.len() { + 1 => { + // Unqualified: schema only (e.g., "public") + // Prepend database to make database.schema + let schema = name.0[0].clone(); + let database = Ident::new(self.transformer.database_name()) + .expect("valid database identifier"); + name.0 = vec![database, schema]; + } + _ => { + // Already qualified or invalid - leave as-is + } + } + } + + /// Normalize connection references in CREATE SINK statements. + /// + /// Handles both Kafka and Iceberg sink types, ensuring their connection + /// references are normalized. + pub fn normalize_sink_connection(&self, connection: &mut CreateSinkConnection) { + match connection { + CreateSinkConnection::Kafka { connection, .. } => { + self.normalize_raw_item_name(connection); + } + CreateSinkConnection::Iceberg { connection, .. } => { + self.normalize_raw_item_name(connection); + } + } + } + + /// Normalize all table references in a query (used for views and materialized views). + /// + /// Recursively traverses the query AST to find and normalize all object references + /// in FROM clauses, JOINs, subqueries, and CTEs. + pub fn normalize_query(&self, query: &mut Query) { + // Collect CTE names from this query to track them in scope + let cte_names = match &query.ctes { + CteBlock::Simple(ctes) => ctes + .iter() + .map(|cte| cte.alias.name.to_string()) + .collect::>(), + CteBlock::MutuallyRecursive(mut_rec_block) => mut_rec_block + .ctes + .iter() + .map(|cte| cte.name.to_string()) + .collect::>(), + }; + + // Push CTE names onto scope stack + self.push_cte_scope(cte_names); + + // Normalize CTEs (WITH clause) + // Note: CTE definitions themselves can reference earlier CTEs in the same WITH clause + match &mut query.ctes { + CteBlock::Simple(ctes) => { + for cte in ctes { + self.normalize_query(&mut cte.query); + } + } + CteBlock::MutuallyRecursive(mut_rec_block) => { + for cte in &mut mut_rec_block.ctes { + self.normalize_query(&mut cte.query); + } + } + } + + // Normalize main query body (can reference all CTEs from this query) + self.normalize_set_expr(&mut query.body); + + // Pop CTE scope after processing this query + self.pop_cte_scope(); + } + + /// Normalize a set expression (SELECT, UNION, INTERSECT, EXCEPT, etc.). 
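+    ///
+    /// Both branches of a set operation are visited, so each side of a UNION gets
+    /// its table references normalized independently. Illustrative sketch
+    /// (identifiers are hypothetical, not a doctest):
+    ///
+    /// ```rust,ignore
+    /// // SELECT * FROM left_table UNION ALL SELECT * FROM right_table
+    /// // becomes, under the fully-qualifying strategy:
+    /// // SELECT * FROM materialize.public.left_table AS left_table
+    /// //   UNION ALL SELECT * FROM materialize.public.right_table AS right_table
+    /// ```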
+ pub fn normalize_set_expr(&self, set_expr: &mut SetExpr) { + match set_expr { + SetExpr::Select(select) => { + self.normalize_select(select); + } + SetExpr::Query(query) => { + self.normalize_query(query); + } + SetExpr::SetOperation { left, right, .. } => { + self.normalize_set_expr(left); + self.normalize_set_expr(right); + } + SetExpr::Values(_) | SetExpr::Show(_) | SetExpr::Table(_) => { + // These don't contain table references + } + } + } + + /// Normalize a SELECT statement. + /// + /// Handles table references in FROM, JOIN, WHERE (subqueries), and SELECT items (subqueries). + pub fn normalize_select(&self, select: &mut Select) { + // Normalize FROM clause + for table_with_joins in &mut select.from { + self.normalize_table_factor(&mut table_with_joins.relation); + + // Normalize JOINs + for join in &mut table_with_joins.joins { + self.normalize_table_factor(&mut join.relation); + } + } + + // Normalize WHERE clause (may contain subqueries) + if let Some(ref mut selection) = select.selection { + self.normalize_expr(selection); + } + + // Normalize HAVING clause (may contain subqueries) + if let Some(ref mut having) = select.having { + self.normalize_expr(having); + } + + // Normalize SELECT items (may contain subqueries in expressions) + for item in &mut select.projection { + if let SelectItem::Expr { expr, .. } = item { + self.normalize_expr(expr); + } + } + } + + /// Normalize a table factor (table reference, subquery, or nested join). + /// + /// This is the key function where actual table names are normalized. + pub fn normalize_table_factor(&self, table_factor: &mut TableFactor) { + match table_factor { + TableFactor::Table { name, alias } => { + // Save the original table name (the last part) before transformation + // This will be used as an implicit alias if one doesn't exist + let original_table_name = match name.name().0.len() { + 1 => { + // Unqualified: "sales" + let name_str = name.name().0[0].to_string(); + // Don't create an alias if this is a CTE reference (it won't be transformed) + if !self.is_cte_in_scope(&name_str) { + Some(name.name().0[0].clone()) + } else { + None + } + } + 2 | 3 => { + // Schema-qualified: "schema.sales" or fully qualified: "db.schema.sales" + // Extract the table name (last part) to use as implicit alias + Some(name.name().0.last().unwrap().clone()) + } + _ => None, + }; + + // Normalize the table name (e.g., "sales" -> "materialize.public.sales") + self.normalize_raw_item_name(name); + + // If there's no explicit alias and we have an original table name, create an implicit alias + // This ensures qualified column references like "sales.column" continue to work + // after the table name is transformed to "materialize.public.sales" or "materialize_public_sales" + if alias.is_none() { + if let Some(original) = original_table_name { + *alias = Some(TableAlias { + name: original, + columns: vec![], + strict: false, + }); + } + } + } + TableFactor::Derived { subquery, .. } => { + self.normalize_query(subquery); + } + TableFactor::NestedJoin { join, .. } => { + self.normalize_table_factor(&mut join.relation); + for nested_join in &mut join.joins { + self.normalize_table_factor(&mut nested_join.relation); + } + } + TableFactor::Function { .. } | TableFactor::RowsFrom { .. } => { + // Table functions might reference tables, but the structure is complex + // For now, we don't normalize these + } + } + } + + /// Normalize expressions (handles subqueries in WHERE, CASE, etc.). 
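+    ///
+    /// Operands of `Expr::Op` are walked recursively, so a subquery used as a
+    /// comparison operand is normalized as well. Illustrative sketch (identifiers
+    /// are hypothetical, not a doctest):
+    ///
+    /// ```rust,ignore
+    /// // HAVING SUM(amount) > (SELECT avg_amount FROM avg_sales)
+    /// // The subquery's FROM clause is visited: a base table such as `sales`
+    /// // inside it becomes materialize.public.sales, while a CTE name such as
+    /// // `avg_sales` stays unqualified.
+    /// ```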
+ pub fn normalize_expr(&self, expr: &mut Expr) { + match expr { + Expr::Subquery(query) | Expr::Exists(query) => { + self.normalize_query(query); + } + Expr::InSubquery { expr, subquery, .. } => { + self.normalize_expr(expr); + self.normalize_query(subquery); + } + Expr::Between { + expr, low, high, .. + } => { + self.normalize_expr(expr); + self.normalize_expr(low); + self.normalize_expr(high); + } + Expr::Cast { expr, .. } => { + self.normalize_expr(expr); + } + Expr::Case { + operand, + conditions, + results, + else_result, + } => { + if let Some(operand) = operand { + self.normalize_expr(operand); + } + for cond in conditions { + self.normalize_expr(cond); + } + for result in results { + self.normalize_expr(result); + } + if let Some(else_result) = else_result { + self.normalize_expr(else_result); + } + } + Expr::Function(func) => { + if let FunctionArgs::Args { args, order_by, .. } = &mut func.args { + for arg in args { + self.normalize_expr(arg); + } + for order in order_by { + self.normalize_expr(&mut order.expr); + } + } + } + Expr::Array(exprs) | Expr::List(exprs) => { + for expr in exprs { + self.normalize_expr(expr); + } + } + Expr::Row { exprs } => { + for expr in exprs { + self.normalize_expr(expr); + } + } + Expr::Collate { expr, .. } => { + self.normalize_expr(expr); + } + Expr::IsExpr { + expr, construct, .. + } => { + self.normalize_expr(expr); + if let IsExprConstruct::DistinctFrom(other_expr) = construct { + self.normalize_expr(other_expr); + } + } + Expr::Op { expr1, expr2, .. } => { + // Recursively normalize operands of binary/unary operations (e.g., AND, OR, =, >, <, +, etc.) + // This ensures subqueries in comparisons like "COUNT(*) > (SELECT ...)" are normalized + self.normalize_expr(expr1); + if let Some(expr2) = expr2 { + self.normalize_expr(expr2); + } + } + // Note: We intentionally don't transform Expr::Identifier or Expr::QualifiedWildcard + // because qualified column references (like `alias.column`) should reference table + // aliases from the FROM clause, not fully qualified table names. The aliases themselves + // are already attached to transformed table names in the FROM clause. + // + // These don't contain subqueries or table references + _ => {} + } + } + + /// Normalize index references. + /// + /// Indexes reference the table/view they're created on, and this reference + /// needs to be normalized. + pub fn normalize_index_references(&self, indexes: &mut [CreateIndexStatement]) { + for index in indexes { + self.normalize_raw_item_name(&mut index.on_name); + } + } + + /// Normalize cluster references in indexes. + /// + /// Indexes can specify an IN CLUSTER clause, and these cluster references + /// need to be normalized for staging environments. + pub fn normalize_index_clusters(&self, indexes: &mut [CreateIndexStatement]) + where + T: ClusterTransformer, + { + for index in indexes { + self.normalize_cluster_name(&mut index.in_cluster); + } + } + + /// Normalize grant target references. + /// + /// GRANT statements reference the object they grant permissions on, and these + /// references need to be normalized. + pub fn normalize_grant_references(&self, grants: &mut [GrantPrivilegesStatement]) { + for grant in grants { + if let GrantTargetSpecification::Object { + object_spec_inner, .. 
+ } = &mut grant.target + && let GrantTargetSpecificationInner::Objects { names } = object_spec_inner + { + for obj in names { + if let UnresolvedObjectName::Item(item_name) = obj { + self.normalize_unresolved_item_name(item_name); + } + } + } + } + } + + /// Normalize comment object references. + /// + /// COMMENT statements reference the object they comment on, and these + /// references need to be normalized. + pub fn normalize_comment_references(&self, comments: &mut [CommentStatement<Raw>]) { + for comment in comments { + match &mut comment.object { + CommentObjectType::Table { name } + | CommentObjectType::View { name } + | CommentObjectType::MaterializedView { name } + | CommentObjectType::Source { name } + | CommentObjectType::Sink { name } + | CommentObjectType::Connection { name } + | CommentObjectType::Secret { name } => { + self.normalize_raw_item_name(name); + } + CommentObjectType::Column { name } => { + // For columns, normalize the table/view reference (the relation) + self.normalize_raw_item_name(&mut name.relation); + } + _ => { + // Other comment types don't need normalization + } + } + } + } + + /// Normalize a cluster name using a ClusterTransformer. + /// + /// This method transforms cluster references in statements that support + /// the `IN CLUSTER` clause. It's primarily used by the StagingTransformer + /// to rename clusters for staging environments. + /// + /// # Type Parameter + /// `T` must implement `ClusterTransformer` for this method to be callable. + pub fn normalize_cluster_name(&self, cluster: &mut Option<RawClusterName>) + where + T: ClusterTransformer, + { + if let Some(cluster_name) = cluster { + match cluster_name { + RawClusterName::Unresolved(ident) => { + let transformed = self.transformer.transform_cluster(ident); + *cluster_name = RawClusterName::Unresolved(transformed); + } + RawClusterName::Resolved(_) => { + // Already resolved, leave as-is + } + } + } + } +} + +// Convenience constructors for common use cases +impl<'a> NormalizingVisitor<FullyQualifyingTransformer<'a>> { + /// Create a visitor that fully qualifies names (`database.schema.object`). + pub fn fully_qualifying(fqn: &'a FullyQualifiedName) -> Self { + Self::new(FullyQualifyingTransformer { fqn }) + } +} + +impl<'a> NormalizingVisitor<FlatteningTransformer<'a>> { + /// Create a visitor that flattens names into a single quoted identifier (`"database.schema.object"`). + pub fn flattening(fqn: &'a FullyQualifiedName) -> Self { + Self::new(FlatteningTransformer { fqn }) + } +} + +impl<'a> NormalizingVisitor<StagingTransformer<'a>> { + /// Create a visitor that transforms names for staging environments. + /// + /// This visitor appends a suffix to schema and cluster names to create + /// isolated staging environments. External dependencies and objects not + /// being deployed are NOT transformed.
+ /// + /// # Arguments + /// * `fqn` - The fully qualified name context + /// * `suffix` - The suffix to append (e.g., "_staging") + /// * `external_dependencies` - Set of external dependencies that should NOT be transformed + /// * `objects_to_deploy` - Optional set of objects being deployed; objects not in this set are treated as external + /// + /// # Example + /// ```rust,ignore + /// let visitor = NormalizingVisitor::staging(&fqn, "_staging".to_string(), &external_deps, Some(&objects)); + /// // Transforms: public → public_staging, quickstart → quickstart_staging + /// // But leaves external dependencies and non-deployed objects unchanged + /// ``` + pub fn staging( + fqn: &'a FullyQualifiedName, + suffix: String, + external_dependencies: &'a std::collections::BTreeSet, + objects_to_deploy: Option<&'a std::collections::BTreeSet>, + ) -> Self { + Self::new(StagingTransformer::new( + fqn, + suffix, + external_dependencies, + objects_to_deploy, + )) + } +} diff --git a/src/mz-deploy/src/project/object_id.rs b/src/mz-deploy/src/project/object_id.rs new file mode 100644 index 0000000000000..f864f5197e475 --- /dev/null +++ b/src/mz-deploy/src/project/object_id.rs @@ -0,0 +1,111 @@ +use mz_sql_parser::ast::{RawItemName, UnresolvedItemName}; + +/// A fully qualified object identifier. +/// +/// Used to uniquely identify database objects across the project. +/// Format: `database.schema.object` +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ObjectId { + pub database: String, + pub schema: String, + pub object: String, +} + +impl ObjectId { + /// Create a new ObjectId with the given database, schema, and object names. + pub fn new(database: String, schema: String, object: String) -> Self { + Self { + database, + schema, + object, + } + } + + /// Get the database name. + #[inline] + pub fn database(&self) -> &str { + &self.database + } + + /// Get the schema name. + #[inline] + pub fn schema(&self) -> &str { + &self.schema + } + + /// Get the object name. + #[inline] + pub fn object(&self) -> &str { + &self.object + } + + /// Create from an UnresolvedItemName with default database and schema + pub fn from_item_name( + name: &UnresolvedItemName, + default_database: &str, + default_schema: &str, + ) -> Self { + match name.0.as_slice() { + [object] => Self::new( + default_database.to_string(), + default_schema.to_string(), + object.to_string(), + ), + [schema, object] => Self::new( + default_database.to_string(), + schema.to_string(), + object.to_string(), + ), + [database, schema, object] => { + Self::new(database.to_string(), schema.to_string(), object.to_string()) + } + _ => Self::new( + default_database.to_string(), + default_schema.to_string(), + "unknown".to_string(), + ), + } + } + + /// Create from a RawItemName with default database and schema + pub fn from_raw_item_name( + name: &RawItemName, + default_database: &str, + default_schema: &str, + ) -> Self { + // RawItemName wraps UnresolvedItemName + Self::from_item_name(name.name(), default_database, default_schema) + } + + /// Parse an ObjectId from a fully qualified name string. 
+    ///
+    /// # Arguments
+    /// * `fqn` - Fully qualified name in the format "database.schema.object"
+    ///
+    /// # Returns
+    /// ObjectId if the FQN is valid (has exactly 3 dot-separated parts)
+    ///
+    /// # Errors
+    /// Returns error if the FQN format is invalid
+    #[must_use = "this returns the parsed ObjectId, which should be used"]
+    pub fn from_fqn(fqn: &str) -> Result<Self, String> {
+        let parts: Vec<&str> = fqn.split('.').collect();
+        if parts.len() != 3 {
+            return Err(format!(
+                "invalid FQN '{}': expected format 'database.schema.object'",
+                fqn
+            ));
+        }
+        Ok(ObjectId {
+            database: parts[0].to_string(),
+            schema: parts[1].to_string(),
+            object: parts[2].to_string(),
+        })
+    }
+}
+
+impl std::fmt::Display for ObjectId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}.{}.{}", self.database, self.schema, self.object)
+    }
+}
diff --git a/src/mz-deploy/src/project/parser.rs b/src/mz-deploy/src/project/parser.rs
new file mode 100644
index 0000000000000..1d3fc192a5f51
--- /dev/null
+++ b/src/mz-deploy/src/project/parser.rs
@@ -0,0 +1,94 @@
+use super::error::ParseError;
+use mz_sql_parser::ast::{Raw, Statement};
+use std::path::PathBuf;
+
+/// Parses one or more SQL statements from an iterable collection of strings.
+///
+/// This function is only used in tests for simple parsing without file context.
+#[cfg(test)]
+pub fn parse_statements<I, S>(raw: I) -> Result<Vec<Statement<Raw>>, ParseError>
+where
+    I: IntoIterator<Item = S>,
+    S: AsRef<str>,
+{
+    let mut statements = vec![];
+    for s in raw {
+        let parsed_results = mz_sql_parser::parser::parse_statements_with_limit(s.as_ref())
+            .map_err(|e| ParseError::StatementsParseFailed {
+                message: format!("Parser limit error: {}", e),
+            })?
+            .map_err(|e| ParseError::StatementsParseFailed {
+                message: format!("Parse error: {}", e.error),
+            })?;
+
+        let mut parsed: Vec<Statement<Raw>> = parsed_results
+            .into_iter()
+            .map(|result| result.ast)
+            .collect();
+
+        statements.append(&mut parsed);
+    }
+
+    Ok(statements)
+}
+
+/// Parse SQL statements and add file context to any errors.
+///
+/// This function directly parses SQL and creates SqlParseFailed errors with full context
+/// including file path and SQL content for better error reporting.
+pub fn parse_statements_with_context(
+    sql: &str,
+    path: PathBuf,
+) -> Result<Vec<Statement<Raw>>, ParseError> {
+    let mut statements = vec![];
+
+    let parsed_results = mz_sql_parser::parser::parse_statements_with_limit(sql)
+        .map_err(|e| ParseError::StatementsParseFailed {
+            message: format!("Parser limit error in file {}: {}", path.display(), e),
+        })?
+        .map_err(|e| ParseError::SqlParseFailed {
+            path: path.clone(),
+            sql: sql.to_string(),
+            source: e,
+        })?;
+
+    let mut parsed: Vec<Statement<Raw>> = parsed_results
+        .into_iter()
+        .map(|result| result.ast)
+        .collect();
+
+    statements.append(&mut parsed);
+
+    Ok(statements)
+}
+
+#[cfg(test)]
+mod test {
+    use crate::project::parser::parse_statements;
+
+    #[test]
+    fn validate() {
+        let _ = parse_statements(vec!["CREATE CLUSTER c (INTROSPECTION INTERVAL = 0)"]).unwrap();
+    }
+
+    // TODO: Re-enable when mz_sql_parser supports IN CLUSTER for indexes
+    // #[test]
+    // fn test_index_in_cluster() {
+    //     let result = parse_statements(vec!["CREATE INDEX test_idx ON test (id) IN CLUSTER quickstart"]);
+    //     println!("Parse result for INDEX: {:?}", result);
+    //     assert!(result.is_ok(), "Failed to parse INDEX with IN CLUSTER: {:?}", result.err());
+    // }
+
+    #[test]
+    fn test_mv_in_cluster() {
+        let result = parse_statements(vec![
+            "CREATE MATERIALIZED VIEW mv IN CLUSTER quickstart AS SELECT 1",
+        ]);
+        println!("Parse result for MV: {:?}", result);
+        assert!(
+            result.is_ok(),
+            "Failed to parse MV with IN CLUSTER: {:?}",
+            result.err()
+        );
+    }
+}
diff --git a/src/mz-deploy/src/project/planned.rs b/src/mz-deploy/src/project/planned.rs
new file mode 100644
index 0000000000000..3475fafbf257d
--- /dev/null
+++ b/src/mz-deploy/src/project/planned.rs
@@ -0,0 +1,50 @@
+//! Planned representation for Materialize projects.
+//!
+//! This module provides a dependency-aware representation of a Materialize project.
+//! It builds on top of the validated typed representation and adds dependency tracking between objects,
+//! enabling topological sorting for deployment order.
+//!
+//! # Transformation Flow
+//!
+//! ```text
+//! raw::Project → typed::Project → planned::Project
+//!                      ↓                  ↓
+//!                 (validated)     (with dependencies)
+//! ```
+//!
+//! # Dependency Extraction
+//!
+//! Dependencies are extracted from:
+//! - View and materialized view queries (FROM clauses, JOINs, subqueries, CTEs)
+//! - Tables created from sources
+//! - Indexes (the table/view they're created on)
+//! - Sinks (the object they read from)
+//!
+//! # Example
+//!
+//! ```text
+//! CREATE TABLE users (...);
+//! CREATE VIEW active_users AS SELECT * FROM users WHERE active = true;
+//! CREATE INDEX idx ON active_users (id);
+//!
+//! Dependencies:
+//! - active_users depends on: users
+//! - idx depends on: active_users
+//! ```
+//!
+//! # Module Structure
+//!
+//! - [`types`]: Type definitions (DatabaseObject, ModStatement, Schema, Database, Project)
+//! - [`project`]: Project implementation methods (topological_sort, iter_objects, etc.)
+//! - [`dependency`]: Dependency extraction from typed representation

+mod dependency;
+mod project;
+mod types;
+
+// Re-export all public types and functions
+pub use dependency::{extract_dependencies, extract_external_indexes};
+pub use types::{Database, DatabaseObject, ModStatement, Project, Schema, SchemaType};
+
+#[cfg(test)]
+mod tests;
diff --git a/src/mz-deploy/src/project/planned/dependency.rs b/src/mz-deploy/src/project/planned/dependency.rs
new file mode 100644
index 0000000000000..53ee74514c69e
--- /dev/null
+++ b/src/mz-deploy/src/project/planned/dependency.rs
@@ -0,0 +1,639 @@
+//! Dependency extraction and project conversion from typed representation.
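+//!
+//! A minimal usage sketch (the statement, database, and schema names here are
+//! illustrative, not taken from a real project):
+//!
+//! ```ignore
+//! // Collect the objects and clusters a statement references, resolving
+//! // unqualified names against a default database and schema.
+//! let (deps, clusters) = extract_dependencies(&stmt, "materialize", "public");
+//! for dep in &deps {
+//!     println!("depends on {}", dep);
+//! }
+//! ```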
+
+use super::super::ast::{Cluster, Statement};
+use super::super::typed;
+use super::types::{Database, DatabaseObject, Project, Schema, SchemaType};
+use crate::project::object_id::ObjectId;
+use mz_sql_parser::ast::*;
+use std::collections::{BTreeMap, BTreeSet};
+
+/// Determine the schema type based on the objects it contains.
+///
+/// Returns:
+/// - `SchemaType::Storage` if the schema contains tables, sinks, or tables from sources
+/// - `SchemaType::Compute` if the schema contains views or materialized views
+/// - `SchemaType::Empty` if the schema contains no objects
+///
+/// Note: Due to validation in the typed phase, schemas cannot contain both storage
+/// and compute objects, so we only need to check the first object.
+fn determine_schema_type(objects: &[DatabaseObject]) -> SchemaType {
+    if objects.is_empty() {
+        return SchemaType::Empty;
+    }
+
+    // Check the first object to determine schema type
+    // Validation ensures all objects in a schema are the same type
+    match &objects[0].typed_object.stmt {
+        Statement::CreateTable(_)
+        | Statement::CreateTableFromSource(_)
+        | Statement::CreateSink(_) => SchemaType::Storage,
+        Statement::CreateView(_) | Statement::CreateMaterializedView(_) => SchemaType::Compute,
+    }
+}
+
+impl From<typed::Project> for Project {
+    fn from(typed_project: typed::Project) -> Self {
+        let mut dependency_graph = BTreeMap::new();
+        let mut databases = Vec::new();
+        let mut defined_objects = BTreeSet::new();
+        let mut cluster_dependencies = BTreeSet::new();
+        let mut tests = Vec::new();
+
+        // First pass: collect all objects defined in the project
+        for typed_db in &typed_project.databases {
+            for typed_schema in &typed_db.schemas {
+                for typed_obj in &typed_schema.objects {
+                    let object_id = ObjectId::new(
+                        typed_db.name.clone(),
+                        typed_schema.name.clone(),
+                        typed_obj.stmt.ident().object.clone(),
+                    );
+                    defined_objects.insert(object_id);
+                }
+            }
+        }
+
+        // Second pass: build dependency graph and track external dependencies and clusters
+        let mut external_dependencies = BTreeSet::new();
+
+        for typed_db in typed_project.databases {
+            let mut schemas = Vec::new();
+
+            for typed_schema in typed_db.schemas {
+                let mut objects = Vec::new();
+
+                for typed_obj in typed_schema.objects {
+                    let object_id = ObjectId::new(
+                        typed_db.name.clone(),
+                        typed_schema.name.clone(),
+                        typed_obj.stmt.ident().object.clone(),
+                    );
+
+                    // Extract dependencies from the statement
+                    let (dependencies, clusters) =
+                        extract_dependencies(&typed_obj.stmt, &typed_db.name, &typed_schema.name);
+
+                    // Track cluster dependencies
+                    for cluster in clusters {
+                        cluster_dependencies.insert(cluster);
+                    }
+
+                    // Check for external dependencies
+                    for dep in &dependencies {
+                        if !defined_objects.contains(dep) {
+                            external_dependencies.insert(dep.clone());
+                        }
+                    }
+
+                    dependency_graph.insert(object_id.clone(), dependencies.clone());
+
+                    // Collect tests for this object
+                    for test_stmt in &typed_obj.tests {
+                        let unit_test =
+                            crate::unit_test::UnitTest::from_execute_statement(test_stmt);
+                        tests.push((object_id.clone(), unit_test));
+                    }
+
+                    objects.push(DatabaseObject {
+                        id: object_id,
+                        typed_object: typed_obj,
+                        dependencies,
+                    });
+                }
+
+                // Determine schema type based on objects
+                let schema_type = determine_schema_type(&objects);
+
+                schemas.push(Schema {
+                    name: typed_schema.name,
+                    objects,
+                    mod_statements: typed_schema.mod_statements,
+                    schema_type,
+                });
+            }
+
+            databases.push(Database {
+                name: typed_db.name,
+                schemas,
+                mod_statements: typed_db.mod_statements,
+            });
+        }
+
+        Project {
+            databases,
+            dependency_graph,
+            external_dependencies,
+            cluster_dependencies,
+            tests,
+        }
+    }
+}
+
+/// Find all external indexes on an object. That is,
+/// any index that lives on a different cluster than the one where
+/// the main object is installed.
+pub fn extract_external_indexes(
+    object: &DatabaseObject,
+) -> Vec<(Cluster, CreateIndexStatement<Raw>)> {
+    match &object.typed_object.stmt {
+        Statement::CreateMaterializedView(materialized_view) => {
+            let mv_cluster =
+                Cluster::new(materialized_view.in_cluster.clone().unwrap().to_string());
+
+            object
+                .typed_object
+                .indexes
+                .iter()
+                .filter_map(|index| {
+                    let index_cluster = Cluster::new(index.in_cluster.clone().unwrap().to_string());
+
+                    (mv_cluster != index_cluster).then(|| (index_cluster, index.clone()))
+                })
+                .collect()
+        }
+        _ => object
+            .typed_object
+            .indexes
+            .iter()
+            .map(|index| {
+                let cluster = Cluster::new(index.in_cluster.clone().unwrap().to_string());
+                (cluster, index.clone())
+            })
+            .collect(),
+    }
+}
+
+/// Extract all dependencies from a statement.
+///
+/// Returns a tuple of (object_dependencies, cluster_dependencies).
+///
+/// This function is public to allow the changeset module to analyze
+/// cluster dependencies for incremental deployment.
+pub fn extract_dependencies(
+    stmt: &Statement,
+    default_database: &str,
+    default_schema: &str,
+) -> (BTreeSet<ObjectId>, BTreeSet<Cluster>) {
+    let mut deps = BTreeSet::new();
+    let mut clusters = BTreeSet::new();
+
+    match stmt {
+        Statement::CreateView(s) => {
+            extract_query_dependencies(
+                &s.definition.query,
+                default_database,
+                default_schema,
+                &mut deps,
+            );
+        }
+        Statement::CreateMaterializedView(s) => {
+            extract_query_dependencies(&s.query, default_database, default_schema, &mut deps);
+
+            // Extract cluster dependency from IN CLUSTER clause
+            if let Some(ref cluster_name) = s.in_cluster {
+                clusters.insert(Cluster::new(cluster_name.to_string()));
+            }
+        }
+        Statement::CreateTableFromSource(s) => {
+            // Table depends on the source it's created from
+            let source_id =
+                ObjectId::from_raw_item_name(&s.source, default_database, default_schema);
+            deps.insert(source_id);
+        }
+        Statement::CreateSink(s) => {
+            // Sink depends on the object it reads from
+            let from_id = ObjectId::from_raw_item_name(&s.from, default_database, default_schema);
+            deps.insert(from_id);
+
+            if let Some(ref cluster_name) = s.in_cluster {
+                clusters.insert(Cluster::new(cluster_name.to_string()));
+            }
+        }
+        // These don't have dependencies on other database objects
+        Statement::CreateTable(_) => {}
+    }
+
+    (deps, clusters)
+}
+
+/// Extract dependencies from a query (used by views and materialized views).
+fn extract_query_dependencies(
+    query: &Query<Raw>,
+    default_database: &str,
+    default_schema: &str,
+    deps: &mut BTreeSet<ObjectId>,
+) {
+    extract_query_dependencies_with_ctes(
+        query,
+        default_database,
+        default_schema,
+        deps,
+        &BTreeSet::new(),
+    );
+}
+
+/// Extract dependencies from a query, with parent CTE scope.
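+///
+/// Names bound by CTEs already in scope are excluded: in an (illustrative) query like
+/// `WITH t AS (SELECT * FROM base) SELECT * FROM t`, only `base` is recorded as a
+/// dependency, and the CTE name `t` is not.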
+fn extract_query_dependencies_with_ctes( + query: &Query, + default_database: &str, + default_schema: &str, + deps: &mut BTreeSet, + parent_cte_names: &BTreeSet, +) { + // Collect CTE names from this query level + let local_cte_names = match &query.ctes { + CteBlock::Simple(ctes) => ctes + .iter() + .map(|cte| cte.alias.name.to_string()) + .collect::>(), + CteBlock::MutuallyRecursive(mut_rec_block) => mut_rec_block + .ctes + .iter() + .map(|cte| cte.name.to_string()) + .collect::>(), + }; + + // Merge parent and local CTE names for the combined scope + let mut combined_cte_names = parent_cte_names.clone(); + combined_cte_names.extend(local_cte_names.iter().cloned()); + + // Extract from CTEs (WITH clause) + match &query.ctes { + CteBlock::Simple(ctes) => { + // For Simple CTEs, build scope incrementally: each CTE can only see + // parent CTEs and earlier CTEs at this level (not itself or later CTEs) + let mut incremental_cte_names = parent_cte_names.clone(); + for cte in ctes { + extract_query_dependencies_with_ctes( + &cte.query, + default_database, + default_schema, + deps, + &incremental_cte_names, + ); + // Add this CTE to the scope for the next CTE + incremental_cte_names.insert(cte.alias.name.to_string()); + } + } + CteBlock::MutuallyRecursive(mut_rec_block) => { + // For MutuallyRecursive CTEs, all CTEs can reference each other in any order + // Pass the combined scope (parent + all CTEs at this level) to each CTE + for cte in &mut_rec_block.ctes { + extract_query_dependencies_with_ctes( + &cte.query, + default_database, + default_schema, + deps, + &combined_cte_names, + ); + } + } + } + + // Extract from the main query body, passing combined CTE names to exclude + extract_set_expr_dependencies_with_ctes( + &query.body, + default_database, + default_schema, + deps, + &combined_cte_names, + ); +} + +/// Extract dependencies from a set expression, excluding CTE names. +fn extract_set_expr_dependencies_with_ctes( + set_expr: &SetExpr, + default_database: &str, + default_schema: &str, + deps: &mut BTreeSet, + cte_names: &BTreeSet, +) { + match set_expr { + SetExpr::Select(select) => { + extract_select_dependencies_with_ctes( + select, + default_database, + default_schema, + deps, + cte_names, + ); + } + SetExpr::Query(query) => { + extract_query_dependencies_with_ctes( + query, + default_database, + default_schema, + deps, + cte_names, + ); + } + SetExpr::SetOperation { left, right, .. } => { + extract_set_expr_dependencies_with_ctes( + left, + default_database, + default_schema, + deps, + cte_names, + ); + extract_set_expr_dependencies_with_ctes( + right, + default_database, + default_schema, + deps, + cte_names, + ); + } + SetExpr::Values(_) | SetExpr::Show(_) | SetExpr::Table(_) => { + // These don't reference other tables + } + } +} + +/// Extract dependencies from a SELECT statement, excluding CTE names. 
+fn extract_select_dependencies_with_ctes( + select: &Select, + default_database: &str, + default_schema: &str, + deps: &mut BTreeSet, + cte_names: &BTreeSet, +) { + // Extract from FROM clause + for table_with_joins in &select.from { + extract_table_factor_dependencies_with_ctes( + &table_with_joins.relation, + default_database, + default_schema, + deps, + cte_names, + ); + + // Extract from JOINs + for join in &table_with_joins.joins { + extract_table_factor_dependencies_with_ctes( + &join.relation, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + + // Extract from WHERE clause (subqueries) + if let Some(ref selection) = select.selection { + extract_expr_dependencies_with_ctes( + selection, + default_database, + default_schema, + deps, + cte_names, + ); + } + + // Extract from SELECT items (subqueries, function calls) + for item in &select.projection { + if let SelectItem::Expr { expr, .. } = item { + extract_expr_dependencies_with_ctes( + expr, + default_database, + default_schema, + deps, + cte_names, + ); + } + } +} + +/// Extract dependencies from a table factor, excluding CTE names. +fn extract_table_factor_dependencies_with_ctes( + table_factor: &TableFactor, + default_database: &str, + default_schema: &str, + deps: &mut BTreeSet, + cte_names: &BTreeSet, +) { + match table_factor { + TableFactor::Table { name, .. } => { + // name is &RawItemName + // Check if this is a CTE reference (unqualified single identifier) + let unresolved_name = name.name(); + if unresolved_name.0.len() == 1 { + let table_name = unresolved_name.0[0].to_string(); + if cte_names.contains(&table_name) { + // This is a CTE reference - don't add it as a dependency + return; + } + } + + let obj_id = ObjectId::from_raw_item_name(name, default_database, default_schema); + deps.insert(obj_id); + } + TableFactor::Derived { subquery, .. } => { + extract_query_dependencies_with_ctes( + subquery, + default_database, + default_schema, + deps, + cte_names, + ); + } + TableFactor::Function { .. } => { + // Table functions might reference tables, but this is complex to extract + // For now, we don't track these dependencies + } + TableFactor::RowsFrom { .. } => { + // ROWS FROM might reference tables, but this is complex to extract + // For now, we don't track these dependencies + } + TableFactor::NestedJoin { join, .. } => { + extract_table_factor_dependencies_with_ctes( + &join.relation, + default_database, + default_schema, + deps, + cte_names, + ); + for nested_join in &join.joins { + extract_table_factor_dependencies_with_ctes( + &nested_join.relation, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + } +} + +/// Extract dependencies from an expression, excluding CTE names. +fn extract_expr_dependencies_with_ctes( + expr: &Expr, + default_database: &str, + default_schema: &str, + deps: &mut BTreeSet, + cte_names: &BTreeSet, +) { + match expr { + Expr::Subquery(query) | Expr::Exists(query) => { + extract_query_dependencies_with_ctes( + query, + default_database, + default_schema, + deps, + cte_names, + ); + } + Expr::InSubquery { expr, subquery, .. } => { + extract_expr_dependencies_with_ctes( + expr, + default_database, + default_schema, + deps, + cte_names, + ); + extract_query_dependencies_with_ctes( + subquery, + default_database, + default_schema, + deps, + cte_names, + ); + } + Expr::Between { + expr, low, high, .. 
+ } => { + extract_expr_dependencies_with_ctes( + expr, + default_database, + default_schema, + deps, + cte_names, + ); + extract_expr_dependencies_with_ctes( + low, + default_database, + default_schema, + deps, + cte_names, + ); + extract_expr_dependencies_with_ctes( + high, + default_database, + default_schema, + deps, + cte_names, + ); + } + Expr::Op { expr1, expr2, .. } => { + // Extract from operands of binary/unary operations (e.g., AND, OR, =, +, etc.) + extract_expr_dependencies_with_ctes( + expr1, + default_database, + default_schema, + deps, + cte_names, + ); + if let Some(expr2) = expr2 { + extract_expr_dependencies_with_ctes( + expr2, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + Expr::Cast { expr, .. } => { + extract_expr_dependencies_with_ctes( + expr, + default_database, + default_schema, + deps, + cte_names, + ); + } + Expr::Case { + operand, + conditions, + results, + else_result, + } => { + if let Some(operand) = operand { + extract_expr_dependencies_with_ctes( + operand, + default_database, + default_schema, + deps, + cte_names, + ); + } + for cond in conditions { + extract_expr_dependencies_with_ctes( + cond, + default_database, + default_schema, + deps, + cte_names, + ); + } + for result in results { + extract_expr_dependencies_with_ctes( + result, + default_database, + default_schema, + deps, + cte_names, + ); + } + if let Some(else_result) = else_result { + extract_expr_dependencies_with_ctes( + else_result, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + Expr::Function(func) => { + // Extract from function arguments + match &func.args { + FunctionArgs::Star => {} + FunctionArgs::Args { args, order_by } => { + for arg in args { + extract_expr_dependencies_with_ctes( + arg, + default_database, + default_schema, + deps, + cte_names, + ); + } + for order in order_by { + extract_expr_dependencies_with_ctes( + &order.expr, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + } + } + Expr::Array(exprs) | Expr::List(exprs) | Expr::Row { exprs } => { + for expr in exprs { + extract_expr_dependencies_with_ctes( + expr, + default_database, + default_schema, + deps, + cte_names, + ); + } + } + // Other expression types don't contain subqueries + _ => {} + } +} diff --git a/src/mz-deploy/src/project/planned/project.rs b/src/mz-deploy/src/project/planned/project.rs new file mode 100644 index 0000000000000..32685c920aebc --- /dev/null +++ b/src/mz-deploy/src/project/planned/project.rs @@ -0,0 +1,376 @@ +//! Project implementation methods for querying and traversal. + +use super::super::ast::Statement; +use super::super::error::DependencyError; +use super::super::typed; +use super::types::{DatabaseObject, ModStatement, Project}; +use crate::project::object_id::ObjectId; +use std::collections::{BTreeMap, BTreeSet}; + +impl Project { + /// Get topologically sorted objects for deployment. + /// + /// Returns objects in an order where dependencies come before dependents. + /// External dependencies are excluded from the sort as they are not deployable. 
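+    ///
+    /// # Example
+    /// ```ignore
+    /// // Hypothetical caller: walk objects in dependency order.
+    /// for object_id in project.topological_sort()? {
+    ///     println!("deploying {}", object_id);
+    /// }
+    /// ```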
+    pub fn topological_sort(&self) -> Result<Vec<ObjectId>, DependencyError> {
+        let mut sorted = Vec::new();
+        let mut visited = BTreeSet::new();
+        let mut in_progress = BTreeSet::new();
+
+        for object_id in self.dependency_graph.keys() {
+            // Skip external dependencies - we don't deploy them
+            if self.external_dependencies.contains(object_id) {
+                continue;
+            }
+
+            if !visited.contains(object_id) {
+                self.visit(object_id, &mut visited, &mut in_progress, &mut sorted)?;
+            }
+        }
+
+        Ok(sorted)
+    }
+
+    pub fn get_tables(&self) -> impl Iterator<Item = ObjectId> {
+        self.databases
+            .iter()
+            .flat_map(|db| db.schemas.iter())
+            .flat_map(|schema| schema.objects.iter())
+            .filter(|object| {
+                matches!(
+                    object.typed_object.stmt,
+                    Statement::CreateTable(_) | Statement::CreateTableFromSource(_)
+                )
+            })
+            .map(|object| object.id.clone())
+    }
+
+    fn visit(
+        &self,
+        object_id: &ObjectId,
+        visited: &mut BTreeSet<ObjectId>,
+        in_progress: &mut BTreeSet<ObjectId>,
+        sorted: &mut Vec<ObjectId>,
+    ) -> Result<(), DependencyError> {
+        if self.external_dependencies.contains(object_id) {
+            return Ok(());
+        }
+
+        if in_progress.contains(object_id) {
+            return Err(DependencyError::CircularDependency {
+                object: object_id.clone(),
+            });
+        }
+
+        if visited.contains(object_id) {
+            return Ok(());
+        }
+
+        in_progress.insert(object_id.clone());
+
+        if let Some(deps) = self.dependency_graph.get(object_id) {
+            for dep in deps {
+                self.visit(dep, visited, in_progress, sorted)?;
+            }
+        }
+
+        in_progress.remove(object_id);
+        visited.insert(object_id.clone());
+        sorted.push(object_id.clone());
+
+        Ok(())
+    }
+
+    /// Get all database objects in topological order with their typed statements.
+    ///
+    /// Returns a vector of (ObjectId, typed DatabaseObject) tuples in deployment order.
+    /// This allows access to the fully qualified SQL statements for each object.
+    pub fn get_sorted_objects(
+        &self,
+    ) -> Result<Vec<(ObjectId, &typed::DatabaseObject)>, DependencyError> {
+        let sorted_ids = self.topological_sort()?;
+        let mut result = Vec::new();
+
+        for object_id in sorted_ids {
+            // Find the corresponding typed object
+            if let Some(typed_obj) = self.find_typed_object(&object_id) {
+                result.push((object_id, typed_obj));
+            }
+        }
+
+        Ok(result)
+    }
+
+    /// Find the typed object for a given ObjectId.
+    fn find_typed_object(&self, object_id: &ObjectId) -> Option<&typed::DatabaseObject> {
+        for database in &self.databases {
+            if database.name != object_id.database {
+                continue;
+            }
+            for schema in &database.schemas {
+                if schema.name != object_id.schema {
+                    continue;
+                }
+                for obj in &schema.objects {
+                    if obj.id == *object_id {
+                        return Some(&obj.typed_object);
+                    }
+                }
+            }
+        }
+        None
+    }
+
+    /// Returns all module-level statements in execution order.
+    ///
+    /// Module statements are executed before object statements and come from
+    /// database.sql or schema.sql files. They're used for setup like grants,
+    /// comments, and other database/schema-level configuration.
+    ///
+    /// # Execution Order
+    ///
+    /// 1. All database-level mod statements (in the order databases appear)
+    /// 2. All schema-level mod statements (in the order schemas appear)
+    ///
+    /// This ensures that database setup happens before schema setup, which
+    /// happens before object creation.
+ /// + /// # Returns + /// + /// A vector of `ModStatement` enums, each containing: + /// - Context (database name, schema name for schema-level statements) + /// - Reference to the statement to execute + pub fn iter_mod_statements(&self) -> Vec> { + let mut result = Vec::new(); + + // First: all database-level mod statements + for database in &self.databases { + if let Some(stmts) = &database.mod_statements { + for stmt in stmts { + result.push(ModStatement::Database { + database: &database.name, + statement: stmt, + }); + } + } + } + + // Second: all schema-level mod statements + for database in &self.databases { + for schema in &database.schemas { + if let Some(stmts) = &schema.mod_statements { + for stmt in stmts { + result.push(ModStatement::Schema { + database: &database.name, + schema: &schema.name, + statement: stmt, + }); + } + } + } + } + + result + } + + /// Build a reverse dependency graph. + /// + /// Maps each object to the set of objects that depend on it. + /// Used for incremental deployment to find downstream dependencies. + /// + /// # Returns + /// HashMap where key is an ObjectId and value is the set of objects that depend on it + pub fn build_reverse_dependency_graph(&self) -> BTreeMap> { + let mut reverse: BTreeMap> = BTreeMap::new(); + + for (obj_id, deps) in &self.dependency_graph { + for dep in deps { + reverse + .entry(dep.clone()) + .or_default() + .insert(obj_id.clone()); + } + } + + reverse + } + + /// Get topologically sorted objects filtered by a set of object IDs. + /// + /// Returns objects in deployment order, but only those in the filter set. + /// Maintains topological ordering within the filtered subset. + /// + /// # Arguments + /// * `filter` - Set of ObjectIds to include in the result + /// + /// # Returns + /// Vector of (ObjectId, typed DatabaseObject) tuples in deployment order + pub fn get_sorted_objects_filtered( + &self, + filter: &BTreeSet, + ) -> Result, DependencyError> { + let sorted_ids = self.topological_sort()?; + + // Filter to only include objects in the filter set + let filtered_ids: Vec = sorted_ids + .into_iter() + .filter(|id| filter.contains(id)) + .collect(); + + let mut result = Vec::new(); + for object_id in filtered_ids { + if let Some(typed_obj) = self.find_typed_object(&object_id) { + result.push((object_id, typed_obj)); + } + } + + Ok(result) + } + + /// Iterate over all database objects in the project. + /// + /// This flattens the database → schema → object hierarchy into a single iterator. + /// + /// # Returns + /// Iterator over references to all DatabaseObject instances in the project + /// + /// # Example + /// ```ignore + /// for obj in project.iter_objects() { + /// println!("Object: {}", obj.id); + /// } + /// ``` + pub fn iter_objects(&self) -> impl Iterator { + self.databases + .iter() + .flat_map(|db| db.schemas.iter()) + .flat_map(|schema| schema.objects.iter()) + } + + /// Find a database object by its ObjectId. + /// + /// This is more efficient than manually iterating through the hierarchy. + /// + /// # Arguments + /// * `id` - The ObjectId to search for + /// + /// # Returns + /// Some(&DatabaseObject) if found, None otherwise + /// + /// # Example + /// ```ignore + /// if let Some(obj) = project.find_object(&object_id) { + /// println!("Found: {}", obj.id); + /// } + /// ``` + pub fn find_object(&self, id: &ObjectId) -> Option<&DatabaseObject> { + self.iter_objects().find(|obj| &obj.id == id) + } + + /// Validate that sources and sinks don't share clusters with indexes or materialized views. 
+ /// + /// This validation prevents accidentally recreating sources/sinks when updating compute objects. + /// + /// # Arguments + /// * `sources_by_cluster` - Map of cluster name to list of source FQNs from the database + /// + /// # Returns + /// * `Ok(())` if no conflicts found + /// * `Err((cluster_name, compute_objects, storage_objects))` if conflicts detected + /// + /// # Example + /// ```ignore + /// let sources = query_sources_by_cluster(&client).await?; + /// project.validate_cluster_isolation(&sources)?; + /// ``` + pub fn validate_cluster_isolation( + &self, + sources_by_cluster: &BTreeMap>, + ) -> Result<(), (String, Vec, Vec)> { + // Build a map of cluster -> compute objects (indexes, MVs) + let mut cluster_compute_objects: BTreeMap> = BTreeMap::new(); + + for db in &self.databases { + for schema in &db.schemas { + for obj in &schema.objects { + // Check for materialized views + if let Statement::CreateMaterializedView(mv) = &obj.typed_object.stmt { + if let Some(cluster_name) = &mv.in_cluster { + cluster_compute_objects + .entry(cluster_name.to_string()) + .or_default() + .push(obj.id.to_string()); + } + } + + // Check for indexes + for index in &obj.typed_object.indexes { + if let Some(cluster_name) = &index.in_cluster { + let index_name = index + .name + .as_ref() + .map(|n| format!(" (index: {})", n)) + .unwrap_or_default(); + cluster_compute_objects + .entry(cluster_name.to_string()) + .or_default() + .push(format!("{}{}", obj.id, index_name)); + } + } + } + } + } + + // Build a map of cluster -> sinks + let mut cluster_sinks: BTreeMap> = BTreeMap::new(); + + for db in &self.databases { + for schema in &db.schemas { + for obj in &schema.objects { + if let Statement::CreateSink(sink) = &obj.typed_object.stmt { + if let Some(cluster_name) = &sink.in_cluster { + cluster_sinks + .entry(cluster_name.to_string()) + .or_default() + .push(obj.id.to_string()); + } + } + } + } + } + + // Get all clusters that have compute objects or sinks + let mut all_clusters: BTreeSet = BTreeSet::new(); + all_clusters.extend(cluster_compute_objects.keys().cloned()); + all_clusters.extend(cluster_sinks.keys().cloned()); + + // Check for conflicts: cluster has both compute objects AND (sources OR sinks) + for cluster_name in all_clusters { + let compute_objects = cluster_compute_objects.get(&cluster_name); + let sources = sources_by_cluster.get(&cluster_name); + let sinks = cluster_sinks.get(&cluster_name); + + let has_compute = compute_objects.is_some() && !compute_objects.unwrap().is_empty(); + let has_sources = sources.is_some() && !sources.unwrap().is_empty(); + let has_sinks = sinks.is_some() && !sinks.unwrap().is_empty(); + + if has_compute && (has_sources || has_sinks) { + let mut storage_objects = Vec::new(); + if let Some(sources) = sources { + storage_objects.extend(sources.iter().cloned()); + } + if let Some(sinks) = sinks { + storage_objects.extend(sinks.iter().cloned()); + } + + return Err(( + cluster_name, + compute_objects.unwrap().clone(), + storage_objects, + )); + } + } + + Ok(()) + } +} diff --git a/src/mz-deploy/src/project/planned/tests.rs b/src/mz-deploy/src/project/planned/tests.rs new file mode 100644 index 0000000000000..f1cb359496dcf --- /dev/null +++ b/src/mz-deploy/src/project/planned/tests.rs @@ -0,0 +1,1978 @@ +//! Tests for the planned representation module. 
+ +use super::super::ast::{Cluster, Statement}; +use super::super::typed; +use super::dependency::extract_dependencies; +use super::types::{Database, DatabaseObject, Project, Schema, SchemaType}; +use crate::project::object_id::ObjectId; +use mz_sql_parser::ast::Ident; +use std::collections::{BTreeMap, BTreeSet}; + +#[test] +fn test_object_id_from_item_name() { + use mz_sql_parser::ast::UnresolvedItemName; + + let name = UnresolvedItemName(vec![Ident::new("users").unwrap()]); + let id = ObjectId::from_item_name(&name, "db", "public"); + assert_eq!(id.database, "db"); + assert_eq!(id.schema, "public"); + assert_eq!(id.object, "users"); + + let name = UnresolvedItemName(vec![ + Ident::new("myschema").unwrap(), + Ident::new("users").unwrap(), + ]); + let id = ObjectId::from_item_name(&name, "db", "public"); + assert_eq!(id.database, "db"); + assert_eq!(id.schema, "myschema"); + assert_eq!(id.object, "users"); + + let name = UnresolvedItemName(vec![ + Ident::new("mydb").unwrap(), + Ident::new("myschema").unwrap(), + Ident::new("users").unwrap(), + ]); + let id = ObjectId::from_item_name(&name, "db", "public"); + assert_eq!(id.database, "mydb"); + assert_eq!(id.schema, "myschema"); + assert_eq!(id.object, "users"); +} + +#[test] +fn test_object_id_fqn() { + let id = ObjectId::new("db".to_string(), "schema".to_string(), "table".to_string()); + assert_eq!(id.to_string(), "db.schema.table"); +} + +#[test] +fn test_cluster_equality() { + let c1 = Cluster::new("quickstart".to_string()); + let c2 = Cluster::new("quickstart".to_string()); + let c3 = Cluster::new("prod".to_string()); + + assert_eq!(c1, c2); + assert_ne!(c1, c3); +} + +#[test] +fn test_cluster_in_hashset() { + let mut clusters = BTreeSet::new(); + clusters.insert(Cluster::new("quickstart".to_string())); + clusters.insert(Cluster::new("quickstart".to_string())); // duplicate + clusters.insert(Cluster::new("prod".to_string())); + + assert_eq!(clusters.len(), 2); + assert!(clusters.contains(&Cluster::new("quickstart".to_string()))); + assert!(clusters.contains(&Cluster::new("prod".to_string()))); +} + +#[test] +fn test_extract_dependencies_materialized_view_with_cluster() { + let sql = "CREATE MATERIALIZED VIEW mv IN CLUSTER quickstart AS SELECT * FROM users"; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateMaterializedView(mv_stmt) = &parsed[0].ast { + let stmt = Statement::CreateMaterializedView(mv_stmt.clone()); + let (deps, clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have one dependency (users table) + assert_eq!(deps.len(), 1); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + + // Should have one cluster dependency + assert_eq!(clusters.len(), 1); + assert!(clusters.contains(&Cluster::new("quickstart".to_string()))); + } else { + panic!("Expected CreateMaterializedView statement"); + } +} + +#[test] +fn test_extract_dependencies_materialized_view_without_cluster() { + let sql = "CREATE MATERIALIZED VIEW mv AS SELECT * FROM users"; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateMaterializedView(mv_stmt) = &parsed[0].ast { + let stmt = Statement::CreateMaterializedView(mv_stmt.clone()); + let (deps, clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have one dependency (users table) + assert_eq!(deps.len(), 1); + + // Should have no cluster dependencies + assert_eq!(clusters.len(), 0); + 
} else { + panic!("Expected CreateMaterializedView statement"); + } +} + +#[test] +fn test_extract_dependencies_view_no_clusters() { + let sql = "CREATE VIEW v AS SELECT * FROM users"; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (_deps, clusters) = extract_dependencies(&stmt, "db", "public"); + + // Views don't have cluster dependencies + assert_eq!(clusters.len(), 0); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_multiple_materialized_views_with_different_clusters() { + let sqls = vec![ + "CREATE MATERIALIZED VIEW mv1 IN CLUSTER quickstart AS SELECT * FROM t1", + "CREATE MATERIALIZED VIEW mv2 IN CLUSTER prod AS SELECT * FROM t2", + "CREATE MATERIALIZED VIEW mv3 IN CLUSTER quickstart AS SELECT * FROM t3", + ]; + + let mut all_clusters = BTreeSet::new(); + + for sql in sqls { + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + if let mz_sql_parser::ast::Statement::CreateMaterializedView(mv_stmt) = &parsed[0].ast { + let stmt = Statement::CreateMaterializedView(mv_stmt.clone()); + let (_deps, clusters) = extract_dependencies(&stmt, "db", "public"); + all_clusters.extend(clusters); + } + } + + // Should have 2 unique clusters (quickstart and prod) + assert_eq!(all_clusters.len(), 2); + assert!(all_clusters.contains(&Cluster::new("quickstart".to_string()))); + assert!(all_clusters.contains(&Cluster::new("prod".to_string()))); +} + +#[test] +fn test_build_reverse_dependency_graph() { + // Create a simple dependency graph + let mut dependency_graph = BTreeMap::new(); + + let obj1 = ObjectId::new("db".to_string(), "public".to_string(), "table1".to_string()); + let obj2 = ObjectId::new("db".to_string(), "public".to_string(), "view1".to_string()); + let obj3 = ObjectId::new("db".to_string(), "public".to_string(), "view2".to_string()); + + // view1 depends on table1 + let mut deps1 = BTreeSet::new(); + deps1.insert(obj1.clone()); + dependency_graph.insert(obj2.clone(), deps1); + + // view2 depends on view1 + let mut deps2 = BTreeSet::new(); + deps2.insert(obj2.clone()); + dependency_graph.insert(obj3.clone(), deps2); + + // table1 has no dependencies + dependency_graph.insert(obj1.clone(), BTreeSet::new()); + + let project = Project { + databases: vec![], + dependency_graph, + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + }; + + // Build reverse graph + let reverse = project.build_reverse_dependency_graph(); + + // table1 should have view1 as a dependent + assert!(reverse.get(&obj1).unwrap().contains(&obj2)); + + // view1 should have view2 as a dependent + assert!(reverse.get(&obj2).unwrap().contains(&obj3)); + + // view2 should have no dependents + assert!(!reverse.contains_key(&obj3)); +} + +#[test] +fn test_get_sorted_objects_filtered() { + use crate::project::raw; + use crate::project::typed; + use std::fs; + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let src_dir = temp_dir.path(); + + // Create test structure with separate schemas for tables and views + let db_path = src_dir.join("test_db"); + let tables_schema_path = db_path.join("tables"); + let views_schema_path = db_path.join("views"); + fs::create_dir_all(&tables_schema_path).unwrap(); + fs::create_dir_all(&views_schema_path).unwrap(); + + // Create table in tables schema + fs::write( + tables_schema_path.join("table1.sql"), + "CREATE TABLE 
table1 (id INT);", + ) + .unwrap(); + + // Create view depending on table in views schema + fs::write( + views_schema_path.join("view1.sql"), + "CREATE VIEW view1 AS SELECT * FROM tables.table1;", + ) + .unwrap(); + + // Create another view depending on view1 in views schema + fs::write( + views_schema_path.join("view2.sql"), + "CREATE VIEW view2 AS SELECT * FROM view1;", + ) + .unwrap(); + + // Load and convert to planned + let raw_project = raw::load_project(src_dir).unwrap(); + let typed_project = typed::Project::try_from(raw_project).unwrap(); + let planned_project = Project::from(typed_project); + + // Create filter that only includes view1 + let mut filter = BTreeSet::new(); + let view1_id = ObjectId::new( + "test_db".to_string(), + "views".to_string(), + "view1".to_string(), + ); + filter.insert(view1_id.clone()); + + // Get filtered objects + let filtered = planned_project + .get_sorted_objects_filtered(&filter) + .unwrap(); + + // Should only contain view1 + assert_eq!(filtered.len(), 1); + assert_eq!(filtered[0].0, view1_id); +} + +#[test] +fn test_extract_dependencies_with_mutually_recursive_ctes() { + // Test basic mutually recursive CTEs that reference each other and external tables + let sql = r#" + CREATE MATERIALIZED VIEW mv AS + WITH MUTUALLY RECURSIVE + is_even (n int, result bool) AS ( + SELECT 0 as n, TRUE as result + UNION ALL + SELECT ni.n, ie_prev.result + FROM numbers_input ni, is_odd ie_prev + WHERE ni.n > 0 AND ni.n - 1 = ie_prev.n + ), + is_odd (n int, result bool) AS ( + SELECT ni.n, NOT ie.result as result + FROM numbers_input ni, is_even ie + WHERE ni.n > 0 AND ni.n - 1 = ie.n + ) + SELECT n, result AS is_even + FROM is_even + ORDER BY n + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateMaterializedView(mv_stmt) = &parsed[0].ast { + let stmt = Statement::CreateMaterializedView(mv_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on numbers_input, not on is_even or is_odd (internal CTEs) + assert_eq!(deps.len(), 1); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "numbers_input".to_string() + ))); + } else { + panic!("Expected CreateMaterializedView statement"); + } +} + +#[test] +fn test_extract_dependencies_mutually_recursive_with_subquery() { + // Test mutually recursive CTEs with subqueries in WHERE clause + let sql = r#" + CREATE VIEW v AS + WITH MUTUALLY RECURSIVE + cte1 (id int) AS ( + SELECT id FROM table1 + WHERE id IN (SELECT id FROM cte2) + ), + cte2 (id int) AS ( + SELECT id FROM table2 + WHERE EXISTS (SELECT 1 FROM cte1) + ) + SELECT * FROM cte1 + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on table1 and table2, but not on cte1 or cte2 + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_extract_dependencies_mutually_recursive_with_derived_table() { + // Test mutually recursive CTEs with derived tables (subqueries in FROM) 
+ let sql = r#" + CREATE MATERIALIZED VIEW mv AS + WITH MUTUALLY RECURSIVE + cte1 (id int, value text) AS ( + SELECT id, value FROM ( + SELECT id, value FROM base_table WHERE id > 0 + ) sub + WHERE id IN (SELECT id FROM cte2) + ), + cte2 (id int, value text) AS ( + SELECT id, value FROM ( + SELECT id, value FROM another_table + WHERE value IN (SELECT value FROM cte1) + ) + ) + SELECT * FROM cte2 + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateMaterializedView(mv_stmt) = &parsed[0].ast { + let stmt = Statement::CreateMaterializedView(mv_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on base_table and another_table + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "another_table".to_string() + ))); + } else { + panic!("Expected CreateMaterializedView statement"); + } +} + +#[test] +fn test_extract_dependencies_mutually_recursive_nested_cte_reference() { + // Test that CTE references inside nested queries don't get added as dependencies + let sql = r#" + CREATE VIEW v AS + WITH MUTUALLY RECURSIVE + cte_a (id int) AS ( + SELECT id FROM real_table + WHERE id IN ( + SELECT id FROM ( + SELECT id FROM cte_b + ) subquery + ) + ), + cte_b (id int) AS ( + SELECT id FROM cte_a + ) + SELECT * FROM cte_b + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on real_table, not on cte_a or cte_b + assert_eq!(deps.len(), 1); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "real_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_extract_dependencies_simple_cte_cannot_forward_reference() { + // Test that Simple CTEs build scope incrementally + // In this case, cte1 tries to reference cte2 which comes later + // With our incremental scoping, cte2 will be treated as an external table + let sql = r#" + CREATE VIEW v AS + WITH + cte1 AS (SELECT * FROM cte2), + cte2 AS (SELECT * FROM base_table) + SELECT * FROM cte1 + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // With incremental scoping, cte1 doesn't know about cte2 yet + // So cte2 is treated as an external dependency (along with base_table from cte2's definition) + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "cte2".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_extract_dependencies_simple_cte_backward_reference() { + // Test that Simple CTEs can reference earlier CTEs + let sql = r#" + CREATE VIEW v AS + WITH + cte1 AS (SELECT * FROM base_table), + cte2 AS (SELECT * FROM cte1) + SELECT * FROM cte2 + "#; + let 
parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // cte2 can see cte1, so only base_table is an external dependency + assert_eq!(deps.len(), 1); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_uncorrelated_subquery_in_where() { + // Test uncorrelated subquery in WHERE clause + let sql = r#" + CREATE VIEW v AS + SELECT * FROM table1 + WHERE id IN (SELECT id FROM table2 WHERE status = 'active') + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both table1 and table2 + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_correlated_subquery_in_where() { + // Test correlated subquery in WHERE clause (references outer query) + let sql = r#" + CREATE VIEW v AS + SELECT * FROM table1 t1 + WHERE EXISTS ( + SELECT 1 FROM table2 t2 + WHERE t2.parent_id = t1.id + AND t2.status = 'active' + ) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both table1 and table2 + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_subquery_in_select_list() { + // Test subquery in SELECT list (scalar subquery) + let sql = r#" + CREATE VIEW v AS + SELECT + id, + name, + (SELECT COUNT(*) FROM orders WHERE orders.user_id = users.id) as order_count + FROM users + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both users and orders + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_nested_uncorrelated_subqueries() { + // Test nested uncorrelated subqueries + let sql = r#" + CREATE VIEW v AS + SELECT * FROM table1 + WHERE id IN ( + SELECT user_id FROM table2 + WHERE category_id IN ( + SELECT id FROM table3 WHERE active 
= true + ) + ) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on table1, table2, and table3 + assert_eq!(deps.len(), 3); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table3".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_subquery_with_simple_cte() { + // Test subquery referencing a Simple CTE (should not be treated as external dependency) + let sql = r#" + CREATE VIEW v AS + WITH cte1 AS ( + SELECT * FROM base_table + ) + SELECT * FROM table1 + WHERE id IN (SELECT id FROM cte1) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on table1 and base_table, but NOT on cte1 + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_correlated_subquery_with_cte() { + // Test correlated subquery with CTE + let sql = r#" + CREATE VIEW v AS + WITH active_users AS ( + SELECT id, name FROM users WHERE active = true + ) + SELECT * FROM orders o + WHERE EXISTS ( + SELECT 1 FROM active_users au + WHERE au.id = o.user_id + ) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on orders and users, but NOT on active_users + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_subquery_in_from_with_cte() { + // Test subquery in FROM clause (derived table) that references CTE + let sql = r#" + CREATE VIEW v AS + WITH summary AS ( + SELECT category, COUNT(*) as cnt FROM products GROUP BY category + ) + SELECT * FROM ( + SELECT s.category, s.cnt, c.name + FROM summary s + JOIN categories c ON s.category = c.id + ) derived + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on products and categories, but NOT on summary + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + 
"db".to_string(), + "public".to_string(), + "products".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "categories".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_multiple_subqueries_mixed_correlation() { + // Test multiple subqueries with mixed correlation + // Split into two tests since Materialize may have parser issues with complex WHERE clauses + let sql1 = r#" + CREATE VIEW v AS + SELECT t1.id + FROM table1 t1 + WHERE t1.id IN (SELECT user_id FROM table2) + "#; + + let sql2 = r#" + CREATE VIEW v2 AS + SELECT t1.id + FROM table1 t1 + WHERE EXISTS ( + SELECT 1 FROM table3 t3 + WHERE t3.parent_id = t1.id + ) + "#; + + // Test first query with IN subquery + let parsed1 = mz_sql_parser::parser::parse_statements(sql1).unwrap(); + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed1[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + } + + // Test second query with EXISTS subquery + let parsed2 = mz_sql_parser::parser::parse_statements(sql2).unwrap(); + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed2[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table3".to_string() + ))); + } +} + +#[test] +fn test_subquery_in_case_expression() { + // Test subquery in CASE expression + let sql = r#" + CREATE VIEW v AS + SELECT + id, + CASE + WHEN status = 'pending' THEN (SELECT COUNT(*) FROM pending_queue) + WHEN status = 'active' THEN (SELECT COUNT(*) FROM active_queue) + ELSE 0 + END as queue_size + FROM tasks + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on tasks, pending_queue, and active_queue + assert_eq!(deps.len(), 3); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "tasks".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "pending_queue".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "active_queue".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_subquery_with_table_alias() { + // Test subquery with table alias + let sql = r#" + CREATE VIEW v AS + SELECT t1.id + FROM table1 t1 + WHERE t1.id IN (SELECT user_id FROM table2) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", 
"public"); + + // Should have dependencies on table1 and table2 + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_wmr_with_correlated_subquery() { + // Test WITH MUTUALLY RECURSIVE with correlated subquery + let sql = r#" + CREATE VIEW v AS + WITH MUTUALLY RECURSIVE + cte1 (id int, parent_id int) AS ( + SELECT id, parent_id FROM base_table + WHERE EXISTS ( + SELECT 1 FROM cte2 c2 + WHERE c2.id = base_table.parent_id + ) + ), + cte2 (id int, parent_id int) AS ( + SELECT id, parent_id FROM another_table + WHERE id IN (SELECT parent_id FROM cte1) + ) + SELECT * FROM cte1 + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on base_table and another_table, but NOT on cte1 or cte2 + assert_eq!(deps.len(), 2); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "another_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +// Helper function to create a minimal Project for cluster isolation testing +fn create_test_project_for_cluster_validation() -> Project { + Project { + databases: vec![], + dependency_graph: BTreeMap::new(), + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + } +} + +#[test] +fn test_validate_cluster_isolation_no_conflicts() { + let project = create_test_project_for_cluster_validation(); + let sources_by_cluster = BTreeMap::new(); + + let result = project.validate_cluster_isolation(&sources_by_cluster); + assert!(result.is_ok()); +} + +#[test] +fn test_validate_cluster_isolation_separate_clusters() { + // Create a project with MV on compute_cluster and sink on storage_cluster + let mv_sql = "CREATE MATERIALIZED VIEW mv IN CLUSTER compute_cluster AS SELECT 1"; + let sink_sql = "CREATE SINK sink IN CLUSTER storage_cluster FROM mv INTO KAFKA CONNECTION conn (TOPIC 'test')"; + + let mv_parsed = mz_sql_parser::parser::parse_statements(mv_sql).unwrap(); + let sink_parsed = mz_sql_parser::parser::parse_statements(sink_sql).unwrap(); + + let mv_stmt = + if let mz_sql_parser::ast::Statement::CreateMaterializedView(s) = &mv_parsed[0].ast { + Statement::CreateMaterializedView(s.clone()) + } else { + panic!("Expected CreateMaterializedView"); + }; + + let sink_stmt = if let mz_sql_parser::ast::Statement::CreateSink(s) = &sink_parsed[0].ast { + Statement::CreateSink(s.clone()) + } else { + panic!("Expected CreateSink"); + }; + + let mv_obj = DatabaseObject { + id: ObjectId::new("db".to_string(), "schema".to_string(), "mv".to_string()), + typed_object: typed::DatabaseObject { + stmt: mv_stmt, + indexes: vec![], + grants: vec![], + comments: vec![], + tests: vec![], + }, + dependencies: BTreeSet::new(), + }; + + let sink_obj = DatabaseObject { + id: ObjectId::new("db".to_string(), "schema".to_string(), "sink".to_string()), + typed_object: typed::DatabaseObject { + stmt: 
sink_stmt, + indexes: vec![], + grants: vec![], + comments: vec![], + tests: vec![], + }, + dependencies: BTreeSet::new(), + }; + + let project = Project { + databases: vec![Database { + name: "db".to_string(), + schemas: vec![Schema { + name: "schema".to_string(), + objects: vec![mv_obj, sink_obj], + mod_statements: None, + schema_type: SchemaType::Storage, // Has sink + }], + mod_statements: None, + }], + dependency_graph: BTreeMap::new(), + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + }; + + // Sources on storage_cluster (different from compute objects) + let mut sources_by_cluster = BTreeMap::new(); + sources_by_cluster.insert( + "storage_cluster".to_string(), + vec!["db.schema.source1".to_string()], + ); + + let result = project.validate_cluster_isolation(&sources_by_cluster); + assert!( + result.is_ok(), + "Should succeed when storage and compute are on separate clusters" + ); +} + +#[test] +fn test_validate_cluster_isolation_conflict_mv_and_source() { + // Create a project with MV on shared_cluster + let mv_sql = "CREATE MATERIALIZED VIEW mv IN CLUSTER shared_cluster AS SELECT 1"; + let mv_parsed = mz_sql_parser::parser::parse_statements(mv_sql).unwrap(); + + let mv_stmt = + if let mz_sql_parser::ast::Statement::CreateMaterializedView(s) = &mv_parsed[0].ast { + Statement::CreateMaterializedView(s.clone()) + } else { + panic!("Expected CreateMaterializedView"); + }; + + let mv_obj = DatabaseObject { + id: ObjectId::new("db".to_string(), "schema".to_string(), "mv".to_string()), + typed_object: typed::DatabaseObject { + stmt: mv_stmt, + indexes: vec![], + grants: vec![], + comments: vec![], + tests: vec![], + }, + dependencies: BTreeSet::new(), + }; + + let project = Project { + databases: vec![Database { + name: "db".to_string(), + schemas: vec![Schema { + name: "schema".to_string(), + objects: vec![mv_obj], + mod_statements: None, + schema_type: SchemaType::Compute, // Has MV + }], + mod_statements: None, + }], + dependency_graph: BTreeMap::new(), + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + }; + + // Source on the same cluster as MV + let mut sources_by_cluster = BTreeMap::new(); + sources_by_cluster.insert( + "shared_cluster".to_string(), + vec!["db.schema.source1".to_string()], + ); + + let result = project.validate_cluster_isolation(&sources_by_cluster); + assert!( + result.is_err(), + "Should fail when MV and source share a cluster" + ); + + if let Err((cluster_name, compute_objects, storage_objects)) = result { + assert_eq!(cluster_name, "shared_cluster"); + assert_eq!(compute_objects.len(), 1); + assert!(compute_objects.contains(&"db.schema.mv".to_string())); + assert_eq!(storage_objects.len(), 1); + assert!(storage_objects.contains(&"db.schema.source1".to_string())); + } +} + +#[test] +fn test_validate_cluster_isolation_only_compute_objects() { + // Create a project with only MVs and indexes (no sinks) + let mv_sql = "CREATE MATERIALIZED VIEW mv IN CLUSTER compute_cluster AS SELECT 1"; + let mv_parsed = mz_sql_parser::parser::parse_statements(mv_sql).unwrap(); + + let mv_stmt = + if let mz_sql_parser::ast::Statement::CreateMaterializedView(s) = &mv_parsed[0].ast { + Statement::CreateMaterializedView(s.clone()) + } else { + panic!("Expected CreateMaterializedView"); + }; + + let mv_obj = DatabaseObject { + id: ObjectId::new("db".to_string(), "schema".to_string(), "mv".to_string()), + typed_object: typed::DatabaseObject { + stmt: mv_stmt, + indexes: vec![], + grants: 
vec![], + comments: vec![], + tests: vec![], + }, + dependencies: BTreeSet::new(), + }; + + let project = Project { + databases: vec![Database { + name: "db".to_string(), + schemas: vec![Schema { + name: "schema".to_string(), + objects: vec![mv_obj], + mod_statements: None, + schema_type: SchemaType::Compute, // Has MV + }], + mod_statements: None, + }], + dependency_graph: BTreeMap::new(), + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + }; + + // No sources on any cluster + let sources_by_cluster = BTreeMap::new(); + + let result = project.validate_cluster_isolation(&sources_by_cluster); + assert!( + result.is_ok(), + "Should succeed when cluster only has compute objects" + ); +} + +#[test] +fn test_validate_cluster_isolation_only_storage_objects() { + // Create a project with only a sink (no MVs or indexes) + let sink_sql = "CREATE SINK sink IN CLUSTER storage_cluster FROM t INTO KAFKA CONNECTION conn (TOPIC 'test')"; + let sink_parsed = mz_sql_parser::parser::parse_statements(sink_sql).unwrap(); + + let sink_stmt = if let mz_sql_parser::ast::Statement::CreateSink(s) = &sink_parsed[0].ast { + Statement::CreateSink(s.clone()) + } else { + panic!("Expected CreateSink"); + }; + + let sink_obj = DatabaseObject { + id: ObjectId::new("db".to_string(), "schema".to_string(), "sink".to_string()), + typed_object: typed::DatabaseObject { + stmt: sink_stmt, + indexes: vec![], + grants: vec![], + comments: vec![], + tests: vec![], + }, + dependencies: BTreeSet::new(), + }; + + let project = Project { + databases: vec![Database { + name: "db".to_string(), + schemas: vec![Schema { + name: "schema".to_string(), + objects: vec![sink_obj], + mod_statements: None, + schema_type: SchemaType::Storage, // Has sink + }], + mod_statements: None, + }], + dependency_graph: BTreeMap::new(), + external_dependencies: BTreeSet::new(), + cluster_dependencies: BTreeSet::new(), + tests: vec![], + }; + + // Sources on the same cluster + let mut sources_by_cluster = BTreeMap::new(); + sources_by_cluster.insert( + "storage_cluster".to_string(), + vec!["db.schema.source1".to_string()], + ); + + let result = project.validate_cluster_isolation(&sources_by_cluster); + assert!( + result.is_ok(), + "Should succeed when cluster only has storage objects (sources + sinks)" + ); +} + +// ============================================================================ +// Edge case tests for external dependency extraction +// ============================================================================ + +#[test] +fn test_dependencies_through_lateral_join() { + // Test that LATERAL join dependencies are extracted correctly + let sql = r#" + CREATE VIEW v AS + SELECT grp.category, sub.total + FROM (SELECT DISTINCT category FROM categories) grp, + LATERAL ( + SELECT SUM(amount) as total + FROM sales + WHERE sales.category = grp.category + ) sub + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both categories and sales + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "categories".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "sales".to_string() + ))); + } 
else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_in_window_function() { + // Test that dependencies in window function PARTITION BY/ORDER BY are extracted + let sql = r#" + CREATE VIEW v AS + SELECT + id, + amount, + SUM(amount) OVER (PARTITION BY category ORDER BY created_at) as running_total, + RANK() OVER (PARTITION BY region ORDER BY amount DESC) as rank + FROM orders + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependency on orders + assert_eq!(deps.len(), 1, "Expected 1 dependency, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_in_window_function_with_subquery() { + // Test window function with a scalar subquery in the frame + let sql = r#" + CREATE VIEW v AS + SELECT + id, + amount, + amount - (SELECT AVG(amount) FROM sales) as diff_from_avg + FROM orders + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both orders and sales + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "sales".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_in_union() { + // Test that UNION queries extract dependencies from all branches + let sql = r#" + CREATE VIEW v AS + SELECT id, name FROM customers + UNION + SELECT id, name FROM vendors + UNION ALL + SELECT id, name FROM partners + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on all three tables + assert_eq!(deps.len(), 3, "Expected 3 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "customers".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "vendors".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "partners".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_in_intersect_except() { + // Test that INTERSECT and EXCEPT queries extract dependencies + let sql = r#" + CREATE VIEW v AS + SELECT id FROM all_users + INTERSECT + SELECT id FROM active_users + EXCEPT + SELECT id FROM banned_users + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = 
extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on all three tables + assert_eq!(deps.len(), 3, "Expected 3 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "all_users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "active_users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "banned_users".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_with_self_join() { + // Test self-join where the same table appears twice with different aliases + let sql = r#" + CREATE VIEW v AS + SELECT e.id, e.name, m.name as manager_name + FROM employees e + LEFT JOIN employees m ON e.manager_id = m.id + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have only one dependency (employees) even though it's used twice + assert_eq!( + deps.len(), + 1, + "Expected 1 dependency (self-join), found: {:?}", + deps + ); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "employees".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_derived_tables() { + // Test deeply nested derived tables (subqueries in FROM) + let sql = r#" + CREATE VIEW v AS + SELECT * + FROM ( + SELECT * + FROM ( + SELECT * + FROM ( + SELECT * FROM deep_table + ) level1 + ) level2 + ) level3 + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should extract dependency from deeply nested subquery + assert_eq!( + deps.len(), + 1, + "Expected 1 dependency from nested subquery, found: {:?}", + deps + ); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "deep_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_cross_schema_reference() { + // Test that cross-schema references are extracted with correct schema + let sql = r#" + CREATE VIEW v AS + SELECT a.id, b.name + FROM public.table1 a + JOIN internal.table2 b ON a.id = b.id + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies in different schemas + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "internal".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_cross_database_reference() { + // Test that cross-database references are extracted with correct database + let sql = r#" + CREATE VIEW v AS + SELECT 
a.id, b.name + FROM db1.schema1.table1 a + JOIN db2.schema2.table2 b ON a.id = b.id + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies in different databases + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db1".to_string(), + "schema1".to_string(), + "table1".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db2".to_string(), + "schema2".to_string(), + "table2".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_with_values_clause() { + // Test that VALUES clause doesn't create dependencies + let sql = r#" + CREATE VIEW v AS + SELECT * FROM (VALUES (1, 'a'), (2, 'b'), (3, 'c')) AS t(id, name) + UNION ALL + SELECT id, name FROM real_table + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on real_table, not the VALUES clause + assert_eq!(deps.len(), 1, "Expected 1 dependency, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "real_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_complex_join_with_subqueries() { + // Test complex join with subqueries on both sides + let sql = r#" + CREATE VIEW v AS + SELECT l.id, r.total + FROM ( + SELECT id, category FROM products WHERE active = true + ) l + JOIN ( + SELECT category, SUM(amount) as total FROM sales GROUP BY category + ) r ON l.category = r.category + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on both products and sales + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "products".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "sales".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +// ============================================================================ +// Nested CTE Dependency Extraction Tests +// ============================================================================ + +#[test] +fn test_dependencies_nested_cte_in_derived_table() { + // Test dependency extraction with CTE inside a derived table + let sql = r#" + CREATE VIEW v AS + SELECT * FROM ( + WITH inner_cte AS ( + SELECT id, name FROM users + ) + SELECT * FROM inner_cte JOIN orders ON inner_cte.id = orders.user_id + ) subquery + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // 
Should have dependencies on users and orders, but NOT inner_cte + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + // inner_cte should NOT be a dependency + assert!(!deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "inner_cte".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_cte_in_scalar_subquery() { + // Test dependency extraction with CTE inside a scalar subquery + let sql = r#" + CREATE VIEW v AS + SELECT + id, + (WITH totals AS (SELECT SUM(amount) as total FROM transactions WHERE transactions.user_id = users.id) + SELECT total FROM totals) as user_total + FROM users + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on users and transactions, but NOT totals + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "transactions".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_triple_nested_ctes() { + // Test dependency extraction with three levels of nested CTEs + let sql = r#" + CREATE VIEW v AS + WITH outer_cte AS ( + SELECT * FROM ( + WITH middle_cte AS ( + SELECT * FROM ( + WITH inner_cte AS ( + SELECT id FROM base_table + ) + SELECT * FROM inner_cte + ) innermost + ) + SELECT * FROM middle_cte + ) middle_result + ) + SELECT * FROM outer_cte + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on base_table, not on any CTEs + assert_eq!(deps.len(), 1, "Expected 1 dependency, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_cte_shadowing_at_different_levels() { + // Test that CTE shadowing doesn't incorrectly add dependencies + let sql = r#" + CREATE VIEW v AS + WITH data AS ( + SELECT id FROM outer_table + ) + SELECT * FROM data + UNION ALL + SELECT * FROM ( + WITH data AS ( + SELECT id FROM inner_table + ) + SELECT * FROM data + ) inner_result + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on outer_table and inner_table, but NOT 'data' + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), 
+ "outer_table".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "inner_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_cte_in_lateral_join() { + // Test dependency extraction with CTE inside a LATERAL join + let sql = r#" + CREATE VIEW v AS + SELECT u.id, stats.total + FROM users u, + LATERAL ( + WITH user_orders AS ( + SELECT amount FROM orders WHERE orders.user_id = u.id + ) + SELECT SUM(amount) as total FROM user_orders + ) stats + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on users and orders, but NOT user_orders + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "users".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "orders".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_parallel_ctes_in_union_branches() { + // Test dependency extraction with independent CTEs in UNION branches + let sql = r#" + CREATE VIEW v AS + SELECT * FROM ( + WITH left_cte AS (SELECT id FROM left_table) + SELECT * FROM left_cte + ) left_branch + UNION ALL + SELECT * FROM ( + WITH right_cte AS (SELECT id FROM right_table) + SELECT * FROM right_cte + ) right_branch + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on left_table and right_table, but NOT on CTEs + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "left_table".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "right_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_outer_cte_visible_in_nested_subquery() { + // Test that outer CTE references in nested subqueries are not treated as dependencies + let sql = r#" + CREATE VIEW v AS + WITH main_data AS ( + SELECT id, value FROM source_table + ) + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM main_data WHERE value > 10 + ) inner_sub + ) outer_sub + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on source_table, not main_data + assert_eq!(deps.len(), 1, "Expected 1 dependency, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "source_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_cte_joining_outer_cte() { + // Test nested CTE that joins with outer CTE + let sql = r#" + CREATE VIEW v 
AS + WITH outer_data AS ( + SELECT id, category FROM categories + ) + SELECT * FROM ( + WITH inner_data AS ( + SELECT product_id, price FROM products + ) + SELECT i.product_id, i.price, o.category + FROM inner_data i + JOIN outer_data o ON i.product_id = o.id + ) result + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on categories and products, but NOT on CTEs + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "categories".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "products".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_cte_in_exists() { + // Test CTE inside EXISTS subquery + let sql = r#" + CREATE VIEW v AS + SELECT * FROM main_table m + WHERE EXISTS ( + WITH related AS ( + SELECT id FROM related_table WHERE status = 'active' + ) + SELECT 1 FROM related WHERE related.id = m.related_id + ) + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on main_table and related_table, but NOT 'related' + assert_eq!(deps.len(), 2, "Expected 2 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "main_table".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "related_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn test_dependencies_nested_cte_with_multiple_tables_per_level() { + // Test nested CTEs where each level references multiple external tables + let sql = r#" + CREATE VIEW v AS + WITH outer_cte AS ( + SELECT a.id, b.name FROM table_a a JOIN table_b b ON a.id = b.id + ) + SELECT * FROM ( + WITH inner_cte AS ( + SELECT c.id, d.value FROM table_c c JOIN table_d d ON c.id = d.id + ) + SELECT o.id, o.name, i.value + FROM outer_cte o + JOIN inner_cte i ON o.id = i.id + ) result + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should have dependencies on all four tables + assert_eq!(deps.len(), 4, "Expected 4 dependencies, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table_a".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table_b".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table_c".to_string() + ))); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "table_d".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} + +#[test] +fn 
test_dependencies_mutually_recursive_with_nested_cte_in_subquery() { + // Test MUTUALLY RECURSIVE with a nested simple CTE inside a subquery + let sql = r#" + CREATE VIEW v AS + WITH MUTUALLY RECURSIVE + cte_a (id int) AS ( + SELECT id FROM ( + WITH nested AS (SELECT id FROM base_table) + SELECT * FROM nested + ) sub + WHERE id IN (SELECT id FROM cte_b) + ), + cte_b (id int) AS ( + SELECT id FROM cte_a + ) + SELECT * FROM cte_b + "#; + let parsed = mz_sql_parser::parser::parse_statements(sql).unwrap(); + + if let mz_sql_parser::ast::Statement::CreateView(view_stmt) = &parsed[0].ast { + let stmt = Statement::CreateView(view_stmt.clone()); + let (deps, _clusters) = extract_dependencies(&stmt, "db", "public"); + + // Should only have dependency on base_table + assert_eq!(deps.len(), 1, "Expected 1 dependency, found: {:?}", deps); + assert!(deps.contains(&ObjectId::new( + "db".to_string(), + "public".to_string(), + "base_table".to_string() + ))); + } else { + panic!("Expected CreateView statement"); + } +} diff --git a/src/mz-deploy/src/project/planned/types.rs b/src/mz-deploy/src/project/planned/types.rs new file mode 100644 index 0000000000000..46ae0735c2101 --- /dev/null +++ b/src/mz-deploy/src/project/planned/types.rs @@ -0,0 +1,93 @@ +//! Type definitions for the planned representation. + +use super::super::ast::Cluster; +use super::super::typed; +use crate::project::object_id::ObjectId; +use mz_sql_parser::ast::*; +use std::collections::{BTreeMap, BTreeSet}; + +/// A database object with its dependencies. +#[derive(Debug)] +pub struct DatabaseObject { + /// The object identifier + pub id: ObjectId, + /// The validated typed statement + pub typed_object: typed::DatabaseObject, + /// Set of objects this object depends on + pub dependencies: BTreeSet, +} + +/// A module-level statement with context about where it should be executed. +/// +/// Module statements are executed before object statements and come from +/// database.sql or schema.sql files. They're used for setup like grants, +/// comments, and other database/schema-level configuration. +#[derive(Debug)] +pub enum ModStatement<'a> { + /// Database-level statement (from database.sql file) + Database { + /// The database name + database: &'a str, + /// The statement to execute + statement: &'a mz_sql_parser::ast::Statement, + }, + /// Schema-level statement (from schema.sql file) + Schema { + /// The database name + database: &'a str, + /// The schema name + schema: &'a str, + /// The statement to execute + statement: &'a mz_sql_parser::ast::Statement, + }, +} + +/// The type of objects contained in a schema. +/// +/// Schemas are segregated by object type to prevent accidental recreation: +/// - Storage schemas contain tables, sinks, and tables from sources +/// - Compute schemas contain views and materialized views +/// - Empty schemas contain no objects +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SchemaType { + /// Schema contains storage objects (tables, sinks) + Storage, + /// Schema contains computation objects (views, materialized views) + Compute, + /// Schema contains no objects + Empty, +} + +/// A schema containing objects with dependency information. +#[derive(Debug)] +pub struct Schema { + pub name: String, + pub objects: Vec, + /// Optional module-level statements (from schema.sql file) + pub mod_statements: Option>>, + /// The type of objects in this schema (Storage, Compute, or Empty) + pub schema_type: SchemaType, +} + +/// A database containing schemas with dependency information. 
+#[derive(Debug)] +pub struct Database { + pub name: String, + pub schemas: Vec, + /// Optional module-level statements (from database.sql file) + pub mod_statements: Option>>, +} + +/// A project with full dependency tracking. +#[derive(Debug)] +pub struct Project { + pub databases: Vec, + /// Global dependency graph: object_id -> set of dependencies + pub dependency_graph: BTreeMap>, + /// External dependencies: objects referenced but not defined in this project + pub external_dependencies: BTreeSet, + /// Cluster dependencies: clusters referenced by indexes and materialized views + pub cluster_dependencies: BTreeSet, + /// Unit tests defined in the project, organized by the object they test + pub tests: Vec<(ObjectId, crate::unit_test::UnitTest)>, +} diff --git a/src/mz-deploy/src/project/raw.rs b/src/mz-deploy/src/project/raw.rs new file mode 100644 index 0000000000000..4ea725cc8c453 --- /dev/null +++ b/src/mz-deploy/src/project/raw.rs @@ -0,0 +1,674 @@ +//! Raw representation of Materialize project files. +//! +//! This module provides functionality for loading and parsing Materialize project files +//! from a directory structure into an unvalidated "raw" representation. The raw types +//! mirror the file system structure and contain parsed SQL AST nodes, but do not enforce +//! semantic validation rules. +//! +//! # Raw vs Typed +//! +//! The raw representation is the first stage in a multi-stage parsing pipeline: +//! +//! ```text +//! File System → raw::Project (parsed but unvalidated) +//! ↓ +//! typed::Project (validated) +//! ``` +//! +//! **Raw stage:** +//! - Reads files from disk +//! - Parses SQL into AST nodes +//! - Organizes by directory structure +//! - No semantic validation +//! +//! **Typed stage:** +//! - Validates object names match file names +//! - Validates qualified names match directory structure +//! - Validates cross-references between statements +//! - Enforces type consistency +//! +//! # Directory Structure +//! +//! The expected directory structure is: +//! ```text +//! project_root/ +//! database_name/ ← Database directory +//! database_name.sql ← Optional database-level statements (sibling) +//! schema_name/ ← Schema directory +//! schema_name.sql ← Optional schema-level statements (sibling) +//! object_name.sql ← DatabaseObject +//! another_object.sql ← DatabaseObject +//! ``` +//! +//! # Database and Schema .sql Files +//! +//! Special `.sql` files can appear as siblings to database and schema directories. +//! These typically contain setup statements like: +//! - `GRANT` statements for database/schema permissions +//! - `COMMENT` statements for database/schema documentation +//! - Other administrative SQL +//! +//! These files are parsed and stored in the raw representation but are not carried +//! forward to the HIR, as validation focuses on individual object files. + +use super::error::{LoadError, ProjectError}; +use super::parser::parse_statements_with_context; +use mz_sql_parser::ast::{Raw, Statement}; +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; + +/// A database object loaded from a single `.sql` file, containing parsed but unvalidated SQL. +/// +/// Represents a single file in a schema directory. The file is parsed into SQL AST nodes +/// but no semantic validation is performed at this stage. +/// +/// # Contents +/// +/// A typical object file contains: +/// - One primary CREATE statement (table, view, source, etc.) 
+/// - Zero or more supporting statements (indexes, grants, comments)
+///
+/// Example `users.sql`:
+/// ```sql
+/// CREATE TABLE users (
+///     id INT,
+///     name TEXT
+/// );
+///
+/// CREATE INDEX users_id_idx ON users (id);
+/// GRANT SELECT ON users TO analyst_role;
+/// COMMENT ON TABLE users IS 'User data';
+/// ```
+///
+/// All statements are parsed into the `statements` field without validation of their
+/// relationships or correctness.
+#[derive(Debug, Clone)]
+pub struct DatabaseObject {
+    /// The name of the file (without extension)
+    pub name: String,
+    /// The full path to the file
+    pub path: PathBuf,
+    /// The parsed SQL statements from the file
+    ///
+    /// All statements in the file are parsed in order. At this stage, no validation
+    /// is performed on statement types, relationships, or consistency.
+    pub statements: Vec<Statement<Raw>>,
+}
+
+/// A schema directory containing multiple database objects and optional setup statements.
+///
+/// Represents a schema directory within a database. Each schema contains multiple
+/// `.sql` files (one per database object) and an optional sibling `.sql` file for schema-level
+/// setup (e.g., `public.sql` next to `public/` directory).
+///
+/// # Directory Mapping
+///
+/// ```text
+/// database_name/
+///     schema_name/          ← Schema directory
+///     schema_name.sql       ← Optional: mod_statements (sibling to directory)
+///         users.sql         ← DatabaseObject
+///         orders.sql        ← DatabaseObject
+/// ```
+///
+/// # Schema .sql Usage
+///
+/// The optional `schema_name.sql` file (sibling to the schema directory) might contain:
+/// ```sql
+/// -- Schema grants
+/// GRANT USAGE ON SCHEMA schema_name TO analyst_role;
+///
+/// -- Schema comments
+/// COMMENT ON SCHEMA schema_name IS 'Analytics data';
+/// ```
+#[derive(Debug, Clone)]
+pub struct Schema {
+    /// The name of the schema (directory name)
+    pub name: String,
+    /// Optional statements from sibling .sql file for the schema
+    ///
+    /// If a sibling `.sql` file exists (e.g., `public.sql` next to `public/`),
+    /// it is parsed and stored here. These statements are typically schema-level
+    /// setup code like GRANT or COMMENT statements.
+    pub mod_statements: Option<Vec<Statement<Raw>>>,
+    /// All database objects in this schema
+    ///
+    /// Each object corresponds to one `.sql` file in the schema directory.
+    pub objects: Vec<DatabaseObject>,
+}
+
+/// A database directory containing multiple schemas and optional setup statements.
+///
+/// Represents a database directory within the project. Each database contains multiple
+/// schema directories and an optional sibling `.sql` file for database-level setup
+/// (e.g., `materialize.sql` next to `materialize/` directory).
+///
+/// # Directory Mapping
+///
+/// ```text
+/// project_root/
+///     database_name/        ← Database directory
+///     database_name.sql     ← Optional: mod_statements (sibling to directory)
+///         public/           ← Schema
+///             users.sql
+///         analytics/        ← Schema
+///             reports.sql
+/// ```
+///
+/// # Database .sql Usage
+///
+/// The optional `database_name.sql` file (sibling to the database directory) might contain:
+/// ```sql
+/// -- Database grants
+/// GRANT CREATE ON DATABASE database_name TO admin_role;
+///
+/// -- Database comments
+/// COMMENT ON DATABASE database_name IS 'Production database';
+/// ```
+#[derive(Debug, Clone)]
+pub struct Database {
+    /// The name of the database (directory name)
+    pub name: String,
+    /// Optional statements from sibling .sql file for the database
+    ///
+    /// If a sibling `.sql` file exists (e.g., `materialize.sql` next to `materialize/`),
+    /// it is parsed and stored here. These statements are typically database-level
+    /// setup code like GRANT or COMMENT statements.
+    pub mod_statements: Option<Vec<Statement<Raw>>>,
+    /// All schemas in this database, keyed by schema name
+    ///
+    /// Each schema corresponds to one subdirectory in the database directory.
+    /// Hidden directories (starting with `.`) are excluded.
+    pub schemas: BTreeMap<String, Schema>,
+}
+
+/// The complete unvalidated project structure loaded from the file system.
+///
+/// Represents the entire project directory tree with all databases, schemas, and
+/// objects loaded and parsed but not yet validated. This is the top-level raw type
+/// returned by [`load_project`].
+///
+/// # Purpose
+///
+/// The `Project` type serves as the entry point for working with Materialize project
+/// files. After loading with `load_project`, you typically convert it to the validated
+/// HIR representation.
+#[derive(Debug, Clone)]
+pub struct Project {
+    /// The root directory of the project
+    ///
+    /// This is the absolute path to the directory that was passed to `load_project`.
+    /// All other paths in the project are relative to or beneath this root.
+    pub root: PathBuf,
+    /// All databases in this project, keyed by database name
+    ///
+    /// Each database corresponds to one subdirectory in the project root.
+    /// Hidden directories (starting with `.`) are excluded.
+    pub databases: BTreeMap<String, Database>,
+}
+
+/// Loads and parses a Materialize project from a directory structure.
+pub fn load_project<P: AsRef<Path>>(root: P) -> Result<Project, ProjectError> {
+    let root = root.as_ref();
+
+    if !root.exists() {
+        return Err(LoadError::RootNotFound {
+            path: root.to_path_buf(),
+        }
+        .into());
+    }
+
+    if !root.is_dir() {
+        return Err(LoadError::RootNotDirectory {
+            path: root.to_path_buf(),
+        }
+        .into());
+    }
+
+    let mut databases = BTreeMap::new();
+
+    // Iterate over database directories (first level)
+    for db_entry in fs::read_dir(root).map_err(|source| LoadError::DirectoryReadFailed {
+        path: root.to_path_buf(),
+        source,
+    })?
{ + let db_entry = db_entry.map_err(|source| LoadError::EntryReadFailed { + directory: root.to_path_buf(), + source, + })?; + let db_path = db_entry.path(); + + // Skip non-directories and hidden directories + if !db_path.is_dir() || db_entry.file_name().to_string_lossy().starts_with('.') { + continue; + } + + let db_name = db_entry.file_name().to_string_lossy().to_string(); + let mut schemas = BTreeMap::new(); + + // Check for database-level sibling .sql file (e.g., materialize.sql next to materialize/) + let db_mod_path = root.join(format!("{}.sql", db_name)); + let db_mod_statements = if db_mod_path.exists() { + let sql_content = + fs::read_to_string(&db_mod_path).map_err(|source| LoadError::FileReadFailed { + path: db_mod_path.clone(), + source, + })?; + Some(parse_statements_with_context( + &sql_content, + db_mod_path.clone(), + )?) + } else { + None + }; + + // Iterate over schema directories (second level) + for schema_entry in + fs::read_dir(&db_path).map_err(|source| LoadError::DirectoryReadFailed { + path: db_path.clone(), + source, + })? + { + let schema_entry = schema_entry.map_err(|source| LoadError::EntryReadFailed { + directory: db_path.clone(), + source, + })?; + let schema_path = schema_entry.path(); + + // Skip non-directories, hidden directories, and .sql files (schema-level .sql files are handled separately) + if !schema_path.is_dir() || schema_entry.file_name().to_string_lossy().starts_with('.') + { + continue; + } + + let schema_name = schema_entry.file_name().to_string_lossy().to_string(); + let mut objects = Vec::new(); + + // Check for schema-level sibling .sql file (e.g., public.sql next to public/) + let schema_mod_path = db_path.join(format!("{}.sql", schema_name)); + let schema_mod_statements = if schema_mod_path.exists() { + let sql_content = fs::read_to_string(&schema_mod_path).map_err(|source| { + LoadError::FileReadFailed { + path: schema_mod_path.clone(), + source, + } + })?; + Some(parse_statements_with_context( + &sql_content, + schema_mod_path.clone(), + )?) + } else { + None + }; + + // Iterate over SQL files (third level) + for object_entry in + fs::read_dir(&schema_path).map_err(|source| LoadError::DirectoryReadFailed { + path: schema_path.clone(), + source, + })? + { + let object_entry = object_entry.map_err(|source| LoadError::EntryReadFailed { + directory: schema_path.clone(), + source, + })?; + let object_path = object_entry.path(); + + // Only process .sql files + if !object_path.is_file() + || object_path.extension().and_then(|s| s.to_str()) != Some("sql") + { + continue; + } + + let object_name = object_path + .file_stem() + .and_then(|s| s.to_str()) + .ok_or_else(|| LoadError::InvalidFileName { + path: object_path.clone(), + })? 
+ .to_string(); + + // Read and parse the SQL file + let sql_content = fs::read_to_string(&object_path).map_err(|source| { + LoadError::FileReadFailed { + path: object_path.clone(), + source, + } + })?; + + let statements = parse_statements_with_context(&sql_content, object_path.clone())?; + + objects.push(DatabaseObject { + name: object_name, + path: object_path, + statements, + }); + } + + // Only add schema if it has objects or mod statements + if !objects.is_empty() || schema_mod_statements.is_some() { + schemas.insert( + schema_name.clone(), + Schema { + name: schema_name, + mod_statements: schema_mod_statements, + objects, + }, + ); + } + } + + // Only add database if it has schemas or mod statements + if !schemas.is_empty() || db_mod_statements.is_some() { + databases.insert( + db_name.clone(), + Database { + name: db_name, + mod_statements: db_mod_statements, + schemas, + }, + ); + } + } + + Ok(Project { + root: root.to_path_buf(), + databases, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_load_project_basic_structure() { + // Create a temporary directory structure + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create database/schema/object structure + let db_path = root.join("my_database"); + let schema_path = db_path.join("my_schema"); + fs::create_dir_all(&schema_path).unwrap(); + + // Write a simple SQL file + let sql_file = schema_path.join("my_table.sql"); + fs::write(&sql_file, "CREATE TABLE t (id INT);").unwrap(); + + // Load the project + let project = load_project(root).unwrap(); + + // Verify structure + assert_eq!(project.databases.len(), 1); + assert!(project.databases.contains_key("my_database")); + + let database = &project.databases["my_database"]; + assert_eq!(database.schemas.len(), 1); + assert!(database.schemas.contains_key("my_schema")); + + let schema = &database.schemas["my_schema"]; + assert_eq!(schema.objects.len(), 1); + assert_eq!(schema.objects[0].name, "my_table"); + assert_eq!(schema.objects[0].statements.len(), 1); + } + + #[test] + fn test_load_project_multiple_databases_and_schemas() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create multiple databases with multiple schemas + for db in ["db1", "db2"] { + for schema in ["schema1", "schema2"] { + let schema_path = root.join(db).join(schema); + fs::create_dir_all(&schema_path).unwrap(); + + // Create a SQL file in each schema + let sql_file = schema_path.join("object.sql"); + fs::write(&sql_file, "CREATE TABLE t (id INT);").unwrap(); + } + } + + let project = load_project(root).unwrap(); + + assert_eq!(project.databases.len(), 2); + for db_name in ["db1", "db2"] { + let database = &project.databases[db_name]; + assert_eq!(database.schemas.len(), 2); + } + } + + #[test] + fn test_load_project_ignores_hidden_directories() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create a hidden directory + let hidden_path = root.join(".hidden").join("schema"); + fs::create_dir_all(&hidden_path).unwrap(); + fs::write(hidden_path.join("object.sql"), "CREATE TABLE t (id INT);").unwrap(); + + // Create a normal directory + let normal_path = root.join("normal").join("schema"); + fs::create_dir_all(&normal_path).unwrap(); + fs::write(normal_path.join("object.sql"), "CREATE TABLE t (id INT);").unwrap(); + + let project = load_project(root).unwrap(); + + assert_eq!(project.databases.len(), 1); + assert!(project.databases.contains_key("normal")); + 
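+        // Hidden directories (and everything beneath them) are skipped during traversal,
+        // so the `.hidden` tree is never parsed or surfaced as a database.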
assert!(!project.databases.contains_key(".hidden")); + } + + #[test] + fn test_load_project_with_database_level_sql() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create database with sibling .sql file + let db_path = root.join("my_database"); + let schema_path = db_path.join("my_schema"); + fs::create_dir_all(&schema_path).unwrap(); + + // Write database-level sibling .sql file (my_database.sql next to my_database/) + fs::write( + root.join("my_database.sql"), + "GRANT CREATE ON DATABASE my_database TO admin;", + ) + .unwrap(); + + // Write a regular object + fs::write(schema_path.join("object.sql"), "CREATE TABLE t (id INT);").unwrap(); + + let project = load_project(root).unwrap(); + + let database = &project.databases["my_database"]; + assert!(database.mod_statements.is_some()); + let mod_stmts = database.mod_statements.as_ref().unwrap(); + assert_eq!(mod_stmts.len(), 1); + } + + #[test] + fn test_load_project_with_schema_level_sql() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create schema with sibling .sql file + let db_path = root.join("my_database"); + let schema_path = db_path.join("my_schema"); + fs::create_dir_all(&schema_path).unwrap(); + + // Write schema-level sibling .sql file (my_schema.sql next to my_schema/) + fs::write( + db_path.join("my_schema.sql"), + "GRANT USAGE ON SCHEMA my_schema TO analyst;", + ) + .unwrap(); + + // Write a regular object + fs::write(schema_path.join("object.sql"), "CREATE TABLE t (id INT);").unwrap(); + + let project = load_project(root).unwrap(); + + let schema = &project.databases["my_database"].schemas["my_schema"]; + assert!(schema.mod_statements.is_some()); + let mod_stmts = schema.mod_statements.as_ref().unwrap(); + assert_eq!(mod_stmts.len(), 1); + } + + #[test] + fn test_load_project_schema_sql_not_treated_as_object() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + let db_path = root.join("my_database"); + let schema_path = db_path.join("my_schema"); + fs::create_dir_all(&schema_path).unwrap(); + + // Write both schema-level .sql (sibling) and regular objects + fs::write( + db_path.join("my_schema.sql"), + "GRANT USAGE ON SCHEMA my_schema TO analyst;", + ) + .unwrap(); + fs::write(schema_path.join("table1.sql"), "CREATE TABLE t1 (id INT);").unwrap(); + fs::write(schema_path.join("table2.sql"), "CREATE TABLE t2 (id INT);").unwrap(); + + let project = load_project(root).unwrap(); + + let schema = &project.databases["my_database"].schemas["my_schema"]; + + // Schema-level .sql should be in mod_statements, not objects + assert!(schema.mod_statements.is_some()); + assert_eq!(schema.objects.len(), 2); + + // Verify that objects only include table1 and table2 + let object_names: Vec<_> = schema.objects.iter().map(|o| o.name.as_str()).collect(); + assert!(object_names.contains(&"table1")); + assert!(object_names.contains(&"table2")); + } + + #[test] + fn test_load_project_schema_with_only_schema_sql() { + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + let db_path = root.join("my_database"); + let schema_path = db_path.join("my_schema"); + fs::create_dir_all(&schema_path).unwrap(); + + // Write only schema-level .sql (sibling), no other objects + fs::write( + db_path.join("my_schema.sql"), + "GRANT USAGE ON SCHEMA my_schema TO analyst;", + ) + .unwrap(); + + let project = load_project(root).unwrap(); + + // Schema should still be loaded even with only schema-level .sql + 
assert!(project.databases.contains_key("my_database")); + let database = &project.databases["my_database"]; + assert!(database.schemas.contains_key("my_schema")); + + let schema = &database.schemas["my_schema"]; + assert!(schema.mod_statements.is_some()); + assert_eq!(schema.objects.len(), 0); + } + + #[test] + fn test_cluster_dependencies_through_full_pipeline() { + use crate::project::ast::Cluster; + use crate::project::planned; + use crate::project::typed; + + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + // Create database/schema structure with separate schemas for tables and views + let db_path = root.join("test_db"); + let tables_schema_path = db_path.join("tables"); + let views_schema_path = db_path.join("views"); + fs::create_dir_all(&tables_schema_path).unwrap(); + fs::create_dir_all(&views_schema_path).unwrap(); + + // Create a base table in tables schema + fs::write( + tables_schema_path.join("users.sql"), + "CREATE TABLE users (id INT, name TEXT);", + ) + .unwrap(); + + // Create materialized views with different clusters in views schema + fs::write( + views_schema_path.join("mv1.sql"), + "CREATE MATERIALIZED VIEW mv1 IN CLUSTER quickstart AS SELECT * FROM tables.users;", + ) + .unwrap(); + + fs::write( + views_schema_path.join("mv2.sql"), + "CREATE MATERIALIZED VIEW mv2 IN CLUSTER prod AS SELECT id FROM tables.users;", + ) + .unwrap(); + + fs::write( + views_schema_path.join("mv3.sql"), + "CREATE MATERIALIZED VIEW mv3 IN CLUSTER quickstart AS SELECT name FROM tables.users;", + ) + .unwrap(); + + // Create a regular view (no cluster) in views schema + fs::write( + views_schema_path.join("view1.sql"), + "CREATE VIEW view1 AS SELECT * FROM tables.users;", + ) + .unwrap(); + + // Load raw project + let raw_project = load_project(root).unwrap(); + assert_eq!(raw_project.databases.len(), 1); + + // Convert to typed + let typed_project = typed::Project::try_from(raw_project).unwrap(); + assert_eq!(typed_project.databases.len(), 1); + + // Convert to planned + let planned_project = planned::Project::from(typed_project); + + // Verify cluster dependencies + assert_eq!(planned_project.cluster_dependencies.len(), 2); + assert!( + planned_project + .cluster_dependencies + .contains(&Cluster::new("quickstart".to_string())) + ); + assert!( + planned_project + .cluster_dependencies + .contains(&Cluster::new("prod".to_string())) + ); + + // Verify objects exist + assert_eq!(planned_project.databases.len(), 1); + let database = &planned_project.databases[0]; + assert_eq!(database.name, "test_db"); + assert_eq!(database.schemas.len(), 2); // tables and views schemas + + // Find the schemas + let tables_schema = database + .schemas + .iter() + .find(|s| s.name == "tables") + .unwrap(); + let views_schema = database.schemas.iter().find(|s| s.name == "views").unwrap(); + + assert_eq!(tables_schema.objects.len(), 1); // 1 table + assert_eq!(views_schema.objects.len(), 4); // 3 MVs + 1 view + } +} diff --git a/src/mz-deploy/src/project/typed.rs b/src/mz-deploy/src/project/typed.rs new file mode 100644 index 0000000000000..ac4230ec5d900 --- /dev/null +++ b/src/mz-deploy/src/project/typed.rs @@ -0,0 +1,49 @@ +//! Typed representation for Materialize projects. +//! +//! This module provides a validated, type-safe representation of a Materialize project +//! structure. It transforms the raw parsed AST from the `raw` module into a semantically +//! validated typed representation that enforces structural constraints and relationships. +//! +//! # Transformation Flow +//! +//! 
```text +//! File System -> raw::Project -> typed::Project (validated) +//! | | +//! raw::Database -> typed::Database +//! | | +//! raw::Schema -> typed::Schema +//! | | +//! raw::DatabaseObject -> typed::DatabaseObject +//! ``` +//! +//! # Validation Rules +//! +//! During transformation from raw to typed, the following validations are performed: +//! +//! - **Object Identity**: Each file must contain exactly one primary CREATE statement +//! (table, view, source, etc.), and the object name must match the file name. +//! +//! - **Path Consistency**: Qualified names in CREATE statements must match the directory +//! structure (e.g., `CREATE TABLE db.schema.table` in `db/schema/table.sql`). +//! +//! - **Reference Validation**: Supporting statements (indexes, grants, comments) must +//! reference the primary object defined in the same file. +//! +//! - **Type Consistency**: GRANT and COMMENT statements must use the correct object type +//! for the primary object. +//! +//! # Module Structure +//! +//! - [`types`]: Core type definitions (FullyQualifiedName, DatabaseObject, Schema, Database, Project) +//! - [`validation`]: Validation helper functions used during conversion +//! - [`conversion`]: TryFrom implementations for raw to typed conversion + +mod conversion; +mod types; +mod validation; + +// Re-export all public types from types module +pub use types::{Database, DatabaseObject, FullyQualifiedName, Project, Schema}; + +#[cfg(test)] +mod tests; diff --git a/src/mz-deploy/src/project/typed/conversion.rs b/src/mz-deploy/src/project/typed/conversion.rs new file mode 100644 index 0000000000000..aad83add0dc0e --- /dev/null +++ b/src/mz-deploy/src/project/typed/conversion.rs @@ -0,0 +1,346 @@ +//! Conversion implementations for typed representation. +//! +//! This module contains the TryFrom implementations that convert raw +//! parsed representations into validated typed representations. + +use super::super::ast::Statement; +use super::super::normalize::NormalizingVisitor; +use super::types::{Database, DatabaseObject, FullyQualifiedName, Project, Schema}; +use super::validation::{ + validate_comment_references, validate_database_mod_statements, validate_fqn_identifiers, + validate_grant_references, validate_ident, validate_index_clusters, validate_index_references, + validate_mv_cluster, validate_no_storage_and_computation_in_schema, + validate_schema_mod_statements, validate_sink_cluster, +}; +use crate::project::error::{ValidationError, ValidationErrorKind, ValidationErrors}; +use mz_sql_parser::ast::*; +use std::path::PathBuf; + +impl TryFrom for DatabaseObject { + type Error = ValidationErrors; + + /// Converts a raw database object into a validated HIR database object. 
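+    ///
+    /// A minimal usage sketch (the `raw_object` binding is illustrative; it stands for a
+    /// `raw::DatabaseObject` already loaded from disk by `raw::load_project`):
+    ///
+    /// ```ignore
+    /// let typed_object = DatabaseObject::try_from(raw_object)?;
+    /// ```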
+    ///
+    /// # Validation
+    ///
+    /// This conversion performs the following validations:
+    /// - Ensures exactly one primary CREATE statement exists
+    /// - Validates that the object name in the statement matches the file name
+    /// - Validates that the qualified name matches the directory structure
+    /// - Validates that all indexes reference this object
+    /// - Validates that all grants reference this object and use the correct type
+    /// - Validates that all comments reference this object and use the correct type
+    ///
+    /// # Errors
+    ///
+    /// Returns all validation errors found (may contain multiple errors):
+    /// - No primary CREATE statement is found
+    /// - Multiple primary CREATE statements are found
+    /// - The object name doesn't match the file name
+    /// - Qualified names don't match the directory structure
+    /// - Supporting statements reference different objects
+    /// - Object types are inconsistent
+    /// - Unsupported statement types are encountered
+    fn try_from(value: super::super::raw::DatabaseObject) -> Result<Self, Self::Error> {
+        let mut errors = Vec::new();
+        let mut main_stmt: Option<Statement> = None;
+        let mut object_type: Option<ObjectType> = None;
+        let mut indexes = Vec::new();
+        let mut grants = Vec::new();
+        let mut comments = Vec::new();
+        let mut tests = Vec::new();
+
+        for stmt in value.statements {
+            match stmt {
+                mz_sql_parser::ast::Statement::CreateSink(s) => {
+                    if main_stmt.is_some() {
+                        errors.push(ValidationError::with_file(
+                            ValidationErrorKind::MultipleMainStatements {
+                                object_name: value.name.clone(),
+                            },
+                            value.path.clone(),
+                        ));
+                    } else {
+                        main_stmt = Some(Statement::CreateSink(s));
+                        object_type = Some(ObjectType::Sink)
+                    }
+                }
+                mz_sql_parser::ast::Statement::CreateView(s) => {
+                    if main_stmt.is_some() {
+                        errors.push(ValidationError::with_file(
+                            ValidationErrorKind::MultipleMainStatements {
+                                object_name: value.name.clone(),
+                            },
+                            value.path.clone(),
+                        ));
+                    } else {
+                        main_stmt = Some(Statement::CreateView(s));
+                        object_type = Some(ObjectType::View)
+                    }
+                }
+                mz_sql_parser::ast::Statement::CreateMaterializedView(s) => {
+                    if main_stmt.is_some() {
+                        errors.push(ValidationError::with_file(
+                            ValidationErrorKind::MultipleMainStatements {
+                                object_name: value.name.clone(),
+                            },
+                            value.path.clone(),
+                        ));
+                    } else {
+                        main_stmt = Some(Statement::CreateMaterializedView(s));
+                        object_type = Some(ObjectType::MaterializedView)
+                    }
+                }
+                mz_sql_parser::ast::Statement::CreateTable(s) => {
+                    if main_stmt.is_some() {
+                        errors.push(ValidationError::with_file(
+                            ValidationErrorKind::MultipleMainStatements {
+                                object_name: value.name.clone(),
+                            },
+                            value.path.clone(),
+                        ));
+                    } else {
+                        main_stmt = Some(Statement::CreateTable(s));
+                        object_type = Some(ObjectType::Table)
+                    }
+                }
+                mz_sql_parser::ast::Statement::CreateTableFromSource(s) => {
+                    if main_stmt.is_some() {
+                        errors.push(ValidationError::with_file(
+                            ValidationErrorKind::MultipleMainStatements {
+                                object_name: value.name.clone(),
+                            },
+                            value.path.clone(),
+                        ));
+                    } else {
+                        main_stmt = Some(Statement::CreateTableFromSource(s));
+                        object_type = Some(ObjectType::Table)
+                    }
+                }
+
+                // Supporting statements
+                mz_sql_parser::ast::Statement::CreateIndex(s) => {
+                    indexes.push(s);
+                }
+                mz_sql_parser::ast::Statement::GrantPrivileges(s) => {
+                    grants.push(s);
+                }
+                mz_sql_parser::ast::Statement::Comment(s) => {
+                    comments.push(s);
+                }
+
+                // Test statements are collected for later execution
+                mz_sql_parser::ast::Statement::ExecuteUnitTest(s) => {
+                    tests.push(s);
+                }
+
+                // Unsupported statements
+                other => {
+                    errors.push(ValidationError::with_file(
+                        ValidationErrorKind::UnsupportedStatement {
+                            object_name: value.name.clone(),
+                            statement_type: format!("{:?}", other),
+                        },
+                        value.path.clone(),
+                    ));
+                }
+            }
+        }
+
+        // Check for main statement
+        if main_stmt.is_none() {
+            errors.push(ValidationError::with_file(
+                ValidationErrorKind::NoMainStatement {
+                    object_name: value.name.clone(),
+                },
+                value.path.clone(),
+            ));
+        }
+
+        // Check for object type
+        if object_type.is_none() {
+            errors.push(ValidationError::with_file(
+                ValidationErrorKind::NoObjectType,
+                value.path.clone(),
+            ));
+        }
+
+        // If we have fatal errors (no main statement or object type), return early
+        // since we can't continue validation without them
+        if !errors.is_empty() && (main_stmt.is_none() || object_type.is_none()) {
+            return Err(ValidationErrors::new(errors));
+        }
+
+        // Unwrap is safe here because we checked above
+        let stmt = main_stmt.unwrap();
+        let obj_type = object_type.unwrap();
+
+        let fqn = match FullyQualifiedName::try_from((value.path.as_path(), value.name.as_str())) {
+            Ok(fqn) => fqn,
+            Err(e) => {
+                errors.push(e);
+                // Return early if we can't extract FQN
+                return Err(ValidationErrors::new(errors));
+            }
+        };
+
+        // Get identifier from original statement before normalization
+        let main_ident = stmt.ident();
+
+        // Validate the original statement identifier against FQN
+        validate_ident(&stmt, &fqn, &mut errors);
+
+        // Validate identifier format (lowercase, valid characters)
+        validate_fqn_identifiers(&fqn, &mut errors);
+
+        // Normalize statement name and dependencies
+        let stmt = stmt.normalize_stmt(&fqn);
+
+        // Normalize index, grant, and comment references to be fully qualified
+        let visitor = NormalizingVisitor::fully_qualifying(&fqn);
+        visitor.normalize_index_references(&mut indexes);
+        visitor.normalize_grant_references(&mut grants);
+        visitor.normalize_comment_references(&mut comments);
+
+        // Validate cluster requirements
+        validate_index_clusters(&fqn, &indexes, &mut errors);
+        validate_mv_cluster(&fqn, &stmt, &mut errors);
+        validate_sink_cluster(&fqn, &stmt, &mut errors);
+
+        validate_index_references(&fqn, &indexes, &main_ident, &mut errors);
+        validate_grant_references(&fqn, &grants, &main_ident, obj_type, &mut errors);
+        validate_comment_references(&fqn, &comments, &main_ident, &obj_type, &mut errors);
+
+        if !errors.is_empty() {
+            return Err(ValidationErrors::new(errors));
+        }
+
+        Ok(DatabaseObject {
+            stmt,
+            indexes,
+            grants,
+            comments,
+            tests,
+        })
+    }
+}
+
+impl TryFrom<super::super::raw::Schema> for Schema {
+    type Error = ValidationErrors;
+
+    /// Converts a raw schema into a validated typed schema.
+    ///
+    /// Validates each database object in the schema. Collects all validation errors
+    /// from all objects and returns them together.
+    fn try_from(value: super::super::raw::Schema) -> Result<Self, Self::Error> {
+        let mut all_errors = Vec::new();
+        let mut objects = Vec::new();
+
+        for obj in value.objects {
+            match DatabaseObject::try_from(obj) {
+                Ok(db_obj) => objects.push(db_obj),
+                Err(errs) => {
+                    // Collect errors from this object
+                    all_errors.extend(errs.errors);
+                }
+            }
+        }
+
+        validate_no_storage_and_computation_in_schema(&value.name, &objects, &mut all_errors);
+
+        if !all_errors.is_empty() {
+            return Err(ValidationErrors::new(all_errors));
+        }
+
+        Ok(Self {
+            name: value.name.clone(),
+            objects,
+            mod_statements: value.mod_statements,
+        })
+    }
+}
+
+impl TryFrom<super::super::raw::Database> for Database {
+    type Error = ValidationErrors;
+
+    /// Converts a raw database into a validated typed database.
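+    ///
+    /// A minimal usage sketch (not a doctest; `raw_db` is assumed to be a
+    /// `raw::Database` produced by the project loader):
+    ///
+    /// ```ignore
+    /// let db = Database::try_from(raw_db)?;
+    /// // On failure, the returned ValidationErrors aggregates every error found
+    /// // across the database's mod statements, schemas, and objects.
+    /// ```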
+    ///
+    /// Validates each schema in the database. Collects all validation errors
+    /// from all schemas and objects and returns them together.
+    fn try_from(value: super::super::raw::Database) -> Result<Self, Self::Error> {
+        let mut all_errors = Vec::new();
+        let mut schemas = Vec::new();
+
+        // Validate database mod statements if they exist
+        if let Some(ref mod_stmts) = value.mod_statements {
+            let db_mod_path = PathBuf::from(format!("{}.sql", value.name));
+            validate_database_mod_statements(&value.name, &db_mod_path, mod_stmts, &mut all_errors);
+        }
+
+        for (schema_name, mut schema) in value.schemas {
+            // Validate schema mod statements if they exist (need database context)
+            if let Some(ref mut mod_stmts) = schema.mod_statements {
+                let schema_mod_path = PathBuf::from(format!("{}/{}.sql", value.name, schema_name));
+                validate_schema_mod_statements(
+                    &value.name,
+                    &schema_name,
+                    &schema_mod_path,
+                    mod_stmts,
+                    &mut all_errors,
+                );
+            }
+
+            match Schema::try_from(schema) {
+                Ok(s) => schemas.push(s),
+                Err(errs) => {
+                    // Collect errors from this schema
+                    all_errors.extend(errs.errors);
+                }
+            }
+        }
+
+        if !all_errors.is_empty() {
+            return Err(ValidationErrors::new(all_errors));
+        }
+
+        Ok(Self {
+            name: value.name.clone(),
+            schemas,
+            mod_statements: value.mod_statements,
+        })
+    }
+}
+
+impl TryFrom<super::super::raw::Project> for Project {
+    type Error = ValidationErrors;
+
+    /// Converts a raw project into a fully validated typed project.
+    ///
+    /// This performs a complete validation of the entire project tree. Collects
+    /// all validation errors from all databases, schemas, and objects and returns
+    /// them together, grouped by location.
+    ///
+    /// # Errors
+    ///
+    /// Returns all validation errors found across the entire project hierarchy.
+    fn try_from(value: super::super::raw::Project) -> Result<Self, Self::Error> {
+        let mut all_errors = Vec::new();
+        let mut databases = Vec::new();
+
+        for (_, database) in value.databases {
+            match Database::try_from(database) {
+                Ok(db) => databases.push(db),
+                Err(errs) => {
+                    // Collect errors from this database
+                    all_errors.extend(errs.errors);
+                }
+            }
+        }
+
+        if !all_errors.is_empty() {
+            return Err(ValidationErrors::new(all_errors));
+        }
+
+        Ok(Self { databases })
+    }
+}
diff --git a/src/mz-deploy/src/project/typed/tests.rs b/src/mz-deploy/src/project/typed/tests.rs
new file mode 100644
index 0000000000000..6f763c3d20fd3
--- /dev/null
+++ b/src/mz-deploy/src/project/typed/tests.rs
@@ -0,0 +1,2115 @@
+//! Tests for the typed representation module.
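+//!
+//! Most tests follow the same shape (a sketch; `create_raw_object` is the helper
+//! defined at the top of this module):
+//!
+//! ```ignore
+//! let raw = create_raw_object("foo", path, "CREATE TABLE foo (id INT);");
+//! let result = DatabaseObject::try_from(raw);
+//! assert!(result.is_ok());
+//! ```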
+ +use super::super::parser::parse_statements; +use super::super::raw; +use super::types::{Database, DatabaseObject, Schema}; +use mz_sql_parser::ast::*; +use std::path::PathBuf; +use tempfile::TempDir; + +fn create_raw_object(name: &str, path: PathBuf, sql: &str) -> raw::DatabaseObject { + let statements = parse_statements(vec![sql]).unwrap(); + raw::DatabaseObject { + name: name.to_string(), + path, + statements, + } +} + +#[test] +fn test_valid_simple_object_name() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE foo (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); +} + +#[test] +fn test_valid_qualified_schema() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE public.foo (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); +} + +#[test] +fn test_valid_fully_qualified() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE materialize.public.foo (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); +} + +#[test] +fn test_invalid_object_name_mismatch() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE bar (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("object name mismatch")); + assert!(err.to_string().contains("bar")); + assert!(err.to_string().contains("foo")); +} + +#[test] +fn test_invalid_schema_mismatch() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE private.foo (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + let err_str = err.to_string(); + // Check for the schema mismatch error content + assert!(err_str.contains("schema qualifier mismatch")); + assert!(err_str.contains("private")); + assert!(err_str.contains("public")); +} + +#[test] +fn test_invalid_database_mismatch() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let raw = create_raw_object("foo", path, "CREATE TABLE other_db.public.foo (id INT);"); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + let err_str = err.to_string(); + // Check for the database mismatch error content + assert!(err_str.contains("database qualifier mismatch")); + assert!(err_str.contains("other_db")); + assert!(err_str.contains("materialize")); +} + +#[test] +fn test_valid_with_indexes_and_grants() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + CREATE INDEX idx_foo IN CLUSTER c ON foo (id); + GRANT SELECT ON foo TO user1; + COMMENT ON TABLE foo IS 'test table'; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + 
assert_eq!(obj.indexes.len(), 1); + assert_eq!(obj.grants.len(), 1); + assert_eq!(obj.comments.len(), 1); +} + +#[test] +fn test_invalid_index_on_different_object() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + CREATE INDEX idx_bar ON bar (id); + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("INDEX")); + assert!(err.to_string().contains("bar")); + assert!(err.to_string().contains("foo")); +} + +#[test] +fn test_invalid_grant_on_different_object() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + GRANT SELECT ON bar TO user1; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("GRANT")); + assert!(err.to_string().contains("bar")); + assert!(err.to_string().contains("foo")); +} + +#[test] +fn test_invalid_comment_on_different_object() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + COMMENT ON TABLE bar IS 'wrong table'; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("COMMENT")); + assert!(err.to_string().contains("bar")); + assert!(err.to_string().contains("foo")); +} + +#[test] +fn test_valid_column_comment() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT, name TEXT); + COMMENT ON COLUMN foo.id IS 'primary key'; + COMMENT ON COLUMN foo.name IS 'user name'; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + assert_eq!(obj.comments.len(), 2); +} + +#[test] +fn test_invalid_column_comment_on_different_table() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + COMMENT ON COLUMN bar.id IS 'wrong table'; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("column COMMENT")); + assert!(err.to_string().contains("bar")); + assert!(err.to_string().contains("foo")); +} + +#[test] +fn test_invalid_comment_type_mismatch() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/foo.sql"); + + let sql = r#" + CREATE TABLE foo (id INT); + COMMENT ON VIEW foo IS 'this is actually a table'; + "#; + + let raw = create_raw_object("foo", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("View")); + assert!(err.to_string().contains("Table")); +} + +// ===== Dependency Normalization Tests ===== +// These tests verify that all object references within statements +// are normalized to be fully qualified (database.schema.object). 
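+//
+// As a concrete sketch of what the tests below check: a view defined in
+// materialize/public/active_users.sql as
+//
+//     CREATE VIEW active_users AS SELECT id, name FROM users WHERE active = true;
+//
+// comes out of conversion with both its own name and the `users` reference rewritten
+// to `materialize.public.active_users` and `materialize.public.users`.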
+ +#[test] +fn test_view_dependency_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/active_users.sql"); + + // View references "users" without qualification + let sql = r#" + CREATE VIEW active_users AS + SELECT id, name FROM users WHERE active = true; + "#; + + let raw = create_raw_object("active_users", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + // Verify the statement name is normalized + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify the view name is fully qualified + assert_eq!( + view_stmt.definition.name.to_string(), + "materialize.public.active_users" + ); + + // Verify the table reference in the query is normalized + // The query body should reference materialize.public.users + let query = &view_stmt.definition.query; + match &query.body { + SetExpr::Select(select) => { + assert_eq!(select.from.len(), 1); + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!(name.name().to_string(), "materialize.public.users"); + } + _ => panic!("Expected table reference"), + } + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_materialized_view_dependency_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir + .path() + .join("materialize/public/active_users_mv.sql"); + + // Materialized view references "users" without qualification + let sql = r#" + CREATE MATERIALIZED VIEW active_users_mv IN CLUSTER quickstart AS + SELECT * FROM users; + "#; + + let raw = create_raw_object("active_users_mv", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + // Verify the statement is a materialized view + let mv_stmt = match obj.stmt { + super::super::ast::Statement::CreateMaterializedView(ref s) => s, + _ => panic!("Expected CreateMaterializedView statement"), + }; + + // Verify the table reference is normalized + match &mv_stmt.query.body { + SetExpr::Select(select) => { + assert_eq!(select.from.len(), 1); + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!(name.name().to_string(), "materialize.public.users"); + } + _ => panic!("Expected table reference"), + } + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_view_with_join_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/user_orders.sql"); + + // View with JOIN - both table references unqualified + let sql = r#" + CREATE VIEW user_orders AS + SELECT u.id, u.name, o.order_id + FROM users u + JOIN orders o ON u.id = o.user_id; + "#; + + let raw = create_raw_object("user_orders", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify both table references are normalized + match &view_stmt.definition.query.body { + SetExpr::Select(select) => { + assert_eq!(select.from.len(), 1); + let table_with_joins = &select.from[0]; + + // Check main table (users) + match &table_with_joins.relation { + TableFactor::Table { name, .. 
} => { + assert_eq!(name.name().to_string(), "materialize.public.users"); + } + _ => panic!("Expected table reference for users"), + } + + // Check joined table (orders) + assert_eq!(table_with_joins.joins.len(), 1); + match &table_with_joins.joins[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!(name.name().to_string(), "materialize.public.orders"); + } + _ => panic!("Expected table reference for orders"), + } + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_view_with_subquery_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/recent_orders.sql"); + + // View with subquery - both references unqualified + let sql = r#" + CREATE VIEW recent_orders AS + SELECT * FROM orders + WHERE user_id IN (SELECT id FROM users WHERE active = true); + "#; + + let raw = create_raw_object("recent_orders", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify main query table reference is normalized + match &view_stmt.definition.query.body { + SetExpr::Select(select) => { + // Check main FROM clause (orders) + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!(name.name().to_string(), "materialize.public.orders"); + } + _ => panic!("Expected table reference for orders"), + } + + // Verify subquery also has normalized table reference + // The WHERE clause contains the subquery, but verifying the exact + // structure is complex - the main assertion above covers the key point + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_view_with_cte_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/user_summary.sql"); + + // View with CTE (WITH clause) - unqualified table references + let sql = r#" + CREATE VIEW user_summary AS + WITH active_users AS ( + SELECT id, name FROM users WHERE active = true + ) + SELECT * FROM active_users; + "#; + + let raw = create_raw_object("user_summary", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify CTE references are normalized + let query = &view_stmt.definition.query; + match &query.ctes { + CteBlock::Simple(ctes) => { + assert_eq!(ctes.len(), 1); + // Verify the CTE query references the normalized table + match &ctes[0].query.body { + SetExpr::Select(select) => match &select.from[0].relation { + TableFactor::Table { name, .. 
} => { + assert_eq!(name.name().to_string(), "materialize.public.users"); + } + _ => panic!("Expected table reference in CTE"), + }, + _ => panic!("Expected SELECT in CTE"), + } + } + _ => panic!("Expected simple CTE block"), + } +} + +#[test] +fn test_table_from_source_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/kafka_table.sql"); + + // CREATE TABLE FROM SOURCE with unqualified source reference + let sql = r#" + CREATE TABLE kafka_table + FROM SOURCE kafka_source (REFERENCE public.kafka_table); + "#; + + let raw = create_raw_object("kafka_table", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + // Verify the source reference is normalized + let table_stmt = match obj.stmt { + super::super::ast::Statement::CreateTableFromSource(ref s) => s, + _ => panic!("Expected CreateTableFromSource statement"), + }; + + // Verify source name is fully qualified + assert_eq!( + table_stmt.source.to_string(), + "materialize.public.kafka_source" + ); +} + +#[test] +fn test_sink_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/kafka_sink.sql"); + + // CREATE SINK with unqualified FROM and connection references + let sql = r#" + CREATE SINK kafka_sink + IN CLUSTER quickstart + FROM users + INTO KAFKA CONNECTION kafka_conn (TOPIC 'users'); + "#; + + let raw = create_raw_object("kafka_sink", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let sink_stmt = match obj.stmt { + super::super::ast::Statement::CreateSink(ref s) => s, + _ => panic!("Expected CreateSink statement"), + }; + + // Verify FROM reference is normalized + assert_eq!(sink_stmt.from.to_string(), "materialize.public.users"); + + // Verify connection reference is normalized + match &sink_stmt.connection { + CreateSinkConnection::Kafka { connection, .. 
} => { + assert_eq!(connection.to_string(), "materialize.public.kafka_conn"); + } + _ => panic!("Expected Kafka sink connection"), + } +} + +#[test] +fn test_index_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/users.sql"); + + // Table with index - index references unqualified table name + let sql = r#" + CREATE TABLE users (id INT, name TEXT); + CREATE INDEX users_id_idx IN CLUSTER c ON users (id); + "#; + + let raw = create_raw_object("users", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + // Verify index reference is normalized + assert_eq!(obj.indexes.len(), 1); + let index = &obj.indexes[0]; + assert_eq!(index.on_name.to_string(), "materialize.public.users"); +} + +#[test] +fn test_comment_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/users.sql"); + + // Table with comments - comment references unqualified table name + let sql = r#" + CREATE TABLE users (id INT, name TEXT); + COMMENT ON TABLE users IS 'User accounts'; + COMMENT ON COLUMN users.id IS 'Unique identifier'; + "#; + + let raw = create_raw_object("users", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + // Verify comment references are normalized + assert_eq!(obj.comments.len(), 2); + + // Check table comment + match &obj.comments[0].object { + CommentObjectType::Table { name } => { + assert_eq!(name.to_string(), "materialize.public.users"); + } + _ => panic!("Expected Table comment"), + } + + // Check column comment (should normalize the table reference) + match &obj.comments[1].object { + CommentObjectType::Column { name } => { + assert_eq!(name.relation.to_string(), "materialize.public.users"); + assert_eq!(name.column.to_string(), "id"); + } + _ => panic!("Expected Column comment"), + } +} + +#[test] +fn test_schema_qualified_dependency_normalization() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/active_users.sql"); + + // View with schema-qualified (but not fully qualified) table reference + let sql = r#" + CREATE VIEW active_users AS + SELECT * FROM public.users WHERE active = true; + "#; + + let raw = create_raw_object("active_users", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify the schema-qualified reference is now fully qualified + match &view_stmt.definition.query.body { + SetExpr::Select(select) => { + match &select.from[0].relation { + TableFactor::Table { name, .. 
} => { + // Should prepend database name + assert_eq!(name.name().to_string(), "materialize.public.users"); + } + _ => panic!("Expected table reference"), + } + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_already_fully_qualified_unchanged() { + let temp_dir = TempDir::new().unwrap(); + let path = temp_dir.path().join("materialize/public/cross_db_view.sql"); + + // View with already fully qualified table reference + let sql = r#" + CREATE VIEW cross_db_view AS + SELECT * FROM other_db.other_schema.other_table; + "#; + + let raw = create_raw_object("cross_db_view", path, sql); + let result = DatabaseObject::try_from(raw); + + assert!(result.is_ok()); + let obj = result.unwrap(); + + let view_stmt = match obj.stmt { + super::super::ast::Statement::CreateView(ref s) => s, + _ => panic!("Expected CreateView statement"), + }; + + // Verify already fully qualified names remain unchanged + match &view_stmt.definition.query.body { + SetExpr::Select(select) => { + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + // Should remain as-is + assert_eq!(name.name().to_string(), "other_db.other_schema.other_table"); + } + _ => panic!("Expected table reference"), + } + } + _ => panic!("Expected SELECT statement"), + } +} + +#[test] +fn test_valid_database_mod_comment() { + use std::collections::BTreeMap; + + // Valid COMMENT ON DATABASE statement + let sql = "COMMENT ON DATABASE materialize IS 'Main database';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!(result.is_ok(), "Valid database comment should be accepted"); +} + +#[test] +fn test_valid_database_mod_grant() { + use std::collections::BTreeMap; + + // Valid GRANT ON DATABASE statement + let sql = "GRANT USAGE ON DATABASE materialize TO user1;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!(result.is_ok(), "Valid database grant should be accepted"); +} + +#[test] +fn test_invalid_database_mod_wrong_statement_type() { + use std::collections::BTreeMap; + + // Invalid: CREATE TABLE in database mod file + let sql = "CREATE TABLE users (id INT);"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "CREATE TABLE in database mod file should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("InvalidDatabaseModStatement"), + "Should report invalid statement type" + ); +} + +#[test] +fn test_invalid_database_mod_comment_wrong_target() { + use std::collections::BTreeMap; + + // Invalid: COMMENT ON SCHEMA in database mod file + let sql = "COMMENT ON SCHEMA public IS 'Public schema';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "COMMENT ON SCHEMA in database mod file should be rejected" + ); + + let err 
= result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("DatabaseModCommentTargetMismatch"), + "Should report wrong comment target" + ); +} + +#[test] +fn test_invalid_database_mod_comment_wrong_database() { + use std::collections::BTreeMap; + + // Invalid: COMMENT ON different database + let sql = "COMMENT ON DATABASE other_db IS 'Other database';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "COMMENT ON wrong database should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("DatabaseModCommentTargetMismatch"), + "Should report wrong database target" + ); +} + +#[test] +fn test_valid_schema_mod_comment() { + use std::collections::BTreeMap; + + // Valid COMMENT ON SCHEMA statement (unqualified, will be normalized) + let sql = "COMMENT ON SCHEMA public IS 'Public schema';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid schema comment should be accepted: {:?}", + result.err() + ); +} + +#[test] +fn test_valid_schema_mod_grant() { + use std::collections::BTreeMap; + + // Valid GRANT ON SCHEMA statement (unqualified, will be normalized) + let sql = "GRANT USAGE ON SCHEMA public TO user1;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid schema grant should be accepted: {:?}", + result.err() + ); +} + +#[test] +fn test_invalid_schema_mod_wrong_statement_type() { + use std::collections::BTreeMap; + + // Invalid: CREATE VIEW in schema mod file + let sql = "CREATE VIEW v AS SELECT 1;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "CREATE VIEW in schema mod file should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("InvalidSchemaModStatement"), + "Should report invalid statement type" + ); +} + +#[test] +fn test_invalid_schema_mod_comment_wrong_target() { + use std::collections::BTreeMap; + + // Invalid: COMMENT ON TABLE in schema mod file + let sql = "COMMENT ON TABLE users IS 'Users table';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let 
raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "COMMENT ON TABLE in schema mod file should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("SchemaModCommentTargetMismatch"), + "Should report wrong comment target" + ); +} + +#[test] +fn test_invalid_schema_mod_comment_wrong_schema() { + use std::collections::BTreeMap; + + // Invalid: COMMENT ON different schema + let sql = "COMMENT ON SCHEMA other_schema IS 'Other schema';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "COMMENT ON wrong schema should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("SchemaModCommentTargetMismatch"), + "Should report wrong schema target" + ); +} + +#[test] +fn test_valid_database_mod_alter_default_privileges() { + use std::collections::BTreeMap; + + // Valid ALTER DEFAULT PRIVILEGES with IN DATABASE + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN DATABASE materialize GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid ALTER DEFAULT PRIVILEGES IN DATABASE should be accepted: {:?}", + result.err() + ); +} + +#[test] +fn test_invalid_database_mod_alter_default_privileges_no_scope() { + use std::collections::BTreeMap; + + // Invalid: Missing IN DATABASE + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES without scope should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("AlterDefaultPrivilegesRequiresDatabaseScope"), + "Should require IN DATABASE" + ); +} + +#[test] +fn test_invalid_database_mod_alter_default_privileges_wrong_database() { + use std::collections::BTreeMap; + + // Invalid: Wrong database + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN DATABASE other_db GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES with wrong database should be rejected" + ); + + let 
err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("AlterDefaultPrivilegesDatabaseMismatch"), + "Should report wrong database" + ); +} + +#[test] +fn test_invalid_database_mod_alter_default_privileges_with_schema() { + use std::collections::BTreeMap; + + // Invalid: IN SCHEMA not allowed in database mod + let sql = + "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN SCHEMA public GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: Some(statements), + schemas: BTreeMap::new(), + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES with IN SCHEMA should be rejected in database mod" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("AlterDefaultPrivilegesSchemaNotAllowed"), + "Should reject IN SCHEMA" + ); +} + +#[test] +fn test_valid_schema_mod_alter_default_privileges_unqualified() { + use std::collections::BTreeMap; + + // Valid ALTER DEFAULT PRIVILEGES with unqualified schema + let sql = + "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN SCHEMA public GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid ALTER DEFAULT PRIVILEGES with unqualified schema should be accepted: {:?}", + result.err() + ); +} + +#[test] +fn test_valid_schema_mod_alter_default_privileges_qualified() { + use std::collections::BTreeMap; + + // Valid ALTER DEFAULT PRIVILEGES with qualified schema + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN SCHEMA materialize.public GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid ALTER DEFAULT PRIVILEGES with qualified schema should be accepted: {:?}", + result.err() + ); +} + +#[test] +fn test_invalid_schema_mod_alter_default_privileges_no_scope() { + use std::collections::BTreeMap; + + // Invalid: Missing IN SCHEMA + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES without scope should be rejected in schema mod" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + 
err_msg.contains("AlterDefaultPrivilegesRequiresSchemaScope"), + "Should require IN SCHEMA" + ); +} + +#[test] +fn test_invalid_schema_mod_alter_default_privileges_with_database() { + use std::collections::BTreeMap; + + // Invalid: IN DATABASE not allowed in schema mod + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN DATABASE materialize GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES with IN DATABASE should be rejected in schema mod" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("AlterDefaultPrivilegesDatabaseNotAllowed"), + "Should reject IN DATABASE" + ); +} + +#[test] +fn test_invalid_schema_mod_alter_default_privileges_wrong_schema() { + use std::collections::BTreeMap; + + // Invalid: Wrong schema + let sql = "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN SCHEMA other_schema GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_err(), + "ALTER DEFAULT PRIVILEGES with wrong schema should be rejected" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("AlterDefaultPrivilegesSchemaMismatch"), + "Should report wrong schema" + ); +} + +#[test] +fn test_schema_mod_comment_normalization() { + use std::collections::BTreeMap; + + // Test that unqualified schema name gets normalized to qualified + let sql = "COMMENT ON SCHEMA public IS 'Public schema';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid schema comment should be accepted: {:?}", + result.err() + ); + + let db = result.unwrap(); + let schema = db.schemas.iter().find(|s| s.name == "public").unwrap(); + + // Check that the schema name was normalized in the mod statement + if let Some(mod_stmts) = &schema.mod_statements { + assert_eq!(mod_stmts.len(), 1, "Should have one mod statement"); + let stmt_sql = format!("{}", mod_stmts[0]); + assert!( + stmt_sql.contains("materialize.public"), + "Schema name should be normalized to materialize.public, got: {}", + stmt_sql + ); + } else { + panic!("Schema should have mod statements"); + } +} + +#[test] +fn test_schema_mod_grant_normalization() { + use std::collections::BTreeMap; + + // Test that unqualified schema name gets normalized to qualified + let sql = "GRANT 
USAGE ON SCHEMA public TO user1;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid schema grant should be accepted: {:?}", + result.err() + ); + + let db = result.unwrap(); + let schema = db.schemas.iter().find(|s| s.name == "public").unwrap(); + + // Check that the schema name was normalized in the mod statement + if let Some(mod_stmts) = &schema.mod_statements { + assert_eq!(mod_stmts.len(), 1, "Should have one mod statement"); + let stmt_sql = format!("{}", mod_stmts[0]); + assert!( + stmt_sql.contains("materialize.public"), + "Schema name should be normalized to materialize.public, got: {}", + stmt_sql + ); + } else { + panic!("Schema should have mod statements"); + } +} + +#[test] +fn test_schema_mod_alter_default_privileges_normalization() { + use std::collections::BTreeMap; + + // Test that unqualified schema name gets normalized to qualified + let sql = + "ALTER DEFAULT PRIVILEGES FOR ROLE user1 IN SCHEMA public GRANT SELECT ON TABLES TO user2;"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Valid ALTER DEFAULT PRIVILEGES should be accepted: {:?}", + result.err() + ); + + let db = result.unwrap(); + let schema = db.schemas.iter().find(|s| s.name == "public").unwrap(); + + // Check that the schema name was normalized in the mod statement + if let Some(mod_stmts) = &schema.mod_statements { + assert_eq!(mod_stmts.len(), 1, "Should have one mod statement"); + let stmt_sql = format!("{}", mod_stmts[0]); + assert!( + stmt_sql.contains("materialize.public"), + "Schema name should be normalized to materialize.public, got: {}", + stmt_sql + ); + } else { + panic!("Schema should have mod statements"); + } +} + +#[test] +fn test_schema_mod_already_qualified_names() { + use std::collections::BTreeMap; + + // Test that already qualified names remain unchanged + let sql = "COMMENT ON SCHEMA materialize.public IS 'test';"; + let statements = parse_statements(vec![sql]).unwrap(); + + let raw_schema = raw::Schema { + name: "public".to_string(), + mod_statements: Some(statements), + objects: Vec::new(), + }; + + let mut schemas = BTreeMap::new(); + schemas.insert("public".to_string(), raw_schema); + + let raw_db = raw::Database { + name: "materialize".to_string(), + mod_statements: None, + schemas, + }; + + let result = Database::try_from(raw_db); + assert!( + result.is_ok(), + "Qualified schema comment should be accepted: {:?}", + result.err() + ); + + let db = result.unwrap(); + let schema = db.schemas.iter().find(|s| s.name == "public").unwrap(); + + // Check that the already qualified name remains qualified + if let Some(mod_stmts) = &schema.mod_statements { + assert_eq!(mod_stmts.len(), 1, "Should have one mod statement"); + let stmt_sql = format!("{}", mod_stmts[0]); + 
assert!( + stmt_sql.contains("materialize.public"), + "Schema name should remain materialize.public, got: {}", + stmt_sql + ); + } else { + panic!("Schema should have mod statements"); + } +} + +// Tests for schema segregation validation (storage vs computation objects) + +#[test] +fn test_schema_with_tables_and_views_fails() { + let table_sql = "CREATE TABLE users (id INT);"; + let view_sql = "CREATE VIEW active_users AS SELECT * FROM users;"; + + let table_stmts = parse_statements(vec![table_sql]).unwrap(); + let view_stmts = parse_statements(vec![view_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/mixed/users.sql"), + statements: table_stmts, + }; + + let raw_view = raw::DatabaseObject { + name: "active_users".to_string(), + path: PathBuf::from("materialize/mixed/active_users.sql"), + statements: view_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_table, raw_view], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with both tables and views should fail validation" + ); + + let err = result.unwrap_err(); + let err_msg = format!("{:?}", err); + assert!( + err_msg.contains("StorageAndComputationObjectsInSameSchema"), + "Should report storage and computation mix" + ); + assert!(err_msg.contains("mixed"), "Should mention schema name"); +} + +#[test] +fn test_schema_with_tables_and_materialized_views_fails() { + let table_sql = "CREATE TABLE orders (id INT);"; + let mv_sql = "CREATE MATERIALIZED VIEW order_summary AS SELECT COUNT(*) FROM orders;"; + + let table_stmts = parse_statements(vec![table_sql]).unwrap(); + let mv_stmts = parse_statements(vec![mv_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "orders".to_string(), + path: PathBuf::from("materialize/mixed/orders.sql"), + statements: table_stmts, + }; + + let raw_mv = raw::DatabaseObject { + name: "order_summary".to_string(), + path: PathBuf::from("materialize/mixed/order_summary.sql"), + statements: mv_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_table, raw_mv], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with both tables and materialized views should fail validation" + ); +} + +#[test] +fn test_schema_with_sinks_and_views_fails() { + let sink_sql = "CREATE SINK user_sink IN CLUSTER quickstart FROM users INTO KAFKA CONNECTION kafka_conn (TOPIC 'users');"; + let view_sql = "CREATE VIEW user_view AS SELECT * FROM users;"; + + let sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + let view_stmts = parse_statements(vec![view_sql]).unwrap(); + + let raw_sink = raw::DatabaseObject { + name: "user_sink".to_string(), + path: PathBuf::from("materialize/mixed/user_sink.sql"), + statements: sink_stmts, + }; + + let raw_view = raw::DatabaseObject { + name: "user_view".to_string(), + path: PathBuf::from("materialize/mixed/user_view.sql"), + statements: view_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_sink, raw_view], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with both sinks and views should fail validation" + ); +} + +#[test] +fn test_schema_with_sinks_and_materialized_views_fails() { + let sink_sql = "CREATE SINK order_sink IN CLUSTER quickstart FROM orders INTO KAFKA CONNECTION kafka_conn 
(TOPIC 'orders');"; + let mv_sql = + "CREATE MATERIALIZED VIEW order_mv IN CLUSTER quickstart AS SELECT COUNT(*) FROM orders;"; + + let sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + let mv_stmts = parse_statements(vec![mv_sql]).unwrap(); + + let raw_sink = raw::DatabaseObject { + name: "order_sink".to_string(), + path: PathBuf::from("materialize/mixed/order_sink.sql"), + statements: sink_stmts, + }; + + let raw_mv = raw::DatabaseObject { + name: "order_mv".to_string(), + path: PathBuf::from("materialize/mixed/order_mv.sql"), + statements: mv_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_sink, raw_mv], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with both sinks and materialized views should fail validation" + ); +} + +#[test] +fn test_schema_with_tables_sinks_and_views_fails() { + let table_sql = "CREATE TABLE users (id INT);"; + let sink_sql = "CREATE SINK user_sink IN CLUSTER quickstart FROM users INTO KAFKA CONNECTION kafka_conn (TOPIC 'users');"; + let view_sql = "CREATE VIEW user_view AS SELECT * FROM users;"; + + let table_stmts = parse_statements(vec![table_sql]).unwrap(); + let sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + let view_stmts = parse_statements(vec![view_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/mixed/users.sql"), + statements: table_stmts, + }; + + let raw_sink = raw::DatabaseObject { + name: "user_sink".to_string(), + path: PathBuf::from("materialize/mixed/user_sink.sql"), + statements: sink_stmts, + }; + + let raw_view = raw::DatabaseObject { + name: "user_view".to_string(), + path: PathBuf::from("materialize/mixed/user_view.sql"), + statements: view_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_table, raw_sink, raw_view], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with tables, sinks, and views should fail validation" + ); +} + +#[test] +fn test_schema_with_only_tables_succeeds() { + let table1_sql = "CREATE TABLE users (id INT);"; + let table2_sql = "CREATE TABLE orders (id INT);"; + + let table1_stmts = parse_statements(vec![table1_sql]).unwrap(); + let table2_stmts = parse_statements(vec![table2_sql]).unwrap(); + + let raw_table1 = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/tables/users.sql"), + statements: table1_stmts, + }; + + let raw_table2 = raw::DatabaseObject { + name: "orders".to_string(), + path: PathBuf::from("materialize/tables/orders.sql"), + statements: table2_stmts, + }; + + let raw_schema = raw::Schema { + name: "tables".to_string(), + objects: vec![raw_table1, raw_table2], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_ok(), + "Schema with only tables should pass validation" + ); +} + +#[test] +fn test_schema_with_tables_and_sinks_succeeds() { + let table_sql = "CREATE TABLE users (id INT);"; + let sink_sql = "CREATE SINK user_sink IN CLUSTER quickstart FROM users INTO KAFKA CONNECTION kafka_conn (TOPIC 'users');"; + + let table_stmts = parse_statements(vec![table_sql]).unwrap(); + let sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/storage/users.sql"), + statements: table_stmts, + }; + + let raw_sink 
= raw::DatabaseObject { + name: "user_sink".to_string(), + path: PathBuf::from("materialize/storage/user_sink.sql"), + statements: sink_stmts, + }; + + let raw_schema = raw::Schema { + name: "storage".to_string(), + objects: vec![raw_table, raw_sink], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_ok(), + "Schema with tables and sinks should pass validation (both storage objects)" + ); +} + +#[test] +fn test_schema_with_only_views_succeeds() { + let view1_sql = "CREATE VIEW user_view AS SELECT * FROM users;"; + let view2_sql = "CREATE VIEW order_view AS SELECT * FROM orders;"; + + let view1_stmts = parse_statements(vec![view1_sql]).unwrap(); + let view2_stmts = parse_statements(vec![view2_sql]).unwrap(); + + let raw_view1 = raw::DatabaseObject { + name: "user_view".to_string(), + path: PathBuf::from("materialize/views/user_view.sql"), + statements: view1_stmts, + }; + + let raw_view2 = raw::DatabaseObject { + name: "order_view".to_string(), + path: PathBuf::from("materialize/views/order_view.sql"), + statements: view2_stmts, + }; + + let raw_schema = raw::Schema { + name: "views".to_string(), + objects: vec![raw_view1, raw_view2], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_ok(), + "Schema with only views should pass validation" + ); +} + +#[test] +fn test_schema_with_views_and_materialized_views_succeeds() { + let view_sql = "CREATE VIEW user_view AS SELECT * FROM users;"; + let mv_sql = "CREATE MATERIALIZED VIEW user_summary IN CLUSTER quickstart AS SELECT COUNT(*) FROM users;"; + + let view_stmts = parse_statements(vec![view_sql]).unwrap(); + let mv_stmts = parse_statements(vec![mv_sql]).unwrap(); + + let raw_view = raw::DatabaseObject { + name: "user_view".to_string(), + path: PathBuf::from("materialize/computation/user_view.sql"), + statements: view_stmts, + }; + + let raw_mv = raw::DatabaseObject { + name: "user_summary".to_string(), + path: PathBuf::from("materialize/computation/user_summary.sql"), + statements: mv_stmts, + }; + + let raw_schema = raw::Schema { + name: "computation".to_string(), + objects: vec![raw_view, raw_mv], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_ok(), + "Schema with views and materialized views should pass validation (both computation objects)" + ); +} + +#[test] +fn test_schema_with_table_from_source_and_view_fails() { + let table_sql = "CREATE TABLE users FROM SOURCE kafka_source (REFERENCE users);"; + let view_sql = "CREATE VIEW user_view AS SELECT * FROM users;"; + + let table_stmts = parse_statements(vec![table_sql]).unwrap(); + let view_stmts = parse_statements(vec![view_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/mixed/users.sql"), + statements: table_stmts, + }; + + let raw_view = raw::DatabaseObject { + name: "user_view".to_string(), + path: PathBuf::from("materialize/mixed/user_view.sql"), + statements: view_stmts, + }; + + let raw_schema = raw::Schema { + name: "mixed".to_string(), + objects: vec![raw_table, raw_view], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Schema with CREATE TABLE FROM SOURCE and view should fail validation (table from source is a storage object)" + ); +} + +#[test] +fn test_sink_missing_cluster_fails() { + let sink_sql = + "CREATE SINK user_sink FROM users INTO KAFKA CONNECTION kafka_conn (TOPIC 'users');"; + let 
sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + + let raw_sink = raw::DatabaseObject { + name: "user_sink".to_string(), + path: PathBuf::from("materialize/sinks/user_sink.sql"), + statements: sink_stmts, + }; + + let raw_schema = raw::Schema { + name: "sinks".to_string(), + objects: vec![raw_sink], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Sink without IN CLUSTER clause should fail validation" + ); + + // Verify it's the correct error type + if let Err(crate::project::error::ValidationErrors { errors }) = result { + assert_eq!(errors.len(), 1); + match &errors[0].kind { + crate::project::error::ValidationErrorKind::SinkMissingCluster { sink_name } => { + // Name is fully qualified after normalization + assert_eq!(sink_name, "materialize.sinks.user_sink"); + } + _ => panic!( + "Expected SinkMissingCluster error, got {:?}", + errors[0].kind + ), + } + } else { + panic!("Expected ValidationErrors"); + } +} + +#[test] +fn test_sink_with_cluster_succeeds() { + let sink_sql = "CREATE SINK user_sink IN CLUSTER quickstart FROM users INTO KAFKA CONNECTION kafka_conn (TOPIC 'users');"; + let sink_stmts = parse_statements(vec![sink_sql]).unwrap(); + + let raw_sink = raw::DatabaseObject { + name: "user_sink".to_string(), + path: PathBuf::from("materialize/sinks/user_sink.sql"), + statements: sink_stmts, + }; + + let raw_schema = raw::Schema { + name: "sinks".to_string(), + objects: vec![raw_sink], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_ok(), + "Sink with IN CLUSTER clause should pass validation" + ); +} + +#[test] +fn test_materialized_view_missing_cluster_fails() { + let mv_sql = "CREATE MATERIALIZED VIEW user_summary AS SELECT COUNT(*) FROM users;"; + let mv_stmts = parse_statements(vec![mv_sql]).unwrap(); + + let raw_mv = raw::DatabaseObject { + name: "user_summary".to_string(), + path: PathBuf::from("materialize/views/user_summary.sql"), + statements: mv_stmts, + }; + + let raw_schema = raw::Schema { + name: "views".to_string(), + objects: vec![raw_mv], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Materialized view without IN CLUSTER clause should fail validation" + ); + + // Verify it's the correct error type + if let Err(crate::project::error::ValidationErrors { errors }) = result { + assert_eq!(errors.len(), 1); + match &errors[0].kind { + crate::project::error::ValidationErrorKind::MaterializedViewMissingCluster { + view_name, + } => { + // Name is fully qualified after normalization + assert_eq!(view_name, "materialize.views.user_summary"); + } + _ => panic!( + "Expected MaterializedViewMissingCluster error, got {:?}", + errors[0].kind + ), + } + } else { + panic!("Expected ValidationErrors"); + } +} + +#[test] +fn test_index_missing_cluster_fails() { + let table_sql = "CREATE TABLE users (id INT);"; + let index_sql = "CREATE INDEX idx ON users (id);"; + + let stmts = parse_statements(vec![table_sql, index_sql]).unwrap(); + + let raw_table = raw::DatabaseObject { + name: "users".to_string(), + path: PathBuf::from("materialize/tables/users.sql"), + statements: stmts, + }; + + let raw_schema = raw::Schema { + name: "tables".to_string(), + objects: vec![raw_table], + mod_statements: None, + }; + + let result = Schema::try_from(raw_schema); + assert!( + result.is_err(), + "Index without IN CLUSTER clause should fail validation" + ); + + // Verify it's the correct error type + if let 
Err(crate::project::error::ValidationErrors { errors }) = result { + assert_eq!(errors.len(), 1); + match &errors[0].kind { + crate::project::error::ValidationErrorKind::IndexMissingCluster { index_name } => { + assert_eq!(index_name, "idx"); + } + _ => panic!( + "Expected IndexMissingCluster error, got {:?}", + errors[0].kind + ), + } + } else { + panic!("Expected ValidationErrors"); + } +} + +// ============================================================================ +// Identifier format validation tests +// ============================================================================ + +mod identifier_validation { + use super::super::validation::{IdentifierKind, validate_identifier_format}; + + #[test] + fn test_valid_lowercase_identifier() { + assert!(validate_identifier_format("users", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("my_table", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("user123", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("_private", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("price$", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("a1b2c3", IdentifierKind::Object).is_ok()); + } + + #[test] + fn test_valid_unicode_identifiers() { + // Letters with diacritical marks + assert!(validate_identifier_format("café", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("naïve", IdentifierKind::Object).is_ok()); + // Non-Latin letters + assert!(validate_identifier_format("日本語", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("данные", IdentifierKind::Object).is_ok()); + } + + #[test] + fn test_invalid_uppercase_start() { + let result = validate_identifier_format("Users", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("uppercase"), + "Error should mention uppercase: {}", + err + ); + assert!( + err.contains("position 1"), + "Error should mention position 1: {}", + err + ); + } + + #[test] + fn test_invalid_uppercase_middle() { + let result = validate_identifier_format("myTable", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("uppercase"), + "Error should mention uppercase: {}", + err + ); + assert!( + err.contains("'T'"), + "Error should mention the character: {}", + err + ); + } + + #[test] + fn test_invalid_digit_start() { + let result = validate_identifier_format("123table", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("starts with digit"), + "Error should mention starting with digit: {}", + err + ); + } + + #[test] + fn test_invalid_special_char_start() { + let result = validate_identifier_format("$price", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("starts with invalid character"), + "Error should mention invalid start: {}", + err + ); + } + + #[test] + fn test_invalid_hyphen() { + let result = validate_identifier_format("my-table", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("invalid character"), + "Error should mention invalid character: {}", + err + ); + assert!(err.contains("'-'"), "Error should mention hyphen: {}", err); + } + + #[test] + fn test_invalid_space() { + let result = validate_identifier_format("my table", IdentifierKind::Object); + 
assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("invalid character"), + "Error should mention invalid character: {}", + err + ); + } + + #[test] + fn test_empty_identifier() { + let result = validate_identifier_format("", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("cannot be empty"), + "Error should mention empty: {}", + err + ); + } + + #[test] + fn test_all_identifier_kinds() { + // Test that error messages correctly use the identifier kind + let db_err = validate_identifier_format("MyDB", IdentifierKind::Database).unwrap_err(); + assert!( + db_err.contains("database name"), + "Should mention database: {}", + db_err + ); + + let schema_err = + validate_identifier_format("MySchema", IdentifierKind::Schema).unwrap_err(); + assert!( + schema_err.contains("schema name"), + "Should mention schema: {}", + schema_err + ); + + let obj_err = validate_identifier_format("MyObject", IdentifierKind::Object).unwrap_err(); + assert!( + obj_err.contains("object name"), + "Should mention object: {}", + obj_err + ); + + let cluster_err = + validate_identifier_format("MyCluster", IdentifierKind::Cluster).unwrap_err(); + assert!( + cluster_err.contains("cluster name"), + "Should mention cluster: {}", + cluster_err + ); + } + + #[test] + fn test_all_uppercase() { + let result = validate_identifier_format("MY_TABLE", IdentifierKind::Object); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + err.contains("uppercase"), + "Error should mention uppercase: {}", + err + ); + } + + #[test] + fn test_valid_underscore_only_start() { + // Single underscore followed by valid chars + assert!(validate_identifier_format("_", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("__", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("___test", IdentifierKind::Object).is_ok()); + } + + #[test] + fn test_valid_dollar_sign_in_middle() { + assert!(validate_identifier_format("price$usd", IdentifierKind::Object).is_ok()); + assert!(validate_identifier_format("a$b$c", IdentifierKind::Object).is_ok()); + } +} diff --git a/src/mz-deploy/src/project/typed/types.rs b/src/mz-deploy/src/project/typed/types.rs new file mode 100644 index 0000000000000..30a08d1cb3f97 --- /dev/null +++ b/src/mz-deploy/src/project/typed/types.rs @@ -0,0 +1,419 @@ +//! Core type definitions for the typed representation. +//! +//! This module contains the primary types used to represent a validated +//! Materialize project structure. + +use super::super::ast::Statement; +use super::super::normalize::{ClusterTransformer, NameTransformer, NormalizingVisitor}; +use super::super::object_id::ObjectId; +use crate::project::error::ValidationError; +use crate::project::error::ValidationErrorKind; +use mz_sql_parser::ast::*; +use std::collections::BTreeSet; +use std::path::PathBuf; + +/// Fully qualified name parsed from file path structure. +/// +/// Represents the canonical `database.schema.object` name based on directory structure. 
+/// File path format: `///.sql` +/// +/// This struct is created during typed validation and is used to: +/// - Normalize statement names to be fully qualified +/// - Validate that SQL statement names match the directory structure +/// - Provide a consistent FQN for error messages and validation +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FullyQualifiedName { + id: ObjectId, + pub path: PathBuf, + item_name: UnresolvedItemName, +} + +impl FullyQualifiedName { + /// Get the database name. + #[inline] + pub fn database(&self) -> &str { + self.id.database() + } + + /// Get the schema name. + #[inline] + pub fn schema(&self) -> &str { + self.id.schema() + } + + /// Get the object name. + #[inline] + pub fn object(&self) -> &str { + self.id.object() + } + + /// Get the ObjectId. + pub fn object_id(&self) -> &ObjectId { + &self.id + } + + /// Get the UnresolvedItemName for updating statement names. + pub fn to_item_name(&self) -> UnresolvedItemName { + self.item_name.clone() + } +} + +impl From for FullyQualifiedName { + fn from(value: UnresolvedItemName) -> Self { + let id = ObjectId::new( + value.0[0].to_string(), + value.0[1].to_string(), + value.0[2].to_string(), + ); + Self { + id, + path: PathBuf::new(), + item_name: value, + } + } +} + +impl TryFrom<(&std::path::Path, &str)> for FullyQualifiedName { + type Error = ValidationError; + + /// Extract fully qualified name from file path. + /// + /// Path format: `///.sql` + /// Returns error if path structure is invalid. + fn try_from(value: (&std::path::Path, &str)) -> Result { + let (path, object_name) = value; + + // Extract schema (parent directory) + let schema = path + .parent() + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + .ok_or_else(|| { + ValidationError::with_file( + ValidationErrorKind::SchemaExtractionFailed, + path.to_path_buf(), + ) + })?; + + // Extract database (parent of schema directory) + let database = path + .parent() + .and_then(|p| p.parent()) + .and_then(|p| p.file_name()) + .and_then(|s| s.to_str()) + .ok_or_else(|| { + ValidationError::with_file( + ValidationErrorKind::DatabaseExtractionFailed, + path.to_path_buf(), + ) + })?; + + // Create Ident instances for each component + let database_ident = Ident::new(database).map_err(|e| { + ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: database.to_string(), + reason: e.to_string(), + }, + path.to_path_buf(), + ) + })?; + + let schema_ident = Ident::new(schema).map_err(|e| { + ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: schema.to_string(), + reason: e.to_string(), + }, + path.to_path_buf(), + ) + })?; + + let object_ident = Ident::new(object_name).map_err(|e| { + ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: object_name.to_string(), + reason: e.to_string(), + }, + path.to_path_buf(), + ) + })?; + + // Create the UnresolvedItemName + let item_name = UnresolvedItemName(vec![database_ident, schema_ident, object_ident]); + + // Create ObjectId + let id = ObjectId::new( + database.to_string(), + schema.to_string(), + object_name.to_string(), + ); + + Ok(FullyQualifiedName { + id, + path: path.to_path_buf(), + item_name, + }) + } +} + +/// The primary CREATE statement for a database object. +impl Statement { + /// Normalizes the statement name to be fully qualified. 
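+ /// For illustration (assuming the default fully-qualifying transformer), a statement
+ /// `CREATE VIEW user_view AS ...` loaded from `materialize/views/user_view.sql` is renamed
+ /// to `materialize.views.user_view`; unqualified references inside its query are presumed
+ /// to be qualified against the same database and schema.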
+ pub fn normalize_stmt(self, fqn: &FullyQualifiedName) -> Self { + let visitor = NormalizingVisitor::fully_qualifying(fqn); + self.normalize_name_with(&visitor, &fqn.to_item_name()) + .normalize_dependencies_with(&visitor) + } + + /// Normalizes the statement name using a custom transformer. + pub fn normalize_name_with( + self, + visitor: &NormalizingVisitor, + item_name: &UnresolvedItemName, + ) -> Self { + let transformed_name = visitor.transformer().transform_name(item_name); + + match self { + Statement::CreateSink(mut s) => { + s.name = Some(transformed_name); + Statement::CreateSink(s) + } + Statement::CreateView(mut s) => { + s.definition.name = transformed_name; + Statement::CreateView(s) + } + Statement::CreateMaterializedView(mut s) => { + s.name = transformed_name; + Statement::CreateMaterializedView(s) + } + Statement::CreateTable(mut s) => { + s.name = transformed_name; + Statement::CreateTable(s) + } + Statement::CreateTableFromSource(mut s) => { + s.name = transformed_name; + Statement::CreateTableFromSource(s) + } + } + } + + /// Normalizes all object references within the statement using a custom transformer. + pub fn normalize_dependencies_with( + self, + visitor: &NormalizingVisitor, + ) -> Self { + match self { + Statement::CreateView(mut s) => { + visitor.normalize_query(&mut s.definition.query); + Statement::CreateView(s) + } + Statement::CreateMaterializedView(mut s) => { + visitor.normalize_query(&mut s.query); + Statement::CreateMaterializedView(s) + } + Statement::CreateTableFromSource(mut s) => { + visitor.normalize_raw_item_name(&mut s.source); + Statement::CreateTableFromSource(s) + } + Statement::CreateSink(mut s) => { + visitor.normalize_raw_item_name(&mut s.from); + visitor.normalize_sink_connection(&mut s.connection); + Statement::CreateSink(s) + } + // These statements don't have dependencies on other database objects + Statement::CreateTable(_) => self, + } + } + + /// Normalize cluster references using a ClusterTransformer. + /// + /// This method is separate from normalize_dependencies_with because cluster + /// normalization is only needed for staging environments, not regular deployments. + pub fn normalize_cluster_with( + self, + visitor: &NormalizingVisitor, + ) -> Self { + match self { + Statement::CreateMaterializedView(mut s) => { + visitor.normalize_cluster_name(&mut s.in_cluster); + Statement::CreateMaterializedView(s) + } + Statement::CreateSink(mut s) => { + visitor.normalize_cluster_name(&mut s.in_cluster); + Statement::CreateSink(s) + } + // These statements don't have cluster references + _ => self, + } + } +} + +/// A validated database object with its primary statement and supporting declarations. +/// +/// Represents a single database object (table, view, source, etc.) that has been +/// validated to ensure: +/// - Exactly one primary CREATE statement exists +/// - The object name matches the file name +/// - All supporting statements (indexes, grants, comments) reference this object +/// - Object types are consistent across statements +/// +/// # Structure +/// +/// Each `DatabaseObject` is loaded from a single `.sql` file and contains: +/// - One primary statement (CREATE TABLE, CREATE VIEW, etc.) 
+/// - Zero or more CREATE INDEX statements (for indexable objects) +/// - Zero or more GRANT statements +/// - Zero or more COMMENT statements +/// +/// # Example +/// +/// For a file `my_schema/users.sql`: +/// ```sql +/// CREATE TABLE users ( +/// id INT, +/// name TEXT +/// ); +/// +/// CREATE INDEX users_id_idx ON users (id); +/// GRANT SELECT ON users TO analyst_role; +/// COMMENT ON TABLE users IS 'User account information'; +/// ``` +/// +/// This would be validated and represented as a single `DatabaseObject`. +#[derive(Debug)] +pub struct DatabaseObject { + /// The primary CREATE statement for this object + pub stmt: Statement, + /// Indexes defined on this object + pub indexes: Vec>, + /// Grant statements for this object + pub grants: Vec>, + /// Comment statements for this object or its columns + pub comments: Vec>, + /// Unit tests for this object + pub tests: Vec>, +} + +impl DatabaseObject { + pub fn clusters(&self) -> BTreeSet { + let mut cluster_set = BTreeSet::new(); + if let Statement::CreateMaterializedView(mv) = &self.stmt { + if let Some(RawClusterName::Unresolved(cluster_name)) = &mv.in_cluster { + cluster_set.insert(cluster_name.to_string()); + } + } + + if let Statement::CreateSink(sink) = &self.stmt { + if let Some(RawClusterName::Unresolved(cluster_name)) = &sink.in_cluster { + cluster_set.insert(cluster_name.to_string()); + } + } + + for index in &self.indexes { + if let Some(RawClusterName::Unresolved(cluster_name)) = &index.in_cluster { + cluster_set.insert(cluster_name.to_string()); + } + } + cluster_set + } + + /// Convert the statement to a Query for type checking purposes. + pub fn to_query(&self) -> Option> { + match &self.stmt { + Statement::CreateView(stmt) => Some(stmt.definition.query.clone()), + Statement::CreateMaterializedView(stmt) => Some(stmt.query.clone()), + Statement::CreateTable(_) => None, + _ => None, + } + } +} + +/// A validated schema containing multiple database objects. +/// +/// Represents a schema directory in the project structure. Each schema contains +/// multiple database objects (tables, views, sources, etc.) that have all been +/// validated. +/// +/// # Directory Mapping +/// +/// A schema corresponds to a directory in the project structure: +/// ```text +/// database_name/ +/// schema_name/ <- Schema +/// object1.sql <- DatabaseObject +/// object2.sql <- DatabaseObject +/// mod.sql (optional, not represented in HIR) +/// ``` +/// +/// Note: `mod.sql` files are parsed in the raw representation but not carried +/// forward to the HIR, as they typically contain schema-level setup statements. +#[derive(Debug)] +pub struct Schema { + /// The name of the schema (directory name) + pub name: String, + /// All validated database objects in this schema + pub objects: Vec, + /// Optional module-level statements (from schema.sql file) + pub mod_statements: Option>>, +} + +/// A validated database containing multiple schemas. +/// +/// Represents a database directory in the project structure. Each database contains +/// multiple schemas, each of which contains multiple database objects. 
+/// +/// # Directory Mapping +/// +/// A database corresponds to a directory in the project structure: +/// ```text +/// project_root/ +/// database_name/ <- Database +/// schema1/ <- Schema +/// object1.sql +/// schema2/ <- Schema +/// object2.sql +/// mod.sql (optional, not represented in HIR) +/// ``` +#[derive(Debug)] +pub struct Database { + /// The name of the database (directory name) + pub name: String, + /// All validated schemas in this database + pub schemas: Vec, + /// Optional module-level statements (from database.sql file) + pub mod_statements: Option>>, +} + +/// A fully validated Materialize project. +/// +/// Represents the complete validated project structure, containing all databases, +/// schemas, and objects. This is the top-level typed project that should be used after +/// successfully loading and validating a project from the file system. +/// +/// # Usage +/// +/// ```no_run +/// use mz_deploy::project::raw; +/// use mz_deploy::project::typed::Project; +/// +/// // Load raw project from file system +/// let raw_project = raw::load_project("./my_project").unwrap(); +/// +/// // Convert to validated typed project +/// let typed_project = Project::try_from(raw_project).unwrap(); +/// ``` +/// +/// # Validation Guarantees +/// +/// A successfully created `Project` guarantees: +/// - All object names match their file names +/// - All qualified names match the directory structure +/// - All supporting statements reference the correct objects +/// - All object types are consistent across statements +/// - No unsupported statement types are present +#[derive(Debug)] +pub struct Project { + /// All validated databases in this project + pub databases: Vec, +} diff --git a/src/mz-deploy/src/project/typed/validation.rs b/src/mz-deploy/src/project/typed/validation.rs new file mode 100644 index 0000000000000..c2212e9f6ac4c --- /dev/null +++ b/src/mz-deploy/src/project/typed/validation.rs @@ -0,0 +1,1205 @@ +//! Validation functions for typed representation. +//! +//! This module contains helper functions used during the conversion from +//! raw to typed representation to validate various constraints. + +use super::super::ast::DatabaseIdent; +use super::super::ast::Statement; +use super::types::{DatabaseObject, FullyQualifiedName}; +use crate::project::error::{ValidationError, ValidationErrorKind}; +use mz_sql_parser::ast::*; +use std::path::PathBuf; + +/// The type of identifier being validated (for error messages). +#[derive(Debug, Clone, Copy)] +pub enum IdentifierKind { + Database, + Schema, + Object, + Cluster, +} + +impl std::fmt::Display for IdentifierKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Database => write!(f, "database"), + Self::Schema => write!(f, "schema"), + Self::Object => write!(f, "object"), + Self::Cluster => write!(f, "cluster"), + } + } +} + +/// Validates an identifier follows naming rules. +/// +/// # Rules +/// +/// - **Start Character**: Must begin with a lowercase letter (a-z, including letters with +/// diacritical marks and non-Latin letters) or an underscore (_). +/// - **Subsequent Characters**: Can include lowercase letters, digits (0-9), underscores (_), +/// or dollar signs ($). +/// - **Case**: All characters must be lowercase. 
+/// +/// # Arguments +/// +/// * `name` - The identifier name to validate +/// * `kind` - The type of identifier (for error messages) +/// +/// # Returns +/// +/// * `Ok(())` if the identifier is valid +/// * `Err(String)` with a descriptive error message if invalid +/// +/// # Examples +/// +/// ```text +/// Valid identifiers: +/// users, _temp, my_table, café, 日本語, user123, price$ +/// +/// Invalid identifiers: +/// Users (uppercase) +/// 123table (starts with digit) +/// my-table (contains hyphen) +/// MY_TABLE (uppercase) +/// ``` +pub fn validate_identifier_format(name: &str, kind: IdentifierKind) -> Result<(), String> { + if name.is_empty() { + return Err(format!("{} name cannot be empty", kind)); + } + + let mut chars = name.chars().peekable(); + + // Check first character + if let Some(first) = chars.next() { + if first.is_uppercase() { + return Err(format!( + "{} name '{}' contains uppercase character '{}' at position 1. \ + Identifiers must be lowercase.", + kind, name, first + )); + } + + if first.is_ascii_digit() { + return Err(format!( + "{} name '{}' starts with digit '{}'. \ + Identifiers must start with a letter or underscore.", + kind, name, first + )); + } + + // First char must be a letter (including unicode letters) or underscore + if !first.is_alphabetic() && first != '_' { + return Err(format!( + "{} name '{}' starts with invalid character '{}'. \ + Identifiers must start with a letter or underscore.", + kind, name, first + )); + } + } + + // Check subsequent characters + for (pos, ch) in chars.enumerate() { + let position = pos + 2; // +2 because we already consumed first char and positions are 1-indexed + + if ch.is_uppercase() { + return Err(format!( + "{} name '{}' contains uppercase character '{}' at position {}. \ + Identifiers must be lowercase.", + kind, name, ch, position + )); + } + + // Valid subsequent chars: letters (lowercase), digits, underscore, dollar sign + let is_valid = ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' || ch == '$'; + + if !is_valid { + return Err(format!( + "{} name '{}' contains invalid character '{}' at position {}. \ + Identifiers can only contain letters, digits, underscores, and dollar signs.", + kind, name, ch, position + )); + } + } + + Ok(()) +} + +/// Validates all identifiers in a FullyQualifiedName (database, schema, object). +/// +/// # Arguments +/// +/// * `fqn` - The fully qualified name to validate +/// * `path` - The file path (for error reporting) +/// * `errors` - Vector to collect validation errors +pub fn validate_fqn_identifiers(fqn: &FullyQualifiedName, errors: &mut Vec) { + // Validate database name + if let Err(reason) = validate_identifier_format(fqn.database(), IdentifierKind::Database) { + errors.push(ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: fqn.database().to_string(), + reason, + }, + fqn.path.clone(), + )); + } + + // Validate schema name + if let Err(reason) = validate_identifier_format(fqn.schema(), IdentifierKind::Schema) { + errors.push(ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: fqn.schema().to_string(), + reason, + }, + fqn.path.clone(), + )); + } + + // Validate object name + if let Err(reason) = validate_identifier_format(fqn.object(), IdentifierKind::Object) { + errors.push(ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: fqn.object().to_string(), + reason, + }, + fqn.path.clone(), + )); + } +} + +/// Validates a cluster name follows naming rules. 
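+///
+/// For example (illustrative), `validate_cluster_name("quickstart", &path)` is accepted,
+/// while `validate_cluster_name("MyCluster", &path)` is rejected because of the uppercase
+/// characters.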
+/// +/// # Arguments +/// +/// * `cluster_name` - The cluster name to validate +/// * `path` - The file path (for error reporting) +/// +/// # Returns +/// +/// * `Ok(())` if valid +/// * `Err(ValidationError)` if invalid +pub fn validate_cluster_name(cluster_name: &str, path: &PathBuf) -> Result<(), ValidationError> { + validate_identifier_format(cluster_name, IdentifierKind::Cluster).map_err(|reason| { + ValidationError::with_file( + ValidationErrorKind::InvalidIdentifier { + name: cluster_name.to_string(), + reason, + }, + path.clone(), + ) + }) +} + +/// Validates that the statement's identifier matches the expected file path structure. +/// +/// Ensures that the object name in the CREATE statement matches the file name, and +/// that any schema/database qualifiers match the directory structure. +/// +/// # Validation Rules +/// +/// - The object name must match the file name (without `.sql` extension) +/// - If the statement includes a schema qualifier, it must match the parent directory name +/// - If the statement includes a database qualifier, it must match the grandparent directory name +/// +/// # Examples +/// +/// Valid mappings: +/// ```text +/// materialize/public/users.sql -> CREATE TABLE users (...) +/// materialize/public/users.sql -> CREATE TABLE public.users (...) +/// materialize/public/users.sql -> CREATE TABLE materialize.public.users (...) +/// ``` +/// +/// Invalid mappings: +/// ```text +/// materialize/public/users.sql -> CREATE TABLE customers (...) X name mismatch +/// materialize/public/users.sql -> CREATE TABLE private.users (...) X schema mismatch +/// materialize/public/users.sql -> CREATE TABLE other.public.users (...) X database mismatch +/// ``` +pub(super) fn validate_ident( + stmt: &Statement, + fqn: &FullyQualifiedName, + errors: &mut Vec, +) { + let ident = stmt.ident(); + + // The object name in the statement must match the file name + if ident.object != fqn.object() { + errors.push(ValidationError::with_file( + ValidationErrorKind::ObjectNameMismatch { + declared: ident.object.clone(), + expected: fqn.object().to_string(), + }, + fqn.path.clone(), + )); + } + + // If the statement includes a schema qualifier, validate it matches the path-derived schema + if let Some(ref stmt_schema) = ident.schema + && stmt_schema != fqn.schema() + { + errors.push(ValidationError::with_file( + ValidationErrorKind::SchemaMismatch { + declared: stmt_schema.clone(), + expected: fqn.schema().to_string(), + }, + fqn.path.clone(), + )); + } + + // If the statement includes a database qualifier, validate it matches the path-derived database + if let Some(ref stmt_database) = ident.database + && stmt_database != fqn.database() + { + errors.push(ValidationError::with_file( + ValidationErrorKind::DatabaseMismatch { + declared: stmt_database.clone(), + expected: fqn.database().to_string(), + }, + fqn.path.clone(), + )); + } +} + +/// Validates that a COMMENT statement targets the correct object with the correct type. +/// +/// Ensures that: +/// 1. The comment references the main object defined in the file +/// 2. The object type specified in the COMMENT matches the actual object type +/// +/// # Object Type Matching +/// +/// Materialize allows comments on various object types (TABLE, VIEW, MATERIALIZED VIEW, etc.). +/// The COMMENT statement must use the correct object type keyword matching the actual object. 
+/// +/// # Examples +/// +/// Valid: +/// ```sql +/// CREATE TABLE users (...); +/// COMMENT ON TABLE users IS 'user data'; +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE TABLE users (...); +/// COMMENT ON VIEW users IS 'user data'; -- type mismatch +/// COMMENT ON TABLE customers IS 'data'; -- wrong object +/// ``` +fn validate_comment_target( + comment_name: &RawItemName, + main_ident: &DatabaseIdent, + main_obj_type: &ObjectType, + comment_obj_type: ObjectType, + fqn: &FullyQualifiedName, + comment_sql: &str, + errors: &mut Vec, +) { + let comment_target: DatabaseIdent = comment_name.name().clone().into(); + + // Check that the comment references the main object + if !comment_target.matches(main_ident) { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::CommentReferenceMismatch { + referenced: comment_target.object, + expected: main_ident.object.clone(), + }, + fqn.path.clone(), + comment_sql.to_string(), + )); + } + + // Check that the comment type matches the object type + if *main_obj_type != comment_obj_type { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::CommentTypeMismatch { + comment_type: format!("{:?}", comment_obj_type), + object_type: format!("{:?}", main_obj_type), + }, + fqn.path.clone(), + comment_sql.to_string(), + )); + } +} + +/// Extract the target name and ObjectType from a CommentObjectType. +/// +/// Returns Some((name, object_type)) for supported comment types, None for unsupported types. +/// Column comments are handled separately since they reference the parent table. +fn comment_object_to_target(obj: &CommentObjectType) -> Option<(&RawItemName, ObjectType)> { + match obj { + CommentObjectType::Table { name } => Some((name, ObjectType::Table)), + CommentObjectType::View { name } => Some((name, ObjectType::View)), + CommentObjectType::MaterializedView { name } => Some((name, ObjectType::MaterializedView)), + CommentObjectType::Source { name } => Some((name, ObjectType::Source)), + CommentObjectType::Sink { name } => Some((name, ObjectType::Sink)), + CommentObjectType::Connection { name } => Some((name, ObjectType::Connection)), + CommentObjectType::Secret { name } => Some((name, ObjectType::Secret)), + // Column, Index, Func, Type, Role, Database, Schema, Cluster, etc. are not supported + // or handled separately + _ => None, + } +} + +/// Get a human-readable name for a CommentObjectType variant. +fn comment_object_type_name(obj: &CommentObjectType) -> &'static str { + match obj { + CommentObjectType::Table { .. } => "TABLE", + CommentObjectType::View { .. } => "VIEW", + CommentObjectType::MaterializedView { .. } => "MATERIALIZED VIEW", + CommentObjectType::Source { .. } => "SOURCE", + CommentObjectType::Sink { .. } => "SINK", + CommentObjectType::Connection { .. } => "CONNECTION", + CommentObjectType::Secret { .. } => "SECRET", + CommentObjectType::Schema { .. } => "SCHEMA", + CommentObjectType::Database { .. } => "DATABASE", + CommentObjectType::Column { .. } => "COLUMN", + CommentObjectType::Index { .. } => "INDEX", + CommentObjectType::Func { .. } => "FUNCTION", + CommentObjectType::Type { .. } => "TYPE", + CommentObjectType::Role { .. } => "ROLE", + CommentObjectType::Cluster { .. } => "CLUSTER", + CommentObjectType::ClusterReplica { .. } => "CLUSTER REPLICA", + CommentObjectType::ContinualTask { .. } => "CONTINUAL TASK", + CommentObjectType::NetworkPolicy { .. } => "NETWORK POLICY", + } +} + +/// Validates that all COMMENT statements in a file reference the main object. 
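+///
+/// For example (illustrative), a file defining `CREATE TABLE users (...)` may contain
+/// `COMMENT ON TABLE users IS '...'` or `COMMENT ON COLUMN users.id IS '...'`, but a
+/// statement such as `COMMENT ON TABLE orders IS '...'` is rejected.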
+/// +/// Processes all COMMENT statements and ensures they target either: +/// - The main object defined in the file, OR +/// - A column of the main object +/// +/// This validation ensures that each object file is self-contained and doesn't +/// reference other objects. +/// +/// # Supported Comment Types +/// +/// - `COMMENT ON TABLE` - for tables and materialized views +/// - `COMMENT ON VIEW` - for views +/// - `COMMENT ON MATERIALIZED VIEW` - for materialized views +/// - `COMMENT ON SOURCE` - for sources and subsources +/// - `COMMENT ON SINK` - for sinks +/// - `COMMENT ON CONNECTION` - for connections +/// - `COMMENT ON SECRET` - for secrets +/// - `COMMENT ON COLUMN` - for columns of the main object +/// +/// # Errors +/// +/// Returns an error if: +/// - A comment references a different object +/// - The comment type doesn't match the object type +/// - An unsupported comment type is used +pub(super) fn validate_comment_references( + fqn: &FullyQualifiedName, + comments: &[CommentStatement], + main_ident: &DatabaseIdent, + obj_type: &ObjectType, + errors: &mut Vec, +) { + for comment in comments.iter() { + let comment_sql = format!("{};", comment); + + // Handle column comments specially (they reference the parent table) + if let CommentObjectType::Column { name } = &comment.object { + let column_parent: DatabaseIdent = name.relation.name().clone().into(); + if !column_parent.matches(main_ident) { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::ColumnCommentReferenceMismatch { + referenced: column_parent.object, + expected: main_ident.object.clone(), + }, + fqn.path.clone(), + comment_sql, + )); + } + continue; + } + + // Handle supported object types + if let Some((name, comment_obj_type)) = comment_object_to_target(&comment.object) { + validate_comment_target( + name, + main_ident, + obj_type, + comment_obj_type, + fqn, + &comment_sql, + errors, + ); + continue; + } + + // Unsupported comment type + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::UnsupportedCommentType, + fqn.path.clone(), + comment_sql, + )); + } +} + +/// Validates that all indexes specify a cluster. +/// +/// Indexes in Materialize must specify which cluster they run on using the IN CLUSTER clause. +/// This ensures deterministic deployment and avoids implicit cluster selection. +/// +/// # Example +/// +/// Valid: +/// ```sql +/// CREATE INDEX idx ON table (col) IN CLUSTER quickstart; +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE INDEX idx ON table (col); -- missing cluster +/// ``` +pub(super) fn validate_index_clusters( + fqn: &FullyQualifiedName, + indexes: &[CreateIndexStatement], + errors: &mut Vec, +) { + for index in indexes.iter() { + match &index.in_cluster { + None => { + let index_sql = format!("{};", index); + let index_name = index + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| "".to_string()); + + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::IndexMissingCluster { index_name }, + fqn.path.clone(), + index_sql, + )); + } + Some(cluster) => { + // Validate cluster name format + let cluster_name = cluster.to_string(); + if let Err(e) = validate_cluster_name(&cluster_name, &fqn.path) { + errors.push(e); + } + } + } + } +} + +/// Validates that a materialized view specifies a cluster. +/// +/// Materialized views in Materialize must specify which cluster they run on using the IN CLUSTER clause. +/// This ensures deterministic deployment and avoids implicit cluster selection. 
+/// +/// # Example +/// +/// Valid: +/// ```sql +/// CREATE MATERIALIZED VIEW mv IN CLUSTER quickstart AS SELECT ...; +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE MATERIALIZED VIEW mv AS SELECT ...; -- missing cluster +/// ``` +pub(super) fn validate_mv_cluster( + fqn: &FullyQualifiedName, + stmt: &Statement, + errors: &mut Vec, +) { + if let Statement::CreateMaterializedView(mv) = stmt { + match &mv.in_cluster { + None => { + let mv_sql = format!("{};", mv); + let view_name = mv.name.to_string(); + + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::MaterializedViewMissingCluster { view_name }, + fqn.path.clone(), + mv_sql, + )); + } + Some(cluster) => { + // Validate cluster name format + let cluster_name = cluster.to_string(); + if let Err(e) = validate_cluster_name(&cluster_name, &fqn.path) { + errors.push(e); + } + } + } + } +} + +/// Validates that a sink specifies a cluster. +/// +/// Sinks in Materialize must specify which cluster they run on using the IN CLUSTER clause. +/// This ensures deterministic deployment and avoids implicit cluster selection. +/// +/// # Example +/// +/// Valid: +/// ```sql +/// CREATE SINK sink IN CLUSTER quickstart FROM table INTO ...; +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE SINK sink FROM table INTO ...; -- missing cluster +/// ``` +pub(super) fn validate_sink_cluster( + fqn: &FullyQualifiedName, + stmt: &Statement, + errors: &mut Vec, +) { + if let Statement::CreateSink(sink) = stmt { + match &sink.in_cluster { + None => { + let sink_sql = format!("{};", sink); + let sink_name = sink + .name + .as_ref() + .map(|n| n.to_string()) + .unwrap_or_else(|| "".to_string()); + + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SinkMissingCluster { sink_name }, + fqn.path.clone(), + sink_sql, + )); + } + Some(cluster) => { + // Validate cluster name format + let cluster_name = cluster.to_string(); + if let Err(e) = validate_cluster_name(&cluster_name, &fqn.path) { + errors.push(e); + } + } + } + } +} + +/// Validates that all CREATE INDEX statements reference the main object. +/// +/// Ensures that every index defined in the file is created on the object +/// defined in the same file. This maintains the principle that each file +/// is self-contained. +/// +/// # Example +/// +/// Valid: +/// ```sql +/// CREATE TABLE users (id INT, name TEXT); +/// CREATE INDEX users_id_idx ON users (id); +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE TABLE users (id INT, name TEXT); +/// CREATE INDEX orders_id_idx ON orders (id); -- wrong object +/// ``` +pub(super) fn validate_index_references( + fqn: &FullyQualifiedName, + indexes: &[CreateIndexStatement], + main_ident: &DatabaseIdent, + errors: &mut Vec, +) { + for index in indexes.iter() { + let on: DatabaseIdent = index.on_name.name().clone().into(); + if !on.matches(main_ident) { + let index_sql = format!("{};", index); + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::IndexReferenceMismatch { + referenced: on.object, + expected: main_ident.object.clone(), + }, + fqn.path.clone(), + index_sql, + )); + } + } +} + +/// Validates that all GRANT statements reference the main object with the correct type. +/// +/// Ensures that: +/// 1. Every grant targets the object defined in the same file +/// 2. The object type in the GRANT matches the actual object type +/// 3. 
Only supported grant types are used (no SYSTEM grants, no ALL TABLES IN SCHEMA) +/// +/// # Object Type Handling +/// +/// Materialize's GRANT syntax has specific requirements: +/// - Tables, views, materialized views, and sources all use `GRANT ... ON TABLE` +/// - Other objects (connections, secrets, sinks) use their specific type +/// +/// # Supported Grants +/// +/// - `GRANT ... ON TABLE` - for tables, views, materialized views, sources +/// - `GRANT ... ON CONNECTION` - for connections +/// - `GRANT ... ON SECRET` - for secrets +/// - `GRANT ... ON SINK` - for sinks +/// +/// # Example +/// +/// Valid: +/// ```sql +/// CREATE TABLE users (...); +/// GRANT SELECT ON TABLE users TO analyst_role; +/// ``` +/// +/// Invalid: +/// ```sql +/// CREATE TABLE users (...); +/// GRANT SELECT ON orders TO analyst_role; -- wrong object +/// ``` +pub(super) fn validate_grant_references( + fqn: &FullyQualifiedName, + grants: &[GrantPrivilegesStatement], + main_ident: &DatabaseIdent, + main_object_type: ObjectType, + errors: &mut Vec, +) { + for grant in grants.iter() { + let grant_sql = format!("{};", grant); + + match &grant.target { + GrantTargetSpecification::Object { + object_type, + object_spec_inner, + .. + } => match object_spec_inner { + GrantTargetSpecificationInner::Objects { names } => { + check_grant_object_type( + fqn, + main_object_type, + *object_type, + &grant_sql, + errors, + ); + + for obj in names { + match obj { + UnresolvedObjectName::Item(item_name) => { + let grant_target: DatabaseIdent = item_name.clone().into(); + if !grant_target.matches(main_ident) { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::GrantReferenceMismatch { + referenced: grant_target.object, + expected: main_ident.object.clone(), + }, + fqn.path.clone(), + grant_sql.clone(), + )); + } + } + _ => { + // skip + } + } + } + } + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::GrantMustTargetObject, + fqn.path.clone(), + grant_sql.clone(), + )); + } + }, + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SystemGrantUnsupported, + fqn.path.clone(), + grant_sql, + )); + } + } + } +} + +/// Validates that the GRANT statement uses the correct object type for the target object. +/// +/// Materialize has specific rules about which object types can be used in GRANT statements: +/// +/// # Type Mapping Rules +/// +/// - **Tables, Views, Materialized Views, Sources**: Must use `GRANT ... ON TABLE` +/// - This is because Materialize treats these objects similarly for privilege management +/// - **Connections, Secrets, Sinks**: Must use their specific type +/// - e.g., `GRANT ... ON CONNECTION`, `GRANT ... 
ON SECRET` +/// +/// # Examples +/// +/// ```sql +/// -- For a table +/// GRANT SELECT ON TABLE users TO role; +/// +/// -- For a materialized view +/// GRANT SELECT ON TABLE my_mv TO role; +/// GRANT SELECT ON MATERIALIZED VIEW my_mv TO role; -- invalid +/// +/// -- For a connection +/// GRANT USAGE ON CONNECTION kafka_conn TO role; +/// ``` +fn check_grant_object_type( + fqn: &FullyQualifiedName, + main_object_type: ObjectType, + grant_object_type: ObjectType, + grant_sql: &str, + errors: &mut Vec, +) { + if matches!( + main_object_type, + ObjectType::Table | ObjectType::Source | ObjectType::View | ObjectType::MaterializedView + ) { + if grant_object_type != ObjectType::Table { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::GrantTypeMismatch { + grant_type: format!("{}", grant_object_type), + expected_type: "TABLE".to_string(), + }, + fqn.path.clone(), + grant_sql.to_string(), + )); + } + } else if grant_object_type != main_object_type { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::GrantTypeMismatch { + grant_type: format!("{}", grant_object_type), + expected_type: format!("{}", main_object_type), + }, + fqn.path.clone(), + grant_sql.to_string(), + )); + } +} + +/// Validate database mod file statements. +/// +/// Database mod files can only contain: +/// - COMMENT ON DATABASE (targeting the database itself) +/// - GRANT ON DATABASE (targeting the database itself) +/// - ALTER DEFAULT PRIVILEGES +pub(super) fn validate_database_mod_statements( + database_name: &str, + database_path: &std::path::Path, + statements: &[mz_sql_parser::ast::Statement], + errors: &mut Vec, +) { + use mz_sql_parser::ast::Statement as MzStatement; + + for stmt in statements { + let stmt_sql = format!("{};", stmt); + + match stmt { + MzStatement::Comment(comment_stmt) => { + // Must be COMMENT ON DATABASE targeting this database + match &comment_stmt.object { + CommentObjectType::Database { name } => { + // Check if it targets this database + let target_db = name.to_string(); + if target_db != database_name { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::DatabaseModCommentTargetMismatch { + target: format!("DATABASE {}", target_db), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql, + )); + } + } + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::DatabaseModCommentTargetMismatch { + target: comment_object_type_name(&comment_stmt.object).to_string(), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql, + )); + } + } + } + MzStatement::GrantPrivileges(grant_stmt) => { + // Must be GRANT ON DATABASE targeting this database + match &grant_stmt.target { + GrantTargetSpecification::Object { + object_type, + object_spec_inner, + .. 
+ } => { + if object_type != &ObjectType::Database { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::DatabaseModGrantTargetMismatch { + target: format!("{}", object_type), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql.clone(), + )); + } + + // Check that it targets this specific database + if let GrantTargetSpecificationInner::Objects { names } = object_spec_inner + { + for name in names { + if let UnresolvedObjectName::Item(item_name) = name { + let target_db = item_name.to_string(); + if target_db != database_name { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::DatabaseModGrantTargetMismatch { + target: format!("DATABASE {}", target_db), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql.clone(), + )); + } + } + } + } + } + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::DatabaseModGrantTargetMismatch { + target: "SYSTEM or other".to_string(), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql, + )); + } + } + } + MzStatement::AlterDefaultPrivileges(alter_stmt) => { + // Must specify IN DATABASE targeting this database + match &alter_stmt.target_objects { + GrantTargetAllSpecification::AllDatabases { databases } => { + // Validate all databases reference the current database + for db_name in databases { + let db_str = db_name.to_string(); + if db_str != database_name { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesDatabaseMismatch { + referenced: db_str, + expected: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql.clone(), + )); + } + } + } + GrantTargetAllSpecification::AllSchemas { .. } => { + // Reject: IN SCHEMA not allowed in database mod files + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesSchemaNotAllowed { + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql.clone(), + )); + } + GrantTargetAllSpecification::All => { + // Reject: Must specify IN DATABASE + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesRequiresDatabaseScope { + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql.clone(), + )); + } + } + } + _ => { + // Reject all other statement types + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::InvalidDatabaseModStatement { + statement_type: format!("{:?}", stmt) + .split('(') + .next() + .unwrap_or("unknown") + .to_string(), + database_name: database_name.to_string(), + }, + database_path.to_path_buf(), + stmt_sql, + )); + } + } + } +} + +/// Validate schema mod file statements and normalize names. +/// +/// Schema mod files can only contain: +/// - COMMENT ON SCHEMA (targeting the schema itself) +/// - GRANT ON SCHEMA (targeting the schema itself) +/// - ALTER DEFAULT PRIVILEGES +/// +/// Names are normalized to include the database qualifier. 
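+///
+/// For example (illustrative), in the mod file for schema `views` of database `materialize`,
+/// `COMMENT ON SCHEMA views IS '...'` is accepted and its target is rewritten to
+/// `materialize.views`, while `COMMENT ON SCHEMA other IS '...'` is rejected.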
+pub(super) fn validate_schema_mod_statements( + database_name: &str, + schema_name: &str, + schema_path: &std::path::Path, + statements: &mut [mz_sql_parser::ast::Statement], + errors: &mut Vec, +) { + use mz_sql_parser::ast::Statement as MzStatement; + + // Helper function to normalize unqualified schema names + let normalize_schema_name = |name: &mut UnresolvedSchemaName| { + if name.0.len() == 1 { + // Unqualified: prepend database to make database.schema + let schema = name.0[0].clone(); + let database = Ident::new(database_name).expect("valid database identifier"); + name.0 = vec![database, schema]; + } + // Already qualified or invalid - leave as-is + }; + + for stmt in statements.iter_mut() { + let stmt_sql = format!("{};", stmt); + + match stmt { + MzStatement::Comment(comment_stmt) => { + // Must be COMMENT ON SCHEMA targeting this schema + match &mut comment_stmt.object { + CommentObjectType::Schema { name } => { + // Check if it targets this schema (can be qualified or unqualified) + let target_schema = name.to_string(); + let is_match = target_schema == schema_name + || target_schema == format!("{}.{}", database_name, schema_name); + + if !is_match { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SchemaModCommentTargetMismatch { + target: format!("SCHEMA {}", target_schema), + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql, + )); + } else { + // Normalize the schema name to be fully qualified + normalize_schema_name(name); + } + } + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SchemaModCommentTargetMismatch { + target: comment_object_type_name(&comment_stmt.object).to_string(), + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql, + )); + } + } + } + MzStatement::GrantPrivileges(grant_stmt) => { + // Must be GRANT ON SCHEMA targeting this schema + match &mut grant_stmt.target { + GrantTargetSpecification::Object { + object_type, + object_spec_inner, + .. 
+ } => { + if object_type != &ObjectType::Schema { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SchemaModGrantTargetMismatch { + target: format!("{}", object_type), + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql.clone(), + )); + } + + // Check that it targets this specific schema + if let GrantTargetSpecificationInner::Objects { names } = object_spec_inner + { + for name in names { + if let UnresolvedObjectName::Schema(schema_name_obj) = name { + let target_schema = schema_name_obj.to_string(); + let is_match = target_schema == schema_name + || target_schema + == format!("{}.{}", database_name, schema_name); + + if !is_match { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SchemaModGrantTargetMismatch { + target: format!("SCHEMA {}", target_schema), + schema_name: format!( + "{}.{}", + database_name, schema_name + ), + }, + schema_path.to_path_buf(), + stmt_sql.clone(), + )); + } else { + // Normalize the schema name to be fully qualified + normalize_schema_name(schema_name_obj); + } + } + } + } + } + _ => { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::SchemaModGrantTargetMismatch { + target: "SYSTEM or other".to_string(), + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql, + )); + } + } + } + MzStatement::AlterDefaultPrivileges(alter_stmt) => { + // Must specify IN SCHEMA targeting this schema + match &mut alter_stmt.target_objects { + GrantTargetAllSpecification::AllSchemas { schemas } => { + // Validate each schema reference + for schema_name_obj in schemas { + let schema_str = schema_name_obj.to_string(); + + // Check if it matches the current schema (qualified or unqualified) + let is_match = schema_str == schema_name + || schema_str == format!("{}.{}", database_name, schema_name); + + if !is_match { + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesSchemaMismatch { + referenced: schema_str, + expected: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql.clone(), + )); + } else { + // Normalize the schema name to be fully qualified + normalize_schema_name(schema_name_obj); + } + } + } + GrantTargetAllSpecification::AllDatabases { .. } => { + // Reject: IN DATABASE not allowed in schema mod files + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesDatabaseNotAllowed { + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql.clone(), + )); + } + GrantTargetAllSpecification::All => { + // Reject: Must specify IN SCHEMA + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::AlterDefaultPrivilegesRequiresSchemaScope { + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql.clone(), + )); + } + } + } + _ => { + // Reject all other statement types + errors.push(ValidationError::with_file_and_sql( + ValidationErrorKind::InvalidSchemaModStatement { + statement_type: format!("{:?}", stmt) + .split('(') + .next() + .unwrap_or("unknown") + .to_string(), + schema_name: format!("{}.{}", database_name, schema_name), + }, + schema_path.to_path_buf(), + stmt_sql, + )); + } + } + } +} + +/// Validates that a schema doesn't mix storage objects with computation objects. 
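+///
+/// For example (illustrative), a schema containing both `users.sql` (`CREATE TABLE`) and
+/// `user_view.sql` (`CREATE VIEW`) fails with a `StorageAndComputationObjectsInSameSchema`
+/// error, as exercised by the tests earlier in this change.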
+/// +/// This validation prevents accidentally recreating tables or sinks when recreating views, +/// which would cause data loss. Storage and computation objects should be in separate schemas. +/// +/// # Object Groups +/// +/// - **Storage objects**: Tables, Sinks (can coexist in same schema) +/// - **Computation objects**: Views, Materialized Views (can coexist in same schema) +/// - These two groups CANNOT mix in the same schema +/// +/// # Validation Rules +/// +/// Valid combinations within a schema: +/// - Tables only +/// - Tables + Sinks +/// - Sinks only +/// - Views only +/// - Views + Materialized Views +/// - Materialized Views only +/// +/// Invalid combinations: +/// - Tables + Views +/// - Tables + Materialized Views +/// - Sinks + Views +/// - Sinks + Materialized Views +/// - Tables + Sinks + Views +/// - (any mix of storage and computation) +/// +/// # Arguments +/// +/// * `schema_name` - The name of the schema being validated +/// * `objects` - All database objects in the schema +/// * `errors` - Vector to collect validation errors +pub(super) fn validate_no_storage_and_computation_in_schema( + schema_name: &str, + objects: &[DatabaseObject], + errors: &mut Vec, +) { + let mut has_storage = false; + let mut has_computation = false; + let mut storage_names = Vec::new(); + let mut computation_names = Vec::new(); + + for obj in objects { + match &obj.stmt { + // Storage objects (persist data) + Statement::CreateTable(_) + | Statement::CreateTableFromSource(_) + | Statement::CreateSink(_) => { + has_storage = true; + let ident = obj.stmt.ident(); + storage_names.push(ident.object.clone()); + } + // Computation objects (transform data) + Statement::CreateView(_) | Statement::CreateMaterializedView(_) => { + has_computation = true; + let ident = obj.stmt.ident(); + computation_names.push(ident.object.clone()); + } + } + } + + if has_storage && has_computation { + errors.push(ValidationError::with_file( + ValidationErrorKind::StorageAndComputationObjectsInSameSchema { + schema_name: schema_name.to_string(), + storage_objects: storage_names, + computation_objects: computation_names, + }, + PathBuf::from(schema_name), + )); + } +} diff --git a/src/mz-deploy/src/types.rs b/src/mz-deploy/src/types.rs new file mode 100644 index 0000000000000..264f92078b1cd --- /dev/null +++ b/src/mz-deploy/src/types.rs @@ -0,0 +1,203 @@ +mod typechecker; + +pub use typechecker::{ + ObjectTypeCheckError, TypeCheckError, TypeCheckErrors, TypeChecker, typecheck_with_client, +}; + +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use std::fs; +use std::path::{Path, PathBuf}; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum TypesError { + #[error("failed to read types.lock at {path}")] + FileReadFailed { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("failed to write types.lock at {path}")] + FileWriteFailed { + path: PathBuf, + #[source] + source: std::io::Error, + }, + #[error("failed to parse types.lock at {path}")] + ParseFailed { + path: PathBuf, + #[source] + source: serde_json::Error, + }, + #[error("failed to serialize types.lock")] + SerializeFailed { + #[source] + source: serde_json::Error, + }, + #[error("failed to create directory {path}")] + DirectoryCreationFailed { + path: PathBuf, + #[source] + source: std::io::Error, + }, +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct ColumnType { + pub r#type: String, + pub nullable: bool, +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct Types { + pub version: 
u8, + pub objects: BTreeMap>, +} + +impl Default for Types { + fn default() -> Self { + Types { + version: 1, + objects: BTreeMap::new(), + } + } +} + +/// Load the types.lock file from the specified directory. +/// Returns an error if the file doesn't exist or cannot be parsed. +pub fn load_types_lock(directory: &Path) -> Result { + let path = directory.join("types.lock"); + + let contents = fs::read_to_string(&path).map_err(|source| TypesError::FileReadFailed { + path: path.clone(), + source, + })?; + + serde_json::from_str(&contents).map_err(|source| TypesError::ParseFailed { path, source }) +} + +impl Types { + /// Write the types.lock file to the specified directory. + /// Overwrites any existing file at that location. + pub fn write_types_lock(&self, directory: &Path) -> Result<(), TypesError> { + let path = directory.join("types.lock"); + + let contents = serde_json::to_string_pretty(self) + .map_err(|source| TypesError::SerializeFailed { source })?; + + fs::write(&path, contents).map_err(|source| TypesError::FileWriteFailed { path, source }) + } + + /// Write the types.cache file to the .mz-deploy directory. + /// + /// This cache stores the column types of internal project views after type checking. + /// It is used by the test command to validate unit tests without re-typechecking. + pub fn write_types_cache(&self, directory: &Path) -> Result<(), TypesError> { + let cache_dir = directory.join(".mz-deploy"); + + // Create .mz-deploy directory if it doesn't exist + if !cache_dir.exists() { + fs::create_dir_all(&cache_dir).map_err(|source| { + TypesError::DirectoryCreationFailed { + path: cache_dir.clone(), + source, + } + })?; + } + + let path = cache_dir.join("types.cache"); + let contents = serde_json::to_string_pretty(self) + .map_err(|source| TypesError::SerializeFailed { source })?; + + fs::write(&path, contents).map_err(|source| TypesError::FileWriteFailed { path, source }) + } + + /// Merge another Types instance into this one. + /// + /// Objects from `other` will be added to this Types. If the same object + /// exists in both, the one from `other` will overwrite. + pub fn merge(&mut self, other: &Types) { + for (key, value) in &other.objects { + self.objects.insert(key.clone(), value.clone()); + } + } + + /// Get the column schema for an object by its fully qualified name. + pub fn get_object(&self, fqn: &str) -> Option<&BTreeMap> { + self.objects.get(fqn) + } +} + +/// Load the types.cache file from the .mz-deploy directory. +/// +/// This cache contains column types for internal project views, generated during type checking. +/// Returns an error if the file doesn't exist or cannot be parsed. +pub fn load_types_cache(directory: &Path) -> Result { + let path = directory.join(".mz-deploy").join("types.cache"); + + let contents = fs::read_to_string(&path).map_err(|source| TypesError::FileReadFailed { + path: path.clone(), + source, + })?; + + serde_json::from_str(&contents).map_err(|source| TypesError::ParseFailed { path, source }) +} + +/// Check if the types.cache is stale compared to the project source files. +/// +/// Returns true if any SQL file in the project directory is newer than the cache file. 
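+///
+/// As an illustration only (the surrounding command wiring is assumed and not shown),
+/// a caller might pair this with `load_types_cache` to skip redundant type checking:
+///
+/// ```ignore
+/// if is_types_cache_stale(project_root) {
+///     // re-run type checking, which rewrites .mz-deploy/types.cache
+/// } else {
+///     let cached = load_types_cache(project_root)?;
+///     // validate unit tests against `cached` without re-typechecking
+/// }
+/// ```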
+pub fn is_types_cache_stale(directory: &Path) -> bool { + let cache_path = directory.join(".mz-deploy").join("types.cache"); + + // If cache doesn't exist, it's considered stale + let cache_metadata = match fs::metadata(&cache_path) { + Ok(m) => m, + Err(_) => return true, + }; + + let cache_modified = match cache_metadata.modified() { + Ok(t) => t, + Err(_) => return true, + }; + + // Check all SQL files in the project + check_files_newer_than(directory, cache_modified) +} + +/// Recursively check if any .sql file is newer than the given time. +fn check_files_newer_than(dir: &Path, threshold: std::time::SystemTime) -> bool { + let entries = match fs::read_dir(dir) { + Ok(entries) => entries, + Err(_) => return false, + }; + + for entry in entries.flatten() { + let path = entry.path(); + + // Skip hidden directories and files + if path + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|n| n.starts_with('.')) + { + continue; + } + + if path.is_dir() { + if check_files_newer_than(&path, threshold) { + return true; + } + } else if path.extension().is_some_and(|ext| ext == "sql") { + if let Ok(metadata) = fs::metadata(&path) { + if let Ok(modified) = metadata.modified() { + if modified > threshold { + return true; + } + } + } + } + } + + false +} diff --git a/src/mz-deploy/src/types/typechecker.rs b/src/mz-deploy/src/types/typechecker.rs new file mode 100644 index 0000000000000..958dd8a8d8e46 --- /dev/null +++ b/src/mz-deploy/src/types/typechecker.rs @@ -0,0 +1,369 @@ +//! Type checking trait and error types for Materialize projects. + +use crate::client::Client; +use crate::project::ast::Statement; +use crate::project::normalize::NormalizingVisitor; +use crate::project::object_id::ObjectId; +use crate::project::planned::Project; +use crate::project::typed::FullyQualifiedName; +use crate::verbose; +use mz_sql_parser::ast::{ + CreateViewStatement, Ident, IfExistsBehavior, UnresolvedItemName, ViewDefinition, +}; +use owo_colors::OwoColorize; +use std::collections::BTreeMap; +use std::fmt; +use std::path::{Path, PathBuf}; +use thiserror::Error; + +/// Errors that can occur during type checking +#[derive(Debug, Error)] +pub enum TypeCheckError { + /// Failed to start Docker container + #[error("failed to start Materialize container: {0}")] + ContainerStartFailed(#[source] Box), + + /// Failed to connect to Materialize + #[error("failed to connect to Materialize: {0}")] + ConnectionFailed(#[from] crate::client::ConnectionError), + + /// Failed to create temporary tables for external dependencies + #[error("failed to create temporary table for external dependency {object}: {source}")] + ExternalDependencyFailed { + object: ObjectId, + #[source] + source: Box, + }, + + /// Type checking failed for an object + #[error(transparent)] + TypeCheckFailed(#[from] ObjectTypeCheckError), + + /// Multiple type check errors occurred + #[error(transparent)] + Multiple(#[from] TypeCheckErrors), + + /// Database error during setup + #[error("database error during setup: {0}")] + DatabaseSetupError(String), + + /// Failed to get sorted objects + #[error("failed to get sorted objects: {0}")] + SortError(#[from] crate::project::error::DependencyError), + + /// Failed to write types cache + #[error("failed to write types cache: {0}")] + TypesCacheWriteFailed(#[from] crate::types::TypesError), +} + +/// A single type check error for a specific object +#[derive(Debug)] +pub struct ObjectTypeCheckError { + /// The object that failed type checking + pub object_id: ObjectId, + /// The file path of the object + pub 
file_path: PathBuf, + /// The SQL statement that failed + pub sql_statement: String, + /// The database error message + pub error_message: String, + /// Optional detail from the database + pub detail: Option, + /// Optional hint from the database + pub hint: Option, +} + +impl fmt::Display for ObjectTypeCheckError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Extract database/schema/file for path display + let path_components: Vec<_> = self.file_path.components().collect(); + let len = path_components.len(); + + let relative_path = if len >= 3 { + format!( + "{}/{}/{}", + path_components[len - 3].as_os_str().to_string_lossy(), + path_components[len - 2].as_os_str().to_string_lossy(), + path_components[len - 1].as_os_str().to_string_lossy() + ) + } else { + self.file_path.display().to_string() + }; + + // Format like rustc errors + writeln!( + f, + "{}: type check failed for '{}'", + "error".bright_red().bold(), + self.object_id + )?; + writeln!(f, " {} {}", "-->".bright_blue().bold(), relative_path)?; + writeln!(f)?; + + // Show the SQL statement (first few lines if long) + let lines: Vec<_> = self.sql_statement.lines().collect(); + writeln!(f, " {}", "|".bright_blue().bold())?; + for (idx, line) in lines.iter().take(10).enumerate() { + writeln!(f, " {} {}", "|".bright_blue().bold(), line)?; + if idx == 9 && lines.len() > 10 { + writeln!( + f, + " {} ... ({} more lines)", + "|".bright_blue().bold(), + lines.len() - 10 + )?; + break; + } + } + writeln!(f, " {}", "|".bright_blue().bold())?; + writeln!(f)?; + + // Show error message + writeln!( + f, + " {}: {}", + "error".bright_red().bold(), + self.error_message + )?; + + if let Some(ref detail) = self.detail { + writeln!(f, " {}: {}", "detail".bright_cyan().bold(), detail)?; + } + + if let Some(ref hint) = self.hint { + writeln!( + f, + " {} {}", + "=".bright_blue().bold(), + format!("hint: {}", hint).bold() + )?; + } + + Ok(()) + } +} + +impl std::error::Error for ObjectTypeCheckError {} + +/// Collection of type check errors +#[derive(Debug)] +pub struct TypeCheckErrors { + pub errors: Vec, +} + +impl fmt::Display for TypeCheckErrors { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (idx, error) in self.errors.iter().enumerate() { + if idx > 0 { + writeln!(f)?; + } + write!(f, "{}", error)?; + } + + writeln!(f)?; + writeln!( + f, + "{}: could not type check due to {} previous error{}", + "error".bright_red().bold(), + self.errors.len(), + if self.errors.len() == 1 { "" } else { "s" } + )?; + + Ok(()) + } +} + +impl std::error::Error for TypeCheckErrors {} + +/// Trait for type checking Materialize projects +#[async_trait::async_trait] +pub trait TypeChecker: Send + Sync { + /// Type check a project, validating all object definitions + /// + /// This method: + /// 1. Creates temporary tables/views for external dependencies (from types.lock) + /// 2. Creates temporary views for all project objects in topological order + /// 3. Reports any type errors found + /// + /// Returns Ok(()) if all objects pass type checking, or Err with detailed errors + async fn typecheck(&self, project: &Project) -> Result<(), TypeCheckError>; +} + +/// Type check a project using a pre-configured client +/// +/// This function performs type checking by creating temporary views/tables for all project +/// objects in topological order. The client should already be connected and have external +/// dependencies staged as temporary tables. 
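+///
+/// A minimal sketch of the intended call pattern (construction of the connected
+/// `client` is assumed and elided):
+///
+/// ```ignore
+/// // `client` is connected and has objects from types.lock staged as temporary tables
+/// typecheck_with_client(&mut client, &project, project_root).await?;
+/// ```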
+/// +/// # Arguments +/// * `client` - A connected client with external dependencies already staged +/// * `project` - The project to type check +/// * `project_root` - Root directory of the project (for error reporting) +/// +/// # Returns +/// Ok(()) if all objects pass type checking, or Err with detailed errors +pub async fn typecheck_with_client( + client: &mut Client, + project: &Project, + project_root: &Path, +) -> Result<(), TypeCheckError> { + // Type check objects in topological order + let object_paths = build_object_paths(project, project_root); + let sorted_objects = project.get_sorted_objects()?; + + verbose!( + "Type checking {} objects in topological order", + sorted_objects.len() + ); + let mut errors = Vec::new(); + + for (object_id, typed_object) in &sorted_objects { + verbose!("Type checking: {}", object_id); + + // Build the FQN from the object_id + let fqn = FullyQualifiedName::from(UnresolvedItemName(vec![ + Ident::new(&object_id.database).expect("valid database"), + Ident::new(&object_id.schema).expect("valid schema"), + Ident::new(&object_id.object).expect("valid object"), + ])); + + if let Some(statement) = create_temporary_view_sql(&typed_object.stmt, &fqn) { + let sql = statement.to_string(); + match client.execute(&sql.to_string(), &[]).await { + Ok(_) => { + verbose!(" ✓ Type check passed"); + } + Err(e) => { + // ConnectionError wraps tokio_postgres::Error, extract the database error + let (error_message, detail, hint) = match &e { + crate::client::ConnectionError::Query(pg_err) => { + if let Some(db_err) = pg_err.as_db_error() { + ( + db_err.message().to_string(), + db_err.detail().map(|s| s.to_string()), + db_err.hint().map(|s| s.to_string()), + ) + } else { + (pg_err.to_string(), None, None) + } + } + _ => (e.to_string(), None, None), + }; + + verbose!(" ✗ Type check failed: {}", error_message); + + let path = object_paths.get(object_id).cloned().unwrap_or_else(|| { + project_root + .join(&object_id.database) + .join(&object_id.schema) + .join(format!("{}.sql", object_id.object)) + }); + + errors.push(ObjectTypeCheckError { + object_id: object_id.clone(), + file_path: path, + sql_statement: sql, + error_message, + detail, + hint, + }); + } + } + } else { + verbose!(" - Skipping non-view/table object"); + } + } + + if errors.is_empty() { + verbose!("All type checks passed!"); + + // Query types for all successfully type-checked views and write to cache + let view_object_ids: Vec<&ObjectId> = sorted_objects + .iter() + .filter(|(_, typed_obj)| is_view_or_materialized_view(&typed_obj.stmt)) + .map(|(oid, _)| oid) + .collect(); + + if !view_object_ids.is_empty() { + verbose!( + "Caching types for {} view(s) to types.cache", + view_object_ids.len() + ); + + // Query types using flattened names (temporary views) + let internal_types = client.query_internal_types(&view_object_ids, true).await?; + + // Write types.cache + internal_types.write_types_cache(project_root)?; + verbose!("Successfully wrote types.cache"); + } + + Ok(()) + } else { + Err(TypeCheckError::Multiple(TypeCheckErrors { errors })) + } +} + +/// Check if a statement is a view or materialized view +fn is_view_or_materialized_view(stmt: &Statement) -> bool { + matches!( + stmt, + Statement::CreateView(_) | Statement::CreateMaterializedView(_) + ) +} + +/// Build object path mapping for error reporting +fn build_object_paths(project: &Project, project_root: &Path) -> BTreeMap { + let mut paths = BTreeMap::new(); + for obj in project.iter_objects() { + let path = project_root + 
.join(&obj.id.database) + .join(&obj.id.schema) + .join(format!("{}.sql", obj.id.object)); + paths.insert(obj.id.clone(), path); + } + paths +} + +/// Create temporary view for a project object +fn create_temporary_view_sql(stmt: &Statement, fqn: &FullyQualifiedName) -> Option { + let visitor = NormalizingVisitor::flattening(fqn); + + match stmt { + Statement::CreateView(view) => { + let mut view = view.clone(); + view.temporary = true; + + let normalized = Statement::CreateView(view) + .normalize_name_with(&visitor, &fqn.to_item_name()) + .normalize_dependencies_with(&visitor); + + Some(normalized) + } + Statement::CreateMaterializedView(mv) => { + let view_stmt = CreateViewStatement { + if_exists: IfExistsBehavior::Error, + temporary: true, + definition: ViewDefinition { + name: mv.name.clone(), + columns: mv.columns.clone(), + query: mv.query.clone(), + }, + }; + let normalized = Statement::CreateView(view_stmt) + .normalize_name_with(&visitor, &fqn.to_item_name()) + .normalize_dependencies_with(&visitor); + + Some(normalized) + } + Statement::CreateTable(_) | Statement::CreateTableFromSource(_) => { + // loaded from types.lock + None + } + _ => { + // Other statement types (sources, sinks, connections, secrets) + // cannot be type-checked in this way + None + } + } +} diff --git a/src/mz-deploy/src/unit_test.rs b/src/mz-deploy/src/unit_test.rs new file mode 100644 index 0000000000000..7a937d21f9081 --- /dev/null +++ b/src/mz-deploy/src/unit_test.rs @@ -0,0 +1,1913 @@ +//! Unit test parsing and desugaring for SQL views. +//! +//! This module provides functionality to parse custom unit test syntax and desugar it +//! into executable SQL statements that create temporary views and run test assertions. +//! +//! Tests are defined inline within the same SQL file as the view definition using +//! the EXECUTE UNIT TEST syntax. +//! +//! # Syntax +//! +//! ```sql +//! EXECUTE UNIT TEST test_name +//! FOR database.schema.view_name +//! [AT TIME 'timestamp'] -- optional, sets mz_now() during test +//! MOCK database.schema.mock1(col1 TYPE1, col2 TYPE2) AS ( +//! SELECT * FROM VALUES (...) +//! ), +//! MOCK database.schema.mock2(col TYPE) AS ( +//! SELECT * FROM VALUES (...) +//! ), +//! EXPECTED(col1 TYPE1, col2 TYPE2) AS ( +//! SELECT * FROM VALUES (...) +//! ); +//! ``` +//! +//! The `AT TIME` clause is optional. When provided, it specifies the timestamp value +//! that `mz_now()` will return during test execution. This is useful for testing +//! views that use temporal filters based on `mz_now()`. +//! +//! # Output +//! +//! The test is desugared into: +//! 1. CREATE TEMPORARY VIEW for each mock +//! 2. CREATE TEMPORARY VIEW for expected results +//! 3. CREATE TEMPORARY VIEW for the target (using flattened naming) +//! 4. Test query that returns rows with status column indicating failures +//! (includes AS OF clause when AT TIME is specified) + +use crate::project::ast::Statement; +use crate::project::normalize::NormalizingVisitor; +use crate::project::object_id::ObjectId; +use crate::project::typed::FullyQualifiedName; +use crate::types::Types; +use mz_sql_parser::ast::{CreateViewStatement, IfExistsBehavior, ViewDefinition}; +use owo_colors::OwoColorize; +use std::collections::BTreeSet; +use std::fmt; +use thiserror::Error; + +/// Represents a parsed unit test definition. 
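+///
+/// The fields mirror the EXECUTE UNIT TEST clauses: `FOR` populates `target_view`, the
+/// optional `AT TIME` clause populates `at_time`, each `MOCK` clause becomes an entry in
+/// `mocks`, and `EXPECTED` becomes `expected`.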
+#[derive(Debug, Clone)] +pub struct UnitTest { + /// Name of the test (e.g., "test_flippers") + pub name: String, + /// Fully qualified name of the target view being tested + pub target_view: String, + /// Optional timestamp for mz_now() during test execution + pub at_time: Option, + /// Mock views to create for dependencies + pub mocks: Vec, + /// Expected results definition + pub expected: ExpectedResult, +} + +impl UnitTest { + /// Convert an ExecuteUnitTestStatement from the AST into a UnitTest. + pub fn from_execute_statement( + stmt: &mz_sql_parser::ast::ExecuteUnitTestStatement, + ) -> Self { + use mz_sql_parser::ast::display::{AstDisplay, FormatMode}; + + let name = stmt.name.to_string(); + let target_view = stmt.target.to_ast_string(FormatMode::Simple); + + // Convert at_time if present + let at_time = stmt + .at_time + .as_ref() + .map(|expr| expr.to_ast_string(FormatMode::Simple)); + + // Convert mocks + let mocks = stmt + .mocks + .iter() + .map(|mock| { + let fqn = mock.name.to_ast_string(FormatMode::Simple); + let columns = mock + .columns + .iter() + .map(|col| { + ( + col.name.to_string(), + col.data_type.to_ast_string(FormatMode::Simple), + ) + }) + .collect(); + let query = mock.query.to_ast_string(FormatMode::Simple); + MockView { + fqn, + columns, + query, + } + }) + .collect(); + + // Convert expected + let expected = ExpectedResult { + columns: stmt + .expected + .columns + .iter() + .map(|col| { + ( + col.name.to_string(), + col.data_type.to_ast_string(FormatMode::Simple), + ) + }) + .collect(), + query: stmt.expected.query.to_ast_string(FormatMode::Simple), + }; + + UnitTest { + name, + target_view, + at_time, + mocks, + expected, + } + } +} + +/// A mock view definition that replaces a real dependency. +#[derive(Debug, Clone)] +pub struct MockView { + /// Fully qualified name (e.g., "materialize.public.flipper_activity") + pub fqn: String, + /// Column definitions as (name, type) pairs + pub columns: Vec<(String, String)>, + /// SQL query body (the part after AS) + pub query: String, +} + +/// Expected results for the test. +#[derive(Debug, Clone)] +pub struct ExpectedResult { + /// Column definitions as (name, type) pairs + pub columns: Vec<(String, String)>, + /// SQL query body (the part after AS) + pub query: String, +} + +// ============================================================================= +// Test Validation +// ============================================================================= + +/// Errors that can occur during unit test validation. +#[derive(Debug, Error)] +pub enum TestValidationError { + /// A required dependency is not mocked + #[error("unmocked dependency")] + UnmockedDependency(UnmockedDependencyError), + + /// A mock is missing required columns + #[error("mock schema mismatch")] + MockSchemaMismatch(MockSchemaMismatchError), + + /// Expected output doesn't match target view schema + #[error("expected output schema mismatch")] + ExpectedSchemaMismatch(ExpectedSchemaMismatchError), + + /// The AT TIME value is not a valid timestamp + #[error("invalid at_time timestamp")] + InvalidAtTime(InvalidAtTimeError), + + /// Types cache is missing or stale + #[error("types cache unavailable: {reason}")] + TypesCacheUnavailable { reason: String }, +} + +/// Error: A dependency of the target view is not mocked. 
+#[derive(Debug)] +pub struct UnmockedDependencyError { + /// Test name + pub test_name: String, + /// The target view being tested + pub target_view: String, + /// Dependencies that are not mocked + pub missing_mocks: Vec, +} + +impl fmt::Display for UnmockedDependencyError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "{}: test '{}' has unmocked dependencies", + "error".bright_red().bold(), + self.test_name.cyan() + )?; + writeln!( + f, + " {} target view: {}", + "-->".bright_blue().bold(), + self.target_view.yellow() + )?; + writeln!(f)?; + writeln!( + f, + " {} The following dependencies must be mocked:", + "|".bright_blue().bold() + )?; + for dep in &self.missing_mocks { + writeln!(f, " {} - {}", "|".bright_blue().bold(), dep.yellow())?; + } + writeln!(f)?; + writeln!( + f, + " {} Add mocks for these dependencies in the WITH clause of the test", + "=".bright_blue().bold() + )?; + Ok(()) + } +} + +impl std::error::Error for UnmockedDependencyError {} + +/// Error: A mock's columns don't match the actual schema. +#[derive(Debug)] +pub struct MockSchemaMismatchError { + /// Test name + pub test_name: String, + /// The mock that has mismatched columns + pub mock_fqn: String, + /// Columns in mock that don't exist in actual schema + pub extra_columns: Vec, + /// Columns in actual schema missing from mock (name, type) + pub missing_columns: Vec<(String, String)>, + /// Columns with wrong types (column_name, mock_type, actual_type) + pub type_mismatches: Vec<(String, String, String)>, + /// The actual schema columns with types (for showing expected signature) + pub actual_schema: Vec<(String, String)>, +} + +impl fmt::Display for MockSchemaMismatchError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "{}: mock '{}' schema doesn't match actual schema", + "error".bright_red().bold(), + self.mock_fqn.cyan() + )?; + writeln!( + f, + " {} in test: {}", + "-->".bright_blue().bold(), + self.test_name.yellow() + )?; + writeln!(f)?; + + if !self.missing_columns.is_empty() { + writeln!( + f, + " {} Missing columns (required but not in mock):", + "|".bright_blue().bold() + )?; + for (col, typ) in &self.missing_columns { + writeln!( + f, + " {} - {} {}", + "|".bright_blue().bold(), + col.red(), + typ.to_uppercase().dimmed() + )?; + } + } + + if !self.extra_columns.is_empty() { + writeln!( + f, + " {} Extra columns (in mock but not in actual schema):", + "|".bright_blue().bold() + )?; + for col in &self.extra_columns { + writeln!(f, " {} - {}", "|".bright_blue().bold(), col.yellow())?; + } + } + + if !self.type_mismatches.is_empty() { + writeln!(f, " {} Type mismatches:", "|".bright_blue().bold())?; + for (col, mock_type, actual_type) in &self.type_mismatches { + writeln!( + f, + " {} - {}: mock has '{}', expected '{}'", + "|".bright_blue().bold(), + col.cyan(), + mock_type.red(), + actual_type.green() + )?; + } + } + + writeln!(f)?; + + // Show the expected mock signature + if !self.actual_schema.is_empty() { + writeln!(f, " {} Expected mock signature:", "=".bright_blue().bold())?; + let cols: Vec = self + .actual_schema + .iter() + .map(|(name, typ)| format!("{} {}", name, typ.to_uppercase())) + .collect(); + writeln!( + f, + " {} MOCK {}({}) AS (...)", + "|".bright_blue().bold(), + self.mock_fqn.green(), + cols.join(", ").green() + )?; + } + + Ok(()) + } +} + +impl std::error::Error for MockSchemaMismatchError {} + +/// Error: Expected output columns don't match the target view schema. 
+#[derive(Debug)] +pub struct ExpectedSchemaMismatchError { + /// Test name + pub test_name: String, + /// The target view being tested + pub target_view: String, + /// Columns in expected that don't exist in target schema + pub extra_columns: Vec, + /// Columns in target schema missing from expected (name, type) + pub missing_columns: Vec<(String, String)>, + /// Columns with wrong types (column_name, expected_type, actual_type) + pub type_mismatches: Vec<(String, String, String)>, + /// The actual schema columns with types (for showing expected signature) + pub actual_schema: Vec<(String, String)>, +} + +impl fmt::Display for ExpectedSchemaMismatchError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "{}: expected output schema doesn't match target view", + "error".bright_red().bold() + )?; + writeln!( + f, + " {} target: {} | test: {}", + "-->".bright_blue().bold(), + self.target_view.cyan(), + self.test_name.yellow() + )?; + writeln!(f)?; + + if !self.missing_columns.is_empty() { + writeln!( + f, + " {} Missing columns (in target view but not in expected):", + "|".bright_blue().bold() + )?; + for (col, typ) in &self.missing_columns { + writeln!( + f, + " {} - {} {}", + "|".bright_blue().bold(), + col.red(), + typ.to_uppercase().dimmed() + )?; + } + } + + if !self.extra_columns.is_empty() { + writeln!( + f, + " {} Extra columns (in expected but not in target view):", + "|".bright_blue().bold() + )?; + for col in &self.extra_columns { + writeln!(f, " {} - {}", "|".bright_blue().bold(), col.yellow())?; + } + } + + if !self.type_mismatches.is_empty() { + writeln!(f, " {} Type mismatches:", "|".bright_blue().bold())?; + for (col, expected_type, actual_type) in &self.type_mismatches { + writeln!( + f, + " {} - {}: has '{}', expected '{}'", + "|".bright_blue().bold(), + col.cyan(), + expected_type.red(), + actual_type.green() + )?; + } + } + + writeln!(f)?; + + // Show the expected signature + if !self.actual_schema.is_empty() { + writeln!(f, " {} Expected signature:", "=".bright_blue().bold())?; + let cols: Vec = self + .actual_schema + .iter() + .map(|(name, typ)| format!("{} {}", name, typ.to_uppercase())) + .collect(); + writeln!( + f, + " {} EXPECTED({}) AS (...)", + "|".bright_blue().bold(), + cols.join(", ").green() + )?; + } + + Ok(()) + } +} + +impl std::error::Error for ExpectedSchemaMismatchError {} + +/// Error: The AT TIME value is not a valid timestamp. +#[derive(Debug)] +pub struct InvalidAtTimeError { + /// Test name + pub test_name: String, + /// The invalid AT TIME value + pub at_time_value: String, + /// The database error message + pub db_error: String, +} + +impl fmt::Display for InvalidAtTimeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "{}: test '{}' has invalid AT TIME value", + "error".bright_red().bold(), + self.test_name.cyan() + )?; + writeln!( + f, + " {} value: {}", + "-->".bright_blue().bold(), + self.at_time_value.yellow() + )?; + writeln!(f)?; + + // Extract the useful part of the error message + // DB errors like: "Error: invalid input syntax for type mz_timestamp: ..." + // We want to show from "invalid input syntax..." 
forward + let display_error = self + .db_error + .find("invalid input syntax") + .map(|idx| &self.db_error[idx..]) + .unwrap_or(&self.db_error); + + writeln!(f, " {} {}", "|".bright_blue().bold(), display_error.red())?; + writeln!(f)?; + writeln!( + f, + " {} The AT TIME value must be a valid timestamp that can be cast to mz_timestamp", + "=".bright_blue().bold() + )?; + writeln!( + f, + " {} Example: AT TIME '2024-01-15 10:00:00'", + "=".bright_blue().bold() + )?; + Ok(()) + } +} + +impl std::error::Error for InvalidAtTimeError {} + +/// Validate a unit test against the known types. +/// +/// This function performs three validations: +/// 1. All dependencies of the target view are mocked +/// 2. Each mock's columns match the actual schema of the mocked object +/// 3. The expected output columns match the target view's output schema +/// +/// # Arguments +/// * `test` - The unit test to validate +/// * `target_id` - The ObjectId of the target view +/// * `types` - Combined types from types.lock (external) and types.cache (internal) +/// * `dependencies` - Dependencies of the target view from the project's dependency graph +/// +/// # Returns +/// Ok(()) if validation passes, Err with detailed error messages if validation fails +pub fn validate_unit_test( + test: &UnitTest, + target_id: &ObjectId, + types: &Types, + dependencies: &BTreeSet, +) -> Result<(), TestValidationError> { + let target_fqn = target_id.to_string(); + + // Build set of mocked FQNs (normalize to fully qualified) + let mocked_fqns: BTreeSet = test + .mocks + .iter() + .map(|m| normalize_fqn(&m.fqn, target_id)) + .collect(); + + // 1. Check that all dependencies are mocked + let missing_mocks: Vec = dependencies + .iter() + .filter(|dep| !mocked_fqns.contains(&dep.to_string())) + .map(|dep| dep.to_string()) + .collect(); + + if !missing_mocks.is_empty() { + return Err(TestValidationError::UnmockedDependency( + UnmockedDependencyError { + test_name: test.name.clone(), + target_view: target_fqn.clone(), + missing_mocks, + }, + )); + } + + // 2. Validate each mock's schema against the actual types + for mock in &test.mocks { + let mock_fqn = normalize_fqn(&mock.fqn, target_id); + + if let Some(actual_columns) = types.get_object(&mock_fqn) { + let (extra, missing, type_mismatches) = compare_columns(&mock.columns, actual_columns); + + if !extra.is_empty() || !missing.is_empty() || !type_mismatches.is_empty() { + // Extract actual schema for error message + let actual_schema: Vec<(String, String)> = actual_columns + .iter() + .map(|(name, col_type)| (name.clone(), col_type.r#type.clone())) + .collect(); + + return Err(TestValidationError::MockSchemaMismatch( + MockSchemaMismatchError { + test_name: test.name.clone(), + mock_fqn, + extra_columns: extra, + missing_columns: missing, + type_mismatches, + actual_schema, + }, + )); + } + } + // If the mock isn't in types, it might be an external dependency not in types.lock + // We allow this to be permissive - the database will catch it during execution + } + + // 3. 
Validate expected output schema against target view + if let Some(target_columns) = types.get_object(&target_fqn) { + let (extra, missing, type_mismatches) = + compare_columns(&test.expected.columns, target_columns); + + if !extra.is_empty() || !missing.is_empty() || !type_mismatches.is_empty() { + // Extract actual schema for error message + let actual_schema: Vec<(String, String)> = target_columns + .iter() + .map(|(name, col_type)| (name.clone(), col_type.r#type.clone())) + .collect(); + + return Err(TestValidationError::ExpectedSchemaMismatch( + ExpectedSchemaMismatchError { + test_name: test.name.clone(), + target_view: target_fqn, + extra_columns: extra, + missing_columns: missing, + type_mismatches, + actual_schema, + }, + )); + } + } + // If target isn't in types, we'll catch it during test execution + + Ok(()) +} + +/// Normalize a potentially partial FQN to a fully qualified name using the target's context. +fn normalize_fqn(fqn: &str, target_id: &ObjectId) -> String { + let parts: Vec<&str> = fqn.split('.').collect(); + match parts.len() { + 1 => format!("{}.{}.{}", target_id.database, target_id.schema, fqn), + 2 => format!("{}.{}", target_id.database, fqn), + _ => fqn.to_string(), + } +} + +/// Compare test columns against actual schema columns. +/// +/// Returns (extra_columns, missing_columns_with_types, type_mismatches). +fn compare_columns( + test_columns: &[(String, String)], + actual_columns: &std::collections::BTreeMap, +) -> ( + Vec, + Vec<(String, String)>, + Vec<(String, String, String)>, +) { + let test_col_names: BTreeSet<&str> = test_columns.iter().map(|(n, _)| n.as_str()).collect(); + let actual_col_names: BTreeSet<&str> = actual_columns.keys().map(|s| s.as_str()).collect(); + + // Extra columns in test but not in actual + let extra: Vec = test_col_names + .difference(&actual_col_names) + .map(|s| (*s).to_string()) + .collect(); + + // Missing columns in actual but not in test (with their types) + let missing: Vec<(String, String)> = actual_col_names + .difference(&test_col_names) + .map(|s| { + let typ = actual_columns + .get(*s) + .map(|c| c.r#type.clone()) + .unwrap_or_default(); + ((*s).to_string(), typ) + }) + .collect(); + + // Type mismatches for columns present in both + let type_mismatches: Vec<(String, String, String)> = test_columns + .iter() + .filter_map(|(name, test_type)| { + actual_columns.get(name).and_then(|actual| { + // Normalize types for comparison (case-insensitive, strip whitespace) + let test_normalized = normalize_type(test_type); + let actual_normalized = normalize_type(&actual.r#type); + + if test_normalized != actual_normalized { + Some((name.clone(), test_type.clone(), actual.r#type.clone())) + } else { + None + } + }) + }) + .collect(); + + (extra, missing, type_mismatches) +} + +/// Normalize a SQL type for comparison. +/// +/// This handles Materialize type aliases so that equivalent types compare equal. 
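+/// For example, `INT`, `int4`, and `integer` all normalize to `integer`, while
+/// parameterized forms such as `varchar(255)` and `numeric(10,2)` normalize to `text`
+/// and `numeric` respectively.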
+/// Based on: https://materialize.com/docs/sql/types/ +fn normalize_type(t: &str) -> String { + let normalized = t.trim().to_lowercase(); + + // Map Materialize type aliases to canonical forms + match normalized.as_str() { + // Integer types + "int" | "int4" | "integer" => "integer".to_string(), + "int8" | "bigint" => "bigint".to_string(), + "int2" | "smallint" => "smallint".to_string(), + + // Floating point types + "float4" | "real" => "real".to_string(), + "float" | "float8" | "double" | "double precision" => "double precision".to_string(), + + // Boolean + "bool" | "boolean" => "boolean".to_string(), + + // Text/String types + "string" | "text" => "text".to_string(), + "varchar" | "character varying" => "text".to_string(), + + // Numeric/Decimal + "decimal" | "numeric" => "numeric".to_string(), + + // JSON types + "json" | "jsonb" => "jsonb".to_string(), + + // Timestamp types + "timestamptz" | "timestamp with time zone" => "timestamp with time zone".to_string(), + + _ => { + // Handle parameterized types like varchar(255) -> text, numeric(10,2) -> numeric + if normalized.starts_with("varchar") || normalized.starts_with("character varying") { + "text".to_string() + } else if normalized.starts_with("numeric") || normalized.starts_with("decimal") { + "numeric".to_string() + } else if normalized.starts_with("timestamp with time zone") + || normalized.starts_with("timestamptz") + { + "timestamp with time zone".to_string() + } else { + normalized + } + } + } +} + +/// Desugar unit test into executable SQL statements. +/// +/// Returns a vector of SQL strings in order: +/// 1. CREATE TEMPORARY VIEW for each mock +/// 2. CREATE TEMPORARY VIEW for expected +/// 3. CREATE TEMPORARY VIEW for the target (flattened) +/// 4. Test query with status column +/// +/// # Arguments +/// +/// * `test` - The parsed unit test +/// * `target_stmt` - The statement defining the target view +/// * `target_fqn` - Fully qualified name of the target view +pub fn desugar_unit_test( + test: &UnitTest, + target_stmt: &Statement, + target_fqn: &FullyQualifiedName, +) -> Vec { + let mut statements = Vec::new(); + + // 1. Create temporary views for mocks + // Qualify mock names with target's database and schema if not already qualified + for mock in &test.mocks { + let qualified_mock = qualify_mock_name(mock, target_fqn); + statements.push(create_mock_view_sql(&qualified_mock)); + } + + // 2. Create temporary view for expected + statements.push(create_expected_view_sql(&test.expected)); + + // 3. Create temporary view for target (flattened) + statements.push(create_target_view_sql(target_stmt, target_fqn)); + + // 4. Create test query + let target_fqn_str = format!( + "{}.{}.{}", + target_fqn.database(), + target_fqn.schema(), + target_fqn.object() + ); + let flattened_target_name = flatten_fqn(&target_fqn_str); + statements.push(create_test_query_sql( + &flattened_target_name, + test.at_time.as_deref(), + )); + + statements +} + +/// Quote a fully qualified name as a single identifier with dots. +/// +/// # Example +/// +/// ```ignore +/// assert_eq!(flatten_fqn("materialize.public.flippers"), "\"materialize.public.flippers\""); +/// ``` +fn flatten_fqn(fqn: &str) -> String { + format!("\"{}\"", fqn) +} + +/// Qualify a mock name with the target's FQN context if it's not already qualified. 
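+///
+/// For example (illustrative names), with a target of `materialize.public.user_order_summary`,
+/// a mock named `users` is qualified to `materialize.public.users`, `sales.users` becomes
+/// `materialize.sales.users`, and a fully qualified name is returned unchanged.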
+fn qualify_mock_name(mock: &MockView, target_fqn: &FullyQualifiedName) -> MockView { + // Count the number of parts in the FQN (parts = dots + 1) + // 1 part: object + // 2 parts: schema.object + // 3 parts: database.schema.object + let parts = mock.fqn.matches('.').count() + 1; + + let qualified_fqn = match parts { + 1 => { + // Unqualified: object only + // Prepend database.schema + format!( + "{}.{}.{}", + target_fqn.database(), + target_fqn.schema(), + mock.fqn + ) + } + 2 => { + // Partially qualified: schema.object + // Prepend database + format!("{}.{}", target_fqn.database(), mock.fqn) + } + _ => { + // Fully qualified (3+ parts) + mock.fqn.clone() + } + }; + + MockView { + fqn: qualified_fqn, + columns: mock.columns.clone(), + query: mock.query.clone(), + } +} + +/// Create SQL for a mock temporary view. +fn create_mock_view_sql(mock: &MockView) -> String { + let flattened_name = flatten_fqn(&mock.fqn); + let columns_def = mock + .columns + .iter() + .map(|(name, typ)| format!("{} {}", name, typ)) + .collect::>() + .join(", "); + + format!( + "CREATE TEMPORARY VIEW {} AS\nWITH MUTUALLY RECURSIVE data({}) AS (\n {}\n)\nSELECT * FROM data;", + flattened_name, columns_def, mock.query + ) +} + +/// Create SQL for the expected temporary view. +fn create_expected_view_sql(expected: &ExpectedResult) -> String { + let columns_def = expected + .columns + .iter() + .map(|(name, typ)| format!("{} {}", name, typ)) + .collect::>() + .join(", "); + + format!( + "CREATE TEMPORARY VIEW expected AS\nWITH MUTUALLY RECURSIVE data({}) AS (\n {}\n)\nSELECT * FROM data;", + columns_def, expected.query + ) +} + +/// Create SQL for the target view as a temporary view with flattened naming. +fn create_target_view_sql(stmt: &Statement, fqn: &FullyQualifiedName) -> String { + let visitor = NormalizingVisitor::flattening(fqn); + let transformed_stmt = stmt + .clone() + .normalize_name_with(&visitor, &fqn.to_item_name()) + .normalize_dependencies_with(&visitor); + + match transformed_stmt { + Statement::CreateView(view) => { + let stmt = CreateViewStatement { + if_exists: IfExistsBehavior::Error, + temporary: true, + definition: view.definition.clone(), + }; + + stmt.to_string() + } + Statement::CreateMaterializedView(materialized_view) => { + let stmt = CreateViewStatement { + if_exists: IfExistsBehavior::Error, + temporary: true, + definition: ViewDefinition { + name: materialized_view.name, + columns: materialized_view.columns, + query: materialized_view.query, + }, + }; + + stmt.to_string() + } + _ => unimplemented!(), + } +} + +/// Create the test assertion query that returns failures. +/// +/// Returns rows with a 'status' column that indicates the type of failure: +/// - 'MISSING': Expected rows not found in actual results +/// - 'UNEXPECTED': Actual rows not found in expected results +/// +/// Empty result means the test passed. +/// +/// If `at_time` is provided, the query includes an `AS OF` clause to set +/// the value of `mz_now()` during test execution. 
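+///
+/// For a flattened target name of `"materialize.public.my_view"` and no `AT TIME`, the
+/// generated query has roughly this shape:
+///
+/// ```sql
+/// SELECT 'MISSING' as status, * FROM expected
+/// EXCEPT
+/// SELECT 'MISSING', * FROM "materialize.public.my_view"
+/// UNION ALL
+/// SELECT 'UNEXPECTED' as status, * FROM "materialize.public.my_view"
+/// EXCEPT
+/// SELECT 'UNEXPECTED', * FROM expected
+/// ```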
+fn create_test_query_sql(flattened_target_name: &str, at_time: Option<&str>) -> String { + let as_of_clause = at_time + .map(|t| format!(" AS OF {}::mz_timestamp", t)) + .unwrap_or_default(); + format!( + r#"SELECT 'MISSING' as status, * FROM expected +EXCEPT +SELECT 'MISSING', * FROM {} + +UNION ALL + +SELECT 'UNEXPECTED' as status, * FROM {} +EXCEPT +SELECT 'UNEXPECTED', * FROM expected{}"#, + flattened_target_name, flattened_target_name, as_of_clause + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::ColumnType; + use std::collections::BTreeMap; + + #[test] + fn test_flatten_fqn() { + assert_eq!( + flatten_fqn("materialize.public.flippers"), + "\"materialize.public.flippers\"" + ); + assert_eq!(flatten_fqn("a.b.c"), "\"a.b.c\""); + assert_eq!(flatten_fqn("single"), "\"single\""); + } + + #[test] + fn test_create_mock_view_sql() { + let mock = MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "BIGINT".to_string()), + ("name".to_string(), "TEXT".to_string()), + ], + query: "SELECT * FROM VALUES ((1, 'alice'))".to_string(), + }; + + let sql = create_mock_view_sql(&mock); + + assert!(sql.contains("CREATE TEMPORARY VIEW \"materialize.public.users\"")); + assert!(sql.contains("WITH MUTUALLY RECURSIVE data(id BIGINT, name TEXT)")); + assert!(sql.contains("SELECT * FROM VALUES ((1, 'alice'))")); + assert!(sql.contains("SELECT * FROM data")); + } + + #[test] + fn test_create_expected_view_sql() { + let expected = ExpectedResult { + columns: vec![ + ("id".to_string(), "BIGINT".to_string()), + ("count".to_string(), "INT".to_string()), + ], + query: "SELECT * FROM VALUES ((1, 10))".to_string(), + }; + + let sql = create_expected_view_sql(&expected); + + assert!(sql.contains("CREATE TEMPORARY VIEW expected")); + assert!(sql.contains("WITH MUTUALLY RECURSIVE data(id BIGINT, count INT)")); + assert!(sql.contains("SELECT * FROM VALUES ((1, 10))")); + assert!(sql.contains("SELECT * FROM data")); + } + + #[test] + fn test_create_test_query_sql() { + let sql = create_test_query_sql("materialize_public_my_view", None); + + assert!(sql.contains("SELECT 'MISSING' as status, * FROM expected")); + assert!(sql.contains("SELECT 'MISSING', * FROM materialize_public_my_view")); + assert!(sql.contains("SELECT 'UNEXPECTED' as status, * FROM materialize_public_my_view")); + assert!(sql.contains("SELECT 'UNEXPECTED', * FROM expected")); + assert!(sql.contains("UNION ALL")); + assert!(sql.contains("EXCEPT")); + assert!(!sql.contains("AS OF")); // No AS OF when at_time is None + } + + #[test] + fn test_create_test_query_sql_with_at_time() { + let sql = + create_test_query_sql("materialize_public_my_view", Some("'2024-01-15 10:00:00'")); + + assert!(sql.contains("SELECT 'MISSING' as status, * FROM expected")); + assert!(sql.contains("AS OF '2024-01-15 10:00:00'::mz_timestamp")); + } + + // ========================================================================= + // Validation Tests + // ========================================================================= + + fn make_test_types() -> Types { + let mut objects = BTreeMap::new(); + + // Add users table schema + let mut users_cols = BTreeMap::new(); + users_cols.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + users_cols.insert( + "name".to_string(), + ColumnType { + r#type: "text".to_string(), + nullable: true, + }, + ); + users_cols.insert( + "email".to_string(), + ColumnType { + r#type: "text".to_string(), + nullable: true, + }, + ); + 
objects.insert("materialize.public.users".to_string(), users_cols); + + // Add orders table schema + let mut orders_cols = BTreeMap::new(); + orders_cols.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + orders_cols.insert( + "user_id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + orders_cols.insert( + "amount".to_string(), + ColumnType { + r#type: "numeric".to_string(), + nullable: true, + }, + ); + objects.insert("materialize.public.orders".to_string(), orders_cols); + + // Add target view schema (user_order_summary) + let mut summary_cols = BTreeMap::new(); + summary_cols.insert( + "user_id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + summary_cols.insert( + "user_name".to_string(), + ColumnType { + r#type: "text".to_string(), + nullable: true, + }, + ); + summary_cols.insert( + "total_orders".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: true, + }, + ); + objects.insert( + "materialize.public.user_order_summary".to_string(), + summary_cols, + ); + + Types { + version: 1, + objects, + } + } + + fn make_target_id() -> ObjectId { + ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "user_order_summary".to_string(), + } + } + + fn make_dependencies() -> BTreeSet { + let mut deps = BTreeSet::new(); + deps.insert(ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "users".to_string(), + }); + deps.insert(ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "orders".to_string(), + }); + deps + } + + #[test] + fn test_validate_unit_test_passes_with_correct_mocks() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com')".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_ok(), "Expected validation to pass: {:?}", result); + } + + #[test] + fn test_validate_unit_test_fails_with_unmocked_dependency() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + // Only mocking users, not orders + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), 
"text".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com')".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::UnmockedDependency(err) => { + assert_eq!(err.test_name, "test_user_summary"); + assert!( + err.missing_mocks + .contains(&"materialize.public.orders".to_string()) + ); + } + other => panic!("Expected UnmockedDependency error, got: {:?}", other), + } + } + + #[test] + fn test_validate_unit_test_fails_with_missing_mock_column() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + // Missing 'email' column + ], + query: "SELECT * FROM VALUES (1, 'alice')".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::MockSchemaMismatch(err) => { + assert_eq!(err.test_name, "test_user_summary"); + assert_eq!(err.mock_fqn, "materialize.public.users"); + assert!(err.missing_columns.iter().any(|(name, _)| name == "email")); + assert!(err.extra_columns.is_empty()); + } + other => panic!("Expected MockSchemaMismatch error, got: {:?}", other), + } + } + + #[test] + fn test_validate_unit_test_fails_with_extra_mock_column() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ("extra_column".to_string(), "int".to_string()), // Extra column + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com', 42)".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + 
("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::MockSchemaMismatch(err) => { + assert_eq!(err.mock_fqn, "materialize.public.users"); + assert!(err.extra_columns.contains(&"extra_column".to_string())); + assert!(err.missing_columns.is_empty()); + } + other => panic!("Expected MockSchemaMismatch error, got: {:?}", other), + } + } + + #[test] + fn test_validate_unit_test_fails_with_type_mismatch() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "text".to_string()), // Wrong type: should be bigint + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ], + query: "SELECT * FROM VALUES ('1', 'alice', 'alice@example.com')".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::MockSchemaMismatch(err) => { + assert_eq!(err.mock_fqn, "materialize.public.users"); + assert!( + err.type_mismatches + .iter() + .any(|(col, mock_t, _)| { col == "id" && mock_t == "text" }) + ); + } + other => panic!("Expected MockSchemaMismatch error, got: {:?}", other), + } + } + + #[test] + fn test_validate_unit_test_fails_with_expected_schema_mismatch() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com')".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + // Missing 'user_name' column + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1)".to_string(), + 
}, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::ExpectedSchemaMismatch(err) => { + assert_eq!(err.test_name, "test_user_summary"); + assert_eq!(err.target_view, "materialize.public.user_order_summary"); + assert!( + err.missing_columns + .iter() + .any(|(name, _)| name == "user_name") + ); + } + other => panic!("Expected ExpectedSchemaMismatch error, got: {:?}", other), + } + } + + #[test] + fn test_validate_unit_test_fails_with_expected_type_mismatch() { + let test = UnitTest { + name: "test_user_summary".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "materialize.public.users".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com')".to_string(), + }, + MockView { + fqn: "materialize.public.orders".to_string(), + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "bigint".to_string()), // Wrong type: should be text + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_err()); + + match result.unwrap_err() { + TestValidationError::ExpectedSchemaMismatch(err) => { + assert!( + err.type_mismatches + .iter() + .any(|(col, exp_t, _)| { col == "user_name" && exp_t == "bigint" }) + ); + } + other => panic!("Expected ExpectedSchemaMismatch error, got: {:?}", other), + } + } + + // ========================================================================= + // FQN Normalization Tests + // ========================================================================= + + #[test] + fn test_normalize_fqn_unqualified() { + let target_id = ObjectId { + database: "mydb".to_string(), + schema: "myschema".to_string(), + object: "myview".to_string(), + }; + + let normalized = normalize_fqn("users", &target_id); + assert_eq!(normalized, "mydb.myschema.users"); + } + + #[test] + fn test_normalize_fqn_schema_qualified() { + let target_id = ObjectId { + database: "mydb".to_string(), + schema: "myschema".to_string(), + object: "myview".to_string(), + }; + + let normalized = normalize_fqn("other_schema.users", &target_id); + assert_eq!(normalized, "mydb.other_schema.users"); + } + + #[test] + fn test_normalize_fqn_fully_qualified() { + let target_id = ObjectId { + database: "mydb".to_string(), + schema: "myschema".to_string(), + object: "myview".to_string(), + }; + + let normalized = normalize_fqn("other_db.other_schema.users", &target_id); + assert_eq!(normalized, "other_db.other_schema.users"); + } + + // ========================================================================= + // Type Normalization Tests (based on 
https://materialize.com/docs/sql/types/) + // ========================================================================= + + #[test] + fn test_normalize_type_integer_aliases() { + // integer = int = int4 + assert_eq!(normalize_type("INT"), "integer"); + assert_eq!(normalize_type("int4"), "integer"); + assert_eq!(normalize_type("integer"), "integer"); + assert_eq!(normalize_type("INTEGER"), "integer"); + } + + #[test] + fn test_normalize_type_bigint_aliases() { + // bigint = int8 + assert_eq!(normalize_type("INT8"), "bigint"); + assert_eq!(normalize_type("bigint"), "bigint"); + assert_eq!(normalize_type("BIGINT"), "bigint"); + } + + #[test] + fn test_normalize_type_smallint_aliases() { + // smallint = int2 + assert_eq!(normalize_type("INT2"), "smallint"); + assert_eq!(normalize_type("smallint"), "smallint"); + assert_eq!(normalize_type("SMALLINT"), "smallint"); + } + + #[test] + fn test_normalize_type_real_aliases() { + // real = float4 + assert_eq!(normalize_type("float4"), "real"); + assert_eq!(normalize_type("FLOAT4"), "real"); + assert_eq!(normalize_type("real"), "real"); + assert_eq!(normalize_type("REAL"), "real"); + } + + #[test] + fn test_normalize_type_double_precision_aliases() { + // double precision = float = float8 = double + assert_eq!(normalize_type("float"), "double precision"); + assert_eq!(normalize_type("FLOAT"), "double precision"); + assert_eq!(normalize_type("float8"), "double precision"); + assert_eq!(normalize_type("FLOAT8"), "double precision"); + assert_eq!(normalize_type("double"), "double precision"); + assert_eq!(normalize_type("DOUBLE"), "double precision"); + assert_eq!(normalize_type("double precision"), "double precision"); + assert_eq!(normalize_type("DOUBLE PRECISION"), "double precision"); + } + + #[test] + fn test_normalize_type_boolean_aliases() { + // boolean = bool + assert_eq!(normalize_type("bool"), "boolean"); + assert_eq!(normalize_type("boolean"), "boolean"); + assert_eq!(normalize_type("BOOL"), "boolean"); + assert_eq!(normalize_type("BOOLEAN"), "boolean"); + } + + #[test] + fn test_normalize_type_text_aliases() { + // text = string = varchar = character varying + assert_eq!(normalize_type("text"), "text"); + assert_eq!(normalize_type("TEXT"), "text"); + assert_eq!(normalize_type("string"), "text"); + assert_eq!(normalize_type("STRING"), "text"); + assert_eq!(normalize_type("varchar"), "text"); + assert_eq!(normalize_type("VARCHAR"), "text"); + assert_eq!(normalize_type("varchar(255)"), "text"); + assert_eq!(normalize_type("character varying"), "text"); + assert_eq!(normalize_type("character varying(100)"), "text"); + } + + #[test] + fn test_normalize_type_numeric_aliases() { + // numeric = decimal + assert_eq!(normalize_type("numeric"), "numeric"); + assert_eq!(normalize_type("NUMERIC"), "numeric"); + assert_eq!(normalize_type("decimal"), "numeric"); + assert_eq!(normalize_type("DECIMAL"), "numeric"); + assert_eq!(normalize_type("numeric(10,2)"), "numeric"); + assert_eq!(normalize_type("decimal(18,4)"), "numeric"); + } + + #[test] + fn test_normalize_type_jsonb_aliases() { + // jsonb = json + assert_eq!(normalize_type("json"), "jsonb"); + assert_eq!(normalize_type("JSON"), "jsonb"); + assert_eq!(normalize_type("jsonb"), "jsonb"); + assert_eq!(normalize_type("JSONB"), "jsonb"); + } + + #[test] + fn test_normalize_type_timestamptz_aliases() { + // timestamp with time zone = timestamptz + assert_eq!(normalize_type("timestamptz"), "timestamp with time zone"); + assert_eq!(normalize_type("TIMESTAMPTZ"), "timestamp with time zone"); + assert_eq!( + 
normalize_type("timestamp with time zone"), + "timestamp with time zone" + ); + assert_eq!( + normalize_type("TIMESTAMP WITH TIME ZONE"), + "timestamp with time zone" + ); + } + + #[test] + fn test_normalize_type_preserves_other_types() { + // Types without aliases should be preserved as-is (lowercased) + assert_eq!(normalize_type("timestamp"), "timestamp"); + assert_eq!(normalize_type("TIMESTAMP"), "timestamp"); + assert_eq!(normalize_type("date"), "date"); + assert_eq!(normalize_type("time"), "time"); + assert_eq!(normalize_type("interval"), "interval"); + assert_eq!(normalize_type("uuid"), "uuid"); + assert_eq!(normalize_type("bytea"), "bytea"); + assert_eq!(normalize_type("oid"), "oid"); + assert_eq!(normalize_type("uint2"), "uint2"); + assert_eq!(normalize_type("uint4"), "uint4"); + assert_eq!(normalize_type("uint8"), "uint8"); + } + + #[test] + fn test_normalize_type_handles_whitespace() { + assert_eq!(normalize_type(" INT "), "integer"); + assert_eq!(normalize_type("\ttext\n"), "text"); + assert_eq!(normalize_type(" double precision "), "double precision"); + } + + #[test] + fn test_normalize_type_case_insensitive() { + // Validation should be completely case-agnostic + // integer variants + assert_eq!(normalize_type("integer"), normalize_type("INTEGER")); + assert_eq!(normalize_type("integer"), normalize_type("Integer")); + assert_eq!(normalize_type("integer"), normalize_type("iNtEgEr")); + assert_eq!(normalize_type("int"), normalize_type("INT")); + assert_eq!(normalize_type("int"), normalize_type("Int")); + + // bigint variants + assert_eq!(normalize_type("bigint"), normalize_type("BIGINT")); + assert_eq!(normalize_type("bigint"), normalize_type("BigInt")); + assert_eq!(normalize_type("int8"), normalize_type("INT8")); + + // text variants + assert_eq!(normalize_type("text"), normalize_type("TEXT")); + assert_eq!(normalize_type("text"), normalize_type("Text")); + assert_eq!(normalize_type("string"), normalize_type("STRING")); + assert_eq!(normalize_type("string"), normalize_type("String")); + + // boolean variants + assert_eq!(normalize_type("boolean"), normalize_type("BOOLEAN")); + assert_eq!(normalize_type("boolean"), normalize_type("Boolean")); + assert_eq!(normalize_type("bool"), normalize_type("BOOL")); + assert_eq!(normalize_type("bool"), normalize_type("Bool")); + + // numeric variants + assert_eq!(normalize_type("numeric"), normalize_type("NUMERIC")); + assert_eq!(normalize_type("numeric"), normalize_type("Numeric")); + assert_eq!(normalize_type("decimal"), normalize_type("DECIMAL")); + + // double precision variants + assert_eq!( + normalize_type("double precision"), + normalize_type("DOUBLE PRECISION") + ); + assert_eq!( + normalize_type("double precision"), + normalize_type("Double Precision") + ); + + // timestamp with time zone variants + assert_eq!( + normalize_type("timestamp with time zone"), + normalize_type("TIMESTAMP WITH TIME ZONE") + ); + assert_eq!(normalize_type("timestamptz"), normalize_type("TIMESTAMPTZ")); + assert_eq!(normalize_type("timestamptz"), normalize_type("TimestampTZ")); + + // jsonb variants + assert_eq!(normalize_type("jsonb"), normalize_type("JSONB")); + assert_eq!(normalize_type("jsonb"), normalize_type("JsonB")); + assert_eq!(normalize_type("json"), normalize_type("JSON")); + } + + // ========================================================================= + // Column Comparison Tests + // ========================================================================= + + #[test] + fn test_compare_columns_exact_match() { + let test_columns = 
vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ]; + + let mut actual_columns = BTreeMap::new(); + actual_columns.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + actual_columns.insert( + "name".to_string(), + ColumnType { + r#type: "text".to_string(), + nullable: true, + }, + ); + + let (extra, missing, type_mismatches) = compare_columns(&test_columns, &actual_columns); + assert!(extra.is_empty()); + assert!(missing.is_empty()); + assert!(type_mismatches.is_empty()); + } + + #[test] + fn test_compare_columns_with_type_aliases() { + let test_columns = vec![ + ("id".to_string(), "INT".to_string()), // Should match "integer" + ("count".to_string(), "INT8".to_string()), // Should match "bigint" + ]; + + let mut actual_columns = BTreeMap::new(); + actual_columns.insert( + "id".to_string(), + ColumnType { + r#type: "integer".to_string(), + nullable: false, + }, + ); + actual_columns.insert( + "count".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + + let (extra, missing, type_mismatches) = compare_columns(&test_columns, &actual_columns); + assert!(extra.is_empty()); + assert!(missing.is_empty()); + assert!(type_mismatches.is_empty()); + } + + #[test] + fn test_compare_columns_detects_extra() { + let test_columns = vec![ + ("id".to_string(), "bigint".to_string()), + ("extra".to_string(), "text".to_string()), + ]; + + let mut actual_columns = BTreeMap::new(); + actual_columns.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + + let (extra, missing, _) = compare_columns(&test_columns, &actual_columns); + assert_eq!(extra, vec!["extra".to_string()]); + assert!(missing.is_empty()); + } + + #[test] + fn test_compare_columns_detects_missing() { + let test_columns = vec![("id".to_string(), "bigint".to_string())]; + + let mut actual_columns = BTreeMap::new(); + actual_columns.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + actual_columns.insert( + "name".to_string(), + ColumnType { + r#type: "text".to_string(), + nullable: true, + }, + ); + + let (extra, missing, _) = compare_columns(&test_columns, &actual_columns); + assert!(extra.is_empty()); + assert_eq!(missing, vec![("name".to_string(), "text".to_string())]); + } + + #[test] + fn test_compare_columns_detects_type_mismatch() { + let test_columns = vec![("id".to_string(), "text".to_string())]; + + let mut actual_columns = BTreeMap::new(); + actual_columns.insert( + "id".to_string(), + ColumnType { + r#type: "bigint".to_string(), + nullable: false, + }, + ); + + let (_, _, type_mismatches) = compare_columns(&test_columns, &actual_columns); + assert_eq!(type_mismatches.len(), 1); + assert_eq!(type_mismatches[0].0, "id"); + assert_eq!(type_mismatches[0].1, "text"); + assert_eq!(type_mismatches[0].2, "bigint"); + } + + // ========================================================================= + // Partial FQN Mock Resolution Tests + // ========================================================================= + + #[test] + fn test_validate_with_unqualified_mock_name() { + // Test that unqualified mock names get resolved correctly + let test = UnitTest { + name: "test_partial_fqn".to_string(), + target_view: "materialize.public.user_order_summary".to_string(), + at_time: None, + mocks: vec![ + MockView { + fqn: "users".to_string(), // Unqualified - should resolve to materialize.public.users + 
columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("name".to_string(), "text".to_string()), + ("email".to_string(), "text".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 'alice@example.com')".to_string(), + }, + MockView { + fqn: "public.orders".to_string(), // Schema qualified - should resolve to materialize.public.orders + columns: vec![ + ("id".to_string(), "bigint".to_string()), + ("user_id".to_string(), "bigint".to_string()), + ("amount".to_string(), "numeric".to_string()), + ], + query: "SELECT * FROM VALUES (1, 1, 100.00)".to_string(), + }, + ], + expected: ExpectedResult { + columns: vec![ + ("user_id".to_string(), "bigint".to_string()), + ("user_name".to_string(), "text".to_string()), + ("total_orders".to_string(), "bigint".to_string()), + ], + query: "SELECT * FROM VALUES (1, 'alice', 1)".to_string(), + }, + }; + + let types = make_test_types(); + let target_id = make_target_id(); + let dependencies = make_dependencies(); + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_ok(), "Expected validation to pass: {:?}", result); + } + + #[test] + fn test_validate_passes_with_no_dependencies() { + let test = UnitTest { + name: "test_no_deps".to_string(), + target_view: "materialize.public.my_view".to_string(), + at_time: None, + mocks: vec![], + expected: ExpectedResult { + columns: vec![("result".to_string(), "integer".to_string())], + query: "SELECT * FROM VALUES (42)".to_string(), + }, + }; + + let types = Types::default(); + let target_id = ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "my_view".to_string(), + }; + let dependencies = BTreeSet::new(); // No dependencies + + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_skips_unknown_mock() { + // If a mock isn't in types, validation should be permissive + let test = UnitTest { + name: "test_unknown_mock".to_string(), + target_view: "materialize.public.my_view".to_string(), + at_time: None, + mocks: vec![MockView { + fqn: "materialize.public.unknown_table".to_string(), + columns: vec![("id".to_string(), "bigint".to_string())], + query: "SELECT * FROM VALUES (1)".to_string(), + }], + expected: ExpectedResult { + columns: vec![("result".to_string(), "integer".to_string())], + query: "SELECT * FROM VALUES (42)".to_string(), + }, + }; + + let types = Types::default(); // Empty types - unknown_table not in types + let target_id = ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "my_view".to_string(), + }; + + // Dependency is present but not in types + let mut dependencies = BTreeSet::new(); + dependencies.insert(ObjectId { + database: "materialize".to_string(), + schema: "public".to_string(), + object: "unknown_table".to_string(), + }); + + // Should pass because mock covers the dependency, even though type info is missing + let result = validate_unit_test(&test, &target_id, &types, &dependencies); + assert!(result.is_ok()); + } +} diff --git a/src/mz-deploy/src/utils.rs b/src/mz-deploy/src/utils.rs new file mode 100644 index 0000000000000..29a3dba921600 --- /dev/null +++ b/src/mz-deploy/src/utils.rs @@ -0,0 +1,5 @@ +pub mod docker_runtime; +pub mod git; +pub mod log; +pub mod progress; +pub mod sql_utils; diff --git a/src/mz-deploy/src/utils/docker_runtime.rs b/src/mz-deploy/src/utils/docker_runtime.rs new file mode 100644 index 0000000000000..1d5c120e688d6 --- /dev/null +++ 
b/src/mz-deploy/src/utils/docker_runtime.rs
@@ -0,0 +1,299 @@
+//! Docker runtime for running a Materialize container for testing and type checking.
+//!
+//! This module manages a persistent Docker container and provides a connected database client
+//! with external dependencies already staged as temporary tables.
+
+use crate::client::{Client, Profile};
+use crate::types::{TypeCheckError, Types};
+use crate::verbose;
+use tokio::process::Command;
+use tokio::time::{Duration, sleep};
+
+/// Name of the persistent Docker container
+const CONTAINER_NAME: &str = "mz-deploy-typecheck";
+
+/// Host port to bind for the persistent container
+const CONTAINER_PORT: u16 = 16875;
+
+/// Docker runtime for managing a Materialize container
+pub struct DockerRuntime {
+    /// Materialize Docker image to use
+    image: String,
+}
+
+impl DockerRuntime {
+    /// Create a new Docker runtime with the default Materialize image
+    pub fn new() -> Self {
+        Self {
+            image: "materialize/materialized:latest".to_string(),
+        }
+    }
+
+    /// Set a custom Docker image
+    pub fn with_image(mut self, image: impl Into<String>) -> Self {
+        self.image = image.into();
+        self
+    }
+
+    /// Get a connected client with external dependencies staged
+    ///
+    /// This method:
+    /// 1. Ensures the Docker container is running and healthy
+    /// 2. Connects to the Materialize database
+    /// 3. Creates temporary tables for all external dependencies from types.lock
+    /// 4. Returns the connected client ready for use
+    ///
+    /// # Arguments
+    /// * `types` - The types.lock data containing table schemas for type checking
+    ///
+    /// # Returns
+    /// A connected Client with tables from types.lock already staged
+    pub async fn get_client(&self, types: &Types) -> Result<Client, TypeCheckError> {
+        // Ensure the persistent container is running and healthy
+        self.ensure_container().await?;
+
+        // Connect to Materialize (using fixed port)
+        let profile = Profile {
+            name: "docker-typecheck".to_string(),
+            host: "localhost".to_string(),
+            port: CONTAINER_PORT,
+            username: Some("materialize".to_string()),
+            password: None,
+        };
+
+        verbose!("Connecting to Materialize...");
+        let client = Client::connect_with_profile(profile).await?;
+
+        // Create temporary tables from types.lock (includes external deps and project tables)
+        if !types.objects.is_empty() {
+            verbose!(
+                "Creating {} temporary tables from types.lock",
+                types.objects.len()
+            );
+            self.create_tables_from_types_lock(&client, types).await?;
+        }
+
+        Ok(client)
+    }
+
+    /// Check if the persistent container exists
+    async fn container_exists(&self) -> Result<bool, TypeCheckError> {
+        let output = Command::new("docker")
+            .args([
+                "ps",
+                "-a",
+                "--filter",
+                &format!("name=^{}$", CONTAINER_NAME),
+                "--format",
+                "{{.Names}}",
+            ])
+            .output()
+            .await
+            .map_err(|e| TypeCheckError::ContainerStartFailed(Box::new(e)))?;
+
+        Ok(output.status.success() && !output.stdout.is_empty())
+    }
+
+    /// Check if the persistent container is running
+    async fn container_is_running(&self) -> Result<bool, TypeCheckError> {
+        let output = Command::new("docker")
+            .args([
+                "ps",
+                "--filter",
+                &format!("name=^{}$", CONTAINER_NAME),
+                "--format",
+                "{{.Names}}",
+            ])
+            .output()
+            .await
+            .map_err(|e| TypeCheckError::ContainerStartFailed(Box::new(e)))?;
+
+        Ok(output.status.success() && !output.stdout.is_empty())
+    }
+
+    /// Check if the container is healthy by attempting a connection
+    async fn container_is_healthy(&self) -> bool {
+        let profile = Profile {
+            name: "docker-typecheck".to_string(),
+            host: "localhost".to_string(),
+            port: CONTAINER_PORT,
+            username: Some("materialize".to_string()),
+            password: None,
+        };
+
+        // Try to connect - if this succeeds, container is healthy
+        match Client::connect_with_profile(profile).await {
+            Ok(_) => true,
+            Err(_) => false,
+        }
+    }
+
+    /// Remove the persistent container
+    async fn remove_container(&self) -> Result<(), TypeCheckError> {
+        verbose!("Removing existing container: {}", CONTAINER_NAME);
+        let output = Command::new("docker")
+            .args(["rm", "-f", CONTAINER_NAME])
+            .output()
+            .await
+            .map_err(|e| TypeCheckError::ContainerStartFailed(Box::new(e)))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(TypeCheckError::ContainerStartFailed(
+                format!("Failed to remove container: {}", stderr).into(),
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Create and start the persistent container
+    async fn create_container(&self) -> Result<(), TypeCheckError> {
+        verbose!(
+            "Creating persistent container: {} (image: {})",
+            CONTAINER_NAME,
+            self.image
+        );
+
+        let output = Command::new("docker")
+            .args([
+                "run",
+                "-d",
+                "--name",
+                CONTAINER_NAME,
+                "-p",
+                &format!("{}:6875", CONTAINER_PORT),
+                &self.image,
+            ])
+            .output()
+            .await
+            .map_err(|e| TypeCheckError::ContainerStartFailed(Box::new(e)))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(TypeCheckError::ContainerStartFailed(
+                format!("Failed to create container: {}", stderr).into(),
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Start an existing stopped container
+    async fn start_existing_container(&self) -> Result<(), TypeCheckError> {
+        verbose!("Starting existing container: {}", CONTAINER_NAME);
+
+        let output = Command::new("docker")
+            .args(["start", CONTAINER_NAME])
+            .output()
+            .await
+            .map_err(|e| TypeCheckError::ContainerStartFailed(Box::new(e)))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(TypeCheckError::ContainerStartFailed(
+                format!("Failed to start container: {}", stderr).into(),
+            ));
+        }
+
+        Ok(())
+    }
+
+    /// Wait for the container to be ready to accept connections
+    async fn wait_for_container(&self) -> Result<(), TypeCheckError> {
+        verbose!("Waiting for container to be ready...");
+
+        // Wait up to 30 seconds for the container to be ready
+        for i in 0..30 {
+            if self.container_is_healthy().await {
+                verbose!("Container is ready!");
+                return Ok(());
+            }
+            if i < 29 {
+                sleep(Duration::from_secs(1)).await;
+            }
+        }
+
+        Err(TypeCheckError::ContainerStartFailed(
+            "Container failed to become healthy within 30 seconds".into(),
+        ))
+    }
+
+    /// Ensure the persistent container is running and healthy
+    async fn ensure_container(&self) -> Result<(), TypeCheckError> {
+        let exists = self.container_exists().await?;
+        let is_running = if exists {
+            self.container_is_running().await?
+        } else {
+            false
+        };
+
+        if is_running {
+            // Container is running, check if it's healthy
+            verbose!("Found running container: {}", CONTAINER_NAME);
+            if self.container_is_healthy().await {
+                verbose!("Container is healthy, reusing it");
+                return Ok(());
+            } else {
+                verbose!("Container is unhealthy, recreating it");
+                self.remove_container().await?;
+            }
+        } else if exists {
+            // Container exists but is not running, try to start it
+            verbose!("Found stopped container: {}", CONTAINER_NAME);
+            match self.start_existing_container().await {
+                Ok(_) => {
+                    self.wait_for_container().await?;
+                    return Ok(());
+                }
+                Err(_) => {
+                    verbose!("Failed to start stopped container, recreating it");
+                    self.remove_container().await?;
+                }
+            }
+        }
+
+        // Create new container
+        self.create_container().await?;
+        self.wait_for_container().await?;
+
+        Ok(())
+    }
+
+    /// Create temporary tables from types.lock for type checking
+    async fn create_tables_from_types_lock(
+        &self,
+        client: &Client,
+        types: &Types,
+    ) -> Result<(), TypeCheckError> {
+        for (fqn, columns) in &types.objects {
+            let mut col_defs = Vec::new();
+            for (col_name, col_type) in columns {
+                let nullable = if col_type.nullable { "" } else { " NOT NULL" };
+                col_defs.push(format!("{} {}{}", col_name, col_type.r#type, nullable));
+            }
+
+            let create_sql = format!(
+                "CREATE TEMPORARY TABLE \"{}\" ({})",
+                fqn,
+                col_defs.join(", ")
+            );
+
+            verbose!("Creating temporary table: {}", fqn);
+            client.execute(&create_sql, &[]).await.map_err(|e| {
+                TypeCheckError::DatabaseSetupError(format!(
+                    "failed to create temporary table for '{}': {}",
+                    fqn, e
+                ))
+            })?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Default for DockerRuntime {
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/src/mz-deploy/src/utils/git.rs b/src/mz-deploy/src/utils/git.rs
new file mode 100644
index 0000000000000..62845f248431d
--- /dev/null
+++ b/src/mz-deploy/src/utils/git.rs
@@ -0,0 +1,45 @@
+use std::process::Command;
+
+/// Get the current git commit hash of the project directory.
+///
+/// Returns Some(commit_hash) if the directory is a git repository with a valid HEAD,
+/// otherwise returns None.
+///
+/// # Arguments
+/// * `directory` - Path to the project directory
+pub fn get_git_commit(directory: &std::path::Path) -> Option<String> {
+    let output = Command::new("git")
+        .arg("rev-parse")
+        .arg("HEAD")
+        .current_dir(directory)
+        .output()
+        .ok()?;
+
+    if !output.status.success() {
+        return None;
+    }
+
+    let commit = String::from_utf8(output.stdout).ok()?;
+    Some(commit.trim().to_string())
+}
+
+/// Returns `true` if the repo contains any uncommitted or unstaged changes.
+///
+/// This uses `git status --porcelain`, which outputs a line for each changed file.
+/// Any non-empty output means the repo is dirty.
+///
+/// # Arguments
+/// * `directory` - Path to the project directory
+pub fn is_dirty(directory: &std::path::Path) -> bool {
+    let output = Command::new("git")
+        .args(["status", "--porcelain"])
+        .current_dir(directory)
+        .output();
+
+    let out = match output {
+        Ok(out) => out,
+        Err(_) => return false,
+    };
+
+    !String::from_utf8_lossy(&out.stdout).trim().is_empty()
+}
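Reviewer note (not part of the diff): a hedged sketch of how a deploy step might record provenance with these helpers. `DeployMetadata` and `collect_git_metadata` are hypothetical names for illustration; only `get_git_commit` and `is_dirty` come from the file above.

    use std::path::Path;

    struct DeployMetadata {
        commit: Option<String>,
        dirty: bool,
    }

    fn collect_git_metadata(project_dir: &Path) -> DeployMetadata {
        // `get_git_commit` returns None outside a git repository; `is_dirty` is
        // best-effort and reports false if `git` cannot be invoked at all.
        DeployMetadata {
            commit: crate::utils::git::get_git_commit(project_dir),
            dirty: crate::utils::git::is_dirty(project_dir),
        }
    }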
diff --git a/src/mz-deploy/src/utils/log.rs b/src/mz-deploy/src/utils/log.rs
new file mode 100644
index 0000000000000..3894ee37a1ff7
--- /dev/null
+++ b/src/mz-deploy/src/utils/log.rs
@@ -0,0 +1,34 @@
+//! Logging utilities for mz-deploy.
+//!
+//! This module provides a simple verbose logging system that can be enabled
+//! via the `--verbose` CLI flag. When verbose mode is enabled, diagnostic
+//! messages are printed to stdout to help users understand what the tool
+//! is doing.
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// Global verbose mode flag.
+///
+/// This is a thread-safe atomic boolean that stores whether verbose
+/// logging is enabled. It uses relaxed memory ordering since the exact
+/// timing of when verbose mode is enabled/disabled doesn't matter.
+static VERBOSE: AtomicBool = AtomicBool::new(false);
+
+/// Enable or disable verbose logging.
+pub fn set_verbose(v: bool) {
+    VERBOSE.store(v, Ordering::Relaxed);
+}
+
+/// Check if verbose logging is currently enabled.
+pub fn verbose_enabled() -> bool {
+    VERBOSE.load(Ordering::Relaxed)
+}
+
+/// Print a message only when verbose mode is enabled.
+#[macro_export]
+macro_rules! verbose {
+    ($($arg:tt)*) => {
+        if $crate::utils::log::verbose_enabled() {
+            println!($($arg)*);
+        }
+    };
+}
diff --git a/src/mz-deploy/src/utils/progress.rs b/src/mz-deploy/src/utils/progress.rs
new file mode 100644
index 0000000000000..4ffbb263a4fdd
--- /dev/null
+++ b/src/mz-deploy/src/utils/progress.rs
@@ -0,0 +1,111 @@
+//! Progress reporting utilities for user-facing output.
+//!
+//! This module provides helper functions for displaying progress and status
+//! during command execution. It uses colors and symbols to create clear,
+//! scannable output similar to tools like dbt.
+
+use owo_colors::OwoColorize;
+use std::time::Duration;
+
+/// Print a stage start message with yellow arrow.
+///
+/// # Example
+/// ```ignore
+/// stage_start("Parsing SQL files");
+/// // Output: → Parsing SQL files...
+/// ```
+pub fn stage_start(name: &str) {
+    println!("\n{} {}...", "→".yellow(), name);
+}
+
+/// Print a stage completion message with green checkmark and duration.
+///
+/// # Example
+/// ```ignore
+/// stage_success("15 objects parsed", Duration::from_millis(100));
+/// // Output: ✓ 15 objects parsed (0.1s)
+/// ```
+pub fn stage_success(message: &str, duration: Duration) {
+    let seconds = duration.as_secs_f64();
+    println!(
+        " {} {} {}",
+        "✓".green(),
+        message,
+        format!("({}s)", format_duration(seconds)).dimmed()
+    );
+}
+
+/// Print an informational message with blue info symbol.
+///
+/// # Example
+/// ```ignore
+/// info("3 external dependencies detected");
+/// // Output: ℹ 3 external dependencies detected
+/// ```
+pub fn info(message: &str) {
+    println!(" {} {}", "ℹ".blue(), message);
+}
+
+/// Print a success message with green checkmark.
+///
+/// # Example
+/// ```ignore
+/// success("All objects validated");
+/// // Output: ✓ All objects validated
+/// ```
+pub fn success(message: &str) {
+    println!(" {} {}", "✓".green(), message);
+}
+
+/// Print an error message with red X symbol.
+///
+/// # Example
+/// ```ignore
+/// error("Type checking failed");
+/// // Output: ✗ Type checking failed
+/// ```
+pub fn error(message: &str) {
+    println!(" {} {}", "✗".red(), message);
+}
+
+/// Print a final summary message with green checkmark and total duration.
+///
+/// # Example
+/// ```ignore
+/// summary("Project successfully compiled", Duration::from_secs(3));
+/// // Output: ✓ Project successfully compiled in 3.0s
+/// ```
+pub fn summary(message: &str, duration: Duration) {
+    let seconds = duration.as_secs_f64();
+    println!(
+        "\n{} {} {}",
+        "✓".green().bold(),
+        message,
+        format!("in {}s", format_duration(seconds)).dimmed()
+    );
+}
+
+/// Format duration to show appropriate precision.
+/// - < 1s: show 2 decimal places
+/// - >= 1s: show 1 decimal place
+fn format_duration(seconds: f64) -> String {
+    if seconds < 1.0 {
+        format!("{:.2}", seconds)
+    } else {
+        format!("{:.1}", seconds)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_duration() {
+        assert_eq!(format_duration(0.05), "0.05");
+        assert_eq!(format_duration(0.123), "0.12");
+        assert_eq!(format_duration(1.0), "1.0");
+        assert_eq!(format_duration(2.567), "2.6");
+        assert_eq!(format_duration(10.12), "10.1");
+    }
+}
diff --git a/src/mz-deploy/src/utils/sql_utils.rs b/src/mz-deploy/src/utils/sql_utils.rs
new file mode 100644
index 0000000000000..219d9b3f6c8b8
--- /dev/null
+++ b/src/mz-deploy/src/utils/sql_utils.rs
@@ -0,0 +1,3 @@
+pub fn quote_identifier(name: &str) -> String {
+    format!("\"{}\"", name.replace('"', "\"\""))
+}
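Reviewer note (not part of the diff): the quoting helper above doubles any embedded double quote before wrapping the identifier, so its behavior can be pinned down with a couple of assertions. This sketch assumes it sits next to `quote_identifier` in sql_utils.rs.

    #[test]
    fn quote_identifier_examples() {
        // Plain identifiers are simply wrapped in double quotes.
        assert_eq!(quote_identifier("orders"), "\"orders\"");
        // Embedded double quotes are doubled so the identifier round-trips safely.
        assert_eq!(quote_identifier("weird\"name"), "\"weird\"\"name\"");
    }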
diff --git a/src/mz-deploy/tests/type_checker_test.rs b/src/mz-deploy/tests/type_checker_test.rs
new file mode 100644
index 0000000000000..c9f34f71df949
--- /dev/null
+++ b/src/mz-deploy/tests/type_checker_test.rs
@@ -0,0 +1,130 @@
+//! Integration tests for TypeChecker trait and implementations
+
+#[cfg(test)]
+mod tests {
+    use mz_deploy::project;
+    use std::fs;
+    use tempfile::TempDir;
+
+    #[cfg(feature = "docker-typecheck")]
+    #[tokio::test]
+    async fn test_docker_typechecker_with_external_dependencies() {
+        use mz_deploy::types::{ColumnType, Types, typecheck_with_client};
+        use mz_deploy::utils::docker_runtime::DockerRuntime;
+        use std::collections::BTreeMap;
+
+        // Create a test project with external dependencies
+        let temp_dir = TempDir::new().unwrap();
+        let project_path = temp_dir.path();
+
+        // Create project structure
+        let materialize_dir = project_path.join("materialize");
+        fs::create_dir(&materialize_dir).unwrap();
+        let public_dir = materialize_dir.join("public");
+        fs::create_dir(&public_dir).unwrap();
+
+        // Create a view that references an external table
+        let sql_content = r#"
+CREATE VIEW user_summary AS
+SELECT
+    u.id,
+    u.name,
+    COUNT(*) AS total_orders
+FROM external_db.external_schema.users u
+JOIN external_db.external_schema.orders o ON u.id = o.user_id
+GROUP BY u.id, u.name;
+"#;
+        fs::write(public_dir.join("user_summary.sql"), sql_content).unwrap();
+
+        // Create types.lock with external dependencies
+        let mut objects = BTreeMap::new();
+
+        // Define external_db.external_schema.users
+        let mut users_columns = BTreeMap::new();
+        users_columns.insert(
+            "id".to_string(),
+            ColumnType {
+                r#type: "INTEGER".to_string(),
+                nullable: false,
+            },
+        );
+        users_columns.insert(
+            "name".to_string(),
+            ColumnType {
+                r#type: "TEXT".to_string(),
+                nullable: false,
+            },
+        );
+        objects.insert(
+            "external_db.external_schema.users".to_string(),
+            users_columns,
+        );
+
+        // Define external_db.external_schema.orders
+        let mut orders_columns = BTreeMap::new();
+        orders_columns.insert(
+            "id".to_string(),
+            ColumnType {
+                r#type: "INTEGER".to_string(),
+                nullable: false,
+            },
+        );
+        orders_columns.insert(
+            "user_id".to_string(),
+            ColumnType {
+                r#type: "INTEGER".to_string(),
+                nullable: false,
+            },
+        );
+        objects.insert(
+            "external_db.external_schema.orders".to_string(),
+            orders_columns,
+        );
+
+        let types = Types {
+            version: 1,
+            objects,
+        };
+
+        // Write types.lock
+        types.write_types_lock(project_path).unwrap();
+
+        // Load the project
+        let mir_project = project::plan(project_path).unwrap();
+
+        // Create Docker runtime and get client
+        let runtime = DockerRuntime::new();
+        let mut client = match runtime.get_client(&types).await {
+            Ok(client) => client,
+            Err(e) => {
+                println!(
+                    "Docker not available for testing (expected if Docker not installed): {}",
+                    e
+                );
+                // If Docker is not available, this is expected behavior
+                if e.to_string().contains("container")
+                    || e.to_string().contains("Docker")
+                    || e.to_string().contains("docker")
+                {
+                    // This is expected when Docker is not available
+                    return;
+                }
+                // Any other error is unexpected
+                panic!("Unexpected error: {}", e);
+            }
+        };
+
+        // Run type checking with the client
+        let result = typecheck_with_client(&mut client, &mir_project, project_path).await;
+
+        // Type checking should succeed; a failure here indicates a real type error
+        match result {
+            Ok(_) => {
+                println!("Type checking passed with Docker");
+            }
+            Err(e) => {
+                panic!("Type checking failed: {}", e);
+            }
+        }
+    }
+}
diff --git a/src/sql-lexer/src/keywords.txt b/src/sql-lexer/src/keywords.txt
index e126ccd581f50..dc67b40ce5363 100644
--- a/src/sql-lexer/src/keywords.txt
+++ b/src/sql-lexer/src/keywords.txt
@@ -286,6 +286,7 @@ Message
 Metadata
 Minute
 Minutes
+Mock
 Mode
 Month
 Months
@@ -468,6 +469,7 @@ Task
 Tasks
 Temp
 Temporary
+Test
 Text
 Then
 Tick
@@ -495,6 +497,7 @@ Unbounded
 Uncommitted
 Union
 Unique
+Unit
 Unknown
 Unnest
 Until
diff --git a/src/sql-parser/src/ast/defs/statement.rs b/src/sql-parser/src/ast/defs/statement.rs
index 22cb27da9ea26..a68a0aa52c91b 100644
--- a/src/sql-parser/src/ast/defs/statement.rs
+++ b/src/sql-parser/src/ast/defs/statement.rs
@@ -104,6 +104,7 @@ pub enum Statement<T: AstInfo> {
     Close(CloseStatement),
     Prepare(PrepareStatement<T>),
     Execute(ExecuteStatement<T>),
+    ExecuteUnitTest(ExecuteUnitTestStatement<T>),
     Deallocate(DeallocateStatement),
     Raise(RaiseStatement),
     GrantRole(GrantRoleStatement<T>),
@@ -182,6 +183,7 @@ impl<T: AstInfo> AstDisplay for Statement<T> {
             Statement::Fetch(stmt) => f.write_node(stmt),
             Statement::Prepare(stmt) => f.write_node(stmt),
             Statement::Execute(stmt) => f.write_node(stmt),
+            Statement::ExecuteUnitTest(stmt) => f.write_node(stmt),
             Statement::Deallocate(stmt) => f.write_node(stmt),
             Statement::Raise(stmt) => f.write_node(stmt),
             Statement::GrantRole(stmt) => f.write_node(stmt),
@@ -263,6 +265,7 @@ pub fn statement_kind_label_value(kind: StatementKind) -> &'static str {
         StatementKind::Close => "close",
         StatementKind::Prepare => "prepare",
         StatementKind::Execute => "execute",
+        StatementKind::ExecuteUnitTest => "execute_unit_test",
         StatementKind::Deallocate => "deallocate",
         StatementKind::Raise => "raise",
         StatementKind::GrantRole => "grant_role",
@@ -5011,6 +5014,72 @@ impl<T: AstInfo> AstDisplay for ExecuteStatement<T> {
     }
 }
 impl_display_t!(ExecuteStatement);
 
+/// `EXECUTE UNIT TEST ...`
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ExecuteUnitTestStatement<T: AstInfo> {
+    pub name: Ident,
+    pub target: T::ItemName,
+    pub at_time: Option<Expr<T>>,
+    pub mocks: Vec<MockViewDef<T>>,
+    pub expected: ExpectedResultDef<T>,
+}
+
+impl<T: AstInfo> AstDisplay for ExecuteUnitTestStatement<T> {
+    fn fmt(&self, f: &mut AstFormatter) {
+        f.write_str("EXECUTE UNIT TEST ");
+        f.write_node(&self.name);
+        f.write_str(" FOR ");
+        f.write_node(&self.target);
+        if let Some(at_time) = &self.at_time {
+            f.write_str(" AT TIME ");
+            f.write_node(at_time);
+        }
+        for mock in &self.mocks {
+            f.write_str(" MOCK ");
+            f.write_node(mock);
+        }
+        f.write_str(" EXPECTED");
+        f.write_node(&self.expected);
+    }
+}
+impl_display_t!(ExecuteUnitTestStatement);
+
+/// Mock view definition for EXECUTE UNIT TEST
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct MockViewDef<T: AstInfo> {
+    pub name: T::ItemName,
+    pub columns: Vec<ColumnDef<T>>,
+    pub query: Query<T>,
+}
+
+impl<T: AstInfo> AstDisplay for MockViewDef<T> {
+    fn fmt(&self, f: &mut AstFormatter) {
+        f.write_node(&self.name);
+        f.write_str("(");
+        f.write_node(&display::comma_separated(&self.columns));
+        f.write_str(") AS ");
+        f.write_node(&self.query);
+    }
+}
+impl_display_t!(MockViewDef);
+
+/// Expected result definition for EXECUTE UNIT TEST
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ExpectedResultDef<T: AstInfo> {
+    pub columns: Vec<ColumnDef<T>>,
+    pub query: Query<T>,
+}
+
+impl<T: AstInfo> AstDisplay for ExpectedResultDef<T> {
+    fn fmt(&self, f: &mut AstFormatter) {
+        f.write_str("(");
+        f.write_node(&display::comma_separated(&self.columns));
+        f.write_str(") AS ");
+        f.write_node(&self.query);
+    }
+}
+impl_display_t!(ExpectedResultDef);
+
 /// `DEALLOCATE ...`
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct DeallocateStatement {
diff --git a/src/sql-parser/src/parser.rs b/src/sql-parser/src/parser.rs
index 32c71f4ac5f61..e5aa1f0cc318d 100644
--- a/src/sql-parser/src/parser.rs
+++ b/src/sql-parser/src/parser.rs
@@ -9248,6 +9248,12 @@ impl<'a> Parser<'a> {
     /// Parse a `EXECUTE` statement, assuming that the `EXECUTE` token
     /// has already been consumed.
     fn parse_execute(&mut self) -> Result<Statement<Raw>, ParserError> {
+        // Check if this is EXECUTE UNIT TEST
+        if self.parse_keywords(&[UNIT, TEST]) {
+            return self.parse_execute_unit_test();
+        }
+
+        // Otherwise parse as regular EXECUTE (prepared statement)
         let name = self.parse_identifier()?;
         let params = if self.consume_token(&Token::LParen) {
             let params = self.parse_comma_separated(Parser::parse_expr)?;
@@ -9259,6 +9265,93 @@
         Ok(Statement::Execute(ExecuteStatement { name, params }))
     }
 
+    /// Parse an `EXECUTE UNIT TEST` statement, assuming that the
+    /// `EXECUTE UNIT TEST` tokens have already been consumed.
+    fn parse_execute_unit_test(&mut self) -> Result<Statement<Raw>, ParserError> {
+        // Parse test name
+        let name = self.parse_identifier()?;
+
+        // Expect FOR keyword
+        self.expect_keyword(FOR)?;
+
+        // Parse target view name
+        let target = self.parse_raw_name()?;
+
+        // Parse optional AT TIME clause
+        let at_time = if self.parse_keywords(&[AT, TIME]) {
+            Some(self.parse_expr()?)
+        } else {
+            None
+        };
+
+        // Parse MOCK definitions (0 or more)
+        let mut mocks = Vec::new();
+        while self.parse_keyword(MOCK) {
+            let mock_name = self.parse_raw_name()?;
+
+            // Parse column definitions
+            self.expect_token(&Token::LParen)?;
+            let columns = self.parse_comma_separated(|parser| {
+                Ok(ColumnDef {
+                    name: parser.parse_identifier()?,
+                    data_type: parser.parse_data_type()?,
+                    collation: None,
+                    options: vec![],
+                })
+            })?;
+            self.expect_token(&Token::RParen)?;
+
+            // Parse AS
+            self.expect_keyword(AS)?;
+
+            // Parse query
+            let query = self.parse_query()?;
+
+            mocks.push(MockViewDef {
+                name: mock_name,
+                columns,
+                query,
+            });
+
+            // Consume optional comma
+            let _ = self.consume_token(&Token::Comma);
+        }
+
+        // Parse EXPECTED definition (required, exactly 1)
+        self.expect_keyword(EXPECTED)?;
+
+        // Parse column definitions
+        self.expect_token(&Token::LParen)?;
+        let expected_columns = self.parse_comma_separated(|parser| {
+            Ok(ColumnDef {
+                name: parser.parse_identifier()?,
+                data_type: parser.parse_data_type()?,
+                collation: None,
+                options: vec![],
+            })
+        })?;
+        self.expect_token(&Token::RParen)?;
+
+        // Parse AS
+        self.expect_keyword(AS)?;
+
+        // Parse query
+        let expected_query = self.parse_query()?;
+
+        let expected = ExpectedResultDef {
+            columns: expected_columns,
+            query: expected_query,
+        };
+
+        Ok(Statement::ExecuteUnitTest(ExecuteUnitTestStatement {
+            name,
+            target,
+            at_time,
+            mocks,
+            expected,
+        }))
+    }
+
     /// Parse a `DEALLOCATE` statement, assuming that the `DEALLOCATE` token
     /// has already been consumed.
     fn parse_deallocate(&mut self) -> Result<Statement<Raw>, ParserError> {
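Reviewer note (not part of the diff): to make the grammar above concrete, here is the surface syntax `parse_execute_unit_test` is written to accept. The object names are illustrative and the mock queries mirror the unit-test fixtures earlier in this change; the `AT TIME` clause and the comma between mocks are optional. As the plan changes below show, the adapter still rejects this statement with `PlanError::Unsupported`.

    EXECUTE UNIT TEST test_user_summary
    FOR materialize.public.user_order_summary
    MOCK users (id bigint, name text, email text)
        AS SELECT * FROM VALUES (1, 'alice', 'alice@example.com')
    MOCK public.orders (id bigint, user_id bigint, amount numeric)
        AS SELECT * FROM VALUES (1, 1, 100.00)
    EXPECTED (user_id bigint, user_name text, total_orders bigint)
        AS SELECT * FROM VALUES (1, 'alice', 1);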
diff --git a/src/sql/src/plan.rs b/src/sql/src/plan.rs
index 0cc487cbf50fa..78d2fbe4cd490 100644
--- a/src/sql/src/plan.rs
+++ b/src/sql/src/plan.rs
@@ -95,7 +95,7 @@ pub(crate) mod plan_utils;
 pub(crate) mod query;
 pub(crate) mod scope;
 pub(crate) mod side_effecting_func;
-pub(crate) mod statement;
+pub mod statement;
 pub(crate) mod transform_ast;
 pub(crate) mod transform_hir;
 pub(crate) mod typeconv;
@@ -322,6 +322,7 @@ impl Plan {
             StatementKind::Update => &[PlanKind::ReadThenWrite],
             StatementKind::ValidateConnection => &[PlanKind::ValidateConnection],
             StatementKind::AlterRetainHistory => &[PlanKind::AlterRetainHistory],
+            StatementKind::ExecuteUnitTest => &[],
         }
     }
 
diff --git a/src/sql/src/plan/statement.rs b/src/sql/src/plan/statement.rs
index a013fd153f074..76d76ce0a20e9 100644
--- a/src/sql/src/plan/statement.rs
+++ b/src/sql/src/plan/statement.rs
@@ -254,6 +254,12 @@ pub fn describe(
             scl::describe_inspect_shard(&scx, stmt)?
         }
         Statement::ValidateConnection(stmt) => validate::describe_validate_connection(&scx, stmt)?,
+        Statement::ExecuteUnitTest(_) => {
+            return Err(PlanError::Unsupported {
+                feature: "EXECUTE UNIT TEST statement".to_string(),
+                discussion_no: None,
+            });
+        }
     };
 
     let desc = desc.with_params(scx.finalize_param_types()?);
@@ -438,6 +444,12 @@ pub fn plan(
         Statement::Raise(stmt) => raise::plan_raise(scx, stmt),
         Statement::Show(ShowStatement::InspectShard(stmt)) => scl::plan_inspect_shard(scx, stmt),
         Statement::ValidateConnection(stmt) => validate::plan_validate_connection(scx, stmt),
+        Statement::ExecuteUnitTest(_) => {
+            return Err(PlanError::Unsupported {
+                feature: "EXECUTE UNIT TEST statement".to_string(),
+                discussion_no: None,
+            });
+        }
     };
 
     if let Ok(plan) = &plan {
@@ -1143,6 +1155,7 @@ impl From<&Statement> for StatementClassification
             Statement::Raise(_) => Other,
             Statement::Show(ShowStatement::InspectShard(_)) => Other,
             Statement::ValidateConnection(_) => Other,
+            Statement::ExecuteUnitTest(_) => Other,
         }
     }
 }