@@ -429,15 +429,15 @@ describe("ClickHousePrinter", () => {
429429 } ) ;
430430
431431 it ( "should handle IS NULL syntax" , ( ) => {
432- const { sql } = printQuery ( "SELECT * FROM task_runs WHERE error IS NULL" ) ;
432+ const { sql } = printQuery ( "SELECT * FROM task_runs WHERE started_at IS NULL" ) ;
433433
434- expect ( sql ) . toContain ( "isNull(error )" ) ;
434+ expect ( sql ) . toContain ( "isNull(started_at )" ) ;
435435 } ) ;
436436
437437 it ( "should handle IS NOT NULL syntax" , ( ) => {
438- const { sql } = printQuery ( "SELECT * FROM task_runs WHERE error IS NOT NULL" ) ;
438+ const { sql } = printQuery ( "SELECT * FROM task_runs WHERE started_at IS NOT NULL" ) ;
439439
440- expect ( sql ) . toContain ( "isNotNull(error )" ) ;
440+ expect ( sql ) . toContain ( "isNotNull(started_at )" ) ;
441441 } ) ;
442442 } ) ;
443443
@@ -2172,6 +2172,152 @@ describe("Column metadata", () => {
21722172 } ) ;
21732173} ) ;
21742174
2175+ describe ( "Unknown column blocking" , ( ) => {
2176+ /**
2177+ * These tests verify that unknown columns are blocked at compile time,
2178+ * preventing access to internal ClickHouse columns that aren't exposed in the schema.
2179+ */
2180+
2181+ describe ( "should block unknown columns in SELECT" , ( ) => {
2182+ it ( "should throw error for unknown column in SELECT list" , ( ) => {
2183+ expect ( ( ) => {
2184+ printQuery ( "SELECT id, unknown_column FROM task_runs" ) ;
2185+ } ) . toThrow ( QueryError ) ;
2186+ expect ( ( ) => {
2187+ printQuery ( "SELECT id, unknown_column FROM task_runs" ) ;
2188+ } ) . toThrow ( / u n k n o w n .* c o l u m n .* u n k n o w n _ c o l u m n / i) ;
2189+ } ) ;
2190+
2191+ it ( "should throw error for internal ClickHouse column name not in schema" , ( ) => {
2192+ // The schema exposes 'created' but the internal ClickHouse column is 'created_at'
2193+ // Using the internal name directly should be blocked
2194+ const schema = createSchemaRegistry ( [ runsSchema ] ) ;
2195+ const ctx = createPrinterContext ( {
2196+ organizationId : "org_test" ,
2197+ projectId : "proj_test" ,
2198+ environmentId : "env_test" ,
2199+ schema,
2200+ } ) ;
2201+
2202+ // 'created_at' is not in runsSchema - only 'created' which maps to 'created_at'
2203+ expect ( ( ) => {
2204+ printQuery ( "SELECT id, created_at FROM runs" , ctx ) ;
2205+ } ) . toThrow ( QueryError ) ;
2206+ } ) ;
2207+
2208+ it ( "should suggest TSQL column name when user types ClickHouse column name" , ( ) => {
2209+ // The schema exposes 'created' but the internal ClickHouse column is 'created_at'
2210+ // When user types 'created_at', we should suggest 'created'
2211+ const schema = createSchemaRegistry ( [ runsSchema ] ) ;
2212+ const ctx = createPrinterContext ( {
2213+ organizationId : "org_test" ,
2214+ projectId : "proj_test" ,
2215+ environmentId : "env_test" ,
2216+ schema,
2217+ } ) ;
2218+
2219+ expect ( ( ) => {
2220+ printQuery ( "SELECT id, created_at FROM runs" , ctx ) ;
2221+ } ) . toThrow ( / D i d y o u m e a n " c r e a t e d " / ) ;
2222+ } ) ;
2223+
2224+ it ( "should throw error for unknown qualified column (table.column)" , ( ) => {
2225+ expect ( ( ) => {
2226+ printQuery ( "SELECT task_runs.unknown_column FROM task_runs" ) ;
2227+ } ) . toThrow ( QueryError ) ;
2228+ } ) ;
2229+ } ) ;
2230+
2231+ describe ( "should block unknown columns in WHERE" , ( ) => {
2232+ it ( "should throw error for unknown column in WHERE clause" , ( ) => {
2233+ expect ( ( ) => {
2234+ printQuery ( "SELECT id FROM task_runs WHERE unknown_column = 'test'" ) ;
2235+ } ) . toThrow ( QueryError ) ;
2236+ } ) ;
2237+
2238+ it ( "should throw error for unknown column in complex WHERE" , ( ) => {
2239+ expect ( ( ) => {
2240+ printQuery ( "SELECT id FROM task_runs WHERE status = 'completed' AND unknown_column > 10" ) ;
2241+ } ) . toThrow ( QueryError ) ;
2242+ } ) ;
2243+ } ) ;
2244+
2245+ describe ( "should block unknown columns in ORDER BY" , ( ) => {
2246+ it ( "should throw error for unknown column in ORDER BY" , ( ) => {
2247+ expect ( ( ) => {
2248+ printQuery ( "SELECT id, status FROM task_runs ORDER BY unknown_column DESC" ) ;
2249+ } ) . toThrow ( QueryError ) ;
2250+ } ) ;
2251+ } ) ;
2252+
2253+ describe ( "should block unknown columns in GROUP BY" , ( ) => {
2254+ it ( "should throw error for unknown column in GROUP BY" , ( ) => {
2255+ expect ( ( ) => {
2256+ printQuery ( "SELECT count(*) FROM task_runs GROUP BY unknown_column" ) ;
2257+ } ) . toThrow ( QueryError ) ;
2258+ } ) ;
2259+ } ) ;
2260+
2261+ describe ( "should allow SELECT aliases" , ( ) => {
2262+ it ( "should allow ORDER BY to reference aliased columns" , ( ) => {
2263+ // This should NOT throw - 'cnt' is a valid alias from SELECT
2264+ const { sql } = printQuery (
2265+ "SELECT status, count(*) AS cnt FROM task_runs GROUP BY status ORDER BY cnt DESC"
2266+ ) ;
2267+ expect ( sql ) . toContain ( "ORDER BY cnt DESC" ) ;
2268+ } ) ;
2269+
2270+ it ( "should allow HAVING to reference aliased columns" , ( ) => {
2271+ const { sql } = printQuery (
2272+ "SELECT status, count(*) AS cnt FROM task_runs GROUP BY status HAVING cnt > 10"
2273+ ) ;
2274+ expect ( sql ) . toContain ( "HAVING" ) ;
2275+ } ) ;
2276+
2277+ it ( "should allow ORDER BY to reference implicit aggregation names" , ( ) => {
2278+ // COUNT() without alias gets implicit name 'count'
2279+ const { sql } = printQuery (
2280+ "SELECT status, count() FROM task_runs GROUP BY status ORDER BY count DESC"
2281+ ) ;
2282+ expect ( sql ) . toContain ( "ORDER BY count DESC" ) ;
2283+ } ) ;
2284+ } ) ;
2285+
2286+ // Note: CTE support is limited - CTEs are not added to the table context,
2287+ // so CTE column references are treated as unknown. This is a pre-existing limitation.
2288+ // The tests below verify that unknown column blocking doesn't break existing behavior.
2289+
2290+ describe ( "should allow subquery alias references" , ( ) => {
2291+ it ( "should allow referencing columns from subquery in FROM clause" , ( ) => {
2292+ const { sql } = printQuery ( `
2293+ SELECT sub.status_name, sub.total
2294+ FROM (
2295+ SELECT status AS status_name, count(*) AS total
2296+ FROM task_runs
2297+ GROUP BY status
2298+ ) AS sub
2299+ ORDER BY sub.total DESC
2300+ ` ) ;
2301+ expect ( sql ) . toContain ( "status_name" ) ;
2302+ expect ( sql ) . toContain ( "total" ) ;
2303+ } ) ;
2304+
2305+ it ( "should allow unqualified references to subquery columns" , ( ) => {
2306+ const { sql } = printQuery ( `
2307+ SELECT status_name, total
2308+ FROM (
2309+ SELECT status AS status_name, count(*) AS total
2310+ FROM task_runs
2311+ GROUP BY status
2312+ )
2313+ WHERE total > 10
2314+ ` ) ;
2315+ expect ( sql ) . toContain ( "status_name" ) ;
2316+ expect ( sql ) . toContain ( "total" ) ;
2317+ } ) ;
2318+ } ) ;
2319+ } ) ;
2320+
21752321describe ( "Field Mapping Value Transformation" , ( ) => {
21762322 // Test schema with a field mapping column
21772323 const fieldMappingSchema : TableSchema = {
@@ -2278,3 +2424,191 @@ describe("Field Mapping Value Transformation", () => {
22782424 // But the column metadata should use the exposed name
22792425 } ) ;
22802426} ) ;
2427+
2428+ describe ( "Internal-only column blocking" , ( ) => {
2429+ /**
2430+ * These tests verify that internal columns (tenant columns, required filter columns)
2431+ * that are NOT exposed in tableSchema.columns are blocked from user queries in
2432+ * SELECT, ORDER BY, GROUP BY, and HAVING clauses, but allowed in system-generated
2433+ * tenant isolation guards.
2434+ */
2435+
2436+ // Schema with hidden tenant columns (not exposed in public columns)
2437+ const hiddenTenantSchema : TableSchema = {
2438+ name : "hidden_tenant_runs" ,
2439+ clickhouseName : "trigger_dev.task_runs_v2" ,
2440+ columns : {
2441+ // Public columns - these are exposed to users
2442+ id : { name : "id" , ...column ( "String" ) } ,
2443+ status : { name : "status" , ...column ( "String" ) } ,
2444+ task_identifier : { name : "task_identifier" , ...column ( "String" ) } ,
2445+ created_at : { name : "created_at" , ...column ( "DateTime64" ) } ,
2446+ } ,
2447+ // Tenant columns are NOT in the public columns above
2448+ tenantColumns : {
2449+ organizationId : "organization_id" ,
2450+ projectId : "project_id" ,
2451+ environmentId : "environment_id" ,
2452+ } ,
2453+ } ;
2454+
2455+ // Schema with hidden required filter column
2456+ const hiddenFilterSchema : TableSchema = {
2457+ name : "hidden_filter_runs" ,
2458+ clickhouseName : "trigger_dev.task_runs_v2" ,
2459+ columns : {
2460+ id : { name : "id" , ...column ( "String" ) } ,
2461+ status : { name : "status" , ...column ( "String" ) } ,
2462+ created_at : { name : "created_at" , ...column ( "DateTime64" ) } ,
2463+ } ,
2464+ tenantColumns : {
2465+ organizationId : "organization_id" ,
2466+ projectId : "project_id" ,
2467+ environmentId : "environment_id" ,
2468+ } ,
2469+ requiredFilters : [
2470+ { column : "engine" , value : "V2" } , // 'engine' is not in public columns
2471+ ] ,
2472+ } ;
2473+
2474+ function createHiddenTenantContext ( ) : PrinterContext {
2475+ const schema = createSchemaRegistry ( [ hiddenTenantSchema ] ) ;
2476+ return createPrinterContext ( {
2477+ organizationId : "org_test" ,
2478+ projectId : "proj_test" ,
2479+ environmentId : "env_test" ,
2480+ schema,
2481+ } ) ;
2482+ }
2483+
2484+ function createHiddenFilterContext ( ) : PrinterContext {
2485+ const schema = createSchemaRegistry ( [ hiddenFilterSchema ] ) ;
2486+ return createPrinterContext ( {
2487+ organizationId : "org_test" ,
2488+ projectId : "proj_test" ,
2489+ environmentId : "env_test" ,
2490+ schema,
2491+ } ) ;
2492+ }
2493+
2494+ describe ( "should block internal-only columns in SELECT" , ( ) => {
2495+ it ( "should throw error when selecting hidden tenant column (organization_id)" , ( ) => {
2496+ const ctx = createHiddenTenantContext ( ) ;
2497+ expect ( ( ) => {
2498+ printQuery ( "SELECT id, organization_id FROM hidden_tenant_runs" , ctx ) ;
2499+ } ) . toThrow ( QueryError ) ;
2500+ expect ( ( ) => {
2501+ printQuery ( "SELECT id, organization_id FROM hidden_tenant_runs" , ctx ) ;
2502+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2503+ } ) ;
2504+
2505+ it ( "should throw error when selecting hidden tenant column (project_id)" , ( ) => {
2506+ const ctx = createHiddenTenantContext ( ) ;
2507+ expect ( ( ) => {
2508+ printQuery ( "SELECT project_id FROM hidden_tenant_runs" , ctx ) ;
2509+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2510+ } ) ;
2511+
2512+ it ( "should throw error when selecting hidden tenant column (environment_id)" , ( ) => {
2513+ const ctx = createHiddenTenantContext ( ) ;
2514+ expect ( ( ) => {
2515+ printQuery ( "SELECT environment_id FROM hidden_tenant_runs" , ctx ) ;
2516+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2517+ } ) ;
2518+
2519+ it ( "should throw error when selecting hidden required filter column" , ( ) => {
2520+ const ctx = createHiddenFilterContext ( ) ;
2521+ expect ( ( ) => {
2522+ printQuery ( "SELECT id, engine FROM hidden_filter_runs" , ctx ) ;
2523+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2524+ } ) ;
2525+ } ) ;
2526+
2527+ describe ( "should block internal-only columns in ORDER BY" , ( ) => {
2528+ it ( "should throw error when ordering by hidden tenant column" , ( ) => {
2529+ const ctx = createHiddenTenantContext ( ) ;
2530+ expect ( ( ) => {
2531+ printQuery ( "SELECT id, status FROM hidden_tenant_runs ORDER BY organization_id" , ctx ) ;
2532+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2533+ } ) ;
2534+
2535+ it ( "should throw error when ordering by hidden required filter column" , ( ) => {
2536+ const ctx = createHiddenFilterContext ( ) ;
2537+ expect ( ( ) => {
2538+ printQuery ( "SELECT id, status FROM hidden_filter_runs ORDER BY engine" , ctx ) ;
2539+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2540+ } ) ;
2541+ } ) ;
2542+
2543+ describe ( "should block internal-only columns in GROUP BY" , ( ) => {
2544+ it ( "should throw error when grouping by hidden tenant column" , ( ) => {
2545+ const ctx = createHiddenTenantContext ( ) ;
2546+ expect ( ( ) => {
2547+ printQuery ( "SELECT count(*) FROM hidden_tenant_runs GROUP BY organization_id" , ctx ) ;
2548+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2549+ } ) ;
2550+
2551+ it ( "should throw error when grouping by hidden required filter column" , ( ) => {
2552+ const ctx = createHiddenFilterContext ( ) ;
2553+ expect ( ( ) => {
2554+ printQuery ( "SELECT count(*) FROM hidden_filter_runs GROUP BY engine" , ctx ) ;
2555+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2556+ } ) ;
2557+ } ) ;
2558+
2559+ describe ( "should block internal-only columns in HAVING" , ( ) => {
2560+ it ( "should throw error when using hidden column in HAVING" , ( ) => {
2561+ const ctx = createHiddenTenantContext ( ) ;
2562+ expect ( ( ) => {
2563+ printQuery (
2564+ "SELECT status, count(*) as cnt FROM hidden_tenant_runs GROUP BY status HAVING organization_id = 'x'" ,
2565+ ctx
2566+ ) ;
2567+ } ) . toThrow ( / n o t a v a i l a b l e f o r q u e r y i n g / i) ;
2568+ } ) ;
2569+ } ) ;
2570+
2571+ describe ( "should allow tenant guard in WHERE clause" , ( ) => {
2572+ it ( "should successfully execute query with tenant isolation (hidden tenant columns work internally)" , ( ) => {
2573+ const ctx = createHiddenTenantContext ( ) ;
2574+ // This should succeed - the tenant guard is injected internally and should work
2575+ const { sql } = printQuery ( "SELECT id, status FROM hidden_tenant_runs" , ctx ) ;
2576+
2577+ // Verify tenant guards are present in WHERE clause
2578+ expect ( sql ) . toContain ( "organization_id" ) ;
2579+ expect ( sql ) . toContain ( "project_id" ) ;
2580+ expect ( sql ) . toContain ( "environment_id" ) ;
2581+ // But NOT in SELECT
2582+ expect ( sql ) . toMatch ( / S E L E C T \s + i d , \s * s t a t u s \s + F R O M / i) ;
2583+ } ) ;
2584+
2585+ it ( "should successfully execute query with required filter (hidden filter columns work internally)" , ( ) => {
2586+ const ctx = createHiddenFilterContext ( ) ;
2587+ const { sql, params } = printQuery ( "SELECT id, status FROM hidden_filter_runs" , ctx ) ;
2588+
2589+ // Verify required filter is present in WHERE clause
2590+ expect ( sql ) . toContain ( "engine" ) ;
2591+ // The value V2 is parameterized, check that it's in the params
2592+ expect ( Object . values ( params ) ) . toContain ( "V2" ) ;
2593+ } ) ;
2594+ } ) ;
2595+
2596+ describe ( "should allow exposed tenant columns" , ( ) => {
2597+ // In task_runs schema, tenant columns ARE exposed in the public columns
2598+ it ( "should allow selecting exposed tenant column" , ( ) => {
2599+ // task_runs schema exposes organization_id in its columns
2600+ const { sql } = printQuery ( "SELECT id, organization_id FROM task_runs" ) ;
2601+ expect ( sql ) . toContain ( "SELECT id, organization_id" ) ;
2602+ } ) ;
2603+
2604+ it ( "should allow ordering by exposed tenant column" , ( ) => {
2605+ const { sql } = printQuery ( "SELECT id, status FROM task_runs ORDER BY organization_id" ) ;
2606+ expect ( sql ) . toContain ( "ORDER BY organization_id" ) ;
2607+ } ) ;
2608+
2609+ it ( "should allow grouping by exposed tenant column" , ( ) => {
2610+ const { sql } = printQuery ( "SELECT organization_id, count(*) FROM task_runs GROUP BY organization_id" ) ;
2611+ expect ( sql ) . toContain ( "GROUP BY organization_id" ) ;
2612+ } ) ;
2613+ } ) ;
2614+ } ) ;
0 commit comments