Skip to content

Commit 1bca378

Browse files
authored
Query fixes (#2876)
Don’t allow aliased columns to be queried – it was actually safe but confusing. We call `created_at` -> `triggered_at` but we still allowed created_at which was confusing. Now we have nice errors if you try select columns that aren’t selectable. Also removed a ClickHouse setting `allow_experimental_object_type` which worked fine locally but stopped all queries working on ClickHouse Cloud 🤦‍♂️
1 parent b042b0b commit 1bca378

File tree

7 files changed

+636
-25
lines changed

7 files changed

+636
-25
lines changed

apps/webapp/app/services/queryService.server.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ function getDefaultClickhouseSettings(): ClickHouseSettings {
4949
),
5050

5151
// Safety settings
52-
allow_experimental_object_type: 1,
5352
format_csv_allow_double_quotes: 0,
5453
readonly: "1", // Ensure queries are read-only
5554
};

internal-packages/tsql/src/query/printer.test.ts

Lines changed: 338 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,15 +429,15 @@ describe("ClickHousePrinter", () => {
429429
});
430430

431431
it("should handle IS NULL syntax", () => {
432-
const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NULL");
432+
const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NULL");
433433

434-
expect(sql).toContain("isNull(error)");
434+
expect(sql).toContain("isNull(started_at)");
435435
});
436436

437437
it("should handle IS NOT NULL syntax", () => {
438-
const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NOT NULL");
438+
const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NOT NULL");
439439

440-
expect(sql).toContain("isNotNull(error)");
440+
expect(sql).toContain("isNotNull(started_at)");
441441
});
442442
});
443443

@@ -2172,6 +2172,152 @@ describe("Column metadata", () => {
21722172
});
21732173
});
21742174

2175+
describe("Unknown column blocking", () => {
2176+
/**
2177+
* These tests verify that unknown columns are blocked at compile time,
2178+
* preventing access to internal ClickHouse columns that aren't exposed in the schema.
2179+
*/
2180+
2181+
describe("should block unknown columns in SELECT", () => {
2182+
it("should throw error for unknown column in SELECT list", () => {
2183+
expect(() => {
2184+
printQuery("SELECT id, unknown_column FROM task_runs");
2185+
}).toThrow(QueryError);
2186+
expect(() => {
2187+
printQuery("SELECT id, unknown_column FROM task_runs");
2188+
}).toThrow(/unknown.*column.*unknown_column/i);
2189+
});
2190+
2191+
it("should throw error for internal ClickHouse column name not in schema", () => {
2192+
// The schema exposes 'created' but the internal ClickHouse column is 'created_at'
2193+
// Using the internal name directly should be blocked
2194+
const schema = createSchemaRegistry([runsSchema]);
2195+
const ctx = createPrinterContext({
2196+
organizationId: "org_test",
2197+
projectId: "proj_test",
2198+
environmentId: "env_test",
2199+
schema,
2200+
});
2201+
2202+
// 'created_at' is not in runsSchema - only 'created' which maps to 'created_at'
2203+
expect(() => {
2204+
printQuery("SELECT id, created_at FROM runs", ctx);
2205+
}).toThrow(QueryError);
2206+
});
2207+
2208+
it("should suggest TSQL column name when user types ClickHouse column name", () => {
2209+
// The schema exposes 'created' but the internal ClickHouse column is 'created_at'
2210+
// When user types 'created_at', we should suggest 'created'
2211+
const schema = createSchemaRegistry([runsSchema]);
2212+
const ctx = createPrinterContext({
2213+
organizationId: "org_test",
2214+
projectId: "proj_test",
2215+
environmentId: "env_test",
2216+
schema,
2217+
});
2218+
2219+
expect(() => {
2220+
printQuery("SELECT id, created_at FROM runs", ctx);
2221+
}).toThrow(/Did you mean "created"/);
2222+
});
2223+
2224+
it("should throw error for unknown qualified column (table.column)", () => {
2225+
expect(() => {
2226+
printQuery("SELECT task_runs.unknown_column FROM task_runs");
2227+
}).toThrow(QueryError);
2228+
});
2229+
});
2230+
2231+
describe("should block unknown columns in WHERE", () => {
2232+
it("should throw error for unknown column in WHERE clause", () => {
2233+
expect(() => {
2234+
printQuery("SELECT id FROM task_runs WHERE unknown_column = 'test'");
2235+
}).toThrow(QueryError);
2236+
});
2237+
2238+
it("should throw error for unknown column in complex WHERE", () => {
2239+
expect(() => {
2240+
printQuery("SELECT id FROM task_runs WHERE status = 'completed' AND unknown_column > 10");
2241+
}).toThrow(QueryError);
2242+
});
2243+
});
2244+
2245+
describe("should block unknown columns in ORDER BY", () => {
2246+
it("should throw error for unknown column in ORDER BY", () => {
2247+
expect(() => {
2248+
printQuery("SELECT id, status FROM task_runs ORDER BY unknown_column DESC");
2249+
}).toThrow(QueryError);
2250+
});
2251+
});
2252+
2253+
describe("should block unknown columns in GROUP BY", () => {
2254+
it("should throw error for unknown column in GROUP BY", () => {
2255+
expect(() => {
2256+
printQuery("SELECT count(*) FROM task_runs GROUP BY unknown_column");
2257+
}).toThrow(QueryError);
2258+
});
2259+
});
2260+
2261+
describe("should allow SELECT aliases", () => {
2262+
it("should allow ORDER BY to reference aliased columns", () => {
2263+
// This should NOT throw - 'cnt' is a valid alias from SELECT
2264+
const { sql } = printQuery(
2265+
"SELECT status, count(*) AS cnt FROM task_runs GROUP BY status ORDER BY cnt DESC"
2266+
);
2267+
expect(sql).toContain("ORDER BY cnt DESC");
2268+
});
2269+
2270+
it("should allow HAVING to reference aliased columns", () => {
2271+
const { sql } = printQuery(
2272+
"SELECT status, count(*) AS cnt FROM task_runs GROUP BY status HAVING cnt > 10"
2273+
);
2274+
expect(sql).toContain("HAVING");
2275+
});
2276+
2277+
it("should allow ORDER BY to reference implicit aggregation names", () => {
2278+
// COUNT() without alias gets implicit name 'count'
2279+
const { sql } = printQuery(
2280+
"SELECT status, count() FROM task_runs GROUP BY status ORDER BY count DESC"
2281+
);
2282+
expect(sql).toContain("ORDER BY count DESC");
2283+
});
2284+
});
2285+
2286+
// Note: CTE support is limited - CTEs are not added to the table context,
2287+
// so CTE column references are treated as unknown. This is a pre-existing limitation.
2288+
// The tests below verify that unknown column blocking doesn't break existing behavior.
2289+
2290+
describe("should allow subquery alias references", () => {
2291+
it("should allow referencing columns from subquery in FROM clause", () => {
2292+
const { sql } = printQuery(`
2293+
SELECT sub.status_name, sub.total
2294+
FROM (
2295+
SELECT status AS status_name, count(*) AS total
2296+
FROM task_runs
2297+
GROUP BY status
2298+
) AS sub
2299+
ORDER BY sub.total DESC
2300+
`);
2301+
expect(sql).toContain("status_name");
2302+
expect(sql).toContain("total");
2303+
});
2304+
2305+
it("should allow unqualified references to subquery columns", () => {
2306+
const { sql } = printQuery(`
2307+
SELECT status_name, total
2308+
FROM (
2309+
SELECT status AS status_name, count(*) AS total
2310+
FROM task_runs
2311+
GROUP BY status
2312+
)
2313+
WHERE total > 10
2314+
`);
2315+
expect(sql).toContain("status_name");
2316+
expect(sql).toContain("total");
2317+
});
2318+
});
2319+
});
2320+
21752321
describe("Field Mapping Value Transformation", () => {
21762322
// Test schema with a field mapping column
21772323
const fieldMappingSchema: TableSchema = {
@@ -2278,3 +2424,191 @@ describe("Field Mapping Value Transformation", () => {
22782424
// But the column metadata should use the exposed name
22792425
});
22802426
});
2427+
2428+
describe("Internal-only column blocking", () => {
2429+
/**
2430+
* These tests verify that internal columns (tenant columns, required filter columns)
2431+
* that are NOT exposed in tableSchema.columns are blocked from user queries in
2432+
* SELECT, ORDER BY, GROUP BY, and HAVING clauses, but allowed in system-generated
2433+
* tenant isolation guards.
2434+
*/
2435+
2436+
// Schema with hidden tenant columns (not exposed in public columns)
2437+
const hiddenTenantSchema: TableSchema = {
2438+
name: "hidden_tenant_runs",
2439+
clickhouseName: "trigger_dev.task_runs_v2",
2440+
columns: {
2441+
// Public columns - these are exposed to users
2442+
id: { name: "id", ...column("String") },
2443+
status: { name: "status", ...column("String") },
2444+
task_identifier: { name: "task_identifier", ...column("String") },
2445+
created_at: { name: "created_at", ...column("DateTime64") },
2446+
},
2447+
// Tenant columns are NOT in the public columns above
2448+
tenantColumns: {
2449+
organizationId: "organization_id",
2450+
projectId: "project_id",
2451+
environmentId: "environment_id",
2452+
},
2453+
};
2454+
2455+
// Schema with hidden required filter column
2456+
const hiddenFilterSchema: TableSchema = {
2457+
name: "hidden_filter_runs",
2458+
clickhouseName: "trigger_dev.task_runs_v2",
2459+
columns: {
2460+
id: { name: "id", ...column("String") },
2461+
status: { name: "status", ...column("String") },
2462+
created_at: { name: "created_at", ...column("DateTime64") },
2463+
},
2464+
tenantColumns: {
2465+
organizationId: "organization_id",
2466+
projectId: "project_id",
2467+
environmentId: "environment_id",
2468+
},
2469+
requiredFilters: [
2470+
{ column: "engine", value: "V2" }, // 'engine' is not in public columns
2471+
],
2472+
};
2473+
2474+
function createHiddenTenantContext(): PrinterContext {
2475+
const schema = createSchemaRegistry([hiddenTenantSchema]);
2476+
return createPrinterContext({
2477+
organizationId: "org_test",
2478+
projectId: "proj_test",
2479+
environmentId: "env_test",
2480+
schema,
2481+
});
2482+
}
2483+
2484+
function createHiddenFilterContext(): PrinterContext {
2485+
const schema = createSchemaRegistry([hiddenFilterSchema]);
2486+
return createPrinterContext({
2487+
organizationId: "org_test",
2488+
projectId: "proj_test",
2489+
environmentId: "env_test",
2490+
schema,
2491+
});
2492+
}
2493+
2494+
describe("should block internal-only columns in SELECT", () => {
2495+
it("should throw error when selecting hidden tenant column (organization_id)", () => {
2496+
const ctx = createHiddenTenantContext();
2497+
expect(() => {
2498+
printQuery("SELECT id, organization_id FROM hidden_tenant_runs", ctx);
2499+
}).toThrow(QueryError);
2500+
expect(() => {
2501+
printQuery("SELECT id, organization_id FROM hidden_tenant_runs", ctx);
2502+
}).toThrow(/not available for querying/i);
2503+
});
2504+
2505+
it("should throw error when selecting hidden tenant column (project_id)", () => {
2506+
const ctx = createHiddenTenantContext();
2507+
expect(() => {
2508+
printQuery("SELECT project_id FROM hidden_tenant_runs", ctx);
2509+
}).toThrow(/not available for querying/i);
2510+
});
2511+
2512+
it("should throw error when selecting hidden tenant column (environment_id)", () => {
2513+
const ctx = createHiddenTenantContext();
2514+
expect(() => {
2515+
printQuery("SELECT environment_id FROM hidden_tenant_runs", ctx);
2516+
}).toThrow(/not available for querying/i);
2517+
});
2518+
2519+
it("should throw error when selecting hidden required filter column", () => {
2520+
const ctx = createHiddenFilterContext();
2521+
expect(() => {
2522+
printQuery("SELECT id, engine FROM hidden_filter_runs", ctx);
2523+
}).toThrow(/not available for querying/i);
2524+
});
2525+
});
2526+
2527+
describe("should block internal-only columns in ORDER BY", () => {
2528+
it("should throw error when ordering by hidden tenant column", () => {
2529+
const ctx = createHiddenTenantContext();
2530+
expect(() => {
2531+
printQuery("SELECT id, status FROM hidden_tenant_runs ORDER BY organization_id", ctx);
2532+
}).toThrow(/not available for querying/i);
2533+
});
2534+
2535+
it("should throw error when ordering by hidden required filter column", () => {
2536+
const ctx = createHiddenFilterContext();
2537+
expect(() => {
2538+
printQuery("SELECT id, status FROM hidden_filter_runs ORDER BY engine", ctx);
2539+
}).toThrow(/not available for querying/i);
2540+
});
2541+
});
2542+
2543+
describe("should block internal-only columns in GROUP BY", () => {
2544+
it("should throw error when grouping by hidden tenant column", () => {
2545+
const ctx = createHiddenTenantContext();
2546+
expect(() => {
2547+
printQuery("SELECT count(*) FROM hidden_tenant_runs GROUP BY organization_id", ctx);
2548+
}).toThrow(/not available for querying/i);
2549+
});
2550+
2551+
it("should throw error when grouping by hidden required filter column", () => {
2552+
const ctx = createHiddenFilterContext();
2553+
expect(() => {
2554+
printQuery("SELECT count(*) FROM hidden_filter_runs GROUP BY engine", ctx);
2555+
}).toThrow(/not available for querying/i);
2556+
});
2557+
});
2558+
2559+
describe("should block internal-only columns in HAVING", () => {
2560+
it("should throw error when using hidden column in HAVING", () => {
2561+
const ctx = createHiddenTenantContext();
2562+
expect(() => {
2563+
printQuery(
2564+
"SELECT status, count(*) as cnt FROM hidden_tenant_runs GROUP BY status HAVING organization_id = 'x'",
2565+
ctx
2566+
);
2567+
}).toThrow(/not available for querying/i);
2568+
});
2569+
});
2570+
2571+
describe("should allow tenant guard in WHERE clause", () => {
2572+
it("should successfully execute query with tenant isolation (hidden tenant columns work internally)", () => {
2573+
const ctx = createHiddenTenantContext();
2574+
// This should succeed - the tenant guard is injected internally and should work
2575+
const { sql } = printQuery("SELECT id, status FROM hidden_tenant_runs", ctx);
2576+
2577+
// Verify tenant guards are present in WHERE clause
2578+
expect(sql).toContain("organization_id");
2579+
expect(sql).toContain("project_id");
2580+
expect(sql).toContain("environment_id");
2581+
// But NOT in SELECT
2582+
expect(sql).toMatch(/SELECT\s+id,\s*status\s+FROM/i);
2583+
});
2584+
2585+
it("should successfully execute query with required filter (hidden filter columns work internally)", () => {
2586+
const ctx = createHiddenFilterContext();
2587+
const { sql, params } = printQuery("SELECT id, status FROM hidden_filter_runs", ctx);
2588+
2589+
// Verify required filter is present in WHERE clause
2590+
expect(sql).toContain("engine");
2591+
// The value V2 is parameterized, check that it's in the params
2592+
expect(Object.values(params)).toContain("V2");
2593+
});
2594+
});
2595+
2596+
describe("should allow exposed tenant columns", () => {
2597+
// In task_runs schema, tenant columns ARE exposed in the public columns
2598+
it("should allow selecting exposed tenant column", () => {
2599+
// task_runs schema exposes organization_id in its columns
2600+
const { sql } = printQuery("SELECT id, organization_id FROM task_runs");
2601+
expect(sql).toContain("SELECT id, organization_id");
2602+
});
2603+
2604+
it("should allow ordering by exposed tenant column", () => {
2605+
const { sql } = printQuery("SELECT id, status FROM task_runs ORDER BY organization_id");
2606+
expect(sql).toContain("ORDER BY organization_id");
2607+
});
2608+
2609+
it("should allow grouping by exposed tenant column", () => {
2610+
const { sql } = printQuery("SELECT organization_id, count(*) FROM task_runs GROUP BY organization_id");
2611+
expect(sql).toContain("GROUP BY organization_id");
2612+
});
2613+
});
2614+
});

0 commit comments

Comments
 (0)