Skip to content

Commit 840f504

Browse files
committed
Improve access errors to columns not in the schema
1 parent c8686b5 commit 840f504

File tree

4 files changed

+277
-17
lines changed

4 files changed

+277
-17
lines changed

internal-packages/tsql/src/query/printer.test.ts

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,15 +429,15 @@ describe("ClickHousePrinter", () => {
429429
});
430430

431431
it("should handle IS NULL syntax", () => {
432-
const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NULL");
432+
const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NULL");
433433

434-
expect(sql).toContain("isNull(error)");
434+
expect(sql).toContain("isNull(started_at)");
435435
});
436436

437437
it("should handle IS NOT NULL syntax", () => {
438-
const { sql } = printQuery("SELECT * FROM task_runs WHERE error IS NOT NULL");
438+
const { sql } = printQuery("SELECT * FROM task_runs WHERE started_at IS NOT NULL");
439439

440-
expect(sql).toContain("isNotNull(error)");
440+
expect(sql).toContain("isNotNull(started_at)");
441441
});
442442
});
443443

@@ -2172,6 +2172,138 @@ describe("Column metadata", () => {
21722172
});
21732173
});
21742174

2175+
describe("Unknown column blocking", () => {
2176+
/**
2177+
* These tests verify that unknown columns are blocked at compile time,
2178+
* preventing access to internal ClickHouse columns that aren't exposed in the schema.
2179+
*/
2180+
2181+
describe("should block unknown columns in SELECT", () => {
2182+
it("should throw error for unknown column in SELECT list", () => {
2183+
expect(() => {
2184+
printQuery("SELECT id, unknown_column FROM task_runs");
2185+
}).toThrow(QueryError);
2186+
expect(() => {
2187+
printQuery("SELECT id, unknown_column FROM task_runs");
2188+
}).toThrow(/unknown.*column.*unknown_column/i);
2189+
});
2190+
2191+
it("should throw error for internal ClickHouse column name not in schema", () => {
2192+
// The schema exposes 'created' but the internal ClickHouse column is 'created_at'
2193+
// Using the internal name directly should be blocked
2194+
const schema = createSchemaRegistry([runsSchema]);
2195+
const ctx = createPrinterContext({
2196+
organizationId: "org_test",
2197+
projectId: "proj_test",
2198+
environmentId: "env_test",
2199+
schema,
2200+
});
2201+
2202+
// 'created_at' is not in runsSchema - only 'created' which maps to 'created_at'
2203+
expect(() => {
2204+
printQuery("SELECT id, created_at FROM runs", ctx);
2205+
}).toThrow(QueryError);
2206+
});
2207+
2208+
it("should throw error for unknown qualified column (table.column)", () => {
2209+
expect(() => {
2210+
printQuery("SELECT task_runs.unknown_column FROM task_runs");
2211+
}).toThrow(QueryError);
2212+
});
2213+
});
2214+
2215+
describe("should block unknown columns in WHERE", () => {
2216+
it("should throw error for unknown column in WHERE clause", () => {
2217+
expect(() => {
2218+
printQuery("SELECT id FROM task_runs WHERE unknown_column = 'test'");
2219+
}).toThrow(QueryError);
2220+
});
2221+
2222+
it("should throw error for unknown column in complex WHERE", () => {
2223+
expect(() => {
2224+
printQuery(
2225+
"SELECT id FROM task_runs WHERE status = 'completed' AND unknown_column > 10"
2226+
);
2227+
}).toThrow(QueryError);
2228+
});
2229+
});
2230+
2231+
describe("should block unknown columns in ORDER BY", () => {
2232+
it("should throw error for unknown column in ORDER BY", () => {
2233+
expect(() => {
2234+
printQuery("SELECT id, status FROM task_runs ORDER BY unknown_column DESC");
2235+
}).toThrow(QueryError);
2236+
});
2237+
});
2238+
2239+
describe("should block unknown columns in GROUP BY", () => {
2240+
it("should throw error for unknown column in GROUP BY", () => {
2241+
expect(() => {
2242+
printQuery("SELECT count(*) FROM task_runs GROUP BY unknown_column");
2243+
}).toThrow(QueryError);
2244+
});
2245+
});
2246+
2247+
describe("should allow SELECT aliases", () => {
2248+
it("should allow ORDER BY to reference aliased columns", () => {
2249+
// This should NOT throw - 'cnt' is a valid alias from SELECT
2250+
const { sql } = printQuery(
2251+
"SELECT status, count(*) AS cnt FROM task_runs GROUP BY status ORDER BY cnt DESC"
2252+
);
2253+
expect(sql).toContain("ORDER BY cnt DESC");
2254+
});
2255+
2256+
it("should allow HAVING to reference aliased columns", () => {
2257+
const { sql } = printQuery(
2258+
"SELECT status, count(*) AS cnt FROM task_runs GROUP BY status HAVING cnt > 10"
2259+
);
2260+
expect(sql).toContain("HAVING");
2261+
});
2262+
2263+
it("should allow ORDER BY to reference implicit aggregation names", () => {
2264+
// COUNT() without alias gets implicit name 'count'
2265+
const { sql } = printQuery(
2266+
"SELECT status, count() FROM task_runs GROUP BY status ORDER BY count DESC"
2267+
);
2268+
expect(sql).toContain("ORDER BY count DESC");
2269+
});
2270+
});
2271+
2272+
// Note: CTE support is limited - CTEs are not added to the table context,
2273+
// so CTE column references are treated as unknown. This is a pre-existing limitation.
2274+
// The tests below verify that unknown column blocking doesn't break existing behavior.
2275+
2276+
describe("should allow subquery alias references", () => {
2277+
it("should allow referencing columns from subquery in FROM clause", () => {
2278+
const { sql } = printQuery(`
2279+
SELECT sub.status_name, sub.total
2280+
FROM (
2281+
SELECT status AS status_name, count(*) AS total
2282+
FROM task_runs
2283+
GROUP BY status
2284+
) AS sub
2285+
ORDER BY sub.total DESC
2286+
`);
2287+
expect(sql).toContain("status_name");
2288+
expect(sql).toContain("total");
2289+
});
2290+
2291+
it("should allow unqualified references to subquery columns", () => {
2292+
const { sql } = printQuery(`
2293+
SELECT status_name, total
2294+
FROM (
2295+
SELECT status AS status_name, count(*) AS total
2296+
FROM task_runs
2297+
GROUP BY status
2298+
)
2299+
WHERE total > 10
2300+
`);
2301+
expect(sql).toContain("status_name");
2302+
expect(sql).toContain("total");
2303+
});
2304+
});
2305+
});
2306+
21752307
describe("Field Mapping Value Transformation", () => {
21762308
// Test schema with a field mapping column
21772309
const fieldMappingSchema: TableSchema = {

internal-packages/tsql/src/query/printer.ts

Lines changed: 133 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@ export class ClickHousePrinter {
116116
private inGroupByContext = false;
117117
/** Columns hidden when SELECT * is expanded to core columns only */
118118
private hiddenColumns: string[] = [];
119+
/**
120+
* Set of column aliases defined in the current SELECT clause.
121+
* Used to allow ORDER BY/HAVING to reference aliased columns.
122+
*/
123+
private selectAliases: Set<string> = new Set();
124+
/**
125+
* Set of internal ClickHouse column names that are allowed (e.g., tenant columns).
126+
* These are populated from tableSchema.tenantColumns when processing joins.
127+
*/
128+
private allowedInternalColumns: Set<string> = new Set();
119129

120130
constructor(
121131
private context: PrinterContext,
@@ -310,9 +320,18 @@ export class ClickHousePrinter {
310320
const isTopLevelQuery =
311321
this.stack.length <= 1 || (this.stack.length === 2 && partOfSelectUnion);
312322

313-
// Clear table contexts for top-level queries (subqueries inherit parent context)
323+
// Save and clear table contexts
324+
// Top-level queries clear contexts; subqueries save parent context and create fresh context
325+
const savedTableContexts = new Map(this.tableContexts);
326+
const savedInternalColumns = new Set(this.allowedInternalColumns);
314327
if (isTopLevelQuery) {
315328
this.tableContexts.clear();
329+
this.allowedInternalColumns.clear();
330+
} else {
331+
// Subqueries get fresh contexts - they don't inherit parent tables
332+
// (the parent will restore its context after the subquery is processed)
333+
this.tableContexts = new Map();
334+
this.allowedInternalColumns = new Set();
316335
}
317336

318337
// Build WHERE clause starting with any existing where
@@ -349,6 +368,16 @@ export class ClickHousePrinter {
349368
nextJoin = nextJoin.next_join;
350369
}
351370

371+
// Extract SELECT column aliases BEFORE visiting columns
372+
// This allows ORDER BY/HAVING to reference aliased columns
373+
const savedAliases = this.selectAliases;
374+
this.selectAliases = new Set();
375+
if (node.select) {
376+
for (const col of node.select) {
377+
this.extractSelectAlias(col);
378+
}
379+
}
380+
352381
// Process SELECT columns and collect metadata
353382
// Using flatMap because asterisk expansion can return multiple columns
354383
let columns: string[];
@@ -478,9 +507,56 @@ export class ClickHousePrinter {
478507
response = this.pretty ? `(${response.trim()})` : `(${response})`;
479508
}
480509

510+
// Restore saved contexts (for nested queries)
511+
this.selectAliases = savedAliases;
512+
this.tableContexts = savedTableContexts;
513+
this.allowedInternalColumns = savedInternalColumns;
514+
481515
return response;
482516
}
483517

518+
/**
519+
* Extract column aliases from a SELECT expression.
520+
* Handles explicit aliases (AS name) and implicit names from aggregations/functions.
521+
*
522+
* NOTE: We intentionally do NOT add field names as aliases here.
523+
* Field names (e.g., SELECT status) are columns from the table, not aliases.
524+
* Only explicit aliases (SELECT x AS name) and implicit function names
525+
* (SELECT COUNT() → 'count') should be added.
526+
*/
527+
private extractSelectAlias(expr: Expression): void {
528+
// Handle explicit Alias: SELECT ... AS name
529+
if ((expr as Alias).expression_type === "alias") {
530+
this.selectAliases.add((expr as Alias).alias);
531+
return;
532+
}
533+
534+
// Handle implicit names from function calls (e.g., COUNT() → 'count')
535+
if ((expr as Call).expression_type === "call") {
536+
const call = expr as Call;
537+
// Aggregations and functions get implicit lowercase names
538+
this.selectAliases.add(call.name.toLowerCase());
539+
return;
540+
}
541+
542+
// Handle implicit names from arithmetic operations (e.g., a + b → 'plus')
543+
if ((expr as ArithmeticOperation).expression_type === "arithmetic_operation") {
544+
const op = expr as ArithmeticOperation;
545+
const opNames: Record<ArithmeticOperationOp, string> = {
546+
[ArithmeticOperationOp.Add]: "plus",
547+
[ArithmeticOperationOp.Sub]: "minus",
548+
[ArithmeticOperationOp.Mult]: "multiply",
549+
[ArithmeticOperationOp.Div]: "divide",
550+
[ArithmeticOperationOp.Mod]: "modulo",
551+
};
552+
this.selectAliases.add(opNames[op.op]);
553+
return;
554+
}
555+
556+
// Field references (e.g., SELECT status) are NOT aliases - they're columns
557+
// that will be validated against the table schema. Don't add them here.
558+
}
559+
484560
/**
485561
* Visit a SELECT column expression with metadata collection
486562
*
@@ -1338,6 +1414,15 @@ export class ClickHousePrinter {
13381414
// Register this table context for column name resolution
13391415
this.tableContexts.set(effectiveAlias, tableSchema);
13401416

1417+
// Register tenant columns as allowed internal columns
1418+
// These are ClickHouse column names used for tenant isolation
1419+
if (tableSchema.tenantColumns) {
1420+
const { organizationId, projectId, environmentId } = tableSchema.tenantColumns;
1421+
if (organizationId) this.allowedInternalColumns.add(organizationId);
1422+
if (projectId) this.allowedInternalColumns.add(projectId);
1423+
if (environmentId) this.allowedInternalColumns.add(environmentId);
1424+
}
1425+
13411426
// Add tenant isolation guard
13421427
extraWhere = this.createTenantGuard(tableSchema, effectiveAlias);
13431428
} else if (
@@ -2081,6 +2166,8 @@ export class ClickHousePrinter {
20812166
/**
20822167
* Resolve field chain to use ClickHouse column names where applicable
20832168
* Handles both qualified (table.column) and unqualified (column) references
2169+
*
2170+
* @throws QueryError if the column is not found in any table schema and is not a SELECT alias
20842171
*/
20852172
private resolveFieldChain(chain: Array<string | number>): Array<string | number> {
20862173
if (chain.length === 0) {
@@ -2101,11 +2188,11 @@ export class ClickHousePrinter {
21012188
// This is a table.column reference
21022189
const columnName = chain[1];
21032190
if (typeof columnName === "string") {
2104-
const resolvedColumn = this.resolveColumnName(tableSchema, columnName);
2191+
const resolvedColumn = this.resolveColumnName(tableSchema, columnName, tableAlias);
21052192
return [tableAlias, resolvedColumn, ...chain.slice(2)];
21062193
}
21072194
}
2108-
// Not a known table alias, might be a nested field - return as-is
2195+
// Not a known table alias, might be a nested field or JSON path - return as-is
21092196
return chain;
21102197
}
21112198

@@ -2119,20 +2206,59 @@ export class ClickHousePrinter {
21192206
}
21202207
}
21212208

2122-
// Column not found in any table context - return as-is (might be a function, subquery alias, etc.)
2209+
// Check if it's a SELECT alias (e.g., from COUNT() or explicit AS)
2210+
if (this.selectAliases.has(columnName)) {
2211+
return chain; // Valid alias reference
2212+
}
2213+
2214+
// Check if this is an allowed internal column (e.g., tenant columns)
2215+
// These are ClickHouse column names that are allowed for internal use only
2216+
if (this.allowedInternalColumns.has(columnName)) {
2217+
return chain;
2218+
}
2219+
2220+
// Column not found in any table context and not a SELECT alias
2221+
// This is a security issue - block access to unknown columns
2222+
if (this.tableContexts.size > 0) {
2223+
// Only throw if we have tables in context (otherwise might be subquery)
2224+
const availableColumns = this.getAvailableColumnNames();
2225+
throw new QueryError(
2226+
`Unknown column "${columnName}". Available columns: ${availableColumns.join(", ")}`
2227+
);
2228+
}
2229+
2230+
// No tables in context (might be FROM subquery without alias) - return as-is
21232231
return chain;
21242232
}
21252233

2234+
/**
2235+
* Get list of available column names from all tables in context
2236+
*/
2237+
private getAvailableColumnNames(): string[] {
2238+
const columns: string[] = [];
2239+
for (const tableSchema of this.tableContexts.values()) {
2240+
columns.push(...Object.keys(tableSchema.columns));
2241+
}
2242+
return [...new Set(columns)].sort();
2243+
}
2244+
21262245
/**
21272246
* Resolve a column name to its ClickHouse name using the table schema
2247+
*
2248+
* @throws QueryError if the column is not found in the table schema
21282249
*/
2129-
private resolveColumnName(tableSchema: TableSchema, columnName: string): string {
2250+
private resolveColumnName(tableSchema: TableSchema, columnName: string, tableAlias?: string): string {
21302251
const columnSchema = tableSchema.columns[columnName];
21312252
if (columnSchema) {
21322253
return columnSchema.clickhouseName || columnSchema.name;
21332254
}
2134-
// Column not in schema - return as-is (might be a computed column, etc.)
2135-
return columnName;
2255+
2256+
// Column not in schema - this is a security issue, block access
2257+
const availableColumns = Object.keys(tableSchema.columns).sort().join(", ");
2258+
const tableName = tableAlias || tableSchema.name;
2259+
throw new QueryError(
2260+
`Unknown column "${columnName}" on table "${tableName}". Available columns: ${availableColumns}`
2261+
);
21362262
}
21372263

21382264
private visitPlaceholder(node: Placeholder): string {

0 commit comments

Comments
 (0)