-
Notifications
You must be signed in to change notification settings - Fork 33
Open
Description
lineage_with_schema fails when a query contains a correlated scalar subquery that references the outer table. The outer table is in the schema but qualify_columns cannot resolve it from inside the subquery scope.
sqlglot
# /// script
# requires-python = ">=3.11"
# dependencies = ["sqlglot"]
# ///
from sqlglot import lineage, parse_one
from sqlglot.optimizer import optimize
from sqlglot.optimizer.qualify import qualify
from sqlglot.optimizer.annotate_types import annotate_types
schema = {"t1": {"id": "int"}, "t2": {"id": "int", "val": "int"}}
for sql in [
"SELECT id FROM t1",
"SELECT id, (SELECT AVG(val) FROM t2 WHERE t2.id = t1.id) AS avg_val FROM t1",
]:
parsed = parse_one(sql, dialect="bigquery")
optimized = optimize(parsed, schema, dialect="bigquery", rules=(qualify, annotate_types))
node = lineage.lineage("id", optimized, schema, dialect="bigquery")
print(f"OK: {sql}")polyglot repro
use polyglot_sql::{self as pgsql, DialectType, MappingSchema, Schema};
use polyglot_sql::expressions::DataType;
fn main() {
let dialect = DialectType::BigQuery;
let mut schema = MappingSchema::with_dialect(dialect);
let _ = schema.add_table("t1", &[("id".into(), DataType::BigInt { length: None })], Some(dialect));
let _ = schema.add_table("t2", &[
("id".into(), DataType::BigInt { length: None }),
("val".into(), DataType::BigInt { length: None }),
], Some(dialect));
let sql_ok = "SELECT id FROM t1";
let expr = &pgsql::parse(sql_ok, dialect).unwrap()[0];
match pgsql::lineage::lineage_with_schema("id", expr, Some(&schema as &dyn Schema), Some(dialect), false) {
Ok(_) => println!("OK: {}", sql_ok),
Err(e) => println!("FAIL: {}\n {}", sql_ok, e),
}
let sql_fail = "SELECT id, (SELECT AVG(val) FROM t2 WHERE t2.id = t1.id) AS avg_val FROM t1";
let expr = &pgsql::parse(sql_fail, dialect).unwrap()[0];
match pgsql::lineage::lineage_with_schema("id", expr, Some(&schema as &dyn Schema), Some(dialect), false) {
Ok(_) => println!("OK: {}", sql_fail),
Err(e) => println!("FAIL: {}\n {}", sql_fail, e),
}
}Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels