Skip to content

lineage_with_schema fails with "Unknown table" on correlated scalar subqueries #47

@karakanb

Description

@karakanb

lineage_with_schema fails when a query contains a correlated scalar subquery that references the outer table. The outer table is in the schema but qualify_columns cannot resolve it from inside the subquery scope.

sqlglot

# /// script
# requires-python = ">=3.11"
# dependencies = ["sqlglot"]
# ///

from sqlglot import lineage, parse_one
from sqlglot.optimizer import optimize
from sqlglot.optimizer.qualify import qualify
from sqlglot.optimizer.annotate_types import annotate_types

schema = {"t1": {"id": "int"}, "t2": {"id": "int", "val": "int"}}

for sql in [
    "SELECT id FROM t1",
    "SELECT id, (SELECT AVG(val) FROM t2 WHERE t2.id = t1.id) AS avg_val FROM t1",
]:
    parsed = parse_one(sql, dialect="bigquery")
    optimized = optimize(parsed, schema, dialect="bigquery", rules=(qualify, annotate_types))
    node = lineage.lineage("id", optimized, schema, dialect="bigquery")
    print(f"OK:   {sql}")

polyglot repro

use polyglot_sql::{self as pgsql, DialectType, MappingSchema, Schema};
use polyglot_sql::expressions::DataType;

fn main() {
    let dialect = DialectType::BigQuery;

    let mut schema = MappingSchema::with_dialect(dialect);
    let _ = schema.add_table("t1", &[("id".into(), DataType::BigInt { length: None })], Some(dialect));
    let _ = schema.add_table("t2", &[
        ("id".into(), DataType::BigInt { length: None }),
        ("val".into(), DataType::BigInt { length: None }),
    ], Some(dialect));

    let sql_ok = "SELECT id FROM t1";
    let expr = &pgsql::parse(sql_ok, dialect).unwrap()[0];
    match pgsql::lineage::lineage_with_schema("id", expr, Some(&schema as &dyn Schema), Some(dialect), false) {
        Ok(_) => println!("OK:   {}", sql_ok),
        Err(e) => println!("FAIL: {}\n      {}", sql_ok, e),
    }

    let sql_fail = "SELECT id, (SELECT AVG(val) FROM t2 WHERE t2.id = t1.id) AS avg_val FROM t1";
    let expr = &pgsql::parse(sql_fail, dialect).unwrap()[0];
    match pgsql::lineage::lineage_with_schema("id", expr, Some(&schema as &dyn Schema), Some(dialect), false) {
        Ok(_) => println!("OK:   {}", sql_fail),
        Err(e) => println!("FAIL: {}\n      {}", sql_fail, e),
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions