Skip to content

lineage_with_schema fails with "Unknown column" when schema uses dotted table names #48

@karakanb

Description

@karakanb

`lineage_with_schema` fails with an "Unknown column" error when a table is registered in the schema under a dotted name (e.g. `"raw.t1"`): the table added via `add_table("raw.t1", ...)` is not matched by the qualified column lookup for `SELECT a FROM raw.t1`.

sqlglot

# /// script
# requires-python = ">=3.11"
# dependencies = ["sqlglot"]
# ///

from sqlglot import lineage, parse_one
from sqlglot.optimizer import optimize
from sqlglot.optimizer.qualify import qualify
from sqlglot.optimizer.annotate_types import annotate_types

# Each case pairs a query with the schema mapping it is resolved against:
# first an unqualified table, then a table nested under the "raw" key.
cases = [
    ("SELECT a FROM t1", {"t1": {"a": "int"}}),
    ("SELECT a FROM raw.t1", {"raw": {"t1": {"a": "int"}}}),
]
dialect = "bigquery"

for sql, schema in cases:
    tree = parse_one(sql, dialect=dialect)
    # Run only the qualify and annotate_types rules before tracing lineage.
    prepared = optimize(tree, schema, dialect=dialect, rules=(qualify, annotate_types))
    # The returned lineage node is not inspected; reaching the print below
    # means no exception was raised while tracing column "a".
    lineage.lineage("a", prepared, schema, dialect=dialect)
    print(f"OK:   {sql}")

polyglot repro

use polyglot_sql::{self as pgsql, DialectType, MappingSchema, Schema};
use polyglot_sql::expressions::DataType;

/// Runs the same lineage lookup for column `a` against two schemas: one where
/// the table is registered as `t1`, and one where it is registered under the
/// dotted name `raw.t1`. Prints `OK` or `FAIL` (with the error) for each query.
fn main() {
    let dialect = DialectType::BigQuery;

    // (table name registered in the schema, query selecting from that table)
    let cases = [
        ("t1", "SELECT a FROM t1"),
        ("raw.t1", "SELECT a FROM raw.t1"),
    ];

    for (table, sql) in cases {
        // A fresh schema per case, holding a single table with one column `a`.
        let mut schema = MappingSchema::with_dialect(dialect);
        // The return value of `add_table` is intentionally discarded.
        let _ = schema.add_table(table, &[("a".into(), DataType::BigInt { length: None })], Some(dialect));

        // Only the first parsed statement is used.
        let expr = &pgsql::parse(sql, dialect).unwrap()[0];
        let outcome = pgsql::lineage::lineage_with_schema(
            "a",
            expr,
            Some(&schema as &dyn Schema),
            Some(dialect),
            false,
        );

        if let Err(e) = outcome {
            println!("FAIL: {}\n      {}", sql, e);
        } else {
            println!("OK:   {}", sql);
        }
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions