From e7c9217d75a56e32440a6d3e05db4bc5c0c3e775 Mon Sep 17 00:00:00 2001 From: h3n4l Date: Wed, 10 Jun 2026 11:10:16 +0800 Subject: [PATCH] fix(trino/analysis): expand SELECT * over derived relations to the exact projection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A star over a derived table or CTE was recorded as a single opaque "*" result; the consumer (Bytebase) expanded it against the BASE tables' catalog metadata, producing the wrong column count and order for the executed query — its positional masker then slid, returning a sensitive column under a non-sensitive column's masker (BYT-9678). Expand a star inline when its projection is provably width- and order-correct: every covered relation is a derived relation (subquery in FROM, CTE reference, aliased UNNEST, aliased parenthesized join) whose projection is fully resolved, and no USING/NATURAL join coalesces columns in the scope. The expanded columns carry their select-item ordinal, and the top-level pass splices them into the walk's Results in place of the "*" entry; ordinary items keep the additive union, and an unexpandable star stays byte-for-byte opaque (including a qualified star's single relation-ref shape consumers key on), so the consumer's metadata-based expansion applies exactly as before. Also resolved by the same mechanism: - TABLE <cte> expands to the CTE's projection; - a top-level VALUES, which previously produced no Results at all (zero positional maskers), synthesizes its exact projection; - a set operation whose star arm is resolved merges the other arms' lineage at the true expanded width; - a qualified outer reference through a derived relation whose body is a resolved star (d.phone over (SELECT * FROM (SELECT phone …)) d) resolves. 
Semantics verified against live Trino 481: ROW(a, b) in VALUES unpacks to two columns exactly like (a, b); with a(k, p), `SELECT a.* FROM a JOIN b USING (k)` returns only [p] — USING strips the join columns from a QUALIFIED star too, so coalescing conservatively blocks all star expansion in the scope; an alias on a parenthesized join hides the inner relation aliases. Cross-reviewed (Codex): 6 findings — 3 fixed here (aliased-join binding, TABLE-over-CTE, top-level VALUES), 3 refuted/accepted-as-safe with oracle evidence recorded in the regression tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- trino/analysis/query_span.go | 7 + trino/analysis/query_span_resolve.go | 292 ++++++++++++++++++--- trino/analysis/query_span_star_test.go | 337 +++++++++++++++++++++++++ 3 files changed, 607 insertions(+), 29 deletions(-) create mode 100644 trino/analysis/query_span_star_test.go diff --git a/trino/analysis/query_span.go b/trino/analysis/query_span.go index b8cce54e..b0d050a8 100644 --- a/trino/analysis/query_span.go +++ b/trino/analysis/query_span.go @@ -38,6 +38,13 @@ type QuerySpan struct { // Results is the list of output columns produced by the outermost query. For // a set operation (UNION/INTERSECT/EXCEPT) the left arm wins, matching SQL's // "column names come from the first select" rule. + // + // A star select item appears in one of two forms: expanded — when every + // relation it covers is a derived relation (subquery in FROM, CTE, aliased + // UNNEST) whose projection is fully resolved, the star is replaced by its + // exact output columns in order — or as a single "*" / "<rel>.*" entry when + // expansion would need catalog metadata (base tables, coalescing joins), + // which the consumer expands itself. 
Results []ColumnInfo // PredicateColumns is the set of columns referenced in filter/join positions diff --git a/trino/analysis/query_span_resolve.go b/trino/analysis/query_span_resolve.go index 3f5e335e..183eff13 100644 --- a/trino/analysis/query_span_resolve.go +++ b/trino/analysis/query_span_resolve.go @@ -30,9 +30,13 @@ import ( // base table (or an unrecognised relation) is left as-is. // // Scope: derived-relation projection (subqueries in FROM, CTE references), -// UNNEST output columns, scalar subqueries in the select list, and set-operation +// UNNEST output columns, scalar subqueries in the select list, set-operation // arm merging (each output column's lineage is the union of that column across -// all arms). +// all arms), and star expansion — a `*` / `.*` covering only derived +// relations with fully-resolved projections is replaced by its exact output +// columns (width and order), so `SELECT *` over a derived table or CTE masks +// positionally; a star that needs catalog metadata (base table, coalescing +// join, unresolved relation) stays a single opaque "*" result, as before. func resolveDerivedLineage(stmt ast.Node, span *QuerySpan) { if span == nil { return @@ -42,20 +46,79 @@ func resolveDerivedLineage(stmt ast.Node, span *QuerySpan) { return } // Recompute the outermost query's column lineage with relation-scope - // resolution (derived tables, CTEs, UNNEST, scalar subqueries, and - // set-operation arm merging) and union it into the primary walk's Results - // positionally. Both produce one column per output position in the same order - // — a set operation's columns come from its first arm, exactly as the walk - // records them — so index i aligns. The union is additive (resolved sources - // extend the walk's), so even a positional mismatch could only over-include, - // never drop a previously-correct ref. 
+ // resolution (derived tables, CTEs, UNNEST, scalar subqueries, set-operation + // arm merging, and star expansion) and fold it into the primary walk's + // Results. The walk produces exactly one Result per select item; the resolved + // columns carry their select-item ordinal, so each walk Result aligns with + // its resolved group: + // - an ordinary item (one resolved column) keeps its walk Result with the + // resolved sources unioned in — additive, so lineage only deepens; + // - a resolved star is SPLICED: its walk Result (the single "*" entry, + // which had no real source columns) is replaced by the expanded output + // columns, giving the projection its true width and order so the + // positional masker downstream stays aligned; + // - an opaque star (not expandable without metadata) is left byte-for-byte + // untouched, preserving the exact result shape consumers key on to apply + // their own metadata-based expansion. cols := resolveQueryCols(qs.Query, nil) + + // A top-level VALUES produces no Results in the primary walk (it has no + // select items), leaving the consumer with zero positional maskers — a + // sensitive value in a VALUES row (e.g. a scalar subquery) would pass + // through unmasked. When the walk produced nothing but the resolver knows + // the exact projection (VALUES columns are always resolved), synthesize the + // Results from it. Other statement shapes always have walk Results, so this + // only fires for VALUES (including set operations whose first arm is one). 
+ if len(span.Results) == 0 { + if len(cols) > 0 && !hasOpaque(cols) { + for _, c := range cols { + span.Results = append(span.Results, ColumnInfo{Name: c.name, SourceColumns: c.sources}) + } + } + return + } + + groups := groupByItem(cols) + newResults := make([]ColumnInfo, 0, len(span.Results)) for i := range span.Results { - if i >= len(cols) { - break + r := span.Results[i] + var g []outCol + if i < len(groups) { + g = groups[i] + } + switch { + case len(g) == 0: + newResults = append(newResults, r) + case g[0].opaque: + // Unresolved star: leave the walk's result untouched (including its + // source refs — a qualified star's single relation-name ref is shape + // consumers detect; unioning anything in could break that detection). + newResults = append(newResults, r) + case len(g) == 1 && !g[0].fromStar: + r.SourceColumns = unionRefs(r.SourceColumns, g[0].sources) + newResults = append(newResults, r) + default: + // Resolved star: splice the expanded projection in place of the "*". + for _, c := range g { + newResults = append(newResults, ColumnInfo{Name: c.name, SourceColumns: c.sources}) + } + } + } + span.Results = newResults +} + +// groupByItem buckets resolved columns by their select-item ordinal, preserving +// order within each bucket. Ordinals are dense (assigned per select item), so a +// slice indexed by ordinal suffices. +func groupByItem(cols []outCol) [][]outCol { + var groups [][]outCol + for _, c := range cols { + for len(groups) <= c.item { + groups = append(groups, nil) } - span.Results[i].SourceColumns = unionRefs(span.Results[i].SourceColumns, cols[i].sources) + groups[c.item] = append(groups[c.item], c) } + return groups } // --------------------------------------------------------------------------- @@ -64,9 +127,39 @@ func resolveDerivedLineage(stmt ast.Node, span *QuerySpan) { // outCol is one output column of a (sub)query or relation: its name and the // base-column refs that feed it, recovered through any derived relations. 
+// +// A star select item is represented in one of two ways: +// - resolved: the star is expanded inline into the covered relations' output +// columns (each marked fromStar), so the projection's width and order are +// exact — this is what lets `SELECT *` over a derived table or CTE be +// masked positionally; +// - opaque: the star cannot be expanded without catalog metadata (it covers a +// base table, an unresolved relation, or a coalescing join), so a single +// placeholder entry (opaque=true) holds its position and the projection's +// true width is unknown. type outCol struct { name string sources []ColumnRef + // item is the select-item ordinal this column came from, used by the + // top-level splice to align resolved columns with the primary walk's + // Results (a resolved star yields several columns with the same item). + item int + // fromStar marks a column produced by expanding a resolved star. + fromStar bool + // opaque marks an unresolved star placeholder; a projection containing one + // has unknown width and blocks star expansion through it. + opaque bool +} + +// hasOpaque reports whether cols contains an unresolved star placeholder (the +// projection's width is then unknown). +func hasOpaque(cols []outCol) bool { + for _, c := range cols { + if c.opaque { + return true + } + } + return false } // cteDefs maps a CTE name (lower-cased) to its resolved output columns, with a @@ -142,8 +235,16 @@ func resolveNodeCols(node parser.QueryNode, cte *cteDefs) []outCol { case *parser.ParenQuery: return resolveQueryCols(n.Inner, cte) case *parser.TableQuery: - // TABLE name == SELECT * FROM name. - return []outCol{{name: "*"}} + // TABLE name == SELECT * FROM name. Over an in-scope CTE the projection + // is the CTE's resolved columns (verified against Trino 481: + // `WITH w AS (SELECT phone, name …) TABLE w` returns [phone, name]); + // over a base table the star needs catalog metadata and stays opaque. 
+ if parts := normalizedParts(n.Name); len(parts) == 1 && cte != nil { + if cols, ok := cte.lookup(parts[0]); ok && len(cols) > 0 && !hasOpaque(cols) { + return stampStar(cols, 0, nil) + } + } + return []outCol{{name: "*", opaque: true}} case *parser.ValuesQuery: return resolveValuesCols(n, cte) default: @@ -167,7 +268,7 @@ func resolveValuesCols(n *parser.ValuesQuery, cte *cteDefs) []outCol { for _, row := range n.Rows { for i, e := range valuesRowElements(row) { for len(cols) <= i { - cols = append(cols, outCol{}) + cols = append(cols, outCol{item: len(cols)}) } cols[i].sources = unionRefs(cols[i].sources, scope.resolveExprRefs(e)) } @@ -185,16 +286,28 @@ func valuesRowElements(row parser.Expr) []parser.Expr { } // mergeArms positionally merges the resolved columns of two set-operation arms. -// The result has the left arm's shape (count and names — SQL takes the output -// column names from the first SELECT); each column's sources are the union of -// the two arms' sources at that position, so a sensitive value contributed by -// either arm forces the column to be masked. A right arm with fewer columns than -// the left (only in malformed SQL) contributes nothing beyond the positions it -// has. +// The result has the left arm's shape (count, names, and item stamps — SQL takes +// the output column names from the first SELECT); each column's sources are the +// union of the two arms' sources at that position, so a sensitive value +// contributed by either arm forces the column to be masked. A right arm with +// fewer columns than the left (only in malformed SQL) contributes nothing beyond +// the positions it has. +// +// With stars expanded inline, both arms of a valid set operation have their true +// width, so the positional union is exact — including a resolved star arm +// (`SELECT * FROM (…) d UNION SELECT phone FROM t` merges phone into d's +// expanded column). 
When either arm still contains an OPAQUE star its width is +// unknown and positional pairing would be unsound, so the merge is skipped and +// the left arm passes through unchanged (parity with the walk: the consumer's +// own star expansion applies and the other arm's lineage remains a documented +// residual rather than a mis-attributed union). func mergeArms(left, right []outCol) []outCol { + if hasOpaque(left) || hasOpaque(right) { + return left + } out := make([]outCol, len(left)) - for i := range left { - out[i] = outCol{name: left[i].name, sources: left[i].sources} + copy(out, left) + for i := range out { if i < len(right) { out[i].sources = unionRefs(left[i].sources, right[i].sources) } @@ -203,34 +316,77 @@ func mergeArms(left, right []outCol) []outCol { } // resolveSpecCols computes the resolved output columns of one SELECT block: it -// builds the FROM scope, then resolves each select item's direct column refs -// through that scope. +// builds the FROM scope, then resolves each select item through that scope. A +// star item is expanded inline when every relation it covers is a derived +// relation with a fully-resolved projection (see starExpansion); otherwise it +// stays a single opaque placeholder, exactly as before. func resolveSpecCols(spec *parser.QuerySpec, cte *cteDefs) []outCol { if spec == nil { return nil } scope := newRScope(spec.From, cte) out := make([]outCol, 0, len(spec.Items)) - for _, item := range spec.Items { + for idx, item := range spec.Items { switch item.Kind { case parser.SelectAll: - out = append(out, outCol{name: "*"}) + if exp := scope.starExpansion(""); exp != nil { + out = append(out, stampStar(exp, idx, nil)...) + } else { + out = append(out, outCol{name: "*", item: idx, opaque: true}) + } continue case parser.SelectAllFrom: + if q := starQualifier(item.Expr); q != "" { + if exp := scope.starExpansion(q); exp != nil { + out = append(out, stampStar(exp, idx, item.Aliases)...) 
+ continue + } + } name := renderExprName(item.Expr) if name == "" { name = "*" } else { name += ".*" } - out = append(out, outCol{name: name, sources: scope.resolveExprRefs(item.Expr)}) + out = append(out, outCol{name: name, sources: scope.resolveExprRefs(item.Expr), item: idx, opaque: true}) continue } name := identName(item.Alias) if name == "" { name = renderExprName(item.Expr) } - out = append(out, outCol{name: name, sources: scope.resolveExprRefs(item.Expr)}) + out = append(out, outCol{name: name, sources: scope.resolveExprRefs(item.Expr), item: idx}) + } + return out +} + +// starQualifier returns the relation qualifier a `.*` select item names: +// the rightmost identifier of the row expression (the relation/alias for `d.*`, +// the table for `sch.t.*`). Empty when the expression is not a plain name chain +// (e.g. a row-valued function), in which case the star is not expandable. +func starQualifier(expr parser.Expr) string { + switch e := expr.(type) { + case *parser.ColumnRef: + return identName(e.Name) + case *parser.Dereference: + return identName(e.FieldName) + case *parser.ParenExpr: + return starQualifier(e.Expr) + } + return "" +} + +// stampStar returns a copy of a star's expanded columns stamped with the star's +// select-item ordinal and the fromStar marker, with any `AS (a, b, …)` column +// aliases applied positionally. The input (a relation's shared projection) is +// never mutated. 
+func stampStar(cols []outCol, item int, aliases []*ast.Identifier) []outCol { + out := make([]outCol, len(cols)) + copy(out, cols) + out = applyColumnAliases(out, aliases) + for i := range out { + out[i].item = item + out[i].fromStar = true } return out } @@ -246,6 +402,11 @@ func resolveSpecCols(spec *parser.QuerySpec, cte *cteDefs) []outCol { type rscope struct { rels []rbind cte *cteDefs + // coalesced is set when the FROM tree contains a USING or NATURAL join, + // which coalesces the join columns into single output columns — `SELECT *` + // then has a different width/order than the relations' concatenation, so + // star expansion is blocked in this scope. + coalesced bool } // rbind binds one relation in a FROM scope to a name. derived marks a subquery @@ -274,6 +435,34 @@ func (s *rscope) add(rel parser.Relation, alias string, colAliases []*ast.Identi case *parser.AliasedRelation: s.add(n.Inner, identName(n.Alias), n.ColumnAliases) case *parser.Join: + if alias != "" { + // An aliased (parenthesized) join — ((…) a JOIN (…) b ON …) AS j — + // is one relation named j whose projection is the join's column + // concatenation; Trino hides the inner aliases behind it (verified + // against Trino 481: `a.phone` does not resolve through `… AS j`). + // Bind a single synthetic relation: derived when the join subtree + // is fully resolvable (all derived, no coalescing), opaque-blocking + // otherwise. 
+ sub := &rscope{cte: s.cte} + if len(n.Using) > 0 || n.Natural { + sub.coalesced = true + } + sub.add(n.Left, "", nil) + sub.add(n.Right, "", nil) + cols := sub.starExpansion("") + s.rels = append(s.rels, rbind{name: alias, derived: cols != nil, cols: applyColumnAliases(cols, colAliases)}) + return + } + if len(n.Using) > 0 || n.Natural { + // USING/NATURAL coalesces the join columns, changing the output + // width and order: an unqualified `*` lists the coalesced columns + // once, first; and a QUALIFIED star EXCLUDES the using columns from + // its relation's projection (verified against Trino 481: with + // a(k, p), `SELECT a.* FROM a JOIN b USING (k)` returns only [p]). + // Star expansion in this scope would therefore be width-wrong in + // both forms; block it. Named-column resolution is unaffected. + s.coalesced = true + } s.add(n.Left, "", nil) s.add(n.Right, "", nil) case *parser.ParenRelation: @@ -344,6 +533,51 @@ func (s *rscope) unnestColumns(n *parser.UnnestRelation, colAliases []*ast.Ident return cols } +// starExpansion returns the exact output columns a star select item covers, or +// nil when expansion is not provably width- and order-correct. An empty +// qualifier is an unqualified `*` (covering every FROM relation, in order); a +// non-empty qualifier is `.*` (covering exactly that relation). +// +// Expansion requires certainty — a wrong width or order would misalign the +// positional masker downstream, which is precisely the bug this resolves — so +// it bails (nil) unless: +// - no USING/NATURAL join coalesces columns in this scope, and +// - every covered relation is a derived relation (subquery, CTE reference, or +// aliased UNNEST) whose projection is fully resolved: non-empty and free of +// opaque star placeholders. A base table (width known only to catalog +// metadata), a lateral/table-function relation, an UNNEST without column +// aliases, or a qualifier matching zero or several relations all bail. 
+// +// A nil return leaves the star opaque — the consumer's metadata-based expansion +// applies, exactly as before this resolver existed. +func (s *rscope) starExpansion(qualifier string) []outCol { + if s.coalesced || len(s.rels) == 0 { + return nil + } + if qualifier != "" { + var match *rbind + count := 0 + for i := range s.rels { + if strings.EqualFold(s.rels[i].name, qualifier) { + match = &s.rels[i] + count++ + } + } + if count != 1 || !match.derived || len(match.cols) == 0 || hasOpaque(match.cols) { + return nil + } + return match.cols + } + var out []outCol + for _, rb := range s.rels { + if !rb.derived || len(rb.cols) == 0 || hasOpaque(rb.cols) { + return nil + } + out = append(out, rb.cols...) + } + return out +} + // resolveRefs resolves a list of result-column refs through the scope, // deduplicating the result while preserving first-seen order. func (s *rscope) resolveRefs(refs []ColumnRef) []ColumnRef { diff --git a/trino/analysis/query_span_star_test.go b/trino/analysis/query_span_star_test.go new file mode 100644 index 00000000..fcd99c12 --- /dev/null +++ b/trino/analysis/query_span_star_test.go @@ -0,0 +1,337 @@ +package analysis + +import ( + "strings" + "testing" +) + +// resultNames returns the result column names in order. +func resultNames(span *QuerySpan) []string { + out := make([]string, 0, len(span.Results)) + for _, r := range span.Results { + out = append(out, r.Name) + } + return out +} + +func sameNames(got []string, want ...string) bool { + if len(got) != len(want) { + return false + } + for i := range got { + if got[i] != want[i] { + return false + } + } + return true +} + +// TestGetQuerySpan_StarOverDerivedExpands covers BYT-9678: SELECT * over a +// derived table must expand to the derived relation's exact projection (width +// and order), each column carrying its base lineage — not remain an opaque "*" +// that the consumer expands to the base table's full column set (which +// misaligns the positional masker). 
+func TestGetQuerySpan_StarOverDerivedExpands(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT phone, name FROM customer) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phone", "name") { + t.Fatalf("Results = %v, want exactly [phone name] in order", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("Results[0].SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } + if !hasSource(span.Results[1].SourceColumns, ColumnRef{Column: "name"}) { + t.Errorf("Results[1].SourceColumns = %+v, want {Column:name}", span.Results[1].SourceColumns) + } +} + +// TestGetQuerySpan_StarOverCTEExpands expands SELECT * over a CTE reference. +func TestGetQuerySpan_StarOverCTEExpands(t *testing.T) { + span, err := GetQuerySpan("WITH w AS (SELECT phone, name FROM customer) SELECT * FROM w") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phone", "name") { + t.Fatalf("Results = %v, want [phone name]", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("Results[0].SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } +} + +// TestGetQuerySpan_QualifiedStarOverDerivedExpands expands d.* over a derived +// table, including a renamed projection. 
+func TestGetQuerySpan_QualifiedStarOverDerivedExpands(t *testing.T) { + span, err := GetQuerySpan("SELECT d.* FROM (SELECT phone AS x FROM customer) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "x") { + t.Fatalf("Results = %v, want [x]", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("x.SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } +} + +// TestGetQuerySpan_StarOverBaseStaysOpaque guards parity: a star over a base +// table cannot be expanded without metadata and must stay a single "*" result +// for the consumer's metadata-based expansion. +func TestGetQuerySpan_StarOverBaseStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM customer") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "*") { + t.Fatalf("Results = %v, want exactly [*]", got) + } +} + +// TestGetQuerySpan_StarOverMixedBaseDerivedStaysOpaque guards parity: a star +// covering a base table joined with a derived relation has unknown total width +// (the base side needs metadata) and must stay opaque. +func TestGetQuerySpan_StarOverMixedBaseDerivedStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM customer JOIN (SELECT x FROM t) d ON true") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "*") { + t.Fatalf("Results = %v, want exactly [*] (mixed base+derived must not expand)", got) + } +} + +// TestGetQuerySpan_StarOverUsingJoinStaysOpaque guards that a USING join blocks +// expansion: the coalesced join column changes the output width/order. 
+func TestGetQuerySpan_StarOverUsingJoinStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("WITH a AS (SELECT k, p FROM t1), b AS (SELECT k, q FROM t2) SELECT * FROM a JOIN b USING (k)") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "*") { + t.Fatalf("Results = %v, want exactly [*] (USING join coalesces; must not expand)", got) + } +} + +// TestGetQuerySpan_StarOverDerivedContainingBaseStarStaysOpaque guards that a +// derived relation whose own projection is an unexpandable star (over a base +// table) makes the outer star opaque too — its width is unknown. +func TestGetQuerySpan_StarOverDerivedContainingBaseStarStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT * FROM customer) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "*") { + t.Fatalf("Results = %v, want exactly [*] (inner base star is opaque)", got) + } +} + +// TestGetQuerySpan_NestedDerivedStarExpands expands a star whose derived +// relation itself contains a resolved star over another derived relation. +func TestGetQuerySpan_NestedDerivedStarExpands(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT * FROM (SELECT phone AS x FROM customer) i) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "x") { + t.Fatalf("Results = %v, want [x] (nested resolved stars expand transitively)", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("x.SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } +} + +// TestGetQuerySpan_MixedItemsStarSplice verifies the splice keeps non-star items +// aligned: SELECT a, * over a derived relation yields [a, b, c]. 
+func TestGetQuerySpan_MixedItemsStarSplice(t *testing.T) { + span, err := GetQuerySpan("SELECT a, * FROM (SELECT b, c FROM t) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "a", "b", "c") { + t.Fatalf("Results = %v, want [a b c]", got) + } +} + +// TestGetQuerySpan_SetOpResolvedStarArmMerges verifies that a set operation +// whose left arm is a RESOLVED star merges the right arm positionally at the +// expanded width: the sensitive right-arm column lands on the expanded column. +func TestGetQuerySpan_SetOpResolvedStarArmMerges(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT name FROM customer) d UNION SELECT phone FROM customer") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "name") { + t.Fatalf("Results = %v, want [name] (expanded star arm)", got) + } + srcs := span.Results[0].SourceColumns + if !hasSource(srcs, ColumnRef{Column: "name"}) || !hasSource(srcs, ColumnRef{Column: "phone"}) { + t.Errorf("name.SourceColumns = %+v, want both {Column:name} and {Column:phone} (arms merged at expanded width)", srcs) + } +} + +// TestGetQuerySpan_StarOverUnaliasedUnnestStaysOpaque guards that UNNEST without +// column aliases (output width unknown without type metadata) blocks expansion. +func TestGetQuerySpan_StarOverUnaliasedUnnestStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT phones FROM customer) d CROSS JOIN UNNEST(d.phones)") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "*") { + t.Fatalf("Results = %v, want exactly [*] (unaliased UNNEST width unknown)", got) + } +} + +// TestGetQuerySpan_StarOverDerivedWithUnnestExpands expands a star over a +// derived relation joined with an ALIASED UNNEST (both widths known). 
+func TestGetQuerySpan_StarOverDerivedWithUnnestExpands(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (SELECT phones FROM customer) d CROSS JOIN UNNEST(d.phones) AS t(p)") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phones", "p") { + t.Fatalf("Results = %v, want [phones p]", got) + } + if !hasSource(span.Results[1].SourceColumns, ColumnRef{Column: "phones"}) { + t.Errorf("p.SourceColumns = %+v, want {Column:phones}", span.Results[1].SourceColumns) + } +} + +// TestGetQuerySpan_QualifiedStarThroughInnerStarResolves covers the transitive +// fix from inline star expansion: a qualified outer ref through a derived +// relation whose projection is a resolved star now finds the column. +func TestGetQuerySpan_QualifiedStarThroughInnerStarResolves(t *testing.T) { + span, err := GetQuerySpan("SELECT d.phone FROM (SELECT * FROM (SELECT phone FROM customer) c) d") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + r, ok := resultByName(span, "phone") + if !ok { + t.Fatalf("Results = %+v, want a column named phone", span.Results) + } + if !hasSource(r.SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("phone.SourceColumns = %+v, want {Column:phone} (through inner resolved star)", r.SourceColumns) + } +} + +// TestGetQuerySpan_AliasedJoinStarExpands covers an alias on a parenthesized +// join: ((…) a JOIN (…) b) AS j is one relation whose projection is the join's +// column concatenation (verified against Trino 481), so j.* and * expand. 
+func TestGetQuerySpan_AliasedJoinStarExpands(t *testing.T) { + span, err := GetQuerySpan("SELECT j.* FROM ((SELECT phone FROM customer) a JOIN (SELECT name FROM customer) b ON true) AS j") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phone", "name") { + t.Fatalf("Results = %v, want [phone name]", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("Results[0].SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } + + span, err = GetQuerySpan("SELECT * FROM ((SELECT phone FROM customer) a JOIN (SELECT name FROM customer) b ON true) AS j") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phone", "name") { + t.Fatalf("unqualified Results = %v, want [phone name] (the aliased join is ONE relation; its parts must not double-count)", got) + } +} + +// TestGetQuerySpan_AliasedJoinWithBaseStaysOpaque guards that an aliased join +// containing a base table is bound as an unresolvable relation: stars through +// it stay opaque. +func TestGetQuerySpan_AliasedJoinWithBaseStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("SELECT j.* FROM (customer c JOIN (SELECT x FROM t) d ON true) AS j") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "j.*") { + t.Fatalf("Results = %v, want exactly [j.*] (base table inside the aliased join)", got) + } +} + +// TestGetQuerySpan_TableQueryOverCTEExpands covers TABLE <cte>: equivalent to +// SELECT * FROM cte, whose projection is the CTE's resolved columns (verified +// against Trino 481). 
+func TestGetQuerySpan_TableQueryOverCTEExpands(t *testing.T) { + span, err := GetQuerySpan("WITH w AS (SELECT phone, name FROM customer) TABLE w") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "phone", "name") { + t.Fatalf("Results = %v, want [phone name]", got) + } + if !hasSource(span.Results[0].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("Results[0].SourceColumns = %+v, want {Column:phone}", span.Results[0].SourceColumns) + } +} + +// TestGetQuerySpan_TopLevelValuesResults covers a top-level VALUES: the primary +// walk emits no Results (no select items), which left the consumer with zero +// positional maskers; the resolver must synthesize the projection so a +// sensitive value in a VALUES row is maskable. +func TestGetQuerySpan_TopLevelValuesResults(t *testing.T) { + span, err := GetQuerySpan("VALUES ('x', (SELECT phone FROM customer LIMIT 1))") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if len(span.Results) != 2 { + t.Fatalf("Results = %+v, want exactly 2 columns", span.Results) + } + if !hasSource(span.Results[1].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("Results[1].SourceColumns = %+v, want {Column:phone}", span.Results[1].SourceColumns) + } +} + +// TestGetQuerySpan_ValuesRowConstructorArity locks the oracle-verified VALUES +// arity: ROW('a', expr) in VALUES is unpacked into 2 output columns exactly +// like ('a', expr) (verified against Trino 481, where AS v(r) over it errors +// with "alias list has 1 entries but 2 columns"). 
+func TestGetQuerySpan_ValuesRowConstructorArity(t *testing.T) { + span, err := GetQuerySpan("SELECT * FROM (VALUES ROW(1, (SELECT phone FROM customer LIMIT 1))) AS v(a, b)") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "a", "b") { + t.Fatalf("Results = %v, want [a b] (ROW unpacks to 2 columns)", got) + } + if !hasSource(span.Results[1].SourceColumns, ColumnRef{Column: "phone"}) { + t.Errorf("b.SourceColumns = %+v, want {Column:phone}", span.Results[1].SourceColumns) + } +} + +// TestGetQuerySpan_QualifiedStarBesideUsingJoinStaysOpaque locks the +// oracle-verified USING semantics: with a(k, phone), `SELECT a.* FROM a JOIN b +// USING (k)` returns only [phone] — Trino EXCLUDES the using columns from a +// qualified star — so expanding a.* to a's full projection would be +// width-wrong. The conservative scope-wide block is correct. +func TestGetQuerySpan_QualifiedStarBesideUsingJoinStaysOpaque(t *testing.T) { + span, err := GetQuerySpan("WITH a AS (SELECT k, phone FROM customer), b AS (SELECT k FROM orders) SELECT a.* FROM a JOIN b USING (k)") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + if got := resultNames(span); !sameNames(got, "a.*") { + t.Fatalf("Results = %v, want exactly [a.*] (USING strips join columns from a qualified star; expansion would misalign)", got) + } +} + +// TestGetQuerySpan_QualifiedStarOverBaseShapePreserved guards the consumer +// contract for an OPAQUE qualified star: its result keeps exactly the walk's +// single relation-name source ref (consumers detect a qualified star by that +// shape: one ref whose Column equals the qualifier), even when a derived +// relation in scope exposes a column of the same name. 
+func TestGetQuerySpan_QualifiedStarOverBaseShapePreserved(t *testing.T) { + span, err := GetQuerySpan("SELECT u.*, d.x FROM users u JOIN (SELECT phone AS u, phone AS x FROM customer) d ON true") + if err != nil { + t.Fatalf("GetQuerySpan returned error: %v", err) + } + r, ok := resultByName(span, "u.*") + if !ok { + t.Fatalf("Results = %+v, want a column named u.*", span.Results) + } + if len(r.SourceColumns) != 1 || !strings.EqualFold(r.SourceColumns[0].Column, "u") { + t.Errorf("u.*.SourceColumns = %+v, want exactly the single relation ref {Column:u} (shape consumers key on)", r.SourceColumns) + } +}