From a37da5572dd58f85103faf8f64b78923f7822435 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 17 Oct 2025 15:00:25 -0700 Subject: [PATCH 01/30] Added trace logging to join planning --- sql/analyzer/analyzer.go | 2 ++ sql/analyzer/indexed_joins.go | 63 +++++++++++++++++++++++++++++++--- sql/memo/join_order_builder.go | 33 ++++++++++++++++-- sql/memo/memo.go | 22 ++++++++++++ 4 files changed, 114 insertions(+), 6 deletions(-) diff --git a/sql/analyzer/analyzer.go b/sql/analyzer/analyzer.go index 56bc882cad..f545ef4209 100644 --- a/sql/analyzer/analyzer.go +++ b/sql/analyzer/analyzer.go @@ -297,6 +297,8 @@ type Analyzer struct { Batches []*Batch // Whether to log various debugging messages Debug bool + // Whether to output detailed trace logging for join planning + Trace bool // Whether to output the query plan at each step of the analyzer Verbose bool } diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index a250274c06..3284c19bf4 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -137,6 +137,7 @@ func recSchemaToGetFields(n sql.Node, sch sql.Schema) []sql.Expression { func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Scope, qFlags *sql.QueryFlags) (ret sql.Node, err error) { m := memo.NewMemo(ctx, a.Catalog, scope, len(scope.Schema()), a.Coster, qFlags) m.Debug = a.Debug + m.Trace = a.Trace defer func() { if r := recover(); r != nil { @@ -161,45 +162,62 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco hints := m.SessionHints() hints = append(hints, memo.ExtractJoinHint(n)...) + m.TraceLog("Adding index scans") err = addIndexScans(ctx, m) if err != nil { return nil, err } + + m.TraceLog("Converting semi joins to inner joins") err = convertSemiToInnerJoin(m) if err != nil { return nil, err } + + m.TraceLog("Converting anti joins to left joins") err = convertAntiToLeftJoin(m) if err != nil { return nil, err } + + m.TraceLog("Adding right semi joins") err = addRightSemiJoins(ctx, m) if err != nil { return nil, err } + m.TraceLog("Adding lookup joins") err = addLookupJoins(ctx, m) if err != nil { return nil, err } if !mergeJoinsDisabled(hints) { + m.TraceLog("Adding merge joins") err = addMergeJoins(ctx, m) if err != nil { return nil, err } + } else { + m.TraceLog("Skipping merge joins (disabled by hints)") } + m.TraceLog("Computing cardinality for memo groups") memo.CardMemoGroups(ctx, m.Root()) + m.TraceLog("Adding cross hash joins") err = addCrossHashJoins(m) if err != nil { return nil, err } + + m.TraceLog("Adding hash joins") err = addHashJoins(m) if err != nil { return nil, err } + + m.TraceLog("Adding range heap joins") err = addRangeHeapJoin(m) if err != nil { return nil, err @@ -207,14 +225,18 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco // Once we've enumerated all expression groups, we can apply hints. This must be done after expression // groups have been identified, so that the applied hints use the correct metadata. + m.TraceLog("Applying %d hints", len(hints)) for _, h := range hints { + m.TraceLog("Applying hint: %s", h.Typ.String()) m.ApplyHint(h) } + m.TraceLog("Starting cost-based optimization") err = m.OptimizeRoot() if err != nil { return nil, err } + m.TraceLog("Completed cost-based optimization") if a.Verbose && a.Debug { a.Log("%s", m.String()) @@ -267,10 +289,14 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } if len(join.Filter) == 0 { + m.TraceLog("Skipping lookup join for %T - no filters", e) return nil } + m.TraceLog("Considering lookup join for %T with %d filters", e, len(join.Filter)) + tableId, indexes, extraFilters := lookupCandidates(right.First, false) + m.TraceLog("Found %d index candidates for lookup join", len(indexes)) var rt sql.TableNode var aliasName string @@ -281,10 +307,12 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { var ok bool rt, ok = n.Child.(sql.TableNode) if !ok { + m.TraceLog("Skipping lookup join - table alias child is not TableNode") return nil } aliasName = n.Name() default: + m.TraceLog("Skipping lookup join - unsupported table node type: %T", n) return nil } @@ -322,13 +350,17 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { return nil } - for _, idx := range indexes { + for i, idx := range indexes { keyExprs, matchedFilters, nullmask := keyExprsForIndex(tableId, idx.Cols(), append(join.Filter, extraFilters...)) if keyExprs == nil { + m.TraceLog("Index %d: no matching key expressions found", i) continue } + m.TraceLog("Index %d: found %d key expressions, %d matched filters", i, len(keyExprs), len(matchedFilters)) + ita, err := plan.NewIndexedAccessForTableNode(ctx, rt, plan.NewLookupBuilder(idx.SqlIdx(), keyExprs, nullmask)) if err != nil { + m.TraceLog("Index %d: failed to create indexed table access: %v", i, err) return err } lookup := &memo.IndexScan{ @@ -350,6 +382,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } } + m.TraceLog("Adding lookup join with index %d, %d remaining filters", i, len(filters)) m.MemoizeLookupJoin(e.Group(), join.Left, join.Right, join.Op, filters, lookup) } return nil @@ -793,33 +826,42 @@ func addHashJoins(m *memo.Memo) error { join := e.(memo.JoinRel).JoinPrivate() if len(join.Filter) == 0 { + m.TraceLog("Skipping hash join for %T - no filters", e) return nil } + m.TraceLog("Considering hash join for %T with %d filters", e, len(join.Filter)) + var fromExpr, toExpr []sql.Expression - for _, f := range join.Filter { + for i, f := range join.Filter { switch f := f.(type) { case *expression.Equals: if satisfiesScalarRefs(f.Left(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) + m.TraceLog("Filter %d: left->right hash key mapping", i) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) + m.TraceLog("Filter %d: right->left hash key mapping", i) } else { + m.TraceLog("Filter %d: does not satisfy scalar refs for hash join", i) return nil } default: + m.TraceLog("Filter %d: not an equality expression, skipping hash join", i) return nil } } switch join.Right.First.(type) { case *memo.RecursiveTable: + m.TraceLog("Skipping hash join - right side is recursive table") return nil } + m.TraceLog("Adding hash join with %d key expressions", len(toExpr)) m.MemoizeHashJoin(e.Group(), join, toExpr, fromExpr) return nil }) @@ -1027,16 +1069,22 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { } if len(join.Filter) == 0 { + m.TraceLog("Skipping merge join for %T - no filters", e) return nil } + m.TraceLog("Considering merge join for %T with %d filters", e, len(join.Filter)) + leftTabId, lIndexes, lFilters := lookupCandidates(join.Left.First, true) rightTabId, rIndexes, rFilters := lookupCandidates(join.Right.First, true) if leftTabId == 0 || rightTabId == 0 { + m.TraceLog("Skipping merge join - no valid table candidates found") return nil } + m.TraceLog("Found %d left indexes, %d right indexes for merge join", len(lIndexes), len(rIndexes)) + leftTab := join.Left.RelProps.TableIdNodes()[0] rightTab := join.Right.RelProps.TableIdNodes()[0] @@ -1091,20 +1139,25 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { // While matchedFilters is not empty: // Check to see if any rIndexes match that set of filters // Remove the last matched filter - for _, lIndex := range lIndexes { + for i, lIndex := range lIndexes { if lIndex.Order() == sql.IndexOrderNone { // lookups can be unordered, merge indexes need to // be globally ordered + m.TraceLog("Left index %d: skipping - unordered index", i) continue } matchedEqFilters := matchedFiltersForLeftIndex(lIndex, join.Left.RelProps.FuncDeps().Constants(), eqFilters) + m.TraceLog("Left index %d: matched %d equality filters", i, len(matchedEqFilters)) + for len(matchedEqFilters) > 0 { - for _, rIndex := range rIndexes { + for j, rIndex := range rIndexes { if rIndex.Order() == sql.IndexOrderNone { + m.TraceLog("Right index %d: skipping - unordered index", j) continue } if rightIndexMatchesFilters(rIndex, join.Left.RelProps.FuncDeps().Constants(), matchedEqFilters) { + m.TraceLog("Found matching index pair: left[%d] <-> right[%d]", i, j) jb := join.Copy() if d, ok := jb.Left.First.(*memo.Distinct); ok && lIndex.SqlIdx().IsUnique() { jb.Left = d.Child @@ -1147,8 +1200,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { return err } if !success { + m.TraceLog("Failed to create index scan for right index %d", j) continue } + m.TraceLog("Adding merge join with left index %d, right index %d", i, j) m.MemoizeMergeJoin(e.Group(), join.Left, join.Right, lIndexScan, rIndexScan, jb.Op.AsMerge(), newFilters, false) } } diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index 652fe2611d..931fe9fa1e 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -166,21 +166,34 @@ func (j *joinOrderBuilder) useFastReorder() bool { } func (j *joinOrderBuilder) ReorderJoin(n sql.Node) { + j.m.TraceLog("Starting join reordering for node: %T", n) j.populateSubgraph(n) + if j.useFastReorder() { - j.buildSingleLookupPlan() - return + j.m.TraceLog("Using fast reorder algorithm (large join with %d tables)", len(j.vertices)) + if j.buildSingleLookupPlan() { + j.m.TraceLog("Successfully built single lookup plan") + return + } + j.m.TraceLog("Failed to build single lookup plan, falling back to exhaustive enumeration") } else if j.hasCrossJoin { + j.m.TraceLog("Join contains cross joins, attempting single lookup plan first") // Rely on FastReorder to avoid plans that drop filters with cross joins if j.buildSingleLookupPlan() { + j.m.TraceLog("Successfully built single lookup plan for cross join") return } + j.m.TraceLog("Failed to build single lookup plan for cross join, using exhaustive enumeration") } + // TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis // from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders // for the single-lookup plan, which would complicate things. + j.m.TraceLog("Building transitive closure for %d edges", len(j.edges)) j.ensureClosure(j.m.root) + j.m.TraceLog("Starting exhaustive subset enumeration") j.dpEnumerateSubsets() + j.m.TraceLog("Completed join reordering") return } @@ -474,11 +487,13 @@ func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { } func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { + j.m.TraceLog("Building join operation: %s", n.JoinType().String()) leftV, leftE, _ := j.populateSubgraph(n.Left()) rightV, rightE, _ := j.populateSubgraph(n.Right()) typ := n.JoinType() if typ.IsPhysical() { typ = plan.JoinTypeInner + j.m.TraceLog("Converted physical join type to inner join") } isInner := typ.IsInner() op := &operator{ @@ -490,6 +505,7 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { } filters := expression.SplitConjunction(n.JoinCond()) + j.m.TraceLog("Join filters: %v", filters) union := leftV.union(rightV) group, ok := j.plans[union] if !ok { @@ -499,11 +515,14 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { group = j.memoize(op.joinType, left, right, filters, nil) j.plans[union] = group j.m.root = group + j.m.TraceLog("Created new memo group for join combination") } if !isInner { + j.m.TraceLog("Building non-inner edge for join type: %s", typ.String()) j.buildNonInnerEdge(op, filters...) } else { + j.m.TraceLog("Building inner edge for join type: %s", typ.String()) j.buildInnerEdge(op, filters...) } return group @@ -668,10 +687,13 @@ func setPrinter(all, s1, s2 vertexSet) { // addPlans finds operators that let us join (s1 op s2) and (s2 op s1). func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { + j.m.TraceLog("Considering join between vertex sets s1=%s, s2=%s", s1.String(), s2.String()) + // all inner filters could be applied if j.plans[s1] == nil || j.plans[s2] == nil { // Both inputs must have plans. // need this to prevent cross-joins higher in tree + j.m.TraceLog("Skipping join - one or both input plans are nil") return } @@ -691,6 +713,7 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { } isRedundant = isRedundant || e.joinIsRedundant(s1, s2) addInnerJoin = true + j.m.TraceLog("Found applicable inner edge %d with filters: %v", i, e.filters) } } @@ -699,12 +722,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) { e := &j.edges[i] if e.applicable(s1, s2) { + j.m.TraceLog("Found applicable non-inner edge %d, adding join: %s", i, e.op.joinType.String()) j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2)) return } if e.applicable(s2, s1) { // This is necessary because we only iterate s1 up to subset / 2 // in DPSube() + j.m.TraceLog("Found applicable non-inner edge %d (swapped), adding join: %s", i, e.op.joinType.String()) j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1)) return } @@ -715,10 +740,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { // already been constructed, because doing so can lead to a case where an // inner join replaces a non-inner join. if innerJoinFilters == nil { + j.m.TraceLog("Adding cross join between s1=%s, s2=%s", s1.String(), s2.String()) j.addJoin(plan.JoinTypeCross, s1, s2, nil, nil, isRedundant) } else { + j.m.TraceLog("Adding inner join between s1=%s, s2=%s with filters: %v", s1.String(), s2.String(), innerJoinFilters) j.addJoin(plan.JoinTypeInner, s1, s2, innerJoinFilters, nil, isRedundant) } + } else { + j.m.TraceLog("No applicable edges found for join between s1=%s, s2=%s", s1.String(), s2.String()) } } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index cc017fad8d..bbc477be30 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -48,6 +48,7 @@ type Memo struct { scopeLen int cnt uint16 Debug bool + Trace bool } func NewMemo(ctx *sql.Context, stats sql.StatsProvider, s *plan.Scope, scopeLen int, cost Coster, qFlags *sql.QueryFlags) *Memo { @@ -71,6 +72,17 @@ func (m *Memo) HandleErr(err error) { panic(MemoErr{Err: err}) } +// TraceLog logs a trace message if tracing is enabled +func (m *Memo) TraceLog(msg string, args ...interface{}) { + if m.Trace { + if len(args) > 0 { + m.Ctx.GetLogger().Debugf("TRACE: %s", fmt.Sprintf(msg, args...)) + } else { + m.Ctx.GetLogger().Debugf("TRACE: %s", msg) + } + } +} + func (m *Memo) Root() *ExprGroup { return m.root } @@ -423,6 +435,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { return nil } + m.TraceLog("Optimizing group %d", grp.Id) n := grp.First if _, ok := n.(SourceRel); ok { // We should order the search bottom-up so that physical operators @@ -434,10 +447,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { grp.HintOk = true grp.Best = grp.First grp.Best.SetDistinct(NoDistinctOp) + m.TraceLog("Group %d: source relation, setting as best plan", grp.Id) return nil } for n != nil { + m.TraceLog("Evaluating plan %T in group %d", n, grp.Id) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -454,10 +469,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { if grp.RelProps.Distinct.IsHash() { if sortedInputs(n) { n.SetDistinct(SortedDistinctOp) + m.TraceLog("Plan %T: using sorted distinct", n) } else { n.SetDistinct(HashDistinctOp) d := &Distinct{Child: grp} relCost += float64(statsForRel(m.Ctx, d).RowCount()) + m.TraceLog("Plan %T: using hash distinct", n) } } else { n.SetDistinct(NoDistinctOp) @@ -465,6 +482,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { n.SetCost(relCost) cost += relCost + m.TraceLog("Plan %T: relCost=%.2f, totalCost=%.2f", n, relCost, cost) m.updateBest(grp, n, cost) n = n.Next() } @@ -483,15 +501,19 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { grp.Best = n grp.Cost = cost grp.HintOk = true + m.TraceLog("Set best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) return } grp.updateBest(n, cost) + m.TraceLog("Updated best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) } else if grp.Best == nil || !grp.HintOk { grp.updateBest(n, cost) + m.TraceLog("Updated best plan for group %d to plan %T with cost %.2f (no hints satisfied)", grp.Id, n, cost) } return } grp.updateBest(n, cost) + m.TraceLog("Updated best plan for group %d to plan %T with cost %.2f", grp.Id, n, cost) } func (m *Memo) BestRootPlan(ctx *sql.Context) (sql.Node, error) { From ba4bc16fcf79a189fbda3da6f6287ffff2e7ed10 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 17 Oct 2025 16:15:02 -0700 Subject: [PATCH 02/30] new trace logger --- sql/analyzer/costed_index_scan.go | 3 + sql/analyzer/indexed_joins.go | 131 ++++++++++++++++-------------- sql/memo/join_order_builder.go | 60 +++++++------- sql/memo/memo.go | 39 ++++----- sql/memo/rel_props.go | 11 ++- sql/memo/trace_logger.go | 56 +++++++++++++ 6 files changed, 186 insertions(+), 114 deletions(-) create mode 100755 sql/memo/trace_logger.go diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index e41d07cfaa..5b12889612 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -334,6 +334,9 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta } func addIndexScans(ctx *sql.Context, m *memo.Memo) error { + m.Tracer.PushDebugContext("addIndexScans") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { filter, ok := e.(*memo.Filter) if !ok { diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 3284c19bf4..52bf047171 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -137,7 +137,7 @@ func recSchemaToGetFields(n sql.Node, sch sql.Schema) []sql.Expression { func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Scope, qFlags *sql.QueryFlags) (ret sql.Node, err error) { m := memo.NewMemo(ctx, a.Catalog, scope, len(scope.Schema()), a.Coster, qFlags) m.Debug = a.Debug - m.Trace = a.Trace + m.EnableTrace(a.Trace) defer func() { if r := recover(); r != nil { @@ -162,62 +162,52 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco hints := m.SessionHints() hints = append(hints, memo.ExtractJoinHint(n)...) - m.TraceLog("Adding index scans") err = addIndexScans(ctx, m) if err != nil { return nil, err } - - m.TraceLog("Converting semi joins to inner joins") + err = convertSemiToInnerJoin(m) if err != nil { return nil, err } - - m.TraceLog("Converting anti joins to left joins") + err = convertAntiToLeftJoin(m) if err != nil { return nil, err } - - m.TraceLog("Adding right semi joins") + err = addRightSemiJoins(ctx, m) if err != nil { return nil, err } - - m.TraceLog("Adding lookup joins") + err = addLookupJoins(ctx, m) if err != nil { return nil, err } if !mergeJoinsDisabled(hints) { - m.TraceLog("Adding merge joins") err = addMergeJoins(ctx, m) if err != nil { return nil, err } } else { - m.TraceLog("Skipping merge joins (disabled by hints)") + m.Tracer.Log("Skipping merge joins (disabled by hints)") } - m.TraceLog("Computing cardinality for memo groups") - memo.CardMemoGroups(ctx, m.Root()) + m.CardMemoGroups(ctx, m.Root()) - m.TraceLog("Adding cross hash joins") err = addCrossHashJoins(m) if err != nil { return nil, err } - - m.TraceLog("Adding hash joins") + err = addHashJoins(m) if err != nil { return nil, err } - - m.TraceLog("Adding range heap joins") + err = addRangeHeapJoin(m) if err != nil { return nil, err @@ -225,18 +215,17 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco // Once we've enumerated all expression groups, we can apply hints. This must be done after expression // groups have been identified, so that the applied hints use the correct metadata. - m.TraceLog("Applying %d hints", len(hints)) for _, h := range hints { - m.TraceLog("Applying hint: %s", h.Typ.String()) + m.Tracer.Log("Applying hint: %s", []interface{}{h.Typ.String()}...) m.ApplyHint(h) } - m.TraceLog("Starting cost-based optimization") + m.Tracer.Log("Starting cost-based optimization") err = m.OptimizeRoot() if err != nil { return nil, err } - m.TraceLog("Completed cost-based optimization") + m.Tracer.Log("Completed cost-based optimization") if a.Verbose && a.Debug { a.Log("%s", m.String()) @@ -266,6 +255,9 @@ func mergeJoinsDisabled(hints []memo.Hint) bool { // attributes in the join filter. Costing is responsible for choosing the most // appropriate execution plan among options added to an expression group. func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { + m.Tracer.PushDebugContext("addLookupJoins") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { var right *memo.ExprGroup var join *memo.JoinBase @@ -280,7 +272,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { case *memo.LeftJoin: right = e.Right join = e.JoinBase - //TODO fullouterjoin + // TODO fullouterjoin case *memo.SemiJoin: right = e.Right join = e.JoinBase @@ -289,14 +281,14 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } if len(join.Filter) == 0 { - m.TraceLog("Skipping lookup join for %T - no filters", e) + m.Tracer.Log("Skipping lookup join for %T - no filters", []interface{}{e}...) return nil } - m.TraceLog("Considering lookup join for %T with %d filters", e, len(join.Filter)) + m.Tracer.Log("Considering lookup join for %T with %d filters", []interface{}{e, len(join.Filter)}...) tableId, indexes, extraFilters := lookupCandidates(right.First, false) - m.TraceLog("Found %d index candidates for lookup join", len(indexes)) + m.Tracer.Log("Found %d index candidates for lookup join", []interface{}{len(indexes)}...) var rt sql.TableNode var aliasName string @@ -307,12 +299,12 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { var ok bool rt, ok = n.Child.(sql.TableNode) if !ok { - m.TraceLog("Skipping lookup join - table alias child is not TableNode") + m.Tracer.Log("Skipping lookup join - table alias child is not TableNode") return nil } aliasName = n.Name() default: - m.TraceLog("Skipping lookup join - unsupported table node type: %T", n) + m.Tracer.Log("Skipping lookup join - unsupported table node type: %T", []interface{}{n}...) return nil } @@ -353,14 +345,14 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { for i, idx := range indexes { keyExprs, matchedFilters, nullmask := keyExprsForIndex(tableId, idx.Cols(), append(join.Filter, extraFilters...)) if keyExprs == nil { - m.TraceLog("Index %d: no matching key expressions found", i) + m.Tracer.Log("Index %d: no matching key expressions found", []interface{}{i}...) continue } - m.TraceLog("Index %d: found %d key expressions, %d matched filters", i, len(keyExprs), len(matchedFilters)) - + m.Tracer.Log("Index %d: found %d key expressions, %d matched filters", []interface{}{i, len(keyExprs), len(matchedFilters)}...) + ita, err := plan.NewIndexedAccessForTableNode(ctx, rt, plan.NewLookupBuilder(idx.SqlIdx(), keyExprs, nullmask)) if err != nil { - m.TraceLog("Index %d: failed to create indexed table access: %v", i, err) + m.Tracer.Log("Index %d: failed to create indexed table access: %v", []interface{}{i, err}...) return err } lookup := &memo.IndexScan{ @@ -382,7 +374,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } } - m.TraceLog("Adding lookup join with index %d, %d remaining filters", i, len(filters)) + m.Tracer.Log("Adding lookup join with index %d, %d remaining filters", []interface{}{i, len(filters)}...) m.MemoizeLookupJoin(e.Group(), join.Left, join.Right, join.Op, filters, lookup) } return nil @@ -466,6 +458,9 @@ func exprRefsTable(e sql.Expression, tableId sql.TableId) bool { // https://www.researchgate.net/publication/221311318_Cost-Based_Query_Transformation_in_Oracle // TODO: need more elegant way to extend the number of groups, interner func convertSemiToInnerJoin(m *memo.Memo) error { + m.Tracer.PushDebugContext("convertSemiToInnerJoin") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { semi, ok := e.(*memo.SemiJoin) if !ok { @@ -556,6 +551,9 @@ func convertSemiToInnerJoin(m *memo.Memo) error { // convertAntiToLeftJoin adds left join alternatives for anti join // ANTI_JOIN(left, right) => PROJECT(left sch) -> FILTER(right attr IS NULL) -> LEFT_JOIN(left, right) func convertAntiToLeftJoin(m *memo.Memo) error { + m.Tracer.PushDebugContext("convertAntiToLeftJoin") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { anti, ok := e.(*memo.AntiJoin) if !ok { @@ -660,6 +658,9 @@ func convertAntiToLeftJoin(m *memo.Memo) error { // addRightSemiJoins allows for a reversed semiJoin operator when // the join attributes of the left side are provably unique. func addRightSemiJoins(ctx *sql.Context, m *memo.Memo) error { + m.Tracer.PushDebugContext("addRightSemiJoins") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { semi, ok := e.(*memo.SemiJoin) if !ok { @@ -781,6 +782,9 @@ func dfsLookupCandidates(rel memo.RelExpr, limitOk bool) (sql.TableId, []*memo.I } func addCrossHashJoins(m *memo.Memo) error { + m.Tracer.PushDebugContext("addCrossHashJoins") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.CrossJoin: @@ -817,6 +821,9 @@ func addCrossHashJoins(m *memo.Memo) error { } func addHashJoins(m *memo.Memo) error { + m.Tracer.PushDebugContext("addHashJoins") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: @@ -826,42 +833,42 @@ func addHashJoins(m *memo.Memo) error { join := e.(memo.JoinRel).JoinPrivate() if len(join.Filter) == 0 { - m.TraceLog("Skipping hash join for %T - no filters", e) + m.Tracer.Log("Skipping hash join for %T - no filters", []interface{}{e}...) return nil } - m.TraceLog("Considering hash join for %T with %d filters", e, len(join.Filter)) + m.Tracer.Log("Considering hash join for %T with %d filters", []interface{}{e, len(join.Filter)}...) var fromExpr, toExpr []sql.Expression for i, f := range join.Filter { switch f := f.(type) { case *expression.Equals: if satisfiesScalarRefs(f.Left(), join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { + satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) - m.TraceLog("Filter %d: left->right hash key mapping", i) + m.Tracer.Log("Filter %d: left->right hash key mapping", []interface{}{i}...) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { + satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) - m.TraceLog("Filter %d: right->left hash key mapping", i) + m.Tracer.Log("Filter %d: right->left hash key mapping", []interface{}{i}...) } else { - m.TraceLog("Filter %d: does not satisfy scalar refs for hash join", i) + m.Tracer.Log("Filter %d: does not satisfy scalar refs for hash join", []interface{}{i}...) return nil } default: - m.TraceLog("Filter %d: not an equality expression, skipping hash join", i) + m.Tracer.Log("Filter %d: not an equality expression, skipping hash join", []interface{}{i}...) return nil } } switch join.Right.First.(type) { case *memo.RecursiveTable: - m.TraceLog("Skipping hash join - right side is recursive table") + m.Tracer.Log("Skipping hash join - right side is recursive table") return nil } - m.TraceLog("Adding hash join with %d key expressions", len(toExpr)) + m.Tracer.Log("Adding hash join with %d key expressions", []interface{}{len(toExpr)}...) m.MemoizeHashJoin(e.Group(), join, toExpr, fromExpr) return nil }) @@ -951,6 +958,9 @@ func getRangeFilters(filters []sql.Expression) (ranges []rangeFilter) { // - SELECT * FROM a JOIN b on a.value BETWEEN b.min AND b.max // - SELECT * FROM a JOIN b on b.min <= a.value AND a.value < b.max func addRangeHeapJoin(m *memo.Memo) error { + m.Tracer.PushDebugContext("addRangeHeapJoin") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: @@ -1056,6 +1066,9 @@ func satisfiesScalarRefs(e sql.Expression, tables sql.FastIntSet) bool { // filter. // TODO: sort-merge joins func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { + m.Tracer.PushDebugContext("addMergeJoins") + defer m.Tracer.PopDebugContext() + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { var join *memo.JoinBase switch e := e.(type) { @@ -1063,27 +1076,27 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { join = e.JoinBase case *memo.LeftJoin: join = e.JoinBase - //TODO semijoin, antijoin, fullouterjoin + // TODO semijoin, antijoin, fullouterjoin default: return nil } if len(join.Filter) == 0 { - m.TraceLog("Skipping merge join for %T - no filters", e) + m.Tracer.Log("Skipping merge join for %T - no filters", []interface{}{e}...) return nil } - m.TraceLog("Considering merge join for %T with %d filters", e, len(join.Filter)) + m.Tracer.Log("Considering merge join for %T with %d filters", []interface{}{e, len(join.Filter)}...) leftTabId, lIndexes, lFilters := lookupCandidates(join.Left.First, true) rightTabId, rIndexes, rFilters := lookupCandidates(join.Right.First, true) if leftTabId == 0 || rightTabId == 0 { - m.TraceLog("Skipping merge join - no valid table candidates found") + m.Tracer.Log("Skipping merge join - no valid table candidates found") return nil } - m.TraceLog("Found %d left indexes, %d right indexes for merge join", len(lIndexes), len(rIndexes)) + m.Tracer.Log("Found %d left indexes, %d right indexes for merge join", []interface{}{len(lIndexes), len(rIndexes)}...) leftTab := join.Left.RelProps.TableIdNodes()[0] rightTab := join.Right.RelProps.TableIdNodes()[0] @@ -1099,7 +1112,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { r := eq.Right() if !expressionReferencesOneColumn(l) || - !expressionReferencesOneColumn(r) { + !expressionReferencesOneColumn(r) { continue } @@ -1110,10 +1123,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { var swap bool if expressionReferencesTable(l, leftTabId) && - expressionReferencesTable(r, rightTabId) { + expressionReferencesTable(r, rightTabId) { } else if expressionReferencesTable(r, leftTabId) && - expressionReferencesTable(l, rightTabId) { + expressionReferencesTable(l, rightTabId) { swap = true l, r = r, l } else { @@ -1143,21 +1156,21 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { if lIndex.Order() == sql.IndexOrderNone { // lookups can be unordered, merge indexes need to // be globally ordered - m.TraceLog("Left index %d: skipping - unordered index", i) + m.Tracer.Log("Left index %d: skipping - unordered index", []interface{}{i}...) continue } matchedEqFilters := matchedFiltersForLeftIndex(lIndex, join.Left.RelProps.FuncDeps().Constants(), eqFilters) - m.TraceLog("Left index %d: matched %d equality filters", i, len(matchedEqFilters)) - + m.Tracer.Log("Left index %d: matched %d equality filters", []interface{}{i, len(matchedEqFilters)}...) + for len(matchedEqFilters) > 0 { for j, rIndex := range rIndexes { if rIndex.Order() == sql.IndexOrderNone { - m.TraceLog("Right index %d: skipping - unordered index", j) + m.Tracer.Log("Right index %d: skipping - unordered index", []interface{}{j}...) continue } if rightIndexMatchesFilters(rIndex, join.Left.RelProps.FuncDeps().Constants(), matchedEqFilters) { - m.TraceLog("Found matching index pair: left[%d] <-> right[%d]", i, j) + m.Tracer.Log("Found matching index pair: left[%d] <-> right[%d]", []interface{}{i, j}...) jb := join.Copy() if d, ok := jb.Left.First.(*memo.Distinct); ok && lIndex.SqlIdx().IsUnique() { jb.Left = d.Child @@ -1200,10 +1213,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { return err } if !success { - m.TraceLog("Failed to create index scan for right index %d", j) + m.Tracer.Log("Failed to create index scan for right index %d", []interface{}{j}...) continue } - m.TraceLog("Adding merge join with left index %d, right index %d", i, j) + m.Tracer.Log("Adding merge join with left index %d, right index %d", []interface{}{i, j}...) m.MemoizeMergeJoin(e.Group(), join.Left, join.Right, lIndexScan, rIndexScan, jb.Op.AsMerge(), newFilters, false) } } diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index 931fe9fa1e..407bb09198 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -166,34 +166,34 @@ func (j *joinOrderBuilder) useFastReorder() bool { } func (j *joinOrderBuilder) ReorderJoin(n sql.Node) { - j.m.TraceLog("Starting join reordering for node: %T", n) + j.m.Tracer.Log("Starting join reordering for node: %T", []interface{}{n}...) j.populateSubgraph(n) - + if j.useFastReorder() { - j.m.TraceLog("Using fast reorder algorithm (large join with %d tables)", len(j.vertices)) + j.m.Tracer.Log("Using fast reorder algorithm (large join with %d tables)", []interface{}{len(j.vertices)}...) if j.buildSingleLookupPlan() { - j.m.TraceLog("Successfully built single lookup plan") + j.m.Tracer.Log("Successfully built single lookup plan", []interface{}{}...) return } - j.m.TraceLog("Failed to build single lookup plan, falling back to exhaustive enumeration") + j.m.Tracer.Log("Failed to build single lookup plan, falling back to exhaustive enumeration", []interface{}{}...) } else if j.hasCrossJoin { - j.m.TraceLog("Join contains cross joins, attempting single lookup plan first") + j.m.Tracer.Log("Join contains cross joins, attempting single lookup plan first", []interface{}{}...) // Rely on FastReorder to avoid plans that drop filters with cross joins if j.buildSingleLookupPlan() { - j.m.TraceLog("Successfully built single lookup plan for cross join") + j.m.Tracer.Log("Successfully built single lookup plan for cross join", []interface{}{}...) return } - j.m.TraceLog("Failed to build single lookup plan for cross join, using exhaustive enumeration") + j.m.Tracer.Log("Failed to build single lookup plan for cross join, using exhaustive enumeration", []interface{}{}...) } - + // TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis // from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders // for the single-lookup plan, which would complicate things. - j.m.TraceLog("Building transitive closure for %d edges", len(j.edges)) + j.m.Tracer.Log("Building transitive closure for %d edges", []interface{}{len(j.edges)}...) j.ensureClosure(j.m.root) - j.m.TraceLog("Starting exhaustive subset enumeration") + j.m.Tracer.Log("Starting exhaustive subset enumeration", []interface{}{}...) j.dpEnumerateSubsets() - j.m.TraceLog("Completed join reordering") + j.m.Tracer.Log("Completed join reordering", []interface{}{}...) return } @@ -487,13 +487,13 @@ func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { } func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { - j.m.TraceLog("Building join operation: %s", n.JoinType().String()) + j.m.Tracer.Log("Building join operation: %s", []interface{}{n.JoinType().String()}...) leftV, leftE, _ := j.populateSubgraph(n.Left()) rightV, rightE, _ := j.populateSubgraph(n.Right()) typ := n.JoinType() if typ.IsPhysical() { typ = plan.JoinTypeInner - j.m.TraceLog("Converted physical join type to inner join") + j.m.Tracer.Log("Converted physical join type to inner join", []interface{}{}...) } isInner := typ.IsInner() op := &operator{ @@ -505,7 +505,7 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { } filters := expression.SplitConjunction(n.JoinCond()) - j.m.TraceLog("Join filters: %v", filters) + j.m.Tracer.Log("Join filters: %v", []interface{}{filters}...) union := leftV.union(rightV) group, ok := j.plans[union] if !ok { @@ -515,14 +515,14 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { group = j.memoize(op.joinType, left, right, filters, nil) j.plans[union] = group j.m.root = group - j.m.TraceLog("Created new memo group for join combination") + j.m.Tracer.Log("Created new memo group for join combination", []interface{}{}...) } if !isInner { - j.m.TraceLog("Building non-inner edge for join type: %s", typ.String()) + j.m.Tracer.Log("Building non-inner edge for join type: %s", []interface{}{typ.String()}...) j.buildNonInnerEdge(op, filters...) } else { - j.m.TraceLog("Building inner edge for join type: %s", typ.String()) + j.m.Tracer.Log("Building inner edge for join type: %s", []interface{}{typ.String()}...) j.buildInnerEdge(op, filters...) } return group @@ -687,19 +687,19 @@ func setPrinter(all, s1, s2 vertexSet) { // addPlans finds operators that let us join (s1 op s2) and (s2 op s1). func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { - j.m.TraceLog("Considering join between vertex sets s1=%s, s2=%s", s1.String(), s2.String()) - + j.m.Tracer.Log("Considering join between vertex sets s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) + // all inner filters could be applied if j.plans[s1] == nil || j.plans[s2] == nil { // Both inputs must have plans. // need this to prevent cross-joins higher in tree - j.m.TraceLog("Skipping join - one or both input plans are nil") + j.m.Tracer.Log("Skipping join - one or both input plans are nil", []interface{}{}...) return } - //TODO collect all inner join filters that can be used as select filters - //TODO collect functional dependencies to avoid redundant filters - //TODO relational nodes track functional dependencies + // TODO collect all inner join filters that can be used as select filters + // TODO collect functional dependencies to avoid redundant filters + // TODO relational nodes track functional dependencies var innerJoinFilters []sql.Expression var addInnerJoin bool @@ -713,7 +713,7 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { } isRedundant = isRedundant || e.joinIsRedundant(s1, s2) addInnerJoin = true - j.m.TraceLog("Found applicable inner edge %d with filters: %v", i, e.filters) + j.m.Tracer.Log("Found applicable inner edge %d with filters: %v", []interface{}{i, e.filters}...) } } @@ -722,14 +722,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) { e := &j.edges[i] if e.applicable(s1, s2) { - j.m.TraceLog("Found applicable non-inner edge %d, adding join: %s", i, e.op.joinType.String()) + j.m.Tracer.Log("Found applicable non-inner edge %d, adding join: %s", []interface{}{i, e.op.joinType.String()}...) j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2)) return } if e.applicable(s2, s1) { // This is necessary because we only iterate s1 up to subset / 2 // in DPSube() - j.m.TraceLog("Found applicable non-inner edge %d (swapped), adding join: %s", i, e.op.joinType.String()) + j.m.Tracer.Log("Found applicable non-inner edge %d (swapped), adding join: %s", []interface{}{i, e.op.joinType.String()}...) j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1)) return } @@ -740,14 +740,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { // already been constructed, because doing so can lead to a case where an // inner join replaces a non-inner join. if innerJoinFilters == nil { - j.m.TraceLog("Adding cross join between s1=%s, s2=%s", s1.String(), s2.String()) + j.m.Tracer.Log("Adding cross join between s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) j.addJoin(plan.JoinTypeCross, s1, s2, nil, nil, isRedundant) } else { - j.m.TraceLog("Adding inner join between s1=%s, s2=%s with filters: %v", s1.String(), s2.String(), innerJoinFilters) + j.m.Tracer.Log("Adding inner join between s1=%s, s2=%s with filters: %v", []interface{}{s1.String(), s2.String(), innerJoinFilters}...) j.addJoin(plan.JoinTypeInner, s1, s2, innerJoinFilters, nil, isRedundant) } } else { - j.m.TraceLog("No applicable edges found for join between s1=%s, s2=%s", s1.String(), s2.String()) + j.m.Tracer.Log("No applicable edges found for join between s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) } } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index bbc477be30..7d2ee5c91c 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -48,7 +48,7 @@ type Memo struct { scopeLen int cnt uint16 Debug bool - Trace bool + Tracer *TraceLogger } func NewMemo(ctx *sql.Context, stats sql.StatsProvider, s *plan.Scope, scopeLen int, cost Coster, qFlags *sql.QueryFlags) *Memo { @@ -61,6 +61,7 @@ func NewMemo(ctx *sql.Context, stats sql.StatsProvider, s *plan.Scope, scopeLen TableProps: newTableProps(), hints: &joinHints{}, QFlags: qFlags, + Tracer: &TraceLogger{}, } } @@ -72,15 +73,8 @@ func (m *Memo) HandleErr(err error) { panic(MemoErr{Err: err}) } -// TraceLog logs a trace message if tracing is enabled -func (m *Memo) TraceLog(msg string, args ...interface{}) { - if m.Trace { - if len(args) > 0 { - m.Ctx.GetLogger().Debugf("TRACE: %s", fmt.Sprintf(msg, args...)) - } else { - m.Ctx.GetLogger().Debugf("TRACE: %s", msg) - } - } +func (m *Memo) EnableTrace(enable bool) { + m.Tracer.traceEnabled = enable } func (m *Memo) Root() *ExprGroup { @@ -411,6 +405,9 @@ func (m *Memo) MemoizeMax1Row(grp, child *ExprGroup) *ExprGroup { // OptimizeRoot finds the implementation for the root expression // that has the lowest cost. func (m *Memo) OptimizeRoot() error { + m.Tracer.PushDebugContext("OptimizeRoot") + defer m.Tracer.PopDebugContext() + err := m.optimizeMemoGroup(m.root) if err != nil { return err @@ -435,7 +432,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { return nil } - m.TraceLog("Optimizing group %d", grp.Id) + m.Tracer.Log("Optimizing group %d", grp.Id) n := grp.First if _, ok := n.(SourceRel); ok { // We should order the search bottom-up so that physical operators @@ -447,12 +444,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { grp.HintOk = true grp.Best = grp.First grp.Best.SetDistinct(NoDistinctOp) - m.TraceLog("Group %d: source relation, setting as best plan", grp.Id) + m.Tracer.Log("Group %d: source relation, setting as best plan", grp.Id) return nil } for n != nil { - m.TraceLog("Evaluating plan %T in group %d", n, grp.Id) + m.Tracer.Log("Evaluating plan %T in group %d", n, grp.Id) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -469,12 +466,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { if grp.RelProps.Distinct.IsHash() { if sortedInputs(n) { n.SetDistinct(SortedDistinctOp) - m.TraceLog("Plan %T: using sorted distinct", n) + m.Tracer.Log("Plan %T: using sorted distinct", n) } else { n.SetDistinct(HashDistinctOp) d := &Distinct{Child: grp} - relCost += float64(statsForRel(m.Ctx, d).RowCount()) - m.TraceLog("Plan %T: using hash distinct", n) + relCost += float64(m.statsForRel(m.Ctx, d).RowCount()) + m.Tracer.Log("Plan %T: using hash distinct", n) } } else { n.SetDistinct(NoDistinctOp) @@ -482,7 +479,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { n.SetCost(relCost) cost += relCost - m.TraceLog("Plan %T: relCost=%.2f, totalCost=%.2f", n, relCost, cost) + m.Tracer.Log("Plan %T: relCost=%.2f, totalCost=%.2f", n, relCost, cost) m.updateBest(grp, n, cost) n = n.Next() } @@ -501,19 +498,19 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { grp.Best = n grp.Cost = cost grp.HintOk = true - m.TraceLog("Set best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Set best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) return } grp.updateBest(n, cost) - m.TraceLog("Updated best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) } else if grp.Best == nil || !grp.HintOk { grp.updateBest(n, cost) - m.TraceLog("Updated best plan for group %d to plan %T with cost %.2f (no hints satisfied)", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to plan %T with cost %.2f (no hints satisfied)", grp.Id, n, cost) } return } grp.updateBest(n, cost) - m.TraceLog("Updated best plan for group %d to plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to plan %T with cost %.2f", grp.Id, n, cost) } func (m *Memo) BestRootPlan(ctx *sql.Context) (sql.Node, error) { diff --git a/sql/memo/rel_props.go b/sql/memo/rel_props.go index f1cc8d5d17..e0ebc2c0b3 100644 --- a/sql/memo/rel_props.go +++ b/sql/memo/rel_props.go @@ -312,7 +312,7 @@ func (p *relProps) populateFds() { p.fds = fds } -func CardMemoGroups(ctx *sql.Context, g *ExprGroup) { +func (m *Memo) CardMemoGroups(ctx *sql.Context, g *ExprGroup) { // card checking is called after indexScans and lookups joins are generated, // both of which have metadata that makes cardinality estimation more // accurate. @@ -320,13 +320,16 @@ func CardMemoGroups(ctx *sql.Context, g *ExprGroup) { return } for _, g := range g.children() { - CardMemoGroups(ctx, g) + m.CardMemoGroups(ctx, g) } - s := statsForRel(ctx, g.First) + s := m.statsForRel(ctx, g.First) g.RelProps.SetStats(s) } -func statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic { +func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic { + m.Tracer.PushDebugContext("statsForRel") + defer m.Tracer.PopDebugContext() + var stat sql.Statistic switch rel := rel.(type) { case JoinRel: diff --git a/sql/memo/trace_logger.go b/sql/memo/trace_logger.go new file mode 100755 index 0000000000..c9a54964e1 --- /dev/null +++ b/sql/memo/trace_logger.go @@ -0,0 +1,56 @@ +// Copyright 2025 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package memo + +import ( + "strings" + + "github.com/sirupsen/logrus" +) + +type TraceLogger struct { + // A stack of debugger context. See PushDebugContext, PopDebugContext + contextStack []string + traceEnabled bool +} + +var log = logrus.New() + +// PushDebugContext pushes the given context string onto the context stack, to use when logging debug messages. +func (a *TraceLogger) PushDebugContext(msg string) { + if a != nil && a.traceEnabled { + a.contextStack = append(a.contextStack, msg) + } +} + +// PopDebugContext pops a context message off the context stack. +func (a *TraceLogger) PopDebugContext() { + if a != nil && len(a.contextStack) > 0 { + a.contextStack = a.contextStack[:len(a.contextStack)-1] + } +} + +// Log prints an INFO message to stdout with the given message and args +// if the analyzer is in debug mode. +func (a *TraceLogger) Log(msg string, args ...interface{}) { + if a != nil && a.traceEnabled { + if len(a.contextStack) > 0 { + ctx := strings.Join(a.contextStack, "/") + log.Infof("%s: "+msg, append([]interface{}{ctx}, args...)...) + } else { + log.Infof(msg, args...) + } + } +} \ No newline at end of file From 2eb911c26d1f2bf9ab38e4458679a068ba48fb56 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 16:10:17 -0700 Subject: [PATCH 03/30] Fixing unnecessary boxing --- sql/analyzer/indexed_joins.go | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 52bf047171..f1a9c1c683 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -181,7 +181,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco if err != nil { return nil, err } - + err = addLookupJoins(ctx, m) if err != nil { return nil, err @@ -216,7 +216,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco // Once we've enumerated all expression groups, we can apply hints. This must be done after expression // groups have been identified, so that the applied hints use the correct metadata. for _, h := range hints { - m.Tracer.Log("Applying hint: %s", []interface{}{h.Typ.String()}...) + m.Tracer.Log("Applying hint: %s", h.Typ.String()) m.ApplyHint(h) } @@ -257,7 +257,7 @@ func mergeJoinsDisabled(hints []memo.Hint) bool { func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { m.Tracer.PushDebugContext("addLookupJoins") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { var right *memo.ExprGroup var join *memo.JoinBase @@ -281,14 +281,14 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } if len(join.Filter) == 0 { - m.Tracer.Log("Skipping lookup join for %T - no filters", []interface{}{e}...) + m.Tracer.Log("Skipping lookup join for %T - no filters", e) return nil } - m.Tracer.Log("Considering lookup join for %T with %d filters", []interface{}{e, len(join.Filter)}...) + m.Tracer.Log("Considering lookup join for %T with %d filters", e, len(join.Filter)) tableId, indexes, extraFilters := lookupCandidates(right.First, false) - m.Tracer.Log("Found %d index candidates for lookup join", []interface{}{len(indexes)}...) + m.Tracer.Log("Found %d index candidates for lookup join", len(indexes)) var rt sql.TableNode var aliasName string @@ -304,7 +304,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } aliasName = n.Name() default: - m.Tracer.Log("Skipping lookup join - unsupported table node type: %T", []interface{}{n}...) + m.Tracer.Log("Skipping lookup join - unsupported table node type: %T", n) return nil } @@ -345,14 +345,14 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { for i, idx := range indexes { keyExprs, matchedFilters, nullmask := keyExprsForIndex(tableId, idx.Cols(), append(join.Filter, extraFilters...)) if keyExprs == nil { - m.Tracer.Log("Index %d: no matching key expressions found", []interface{}{i}...) + m.Tracer.Log("Index %d: no matching key expressions found", i) continue } - m.Tracer.Log("Index %d: found %d key expressions, %d matched filters", []interface{}{i, len(keyExprs), len(matchedFilters)}...) + m.Tracer.Log("Index %d: found %d key expressions, %d matched filters", i, len(keyExprs), len(matchedFilters)) ita, err := plan.NewIndexedAccessForTableNode(ctx, rt, plan.NewLookupBuilder(idx.SqlIdx(), keyExprs, nullmask)) if err != nil { - m.Tracer.Log("Index %d: failed to create indexed table access: %v", []interface{}{i, err}...) + m.Tracer.Log("Index %d: failed to create indexed table access: %v", i, err) return err } lookup := &memo.IndexScan{ @@ -374,7 +374,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } } - m.Tracer.Log("Adding lookup join with index %d, %d remaining filters", []interface{}{i, len(filters)}...) + m.Tracer.Log("Adding lookup join with index %d, %d remaining filters", i, len(filters)) m.MemoizeLookupJoin(e.Group(), join.Left, join.Right, join.Op, filters, lookup) } return nil @@ -460,7 +460,7 @@ func exprRefsTable(e sql.Expression, tableId sql.TableId) bool { func convertSemiToInnerJoin(m *memo.Memo) error { m.Tracer.PushDebugContext("convertSemiToInnerJoin") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { semi, ok := e.(*memo.SemiJoin) if !ok { @@ -553,7 +553,7 @@ func convertSemiToInnerJoin(m *memo.Memo) error { func convertAntiToLeftJoin(m *memo.Memo) error { m.Tracer.PushDebugContext("convertAntiToLeftJoin") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { anti, ok := e.(*memo.AntiJoin) if !ok { @@ -660,7 +660,7 @@ func convertAntiToLeftJoin(m *memo.Memo) error { func addRightSemiJoins(ctx *sql.Context, m *memo.Memo) error { m.Tracer.PushDebugContext("addRightSemiJoins") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { semi, ok := e.(*memo.SemiJoin) if !ok { @@ -784,7 +784,7 @@ func dfsLookupCandidates(rel memo.RelExpr, limitOk bool) (sql.TableId, []*memo.I func addCrossHashJoins(m *memo.Memo) error { m.Tracer.PushDebugContext("addCrossHashJoins") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.CrossJoin: @@ -823,7 +823,7 @@ func addCrossHashJoins(m *memo.Memo) error { func addHashJoins(m *memo.Memo) error { m.Tracer.PushDebugContext("addHashJoins") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: @@ -833,11 +833,11 @@ func addHashJoins(m *memo.Memo) error { join := e.(memo.JoinRel).JoinPrivate() if len(join.Filter) == 0 { - m.Tracer.Log("Skipping hash join for %T - no filters", []interface{}{e}...) + m.Tracer.Log("Skipping hash join for %T - no filters", e) return nil } - m.Tracer.Log("Considering hash join for %T with %d filters", []interface{}{e, len(join.Filter)}...) + m.Tracer.Log("Considering hash join for %T with %d filters", e, len(join.Filter)) var fromExpr, toExpr []sql.Expression for i, f := range join.Filter { @@ -847,18 +847,18 @@ func addHashJoins(m *memo.Memo) error { satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) - m.Tracer.Log("Filter %d: left->right hash key mapping", []interface{}{i}...) + m.Tracer.Log("Filter %d: left->right hash key mapping", i) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) - m.Tracer.Log("Filter %d: right->left hash key mapping", []interface{}{i}...) + m.Tracer.Log("Filter %d: right->left hash key mapping", i) } else { - m.Tracer.Log("Filter %d: does not satisfy scalar refs for hash join", []interface{}{i}...) + m.Tracer.Log("Filter %d: does not satisfy scalar refs for hash join", i) return nil } default: - m.Tracer.Log("Filter %d: not an equality expression, skipping hash join", []interface{}{i}...) + m.Tracer.Log("Filter %d: not an equality expression, skipping hash join", i) return nil } } @@ -868,7 +868,7 @@ func addHashJoins(m *memo.Memo) error { return nil } - m.Tracer.Log("Adding hash join with %d key expressions", []interface{}{len(toExpr)}...) + m.Tracer.Log("Adding hash join with %d key expressions", len(toExpr)) m.MemoizeHashJoin(e.Group(), join, toExpr, fromExpr) return nil }) @@ -960,7 +960,7 @@ func getRangeFilters(filters []sql.Expression) (ranges []rangeFilter) { func addRangeHeapJoin(m *memo.Memo) error { m.Tracer.PushDebugContext("addRangeHeapJoin") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: @@ -985,8 +985,8 @@ func addRangeHeapJoin(m *memo.Memo) error { for _, filter := range getRangeFilters(join.Filter) { if !(satisfiesScalarRefs(filter.value, join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(filter.min, join.Right.RelProps.OutputTables()) && - satisfiesScalarRefs(filter.max, join.Right.RelProps.OutputTables())) { + satisfiesScalarRefs(filter.min, join.Right.RelProps.OutputTables()) && + satisfiesScalarRefs(filter.max, join.Right.RelProps.OutputTables())) { return nil } // For now, only match expressions that are exactly a column reference. @@ -1068,7 +1068,7 @@ func satisfiesScalarRefs(e sql.Expression, tables sql.FastIntSet) bool { func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { m.Tracer.PushDebugContext("addMergeJoins") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { var join *memo.JoinBase switch e := e.(type) { @@ -1082,11 +1082,11 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { } if len(join.Filter) == 0 { - m.Tracer.Log("Skipping merge join for %T - no filters", []interface{}{e}...) + m.Tracer.Log("Skipping merge join for %T - no filters", e) return nil } - m.Tracer.Log("Considering merge join for %T with %d filters", []interface{}{e, len(join.Filter)}...) + m.Tracer.Log("Considering merge join for %T with %d filters", e, len(join.Filter)) leftTabId, lIndexes, lFilters := lookupCandidates(join.Left.First, true) rightTabId, rIndexes, rFilters := lookupCandidates(join.Right.First, true) @@ -1096,7 +1096,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { return nil } - m.Tracer.Log("Found %d left indexes, %d right indexes for merge join", []interface{}{len(lIndexes), len(rIndexes)}...) + m.Tracer.Log("Found %d left indexes, %d right indexes for merge join", len(lIndexes), len(rIndexes)) leftTab := join.Left.RelProps.TableIdNodes()[0] rightTab := join.Right.RelProps.TableIdNodes()[0] @@ -1156,21 +1156,21 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { if lIndex.Order() == sql.IndexOrderNone { // lookups can be unordered, merge indexes need to // be globally ordered - m.Tracer.Log("Left index %d: skipping - unordered index", []interface{}{i}...) + m.Tracer.Log("Left index %d: skipping - unordered index", i) continue } matchedEqFilters := matchedFiltersForLeftIndex(lIndex, join.Left.RelProps.FuncDeps().Constants(), eqFilters) - m.Tracer.Log("Left index %d: matched %d equality filters", []interface{}{i, len(matchedEqFilters)}...) + m.Tracer.Log("Left index %d: matched %d equality filters", i, len(matchedEqFilters)) for len(matchedEqFilters) > 0 { for j, rIndex := range rIndexes { if rIndex.Order() == sql.IndexOrderNone { - m.Tracer.Log("Right index %d: skipping - unordered index", []interface{}{j}...) + m.Tracer.Log("Right index %d: skipping - unordered index", j) continue } if rightIndexMatchesFilters(rIndex, join.Left.RelProps.FuncDeps().Constants(), matchedEqFilters) { - m.Tracer.Log("Found matching index pair: left[%d] <-> right[%d]", []interface{}{i, j}...) + m.Tracer.Log("Found matching index pair: left[%d] <-> right[%d]", i, j) jb := join.Copy() if d, ok := jb.Left.First.(*memo.Distinct); ok && lIndex.SqlIdx().IsUnique() { jb.Left = d.Child @@ -1213,10 +1213,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { return err } if !success { - m.Tracer.Log("Failed to create index scan for right index %d", []interface{}{j}...) + m.Tracer.Log("Failed to create index scan for right index %d", j) continue } - m.Tracer.Log("Adding merge join with left index %d, right index %d", []interface{}{i, j}...) + m.Tracer.Log("Adding merge join with left index %d, right index %d", i, j) m.MemoizeMergeJoin(e.Group(), join.Left, join.Right, lIndexScan, rIndexScan, jb.Op.AsMerge(), newFilters, false) } } From 3ce4000713eafaed1e52e6024c0ebc0d8ddcbb5b Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 16:19:24 -0700 Subject: [PATCH 04/30] better logging --- sql/analyzer/indexed_joins.go | 38 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index f1a9c1c683..0ab19198eb 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -285,8 +285,6 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { return nil } - m.Tracer.Log("Considering lookup join for %T with %d filters", e, len(join.Filter)) - tableId, indexes, extraFilters := lookupCandidates(right.First, false) m.Tracer.Log("Found %d index candidates for lookup join", len(indexes)) @@ -342,17 +340,17 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { return nil } - for i, idx := range indexes { + for _, idx := range indexes { keyExprs, matchedFilters, nullmask := keyExprsForIndex(tableId, idx.Cols(), append(join.Filter, extraFilters...)) if keyExprs == nil { - m.Tracer.Log("Index %d: no matching key expressions found", i) + m.Tracer.Log("Index %s: no matching key expressions found", idx.SqlIdx().ID()) continue } - m.Tracer.Log("Index %d: found %d key expressions, %d matched filters", i, len(keyExprs), len(matchedFilters)) + m.Tracer.Log("Index %s: found %d key expressions, %d matched filters", idx.SqlIdx().ID(), len(keyExprs), len(matchedFilters)) ita, err := plan.NewIndexedAccessForTableNode(ctx, rt, plan.NewLookupBuilder(idx.SqlIdx(), keyExprs, nullmask)) if err != nil { - m.Tracer.Log("Index %d: failed to create indexed table access: %v", i, err) + m.Tracer.Log("Index %s: failed to create indexed table access: %v", idx.SqlIdx().ID(), err) return err } lookup := &memo.IndexScan{ @@ -374,7 +372,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { } } - m.Tracer.Log("Adding lookup join with index %d, %d remaining filters", i, len(filters)) + m.Tracer.Log("Adding lookup join with index %s, %d remaining filters", idx.SqlIdx().ID(), len(filters)) m.MemoizeLookupJoin(e.Group(), join.Left, join.Right, join.Op, filters, lookup) } return nil @@ -840,25 +838,25 @@ func addHashJoins(m *memo.Memo) error { m.Tracer.Log("Considering hash join for %T with %d filters", e, len(join.Filter)) var fromExpr, toExpr []sql.Expression - for i, f := range join.Filter { + for _, f := range join.Filter { switch f := f.(type) { case *expression.Equals: if satisfiesScalarRefs(f.Left(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) - m.Tracer.Log("Filter %d: left->right hash key mapping", i) + m.Tracer.Log("Filter %s: left->right hash key mapping", f.String()) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) - m.Tracer.Log("Filter %d: right->left hash key mapping", i) + m.Tracer.Log("Filter %s: right->left hash key mapping", f.String()) } else { - m.Tracer.Log("Filter %d: does not satisfy scalar refs for hash join", i) + m.Tracer.Log("Filter %s: does not satisfy scalar refs for hash join", f.String()) return nil } default: - m.Tracer.Log("Filter %d: not an equality expression, skipping hash join", i) + m.Tracer.Log("Filter %s: not an equality expression, skipping hash join", f.String()) return nil } } @@ -1152,25 +1150,25 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { // While matchedFilters is not empty: // Check to see if any rIndexes match that set of filters // Remove the last matched filter - for i, lIndex := range lIndexes { + for _, lIndex := range lIndexes { if lIndex.Order() == sql.IndexOrderNone { // lookups can be unordered, merge indexes need to // be globally ordered - m.Tracer.Log("Left index %d: skipping - unordered index", i) + m.Tracer.Log("Left index %s: skipping - unordered index", lIndex.SqlIdx().ID()) continue } matchedEqFilters := matchedFiltersForLeftIndex(lIndex, join.Left.RelProps.FuncDeps().Constants(), eqFilters) - m.Tracer.Log("Left index %d: matched %d equality filters", i, len(matchedEqFilters)) + m.Tracer.Log("Left index %s: matched %d equality filters", lIndex.SqlIdx().ID(), len(matchedEqFilters)) for len(matchedEqFilters) > 0 { - for j, rIndex := range rIndexes { + for _, rIndex := range rIndexes { if rIndex.Order() == sql.IndexOrderNone { - m.Tracer.Log("Right index %d: skipping - unordered index", j) + m.Tracer.Log("Right index %s: skipping - unordered index", rIndex.SqlIdx().ID()) continue } if rightIndexMatchesFilters(rIndex, join.Left.RelProps.FuncDeps().Constants(), matchedEqFilters) { - m.Tracer.Log("Found matching index pair: left[%d] <-> right[%d]", i, j) + m.Tracer.Log("Found matching index pair: left[%s] <-> right[%s]", lIndex.SqlIdx().ID(), rIndex.SqlIdx().ID()) jb := join.Copy() if d, ok := jb.Left.First.(*memo.Distinct); ok && lIndex.SqlIdx().IsUnique() { jb.Left = d.Child @@ -1213,10 +1211,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { return err } if !success { - m.Tracer.Log("Failed to create index scan for right index %d", j) + m.Tracer.Log("Failed to create index scan for right index %s", rIndex.SqlIdx().ID()) continue } - m.Tracer.Log("Adding merge join with left index %d, right index %d", i, j) + m.Tracer.Log("Adding merge join with left index %s, right index %s", lIndex.SqlIdx().ID(), rIndex.SqlIdx().ID()) m.MemoizeMergeJoin(e.Group(), join.Left, join.Right, lIndexScan, rIndexScan, jb.Op.AsMerge(), newFilters, false) } } From 642b73fdf41995b7f47b247316472f4482f3d3ad Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 16:43:04 -0700 Subject: [PATCH 05/30] removed silly generated formatting code, now artisanally crafted by skilled crafstmen --- optgen/cmd/support/memo_gen.go | 58 -------------------------- sql/memo/memo.go | 75 +++++++++++++++++++++++++++++++--- sql/memo/memo.og.go | 64 ----------------------------- 3 files changed, 70 insertions(+), 127 deletions(-) diff --git a/optgen/cmd/support/memo_gen.go b/optgen/cmd/support/memo_gen.go index 1015ccf46c..f3b0ff802a 100644 --- a/optgen/cmd/support/memo_gen.go +++ b/optgen/cmd/support/memo_gen.go @@ -70,8 +70,6 @@ func (g *MemoGen) Generate(defines GenDefs, w io.Writer) { g.genChildlessGroupInterface(define) } } - g.genFormatters(g.defines) - } func (g *MemoGen) genImport() { @@ -195,59 +193,3 @@ func (g *MemoGen) genUnaryGroupInterface(define ExprDef) { fmt.Fprintf(g.w, "}\n\n") } - -func (g *MemoGen) genFormatters(defines []ExprDef) { - // printer - fmt.Fprintf(g.w, "func FormatExpr(r exprType) string {\n") - fmt.Fprintf(g.w, " switch r := r.(type) {\n") - for _, d := range defines { - loweredName := strings.ToLower(d.Name) - fmt.Fprintf(g.w, " case *%s:\n", d.Name) - if loweredName == "indexscan" { - fmt.Fprintf(g.w, " if r.Alias != \"\" {\n") - fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Alias)\n", loweredName) - fmt.Fprintf(g.w, " }\n") - } - if d.SourceType != "" { - fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Name())\n", loweredName) - } else if d.Join || d.Binary { - fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s %%d %%d\", r.Left.Id, r.Right.Id)\n", loweredName) - } else if d.Unary { - fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%d\", r.Child.Id)\n", loweredName) - } else { - panic("unreachable") - } - } - fmt.Fprintf(g.w, " default:\n") - fmt.Fprintf(g.w, " panic(fmt.Sprintf(\"unknown RelExpr type: %%T\", r))\n") - fmt.Fprintf(g.w, " }\n") - fmt.Fprintf(g.w, "}\n\n") - - // to sqlNode - fmt.Fprintf(g.w, "func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {\n") - fmt.Fprintf(g.w, " var result sql.Node\n") - fmt.Fprintf(g.w, " var err error\n\n") - fmt.Fprintf(g.w, " switch r := r.(type) {\n") - for _, d := range defines { - if d.SkipExec { - continue - } - fmt.Fprintf(g.w, " case *%s:\n", d.Name) - fmt.Fprintf(g.w, " result, err = b.build%s(r, children...)\n", strings.Title(d.Name)) - } - fmt.Fprintf(g.w, " default:\n") - fmt.Fprintf(g.w, " panic(fmt.Sprintf(\"unknown RelExpr type: %%T\", r))\n") - fmt.Fprintf(g.w, " }\n\n") - fmt.Fprintf(g.w, " if err != nil {\n") - fmt.Fprintf(g.w, " return nil, err\n") - fmt.Fprintf(g.w, " }\n\n") - fmt.Fprintf(g.w, "if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {\n") - fmt.Fprintf(g.w, " withDescribeStats.SetDescribeStats(*DescribeStats(r))\n") - fmt.Fprintf(g.w, "}\n") - fmt.Fprintf(g.w, " result, err = r.Group().finalize(result)\n") - fmt.Fprintf(g.w, " if err != nil {\n") - fmt.Fprintf(g.w, " return nil, err\n") - fmt.Fprintf(g.w, " }\n") - fmt.Fprintf(g.w, " return result, nil\n") - fmt.Fprintf(g.w, "}\n\n") -} diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 7d2ee5c91c..451ecd0af6 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -407,7 +407,7 @@ func (m *Memo) MemoizeMax1Row(grp, child *ExprGroup) *ExprGroup { func (m *Memo) OptimizeRoot() error { m.Tracer.PushDebugContext("OptimizeRoot") defer m.Tracer.PopDebugContext() - + err := m.optimizeMemoGroup(m.root) if err != nil { return err @@ -432,7 +432,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { return nil } - m.Tracer.Log("Optimizing group %d", grp.Id) + m.Tracer.Log("Optimizing group %d (%s)", grp.Id, grp.String()) n := grp.First if _, ok := n.(SourceRel); ok { // We should order the search bottom-up so that physical operators @@ -449,7 +449,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { } for n != nil { - m.Tracer.Log("Evaluating plan %T in group %d", n, grp.Id) + m.Tracer.Log("Evaluating plan %s in group %d", n.String(), grp.Id) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -466,12 +466,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { if grp.RelProps.Distinct.IsHash() { if sortedInputs(n) { n.SetDistinct(SortedDistinctOp) - m.Tracer.Log("Plan %T: using sorted distinct", n) + m.Tracer.Log("Plan %s: using sorted distinct", n.String()) } else { n.SetDistinct(HashDistinctOp) d := &Distinct{Child: grp} relCost += float64(m.statsForRel(m.Ctx, d).RowCount()) - m.Tracer.Log("Plan %T: using hash distinct", n) + m.Tracer.Log("Plan %s: using hash distinct", n.String()) } } else { n.SetDistinct(NoDistinctOp) @@ -897,3 +897,68 @@ type RangeHeap struct { RangeClosedOnLowerBound bool RangeClosedOnUpperBound bool } + +// FormatExpr returns a string representation of a relExpr for debugging purposes. +func FormatExpr(r exprType) string { + switch r := r.(type) { + case *CrossJoin: + return fmt.Sprintf("crossjoin %d %d", r.Left.Id, r.Right.Id) + case *InnerJoin: + return fmt.Sprintf("innerjoin %d %d", r.Left.Id, r.Right.Id) + case *LeftJoin: + return fmt.Sprintf("leftjoin %d %d", r.Left.Id, r.Right.Id) + case *SemiJoin: + return fmt.Sprintf("semijoin %d %d", r.Left.Id, r.Right.Id) + case *AntiJoin: + return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) + case *LookupJoin: + return fmt.Sprintf("lookupjoin %d %d", r.Left.Id, r.Right.Id) + case *RangeHeapJoin: + return fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id) + case *ConcatJoin: + return fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id) + case *HashJoin: + return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id) + case *MergeJoin: + return fmt.Sprintf("mergejoin %d %d", r.Left.Id, r.Right.Id) + case *FullOuterJoin: + return fmt.Sprintf("fullouterjoin %d %d", r.Left.Id, r.Right.Id) + case *LateralJoin: + return fmt.Sprintf("lateraljoin %d %d", r.Left.Id, r.Right.Id) + case *TableScan: + return fmt.Sprintf("tablescan: %s", r.Name()) + case *IndexScan: + if r.Alias != "" { + return fmt.Sprintf("indexscan: %s", r.Alias) + } + return fmt.Sprintf("indexscan: %s", r.Name()) + case *Values: + return fmt.Sprintf("values: %s", r.Name()) + case *TableAlias: + return fmt.Sprintf("tablealias: %s", r.Name()) + case *RecursiveTable: + return fmt.Sprintf("recursivetable: %s", r.Name()) + case *RecursiveCte: + return fmt.Sprintf("recursivecte: %s", r.Name()) + case *SubqueryAlias: + return fmt.Sprintf("subqueryalias: %s", r.Name()) + case *TableFunc: + return fmt.Sprintf("tablefunc: %s", r.Name()) + case *JSONTable: + return fmt.Sprintf("jsontable: %s", r.Name()) + case *EmptyTable: + return fmt.Sprintf("emptytable: %s", r.Name()) + case *SetOp: + return fmt.Sprintf("setop: %s", r.Name()) + case *Project: + return fmt.Sprintf("project: %d", r.Child.Id) + case *Distinct: + return fmt.Sprintf("distinct: %d", r.Child.Id) + case *Max1Row: + return fmt.Sprintf("max1row: %d", r.Child.Id) + case *Filter: + return fmt.Sprintf("filter: %d", r.Child.Id) + default: + panic(fmt.Sprintf("unknown RelExpr type: %T", r)) + } +} diff --git a/sql/memo/memo.og.go b/sql/memo/memo.og.go index 09d52be0f9..e3c250e33b 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -634,70 +634,6 @@ func (r *Filter) outputCols() sql.ColSet { return r.Child.RelProps.OutputCols() } -func FormatExpr(r exprType) string { - switch r := r.(type) { - case *CrossJoin: - return fmt.Sprintf("crossjoin %d %d", r.Left.Id, r.Right.Id) - case *InnerJoin: - return fmt.Sprintf("innerjoin %d %d", r.Left.Id, r.Right.Id) - case *LeftJoin: - return fmt.Sprintf("leftjoin %d %d", r.Left.Id, r.Right.Id) - case *SemiJoin: - return fmt.Sprintf("semijoin %d %d", r.Left.Id, r.Right.Id) - case *AntiJoin: - return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) - case *LookupJoin: - return fmt.Sprintf("lookupjoin %d %d", r.Left.Id, r.Right.Id) - case *RangeHeapJoin: - return fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id) - case *ConcatJoin: - return fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id) - case *HashJoin: - return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id) - case *MergeJoin: - return fmt.Sprintf("mergejoin %d %d", r.Left.Id, r.Right.Id) - case *FullOuterJoin: - return fmt.Sprintf("fullouterjoin %d %d", r.Left.Id, r.Right.Id) - case *LateralJoin: - return fmt.Sprintf("lateraljoin %d %d", r.Left.Id, r.Right.Id) - case *TableScan: - return fmt.Sprintf("tablescan: %s", r.Name()) - case *IndexScan: - if r.Alias != "" { - return fmt.Sprintf("indexscan: %s", r.Alias) - } - return fmt.Sprintf("indexscan: %s", r.Name()) - case *Values: - return fmt.Sprintf("values: %s", r.Name()) - case *TableAlias: - return fmt.Sprintf("tablealias: %s", r.Name()) - case *RecursiveTable: - return fmt.Sprintf("recursivetable: %s", r.Name()) - case *RecursiveCte: - return fmt.Sprintf("recursivecte: %s", r.Name()) - case *SubqueryAlias: - return fmt.Sprintf("subqueryalias: %s", r.Name()) - case *TableFunc: - return fmt.Sprintf("tablefunc: %s", r.Name()) - case *JSONTable: - return fmt.Sprintf("jsontable: %s", r.Name()) - case *EmptyTable: - return fmt.Sprintf("emptytable: %s", r.Name()) - case *SetOp: - return fmt.Sprintf("setop: %s", r.Name()) - case *Project: - return fmt.Sprintf("project: %d", r.Child.Id) - case *Distinct: - return fmt.Sprintf("distinct: %d", r.Child.Id) - case *Max1Row: - return fmt.Sprintf("max1row: %d", r.Child.Id) - case *Filter: - return fmt.Sprintf("filter: %d", r.Child.Id) - default: - panic(fmt.Sprintf("unknown RelExpr type: %T", r)) - } -} - func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) { var result sql.Node var err error From 417089aabfd17a71b0b6d15f139f35554a639144 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 16:45:38 -0700 Subject: [PATCH 06/30] Better debug string for index lookups --- sql/memo/memo.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 451ecd0af6..85fa479a8b 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -912,7 +912,7 @@ func FormatExpr(r exprType) string { case *AntiJoin: return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) case *LookupJoin: - return fmt.Sprintf("lookupjoin %d %d", r.Left.Id, r.Right.Id) + return fmt.Sprintf("lookupjoin %d %d on %s", r.Left.Id, r.Right.Id, r.Lookup.Index.idx.ID()) case *RangeHeapJoin: return fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id) case *ConcatJoin: @@ -929,9 +929,9 @@ func FormatExpr(r exprType) string { return fmt.Sprintf("tablescan: %s", r.Name()) case *IndexScan: if r.Alias != "" { - return fmt.Sprintf("indexscan: %s", r.Alias) + return fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Alias) } - return fmt.Sprintf("indexscan: %s", r.Name()) + return fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Name()) case *Values: return fmt.Sprintf("values: %s", r.Name()) case *TableAlias: From 9e17536b016fb3d5ad69d15626828546c0bc71fc Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 16:47:42 -0700 Subject: [PATCH 07/30] more logging --- sql/memo/memo.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 85fa479a8b..95ef83195a 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -498,19 +498,19 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { grp.Best = n grp.Cost = cost grp.HintOk = true - m.Tracer.Log("Set best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Set best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) return } grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to hinted plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) } else if grp.Best == nil || !grp.HintOk { grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to plan %T with cost %.2f (no hints satisfied)", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f (no hints satisfied)", grp.Id, n.String(), cost) } return } grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to plan %T with cost %.2f", grp.Id, n, cost) + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f", grp.Id, n.String(), cost) } func (m *Memo) BestRootPlan(ctx *sql.Context) (sql.Node, error) { From 7e5f816834b4a24358e3107118868b678db95ac1 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 17:02:57 -0700 Subject: [PATCH 08/30] Fixed bad comment, more logging --- sql/memo/expr_group.go | 9 +++++---- sql/memo/memo.go | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index 59bf1d5cd4..a8bd412268 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -77,14 +77,15 @@ func (e *ExprGroup) children() []*ExprGroup { return children } -// updateBest updates a group's Best to the given expression or a hinted -// operator if the hinted plan is not found. Join operator is applied as -// a local rather than global property. -func (e *ExprGroup) updateBest(n RelExpr, grpCost float64) { +// updateBest updates a group's Best to the given expression if the cost is lower than the current best. +// Returns whether the best plan was updated. +func (e *ExprGroup) updateBest(n RelExpr, grpCost float64) bool { if e.Best == nil || grpCost < e.Cost { e.Best = n e.Cost = grpCost + return true } + return false } func (e *ExprGroup) finalize(node sql.Node) (sql.Node, error) { diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 95ef83195a..320407fee0 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -501,11 +501,13 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { m.Tracer.Log("Set best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) return } - grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) + if grp.updateBest(n, cost) { + m.Tracer.Log("Updated best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) + } } else if grp.Best == nil || !grp.HintOk { - grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f (no hints satisfied)", grp.Id, n.String(), cost) + if grp.updateBest(n, cost) { + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f (no hints satisfied)", grp.Id, n.String(), cost) + } } return } From 8c9193e84b6076727c4f14dc43ebe2c8f2a9eda7 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Fri, 24 Oct 2025 17:04:07 -0700 Subject: [PATCH 09/30] Better logging again --- sql/memo/memo.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 320407fee0..2ce375915a 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -511,8 +511,9 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { } return } - grp.updateBest(n, cost) - m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f", grp.Id, n.String(), cost) + if grp.updateBest(n, cost) { + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f", grp.Id, n.String(), cost) + } } func (m *Memo) BestRootPlan(ctx *sql.Context) (sql.Node, error) { From 0e44b59ac60ecc153a4fd7a62b2626776cb90f55 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 27 Oct 2025 13:07:22 -0700 Subject: [PATCH 10/30] more logging improvements --- sql/analyzer/indexed_joins.go | 8 ++++---- sql/memo/memo.go | 8 ++++++++ sql/memo/trace_logger.go | 7 ++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 0ab19198eb..81ccd3f124 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -220,7 +220,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco m.ApplyHint(h) } - m.Tracer.Log("Starting cost-based optimization") + m.Tracer.Log("Starting cost-based optimization for groups %s", m) err = m.OptimizeRoot() if err != nil { return nil, err @@ -835,7 +835,7 @@ func addHashJoins(m *memo.Memo) error { return nil } - m.Tracer.Log("Considering hash join for %T with %d filters", e, len(join.Filter)) + m.Tracer.Log("Considering hash join with %d filters", len(join.Filter)) var fromExpr, toExpr []sql.Expression for _, f := range join.Filter { @@ -845,12 +845,12 @@ func addHashJoins(m *memo.Memo) error { satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) - m.Tracer.Log("Filter %s: left->right hash key mapping", f.String()) + m.Tracer.Log("Filter %s: found a left->right hash key mapping", f.String()) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) - m.Tracer.Log("Filter %s: right->left hash key mapping", f.String()) + m.Tracer.Log("Filter %s: found a right->left hash key mapping", f.String()) } else { m.Tracer.Log("Filter %s: does not satisfy scalar refs for hash join", f.String()) return nil diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 2ce375915a..dde13f4cfa 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -607,6 +607,10 @@ func (m *Memo) SetJoinOp(op HintType, left, right string) { m.hints.ops = append(m.hints.ops, hint) } +var _ fmt.Stringer = (*Memo)(nil) + +// var _ fmt.Formatter = (*Memo)(nil) + func (m *Memo) String() string { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) @@ -641,6 +645,10 @@ func (m *Memo) String() string { return b.String() } +// func (m *Memo) Format(f fmt.State, verb rune) { +// return m.String() +// } + type tableProps struct { grpToName map[GroupId]string nameToGrp map[string]GroupId diff --git a/sql/memo/trace_logger.go b/sql/memo/trace_logger.go index c9a54964e1..b553d86c67 100755 --- a/sql/memo/trace_logger.go +++ b/sql/memo/trace_logger.go @@ -15,6 +15,7 @@ package memo import ( + "fmt" "strings" "github.com/sirupsen/logrus" @@ -48,9 +49,9 @@ func (a *TraceLogger) Log(msg string, args ...interface{}) { if a != nil && a.traceEnabled { if len(a.contextStack) > 0 { ctx := strings.Join(a.contextStack, "/") - log.Infof("%s: "+msg, append([]interface{}{ctx}, args...)...) + fmt.Printf("%s: "+msg+"\n", append([]interface{}{ctx}, args...)...) } else { - log.Infof(msg, args...) + fmt.Printf(msg+"\n", args...) } } -} \ No newline at end of file +} From 17dd5290434d8689e66ae69489cce27a240ceae4 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 27 Oct 2025 14:16:57 -0700 Subject: [PATCH 11/30] more formatting --- optgen/cmd/support/memo_gen.go | 123 +++++++------- sql/analyzer/indexed_joins.go | 3 + sql/memo/expr_group.go | 2 +- sql/memo/memo.go | 92 +++++++---- sql/memo/memo.og.go | 293 +++++++++++++++++++++------------ 5 files changed, 318 insertions(+), 195 deletions(-) diff --git a/optgen/cmd/support/memo_gen.go b/optgen/cmd/support/memo_gen.go index f3b0ff802a..cd4c8be67d 100644 --- a/optgen/cmd/support/memo_gen.go +++ b/optgen/cmd/support/memo_gen.go @@ -42,32 +42,33 @@ func DecodeMemoExprs(path string) (MemoExprs, error) { var _ GenDefs = (*MemoExprs)(nil) type MemoGen struct { - defines []ExprDef - w io.Writer + defns []ExprDef + w io.Writer } -func (g *MemoGen) Generate(defines GenDefs, w io.Writer) { - g.defines = defines.(MemoExprs).Exprs +func (g *MemoGen) Generate(defns GenDefs, w io.Writer) { + g.defns = defns.(MemoExprs).Exprs g.w = w g.genImport() - for _, define := range g.defines { - g.genType(define) - g.genRelInterfaces(define) - - g.genStringer(define) - if define.SourceType != "" { - g.genSourceRelInterface(define) + for _, defn := range g.defns { + g.genType(defn) + g.genRelInterfaces(defn) + + g.genStringer(defn) + g.genFormatter(defn) + if defn.SourceType != "" { + g.genSourceRelInterface(defn) } - if define.Join { - g.genJoinRelInterface(define) - } else if define.Binary { - g.genBinaryGroupInterface(define) - } else if define.Unary { - g.genUnaryGroupInterface(define) + if defn.Join { + g.genJoinRelInterface(defn) + } else if defn.Binary { + g.genBinaryGroupInterface(defn) + } else if defn.Unary { + g.genUnaryGroupInterface(defn) } else { - g.genChildlessGroupInterface(define) + g.genChildlessGroupInterface(defn) } } } @@ -81,108 +82,116 @@ func (g *MemoGen) genImport() { fmt.Fprintf(g.w, ")\n\n") } -func (g *MemoGen) genType(define ExprDef) { - fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(define.Name)) - if define.SourceType != "" { +func (g *MemoGen) genType(defn ExprDef) { + fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(defn.Name)) + if defn.SourceType != "" { fmt.Fprintf(g.w, " *sourceBase\n") - fmt.Fprintf(g.w, " Table %s\n", define.SourceType) - } else if define.Join { + fmt.Fprintf(g.w, " Table %s\n", defn.SourceType) + } else if defn.Join { fmt.Fprintf(g.w, " *JoinBase\n") - } else if define.Unary { + } else if defn.Unary { fmt.Fprintf(g.w, " *relBase\n") fmt.Fprintf(g.w, " Child *ExprGroup\n") - } else if define.Binary { + } else if defn.Binary { fmt.Fprintf(g.w, " *relBase\n") fmt.Fprintf(g.w, " Left *ExprGroup\n") fmt.Fprintf(g.w, " Right *ExprGroup\n") } - for _, attr := range define.Attrs { + for _, attr := range defn.Attrs { fmt.Fprintf(g.w, " %s %s\n", strings.Title(attr[0]), attr[1]) } fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genRelInterfaces(define ExprDef) { - fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", define.Name) - if define.SourceType != "" { - fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", define.Name) - } else if define.Join { - fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", define.Name) - } else if define.Unary || define.Binary { +func (g *MemoGen) genRelInterfaces(defn ExprDef) { + fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", defn.Name) + fmt.Fprintf(g.w, "var _ fmt.Formatter = (*%s)(nil)\n", defn.Name) + fmt.Fprintf(g.w, "var _ fmt.Stringer = (*%s)(nil)\n", defn.Name) + if defn.SourceType != "" { + fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", defn.Name) + } else if defn.Join { + fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", defn.Name) + } else if defn.Unary || defn.Binary { } else { panic("unreachable") } fmt.Fprintf(g.w, "\n") } -func (g *MemoGen) genScalarInterfaces(define ExprDef) { - fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", define.Name) +func (g *MemoGen) genScalarInterfaces(defn ExprDef) { + fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", defn.Name) fmt.Fprintf(g.w, "\n") - fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", define.Name) - fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(define.Name)) + fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", defn.Name) + fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(defn.Name)) + fmt.Fprintf(g.w, "}\n\n") +} + +func (g *MemoGen) genStringer(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) String() string {\n", defn.Name) + fmt.Fprint(g.w, " return fmt.Sprintf(\"%s\", r)\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genStringer(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) String() string {\n", define.Name) - fmt.Fprintf(g.w, " return FormatExpr(r)\n") +func (g *MemoGen) genFormatter(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) Format(s fmt.State, verb rune) {\n", defn.Name) + fmt.Fprintf(g.w, " FormatExpr(r, s, verb)\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genSourceRelInterface(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", define.Name) - if !define.SkipName { +func (g *MemoGen) genSourceRelInterface(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", defn.Name) + if !defn.SkipName { fmt.Fprintf(g.w, " return strings.ToLower(r.Table.Name())\n") } else { fmt.Fprintf(g.w, " return \"\"\n") } fmt.Fprintf(g.w, "}\n\n") - fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", define.Name) + fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", defn.Name) fmt.Fprintf(g.w, " return TableIdForSource(r.g.Id)\n") fmt.Fprintf(g.w, "}\n\n") - fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", define.Name) - if define.SkipTableId { + fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", defn.Name) + if defn.SkipTableId { fmt.Fprintf(g.w, " return nil\n") } else { fmt.Fprintf(g.w, " return r.Table\n") } fmt.Fprintf(g.w, "}\n\n") - fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", define.Name) + fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", defn.Name) fmt.Fprintf(g.w, " return r.Table.Schema()\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genJoinRelInterface(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", define.Name) +func (g *MemoGen) genJoinRelInterface(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", defn.Name) fmt.Fprintf(g.w, " return r.JoinBase\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genBinaryGroupInterface(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name) +func (g *MemoGen) genBinaryGroupInterface(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name) fmt.Fprintf(g.w, " return []*ExprGroup{r.Left, r.Right}\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genChildlessGroupInterface(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name) +func (g *MemoGen) genChildlessGroupInterface(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name) fmt.Fprintf(g.w, " return nil\n") fmt.Fprintf(g.w, "}\n\n") } -func (g *MemoGen) genUnaryGroupInterface(define ExprDef) { - fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name) +func (g *MemoGen) genUnaryGroupInterface(defn ExprDef) { + fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name) fmt.Fprintf(g.w, " return []*ExprGroup{r.Child}\n") fmt.Fprintf(g.w, "}\n\n") - fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", define.Name) - switch define.Name { + fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", defn.Name) + switch defn.Name { case "Project": fmt.Fprintf(g.w, " return getProjectColset(r)\n") diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 81ccd3f124..48b29f19ec 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -259,6 +259,9 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + m.Tracer.PushDebugContext(e.String()) + defer m.Tracer.PopDebugContext() + var right *memo.ExprGroup var join *memo.JoinBase diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index a8bd412268..188879edc0 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -169,7 +169,7 @@ func (e *ExprGroup) String() string { sep := "" for n != nil { b.WriteString(sep) - b.WriteString(fmt.Sprintf("(%s", FormatExpr(n))) + b.WriteString(fmt.Sprintf("(%s", n)) if e.Best != nil { cost := n.Cost() if cost == 0 { diff --git a/sql/memo/memo.go b/sql/memo/memo.go index dde13f4cfa..459b26a9ac 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -16,6 +16,7 @@ package memo import ( "fmt" + "io" "strings" "github.com/dolthub/go-mysql-server/sql" @@ -449,7 +450,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { } for n != nil { - m.Tracer.Log("Evaluating plan %s in group %d", n.String(), grp.Id) + m.Tracer.Log("Evaluating plan %s in group %d", n, grp.Id) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -466,12 +467,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { if grp.RelProps.Distinct.IsHash() { if sortedInputs(n) { n.SetDistinct(SortedDistinctOp) - m.Tracer.Log("Plan %s: using sorted distinct", n.String()) + m.Tracer.Log("Plan %s: using sorted distinct", n) } else { n.SetDistinct(HashDistinctOp) d := &Distinct{Child: grp} relCost += float64(m.statsForRel(m.Ctx, d).RowCount()) - m.Tracer.Log("Plan %s: using hash distinct", n.String()) + m.Tracer.Log("Plan %s: using hash distinct", n) } } else { n.SetDistinct(NoDistinctOp) @@ -479,7 +480,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { n.SetCost(relCost) cost += relCost - m.Tracer.Log("Plan %T: relCost=%.2f, totalCost=%.2f", n, relCost, cost) + m.Tracer.Log("Plan %s: relCost=%.2f, totalCost=%.2f", n, relCost, cost) m.updateBest(grp, n, cost) n = n.Next() } @@ -910,66 +911,91 @@ type RangeHeap struct { } // FormatExpr returns a string representation of a relExpr for debugging purposes. -func FormatExpr(r exprType) string { +func FormatExpr(r exprType, s fmt.State, verb rune) { switch r := r.(type) { case *CrossJoin: - return fmt.Sprintf("crossjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("crossjoin %d %d", r.Left.Id, r.Right.Id)) case *InnerJoin: - return fmt.Sprintf("innerjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("innerjoin %d %d", r.Left.Id, r.Right.Id)) case *LeftJoin: - return fmt.Sprintf("leftjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("leftjoin %d %d", r.Left.Id, r.Right.Id)) case *SemiJoin: - return fmt.Sprintf("semijoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("semijoin %d %d", r.Left.Id, r.Right.Id)) case *AntiJoin: - return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id)) case *LookupJoin: - return fmt.Sprintf("lookupjoin %d %d on %s", r.Left.Id, r.Right.Id, r.Lookup.Index.idx.ID()) + io.WriteString(s, fmt.Sprintf("lookupjoin %d %d on %s", r.Left.Id, r.Right.Id, r.Lookup.Index.idx.ID())) case *RangeHeapJoin: - return fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id)) case *ConcatJoin: - return fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id)) case *HashJoin: - return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id)) case *MergeJoin: - return fmt.Sprintf("mergejoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("mergejoin %d %d", r.Left.Id, r.Right.Id)) case *FullOuterJoin: - return fmt.Sprintf("fullouterjoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("fullouterjoin %d %d", r.Left.Id, r.Right.Id)) case *LateralJoin: - return fmt.Sprintf("lateraljoin %d %d", r.Left.Id, r.Right.Id) + io.WriteString(s, fmt.Sprintf("lateraljoin %d %d", r.Left.Id, r.Right.Id)) case *TableScan: - return fmt.Sprintf("tablescan: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("tablescan: %s", r.Name())) case *IndexScan: if r.Alias != "" { - return fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Alias) + io.WriteString(s, fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Alias)) } - return fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Name()) + io.WriteString(s, fmt.Sprintf("indexscan on %s: %s", r.Index.SqlIdx().ID(), r.Name())) case *Values: - return fmt.Sprintf("values: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("values: %s", r.Name())) case *TableAlias: - return fmt.Sprintf("tablealias: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("tablealias: %s", r.Name())) case *RecursiveTable: - return fmt.Sprintf("recursivetable: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("recursivetable: %s", r.Name())) case *RecursiveCte: - return fmt.Sprintf("recursivecte: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("recursivecte: %s", r.Name())) case *SubqueryAlias: - return fmt.Sprintf("subqueryalias: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("subqueryalias: %s", r.Name())) case *TableFunc: - return fmt.Sprintf("tablefunc: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("tablefunc: %s", r.Name())) case *JSONTable: - return fmt.Sprintf("jsontable: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("jsontable: %s", r.Name())) case *EmptyTable: - return fmt.Sprintf("emptytable: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("emptytable: %s", r.Name())) case *SetOp: - return fmt.Sprintf("setop: %s", r.Name()) + io.WriteString(s, fmt.Sprintf("setop: %s", r.Name())) case *Project: - return fmt.Sprintf("project: %d", r.Child.Id) + io.WriteString(s, fmt.Sprintf("project: %d", r.Child.Id)) case *Distinct: - return fmt.Sprintf("distinct: %d", r.Child.Id) + io.WriteString(s, fmt.Sprintf("distinct: %d", r.Child.Id)) case *Max1Row: - return fmt.Sprintf("max1row: %d", r.Child.Id) + io.WriteString(s, fmt.Sprintf("max1row: %d", r.Child.Id)) case *Filter: - return fmt.Sprintf("filter: %d", r.Child.Id) + io.WriteString(s, fmt.Sprintf("filter: %d", r.Child.Id)) default: panic(fmt.Sprintf("unknown RelExpr type: %T", r)) } } + +// Format implements fmt.Formatter and can be formatted by the fmt package. The +// following verbs are supported +// +// %s print the error. If the error has a Cause it will be +// printed recursively +// %v see %s +// %+v extended format. Each Frame of the error's StackTrace will +// be printed in detail. +// func FormatExpr(s fmt.State, verb rune) { +// switch verb { +// case 'v': +// if s.Flag('+') { +// io.WriteString(s, err.Error()+"\n") +// err.stack.Format(s, verb) +// return +// } +// +// fallthrough +// case 's': +// io.WriteString(s, err.Error()) +// case 'q': +// fmt.Fprintf(s, "%q", err.Error()) +// } +// } diff --git a/sql/memo/memo.og.go b/sql/memo/memo.og.go index e3c250e33b..ac69c46f9e 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -15,10 +15,16 @@ type CrossJoin struct { } var _ RelExpr = (*CrossJoin)(nil) +var _ fmt.Formatter = (*CrossJoin)(nil) +var _ fmt.Stringer = (*CrossJoin)(nil) var _ JoinRel = (*CrossJoin)(nil) func (r *CrossJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *CrossJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *CrossJoin) JoinPrivate() *JoinBase { @@ -30,10 +36,16 @@ type InnerJoin struct { } var _ RelExpr = (*InnerJoin)(nil) +var _ fmt.Formatter = (*InnerJoin)(nil) +var _ fmt.Stringer = (*InnerJoin)(nil) var _ JoinRel = (*InnerJoin)(nil) func (r *InnerJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *InnerJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *InnerJoin) JoinPrivate() *JoinBase { @@ -45,10 +57,16 @@ type LeftJoin struct { } var _ RelExpr = (*LeftJoin)(nil) +var _ fmt.Formatter = (*LeftJoin)(nil) +var _ fmt.Stringer = (*LeftJoin)(nil) var _ JoinRel = (*LeftJoin)(nil) func (r *LeftJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *LeftJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *LeftJoin) JoinPrivate() *JoinBase { @@ -60,10 +78,16 @@ type SemiJoin struct { } var _ RelExpr = (*SemiJoin)(nil) +var _ fmt.Formatter = (*SemiJoin)(nil) +var _ fmt.Stringer = (*SemiJoin)(nil) var _ JoinRel = (*SemiJoin)(nil) func (r *SemiJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *SemiJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *SemiJoin) JoinPrivate() *JoinBase { @@ -75,10 +99,16 @@ type AntiJoin struct { } var _ RelExpr = (*AntiJoin)(nil) +var _ fmt.Formatter = (*AntiJoin)(nil) +var _ fmt.Stringer = (*AntiJoin)(nil) var _ JoinRel = (*AntiJoin)(nil) func (r *AntiJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *AntiJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *AntiJoin) JoinPrivate() *JoinBase { @@ -92,10 +122,16 @@ type LookupJoin struct { } var _ RelExpr = (*LookupJoin)(nil) +var _ fmt.Formatter = (*LookupJoin)(nil) +var _ fmt.Stringer = (*LookupJoin)(nil) var _ JoinRel = (*LookupJoin)(nil) func (r *LookupJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *LookupJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *LookupJoin) JoinPrivate() *JoinBase { @@ -108,10 +144,16 @@ type RangeHeapJoin struct { } var _ RelExpr = (*RangeHeapJoin)(nil) +var _ fmt.Formatter = (*RangeHeapJoin)(nil) +var _ fmt.Stringer = (*RangeHeapJoin)(nil) var _ JoinRel = (*RangeHeapJoin)(nil) func (r *RangeHeapJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *RangeHeapJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *RangeHeapJoin) JoinPrivate() *JoinBase { @@ -124,10 +166,16 @@ type ConcatJoin struct { } var _ RelExpr = (*ConcatJoin)(nil) +var _ fmt.Formatter = (*ConcatJoin)(nil) +var _ fmt.Stringer = (*ConcatJoin)(nil) var _ JoinRel = (*ConcatJoin)(nil) func (r *ConcatJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *ConcatJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *ConcatJoin) JoinPrivate() *JoinBase { @@ -141,10 +189,16 @@ type HashJoin struct { } var _ RelExpr = (*HashJoin)(nil) +var _ fmt.Formatter = (*HashJoin)(nil) +var _ fmt.Stringer = (*HashJoin)(nil) var _ JoinRel = (*HashJoin)(nil) func (r *HashJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *HashJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *HashJoin) JoinPrivate() *JoinBase { @@ -161,10 +215,16 @@ type MergeJoin struct { } var _ RelExpr = (*MergeJoin)(nil) +var _ fmt.Formatter = (*MergeJoin)(nil) +var _ fmt.Stringer = (*MergeJoin)(nil) var _ JoinRel = (*MergeJoin)(nil) func (r *MergeJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *MergeJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *MergeJoin) JoinPrivate() *JoinBase { @@ -176,10 +236,16 @@ type FullOuterJoin struct { } var _ RelExpr = (*FullOuterJoin)(nil) +var _ fmt.Formatter = (*FullOuterJoin)(nil) +var _ fmt.Stringer = (*FullOuterJoin)(nil) var _ JoinRel = (*FullOuterJoin)(nil) func (r *FullOuterJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *FullOuterJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *FullOuterJoin) JoinPrivate() *JoinBase { @@ -191,10 +257,16 @@ type LateralJoin struct { } var _ RelExpr = (*LateralJoin)(nil) +var _ fmt.Formatter = (*LateralJoin)(nil) +var _ fmt.Stringer = (*LateralJoin)(nil) var _ JoinRel = (*LateralJoin)(nil) func (r *LateralJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *LateralJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *LateralJoin) JoinPrivate() *JoinBase { @@ -207,10 +279,16 @@ type TableScan struct { } var _ RelExpr = (*TableScan)(nil) +var _ fmt.Formatter = (*TableScan)(nil) +var _ fmt.Stringer = (*TableScan)(nil) var _ SourceRel = (*TableScan)(nil) func (r *TableScan) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *TableScan) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *TableScan) Name() string { @@ -242,10 +320,16 @@ type IndexScan struct { } var _ RelExpr = (*IndexScan)(nil) +var _ fmt.Formatter = (*IndexScan)(nil) +var _ fmt.Stringer = (*IndexScan)(nil) var _ SourceRel = (*IndexScan)(nil) func (r *IndexScan) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *IndexScan) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *IndexScan) Name() string { @@ -274,10 +358,16 @@ type Values struct { } var _ RelExpr = (*Values)(nil) +var _ fmt.Formatter = (*Values)(nil) +var _ fmt.Stringer = (*Values)(nil) var _ SourceRel = (*Values)(nil) func (r *Values) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *Values) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *Values) Name() string { @@ -306,10 +396,16 @@ type TableAlias struct { } var _ RelExpr = (*TableAlias)(nil) +var _ fmt.Formatter = (*TableAlias)(nil) +var _ fmt.Stringer = (*TableAlias)(nil) var _ SourceRel = (*TableAlias)(nil) func (r *TableAlias) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *TableAlias) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *TableAlias) Name() string { @@ -338,10 +434,16 @@ type RecursiveTable struct { } var _ RelExpr = (*RecursiveTable)(nil) +var _ fmt.Formatter = (*RecursiveTable)(nil) +var _ fmt.Stringer = (*RecursiveTable)(nil) var _ SourceRel = (*RecursiveTable)(nil) func (r *RecursiveTable) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *RecursiveTable) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *RecursiveTable) Name() string { @@ -370,10 +472,16 @@ type RecursiveCte struct { } var _ RelExpr = (*RecursiveCte)(nil) +var _ fmt.Formatter = (*RecursiveCte)(nil) +var _ fmt.Stringer = (*RecursiveCte)(nil) var _ SourceRel = (*RecursiveCte)(nil) func (r *RecursiveCte) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *RecursiveCte) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *RecursiveCte) Name() string { @@ -402,10 +510,16 @@ type SubqueryAlias struct { } var _ RelExpr = (*SubqueryAlias)(nil) +var _ fmt.Formatter = (*SubqueryAlias)(nil) +var _ fmt.Stringer = (*SubqueryAlias)(nil) var _ SourceRel = (*SubqueryAlias)(nil) func (r *SubqueryAlias) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *SubqueryAlias) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *SubqueryAlias) Name() string { @@ -434,10 +548,16 @@ type TableFunc struct { } var _ RelExpr = (*TableFunc)(nil) +var _ fmt.Formatter = (*TableFunc)(nil) +var _ fmt.Stringer = (*TableFunc)(nil) var _ SourceRel = (*TableFunc)(nil) func (r *TableFunc) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *TableFunc) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *TableFunc) Name() string { @@ -466,10 +586,16 @@ type JSONTable struct { } var _ RelExpr = (*JSONTable)(nil) +var _ fmt.Formatter = (*JSONTable)(nil) +var _ fmt.Stringer = (*JSONTable)(nil) var _ SourceRel = (*JSONTable)(nil) func (r *JSONTable) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *JSONTable) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *JSONTable) Name() string { @@ -498,10 +624,16 @@ type EmptyTable struct { } var _ RelExpr = (*EmptyTable)(nil) +var _ fmt.Formatter = (*EmptyTable)(nil) +var _ fmt.Stringer = (*EmptyTable)(nil) var _ SourceRel = (*EmptyTable)(nil) func (r *EmptyTable) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *EmptyTable) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *EmptyTable) Name() string { @@ -530,10 +662,16 @@ type SetOp struct { } var _ RelExpr = (*SetOp)(nil) +var _ fmt.Formatter = (*SetOp)(nil) +var _ fmt.Stringer = (*SetOp)(nil) var _ SourceRel = (*SetOp)(nil) func (r *SetOp) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *SetOp) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *SetOp) Name() string { @@ -563,9 +701,15 @@ type Project struct { } var _ RelExpr = (*Project)(nil) +var _ fmt.Formatter = (*Project)(nil) +var _ fmt.Stringer = (*Project)(nil) func (r *Project) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *Project) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *Project) Children() []*ExprGroup { @@ -582,9 +726,15 @@ type Distinct struct { } var _ RelExpr = (*Distinct)(nil) +var _ fmt.Formatter = (*Distinct)(nil) +var _ fmt.Stringer = (*Distinct)(nil) func (r *Distinct) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *Distinct) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *Distinct) Children() []*ExprGroup { @@ -601,9 +751,15 @@ type Max1Row struct { } var _ RelExpr = (*Max1Row)(nil) +var _ fmt.Formatter = (*Max1Row)(nil) +var _ fmt.Stringer = (*Max1Row)(nil) func (r *Max1Row) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *Max1Row) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *Max1Row) Children() []*ExprGroup { @@ -621,9 +777,15 @@ type Filter struct { } var _ RelExpr = (*Filter)(nil) +var _ fmt.Formatter = (*Filter)(nil) +var _ fmt.Stringer = (*Filter)(nil) func (r *Filter) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) +} + +func (r *Filter) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *Filter) Children() []*ExprGroup { @@ -633,80 +795,3 @@ func (r *Filter) Children() []*ExprGroup { func (r *Filter) outputCols() sql.ColSet { return r.Child.RelProps.OutputCols() } - -func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) { - var result sql.Node - var err error - - switch r := r.(type) { - case *CrossJoin: - result, err = b.buildCrossJoin(r, children...) - case *InnerJoin: - result, err = b.buildInnerJoin(r, children...) - case *LeftJoin: - result, err = b.buildLeftJoin(r, children...) - case *SemiJoin: - result, err = b.buildSemiJoin(r, children...) - case *AntiJoin: - result, err = b.buildAntiJoin(r, children...) - case *LookupJoin: - result, err = b.buildLookupJoin(r, children...) - case *RangeHeapJoin: - result, err = b.buildRangeHeapJoin(r, children...) - case *ConcatJoin: - result, err = b.buildConcatJoin(r, children...) - case *HashJoin: - result, err = b.buildHashJoin(r, children...) - case *MergeJoin: - result, err = b.buildMergeJoin(r, children...) - case *FullOuterJoin: - result, err = b.buildFullOuterJoin(r, children...) - case *LateralJoin: - result, err = b.buildLateralJoin(r, children...) - case *TableScan: - result, err = b.buildTableScan(r, children...) - case *IndexScan: - result, err = b.buildIndexScan(r, children...) - case *Values: - result, err = b.buildValues(r, children...) - case *TableAlias: - result, err = b.buildTableAlias(r, children...) - case *RecursiveTable: - result, err = b.buildRecursiveTable(r, children...) - case *RecursiveCte: - result, err = b.buildRecursiveCte(r, children...) - case *SubqueryAlias: - result, err = b.buildSubqueryAlias(r, children...) - case *TableFunc: - result, err = b.buildTableFunc(r, children...) - case *JSONTable: - result, err = b.buildJSONTable(r, children...) - case *EmptyTable: - result, err = b.buildEmptyTable(r, children...) - case *SetOp: - result, err = b.buildSetOp(r, children...) - case *Project: - result, err = b.buildProject(r, children...) - case *Distinct: - result, err = b.buildDistinct(r, children...) - case *Max1Row: - result, err = b.buildMax1Row(r, children...) - case *Filter: - result, err = b.buildFilter(r, children...) - default: - panic(fmt.Sprintf("unknown RelExpr type: %T", r)) - } - - if err != nil { - return nil, err - } - - if withDescribeStats, ok := result.(sql.WithDescribeStats); ok { - withDescribeStats.SetDescribeStats(*DescribeStats(r)) - } - result, err = r.Group().finalize(result) - if err != nil { - return nil, err - } - return result, nil -} From 2ad95c6a5e4a7e434f890210cc8296ba8ca82a5c Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 27 Oct 2025 14:27:14 -0700 Subject: [PATCH 12/30] Restore accidentally deleted functionalty from generation --- optgen/cmd/support/memo_gen.go | 29 +++++++++++++ sql/memo/memo.og.go | 77 ++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/optgen/cmd/support/memo_gen.go b/optgen/cmd/support/memo_gen.go index cd4c8be67d..fcfde8e95c 100644 --- a/optgen/cmd/support/memo_gen.go +++ b/optgen/cmd/support/memo_gen.go @@ -71,6 +71,7 @@ func (g *MemoGen) Generate(defns GenDefs, w io.Writer) { g.genChildlessGroupInterface(defn) } } + g.genBuildRelExpr(g.defns) } func (g *MemoGen) genImport() { @@ -200,5 +201,33 @@ func (g *MemoGen) genUnaryGroupInterface(defn ExprDef) { } fmt.Fprintf(g.w, "}\n\n") +} +func (g *MemoGen) genBuildRelExpr(defns []ExprDef) { + fmt.Fprintf(g.w, "func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {\n") + fmt.Fprintf(g.w, " var result sql.Node\n") + fmt.Fprintf(g.w, " var err error\n\n") + fmt.Fprintf(g.w, " switch r := r.(type) {\n") + for _, d := range defns { + if d.SkipExec { + continue + } + fmt.Fprintf(g.w, " case *%s:\n", d.Name) + fmt.Fprintf(g.w, " result, err = b.build%s(r, children...)\n", strings.Title(d.Name)) + } + fmt.Fprintf(g.w, " default:\n") + fmt.Fprintf(g.w, " panic(fmt.Sprintf(\"unknown RelExpr type: %%T\", r))\n") + fmt.Fprintf(g.w, " }\n\n") + fmt.Fprintf(g.w, " if err != nil {\n") + fmt.Fprintf(g.w, " return nil, err\n") + fmt.Fprintf(g.w, " }\n\n") + fmt.Fprintf(g.w, "if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {\n") + fmt.Fprintf(g.w, " withDescribeStats.SetDescribeStats(*DescribeStats(r))\n") + fmt.Fprintf(g.w, "}\n") + fmt.Fprintf(g.w, " result, err = r.Group().finalize(result)\n") + fmt.Fprintf(g.w, " if err != nil {\n") + fmt.Fprintf(g.w, " return nil, err\n") + fmt.Fprintf(g.w, " }\n") + fmt.Fprintf(g.w, " return result, nil\n") + fmt.Fprintf(g.w, "}\n\n") } diff --git a/sql/memo/memo.og.go b/sql/memo/memo.og.go index ac69c46f9e..e4db9bbd25 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -795,3 +795,80 @@ func (r *Filter) Children() []*ExprGroup { func (r *Filter) outputCols() sql.ColSet { return r.Child.RelProps.OutputCols() } + +func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) { + var result sql.Node + var err error + + switch r := r.(type) { + case *CrossJoin: + result, err = b.buildCrossJoin(r, children...) + case *InnerJoin: + result, err = b.buildInnerJoin(r, children...) + case *LeftJoin: + result, err = b.buildLeftJoin(r, children...) + case *SemiJoin: + result, err = b.buildSemiJoin(r, children...) + case *AntiJoin: + result, err = b.buildAntiJoin(r, children...) + case *LookupJoin: + result, err = b.buildLookupJoin(r, children...) + case *RangeHeapJoin: + result, err = b.buildRangeHeapJoin(r, children...) + case *ConcatJoin: + result, err = b.buildConcatJoin(r, children...) + case *HashJoin: + result, err = b.buildHashJoin(r, children...) + case *MergeJoin: + result, err = b.buildMergeJoin(r, children...) + case *FullOuterJoin: + result, err = b.buildFullOuterJoin(r, children...) + case *LateralJoin: + result, err = b.buildLateralJoin(r, children...) + case *TableScan: + result, err = b.buildTableScan(r, children...) + case *IndexScan: + result, err = b.buildIndexScan(r, children...) + case *Values: + result, err = b.buildValues(r, children...) + case *TableAlias: + result, err = b.buildTableAlias(r, children...) + case *RecursiveTable: + result, err = b.buildRecursiveTable(r, children...) + case *RecursiveCte: + result, err = b.buildRecursiveCte(r, children...) + case *SubqueryAlias: + result, err = b.buildSubqueryAlias(r, children...) + case *TableFunc: + result, err = b.buildTableFunc(r, children...) + case *JSONTable: + result, err = b.buildJSONTable(r, children...) + case *EmptyTable: + result, err = b.buildEmptyTable(r, children...) + case *SetOp: + result, err = b.buildSetOp(r, children...) + case *Project: + result, err = b.buildProject(r, children...) + case *Distinct: + result, err = b.buildDistinct(r, children...) + case *Max1Row: + result, err = b.buildMax1Row(r, children...) + case *Filter: + result, err = b.buildFilter(r, children...) + default: + panic(fmt.Sprintf("unknown RelExpr type: %T", r)) + } + + if err != nil { + return nil, err + } + + if withDescribeStats, ok := result.(sql.WithDescribeStats); ok { + withDescribeStats.SetDescribeStats(*DescribeStats(r)) + } + result, err = r.Group().finalize(result) + if err != nil { + return nil, err + } + return result, nil +} From 33503ff6ffe79a1f0ae7dc90de33e8bfafe19801 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 27 Oct 2025 14:30:56 -0700 Subject: [PATCH 13/30] logging --- sql/analyzer/indexed_joins.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 48b29f19ec..96eb8b674f 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -826,6 +826,9 @@ func addHashJoins(m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + m.Tracer.PushDebugContext(e.String()) + defer m.Tracer.PopDebugContext() + switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: default: @@ -848,18 +851,18 @@ func addHashJoins(m *memo.Memo) error { satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) - m.Tracer.Log("Filter %s: found a left->right hash key mapping", f.String()) + m.Tracer.Log("Filter %s: found a left->right hash key mapping", f) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) - m.Tracer.Log("Filter %s: found a right->left hash key mapping", f.String()) + m.Tracer.Log("Filter %s: found a right->left hash key mapping", f) } else { - m.Tracer.Log("Filter %s: does not satisfy scalar refs for hash join", f.String()) + m.Tracer.Log("Filter %s: does not satisfy scalar refs for hash join", f) return nil } default: - m.Tracer.Log("Filter %s: not an equality expression, skipping hash join", f.String()) + m.Tracer.Log("Filter %s: not an equality expression, skipping hash join", f) return nil } } @@ -1071,6 +1074,9 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + m.Tracer.PushDebugContext(e.String()) + defer m.Tracer.PopDebugContext() + var join *memo.JoinBase switch e := e.(type) { case *memo.InnerJoin: From 105e4f16cc51ce114fde129226052fda8ed4b2ac Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Mon, 27 Oct 2025 15:39:43 -0700 Subject: [PATCH 14/30] logging --- sql/memo/join_order_builder.go | 65 +++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index 407bb09198..bb9b8b3c0c 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -166,34 +166,36 @@ func (j *joinOrderBuilder) useFastReorder() bool { } func (j *joinOrderBuilder) ReorderJoin(n sql.Node) { - j.m.Tracer.Log("Starting join reordering for node: %T", []interface{}{n}...) + j.m.Tracer.PushDebugContext("ReorderJoin") + defer j.m.Tracer.PopDebugContext() + j.populateSubgraph(n) if j.useFastReorder() { - j.m.Tracer.Log("Using fast reorder algorithm (large join with %d tables)", []interface{}{len(j.vertices)}...) + j.m.Tracer.Log("Using fast reorder algorithm (large join with %d tables)", len(j.vertices)) if j.buildSingleLookupPlan() { - j.m.Tracer.Log("Successfully built single lookup plan", []interface{}{}...) + j.m.Tracer.Log("Successfully built single lookup plan") return } - j.m.Tracer.Log("Failed to build single lookup plan, falling back to exhaustive enumeration", []interface{}{}...) + j.m.Tracer.Log("Failed to build single lookup plan, falling back to exhaustive enumeration") } else if j.hasCrossJoin { - j.m.Tracer.Log("Join contains cross joins, attempting single lookup plan first", []interface{}{}...) + j.m.Tracer.Log("Join contains cross joins, attempting single lookup plan first") // Rely on FastReorder to avoid plans that drop filters with cross joins if j.buildSingleLookupPlan() { - j.m.Tracer.Log("Successfully built single lookup plan for cross join", []interface{}{}...) + j.m.Tracer.Log("Successfully built single lookup plan for cross join") return } - j.m.Tracer.Log("Failed to build single lookup plan for cross join, using exhaustive enumeration", []interface{}{}...) + j.m.Tracer.Log("Failed to build single lookup plan for cross join, using exhaustive enumeration") } // TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis // from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders // for the single-lookup plan, which would complicate things. - j.m.Tracer.Log("Building transitive closure for %d edges", []interface{}{len(j.edges)}...) + j.m.Tracer.Log("Building transitive closure for %d edges", len(j.edges)) j.ensureClosure(j.m.root) - j.m.Tracer.Log("Starting exhaustive subset enumeration", []interface{}{}...) + j.m.Tracer.Log("Starting exhaustive subset enumeration") j.dpEnumerateSubsets() - j.m.Tracer.Log("Completed join reordering", []interface{}{}...) + j.m.Tracer.Log("Completed join reordering") return } @@ -487,13 +489,15 @@ func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { } func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { - j.m.Tracer.Log("Building join operation: %s", []interface{}{n.JoinType().String()}...) + j.m.Tracer.PushDebugContext("buildJoinOp") + defer j.m.Tracer.PopDebugContext() + leftV, leftE, _ := j.populateSubgraph(n.Left()) rightV, rightE, _ := j.populateSubgraph(n.Right()) typ := n.JoinType() if typ.IsPhysical() { typ = plan.JoinTypeInner - j.m.Tracer.Log("Converted physical join type to inner join", []interface{}{}...) + j.m.Tracer.Log("Converted physical join type to inner join") } isInner := typ.IsInner() op := &operator{ @@ -505,24 +509,24 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { } filters := expression.SplitConjunction(n.JoinCond()) - j.m.Tracer.Log("Join filters: %v", []interface{}{filters}...) + j.m.Tracer.Log("Join filters: %v", filters) union := leftV.union(rightV) group, ok := j.plans[union] if !ok { // TODO: memo and root should be initialized prior to join planning left := j.plans[leftV] right := j.plans[rightV] - group = j.memoize(op.joinType, left, right, filters, nil) + group = j.memoize(op.joinType, left, right, filters) j.plans[union] = group j.m.root = group - j.m.Tracer.Log("Created new memo group for join combination", []interface{}{}...) + j.m.Tracer.Log("Created new memo group for join combination") } if !isInner { - j.m.Tracer.Log("Building non-inner edge for join type: %s", []interface{}{typ.String()}...) + j.m.Tracer.Log("Building non-inner edge for join type: %s", typ.String()) j.buildNonInnerEdge(op, filters...) } else { - j.m.Tracer.Log("Building inner edge for join type: %s", []interface{}{typ.String()}...) + j.m.Tracer.Log("Building inner edge for join type: %s", typ.String()) j.buildInnerEdge(op, filters...) } return group @@ -652,6 +656,9 @@ func (j *joinOrderBuilder) checkSize() { // adding plans to the tree when we find two sets that can // be joined func (j *joinOrderBuilder) dpEnumerateSubsets() { + j.m.Tracer.PushDebugContext("dpEnumerateSubsets") + defer j.m.Tracer.PopDebugContext() + all := j.allVertices() for subset := vertexSet(1); subset <= all; subset++ { if subset.isSingleton() { @@ -687,13 +694,14 @@ func setPrinter(all, s1, s2 vertexSet) { // addPlans finds operators that let us join (s1 op s2) and (s2 op s1). func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { - j.m.Tracer.Log("Considering join between vertex sets s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) + j.m.Tracer.PushDebugContext(fmt.Sprintf("addPlans/%s<->%s", s1, s2)) + defer j.m.Tracer.PopDebugContext() // all inner filters could be applied if j.plans[s1] == nil || j.plans[s2] == nil { // Both inputs must have plans. // need this to prevent cross-joins higher in tree - j.m.Tracer.Log("Skipping join - one or both input plans are nil", []interface{}{}...) + j.m.Tracer.Log("Skipping join - one or both input plans are nil") return } @@ -713,7 +721,7 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { } isRedundant = isRedundant || e.joinIsRedundant(s1, s2) addInnerJoin = true - j.m.Tracer.Log("Found applicable inner edge %d with filters: %v", []interface{}{i, e.filters}...) + j.m.Tracer.Log("Found applicable inner edge %d with filters: %v", i, e.filters) } } @@ -722,14 +730,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) { e := &j.edges[i] if e.applicable(s1, s2) { - j.m.Tracer.Log("Found applicable non-inner edge %d, adding join: %s", []interface{}{i, e.op.joinType.String()}...) + j.m.Tracer.Log("Found applicable non-inner edge %d, adding join: %s", i, e.op.joinType.String()) j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2)) return } if e.applicable(s2, s1) { // This is necessary because we only iterate s1 up to subset / 2 // in DPSube() - j.m.Tracer.Log("Found applicable non-inner edge %d (swapped), adding join: %s", []interface{}{i, e.op.joinType.String()}...) + j.m.Tracer.Log("Found applicable non-inner edge %d (swapped), adding join: %s", i, e.op.joinType.String()) j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1)) return } @@ -740,14 +748,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { // already been constructed, because doing so can lead to a case where an // inner join replaces a non-inner join. if innerJoinFilters == nil { - j.m.Tracer.Log("Adding cross join between s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) + j.m.Tracer.Log("Adding cross join") j.addJoin(plan.JoinTypeCross, s1, s2, nil, nil, isRedundant) } else { - j.m.Tracer.Log("Adding inner join between s1=%s, s2=%s with filters: %v", []interface{}{s1.String(), s2.String(), innerJoinFilters}...) + j.m.Tracer.Log("Adding inner join with filters: %v", innerJoinFilters) j.addJoin(plan.JoinTypeInner, s1, s2, innerJoinFilters, nil, isRedundant) } } else { - j.m.Tracer.Log("No applicable edges found for join between s1=%s, s2=%s", []interface{}{s1.String(), s2.String()}...) + j.m.Tracer.Log("No applicable edges found for join") } } @@ -762,7 +770,7 @@ func (j *joinOrderBuilder) addJoin(op plan.JoinType, s1, s2 vertexSet, joinFilte group, ok := j.plans[union] if !isRedundant { if !ok { - group = j.memoize(op, left, right, joinFilter, selFilters) + group = j.memoize(op, left, right, joinFilter) j.plans[union] = group } else { j.addJoinToGroup(op, left, right, joinFilter, selFilters, group) @@ -810,7 +818,6 @@ func (j *joinOrderBuilder) memoize( left *ExprGroup, right *ExprGroup, joinFilter []sql.Expression, - selFilter []sql.Expression, ) *ExprGroup { rel := j.constructJoin(op, left, right, joinFilter, nil) return j.m.NewExprGroup(rel) @@ -937,9 +944,9 @@ func (e *edge) populateEdgeProps(tableIds []sql.TableId, edges []edge) { e.freeVars = cols // TODO implement, we currently limit transforms assuming no strong null safety - //e.nullRejectedRels = e.nullRejectingTables(nullAccepting, allNames, allV) + // e.nullRejectedRels = e.nullRejectingTables(nullAccepting, allNames, allV) - //SES is vertexSet of all tables referenced in cols + // SES is vertexSet of all tables referenced in cols e.calcSES(tables, tableIds) // use CD-C to expand dependency sets for operators // front load preventing applicable operators that would push crossjoins From befaca28411aa320f9b58dd8343747e285508e0f Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 28 Oct 2025 11:46:36 -0700 Subject: [PATCH 15/30] logging (is this thing on) --- sql/analyzer/indexed_joins.go | 12 ++++++++--- sql/memo/expr_group.go | 37 ++++++++++++++++++++++++++++++++++ sql/memo/join_order_builder.go | 10 +++++---- sql/memo/memo.go | 31 ++++++++++++++++------------ 4 files changed, 70 insertions(+), 20 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 96eb8b674f..69ed7f9c1f 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -259,7 +259,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(e.String()) + m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) defer m.Tracer.PopDebugContext() var right *memo.ExprGroup @@ -663,6 +663,9 @@ func addRightSemiJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + defer m.Tracer.PopDebugContext() + semi, ok := e.(*memo.SemiJoin) if !ok { return nil @@ -826,7 +829,7 @@ func addHashJoins(m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(e.String()) + m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) defer m.Tracer.PopDebugContext() switch e.(type) { @@ -966,6 +969,9 @@ func addRangeHeapJoin(m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + defer m.Tracer.PopDebugContext() + switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: default: @@ -1074,7 +1080,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(e.String()) + m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) defer m.Tracer.PopDebugContext() var join *memo.JoinBase diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index 188879edc0..85faf45adc 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -16,6 +16,7 @@ package memo import ( "fmt" + "io" "strings" "github.com/dolthub/go-mysql-server/sql" @@ -37,6 +38,42 @@ type ExprGroup struct { HintOk bool } +// Format implements the fmt.Formatter interface. +func (e *ExprGroup) Format(f fmt.State, verb rune) { + // TODO: is e.First the right thing to print here? + expr := e.Best + if expr == nil { + expr = e.First + } + switch ex := expr.(type) { + case *TableAlias: + io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + if verb == 'v' && f.Flag('+') { + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") + } + case *TableScan: + io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + if verb == 'v' && f.Flag('+') { + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") + } + case *SubqueryAlias: + io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + if verb == 'v' && f.Flag('+') { + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") + } + default: + io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + } +} + +var _ fmt.Formatter = (*ExprGroup)(nil) + func newExprGroup(m *Memo, id GroupId, expr exprType) *ExprGroup { // bit of circularity: |grp| references |rel|, |rel| references |grp|, // and |relProps| references |rel| and |grp| info. diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index bb9b8b3c0c..e7db698871 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -191,9 +191,7 @@ func (j *joinOrderBuilder) ReorderJoin(n sql.Node) { // TODO: consider if buildSingleLookupPlan can/should run after ensureClosure. This could allow us to use analysis // from ensureClosure in buildSingleLookupPlan, but the equivalence sets could create multiple possible join orders // for the single-lookup plan, which would complicate things. - j.m.Tracer.Log("Building transitive closure for %d edges", len(j.edges)) j.ensureClosure(j.m.root) - j.m.Tracer.Log("Starting exhaustive subset enumeration") j.dpEnumerateSubsets() j.m.Tracer.Log("Completed join reordering") return @@ -435,6 +433,9 @@ func (j *joinOrderBuilder) findVertexFromGroup(grp GroupId) vertexIndex { // makeTransitiveEdge constructs a new join tree edge and memo group // on an equality filter between two columns. func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { + j.m.Tracer.PushDebugContext("makeTransitiveEdge") + defer j.m.Tracer.PopDebugContext() + var vert vertexSet v1, _, v1found := j.findVertexFromCol(col1) v2, _, v2found := j.findVertexFromCol(col2) @@ -483,9 +484,10 @@ func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { return } - j.edges = append(j.edges, *j.makeEdge(op, expression.NewEquals(gf1, gf2))) + eq := expression.NewEquals(gf1, gf2) + j.m.Tracer.Log("adding edge %s", eq) + j.edges = append(j.edges, *j.makeEdge(op, eq)) j.innerEdges.Add(len(j.edges) - 1) - } func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 459b26a9ac..d20d457526 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -910,33 +910,38 @@ type RangeHeap struct { RangeClosedOnUpperBound bool } -// FormatExpr returns a string representation of a relExpr for debugging purposes. +// FormatExpr formats an exprType for debugging purposes, compatible with fmt.Formatter func FormatExpr(r exprType, s fmt.State, verb rune) { + verbString := fmt.Sprintf("%%%c", verb) + if verb == 'v' && s.Flag('+') { + verbString = "%+v" + } switch r := r.(type) { case *CrossJoin: - io.WriteString(s, fmt.Sprintf("crossjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("crossjoin "+verbString+" "+verbString, r.Left, r.Right)) case *InnerJoin: - io.WriteString(s, fmt.Sprintf("innerjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("innerjoin "+verbString+" "+verbString, r.Left, r.Right)) case *LeftJoin: - io.WriteString(s, fmt.Sprintf("leftjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("leftjoin "+verbString+" "+verbString, r.Left, r.Right)) case *SemiJoin: - io.WriteString(s, fmt.Sprintf("semijoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("semijoin "+verbString+" "+verbString, r.Left, r.Right)) case *AntiJoin: - io.WriteString(s, fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("antijoin "+verbString+" "+verbString, r.Left, r.Right)) case *LookupJoin: - io.WriteString(s, fmt.Sprintf("lookupjoin %d %d on %s", r.Left.Id, r.Right.Id, r.Lookup.Index.idx.ID())) + io.WriteString(s, fmt.Sprintf("lookupjoin "+verbString+" "+verbString+" on %s", + r.Left, r.Right, r.Lookup.Index.idx.ID())) case *RangeHeapJoin: - io.WriteString(s, fmt.Sprintf("rangeheapjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("rangeheapjoin "+verbString+" "+verbString, r.Left, r.Right)) case *ConcatJoin: - io.WriteString(s, fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("concatjoin "+verbString+" "+verbString, r.Left, r.Right)) case *HashJoin: - io.WriteString(s, fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("hashjoin "+verbString+" "+verbString, r.Left, r.Right)) case *MergeJoin: - io.WriteString(s, fmt.Sprintf("mergejoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("mergejoin "+verbString+" "+verbString, r.Left, r.Right)) case *FullOuterJoin: - io.WriteString(s, fmt.Sprintf("fullouterjoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("fullouterjoin "+verbString+" "+verbString, r.Left, r.Right)) case *LateralJoin: - io.WriteString(s, fmt.Sprintf("lateraljoin %d %d", r.Left.Id, r.Right.Id)) + io.WriteString(s, fmt.Sprintf("lateraljoin "+verbString+" "+verbString, r.Left, r.Right)) case *TableScan: io.WriteString(s, fmt.Sprintf("tablescan: %s", r.Name())) case *IndexScan: From c8b42ad518464516a08dc40f1e7893066133ea2c Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 28 Oct 2025 14:33:07 -0700 Subject: [PATCH 16/30] logging fixes --- sql/analyzer/costed_index_scan.go | 6 +++--- sql/memo/expr_group.go | 7 +++++-- sql/memo/memo.go | 10 ++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index 5b12889612..68be635157 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -131,7 +131,7 @@ func costedIndexLookup(ctx *sql.Context, n sql.Node, a *Analyzer, iat sql.IndexA if err != nil { return n, transform.SameTree, err } - // TODO(next): this is getting a GMSCast node and not getting an index assigned here + ita, stats, filters, err := getCostedIndexScan(ctx, a.Catalog, rt, indexes, SplitConjunction(oldFilter), qFlags) if err != nil || ita == nil { return n, transform.SameTree, err @@ -336,7 +336,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta func addIndexScans(ctx *sql.Context, m *memo.Memo) error { m.Tracer.PushDebugContext("addIndexScans") defer m.Tracer.PopDebugContext() - + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { filter, ok := e.(*memo.Filter) if !ok { @@ -931,7 +931,7 @@ func (b *indexScanRangeBuilder) rangeBuildOr(f *iScanOr, inScan bool) (sql.MySQL // imprecise filters cannot be removed b.markImprecise(f) - //todo union the or ranges + // todo union the or ranges var ret sql.MySQLRangeCollection for _, c := range f.children { var ranges sql.MySQLRangeCollection diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index 85faf45adc..8bbf42e3ca 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -40,7 +40,6 @@ type ExprGroup struct { // Format implements the fmt.Formatter interface. func (e *ExprGroup) Format(f fmt.State, verb rune) { - // TODO: is e.First the right thing to print here? expr := e.Best if expr == nil { expr = e.First @@ -68,7 +67,11 @@ func (e *ExprGroup) Format(f fmt.State, verb rune) { io.WriteString(f, "]") } default: - io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + verbString := fmt.Sprintf("%%%c", verb) + if verb == 'v' && f.Flag('+') { + verbString = "%+v" + } + io.WriteString(f, fmt.Sprintf("%d ("+verbString+")", ex.Group().Id, ex)) } } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index d20d457526..667dd18702 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -433,7 +433,9 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { return nil } - m.Tracer.Log("Optimizing group %d (%s)", grp.Id, grp.String()) + m.Tracer.PushDebugContext(fmt.Sprintf("optimizeMemoGroup/%d", grp.Id)) + defer m.Tracer.PopDebugContext() + n := grp.First if _, ok := n.(SourceRel); ok { // We should order the search bottom-up so that physical operators @@ -445,12 +447,12 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { grp.HintOk = true grp.Best = grp.First grp.Best.SetDistinct(NoDistinctOp) - m.Tracer.Log("Group %d: source relation, setting as best plan", grp.Id) + m.Tracer.Log("source relation, setting as best plan", grp) return nil } for n != nil { - m.Tracer.Log("Evaluating plan %s in group %d", n, grp.Id) + m.Tracer.Log("Evaluating plan (%+v)", n) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -480,7 +482,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { n.SetCost(relCost) cost += relCost - m.Tracer.Log("Plan %s: relCost=%.2f, totalCost=%.2f", n, relCost, cost) + m.Tracer.Log("Plan %+v: relCost=%.2f, totalCost=%.2f", n, relCost, cost) m.updateBest(grp, n, cost) n = n.Next() } From adce7681755595a54a6b38de720fdf2debbf3a9b Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 28 Oct 2025 16:16:59 -0700 Subject: [PATCH 17/30] better debug cost printing --- sql/analyzer/indexed_joins.go | 2 +- sql/memo/expr_group.go | 72 +++++++++++++++++++++++---------- sql/memo/memo.go | 75 +++++++++++++++++++---------------- 3 files changed, 91 insertions(+), 58 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 69ed7f9c1f..e0db01bd83 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -225,7 +225,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco if err != nil { return nil, err } - m.Tracer.Log("Completed cost-based optimization") + m.Tracer.Log("Completed cost-based optimization: %s", m.CostDebugString()) if a.Verbose && a.Debug { a.Log("%s", m.String()) diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index 8bbf42e3ca..a59031322c 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -17,6 +17,7 @@ package memo import ( "fmt" "io" + "sort" "strings" "github.com/dolthub/go-mysql-server/sql" @@ -47,31 +48,25 @@ func (e *ExprGroup) Format(f fmt.State, verb rune) { switch ex := expr.(type) { case *TableAlias: io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) - if verb == 'v' && f.Flag('+') { - io.WriteString(f, "[") - io.WriteString(f, ex.Name()) - io.WriteString(f, "]") - } + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") case *TableScan: io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) - if verb == 'v' && f.Flag('+') { - io.WriteString(f, "[") - io.WriteString(f, ex.Name()) - io.WriteString(f, "]") - } + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") case *SubqueryAlias: io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) - if verb == 'v' && f.Flag('+') { - io.WriteString(f, "[") - io.WriteString(f, ex.Name()) - io.WriteString(f, "]") - } + io.WriteString(f, "[") + io.WriteString(f, ex.Name()) + io.WriteString(f, "]") default: - verbString := fmt.Sprintf("%%%c", verb) if verb == 'v' && f.Flag('+') { - verbString = "%+v" + io.WriteString(f, fmt.Sprintf("%d{%+v}", ex.Group().Id, ex)) + } else { + io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) } - io.WriteString(f, fmt.Sprintf("%d ("+verbString+")", ex.Group().Id, ex)) } } @@ -108,11 +103,9 @@ func (e *ExprGroup) children() []*ExprGroup { if !ok { return e.children() } - n := relExpr children := make([]*ExprGroup, 0) - for n != nil { + for n := relExpr; n != nil; n = n.Next() { children = append(children, n.Children()...) - n = n.Next() } return children } @@ -216,7 +209,7 @@ func (e *ExprGroup) String() string { // if source relation we want the cardinality cost = float64(n.Group().RelProps.GetStats().RowCount()) } - b.WriteString(fmt.Sprintf(" %.1f", n.Cost())) + b.WriteString(fmt.Sprintf(" %.1f", cost)) childCost := 0.0 for _, c := range n.Children() { @@ -235,3 +228,38 @@ func (e *ExprGroup) String() string { } return b.String() } + +// CostTreeString returns a string representation of the expression group for use in cost debug printing +func (e *ExprGroup) CostTreeString(prefix string) string { + b := strings.Builder{} + n := e.First + costSortedGroups := make([]RelExpr, 0) + for n != nil { + costSortedGroups = append(costSortedGroups, n) + n = n.Next() + } + sort.Slice(costSortedGroups, func(i, j int) bool { + return costSortedGroups[i].Cost() < costSortedGroups[j].Cost() + }) + + for i, n := range costSortedGroups { + b.WriteString("\n") + + beg := prefix + "├── " + if i == len(costSortedGroups)-1 { + beg = prefix + "└── " + } + b.WriteString(fmt.Sprintf("%s(%s", beg, n)) + if e.Best != nil { + cost := n.Cost() + if cost == 0 { + // if source relation we want the cardinality + cost = float64(n.Group().RelProps.GetStats().RowCount()) + } + b.WriteString(fmt.Sprintf(" %.1f", cost)) + } + b.WriteString(")") + } + + return b.String() +} diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 667dd18702..3ca74db51f 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -452,7 +452,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { } for n != nil { - m.Tracer.Log("Evaluating plan (%+v)", n) + m.Tracer.Log("Evaluating plan (%s)", n) var cost float64 for _, g := range n.Children() { err := m.optimizeMemoGroup(g) @@ -482,7 +482,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { n.SetCost(relCost) cost += relCost - m.Tracer.Log("Plan %+v: relCost=%.2f, totalCost=%.2f", n, relCost, cost) + m.Tracer.Log("Plan %s: relCost=%.2f, totalCost=%.2f", n, relCost, cost) m.updateBest(grp, n, cost) n = n.Next() } @@ -612,17 +612,13 @@ func (m *Memo) SetJoinOp(op HintType, left, right string) { var _ fmt.Stringer = (*Memo)(nil) -// var _ fmt.Formatter = (*Memo)(nil) - func (m *Memo) String() string { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) if m.root != nil { - r := m.root.First - for r != nil { + for r := m.root.First; r != nil; r = r.Next() { groups = append(groups, r.Group()) groups = append(groups, r.Children()...) - r = r.Next() } } for len(groups) > 0 { @@ -648,9 +644,43 @@ func (m *Memo) String() string { return b.String() } -// func (m *Memo) Format(f fmt.State, verb rune) { -// return m.String() -// } +func (m *Memo) CostDebugString() interface{} { + exprs := make([]string, m.cnt) + groups := make([]*ExprGroup, 0) + if m.root != nil { + for r := m.root.First; r != nil; r = r.Next() { + groups = append(groups, r.Group()) + groups = append(groups, r.Children()...) + } + } + for len(groups) > 0 { + newGroups := make([]*ExprGroup, 0) + for _, g := range groups { + if exprs[int(TableIdForSource(g.Id))] != "" { + continue + } + + prefix := "| " + if len(groups) == 1 { + prefix = " " + } + + exprs[int(TableIdForSource(g.Id))] = g.CostTreeString(prefix) + newGroups = append(newGroups, g.children()...) + } + groups = newGroups + } + b := strings.Builder{} + b.WriteString("costed memo:\n") + beg := "├──" + for i, g := range exprs { + if i == len(exprs)-1 { + beg = "└──" + } + b.WriteString(fmt.Sprintf("%s G%d: %s\n", beg, i+1, g)) + } + return b.String() +} type tableProps struct { grpToName map[GroupId]string @@ -981,28 +1011,3 @@ func FormatExpr(r exprType, s fmt.State, verb rune) { panic(fmt.Sprintf("unknown RelExpr type: %T", r)) } } - -// Format implements fmt.Formatter and can be formatted by the fmt package. The -// following verbs are supported -// -// %s print the error. If the error has a Cause it will be -// printed recursively -// %v see %s -// %+v extended format. Each Frame of the error's StackTrace will -// be printed in detail. -// func FormatExpr(s fmt.State, verb rune) { -// switch verb { -// case 'v': -// if s.Flag('+') { -// io.WriteString(s, err.Error()+"\n") -// err.stack.Format(s, verb) -// return -// } -// -// fallthrough -// case 's': -// io.WriteString(s, err.Error()) -// case 'q': -// fmt.Fprintf(s, "%q", err.Error()) -// } -// } From 8d8f8003a679d5fadc1d4c8ae38d6feedfb49eba Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 28 Oct 2025 17:06:26 -0700 Subject: [PATCH 18/30] eliminated duplicates from ExprGroup.children, more logging changes --- sql/memo/expr_group.go | 29 ++++++++++++++--------------- sql/memo/memo.go | 23 +++++++++++++++++------ sql/memo/rel_props.go | 4 ++-- sql/memo/select_hints.go | 2 +- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index a59031322c..406679bdc0 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -17,6 +17,8 @@ package memo import ( "fmt" "io" + "iter" + "maps" "sort" "strings" @@ -27,12 +29,11 @@ import ( // ExprGroup is a linked list of plans that return the same result set // defined by row count and schema. type ExprGroup struct { - m *Memo - RelProps *relProps - First RelExpr - Best RelExpr - _children []*ExprGroup - Cost float64 + m *Memo + RelProps *relProps + First RelExpr + Best RelExpr + Cost float64 Id GroupId Done bool @@ -98,16 +99,14 @@ func (e *ExprGroup) Prepend(rel RelExpr) { // children returns a unioned list of child ExprGroup for // every logical plan in this group. -func (e *ExprGroup) children() []*ExprGroup { - relExpr, ok := e.First.(RelExpr) - if !ok { - return e.children() - } - children := make([]*ExprGroup, 0) - for n := relExpr; n != nil; n = n.Next() { - children = append(children, n.Children()...) +func (e *ExprGroup) children() iter.Seq[*ExprGroup] { + children := make(map[GroupId]*ExprGroup) + for n := e.First; n != nil; n = n.Next() { + for _, n := range n.Children() { + children[n.Id] = n + } } - return children + return maps.Values(children) } // updateBest updates a group's Best to the given expression if the cost is lower than the current best. diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 3ca74db51f..3f4d706462 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -17,6 +17,7 @@ package memo import ( "fmt" "io" + "slices" "strings" "github.com/dolthub/go-mysql-server/sql" @@ -628,7 +629,7 @@ func (m *Memo) String() string { continue } exprs[int(TableIdForSource(g.Id))] = g.String() - newGroups = append(newGroups, g.children()...) + slices.AppendSeq(newGroups, g.children()) } groups = newGroups } @@ -647,12 +648,23 @@ func (m *Memo) String() string { func (m *Memo) CostDebugString() interface{} { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) + uniqGroups := make(map[GroupId]bool) + + b := strings.Builder{} + b.WriteString(fmt.Sprintf("costed memo (root group %d):\n", m.root.Id)) + if m.root != nil { for r := m.root.First; r != nil; r = r.Next() { groups = append(groups, r.Group()) - groups = append(groups, r.Children()...) + uniqGroups[r.Group().Id] = true + for _, c := range r.Children() { + uniqGroups[c.Id] = true + groups = append(groups, c) + } } } + + numGroups := len(uniqGroups) for len(groups) > 0 { newGroups := make([]*ExprGroup, 0) for _, g := range groups { @@ -661,17 +673,16 @@ func (m *Memo) CostDebugString() interface{} { } prefix := "| " - if len(groups) == 1 { + if int(g.Id) == numGroups { prefix = " " } exprs[int(TableIdForSource(g.Id))] = g.CostTreeString(prefix) - newGroups = append(newGroups, g.children()...) + slices.AppendSeq(newGroups, g.children()) } groups = newGroups } - b := strings.Builder{} - b.WriteString("costed memo:\n") + beg := "├──" for i, g := range exprs { if i == len(exprs)-1 { diff --git a/sql/memo/rel_props.go b/sql/memo/rel_props.go index e0ebc2c0b3..2601e61f42 100644 --- a/sql/memo/rel_props.go +++ b/sql/memo/rel_props.go @@ -319,7 +319,7 @@ func (m *Memo) CardMemoGroups(ctx *sql.Context, g *ExprGroup) { if g.RelProps.stat != nil { return } - for _, g := range g.children() { + for g := range g.children() { m.CardMemoGroups(ctx, g) } s := m.statsForRel(ctx, g.First) @@ -329,7 +329,7 @@ func (m *Memo) CardMemoGroups(ctx *sql.Context, g *ExprGroup) { func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic { m.Tracer.PushDebugContext("statsForRel") defer m.Tracer.PopDebugContext() - + var stat sql.Statistic switch rel := rel.(type) { case JoinRel: diff --git a/sql/memo/select_hints.go b/sql/memo/select_hints.go index c197181453..a3a69c3def 100644 --- a/sql/memo/select_hints.go +++ b/sql/memo/select_hints.go @@ -199,7 +199,7 @@ func (o joinOrderHint) build(grp *ExprGroup) { } o.groups[grp.Id] = s - for _, g := range grp.children() { + for g := range grp.children() { if _, ok := o.groups[g.Id]; !ok { // avoid duplicate work o.build(g) From e3fd09c25c71116b4c69aabf98c661603f9453b2 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 11:08:46 -0700 Subject: [PATCH 19/30] bug fix for children(), added iterator method --- sql/memo/expr_group.go | 6 ++---- sql/memo/memo.go | 39 ++++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index 406679bdc0..e0a3dcd4d3 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -101,7 +101,7 @@ func (e *ExprGroup) Prepend(rel RelExpr) { // every logical plan in this group. func (e *ExprGroup) children() iter.Seq[*ExprGroup] { children := make(map[GroupId]*ExprGroup) - for n := e.First; n != nil; n = n.Next() { + for n := range IterRelExprs(e.First) { for _, n := range n.Children() { children[n.Id] = n } @@ -231,11 +231,9 @@ func (e *ExprGroup) String() string { // CostTreeString returns a string representation of the expression group for use in cost debug printing func (e *ExprGroup) CostTreeString(prefix string) string { b := strings.Builder{} - n := e.First costSortedGroups := make([]RelExpr, 0) - for n != nil { + for n := e.First; n != nil; n = n.Next() { costSortedGroups = append(costSortedGroups, n) - n = n.Next() } sort.Slice(costSortedGroups, func(i, j int) bool { return costSortedGroups[i].Cost() < costSortedGroups[j].Cost() diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 3f4d706462..655449ed66 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -17,6 +17,7 @@ package memo import ( "fmt" "io" + "iter" "slices" "strings" @@ -617,11 +618,10 @@ func (m *Memo) String() string { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) if m.root != nil { - for r := m.root.First; r != nil; r = r.Next() { - groups = append(groups, r.Group()) - groups = append(groups, r.Children()...) - } + groups = append(groups, m.root.First.Group()) } + + // breadth-first traversal of memo groups via their children for len(groups) > 0 { newGroups := make([]*ExprGroup, 0) for _, g := range groups { @@ -629,7 +629,7 @@ func (m *Memo) String() string { continue } exprs[int(TableIdForSource(g.Id))] = g.String() - slices.AppendSeq(newGroups, g.children()) + newGroups = slices.AppendSeq(newGroups, g.children()) } groups = newGroups } @@ -648,23 +648,15 @@ func (m *Memo) String() string { func (m *Memo) CostDebugString() interface{} { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) - uniqGroups := make(map[GroupId]bool) b := strings.Builder{} b.WriteString(fmt.Sprintf("costed memo (root group %d):\n", m.root.Id)) if m.root != nil { - for r := m.root.First; r != nil; r = r.Next() { - groups = append(groups, r.Group()) - uniqGroups[r.Group().Id] = true - for _, c := range r.Children() { - uniqGroups[c.Id] = true - groups = append(groups, c) - } - } + groups = append(groups, m.root.First.Group()) } - numGroups := len(uniqGroups) + // breadth-first traversal of memo groups via their children for len(groups) > 0 { newGroups := make([]*ExprGroup, 0) for _, g := range groups { @@ -673,12 +665,12 @@ func (m *Memo) CostDebugString() interface{} { } prefix := "| " - if int(g.Id) == numGroups { + if int(g.Id) == int(m.cnt) { prefix = " " } exprs[int(TableIdForSource(g.Id))] = g.CostTreeString(prefix) - slices.AppendSeq(newGroups, g.children()) + newGroups = slices.AppendSeq(newGroups, g.children()) } groups = newGroups } @@ -767,6 +759,19 @@ func relKey(r RelExpr) uint64 { return uint64(key) } +// IterRelExprs returns an iterator over the linked list of RelExprs beginning at the head e +func IterRelExprs(e RelExpr) iter.Seq[RelExpr] { + curr := e + return func(yield func(RelExpr) bool) { + for curr != nil { + if !yield(curr) { + return + } + curr = curr.Next() + } + } +} + type distinctOp uint8 const ( From b1426ea360ea46794a226b3258555ffe5fd3debe Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 11:38:04 -0700 Subject: [PATCH 20/30] New Iter method --- sql/memo/dfs.go | 5 ++--- sql/memo/expr_group.go | 17 +++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sql/memo/dfs.go b/sql/memo/dfs.go index 1990a5b02f..1b4b3520de 100644 --- a/sql/memo/dfs.go +++ b/sql/memo/dfs.go @@ -26,8 +26,8 @@ func dfsRelHelper(grp *ExprGroup, seen map[GroupId]struct{}, cb func(rel RelExpr } else { seen[grp.Id] = struct{}{} } - n := grp.First - for n != nil { + + for n := range IterRelExprs(grp.First) { for _, c := range n.Children() { err := dfsRelHelper(c, seen, cb) if err != nil { @@ -38,7 +38,6 @@ func dfsRelHelper(grp *ExprGroup, seen map[GroupId]struct{}, cb func(rel RelExpr if err != nil { return err } - n = n.Next() } return nil } diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index e0a3dcd4d3..a37acd3449 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -19,6 +19,7 @@ import ( "io" "iter" "maps" + "slices" "sort" "strings" @@ -97,11 +98,16 @@ func (e *ExprGroup) Prepend(rel RelExpr) { rel.SetNext(first) } +// Iter returns an iterator over the RelExprs in this ExprGroup. +func (e *ExprGroup) Iter() iter.Seq[RelExpr] { + return IterRelExprs(e.First) +} + // children returns a unioned list of child ExprGroup for // every logical plan in this group. func (e *ExprGroup) children() iter.Seq[*ExprGroup] { children := make(map[GroupId]*ExprGroup) - for n := range IterRelExprs(e.First) { + for n := range e.Iter() { for _, n := range n.Children() { children[n.Id] = n } @@ -197,9 +203,8 @@ func (e *ExprGroup) fixTableScanPath() bool { func (e *ExprGroup) String() string { b := strings.Builder{} - n := e.First sep := "" - for n != nil { + for n := range e.Iter() { b.WriteString(sep) b.WriteString(fmt.Sprintf("(%s", n)) if e.Best != nil { @@ -223,7 +228,6 @@ func (e *ExprGroup) String() string { b.WriteString(")") } sep = " " - n = n.Next() } return b.String() } @@ -231,10 +235,7 @@ func (e *ExprGroup) String() string { // CostTreeString returns a string representation of the expression group for use in cost debug printing func (e *ExprGroup) CostTreeString(prefix string) string { b := strings.Builder{} - costSortedGroups := make([]RelExpr, 0) - for n := e.First; n != nil; n = n.Next() { - costSortedGroups = append(costSortedGroups, n) - } + costSortedGroups := slices.Collect(e.Iter()) sort.Slice(costSortedGroups, func(i, j int) bool { return costSortedGroups[i].Cost() < costSortedGroups[j].Cost() }) From 2d17565c908335909403ed66cd424f318e8f0fcc Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 13:08:14 -0700 Subject: [PATCH 21/30] physical plan debug string --- sql/analyzer/indexed_joins.go | 2 ++ sql/memo/dfs.go | 2 +- sql/memo/expr_group.go | 14 ++++++++++++++ sql/memo/memo.go | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index e0db01bd83..e9264a3e54 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -234,6 +234,8 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco scope.JoinTrees = append(scope.JoinTrees, m.String()) } + m.Tracer.Log("Best root plan: %s", m.BestPlanDebugString()) + return m.BestRootPlan(ctx) } diff --git a/sql/memo/dfs.go b/sql/memo/dfs.go index 1b4b3520de..1e07392abc 100644 --- a/sql/memo/dfs.go +++ b/sql/memo/dfs.go @@ -27,7 +27,7 @@ func dfsRelHelper(grp *ExprGroup, seen map[GroupId]struct{}, cb func(rel RelExpr seen[grp.Id] = struct{}{} } - for n := range IterRelExprs(grp.First) { + for n := range grp.Iter() { for _, c := range n.Children() { err := dfsRelHelper(c, seen, cb) if err != nil { diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index a37acd3449..e1e17f1b7e 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -261,3 +261,17 @@ func (e *ExprGroup) CostTreeString(prefix string) string { return b.String() } + +// BestPlanDebugString returns a string representation of the physical best plan for use in cost debug printing +func (e *ExprGroup) BestPlanDebugString() string { + tp := sql.NewTreePrinter() + tp.WriteNode(fmt.Sprintf("G%d [%s] Cost: %.1f", e.Id, e.Best)) + children := e.Best.Children() + childrenStrings := make([]string, len(children)) + for i, c := range children { + childrenStrings[i] = c.BestPlanDebugString() + } + + tp.WriteChildren(childrenStrings...) + return tp.String() +} diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 655449ed66..463824e603 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -645,6 +645,9 @@ func (m *Memo) String() string { return b.String() } +// CostDebugString returns a string representation of the memo with cost +// information for each expression, ordered by best to worst for each group, +// displayed in a tree structure. func (m *Memo) CostDebugString() interface{} { exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) @@ -685,6 +688,17 @@ func (m *Memo) CostDebugString() interface{} { return b.String() } +// BestPlanDebugString returns a physical tree representation of the best plan for each group in the tree that is +// referenced by the best plan in the root. This differs from other debug strings in that it represents the groups +// as children of their parents, rather than as a flat list, and only includes groups that are part of the best plan. +func (m *Memo) BestPlanDebugString() interface{} { + if m.root == nil { + return "" + } + + return m.root.BestPlanDebugString() +} + type tableProps struct { grpToName map[GroupId]string nameToGrp map[string]GroupId From 1c3a130e340f6f0f1d0da6e586b49b2ab1e2f571 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 13:10:24 -0700 Subject: [PATCH 22/30] bug fixes --- sql/analyzer/indexed_joins.go | 4 ++-- sql/memo/expr_group.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index e9264a3e54..266b1f8660 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -234,8 +234,8 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco scope.JoinTrees = append(scope.JoinTrees, m.String()) } - m.Tracer.Log("Best root plan: %s", m.BestPlanDebugString()) - + m.Tracer.Log("Best root plan:\n%s", m.BestPlanDebugString()) + return m.BestRootPlan(ctx) } diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index e1e17f1b7e..f0c99ee250 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -265,7 +265,7 @@ func (e *ExprGroup) CostTreeString(prefix string) string { // BestPlanDebugString returns a string representation of the physical best plan for use in cost debug printing func (e *ExprGroup) BestPlanDebugString() string { tp := sql.NewTreePrinter() - tp.WriteNode(fmt.Sprintf("G%d [%s] Cost: %.1f", e.Id, e.Best)) + tp.WriteNode(fmt.Sprintf("G%d [%s] Cost: %.1f", e.Id, e.Best, e.Best.Cost())) children := e.Best.Children() childrenStrings := make([]string, len(children)) for i, c := range children { From 0e07997129004ca243d6fa8a907b96eec8ad4d65 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 13:30:18 -0700 Subject: [PATCH 23/30] formatting --- sql/analyzer/indexed_joins.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 266b1f8660..a01e561f13 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -225,7 +225,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco if err != nil { return nil, err } - m.Tracer.Log("Completed cost-based optimization: %s", m.CostDebugString()) + m.Tracer.Log("Completed cost-based optimization:\n%s", m.CostDebugString()) if a.Verbose && a.Debug { a.Log("%s", m.String()) @@ -853,12 +853,12 @@ func addHashJoins(m *memo.Memo) error { switch f := f.(type) { case *expression.Equals: if satisfiesScalarRefs(f.Left(), join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { + satisfiesScalarRefs(f.Right(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Right()) toExpr = append(toExpr, f.Left()) m.Tracer.Log("Filter %s: found a left->right hash key mapping", f) } else if satisfiesScalarRefs(f.Right(), join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { + satisfiesScalarRefs(f.Left(), join.Right.RelProps.OutputTables()) { fromExpr = append(fromExpr, f.Left()) toExpr = append(toExpr, f.Right()) m.Tracer.Log("Filter %s: found a right->left hash key mapping", f) @@ -997,8 +997,8 @@ func addRangeHeapJoin(m *memo.Memo) error { for _, filter := range getRangeFilters(join.Filter) { if !(satisfiesScalarRefs(filter.value, join.Left.RelProps.OutputTables()) && - satisfiesScalarRefs(filter.min, join.Right.RelProps.OutputTables()) && - satisfiesScalarRefs(filter.max, join.Right.RelProps.OutputTables())) { + satisfiesScalarRefs(filter.min, join.Right.RelProps.OutputTables()) && + satisfiesScalarRefs(filter.max, join.Right.RelProps.OutputTables())) { return nil } // For now, only match expressions that are exactly a column reference. @@ -1127,7 +1127,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { r := eq.Right() if !expressionReferencesOneColumn(l) || - !expressionReferencesOneColumn(r) { + !expressionReferencesOneColumn(r) { continue } @@ -1138,10 +1138,10 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { var swap bool if expressionReferencesTable(l, leftTabId) && - expressionReferencesTable(r, rightTabId) { + expressionReferencesTable(r, rightTabId) { } else if expressionReferencesTable(r, leftTabId) && - expressionReferencesTable(l, rightTabId) { + expressionReferencesTable(l, rightTabId) { swap = true l, r = r, l } else { From 36fbcde059a62ce4fbfadd85a026f993f2f57097 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 29 Oct 2025 15:21:18 -0700 Subject: [PATCH 24/30] Added new pseudo-extension point, which was why Doltgres was not finding indexes for certain queries --- sql/analyzer/analyzer.go | 3 +++ sql/memo/join_order_builder.go | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/analyzer/analyzer.go b/sql/analyzer/analyzer.go index f545ef4209..0e710c26e3 100644 --- a/sql/analyzer/analyzer.go +++ b/sql/analyzer/analyzer.go @@ -36,6 +36,7 @@ import ( const debugAnalyzerKey = "DEBUG_ANALYZER" const verboseAnalyzerKey = "VERBOSE_ANALYZER" +const traceAnalyzerKey = "TRACE_ANALYZER" const maxAnalysisIterations = 8 @@ -215,6 +216,7 @@ func (s simpleLogFormatter) Format(entry *logrus.Entry) ([]byte, error) { func (ab *Builder) Build() *Analyzer { _, debug := os.LookupEnv(debugAnalyzerKey) _, verbose := os.LookupEnv(verboseAnalyzerKey) + _, trace := os.LookupEnv(traceAnalyzerKey) var batches = []*Batch{ { Desc: "pre-analyzer", @@ -266,6 +268,7 @@ func (ab *Builder) Build() *Analyzer { return &Analyzer{ Debug: debug || ab.debug, Verbose: verbose, + Trace: trace, contextStack: make([]string, 0), Batches: batches, Catalog: NewCatalog(ab.provider), diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index e7db698871..ee8b0ac6eb 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -26,6 +26,10 @@ import ( "github.com/dolthub/go-mysql-server/sql/plan" ) +// SplitConjunction is a pseudo-extension point of expression.SplitConjunction, used to alter the logic +// for different integrators. +var SplitConjunction func(expr sql.Expression) []sql.Expression = expression.SplitConjunction + // joinOrderBuilder enumerates valid plans for a join tree. We build the join // tree bottom up, first joining single nodes with join condition "edges", then // single nodes to hypernodes (1+n), and finally hyper nodes to @@ -510,7 +514,7 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { rightEdges: rightE, } - filters := expression.SplitConjunction(n.JoinCond()) + filters := SplitConjunction(n.JoinCond()) j.m.Tracer.Log("Join filters: %v", filters) union := leftV.union(rightV) group, ok := j.plans[union] @@ -538,7 +542,7 @@ func (j *joinOrderBuilder) buildFilter(child sql.Node, e sql.Expression) (vertex // memoize child childV, childE, childGrp := j.populateSubgraph(child) - filterGrp := j.m.MemoizeFilter(nil, childGrp, expression.SplitConjunction(e)) + filterGrp := j.m.MemoizeFilter(nil, childGrp, SplitConjunction(e)) // filter will absorb child relation for join reordering j.plans[childV] = filterGrp From 8e875555cc5665ee0904b075a2adcbc7dacca52e Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 4 Nov 2025 17:03:39 -0800 Subject: [PATCH 25/30] Fixed test --- sql/analyzer/indexed_joins_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/analyzer/indexed_joins_test.go b/sql/analyzer/indexed_joins_test.go index 1f5061c6ed..08602f816a 100644 --- a/sql/analyzer/indexed_joins_test.go +++ b/sql/analyzer/indexed_joins_test.go @@ -40,16 +40,16 @@ func TestHashJoins(t *testing.T) { memo: `memo: ├── G1: (tablescan: ab) ├── G2: (tablescan: xy) -├── G3: (hashjoin 1 2) (hashjoin 2 1) (innerjoin 2 1) (innerjoin 1 2) +├── G3: (hashjoin 1[ab] 2[xy]) (hashjoin 2[xy] 1[ab]) (innerjoin 2[xy] 1[ab]) (innerjoin 1[ab] 2[xy]) ├── G4: (tablescan: pq) -├── G5: (hashjoin 3 4) (hashjoin 1 9) (hashjoin 9 1) (hashjoin 2 8) (hashjoin 8 2) (hashjoin 4 3) (innerjoin 4 3) (innerjoin 8 2) (innerjoin 2 8) (innerjoin 9 1) (innerjoin 1 9) (innerjoin 3 4) +├── G5: (hashjoin 3 4[pq]) (hashjoin 1[ab] 9) (hashjoin 9 1[ab]) (hashjoin 2[xy] 8) (hashjoin 8 2[xy]) (hashjoin 4[pq] 3) (innerjoin 4[pq] 3) (innerjoin 8 2[xy]) (innerjoin 2[xy] 8) (innerjoin 9 1[ab]) (innerjoin 1[ab] 9) (innerjoin 3 4[pq]) ├── G6: (tablescan: uv) -├── G7: (hashjoin 5 6) (hashjoin 1 12) (hashjoin 12 1) (hashjoin 2 11) (hashjoin 11 2) (hashjoin 3 10) (hashjoin 10 3) (hashjoin 6 5) (innerjoin 6 5) (innerjoin 10 3) (innerjoin 3 10) (innerjoin 11 2) (innerjoin 2 11) (innerjoin 12 1) (innerjoin 1 12) (innerjoin 5 6) -├── G8: (hashjoin 1 4) (hashjoin 4 1) (innerjoin 4 1) (innerjoin 1 4) -├── G9: (hashjoin 2 4) (hashjoin 4 2) (innerjoin 4 2) (innerjoin 2 4) -├── G10: (hashjoin 4 6) (hashjoin 6 4) (innerjoin 6 4) (innerjoin 4 6) -├── G11: (hashjoin 1 10) (hashjoin 10 1) (hashjoin 8 6) (hashjoin 6 8) (innerjoin 6 8) (innerjoin 8 6) (innerjoin 10 1) (innerjoin 1 10) -└── G12: (hashjoin 2 10) (hashjoin 10 2) (hashjoin 9 6) (hashjoin 6 9) (innerjoin 6 9) (innerjoin 9 6) (innerjoin 10 2) (innerjoin 2 10) +├── G7: (hashjoin 5 6[uv]) (hashjoin 1[ab] 12) (hashjoin 12 1[ab]) (hashjoin 2[xy] 11) (hashjoin 11 2[xy]) (hashjoin 3 10) (hashjoin 10 3) (hashjoin 6[uv] 5) (innerjoin 6[uv] 5) (innerjoin 10 3) (innerjoin 3 10) (innerjoin 11 2[xy]) (innerjoin 2[xy] 11) (innerjoin 12 1[ab]) (innerjoin 1[ab] 12) (innerjoin 5 6[uv]) +├── G8: (hashjoin 1[ab] 4[pq]) (hashjoin 4[pq] 1[ab]) (innerjoin 4[pq] 1[ab]) (innerjoin 1[ab] 4[pq]) +├── G9: (hashjoin 2[xy] 4[pq]) (hashjoin 4[pq] 2[xy]) (innerjoin 4[pq] 2[xy]) (innerjoin 2[xy] 4[pq]) +├── G10: (hashjoin 4[pq] 6[uv]) (hashjoin 6[uv] 4[pq]) (innerjoin 6[uv] 4[pq]) (innerjoin 4[pq] 6[uv]) +├── G11: (hashjoin 1[ab] 10) (hashjoin 10 1[ab]) (hashjoin 8 6[uv]) (hashjoin 6[uv] 8) (innerjoin 6[uv] 8) (innerjoin 8 6[uv]) (innerjoin 10 1[ab]) (innerjoin 1[ab] 10) +└── G12: (hashjoin 2[xy] 10) (hashjoin 10 2[xy]) (hashjoin 9 6[uv]) (hashjoin 6[uv] 9) (innerjoin 6[uv] 9) (innerjoin 9 6[uv]) (innerjoin 10 2[xy]) (innerjoin 2[xy] 10) `, }, } From 7b77937fb59be1d109921fd71aef51981b95b117 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Tue, 4 Nov 2025 17:37:04 -0800 Subject: [PATCH 26/30] fixed vet errors --- optgen/cmd/support/memo_gen.go | 2 +- sql/memo/expr_group.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/optgen/cmd/support/memo_gen.go b/optgen/cmd/support/memo_gen.go index fcfde8e95c..356f79e3a5 100644 --- a/optgen/cmd/support/memo_gen.go +++ b/optgen/cmd/support/memo_gen.go @@ -132,7 +132,7 @@ func (g *MemoGen) genScalarInterfaces(defn ExprDef) { func (g *MemoGen) genStringer(defn ExprDef) { fmt.Fprintf(g.w, "func (r *%s) String() string {\n", defn.Name) - fmt.Fprint(g.w, " return fmt.Sprintf(\"%s\", r)\n") + fmt.Fprintf(g.w, " return fmt.Sprintf(\"%%s\", r)\n") fmt.Fprintf(g.w, "}\n\n") } diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index f0c99ee250..a98be55307 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -265,7 +265,7 @@ func (e *ExprGroup) CostTreeString(prefix string) string { // BestPlanDebugString returns a string representation of the physical best plan for use in cost debug printing func (e *ExprGroup) BestPlanDebugString() string { tp := sql.NewTreePrinter() - tp.WriteNode(fmt.Sprintf("G%d [%s] Cost: %.1f", e.Id, e.Best, e.Best.Cost())) + tp.WriteNode("G%d [%s] Cost: %.1f", e.Id, e.Best, e.Best.Cost()) children := e.Best.Children() childrenStrings := make([]string, len(children)) for i, c := range children { From b75f3b721971e7488966798c282fccd379f97983 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 5 Nov 2025 12:58:47 -0800 Subject: [PATCH 27/30] Fixed tests --- optgen/cmd/support/memo_gen_test.go | 35 ++++++------- sql/memo/join_order_builder_test.go | 78 ++++++++++++++--------------- 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/optgen/cmd/support/memo_gen_test.go b/optgen/cmd/support/memo_gen_test.go index 4b96aa4baf..ef1f7f3e3d 100644 --- a/optgen/cmd/support/memo_gen_test.go +++ b/optgen/cmd/support/memo_gen_test.go @@ -12,7 +12,7 @@ func TestMemoGen(t *testing.T) { expected string }{ expected: ` - import ( + import ( "fmt" "strings" "github.com/dolthub/go-mysql-server/sql" @@ -26,10 +26,16 @@ func TestMemoGen(t *testing.T) { } var _ RelExpr = (*hashJoin)(nil) + var _ fmt.Formatter = (*hashJoin)(nil) + var _ fmt.Stringer = (*hashJoin)(nil) var _ JoinRel = (*hashJoin)(nil) func (r *hashJoin) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) + } + + func (r *hashJoin) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *hashJoin) JoinPrivate() *JoinBase { @@ -42,10 +48,16 @@ func TestMemoGen(t *testing.T) { } var _ RelExpr = (*tableScan)(nil) + var _ fmt.Formatter = (*tableScan)(nil) + var _ fmt.Stringer = (*tableScan)(nil) var _ SourceRel = (*tableScan)(nil) func (r *tableScan) String() string { - return FormatExpr(r) + return fmt.Sprintf("%s", r) + } + + func (r *tableScan) Format(s fmt.State, verb rune) { + FormatExpr(r, s, verb) } func (r *tableScan) Name() string { @@ -68,17 +80,6 @@ func TestMemoGen(t *testing.T) { return nil } - func FormatExpr(r exprType) string { - switch r := r.(type) { - case *hashJoin: - return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id) - case *tableScan: - return fmt.Sprintf("tablescan: %s", r.Name()) - default: - panic(fmt.Sprintf("unknown RelExpr type: %T", r)) - } - } - func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) { var result sql.Node var err error @@ -96,9 +97,9 @@ func TestMemoGen(t *testing.T) { return nil, err } - if withDescribeStats, ok := result.(sql.WithDescribeStats); ok { - withDescribeStats.SetDescribeStats(*DescribeStats(r)) - } + if withDescribeStats, ok := result.(sql.WithDescribeStats); ok { + withDescribeStats.SetDescribeStats(*DescribeStats(r)) + } result, err = r.Group().finalize(result) if err != nil { return nil, err diff --git a/sql/memo/join_order_builder_test.go b/sql/memo/join_order_builder_test.go index 8ea10875c6..a0dd6b63ab 100644 --- a/sql/memo/join_order_builder_test.go +++ b/sql/memo/join_order_builder_test.go @@ -57,19 +57,19 @@ func TestJoinOrderBuilder(t *testing.T) { plans: `memo: ├── G1: (tablescan: a) ├── G2: (tablescan: b) -├── G3: (innerjoin 2 1) (innerjoin 1 2) +├── G3: (innerjoin 2[b] 1[a]) (innerjoin 1[a] 2[b]) ├── G4: (tablescan: c) -├── G5: (innerjoin 4 3) (innerjoin 8 2) (innerjoin 2 8) (innerjoin 9 1) (innerjoin 1 9) (innerjoin 3 4) +├── G5: (innerjoin 4[c] 3) (innerjoin 8 2[b]) (innerjoin 2[b] 8) (innerjoin 9 1[a]) (innerjoin 1[a] 9) (innerjoin 3 4[c]) ├── G6: (tablescan: d) -├── G7: (innerjoin 6 5) (innerjoin 10 9) (innerjoin 9 10) (innerjoin 11 8) (innerjoin 8 11) (innerjoin 12 4) (innerjoin 4 12) (innerjoin 13 3) (innerjoin 3 13) (innerjoin 14 2) (innerjoin 2 14) (innerjoin 15 1) (innerjoin 1 15) (innerjoin 5 6) -├── G8: (innerjoin 4 1) (innerjoin 1 4) -├── G9: (innerjoin 4 2) (innerjoin 2 4) -├── G10: (innerjoin 6 1) (innerjoin 1 6) -├── G11: (innerjoin 6 2) (innerjoin 2 6) -├── G12: (innerjoin 6 3) (innerjoin 3 6) (innerjoin 10 2) (innerjoin 2 10) (innerjoin 11 1) (innerjoin 1 11) -├── G13: (innerjoin 6 4) (innerjoin 4 6) -├── G14: (innerjoin 6 8) (innerjoin 8 6) (innerjoin 10 4) (innerjoin 4 10) (innerjoin 13 1) (innerjoin 1 13) -└── G15: (innerjoin 6 9) (innerjoin 9 6) (innerjoin 11 4) (innerjoin 4 11) (innerjoin 13 2) (innerjoin 2 13) +├── G7: (innerjoin 6[d] 5) (innerjoin 10 9) (innerjoin 9 10) (innerjoin 11 8) (innerjoin 8 11) (innerjoin 12 4[c]) (innerjoin 4[c] 12) (innerjoin 13 3) (innerjoin 3 13) (innerjoin 14 2[b]) (innerjoin 2[b] 14) (innerjoin 15 1[a]) (innerjoin 1[a] 15) (innerjoin 5 6[d]) +├── G8: (innerjoin 4[c] 1[a]) (innerjoin 1[a] 4[c]) +├── G9: (innerjoin 4[c] 2[b]) (innerjoin 2[b] 4[c]) +├── G10: (innerjoin 6[d] 1[a]) (innerjoin 1[a] 6[d]) +├── G11: (innerjoin 6[d] 2[b]) (innerjoin 2[b] 6[d]) +├── G12: (innerjoin 6[d] 3) (innerjoin 3 6[d]) (innerjoin 10 2[b]) (innerjoin 2[b] 10) (innerjoin 11 1[a]) (innerjoin 1[a] 11) +├── G13: (innerjoin 6[d] 4[c]) (innerjoin 4[c] 6[d]) +├── G14: (innerjoin 6[d] 8) (innerjoin 8 6[d]) (innerjoin 10 4[c]) (innerjoin 4[c] 10) (innerjoin 13 1[a]) (innerjoin 1[a] 13) +└── G15: (innerjoin 6[d] 9) (innerjoin 9 6[d]) (innerjoin 11 4[c]) (innerjoin 4[c] 11) (innerjoin 13 2[b]) (innerjoin 2[b] 13) `, }, { @@ -102,32 +102,32 @@ func TestJoinOrderBuilder(t *testing.T) { plans: `memo: ├── G1: (tablescan: a) ├── G2: (tablescan: b) -├── G3: (leftjoin 1 2) +├── G3: (leftjoin 1[a] 2[b]) ├── G4: (tablescan: c) ├── G5: (tablescan: d) -├── G6: (fullouterjoin 4 5) +├── G6: (fullouterjoin 4[c] 5[d]) ├── G7: (tablescan: e) -├── G8: (leftjoin 6 7) -├── G9: (innerjoin 8 3) (leftjoin 14 2) (innerjoin 3 8) +├── G8: (leftjoin 6 7[e]) +├── G9: (innerjoin 8 3) (leftjoin 14 2[b]) (innerjoin 3 8) ├── G10: (tablescan: f) ├── G11: (tablescan: g) -├── G12: (innerjoin 11 10) (innerjoin 10 11) -├── G13: (innerjoin 11 19) (innerjoin 19 11) (innerjoin 21 17) (innerjoin 17 21) (innerjoin 22 16) (innerjoin 16 22) (innerjoin 24 10) (innerjoin 10 24) (innerjoin 12 9) (innerjoin 26 8) (innerjoin 8 26) (innerjoin 27 3) (innerjoin 3 27) (leftjoin 28 2) (innerjoin 9 12) -├── G14: (innerjoin 8 1) (innerjoin 1 8) -├── G15: (innerjoin 10 1) (innerjoin 1 10) -├── G16: (innerjoin 10 3) (innerjoin 3 10) (leftjoin 15 2) -├── G17: (innerjoin 10 8) (innerjoin 8 10) -├── G18: (innerjoin 10 14) (innerjoin 14 10) (innerjoin 15 8) (innerjoin 8 15) (innerjoin 17 1) (innerjoin 1 17) -├── G19: (innerjoin 10 9) (innerjoin 9 10) (innerjoin 16 8) (innerjoin 8 16) (innerjoin 17 3) (innerjoin 3 17) (leftjoin 18 2) -├── G20: (innerjoin 11 1) (innerjoin 1 11) -├── G21: (innerjoin 11 3) (innerjoin 3 11) (leftjoin 20 2) -├── G22: (innerjoin 11 8) (innerjoin 8 11) -├── G23: (innerjoin 11 14) (innerjoin 14 11) (innerjoin 20 8) (innerjoin 8 20) (innerjoin 22 1) (innerjoin 1 22) -├── G24: (innerjoin 11 9) (innerjoin 9 11) (innerjoin 21 8) (innerjoin 8 21) (innerjoin 22 3) (innerjoin 3 22) (leftjoin 23 2) -├── G25: (innerjoin 11 15) (innerjoin 15 11) (innerjoin 20 10) (innerjoin 10 20) (innerjoin 12 1) (innerjoin 1 12) -├── G26: (innerjoin 11 16) (innerjoin 16 11) (innerjoin 21 10) (innerjoin 10 21) (innerjoin 12 3) (innerjoin 3 12) (leftjoin 25 2) -├── G27: (innerjoin 11 17) (innerjoin 17 11) (innerjoin 22 10) (innerjoin 10 22) (innerjoin 12 8) (innerjoin 8 12) -└── G28: (innerjoin 11 18) (innerjoin 18 11) (innerjoin 20 17) (innerjoin 17 20) (innerjoin 22 15) (innerjoin 15 22) (innerjoin 23 10) (innerjoin 10 23) (innerjoin 12 14) (innerjoin 14 12) (innerjoin 25 8) (innerjoin 8 25) (innerjoin 27 1) (innerjoin 1 27) +├── G12: (innerjoin 11[g] 10[f]) (innerjoin 10[f] 11[g]) +├── G13: (innerjoin 11[g] 19) (innerjoin 19 11[g]) (innerjoin 21 17) (innerjoin 17 21) (innerjoin 22 16) (innerjoin 16 22) (innerjoin 24 10[f]) (innerjoin 10[f] 24) (innerjoin 12 9) (innerjoin 26 8) (innerjoin 8 26) (innerjoin 27 3) (innerjoin 3 27) (leftjoin 28 2[b]) (innerjoin 9 12) +├── G14: (innerjoin 8 1[a]) (innerjoin 1[a] 8) +├── G15: (innerjoin 10[f] 1[a]) (innerjoin 1[a] 10[f]) +├── G16: (innerjoin 10[f] 3) (innerjoin 3 10[f]) (leftjoin 15 2[b]) +├── G17: (innerjoin 10[f] 8) (innerjoin 8 10[f]) +├── G18: (innerjoin 10[f] 14) (innerjoin 14 10[f]) (innerjoin 15 8) (innerjoin 8 15) (innerjoin 17 1[a]) (innerjoin 1[a] 17) +├── G19: (innerjoin 10[f] 9) (innerjoin 9 10[f]) (innerjoin 16 8) (innerjoin 8 16) (innerjoin 17 3) (innerjoin 3 17) (leftjoin 18 2[b]) +├── G20: (innerjoin 11[g] 1[a]) (innerjoin 1[a] 11[g]) +├── G21: (innerjoin 11[g] 3) (innerjoin 3 11[g]) (leftjoin 20 2[b]) +├── G22: (innerjoin 11[g] 8) (innerjoin 8 11[g]) +├── G23: (innerjoin 11[g] 14) (innerjoin 14 11[g]) (innerjoin 20 8) (innerjoin 8 20) (innerjoin 22 1[a]) (innerjoin 1[a] 22) +├── G24: (innerjoin 11[g] 9) (innerjoin 9 11[g]) (innerjoin 21 8) (innerjoin 8 21) (innerjoin 22 3) (innerjoin 3 22) (leftjoin 23 2[b]) +├── G25: (innerjoin 11[g] 15) (innerjoin 15 11[g]) (innerjoin 20 10[f]) (innerjoin 10[f] 20) (innerjoin 12 1[a]) (innerjoin 1[a] 12) +├── G26: (innerjoin 11[g] 16) (innerjoin 16 11[g]) (innerjoin 21 10[f]) (innerjoin 10[f] 21) (innerjoin 12 3) (innerjoin 3 12) (leftjoin 25 2[b]) +├── G27: (innerjoin 11[g] 17) (innerjoin 17 11[g]) (innerjoin 22 10[f]) (innerjoin 10[f] 22) (innerjoin 12 8) (innerjoin 8 12) +└── G28: (innerjoin 11[g] 18) (innerjoin 18 11[g]) (innerjoin 20 17) (innerjoin 17 20) (innerjoin 22 15) (innerjoin 15 22) (innerjoin 23 10[f]) (innerjoin 10[f] 23) (innerjoin 12 14) (innerjoin 14 12) (innerjoin 25 8) (innerjoin 8 25) (innerjoin 27 1[a]) (innerjoin 1[a] 27) `, }, { @@ -146,10 +146,10 @@ func TestJoinOrderBuilder(t *testing.T) { plans: `memo: ├── G1: (tablescan: a) ├── G2: (tablescan: c) -├── G3: (crossjoin 1 2) +├── G3: (crossjoin 1[a] 2[c]) ├── G4: (tablescan: b) -├── G5: (innerjoin 1 6) (innerjoin 6 1) (innerjoin 3 4) -└── G6: (innerjoin 4 2) (innerjoin 2 4) +├── G5: (innerjoin 1[a] 6) (innerjoin 6 1[a]) (innerjoin 3 4[b]) +└── G6: (innerjoin 4[b] 2[c]) (innerjoin 2[c] 4[b]) `, }, { @@ -173,12 +173,12 @@ func TestJoinOrderBuilder(t *testing.T) { plans: `memo: ├── G1: (tablescan: c) ├── G2: (tablescan: d) -├── G3: (innerjoin 1 2) (innerjoin 2 1) (innerjoin 1 2) +├── G3: (innerjoin 1[c] 2[d]) (innerjoin 2[d] 1[c]) (innerjoin 1[c] 2[d]) ├── G4: (tablescan: a) ├── G5: (tablescan: b) -├── G6: (innerjoin 4 5) -├── G7: (innerjoin 4 8) (innerjoin 8 4) (innerjoin 3 6) -└── G8: (innerjoin 5 3) (innerjoin 3 5) +├── G6: (innerjoin 4[a] 5[b]) +├── G7: (innerjoin 4[a] 8) (innerjoin 8 4[a]) (innerjoin 3 6) +└── G8: (innerjoin 5[b] 3) (innerjoin 3 5[b]) `, }, } From 117a9244ebaeb9ba39b285a310d02b9ef5c6378b Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 5 Nov 2025 15:08:37 -0800 Subject: [PATCH 28/30] PR feedback --- sql/memo/expr_group.go | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sql/memo/expr_group.go b/sql/memo/expr_group.go index a98be55307..2c198f7e73 100644 --- a/sql/memo/expr_group.go +++ b/sql/memo/expr_group.go @@ -48,18 +48,8 @@ func (e *ExprGroup) Format(f fmt.State, verb rune) { expr = e.First } switch ex := expr.(type) { - case *TableAlias: - io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) - io.WriteString(f, "[") - io.WriteString(f, ex.Name()) - io.WriteString(f, "]") - case *TableScan: - io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) - io.WriteString(f, "[") - io.WriteString(f, ex.Name()) - io.WriteString(f, "]") - case *SubqueryAlias: - io.WriteString(f, fmt.Sprintf("%d", ex.Group().Id)) + case sql.Nameable: + io.WriteString(f, fmt.Sprintf("%d", expr.Group().Id)) io.WriteString(f, "[") io.WriteString(f, ex.Name()) io.WriteString(f, "]") From b4ce0d915a297ce6ac1a71c0a66b54dbee57a61a Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 5 Nov 2025 15:33:56 -0800 Subject: [PATCH 29/30] Reduce unnecessary string evaluation --- sql/analyzer/indexed_joins.go | 26 +++++++++++++++++--------- sql/memo/join_order_builder.go | 10 +++++----- sql/memo/memo.go | 12 ++++++------ sql/memo/trace_logger.go | 16 ++++++++++++---- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index a01e561f13..a718433714 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -216,16 +216,22 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco // Once we've enumerated all expression groups, we can apply hints. This must be done after expression // groups have been identified, so that the applied hints use the correct metadata. for _, h := range hints { - m.Tracer.Log("Applying hint: %s", h.Typ.String()) + m.Tracer.Log("Applying hint: %s", h.Typ) m.ApplyHint(h) } - m.Tracer.Log("Starting cost-based optimization for groups %s", m) + if m.Tracer.TraceEnabled { + m.Tracer.Log("Starting cost-based optimization for groups %s", m) + } + err = m.OptimizeRoot() if err != nil { return nil, err } - m.Tracer.Log("Completed cost-based optimization:\n%s", m.CostDebugString()) + + if m.Tracer.TraceEnabled { + m.Tracer.Log("Completed cost-based optimization:\n%s", m.CostDebugString()) + } if a.Verbose && a.Debug { a.Log("%s", m.String()) @@ -234,7 +240,9 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco scope.JoinTrees = append(scope.JoinTrees, m.String()) } - m.Tracer.Log("Best root plan:\n%s", m.BestPlanDebugString()) + if m.Tracer.TraceEnabled { + m.Tracer.Log("Best root plan:\n%s", m.BestPlanDebugString()) + } return m.BestRootPlan(ctx) } @@ -261,7 +269,7 @@ func addLookupJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + m.Tracer.PushDebugContextFmt("%+v", e) defer m.Tracer.PopDebugContext() var right *memo.ExprGroup @@ -665,7 +673,7 @@ func addRightSemiJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + m.Tracer.PushDebugContextFmt("%+v", e) defer m.Tracer.PopDebugContext() semi, ok := e.(*memo.SemiJoin) @@ -831,7 +839,7 @@ func addHashJoins(m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + m.Tracer.PushDebugContextFmt("%+v", e) defer m.Tracer.PopDebugContext() switch e.(type) { @@ -971,7 +979,7 @@ func addRangeHeapJoin(m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + m.Tracer.PushDebugContextFmt("%+v", e) defer m.Tracer.PopDebugContext() switch e.(type) { @@ -1082,7 +1090,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { defer m.Tracer.PopDebugContext() return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { - m.Tracer.PushDebugContext(fmt.Sprintf("%+v", e)) + m.Tracer.PushDebugContextFmt("%+v", e) defer m.Tracer.PopDebugContext() var join *memo.JoinBase diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index ee8b0ac6eb..ef4a03f3d3 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -529,10 +529,10 @@ func (j *joinOrderBuilder) buildJoinOp(n *plan.JoinNode) *ExprGroup { } if !isInner { - j.m.Tracer.Log("Building non-inner edge for join type: %s", typ.String()) + j.m.Tracer.Log("Building non-inner edge for join type: %s", typ) j.buildNonInnerEdge(op, filters...) } else { - j.m.Tracer.Log("Building inner edge for join type: %s", typ.String()) + j.m.Tracer.Log("Building inner edge for join type: %s", typ) j.buildInnerEdge(op, filters...) } return group @@ -700,7 +700,7 @@ func setPrinter(all, s1, s2 vertexSet) { // addPlans finds operators that let us join (s1 op s2) and (s2 op s1). func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { - j.m.Tracer.PushDebugContext(fmt.Sprintf("addPlans/%s<->%s", s1, s2)) + j.m.Tracer.PushDebugContextFmt("addPlans/%s<->%s", s1, s2) defer j.m.Tracer.PopDebugContext() // all inner filters could be applied @@ -736,14 +736,14 @@ func (j *joinOrderBuilder) addPlans(s1, s2 vertexSet) { for i, ok := j.nonInnerEdges.Next(0); ok; i, ok = j.nonInnerEdges.Next(i + 1) { e := &j.edges[i] if e.applicable(s1, s2) { - j.m.Tracer.Log("Found applicable non-inner edge %d, adding join: %s", i, e.op.joinType.String()) + j.m.Tracer.Log("Found applicable non-inner edge %d, adding join: %s", i, e.op.joinType) j.addJoin(e.op.joinType, s1, s2, e.filters, innerJoinFilters, e.joinIsRedundant(s1, s2)) return } if e.applicable(s2, s1) { // This is necessary because we only iterate s1 up to subset / 2 // in DPSube() - j.m.Tracer.Log("Found applicable non-inner edge %d (swapped), adding join: %s", i, e.op.joinType.String()) + j.m.Tracer.Log("Found applicable non-inner edge %d (swapped), adding join: %s", i, e.op.joinType) j.addJoin(e.op.joinType, s2, s1, e.filters, innerJoinFilters, e.joinIsRedundant(s2, s1)) return } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 463824e603..049e40508d 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -77,7 +77,7 @@ func (m *Memo) HandleErr(err error) { } func (m *Memo) EnableTrace(enable bool) { - m.Tracer.traceEnabled = enable + m.Tracer.TraceEnabled = enable } func (m *Memo) Root() *ExprGroup { @@ -435,7 +435,7 @@ func (m *Memo) optimizeMemoGroup(grp *ExprGroup) error { return nil } - m.Tracer.PushDebugContext(fmt.Sprintf("optimizeMemoGroup/%d", grp.Id)) + m.Tracer.PushDebugContextFmt("optimizeMemoGroup/%d", grp.Id) defer m.Tracer.PopDebugContext() n := grp.First @@ -503,21 +503,21 @@ func (m *Memo) updateBest(grp *ExprGroup, n RelExpr, cost float64) { grp.Best = n grp.Cost = cost grp.HintOk = true - m.Tracer.Log("Set best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) + m.Tracer.Log("Set best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n, cost) return } if grp.updateBest(n, cost) { - m.Tracer.Log("Updated best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n.String(), cost) + m.Tracer.Log("Updated best plan for group %d to hinted plan %s with cost %.2f", grp.Id, n, cost) } } else if grp.Best == nil || !grp.HintOk { if grp.updateBest(n, cost) { - m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f (no hints satisfied)", grp.Id, n.String(), cost) + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f (no hints satisfied)", grp.Id, n, cost) } } return } if grp.updateBest(n, cost) { - m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f", grp.Id, n.String(), cost) + m.Tracer.Log("Updated best plan for group %d to plan %s with cost %.2f", grp.Id, n, cost) } } diff --git a/sql/memo/trace_logger.go b/sql/memo/trace_logger.go index b553d86c67..78e6cccbd9 100755 --- a/sql/memo/trace_logger.go +++ b/sql/memo/trace_logger.go @@ -24,21 +24,29 @@ import ( type TraceLogger struct { // A stack of debugger context. See PushDebugContext, PopDebugContext contextStack []string - traceEnabled bool + TraceEnabled bool } var log = logrus.New() // PushDebugContext pushes the given context string onto the context stack, to use when logging debug messages. func (a *TraceLogger) PushDebugContext(msg string) { - if a != nil && a.traceEnabled { + if a != nil && a.TraceEnabled { a.contextStack = append(a.contextStack, msg) } } +// PushDebugContextFmt pushes a formatted context string onto the context stack, to use when logging debug messages. +// Useful to avoid the cost of formatting when tracing is disabled. +func (a *TraceLogger) PushDebugContextFmt(fmtStr string, args ...any) { + if a != nil && a.TraceEnabled { + a.contextStack = append(a.contextStack, fmt.Sprintf(fmtStr, args...)) + } +} + // PopDebugContext pops a context message off the context stack. func (a *TraceLogger) PopDebugContext() { - if a != nil && len(a.contextStack) > 0 { + if a != nil && a.TraceEnabled && len(a.contextStack) > 0 { a.contextStack = a.contextStack[:len(a.contextStack)-1] } } @@ -46,7 +54,7 @@ func (a *TraceLogger) PopDebugContext() { // Log prints an INFO message to stdout with the given message and args // if the analyzer is in debug mode. func (a *TraceLogger) Log(msg string, args ...interface{}) { - if a != nil && a.traceEnabled { + if a != nil && a.TraceEnabled { if len(a.contextStack) > 0 { ctx := strings.Join(a.contextStack, "/") fmt.Printf("%s: "+msg+"\n", append([]interface{}{ctx}, args...)...) From 1633431524da6f3cea53a2fe2aa90f82c66ee706 Mon Sep 17 00:00:00 2001 From: Zach Musgrave Date: Wed, 5 Nov 2025 16:04:40 -0800 Subject: [PATCH 30/30] Better optional logging --- sql/analyzer/indexed_joins.go | 8 ++------ sql/memo/memo.go | 23 +++++++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index a718433714..d701726dc9 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -229,9 +229,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco return nil, err } - if m.Tracer.TraceEnabled { - m.Tracer.Log("Completed cost-based optimization:\n%s", m.CostDebugString()) - } + m.LogCostDebugString() if a.Verbose && a.Debug { a.Log("%s", m.String()) @@ -240,9 +238,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco scope.JoinTrees = append(scope.JoinTrees, m.String()) } - if m.Tracer.TraceEnabled { - m.Tracer.Log("Best root plan:\n%s", m.BestPlanDebugString()) - } + m.LogBestPlanDebugString() return m.BestRootPlan(ctx) } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 049e40508d..649c9c5d8a 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -645,10 +645,15 @@ func (m *Memo) String() string { return b.String() } -// CostDebugString returns a string representation of the memo with cost +// LogCostDebugString logs a string representation of the memo with cost // information for each expression, ordered by best to worst for each group, // displayed in a tree structure. -func (m *Memo) CostDebugString() interface{} { +// Only logs if tracing is enabled. +func (m *Memo) LogCostDebugString() { + if m.root == nil || !m.Tracer.TraceEnabled { + return + } + exprs := make([]string, m.cnt) groups := make([]*ExprGroup, 0) @@ -685,18 +690,20 @@ func (m *Memo) CostDebugString() interface{} { } b.WriteString(fmt.Sprintf("%s G%d: %s\n", beg, i+1, g)) } - return b.String() + + m.Tracer.Log("Completed cost-based optimization:\n%s", b.String()) } -// BestPlanDebugString returns a physical tree representation of the best plan for each group in the tree that is +// LogBestPlanDebugString logs a physical tree representation of the best plan for each group in the tree that is // referenced by the best plan in the root. This differs from other debug strings in that it represents the groups // as children of their parents, rather than as a flat list, and only includes groups that are part of the best plan. -func (m *Memo) BestPlanDebugString() interface{} { - if m.root == nil { - return "" +// Only logs if tracing is enabled. +func (m *Memo) LogBestPlanDebugString() { + if m.root == nil || !m.Tracer.TraceEnabled { + return } - return m.root.BestPlanDebugString() + m.Tracer.Log("Best root plan:\n%s", m.root.BestPlanDebugString()) } type tableProps struct {