Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a37da55
Added trace logging to join planning
zachmu Oct 17, 2025
ba4bc16
new trace logger
zachmu Oct 17, 2025
2eb911c
Fixing unnecessary boxing
zachmu Oct 24, 2025
3ce4000
better logging
zachmu Oct 24, 2025
642b73f
removed silly generated formatting code, now artisanally crafted by s…
zachmu Oct 24, 2025
417089a
Better debug string for index lookups
zachmu Oct 24, 2025
9e17536
more logging
zachmu Oct 24, 2025
7e5f816
Fixed bad comment, more logging
zachmu Oct 25, 2025
8c9193e
Better logging again
zachmu Oct 25, 2025
0e44b59
more logging improvements
zachmu Oct 27, 2025
17dd529
more formatting
zachmu Oct 27, 2025
2ad95c6
Restore accidentally deleted functionality from generation
zachmu Oct 27, 2025
33503ff
logging
zachmu Oct 27, 2025
105e4f1
logging
zachmu Oct 27, 2025
befaca2
logging (is this thing on)
zachmu Oct 28, 2025
c8b42ad
logging fixes
zachmu Oct 28, 2025
adce768
better debug cost printing
zachmu Oct 28, 2025
8d8f800
eliminated duplicates from ExprGroup.children, more logging changes
zachmu Oct 29, 2025
e3fd09c
bug fix for children(), added iterator method
zachmu Oct 29, 2025
b1426ea
New Iter method
zachmu Oct 29, 2025
2d17565
physical plan debug string
zachmu Oct 29, 2025
1c3a130
bug fixes
zachmu Oct 29, 2025
0e07997
formatting
zachmu Oct 29, 2025
0c5806f
Merge branch 'main' into zachmu/join-debugging
zachmu Oct 29, 2025
36fbcde
Added new pseudo-extension point, which was why Doltgres was not find…
zachmu Oct 29, 2025
8e87555
Fixed test
zachmu Nov 5, 2025
7b77937
fixed vet errors
zachmu Nov 5, 2025
b75f3b7
Fixed tests
zachmu Nov 5, 2025
117a924
PR feedback
zachmu Nov 5, 2025
b4ce0d9
Reduce unnecessary string evaluation
zachmu Nov 5, 2025
1633431
Better optional logging
zachmu Nov 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 69 additions & 89 deletions optgen/cmd/support/memo_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,36 +42,36 @@ func DecodeMemoExprs(path string) (MemoExprs, error) {
var _ GenDefs = (*MemoExprs)(nil)

type MemoGen struct {
defines []ExprDef
w io.Writer
defns []ExprDef
w io.Writer
}

func (g *MemoGen) Generate(defines GenDefs, w io.Writer) {
g.defines = defines.(MemoExprs).Exprs
func (g *MemoGen) Generate(defns GenDefs, w io.Writer) {
g.defns = defns.(MemoExprs).Exprs

g.w = w

g.genImport()
for _, define := range g.defines {
g.genType(define)
g.genRelInterfaces(define)

g.genStringer(define)
if define.SourceType != "" {
g.genSourceRelInterface(define)
for _, defn := range g.defns {
g.genType(defn)
g.genRelInterfaces(defn)

g.genStringer(defn)
g.genFormatter(defn)
if defn.SourceType != "" {
g.genSourceRelInterface(defn)
}
if define.Join {
g.genJoinRelInterface(define)
} else if define.Binary {
g.genBinaryGroupInterface(define)
} else if define.Unary {
g.genUnaryGroupInterface(define)
if defn.Join {
g.genJoinRelInterface(defn)
} else if defn.Binary {
g.genBinaryGroupInterface(defn)
} else if defn.Unary {
g.genUnaryGroupInterface(defn)
} else {
g.genChildlessGroupInterface(define)
g.genChildlessGroupInterface(defn)
}
}
g.genFormatters(g.defines)

g.genBuildRelExpr(g.defns)
}

func (g *MemoGen) genImport() {
Expand All @@ -83,108 +83,116 @@ func (g *MemoGen) genImport() {
fmt.Fprintf(g.w, ")\n\n")
}

func (g *MemoGen) genType(define ExprDef) {
fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(define.Name))
if define.SourceType != "" {
func (g *MemoGen) genType(defn ExprDef) {
fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(defn.Name))
if defn.SourceType != "" {
fmt.Fprintf(g.w, " *sourceBase\n")
fmt.Fprintf(g.w, " Table %s\n", define.SourceType)
} else if define.Join {
fmt.Fprintf(g.w, " Table %s\n", defn.SourceType)
} else if defn.Join {
fmt.Fprintf(g.w, " *JoinBase\n")
} else if define.Unary {
} else if defn.Unary {
fmt.Fprintf(g.w, " *relBase\n")
fmt.Fprintf(g.w, " Child *ExprGroup\n")
} else if define.Binary {
} else if defn.Binary {
fmt.Fprintf(g.w, " *relBase\n")
fmt.Fprintf(g.w, " Left *ExprGroup\n")
fmt.Fprintf(g.w, " Right *ExprGroup\n")
}
for _, attr := range define.Attrs {
for _, attr := range defn.Attrs {
fmt.Fprintf(g.w, " %s %s\n", strings.Title(attr[0]), attr[1])
}

fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genRelInterfaces(define ExprDef) {
fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", define.Name)
if define.SourceType != "" {
fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", define.Name)
} else if define.Join {
fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", define.Name)
} else if define.Unary || define.Binary {
func (g *MemoGen) genRelInterfaces(defn ExprDef) {
fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", defn.Name)
fmt.Fprintf(g.w, "var _ fmt.Formatter = (*%s)(nil)\n", defn.Name)
fmt.Fprintf(g.w, "var _ fmt.Stringer = (*%s)(nil)\n", defn.Name)
if defn.SourceType != "" {
fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", defn.Name)
} else if defn.Join {
fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", defn.Name)
} else if defn.Unary || defn.Binary {
} else {
panic("unreachable")
}
fmt.Fprintf(g.w, "\n")
}

func (g *MemoGen) genScalarInterfaces(define ExprDef) {
fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", define.Name)
func (g *MemoGen) genScalarInterfaces(defn ExprDef) {
fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", defn.Name)

fmt.Fprintf(g.w, "\n")

fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", define.Name)
fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(define.Name))
fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", defn.Name)
fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(defn.Name))
fmt.Fprintf(g.w, "}\n\n")
}

// genStringer writes a String method for the expression type named by
// defn.Name to the generator's output. The emitted method returns
// fmt.Sprintf("%s", r).
func (g *MemoGen) genStringer(defn ExprDef) {
	// The whole method is emitted from one template; the doubled %% keeps a
	// literal %s verb in the generated code.
	const stringerTmpl = "func (r *%s) String() string {\n" +
		" return fmt.Sprintf(\"%%s\", r)\n" +
		"}\n\n"
	fmt.Fprintf(g.w, stringerTmpl, defn.Name)
}

func (g *MemoGen) genStringer(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) String() string {\n", define.Name)
fmt.Fprintf(g.w, " return FormatExpr(r)\n")
func (g *MemoGen) genFormatter(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Format(s fmt.State, verb rune) {\n", defn.Name)
fmt.Fprintf(g.w, " FormatExpr(r, s, verb)\n")
fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genSourceRelInterface(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", define.Name)
if !define.SkipName {
func (g *MemoGen) genSourceRelInterface(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", defn.Name)
if !defn.SkipName {
fmt.Fprintf(g.w, " return strings.ToLower(r.Table.Name())\n")
} else {
fmt.Fprintf(g.w, " return \"\"\n")
}
fmt.Fprintf(g.w, "}\n\n")

fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", define.Name)
fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", defn.Name)
fmt.Fprintf(g.w, " return TableIdForSource(r.g.Id)\n")
fmt.Fprintf(g.w, "}\n\n")

fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", define.Name)
if define.SkipTableId {
fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", defn.Name)
if defn.SkipTableId {
fmt.Fprintf(g.w, " return nil\n")
} else {
fmt.Fprintf(g.w, " return r.Table\n")
}
fmt.Fprintf(g.w, "}\n\n")

fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", define.Name)
fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", defn.Name)
fmt.Fprintf(g.w, " return r.Table.Schema()\n")
fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genJoinRelInterface(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", define.Name)
func (g *MemoGen) genJoinRelInterface(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", defn.Name)
fmt.Fprintf(g.w, " return r.JoinBase\n")
fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genBinaryGroupInterface(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
func (g *MemoGen) genBinaryGroupInterface(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
fmt.Fprintf(g.w, " return []*ExprGroup{r.Left, r.Right}\n")
fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genChildlessGroupInterface(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
func (g *MemoGen) genChildlessGroupInterface(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
fmt.Fprintf(g.w, " return nil\n")
fmt.Fprintf(g.w, "}\n\n")
}

func (g *MemoGen) genUnaryGroupInterface(define ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
func (g *MemoGen) genUnaryGroupInterface(defn ExprDef) {
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
fmt.Fprintf(g.w, " return []*ExprGroup{r.Child}\n")
fmt.Fprintf(g.w, "}\n\n")

fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", define.Name)
switch define.Name {
fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", defn.Name)
switch defn.Name {
case "Project":
fmt.Fprintf(g.w, " return getProjectColset(r)\n")

Expand All @@ -193,42 +201,14 @@ func (g *MemoGen) genUnaryGroupInterface(define ExprDef) {
}

fmt.Fprintf(g.w, "}\n\n")

}

func (g *MemoGen) genFormatters(defines []ExprDef) {
// printer
fmt.Fprintf(g.w, "func FormatExpr(r exprType) string {\n")
fmt.Fprintf(g.w, " switch r := r.(type) {\n")
for _, d := range defines {
loweredName := strings.ToLower(d.Name)
fmt.Fprintf(g.w, " case *%s:\n", d.Name)
if loweredName == "indexscan" {
fmt.Fprintf(g.w, " if r.Alias != \"\" {\n")
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Alias)\n", loweredName)
fmt.Fprintf(g.w, " }\n")
}
if d.SourceType != "" {
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Name())\n", loweredName)
} else if d.Join || d.Binary {
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s %%d %%d\", r.Left.Id, r.Right.Id)\n", loweredName)
} else if d.Unary {
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%d\", r.Child.Id)\n", loweredName)
} else {
panic("unreachable")
}
}
fmt.Fprintf(g.w, " default:\n")
fmt.Fprintf(g.w, " panic(fmt.Sprintf(\"unknown RelExpr type: %%T\", r))\n")
fmt.Fprintf(g.w, " }\n")
fmt.Fprintf(g.w, "}\n\n")

// to sqlNode
func (g *MemoGen) genBuildRelExpr(defns []ExprDef) {
fmt.Fprintf(g.w, "func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {\n")
fmt.Fprintf(g.w, " var result sql.Node\n")
fmt.Fprintf(g.w, " var err error\n\n")
fmt.Fprintf(g.w, " switch r := r.(type) {\n")
for _, d := range defines {
for _, d := range defns {
if d.SkipExec {
continue
}
Expand Down
35 changes: 18 additions & 17 deletions optgen/cmd/support/memo_gen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func TestMemoGen(t *testing.T) {
expected string
}{
expected: `
import (
import (
"fmt"
"strings"
"github.com/dolthub/go-mysql-server/sql"
Expand All @@ -26,10 +26,16 @@ func TestMemoGen(t *testing.T) {
}

var _ RelExpr = (*hashJoin)(nil)
var _ fmt.Formatter = (*hashJoin)(nil)
var _ fmt.Stringer = (*hashJoin)(nil)
var _ JoinRel = (*hashJoin)(nil)

func (r *hashJoin) String() string {
return FormatExpr(r)
return fmt.Sprintf("%s", r)
}

func (r *hashJoin) Format(s fmt.State, verb rune) {
FormatExpr(r, s, verb)
}

func (r *hashJoin) JoinPrivate() *JoinBase {
Expand All @@ -42,10 +48,16 @@ func TestMemoGen(t *testing.T) {
}

var _ RelExpr = (*tableScan)(nil)
var _ fmt.Formatter = (*tableScan)(nil)
var _ fmt.Stringer = (*tableScan)(nil)
var _ SourceRel = (*tableScan)(nil)

func (r *tableScan) String() string {
return FormatExpr(r)
return fmt.Sprintf("%s", r)
}

func (r *tableScan) Format(s fmt.State, verb rune) {
FormatExpr(r, s, verb)
}

func (r *tableScan) Name() string {
Expand All @@ -68,17 +80,6 @@ func TestMemoGen(t *testing.T) {
return nil
}

func FormatExpr(r exprType) string {
switch r := r.(type) {
case *hashJoin:
return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id)
case *tableScan:
return fmt.Sprintf("tablescan: %s", r.Name())
default:
panic(fmt.Sprintf("unknown RelExpr type: %T", r))
}
}

func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {
var result sql.Node
var err error
Expand All @@ -96,9 +97,9 @@ func TestMemoGen(t *testing.T) {
return nil, err
}

if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {
withDescribeStats.SetDescribeStats(*DescribeStats(r))
}
if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {
withDescribeStats.SetDescribeStats(*DescribeStats(r))
}
result, err = r.Group().finalize(result)
if err != nil {
return nil, err
Expand Down
5 changes: 5 additions & 0 deletions sql/analyzer/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (

const debugAnalyzerKey = "DEBUG_ANALYZER"
const verboseAnalyzerKey = "VERBOSE_ANALYZER"
const traceAnalyzerKey = "TRACE_ANALYZER"

const maxAnalysisIterations = 8

Expand Down Expand Up @@ -215,6 +216,7 @@ func (s simpleLogFormatter) Format(entry *logrus.Entry) ([]byte, error) {
func (ab *Builder) Build() *Analyzer {
_, debug := os.LookupEnv(debugAnalyzerKey)
_, verbose := os.LookupEnv(verboseAnalyzerKey)
_, trace := os.LookupEnv(traceAnalyzerKey)
var batches = []*Batch{
{
Desc: "pre-analyzer",
Expand Down Expand Up @@ -266,6 +268,7 @@ func (ab *Builder) Build() *Analyzer {
return &Analyzer{
Debug: debug || ab.debug,
Verbose: verbose,
Trace: trace,
contextStack: make([]string, 0),
Batches: batches,
Catalog: NewCatalog(ab.provider),
Expand Down Expand Up @@ -297,6 +300,8 @@ type Analyzer struct {
Batches []*Batch
// Whether to log various debugging messages
Debug bool
// Whether to output detailed trace logging for join planning
Trace bool
// Whether to output the query plan at each step of the analyzer
Verbose bool
}
Expand Down
7 changes: 5 additions & 2 deletions sql/analyzer/costed_index_scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func costedIndexLookup(ctx *sql.Context, n sql.Node, a *Analyzer, iat sql.IndexA
if err != nil {
return n, transform.SameTree, err
}
// TODO(next): this is getting a GMSCast node and not getting an index assigned here

ita, stats, filters, err := getCostedIndexScan(ctx, a.Catalog, rt, indexes, SplitConjunction(oldFilter), qFlags)
if err != nil || ita == nil {
return n, transform.SameTree, err
Expand Down Expand Up @@ -334,6 +334,9 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta
}

func addIndexScans(ctx *sql.Context, m *memo.Memo) error {
m.Tracer.PushDebugContext("addIndexScans")
defer m.Tracer.PopDebugContext()

return memo.DfsRel(m.Root(), func(e memo.RelExpr) error {
filter, ok := e.(*memo.Filter)
if !ok {
Expand Down Expand Up @@ -928,7 +931,7 @@ func (b *indexScanRangeBuilder) rangeBuildOr(f *iScanOr, inScan bool) (sql.MySQL
// imprecise filters cannot be removed
b.markImprecise(f)

//todo union the or ranges
// todo union the or ranges
var ret sql.MySQLRangeCollection
for _, c := range f.children {
var ranges sql.MySQLRangeCollection
Expand Down
Loading
Loading