From de146f29c4dc1ff7b04b210da071909e6789c9cd Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 12:14:41 -0800 Subject: [PATCH 01/10] miscellaneous improvements --- memory/stats.go | 13 +-- sql/analyzer/costed_index_scan.go | 132 ++++++++++++++++-------------- sql/fast_int_set.go | 28 +++---- sql/statistics.go | 3 +- sql/stats/filter.go | 9 +- sql/stats/statistic.go | 2 +- sql/value_row.go | 2 +- 7 files changed, 104 insertions(+), 85 deletions(-) diff --git a/memory/stats.go b/memory/stats.go index 94a0a3116c..cfd302d049 100644 --- a/memory/stats.go +++ b/memory/stats.go @@ -60,10 +60,11 @@ func (s *StatsProv) AnalyzeTable(ctx *sql.Context, table sql.Table, db string) e } newStats := make(map[statsKey][]int) - tablePrefix := fmt.Sprintf("%s.", strings.ToLower(table.Name())) + tablePrefix := strings.ToLower(table.Name()) + "." for _, idx := range indexes { - cols := make([]string, len(idx.Expressions())) - for i, c := range idx.Expressions() { + exprs := idx.Expressions() + cols := make([]string, len(exprs)) + for i, c := range exprs { cols[i] = strings.TrimPrefix(strings.ToLower(c), tablePrefix) } for i := 1; i < len(cols)+1; i++ { @@ -244,7 +245,7 @@ func (s *StatsProv) reservoirSample(ctx *sql.Context, table sql.Table) ([]sql.Ro } func (s *StatsProv) GetTableStats(ctx *sql.Context, db string, table sql.Table) ([]sql.Statistic, error) { - pref := fmt.Sprintf("%s.%s", strings.ToLower(db), strings.ToLower(table.Name())) + pref := strings.ToLower(db) + "." + strings.ToLower(table.Name()) var ret []sql.Statistic for key, stats := range s.colStats { if strings.HasPrefix(string(key), pref) { @@ -279,7 +280,7 @@ func (s *StatsProv) DropStats(ctx *sql.Context, qual sql.StatQualifier, cols []s } func (s *StatsProv) RowCount(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - pref := fmt.Sprintf("%s.%s", strings.ToLower(db), strings.ToLower(table.Name())) + pref := strings.ToLower(db) + "." 
+ strings.ToLower(table.Name()) var cnt uint64 for key, stats := range s.colStats { if strings.HasPrefix(string(key), pref) { @@ -292,7 +293,7 @@ func (s *StatsProv) RowCount(ctx *sql.Context, db string, table sql.Table) (uint } func (s *StatsProv) DataLength(ctx *sql.Context, db string, table sql.Table) (uint64, error) { - pref := fmt.Sprintf("%s.%s", db, table) + pref := strings.ToLower(db) + "." + strings.ToLower(table.Name()) var size uint64 for key, stats := range s.colStats { if strings.HasPrefix(string(key), pref) { diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index b5fa511169..d1abb3bbd2 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -155,7 +155,7 @@ func costedIndexLookup(ctx *sql.Context, n sql.Node, a *Analyzer, iat sql.IndexA } func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.TableNode, indexes []sql.Index, filters []sql.Expression, qFlags *sql.QueryFlags) (*plan.IndexedTableAccess, sql.Statistic, []sql.Expression, error) { - statistics, err := statsProv.GetTableStats(ctx, strings.ToLower(rt.Database().Name()), rt.UnderlyingTable()) + statistics, err := statsProv.GetTableStats(ctx, rt.Database().Name(), rt.UnderlyingTable()) if err != nil { return nil, nil, nil, err } @@ -174,6 +174,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta return nil, nil, nil, err } + // TODO: detect that this table is unchanged and reuse the work from previous index costs iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable) if !ok { return nil, nil, nil, err @@ -182,19 +183,19 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta // run each index through coster, save the cheapest var dbName string if dbTab, ok := rt.UnderlyingTable().(sql.Databaseable); ok { - dbName = strings.ToLower(dbTab.Database()) + dbName = dbTab.Database() } table := rt.UnderlyingTable() var schemaName string if schTab, 
ok := table.(sql.DatabaseSchemaTable); ok { - schemaName = strings.ToLower(schTab.DatabaseSchema().SchemaName()) + schemaName = schTab.DatabaseSchema().SchemaName() } - tableName := strings.ToLower(table.Name()) + tableName := table.Name() if len(qualToStat) > 0 { // don't mix and match real and default stats for _, idx := range indexes { - qual := sql.NewStatQualifier(dbName, schemaName, tableName, strings.ToLower(idx.ID())) + qual := sql.NewStatQualifier(dbName, schemaName, tableName, idx.ID()) _, ok := qualToStat[qual] if !ok { qualToStat = nil @@ -204,15 +205,15 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta } for _, idx := range indexes { - qual := sql.NewStatQualifier(dbName, schemaName, tableName, strings.ToLower(idx.ID())) + qual := sql.NewStatQualifier(dbName, schemaName, tableName, idx.ID()) stat, ok := qualToStat[qual] if !ok { stat, err = uniformDistStatisticsForIndex(ctx, statsProv, iat, idx) + if err != nil { + return nil, nil, nil, err + } } - if err != nil { - return nil, nil, nil, err - } - err := c.cost(root, stat, idx) + err = c.cost(root, stat, idx) if err != nil { return nil, nil, nil, err } @@ -314,25 +315,27 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta retFilters = b.leftover } - var bestStat sql.Statistic - if c.bestStat.FuncDeps().HasMax1Row() { - bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) - } else { - bestStat, err = c.bestStat.WithHistogram(c.bestHist) - if err != nil { - return nil, nil, nil, err - } - bestStat = stats.UpdateCounts(bestStat) - } - - if bestStat.FuncDeps().HasMax1Row() && !qFlags.JoinIsSet() && !qFlags.SubqueryIsSet() && lookup.Ranges.Len() == 1 { + // TODO: this block is essentially doing a deep copy of c.bestStat? but why? + //var bestStat sql.Statistic + //if c.bestStat.FuncDeps().HasMax1Row() { + // bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) + //} else { + // // TODO: c.cost already does this? 
+ // bestStat, err = c.bestStat.WithHistogram(c.bestHist) // This filters out invalid buckets, but errors? + // if err != nil { + // return nil, nil, nil, err + // } + // bestStat = stats.UpdateCounts(bestStat) + //} + + if c.bestStat.FuncDeps().HasMax1Row() && !qFlags.JoinIsSet() && !qFlags.SubqueryIsSet() && lookup.Ranges.Len() == 1 { // Strict index lookup without a join or subquery scope will return // at most one row. We could also use some sort of scope counting // to check for single scope. qFlags.Set(sql.QFlagMax1Row) } - return ret, bestStat, retFilters, nil + return ret, c.bestStat, retFilters, nil } func addIndexScans(ctx *sql.Context, m *memo.Memo) error { @@ -469,6 +472,7 @@ type indexCoster struct { func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) error { ordinals := ordinalsForStat(stat) + // TODO: cache var newHist []sql.HistogramBucket var newFds *sql.FuncDepSet var filters sql.FastIntSet @@ -492,6 +496,7 @@ func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) err if ok { filters.Add(int(f.id)) } + case *iScanLeaf: newHist, newFds, ok, prefix, err = c.costIndexScanLeaf(f, stat, stat.Histogram(), ordinals, idx) if err != nil { @@ -500,6 +505,7 @@ func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) err if ok { filters.Add(int(f.id)) } + default: panic("unreachable") } @@ -517,7 +523,7 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd if s == nil || filters.Len() == 0 { return } - rowCnt, _, _ := stats.GetNewCounts(hist) + rowCnt := stats.GetNewRowCounts(hist) var update bool defer func() { @@ -534,16 +540,24 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd if c.bestStat == nil { update = true return - } else if c.bestStat.FuncDeps().HasMax1Row() { + } + + if c.bestStat.FuncDeps().HasMax1Row() { return - } else if rowCnt < c.bestCnt { + } + + if rowCnt < c.bestCnt { update = true return - } else if 
c.bestPrefix == 0 || prefix == 0 && c.bestPrefix != prefix { + } + + if c.bestPrefix == 0 || prefix == 0 && c.bestPrefix != prefix { // any prefix is better than no prefix update = prefix > c.bestPrefix return - } else if rowCnt == c.bestCnt { + } + + if rowCnt == c.bestCnt { // hand rules when stats don't exist or match exactly cmp := fds best := c.bestStat.FuncDeps() @@ -554,21 +568,20 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd // If one index uses a strict superset of the filters of the other, we should always pick the superset. // This is true even if the index with more filters isn't unique. - if prefix > c.bestPrefix && slices.Equal(c.bestStat.Columns()[:c.bestPrefix], s.Columns()[:c.bestPrefix]) { + bestCols := c.bestStat.Columns() + newCols := s.Columns() + if prefix > c.bestPrefix && slices.Equal(bestCols[:c.bestPrefix], newCols[:c.bestPrefix]) { update = true return } - - if prefix == c.bestPrefix && slices.Equal(c.bestStat.Columns()[:c.bestPrefix], s.Columns()[:c.bestPrefix]) && hasRange && !c.hasRange { + if prefix == c.bestPrefix && slices.Equal(bestCols[:c.bestPrefix], newCols[:c.bestPrefix]) && hasRange && !c.hasRange { update = true return } - - if c.bestPrefix > prefix && slices.Equal(c.bestStat.Columns()[:prefix], s.Columns()[:prefix]) { + if c.bestPrefix > prefix && slices.Equal(bestCols[:prefix], newCols[:prefix]) { return } - - if c.bestPrefix == prefix && slices.Equal(c.bestStat.Columns()[:prefix], s.Columns()[:prefix]) && !hasRange && c.hasRange { + if c.bestPrefix == prefix && slices.Equal(bestCols[:prefix], newCols[:prefix]) && !hasRange && c.hasRange { return } @@ -600,7 +613,8 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd } update = true return - } else if cmp.Constants().Len() < best.Constants().Len() { + } + if cmp.Constants().Len() < best.Constants().Len() { if cmpHasLax && !bestHasLax { // keep unique key update = true @@ -612,7 +626,6 @@ func (c 
*indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd update = true return } - if filters.Len() < c.bestFilters.Len() { return } @@ -624,32 +637,29 @@ func (c *indexCoster) updateBest(s sql.Statistic, hist []sql.HistogramBucket, fd return } - { - // if no unique keys, prefer equality over ranges - bestConst, bestIsNull := c.getConstAndNullFilters(c.bestFilters) - cmpConst, cmpIsNull := c.getConstAndNullFilters(filters) - if cmpConst.Len() > bestConst.Len() { - update = true - return - } - if cmpIsNull.Len() > bestIsNull.Len() { - update = true - return - } + // if no unique keys, prefer equality over ranges + bestConst, bestIsNull := c.getConstAndNullFilters(c.bestFilters) + cmpConst, cmpIsNull := c.getConstAndNullFilters(filters) + if cmpConst.Len() > bestConst.Len() { + update = true + return + } + if cmpIsNull.Len() > bestIsNull.Len() { + update = true + return } - { - if strings.EqualFold(s.Qualifier().Index(), "primary") { - update = true - return - } else if strings.EqualFold(c.bestStat.Qualifier().Index(), "primary") { - return - } - if strings.Compare(s.Qualifier().Index(), c.bestStat.Qualifier().Index()) < 0 { - // if they are still equal, use index name to make deterministic - update = true - return - } + if strings.EqualFold(s.Qualifier().Index(), "primary") { + update = true + return + } + if strings.EqualFold(c.bestStat.Qualifier().Index(), "primary") { + return + } + if strings.Compare(s.Qualifier().Index(), c.bestStat.Qualifier().Index()) < 0 { + // if they are still equal, use index name to make deterministic + update = true + return } } } diff --git a/sql/fast_int_set.go b/sql/fast_int_set.go index 289e742b6e..8631ce2450 100644 --- a/sql/fast_int_set.go +++ b/sql/fast_int_set.go @@ -141,7 +141,7 @@ func (s *FastIntSet) Remove(i int) { } // Contains returns true if the set contains the value. 
-func (s FastIntSet) Contains(i int) bool { +func (s *FastIntSet) Contains(i int) bool { if s.large != nil { return s.large.Has(i) } @@ -149,12 +149,12 @@ func (s FastIntSet) Contains(i int) bool { } // Empty returns true if the set is empty. -func (s FastIntSet) Empty() bool { +func (s *FastIntSet) Empty() bool { return s.small == 0 && (s.large == nil || s.large.IsEmpty()) } // Len returns the number of the elements in the set. -func (s FastIntSet) Len() int { +func (s *FastIntSet) Len() int { if s.large == nil { return bits.OnesCount64(s.small) } @@ -163,7 +163,7 @@ func (s FastIntSet) Len() int { // Next returns the first value in the set which is >= startVal. If there is no // value, the second return value is false. -func (s FastIntSet) Next(startVal int) (int, bool) { +func (s *FastIntSet) Next(startVal int) (int, bool) { if s.large != nil { res := s.large.LowerBound(startVal) return res, res != intsets.MaxInt @@ -181,7 +181,7 @@ func (s FastIntSet) Next(startVal int) (int, bool) { } // ForEach calls a function for each value in the set (in increasing order). -func (s FastIntSet) ForEach(f func(i int)) { +func (s *FastIntSet) ForEach(f func(i int)) { if s.large != nil { for x := s.large.Min(); x != intsets.MaxInt; x = s.large.LowerBound(x + 1) { f(x) @@ -196,7 +196,7 @@ func (s FastIntSet) ForEach(f func(i int)) { } // Ordered returns a slice with all the integers in the set, in increasing order. -func (s FastIntSet) Ordered() []int { +func (s *FastIntSet) Ordered() []int { if s.Empty() { return nil } @@ -211,7 +211,7 @@ func (s FastIntSet) Ordered() []int { } // Copy returns a copy of s which can be modified independently. -func (s FastIntSet) Copy() FastIntSet { +func (s *FastIntSet) Copy() FastIntSet { var c FastIntSet if s.large != nil { c.large = new(intsets.Sparse) @@ -254,7 +254,7 @@ func (s *FastIntSet) UnionWith(rhs FastIntSet) { } // Union returns the union of s and rhs as a new set. 
-func (s FastIntSet) Union(rhs FastIntSet) FastIntSet { +func (s *FastIntSet) Union(rhs FastIntSet) FastIntSet { r := s.Copy() r.UnionWith(rhs) return r @@ -278,14 +278,14 @@ func (s *FastIntSet) IntersectionWith(rhs FastIntSet) { } // Intersection returns the intersection of s and rhs as a new set. -func (s FastIntSet) Intersection(rhs FastIntSet) FastIntSet { +func (s *FastIntSet) Intersection(rhs FastIntSet) FastIntSet { r := s.Copy() r.IntersectionWith(rhs) return r } // Intersects returns true if s has any elements in common with rhs. -func (s FastIntSet) Intersects(rhs FastIntSet) bool { +func (s *FastIntSet) Intersects(rhs FastIntSet) bool { if s.large == nil { // Fast path other := rhs.small @@ -316,14 +316,14 @@ func (s *FastIntSet) DifferenceWith(rhs FastIntSet) { } // Difference returns the elements of s that are not in rhs as a new set. -func (s FastIntSet) Difference(rhs FastIntSet) FastIntSet { +func (s *FastIntSet) Difference(rhs FastIntSet) FastIntSet { r := s.Copy() r.DifferenceWith(rhs) return r } // Equals returns true if the two sets are identical. -func (s FastIntSet) Equals(rhs FastIntSet) bool { +func (s *FastIntSet) Equals(rhs FastIntSet) bool { if s.large == nil && rhs.large == nil { return s.small == rhs.small } @@ -344,7 +344,7 @@ func (s FastIntSet) Equals(rhs FastIntSet) bool { } // SubsetOf returns true if rhs contains all the elements in s. -func (s FastIntSet) SubsetOf(rhs FastIntSet) bool { +func (s *FastIntSet) SubsetOf(rhs FastIntSet) bool { if s.large == nil && rhs.large == nil { return (s.small & rhs.small) == s.small } @@ -394,7 +394,7 @@ func (s *FastIntSet) Shift(delta int) FastIntSet { // String returns a list representation of elements. Sequential runs of positive // numbers are shown as ranges. For example, for the set {0, 1, 2, 5, 6, 10}, // the output is "(0-2,5,6,10)". 
-func (s FastIntSet) String() string { +func (s *FastIntSet) String() string { var buf bytes.Buffer buf.WriteByte('(') appendRange := func(start, end int) { diff --git a/sql/statistics.go b/sql/statistics.go index ae1f9fda95..ef66dac32c 100644 --- a/sql/statistics.go +++ b/sql/statistics.go @@ -117,7 +117,8 @@ func NewStatQualifier(db, schema, table, index string) StatQualifier { Database: strings.ToLower(db), Sch: strings.ToLower(schema), Tab: strings.ToLower(table), - Idx: strings.ToLower(index)} + Idx: strings.ToLower(index), + } } // StatQualifier is the namespace hierarchy for a given statistic. diff --git a/sql/stats/filter.go b/sql/stats/filter.go index f2f2de5a21..ecff9f8553 100644 --- a/sql/stats/filter.go +++ b/sql/stats/filter.go @@ -158,7 +158,14 @@ func nilSafeCmp(ctx *sql.Context, typ sql.Type, left, right interface{}) (int, e } } -func GetNewCounts(buckets []sql.HistogramBucket) (rowCount uint64, distinctCount uint64, nullCount uint64) { +func GetNewRowCounts(buckets []sql.HistogramBucket) (rowCount uint64) { + for _, b := range buckets { + rowCount += b.RowCount() + } + return rowCount +} + +func GetAllNewCounts(buckets []sql.HistogramBucket) (rowCount uint64, distinctCount uint64, nullCount uint64) { if len(buckets) == 0 { return 0, 0, 0 } diff --git a/sql/stats/statistic.go b/sql/stats/statistic.go index c72dc89565..7e17287dc1 100644 --- a/sql/stats/statistic.go +++ b/sql/stats/statistic.go @@ -207,7 +207,7 @@ func (s *Statistic) WithLowerBound(r sql.Row) sql.Statistic { func (s *Statistic) WithHistogram(h sql.Histogram) (sql.Statistic, error) { ret := *s - ret.Hist = nil + ret.Hist = make(sql.Histogram, 0, len(h)) for _, b := range h { sqlB, ok := b.(*Bucket) if !ok { diff --git a/sql/value_row.go b/sql/value_row.go index f9140c41c5..e5fcd838c5 100644 --- a/sql/value_row.go +++ b/sql/value_row.go @@ -29,8 +29,8 @@ type ValueBytes []byte // Value is a logical index into a ValueRow. For efficiency reasons, use sparingly. 
type Value struct { - Val ValueBytes WrappedVal BytesWrapper + Val ValueBytes Typ query.Type } From 158401d0fd33e8405e61b412217f08110466314d Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 12:36:42 -0800 Subject: [PATCH 02/10] tidy --- sql/analyzer/costed_index_scan.go | 2 -- sql/fast_int_set.go | 26 +++++++++++++------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index d1abb3bbd2..db50955dec 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -174,7 +174,6 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta return nil, nil, nil, err } - // TODO: detect that this table is unchanged and reuse the work from previous index costs iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable) if !ok { return nil, nil, nil, err @@ -472,7 +471,6 @@ type indexCoster struct { func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) error { ordinals := ordinalsForStat(stat) - // TODO: cache var newHist []sql.HistogramBucket var newFds *sql.FuncDepSet var filters sql.FastIntSet diff --git a/sql/fast_int_set.go b/sql/fast_int_set.go index 8631ce2450..fa2b59f374 100644 --- a/sql/fast_int_set.go +++ b/sql/fast_int_set.go @@ -141,7 +141,7 @@ func (s *FastIntSet) Remove(i int) { } // Contains returns true if the set contains the value. -func (s *FastIntSet) Contains(i int) bool { +func (s FastIntSet) Contains(i int) bool { if s.large != nil { return s.large.Has(i) } @@ -149,7 +149,7 @@ func (s *FastIntSet) Contains(i int) bool { } // Empty returns true if the set is empty. -func (s *FastIntSet) Empty() bool { +func (s FastIntSet) Empty() bool { return s.small == 0 && (s.large == nil || s.large.IsEmpty()) } @@ -163,7 +163,7 @@ func (s *FastIntSet) Len() int { // Next returns the first value in the set which is >= startVal. If there is no // value, the second return value is false. 
-func (s *FastIntSet) Next(startVal int) (int, bool) { +func (s FastIntSet) Next(startVal int) (int, bool) { if s.large != nil { res := s.large.LowerBound(startVal) return res, res != intsets.MaxInt @@ -181,7 +181,7 @@ func (s *FastIntSet) Next(startVal int) (int, bool) { } // ForEach calls a function for each value in the set (in increasing order). -func (s *FastIntSet) ForEach(f func(i int)) { +func (s FastIntSet) ForEach(f func(i int)) { if s.large != nil { for x := s.large.Min(); x != intsets.MaxInt; x = s.large.LowerBound(x + 1) { f(x) @@ -196,7 +196,7 @@ func (s *FastIntSet) ForEach(f func(i int)) { } // Ordered returns a slice with all the integers in the set, in increasing order. -func (s *FastIntSet) Ordered() []int { +func (s FastIntSet) Ordered() []int { if s.Empty() { return nil } @@ -211,7 +211,7 @@ func (s *FastIntSet) Ordered() []int { } // Copy returns a copy of s which can be modified independently. -func (s *FastIntSet) Copy() FastIntSet { +func (s FastIntSet) Copy() FastIntSet { var c FastIntSet if s.large != nil { c.large = new(intsets.Sparse) @@ -254,7 +254,7 @@ func (s *FastIntSet) UnionWith(rhs FastIntSet) { } // Union returns the union of s and rhs as a new set. -func (s *FastIntSet) Union(rhs FastIntSet) FastIntSet { +func (s FastIntSet) Union(rhs FastIntSet) FastIntSet { r := s.Copy() r.UnionWith(rhs) return r @@ -278,14 +278,14 @@ func (s *FastIntSet) IntersectionWith(rhs FastIntSet) { } // Intersection returns the intersection of s and rhs as a new set. -func (s *FastIntSet) Intersection(rhs FastIntSet) FastIntSet { +func (s FastIntSet) Intersection(rhs FastIntSet) FastIntSet { r := s.Copy() r.IntersectionWith(rhs) return r } // Intersects returns true if s has any elements in common with rhs. 
-func (s *FastIntSet) Intersects(rhs FastIntSet) bool { +func (s FastIntSet) Intersects(rhs FastIntSet) bool { if s.large == nil { // Fast path other := rhs.small @@ -316,14 +316,14 @@ func (s *FastIntSet) DifferenceWith(rhs FastIntSet) { } // Difference returns the elements of s that are not in rhs as a new set. -func (s *FastIntSet) Difference(rhs FastIntSet) FastIntSet { +func (s FastIntSet) Difference(rhs FastIntSet) FastIntSet { r := s.Copy() r.DifferenceWith(rhs) return r } // Equals returns true if the two sets are identical. -func (s *FastIntSet) Equals(rhs FastIntSet) bool { +func (s FastIntSet) Equals(rhs FastIntSet) bool { if s.large == nil && rhs.large == nil { return s.small == rhs.small } @@ -344,7 +344,7 @@ func (s *FastIntSet) Equals(rhs FastIntSet) bool { } // SubsetOf returns true if rhs contains all the elements in s. -func (s *FastIntSet) SubsetOf(rhs FastIntSet) bool { +func (s FastIntSet) SubsetOf(rhs FastIntSet) bool { if s.large == nil && rhs.large == nil { return (s.small & rhs.small) == s.small } @@ -394,7 +394,7 @@ func (s *FastIntSet) Shift(delta int) FastIntSet { // String returns a list representation of elements. Sequential runs of positive // numbers are shown as ranges. For example, for the set {0, 1, 2, 5, 6, 10}, // the output is "(0-2,5,6,10)". -func (s *FastIntSet) String() string { +func (s FastIntSet) String() string { var buf bytes.Buffer buf.WriteByte('(') appendRange := func(start, end int) { From ab73d00b145e8b5f63f308ddae8204390e5a5013 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 12:38:15 -0800 Subject: [PATCH 03/10] missed one --- sql/fast_int_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/fast_int_set.go b/sql/fast_int_set.go index fa2b59f374..289e742b6e 100644 --- a/sql/fast_int_set.go +++ b/sql/fast_int_set.go @@ -154,7 +154,7 @@ func (s FastIntSet) Empty() bool { } // Len returns the number of the elements in the set. 
-func (s *FastIntSet) Len() int { +func (s FastIntSet) Len() int { if s.large == nil { return bits.OnesCount64(s.small) } From b392a29c1d878a1b5c3e8b41b9dd98875e9fb565 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 13:28:43 -0800 Subject: [PATCH 04/10] avoid sprintf --- sql/expression/arithmetic.go | 2 +- sql/expression/literal.go | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/sql/expression/arithmetic.go b/sql/expression/arithmetic.go index dc42d6a51d..1cefd09961 100644 --- a/sql/expression/arithmetic.go +++ b/sql/expression/arithmetic.go @@ -798,7 +798,7 @@ func (*UnaryMinus) CollationCoercibility(ctx *sql.Context) (collation sql.Collat } func (e *UnaryMinus) String() string { - return fmt.Sprintf("-%s", e.Child) + return "-" + e.Child.String() } // WithChildren implements the Expression interface. diff --git a/sql/expression/literal.go b/sql/expression/literal.go index 104c04fd97..f89acda93e 100644 --- a/sql/expression/literal.go +++ b/sql/expression/literal.go @@ -16,6 +16,7 @@ package expression import ( "fmt" + "strconv" "strings" "github.com/dolthub/vitess/go/vt/proto/query" @@ -79,8 +80,26 @@ func (lit *Literal) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { func (lit *Literal) String() string { switch litVal := lit.Val.(type) { - case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: - return fmt.Sprintf("%d", litVal) + case int: + return strconv.FormatInt(int64(litVal), 10) + case int8: + return strconv.FormatInt(int64(litVal), 10) + case int16: + return strconv.FormatInt(int64(litVal), 10) + case int32: + return strconv.FormatInt(int64(litVal), 10) + case int64: + return strconv.FormatInt(litVal, 10) + case uint: + return strconv.FormatUint(uint64(litVal), 10) + case uint8: + return strconv.FormatUint(uint64(litVal), 10) + case uint16: + return strconv.FormatUint(uint64(litVal), 10) + case uint32: + return strconv.FormatUint(uint64(litVal), 10) + case uint64: + 
return strconv.FormatUint(litVal, 10) case string: switch lit.Typ.Type() { // utf8 charset cannot encode binary string @@ -91,7 +110,7 @@ func (lit *Literal) String() string { // Backslash chars also need to be replaced. escaped := strings.ReplaceAll(litVal, "'", "''") escaped = strings.ReplaceAll(escaped, "\\", "\\\\") - return fmt.Sprintf("'%s'", escaped) + return strconv.Quote(escaped) case decimal.Decimal: return litVal.StringFixed(litVal.Exponent() * -1) case []byte: From a72fafb1a6ce888d01c222b54b58fa42a1c8ddd6 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 13:47:17 -0800 Subject: [PATCH 05/10] apparently need to copy this stat --- sql/analyzer/costed_index_scan.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index db50955dec..37cdeb5fed 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -314,18 +314,16 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta retFilters = b.leftover } - // TODO: this block is essentially doing a deep copy of c.bestStat? but why? - //var bestStat sql.Statistic - //if c.bestStat.FuncDeps().HasMax1Row() { - // bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) - //} else { - // // TODO: c.cost already does this? - // bestStat, err = c.bestStat.WithHistogram(c.bestHist) // This filters out invalid buckets, but errors? 
- // if err != nil { - // return nil, nil, nil, err - // } - // bestStat = stats.UpdateCounts(bestStat) - //} + var bestStat sql.Statistic + if c.bestStat.FuncDeps().HasMax1Row() { + bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) + } else { + bestStat, err = c.bestStat.WithHistogram(c.bestHist) + if err != nil { + return nil, nil, nil, err + } + bestStat = stats.UpdateCounts(bestStat) + } if c.bestStat.FuncDeps().HasMax1Row() && !qFlags.JoinIsSet() && !qFlags.SubqueryIsSet() && lookup.Ranges.Len() == 1 { // Strict index lookup without a join or subquery scope will return From 07d6ccbceb7d918e87090999360f0de445ad145b Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 14:01:00 -0800 Subject: [PATCH 06/10] more string stuff --- sql/schemas.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/schemas.go b/sql/schemas.go index ce34ce52d7..9455574931 100644 --- a/sql/schemas.go +++ b/sql/schemas.go @@ -101,9 +101,8 @@ func (s Schema) IndexOf(column, source string) int { // IndexOfColName returns the index of the given column in the schema or -1 if it's not present. Only safe for schemas // corresponding to a single table, where the source of the column is irrelevant. 
func (s Schema) IndexOfColName(column string) int { - column = strings.ToLower(column) for i, col := range s { - if strings.ToLower(col.Name) == column { + if strings.EqualFold(col.Name, column) { return i } } From e39b8ed30b03451ea1d0a35ed3a4816382609edc Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 14:17:14 -0800 Subject: [PATCH 07/10] delete unused error --- sql/convert_value.go | 36 +++++++++++++++++------------------- sql/expression/literal.go | 2 +- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/sql/convert_value.go b/sql/convert_value.go index 880b9f2f58..a759b8c529 100644 --- a/sql/convert_value.go +++ b/sql/convert_value.go @@ -1,92 +1,90 @@ package sql import ( - "fmt" - "github.com/dolthub/go-mysql-server/sql/values" "github.com/dolthub/vitess/go/vt/proto/query" ) // ConvertToValue converts the interface to a sql value. -func ConvertToValue(v interface{}) (Value, error) { +func ConvertToValue(v interface{}) Value { switch v := v.(type) { case nil: return Value{ Typ: query.Type_NULL_TYPE, Val: nil, - }, nil + } case int: return Value{ Typ: query.Type_INT64, Val: values.WriteInt64(make([]byte, values.Int64Size), int64(v)), - }, nil + } case int8: return Value{ Typ: query.Type_INT8, Val: values.WriteInt8(make([]byte, values.Int8Size), v), - }, nil + } case int16: return Value{ Typ: query.Type_INT16, Val: values.WriteInt16(make([]byte, values.Int16Size), v), - }, nil + } case int32: return Value{ Typ: query.Type_INT32, Val: values.WriteInt32(make([]byte, values.Int32Size), v), - }, nil + } case int64: return Value{ Typ: query.Type_INT64, Val: values.WriteInt64(make([]byte, values.Int64Size), v), - }, nil + } case uint: return Value{ Typ: query.Type_UINT64, Val: values.WriteUint64(make([]byte, values.Uint64Size), uint64(v)), - }, nil + } case uint8: return Value{ Typ: query.Type_UINT8, Val: values.WriteUint8(make([]byte, values.Uint8Size), v), - }, nil + } case uint16: return Value{ Typ: query.Type_UINT16, Val: 
values.WriteUint16(make([]byte, values.Uint16Size), v), - }, nil + } case uint32: return Value{ Typ: query.Type_UINT32, Val: values.WriteUint32(make([]byte, values.Uint32Size), v), - }, nil + } case uint64: return Value{ Typ: query.Type_UINT64, Val: values.WriteUint64(make([]byte, values.Uint64Size), v), - }, nil + } case float32: return Value{ Typ: query.Type_FLOAT32, Val: values.WriteFloat32(make([]byte, values.Float32Size), v), - }, nil + } case float64: return Value{ Typ: query.Type_FLOAT64, Val: values.WriteFloat64(make([]byte, values.Float64Size), v), - }, nil + } case string: return Value{ Typ: query.Type_VARCHAR, Val: values.WriteString(make([]byte, len(v)), v, values.ByteOrderCollation), - }, nil + } case []byte: return Value{ Typ: query.Type_BLOB, Val: values.WriteBytes(make([]byte, len(v)), v, values.ByteOrderCollation), - }, nil + } default: - return Value{}, fmt.Errorf("type %T not implemented", v) + return Value{} } } diff --git a/sql/expression/literal.go b/sql/expression/literal.go index f89acda93e..bd5dbfd320 100644 --- a/sql/expression/literal.go +++ b/sql/expression/literal.go @@ -41,7 +41,7 @@ var _ sqlparser.Injectable = &Literal{} // NewLiteral creates a new Literal expression. 
func NewLiteral(value interface{}, fieldType sql.Type) *Literal { - val2, _ := sql.ConvertToValue(value) + val2 := sql.ConvertToValue(value) return &Literal{ Val: value, val2: val2, From 060625a4c80f9bc29850c212a40b28b510afccbb Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 14:56:52 -0800 Subject: [PATCH 08/10] return best stat --- sql/analyzer/costed_index_scan.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index 37cdeb5fed..ec9bb6c5f1 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -315,7 +315,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta } var bestStat sql.Statistic - if c.bestStat.FuncDeps().HasMax1Row() { + if bestStat.FuncDeps().HasMax1Row() { bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) } else { bestStat, err = c.bestStat.WithHistogram(c.bestHist) @@ -332,7 +332,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta qFlags.Set(sql.QFlagMax1Row) } - return ret, c.bestStat, retFilters, nil + return ret, bestStat, retFilters, nil } func addIndexScans(ctx *sql.Context, m *memo.Memo) error { From be9e4ba5d74049c0742254b5448e9c37740e99be Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 16:07:07 -0800 Subject: [PATCH 09/10] more string removal --- sql/expression/alias.go | 2 +- sql/expression/function/aggregation/unary_aggs.og.go | 2 +- sql/expression/literal.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/expression/alias.go b/sql/expression/alias.go index ea587555c9..3c9d7283a2 100644 --- a/sql/expression/alias.go +++ b/sql/expression/alias.go @@ -140,7 +140,7 @@ func (e *Alias) Describe(options sql.DescribeOptions) string { return fmt.Sprintf("%s->%s:%d", sql.Describe(e.Child, options), e.name, e.id) } } - return fmt.Sprintf("%s as %s", sql.Describe(e.Child, options), e.name) + return 
sql.Describe(e.Child, options) + " as " + e.name } func (e *Alias) String() string { diff --git a/sql/expression/function/aggregation/unary_aggs.og.go b/sql/expression/function/aggregation/unary_aggs.og.go index a5094cc975..eb0509115b 100644 --- a/sql/expression/function/aggregation/unary_aggs.og.go +++ b/sql/expression/function/aggregation/unary_aggs.og.go @@ -440,7 +440,7 @@ func (a *Count) String() string { pr.WriteChildren(children...) return pr.String() } - return fmt.Sprintf("COUNT(%s)", a.Child) + return "COUNT(" + a.Child.String() + ")" } func (a *Count) DebugString() string { diff --git a/sql/expression/literal.go b/sql/expression/literal.go index bd5dbfd320..8757bb8c31 100644 --- a/sql/expression/literal.go +++ b/sql/expression/literal.go @@ -110,7 +110,7 @@ func (lit *Literal) String() string { // Backslash chars also need to be replaced. escaped := strings.ReplaceAll(litVal, "'", "''") escaped = strings.ReplaceAll(escaped, "\\", "\\\\") - return strconv.Quote(escaped) + return "'" + escaped + "'" case decimal.Decimal: return litVal.StringFixed(litVal.Exponent() * -1) case []byte: From eaa96cf143790446d7d0032af5593cbda5d00580 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 25 Nov 2025 16:16:40 -0800 Subject: [PATCH 10/10] oops --- sql/analyzer/costed_index_scan.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/analyzer/costed_index_scan.go b/sql/analyzer/costed_index_scan.go index ec9bb6c5f1..ddfd73b6c8 100644 --- a/sql/analyzer/costed_index_scan.go +++ b/sql/analyzer/costed_index_scan.go @@ -315,7 +315,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta } var bestStat sql.Statistic - if bestStat.FuncDeps().HasMax1Row() { + if c.bestStat.FuncDeps().HasMax1Row() { bestStat = c.bestStat.WithRowCount(1).WithDistinctCount(1) } else { bestStat, err = c.bestStat.WithHistogram(c.bestHist) @@ -325,7 +325,7 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta bestStat 
= stats.UpdateCounts(bestStat) } - if c.bestStat.FuncDeps().HasMax1Row() && !qFlags.JoinIsSet() && !qFlags.SubqueryIsSet() && lookup.Ranges.Len() == 1 { + if bestStat.FuncDeps().HasMax1Row() && !qFlags.JoinIsSet() && !qFlags.SubqueryIsSet() && lookup.Ranges.Len() == 1 { // Strict index lookup without a join or subquery scope will return // at most one row. We could also use some sort of scope counting // to check for single scope.