Skip to content

Commit c451888

Browse files
author
James Cor
committed
hash refactor
1 parent 4479e84 commit c451888

File tree

13 files changed

+126
-112
lines changed

13 files changed

+126
-112
lines changed

enginetest/memory_engine_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,9 @@ func TestSingleScript(t *testing.T) {
213213
Assertions: []queries.ScriptTestAssertion{
214214
{
215215
Query: "select 'abcdef' in (select name from test)",
216-
Expected: []sql.Row{},
216+
Expected: []sql.Row{
217+
{true},
218+
},
217219
},
218220
},
219221
},

memory/table_data.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
package memory
1616

1717
import (
18-
"context"
19-
"fmt"
18+
"fmt"
2019
"sort"
2120
"strconv"
2221
"strings"
@@ -25,6 +24,7 @@ import (
2524

2625
"github.com/dolthub/go-mysql-server/sql"
2726
"github.com/dolthub/go-mysql-server/sql/expression"
27+
"github.com/dolthub/go-mysql-server/sql/hash"
2828
"github.com/dolthub/go-mysql-server/sql/transform"
2929
"github.com/dolthub/go-mysql-server/sql/types"
3030
)
@@ -275,7 +275,7 @@ func (td *TableData) numRows(ctx *sql.Context) (uint64, error) {
275275
}
276276

277277
// throws an error if any two or more rows share the same |cols| values.
278-
func (td *TableData) errIfDuplicateEntryExist(ctx context.Context, cols []string, idxName string) error {
278+
func (td *TableData) errIfDuplicateEntryExist(ctx *sql.Context, cols []string, idxName string) error {
279279
columnMapping, err := td.columnIndexes(cols)
280280

281281
// We currently skip validating duplicates on unique virtual columns.
@@ -297,7 +297,7 @@ func (td *TableData) errIfDuplicateEntryExist(ctx context.Context, cols []string
297297
if hasNulls(idxPrefixKey) {
298298
continue
299299
}
300-
h, err := sql.HashOf(ctx, td.schema.Schema, idxPrefixKey)
300+
h, err := hash.HashOf(ctx, td.schema.Schema, idxPrefixKey)
301301
if err != nil {
302302
return err
303303
}

sql/cache.go

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -15,61 +15,12 @@
1515
package sql
1616

1717
import (
18-
"context"
1918
"fmt"
2019
"runtime"
21-
"sync"
22-
23-
"github.com/cespare/xxhash/v2"
2420

2521
lru "github.com/hashicorp/golang-lru"
2622
)
2723

28-
// HashOf returns a hash of the given value to be used as key in a cache.
29-
func HashOf(ctx context.Context, sch Schema, v Row) (uint64, error) {
30-
hash := digestPool.Get().(*xxhash.Digest)
31-
hash.Reset()
32-
defer digestPool.Put(hash)
33-
for i, x := range v {
34-
if i > 0 {
35-
// separate each value in the row with a nil byte
36-
if _, err := hash.Write([]byte{0}); err != nil {
37-
return 0, err
38-
}
39-
}
40-
41-
if i < len(sch) {
42-
typ := sch[i].Type
43-
if strType, ok := typ.(StringType); ok && x != nil {
44-
newX, _, err := strType.Convert(ctx, x)
45-
if err != nil {
46-
return 0, err
47-
}
48-
err = strType.Collation().WriteWeightString(hash, newX.(string))
49-
if err != nil {
50-
return 0, err
51-
}
52-
continue
53-
}
54-
}
55-
56-
// TODO: probably much faster to do this with a type switch
57-
// TODO: we don't have the type info necessary to appropriately encode the value of a string with a non-standard
58-
// collation, which means that two strings that differ only in their collations will hash to the same value.
59-
// See rowexec/grouping_key()
60-
if _, err := fmt.Fprintf(hash, "%v,", x); err != nil {
61-
return 0, err
62-
}
63-
}
64-
return hash.Sum64(), nil
65-
}
66-
67-
var digestPool = sync.Pool{
68-
New: func() any {
69-
return xxhash.New()
70-
},
71-
}
72-
7324
// ErrKeyNotFound is returned when the key could not be found in the cache.
7425
var ErrKeyNotFound = fmt.Errorf("memory: key not found in cache")
7526

sql/hash/hash.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Copyright 2025 Dolthub, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package hash
16+
17+
import (
18+
"fmt"
19+
"sync"
20+
21+
"github.com/cespare/xxhash/v2"
22+
23+
"github.com/dolthub/go-mysql-server/sql"
24+
"github.com/dolthub/go-mysql-server/sql/types"
25+
)
26+
27+
var digestPool = sync.Pool{
28+
New: func() any {
29+
return xxhash.New()
30+
},
31+
}
32+
33+
// HashOf returns a hash of the given value to be used as key in a cache.
34+
func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) {
35+
hash := digestPool.Get().(*xxhash.Digest)
36+
hash.Reset()
37+
defer digestPool.Put(hash)
38+
for i, v := range row {
39+
if i > 0 {
40+
// separate each value in the row with a nil byte
41+
if _, err := hash.Write([]byte{0}); err != nil {
42+
return 0, err
43+
}
44+
}
45+
46+
// TODO: we may not always have the type information available, so we check schema length.
47+
// Then, defer to original behavior
48+
if i >= len(sch) || v == nil {
49+
_, err := fmt.Fprintf(hash, "%v", v)
50+
if err != nil {
51+
return 0, err
52+
}
53+
continue
54+
}
55+
56+
switch typ := sch[i].Type.(type) {
57+
case types.ExtendedType:
58+
bytes, err := typ.SerializeValue(ctx, v)
59+
if err != nil {
60+
return 0, err
61+
}
62+
_, err = fmt.Fprint(hash, string(bytes))
63+
if err != nil {
64+
return 0, err
65+
}
66+
case types.StringType:
67+
strVal, err := types.ConvertToString(ctx, v, typ, nil)
68+
if err != nil {
69+
return 0, err
70+
}
71+
err = typ.Collation().WriteWeightString(hash, strVal)
72+
if err != nil {
73+
return 0, err
74+
}
75+
default:
76+
// TODO: probably much faster to do this with a type switch
77+
_, err := fmt.Fprintf(hash, "%v", v)
78+
if err != nil {
79+
return 0, err
80+
}
81+
}
82+
}
83+
return hash.Sum64(), nil
84+
}

sql/iters/rel_iters.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/dolthub/go-mysql-server/sql"
2626
"github.com/dolthub/go-mysql-server/sql/expression"
27+
"github.com/dolthub/go-mysql-server/sql/hash"
2728
"github.com/dolthub/go-mysql-server/sql/types"
2829
)
2930

@@ -571,7 +572,7 @@ func (di *distinctIter) Next(ctx *sql.Context) (sql.Row, error) {
571572
return nil, err
572573
}
573574

574-
hash, err := sql.HashOf(ctx, nil, row)
575+
hash, err := hash.HashOf(ctx, nil, row)
575576
if err != nil {
576577
return nil, err
577578
}
@@ -647,7 +648,7 @@ func (ii *IntersectIter) Next(ctx *sql.Context) (sql.Row, error) {
647648
return nil, err
648649
}
649650

650-
hash, herr := sql.HashOf(ctx, nil, res)
651+
hash, herr := hash.HashOf(ctx, nil, res)
651652
if herr != nil {
652653
return nil, herr
653654
}
@@ -669,7 +670,7 @@ func (ii *IntersectIter) Next(ctx *sql.Context) (sql.Row, error) {
669670
return nil, err
670671
}
671672

672-
hash, herr := sql.HashOf(ctx, nil, res)
673+
hash, herr := hash.HashOf(ctx, nil, res)
673674
if herr != nil {
674675
return nil, herr
675676
}
@@ -714,7 +715,7 @@ func (ei *ExceptIter) Next(ctx *sql.Context) (sql.Row, error) {
714715
return nil, err
715716
}
716717

717-
hash, herr := sql.HashOf(ctx, nil, res)
718+
hash, herr := hash.HashOf(ctx, nil, res)
718719
if herr != nil {
719720
return nil, herr
720721
}
@@ -736,7 +737,7 @@ func (ei *ExceptIter) Next(ctx *sql.Context) (sql.Row, error) {
736737
return nil, err
737738
}
738739

739-
hash, herr := sql.HashOf(ctx, nil, res)
740+
hash, herr := hash.HashOf(ctx, nil, res)
740741
if herr != nil {
741742
return nil, herr
742743
}

sql/plan/hash_lookup.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ import (
1818
"fmt"
1919
"sync"
2020

21-
"github.com/dolthub/go-mysql-server/sql/types"
2221

23-
"github.com/dolthub/go-mysql-server/sql"
22+
"github.com/dolthub/go-mysql-server/sql"
23+
"github.com/dolthub/go-mysql-server/sql/hash"
24+
"github.com/dolthub/go-mysql-server/sql/types"
2425
)
2526

2627
// NewHashLookup returns a node that performs an indexed hash lookup
@@ -127,7 +128,7 @@ func (n *HashLookup) GetHashKey(ctx *sql.Context, e sql.Expression, row sql.Row)
127128
return nil, err
128129
}
129130
if s, ok := key.([]interface{}); ok {
130-
return sql.HashOf(ctx, n.Schema(), s)
131+
return hash.HashOf(ctx, n.Schema(), s)
131132
}
132133
// byte slices are not hashable
133134
if k, ok := key.([]byte); ok {

sql/plan/insubquery.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919

2020
"github.com/dolthub/go-mysql-server/sql"
2121
"github.com/dolthub/go-mysql-server/sql/expression"
22+
"github.com/dolthub/go-mysql-server/sql/hash"
2223
"github.com/dolthub/go-mysql-server/sql/types"
2324
)
2425

@@ -47,7 +48,7 @@ func NewInSubquery(left sql.Expression, right sql.Expression) *InSubquery {
4748
return &InSubquery{expression.BinaryExpressionStub{LeftChild: left, RightChild: right}}
4849
}
4950

50-
var nilKey, _ = sql.HashOf(nil, nil, sql.NewRow(nil))
51+
var nilKey, _ = hash.HashOf(nil, nil, sql.NewRow(nil))
5152

5253
// Eval implements the Expression interface.
5354
func (in *InSubquery) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
@@ -96,7 +97,7 @@ func (in *InSubquery) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
9697
return false, nil
9798
}
9899

99-
key, err := sql.HashOf(ctx, sql.Schema{&sql.Column{Type: rTyp}}, sql.NewRow(nLeft))
100+
key, err := hash.HashOf(ctx, sql.Schema{&sql.Column{Type: rTyp}}, sql.NewRow(nLeft))
100101
if err != nil {
101102
return nil, err
102103
}

sql/plan/subquery.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ package plan
1616

1717
import (
1818
"fmt"
19-
"io"
19+
"github.com/dolthub/go-mysql-server/sql/hash"
20+
"io"
2021
"sync"
2122

2223
"github.com/dolthub/go-mysql-server/sql/transform"
@@ -484,7 +485,7 @@ func putAllRows(ctx *sql.Context, cache sql.KeyValueCache, sch sql.Schema, vals
484485
if err != nil {
485486
return err
486487
}
487-
rowKey, err := sql.HashOf(ctx, sch, sql.NewRow(normVal))
488+
rowKey, err := hash.HashOf(ctx, sch, sql.NewRow(normVal))
488489
if err != nil {
489490
return err
490491
}

sql/rowexec/agg.go

Lines changed: 7 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,11 @@ package rowexec
1616

1717
import (
1818
"errors"
19-
"fmt"
2019
"io"
2120

22-
"github.com/cespare/xxhash/v2"
23-
2421
"github.com/dolthub/go-mysql-server/sql"
2522
"github.com/dolthub/go-mysql-server/sql/expression/function/aggregation"
26-
"github.com/dolthub/go-mysql-server/sql/types"
23+
"github.com/dolthub/go-mysql-server/sql/hash"
2724
)
2825

2926
type groupByIter struct {
@@ -238,46 +235,18 @@ func (i *groupByGroupingIter) Dispose() {
238235
}
239236
}
240237

241-
func groupingKey(
242-
ctx *sql.Context,
243-
exprs []sql.Expression,
244-
row sql.Row,
245-
) (uint64, error) {
246-
hash := xxhash.New()
238+
func groupingKey(ctx *sql.Context, exprs []sql.Expression, row sql.Row) (uint64, error) {
239+
var keyRow = make(sql.Row, len(exprs))
240+
var keySch = make(sql.Schema, len(exprs))
247241
for i, expr := range exprs {
248242
v, err := expr.Eval(ctx, row)
249243
if err != nil {
250244
return 0, err
251245
}
252-
253-
// TODO: this should just use sql.HashOf
254-
255-
if i > 0 {
256-
// separate each expression in the grouping key with a nil byte
257-
if _, err = hash.Write([]byte{0}); err != nil {
258-
return 0, err
259-
}
260-
}
261-
262-
if extendedType, isExtendedType := expr.Type().(types.ExtendedType); isExtendedType && v != nil {
263-
bytes, err := extendedType.SerializeValue(ctx, v)
264-
if err == nil {
265-
_, err = fmt.Fprint(hash, string(bytes))
266-
}
267-
} else if stringType, isStringType := expr.Type().(sql.StringType); isStringType && v != nil {
268-
v, err = types.ConvertToString(ctx, v, stringType, nil)
269-
if err == nil {
270-
err = stringType.Collation().WriteWeightString(hash, v.(string))
271-
}
272-
} else {
273-
_, err = fmt.Fprintf(hash, "%v", v)
274-
}
275-
if err != nil {
276-
return 0, err
277-
}
246+
keyRow[i] = v
247+
keySch[i] = &sql.Column{Type: expr.Type()}
278248
}
279-
280-
return hash.Sum64(), nil
249+
return hash.HashOf(ctx, keySch, keyRow)
281250
}
282251

283252
func newAggregationBuffer(expr sql.Expression) (sql.AggregationBuffer, error) {

0 commit comments

Comments
 (0)