From 6d958e9fa93dcefb4d9f220d4caa9d1d7dacb6b1 Mon Sep 17 00:00:00 2001 From: James Cor Date: Sun, 9 Nov 2025 22:56:18 -0800 Subject: [PATCH 1/3] manually write strings --- sql/hash/hash.go | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/sql/hash/hash.go b/sql/hash/hash.go index 62d5ed2c85..bfd3c28d05 100644 --- a/sql/hash/hash.go +++ b/sql/hash/hash.go @@ -60,9 +60,16 @@ func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) { return 0, fmt.Errorf("error unwrapping value: %w", err) } + if v == nil { + if _, err := hash.WriteString(""); err != nil { + return 0, err + } + continue + } + // TODO: we may not always have the type information available, so we check schema length. // Then, defer to original behavior - if i >= len(sch) || v == nil { + if i >= len(sch) { _, err := fmt.Fprintf(hash, "%v", v) if err != nil { return 0, err @@ -89,10 +96,41 @@ func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) { return 0, err } default: - // TODO: probably much faster to do this with a type switch - _, err = fmt.Fprintf(hash, "%v", v) - if err != nil { - return 0, err + switch v := v.(type) { + case int: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int8: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int16: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int32: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int64: + hash.WriteString(strconv.FormatInt(v, 10)) + case uint: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint8: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint16: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint32: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint64: + hash.WriteString(strconv.FormatUint(v, 10)) + case float32: + str := strconv.FormatFloat(float64(v), 'f', -1, 32) + if str == "-0" { + str = "0" + } + hash.WriteString(str) + case float64: + str := strconv.FormatFloat(v, 'f', -1, 64) + if str == "-0" { + str = "0" + } + hash.WriteString(str) + default: + hash.WriteString(fmt.Sprintf("%v", v)) } } } From a1a954b080d3c8db460d193d6b703524bc4f0969 Mon Sep 17 00:00:00 2001 From: James Cor Date: Sun, 9 Nov 2025 23:09:01 -0800 Subject: [PATCH 2/3] use type switch instead of fprintf for grouping key --- sql/hash/hash.go | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/sql/hash/hash.go b/sql/hash/hash.go index bfd3c28d05..d2c756b619 100644 --- a/sql/hash/hash.go +++ b/sql/hash/hash.go @@ -70,9 +70,41 @@ func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) { // TODO: we may not always have the type information available, so we check schema length. // Then, defer to original behavior if i >= len(sch) { - _, err := fmt.Fprintf(hash, "%v", v) - if err != nil { - return 0, err + switch v := v.(type) { + case int: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int8: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int16: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int32: + hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int64: + hash.WriteString(strconv.FormatInt(v, 10)) + case uint: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint8: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint16: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint32: + hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint64: + hash.WriteString(strconv.FormatUint(v, 10)) + case float32: + str := strconv.FormatFloat(float64(v), 'f', -1, 32) + if str == "-0" { + str = "0" + } + hash.WriteString(str) + case float64: + str := strconv.FormatFloat(v, 'f', -1, 64) + if str == "-0" { + str = "0" + } + hash.WriteString(str) + default: + hash.WriteString(fmt.Sprintf("%v", v)) } continue } From 786f3a1c310aa82a5f937e802fda1f97109e89d9 Mon Sep 17 00:00:00 2001 From: James Cor Date: Tue, 11 Nov 2025 10:20:52 -0800 Subject: [PATCH 3/3] check for error --- sql/hash/hash.go | 146 +++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 88 deletions(-) diff --git a/sql/hash/hash.go b/sql/hash/hash.go index d2c756b619..57feabd2fe 100644 --- a/sql/hash/hash.go +++ b/sql/hash/hash.go @@ -69,101 +69,71 @@ func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) { // TODO: we may not always have the type information available, so we check schema length. // Then, defer to original behavior - if i >= len(sch) { - switch v := v.(type) { - case int: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int8: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int16: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int32: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int64: - hash.WriteString(strconv.FormatInt(v, 10)) - case uint: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint8: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint16: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint32: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint64: - hash.WriteString(strconv.FormatUint(v, 10)) - case float32: - str := strconv.FormatFloat(float64(v), 'f', -1, 32) - if str == "-0" { - str = "0" + if i < len(sch) { + switch typ := sch[i].Type.(type) { + case sql.ExtendedType: + // TODO: Doltgres follows Postgres conventions which don't align with the expectations of MySQL, + // so we're using the old (probably incorrect) behavior for now + _, err := hash.WriteString(fmt.Sprintf("%v", v)) + if err != nil { + return 0, err } - hash.WriteString(str) - case float64: - str := strconv.FormatFloat(v, 'f', -1, 64) - if str == "-0" { - str = "0" + continue + case types.StringType: + var strVal string + strVal, err = types.ConvertToString(ctx, v, typ, nil) + if err != nil { + return 0, err } - hash.WriteString(str) - default: - hash.WriteString(fmt.Sprintf("%v", v)) + err = typ.Collation().WriteWeightString(hash, strVal) + if err != nil { + return 0, err + } + continue } - continue } - - switch typ := sch[i].Type.(type) { - case sql.ExtendedType: - // TODO: Doltgres follows Postgres conventions which don't align with the expectations of MySQL, - // so we're using the old (probably incorrect) behavior for now - _, err = fmt.Fprintf(hash, "%v", v) - if err != nil { - return 0, err - } - case types.StringType: - var strVal string - strVal, err = types.ConvertToString(ctx, v, typ, nil) - if err != nil { - return 0, err + switch v := v.(type) { + case int: + _, err = hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int8: + _, err = hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int16: + _, err = hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int32: + _, err = hash.WriteString(strconv.FormatInt(int64(v), 10)) + case int64: + _, err = hash.WriteString(strconv.FormatInt(v, 10)) + case uint: + _, err = hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint8: + _, err = hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint16: + _, err = hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint32: + _, err = hash.WriteString(strconv.FormatUint(uint64(v), 10)) + case uint64: + _, err = hash.WriteString(strconv.FormatUint(v, 10)) + case float32: + str := strconv.FormatFloat(float64(v), 'f', -1, 32) + if str == "-0" { + str = "0" } - err = typ.Collation().WriteWeightString(hash, strVal) - if err != nil { - return 0, err + _, err = hash.WriteString(str) + case float64: + str := strconv.FormatFloat(v, 'f', -1, 64) + if str == "-0" { + str = "0" } + _, err = hash.WriteString(str) + case string: + _, err = hash.WriteString(v) + case []byte: + _, err = hash.Write(v) default: - switch v := v.(type) { - case int: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int8: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int16: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int32: - hash.WriteString(strconv.FormatInt(int64(v), 10)) - case int64: - hash.WriteString(strconv.FormatInt(v, 10)) - case uint: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint8: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint16: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint32: - hash.WriteString(strconv.FormatUint(uint64(v), 10)) - case uint64: - hash.WriteString(strconv.FormatUint(v, 10)) - case float32: - str := strconv.FormatFloat(float64(v), 'f', -1, 32) - if str == "-0" { - str = "0" - } - hash.WriteString(str) - case float64: - str := strconv.FormatFloat(v, 'f', -1, 64) - if str == "-0" { - str = "0" - } - hash.WriteString(str) - default: - hash.WriteString(fmt.Sprintf("%v", v)) - } + _, err = hash.WriteString(fmt.Sprintf("%v", v)) + } + if err != nil { + return 0, err } } return hash.Sum64(), nil