Skip to content

Commit a88e686

Browse files
committed
sql: remove old hyperloglog version
This was only needed for compatibility with 24.3 nodes, to ensure that table stats collection worked in a mixed-version state. We no longer need compatibility with that version. Additionally, remove the no-longer-used sketch type field from the processor spec. We've bumped the min version and removed the mixed-version 24.3 logic test config, so this should now be safe. Release note: None
1 parent 673482a commit a88e686

File tree

9 files changed

+21
-105
lines changed

9 files changed

+21
-105
lines changed

DEPS.bzl

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -783,16 +783,6 @@ def go_deps():
783783
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.2.5.zip",
784784
],
785785
)
786-
go_repository(
787-
name = "com_github_axiomhq_hyperloglog_000",
788-
build_file_proto_mode = "disable_global",
789-
importpath = "github.com/axiomhq/hyperloglog/000",
790-
sha256 = "812834322ee2ca50dc36f91f9ac3f2cde4631af2f9c330b1271c78b46024a540",
791-
strip_prefix = "github.com/axiomhq/hyperloglog@v0.0.0-20181223111420-4b99d0c2c99e",
792-
urls = [
793-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.0.0-20181223111420-4b99d0c2c99e.zip",
794-
],
795-
)
796786
go_repository(
797787
name = "com_github_aymanbagabas_go_osc52",
798788
build_file_proto_mode = "disable_global",

build/bazelutil/distdir_files.bzl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,6 @@ DISTDIR_FILES = {
288288
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/aws-sdk-go-v2/service/sts/com_github_aws_aws_sdk_go_v2_service_sts-v1.33.17.zip": "87aca25fafd483a1eac29c5baaab05ad485422a9aa1ccc5db0d39733c2d71cd2",
289289
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/aws-sdk-go/com_github_aws_aws_sdk_go-v1.40.37.zip": "c0c481d28af88f621fb3fdeacc1e5d32f69a1bb83d0ee959f95ce89e4e2d0494",
290290
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/smithy-go/com_github_aws_smithy_go-v1.22.3.zip": "572df48de9133d57f45909d3067b2053b97230268c2d28e4e44ea9644009ef11",
291-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.0.0-20181223111420-4b99d0c2c99e.zip": "812834322ee2ca50dc36f91f9ac3f2cde4631af2f9c330b1271c78b46024a540",
292291
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.2.5.zip": "6125b12664bb5dd8614e82f0fe7528242dcb11649e1d7e051aabf3da471e14e1",
293292
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aymanbagabas/go-osc52/com_github_aymanbagabas_go_osc52-v1.0.3.zip": "138e75a51599c2a8e4afe2bd6acdeaddbb73eb9ec796dfa2f577b16201660d9e",
294293
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aymerick/douceur/com_github_aymerick_douceur-v0.2.0.zip": "dcbf69760cc1a8b32384495438e1086e4c3d669b2ebc0debd92e1865ffd6be60",

go.mod

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,6 @@ require (
122122
github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.35.1
123123
github.com/aws/smithy-go v1.22.3
124124
github.com/axiomhq/hyperloglog v0.2.5
125-
github.com/axiomhq/hyperloglog/000 v0.0.0-20181223111420-4b99d0c2c99e
126125
github.com/bazelbuild/rules_go v0.26.0
127126
github.com/biogo/store v0.0.0-20160505134755-913427a1d5e8
128127
github.com/blevesearch/snowballstem v0.9.0
@@ -507,10 +506,6 @@ replace github.com/gogo/protobuf => github.com/cockroachdb/gogoproto v1.3.3-0.20
507506

508507
replace storj.io/drpc => github.com/cockroachdb/drpc v0.0.0-20250507084558-a793c5c40d3d
509508

510-
// TODO(yuzefovich): remove this version once compatibility with 24.3 is no
511-
// longer needed.
512-
replace github.com/axiomhq/hyperloglog/000 => github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e
513-
514509
// Note: This forked dependency adds a commit that opens up some
515510
// private APIs to enable us to make some perf improvements to
516511
// histogram updates in particular.

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,8 +451,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.33.17/go.mod h1:cQnB8CUnxbMU82JvlqjK
451451
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
452452
github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k=
453453
github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI=
454-
github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e h1:190ugM9MsyFauTkR/UqcHG/mn5nmFe6SvHJqEHIrtrA=
455-
github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e/go.mod h1:IOXAcuKIFq/mDyuQ4wyJuJ79XLMsmLM+5RdQ+vWrL7o=
456454
github.com/axiomhq/hyperloglog v0.2.5 h1:Hefy3i8nAs8zAI/tDp+wE7N+Ltr8JnwiW3875pvl0N8=
457455
github.com/axiomhq/hyperloglog v0.2.5/go.mod h1:DLUK9yIzpU5B6YFLjxTIcbHu1g4Y1WQb1m5RH3radaM=
458456
github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg=

pkg/sql/distsql_plan_stats.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,6 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
410410

411411
sampledColumnIDs := make([]descpb.ColumnID, len(scan.catalogCols))
412412
spec := execinfrapb.SketchSpec{
413-
SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
414413
GenerateHistogram: reqStat.histogram,
415414
HistogramMaxBuckets: reqStat.histogramMaxBuckets,
416415
Columns: make([]uint32, len(reqStat.columns)),
@@ -656,7 +655,6 @@ func (dsp *DistSQLPlanner) createStatsPlan(
656655
sampledColumnIDs := make([]descpb.ColumnID, len(requestedCols))
657656
for _, s := range reqStats {
658657
spec := execinfrapb.SketchSpec{
659-
SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
660658
GenerateHistogram: s.histogram,
661659
HistogramMaxBuckets: s.histogramMaxBuckets,
662660
Columns: make([]uint32, len(s.columns)),

pkg/sql/execinfrapb/processors_table_stats.proto

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,9 @@ option go_package = "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb";
1717
import "sql/catalog/descpb/structured.proto";
1818
import "gogoproto/gogo.proto";
1919

20-
// TODO(yuzefovich): this can be removed once compatibility with 24.3 is no
21-
// longer needed.
22-
enum SketchType {
23-
// This is the github.com/axiomhq/hyperloglog binary format (as of commit
24-
// 730eea1) for a sketch with precision 14. Values are encoded using their key
25-
// encoding, except integers which are encoded in 8 bytes (little-endian).
26-
HLL_PLUS_PLUS_V1 = 0;
27-
}
28-
2920
// SketchSpec contains the specification for a generated statistic.
3021
message SketchSpec {
31-
optional SketchType sketch_type = 1 [(gogoproto.nullable) = false];
22+
reserved 1;
3223

3324
// Each value is an index identifying a column in the input stream.
3425
// TODO(radu): currently only one column is supported.

pkg/sql/rowexec/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ go_library(
115115
"//pkg/util/tracing/tracingpb",
116116
"//pkg/util/vector",
117117
"@com_github_axiomhq_hyperloglog//:hyperloglog",
118-
"@com_github_axiomhq_hyperloglog_000//:000",
119118
"@com_github_cockroachdb_errors//:errors",
120119
"@com_github_cockroachdb_logtags//:logtags",
121120
"@com_github_cockroachdb_redact//:redact",

pkg/sql/rowexec/sample_aggregator.go

Lines changed: 11 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ import (
1010
"math"
1111
"time"
1212

13-
hllNew "github.com/axiomhq/hyperloglog"
14-
hllOld "github.com/axiomhq/hyperloglog/000"
13+
"github.com/axiomhq/hyperloglog"
1514
"github.com/cockroachdb/cockroach/pkg/jobs"
1615
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
1716
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
1817
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
1918
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
20-
"github.com/cockroachdb/cockroach/pkg/sql/execversion"
2119
"github.com/cockroachdb/cockroach/pkg/sql/isql"
2220
"github.com/cockroachdb/cockroach/pkg/sql/parser"
2321
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
@@ -103,7 +101,6 @@ func newSampleAggregator(
103101
return nil, errors.Errorf("histograms require one column")
104102
}
105103
}
106-
useNewHLL := execversion.FromContext(ctx) >= execversion.V25_1
107104

108105
// Limit the memory use by creating a child monitor with a hard limit.
109106
// The processor will disable histogram collection if this limit is not
@@ -144,14 +141,10 @@ func newSampleAggregator(
144141
for i := range spec.Sketches {
145142
s.sketches[i] = sketchInfo{
146143
spec: spec.Sketches[i],
144+
sketch: hyperloglog.New14(),
147145
numNulls: 0,
148146
numRows: 0,
149147
}
150-
if useNewHLL {
151-
s.sketches[i].sketchNew = hllNew.New14()
152-
} else {
153-
s.sketches[i].sketchOld = hllOld.New14()
154-
}
155148
if spec.Sketches[i].GenerateHistogram {
156149
sampleCols.Add(int(spec.Sketches[i].Columns[0]))
157150
}
@@ -173,14 +166,10 @@ func newSampleAggregator(
173166
s.invSr[col] = &sr
174167
s.invSketch[col] = &sketchInfo{
175168
spec: spec.InvertedSketches[i],
169+
sketch: hyperloglog.New14(),
176170
numNulls: 0,
177171
numRows: 0,
178172
}
179-
if useNewHLL {
180-
s.invSketch[col].sketchNew = hllNew.New14()
181-
} else {
182-
s.invSketch[col].sketchOld = hllOld.New14()
183-
}
184173
}
185174

186175
if err := s.Init(
@@ -372,6 +361,8 @@ func (s *sampleAggregator) mainLoop(
372361
func (s *sampleAggregator) processSketchRow(
373362
sketch *sketchInfo, row rowenc.EncDatumRow, da *tree.DatumAlloc,
374363
) error {
364+
var tmpSketch hyperloglog.Sketch
365+
375366
numRows, err := row[s.numRowsCol].GetInt()
376367
if err != nil {
377368
return err
@@ -398,22 +389,11 @@ func (s *sampleAggregator) processSketchRow(
398389
if d == tree.DNull {
399390
return errors.AssertionFailedf("NULL sketch data")
400391
}
401-
if sketch.sketchNew != nil {
402-
var tmpSketch hllNew.Sketch
403-
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
404-
return err
405-
}
406-
if err := sketch.sketchNew.Merge(&tmpSketch); err != nil {
407-
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
408-
}
409-
} else {
410-
var tmpSketch hllOld.Sketch
411-
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
412-
return err
413-
}
414-
if err := sketch.sketchOld.Merge(&tmpSketch); err != nil {
415-
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
416-
}
392+
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
393+
return err
394+
}
395+
if err := sketch.sketch.Merge(&tmpSketch); err != nil {
396+
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
417397
}
418398
return nil
419399
}
@@ -631,12 +611,7 @@ func (s *sampleAggregator) getAvgSize(si *sketchInfo) int64 {
631611
// getDistinctCount returns the number of distinct values in the given sketch,
632612
// optionally including null values.
633613
func (s *sampleAggregator) getDistinctCount(si *sketchInfo, includeNulls bool) int64 {
634-
var distinctCount int64
635-
if si.sketchNew != nil {
636-
distinctCount = int64(si.sketchNew.Estimate())
637-
} else {
638-
distinctCount = int64(si.sketchOld.Estimate())
639-
}
614+
distinctCount := int64(si.sketch.Estimate())
640615
if si.numNulls > 0 && !includeNulls {
641616
// Nulls are included in the estimate, so reduce the count by 1 if nulls are
642617
// not requested.

pkg/sql/rowexec/sampler.go

Lines changed: 9 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import (
1111
"math/rand"
1212
"time"
1313

14-
hllNew "github.com/axiomhq/hyperloglog"
15-
hllOld "github.com/axiomhq/hyperloglog/000"
14+
"github.com/axiomhq/hyperloglog"
1615
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
1716
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
1817
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
@@ -36,10 +35,8 @@ import (
3635

3736
// sketchInfo contains the specification and run-time state for each sketch.
3837
type sketchInfo struct {
39-
spec execinfrapb.SketchSpec
40-
// Exactly one of sketchOld and sketchNew will be set.
41-
sketchOld *hllOld.Sketch
42-
sketchNew *hllNew.Sketch
38+
spec execinfrapb.SketchSpec
39+
sketch *hyperloglog.Sketch
4340
numNulls int64
4441
numRows int64
4542
size int64
@@ -103,7 +100,6 @@ func newSamplerProcessor(
103100
input execinfra.RowSource,
104101
post *execinfrapb.PostProcessSpec,
105102
) (*samplerProcessor, error) {
106-
useNewHLL := execversion.FromContext(ctx) >= execversion.V25_1
107103
legacyFingerprinting := execversion.FromContext(ctx) < execversion.V25_2
108104

109105
// Limit the memory use by creating a child monitor with a hard limit.
@@ -133,15 +129,11 @@ func newSamplerProcessor(
133129
for i := range spec.Sketches {
134130
s.sketches[i] = sketchInfo{
135131
spec: spec.Sketches[i],
132+
sketch: hyperloglog.New14(),
136133
numNulls: 0,
137134
numRows: 0,
138135
legacyFingerprinting: legacyFingerprinting,
139136
}
140-
if useNewHLL {
141-
s.sketches[i].sketchNew = hllNew.New14()
142-
} else {
143-
s.sketches[i].sketchOld = hllOld.New14()
144-
}
145137
if spec.Sketches[i].GenerateHistogram {
146138
sampleCols.Add(int(spec.Sketches[i].Columns[0]))
147139
}
@@ -160,14 +152,10 @@ func newSamplerProcessor(
160152
sketchSpec.Columns = []uint32{0}
161153
s.invSketch[col] = &sketchInfo{
162154
spec: sketchSpec,
155+
sketch: hyperloglog.New14(),
163156
numNulls: 0,
164157
numRows: 0,
165158
}
166-
if useNewHLL {
167-
s.invSketch[col].sketchNew = hllNew.New14()
168-
} else {
169-
s.invSketch[col].sketchOld = hllOld.New14()
170-
}
171159
}
172160

173161
s.sr.Init(int(spec.SampleSize), int(spec.MinSampleSize), inTypes, &s.memAcc, sampleCols)
@@ -437,12 +425,7 @@ func (s *samplerProcessor) emitSketchRow(
437425
outRow[s.numRowsCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numRows))}
438426
outRow[s.numNullsCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numNulls))}
439427
outRow[s.sizeCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.size))}
440-
var data []byte
441-
if si.sketchNew != nil {
442-
data, err = si.sketchNew.MarshalBinary()
443-
} else {
444-
data, err = si.sketchOld.MarshalBinary()
445-
}
428+
data, err := si.sketch.MarshalBinary()
446429
if err != nil {
447430
return false, err
448431
}
@@ -603,11 +586,7 @@ func (s *sketchInfo) addRow(
603586
if allNulls {
604587
s.numNulls++
605588
}
606-
if s.sketchNew != nil {
607-
s.sketchNew.Insert(*buf)
608-
} else {
609-
s.sketchOld.Insert(*buf)
610-
}
589+
s.sketch.Insert(*buf)
611590
return nil
612591
}
613592

@@ -678,11 +657,7 @@ func (s *sketchInfo) addRowLegacy(
678657
// be uniformly distributed in the 2^64 range). Experiments (on tpcc
679658
// order_line) with simplistic functions yielded bad results.
680659
binary.LittleEndian.PutUint64(*buf, uint64(val))
681-
if s.sketchNew != nil {
682-
s.sketchNew.Insert(*buf)
683-
} else {
684-
s.sketchOld.Insert(*buf)
685-
}
660+
s.sketch.Insert(*buf)
686661
return nil
687662
}
688663
isNull := true
@@ -715,10 +690,6 @@ func (s *sketchInfo) addRowLegacy(
715690
if isNull {
716691
s.numNulls++
717692
}
718-
if s.sketchNew != nil {
719-
s.sketchNew.Insert(*buf)
720-
} else {
721-
s.sketchOld.Insert(*buf)
722-
}
693+
s.sketch.Insert(*buf)
723694
return nil
724695
}

0 commit comments

Comments
 (0)