Skip to content

Commit 64220c2

Browse files
craig[bot] and yuzefovich committed
Merge #145932
145932: sql: remove old hyperloglog version r=yuzefovich a=yuzefovich This was only needed for compatibility with 24.3 nodes to ensure that table stats collection worked in a mixed-version state. We no longer need compatibility with that version. Additionally, remove the no-longer-used sketch type field from the processor spec. Epic: None Release note: None Co-authored-by: Yahor Yuzefovich <[email protected]>
2 parents d4f47dd + 088306f commit 64220c2

File tree

9 files changed

+21
-105
lines changed

9 files changed

+21
-105
lines changed

DEPS.bzl

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -793,16 +793,6 @@ def go_deps():
793793
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.2.5.zip",
794794
],
795795
)
796-
go_repository(
797-
name = "com_github_axiomhq_hyperloglog_000",
798-
build_file_proto_mode = "disable_global",
799-
importpath = "github.com/axiomhq/hyperloglog/000",
800-
sha256 = "812834322ee2ca50dc36f91f9ac3f2cde4631af2f9c330b1271c78b46024a540",
801-
strip_prefix = "github.com/axiomhq/[email protected]",
802-
urls = [
803-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.0.0-20181223111420-4b99d0c2c99e.zip",
804-
],
805-
)
806796
go_repository(
807797
name = "com_github_aymanbagabas_go_osc52",
808798
build_file_proto_mode = "disable_global",

build/bazelutil/distdir_files.bzl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,6 @@ DISTDIR_FILES = {
286286
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/aws-sdk-go-v2/service/sts/com_github_aws_aws_sdk_go_v2_service_sts-v1.33.17.zip": "87aca25fafd483a1eac29c5baaab05ad485422a9aa1ccc5db0d39733c2d71cd2",
287287
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/aws-sdk-go/com_github_aws_aws_sdk_go-v1.40.37.zip": "c0c481d28af88f621fb3fdeacc1e5d32f69a1bb83d0ee959f95ce89e4e2d0494",
288288
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aws/smithy-go/com_github_aws_smithy_go-v1.22.3.zip": "572df48de9133d57f45909d3067b2053b97230268c2d28e4e44ea9644009ef11",
289-
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.0.0-20181223111420-4b99d0c2c99e.zip": "812834322ee2ca50dc36f91f9ac3f2cde4631af2f9c330b1271c78b46024a540",
290289
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/axiomhq/hyperloglog/com_github_axiomhq_hyperloglog-v0.2.5.zip": "6125b12664bb5dd8614e82f0fe7528242dcb11649e1d7e051aabf3da471e14e1",
291290
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aymanbagabas/go-osc52/com_github_aymanbagabas_go_osc52-v1.0.3.zip": "138e75a51599c2a8e4afe2bd6acdeaddbb73eb9ec796dfa2f577b16201660d9e",
292291
"https://storage.googleapis.com/cockroach-godeps/gomod/github.com/aymerick/douceur/com_github_aymerick_douceur-v0.2.0.zip": "dcbf69760cc1a8b32384495438e1086e4c3d669b2ebc0debd92e1865ffd6be60",

go.mod

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ require (
120120
github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.35.1
121121
github.com/aws/smithy-go v1.22.3
122122
github.com/axiomhq/hyperloglog v0.2.5
123-
github.com/axiomhq/hyperloglog/000 v0.0.0-20181223111420-4b99d0c2c99e
124123
github.com/bazelbuild/rules_go v0.26.0
125124
github.com/biogo/store v0.0.0-20160505134755-913427a1d5e8
126125
github.com/blevesearch/snowballstem v0.9.0
@@ -500,10 +499,6 @@ replace golang.org/x/time => github.com/cockroachdb/x-time v0.3.1-0.202305251236
500499

501500
replace github.com/gogo/protobuf => github.com/cockroachdb/gogoproto v1.3.3-0.20241216150617-2358cdb156a1
502501

503-
// TODO(yuzefovich): remove this version once compatibility with 24.3 is no
504-
// longer needed.
505-
replace github.com/axiomhq/hyperloglog/000 => github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e
506-
507502
// Note: This forked dependency adds a commit that opens up some
508503
// private APIs to enable us to make some perf improvements to
509504
// histogram updates in particular.

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -447,8 +447,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.33.17/go.mod h1:cQnB8CUnxbMU82JvlqjK
447447
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
448448
github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k=
449449
github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI=
450-
github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e h1:190ugM9MsyFauTkR/UqcHG/mn5nmFe6SvHJqEHIrtrA=
451-
github.com/axiomhq/hyperloglog v0.0.0-20181223111420-4b99d0c2c99e/go.mod h1:IOXAcuKIFq/mDyuQ4wyJuJ79XLMsmLM+5RdQ+vWrL7o=
452450
github.com/axiomhq/hyperloglog v0.2.5 h1:Hefy3i8nAs8zAI/tDp+wE7N+Ltr8JnwiW3875pvl0N8=
453451
github.com/axiomhq/hyperloglog v0.2.5/go.mod h1:DLUK9yIzpU5B6YFLjxTIcbHu1g4Y1WQb1m5RH3radaM=
454452
github.com/aymanbagabas/go-osc52 v1.0.3 h1:DTwqENW7X9arYimJrPeGZcV0ln14sGMt3pHZspWD+Mg=

pkg/sql/distsql_plan_stats.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,6 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
410410

411411
sampledColumnIDs := make([]descpb.ColumnID, len(scan.catalogCols))
412412
spec := execinfrapb.SketchSpec{
413-
SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
414413
GenerateHistogram: reqStat.histogram,
415414
HistogramMaxBuckets: reqStat.histogramMaxBuckets,
416415
Columns: make([]uint32, len(reqStat.columns)),
@@ -656,7 +655,6 @@ func (dsp *DistSQLPlanner) createStatsPlan(
656655
sampledColumnIDs := make([]descpb.ColumnID, len(requestedCols))
657656
for _, s := range reqStats {
658657
spec := execinfrapb.SketchSpec{
659-
SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1,
660658
GenerateHistogram: s.histogram,
661659
HistogramMaxBuckets: s.histogramMaxBuckets,
662660
Columns: make([]uint32, len(s.columns)),

pkg/sql/execinfrapb/processors_table_stats.proto

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,9 @@ option go_package = "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb";
1717
import "sql/catalog/descpb/structured.proto";
1818
import "gogoproto/gogo.proto";
1919

20-
// TODO(yuzefovich): this can be removed once compatibility with 24.3 is no
21-
// longer needed.
22-
enum SketchType {
23-
// This is the github.com/axiomhq/hyperloglog binary format (as of commit
24-
// 730eea1) for a sketch with precision 14. Values are encoded using their key
25-
// encoding, except integers which are encoded in 8 bytes (little-endian).
26-
HLL_PLUS_PLUS_V1 = 0;
27-
}
28-
2920
// SketchSpec contains the specification for a generated statistic.
3021
message SketchSpec {
31-
optional SketchType sketch_type = 1 [(gogoproto.nullable) = false];
22+
reserved 1;
3223

3324
// Each value is an index identifying a column in the input stream.
3425
// TODO(radu): currently only one column is supported.

pkg/sql/rowexec/BUILD.bazel

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ go_library(
115115
"//pkg/util/tracing/tracingpb",
116116
"//pkg/util/vector",
117117
"@com_github_axiomhq_hyperloglog//:hyperloglog",
118-
"@com_github_axiomhq_hyperloglog_000//:000",
119118
"@com_github_cockroachdb_errors//:errors",
120119
"@com_github_cockroachdb_logtags//:logtags",
121120
"@com_github_cockroachdb_redact//:redact",

pkg/sql/rowexec/sample_aggregator.go

Lines changed: 11 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ import (
1010
"math"
1111
"time"
1212

13-
hllNew "github.com/axiomhq/hyperloglog"
14-
hllOld "github.com/axiomhq/hyperloglog/000"
13+
"github.com/axiomhq/hyperloglog"
1514
"github.com/cockroachdb/cockroach/pkg/jobs"
1615
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
1716
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
1817
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
1918
"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
20-
"github.com/cockroachdb/cockroach/pkg/sql/execversion"
2119
"github.com/cockroachdb/cockroach/pkg/sql/isql"
2220
"github.com/cockroachdb/cockroach/pkg/sql/parser"
2321
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
@@ -103,7 +101,6 @@ func newSampleAggregator(
103101
return nil, errors.Errorf("histograms require one column")
104102
}
105103
}
106-
useNewHLL := execversion.FromContext(ctx) >= execversion.V25_1
107104

108105
// Limit the memory use by creating a child monitor with a hard limit.
109106
// The processor will disable histogram collection if this limit is not
@@ -144,14 +141,10 @@ func newSampleAggregator(
144141
for i := range spec.Sketches {
145142
s.sketches[i] = sketchInfo{
146143
spec: spec.Sketches[i],
144+
sketch: hyperloglog.New14(),
147145
numNulls: 0,
148146
numRows: 0,
149147
}
150-
if useNewHLL {
151-
s.sketches[i].sketchNew = hllNew.New14()
152-
} else {
153-
s.sketches[i].sketchOld = hllOld.New14()
154-
}
155148
if spec.Sketches[i].GenerateHistogram {
156149
sampleCols.Add(int(spec.Sketches[i].Columns[0]))
157150
}
@@ -173,14 +166,10 @@ func newSampleAggregator(
173166
s.invSr[col] = &sr
174167
s.invSketch[col] = &sketchInfo{
175168
spec: spec.InvertedSketches[i],
169+
sketch: hyperloglog.New14(),
176170
numNulls: 0,
177171
numRows: 0,
178172
}
179-
if useNewHLL {
180-
s.invSketch[col].sketchNew = hllNew.New14()
181-
} else {
182-
s.invSketch[col].sketchOld = hllOld.New14()
183-
}
184173
}
185174

186175
if err := s.Init(
@@ -372,6 +361,8 @@ func (s *sampleAggregator) mainLoop(
372361
func (s *sampleAggregator) processSketchRow(
373362
sketch *sketchInfo, row rowenc.EncDatumRow, da *tree.DatumAlloc,
374363
) error {
364+
var tmpSketch hyperloglog.Sketch
365+
375366
numRows, err := row[s.numRowsCol].GetInt()
376367
if err != nil {
377368
return err
@@ -398,22 +389,11 @@ func (s *sampleAggregator) processSketchRow(
398389
if d == tree.DNull {
399390
return errors.AssertionFailedf("NULL sketch data")
400391
}
401-
if sketch.sketchNew != nil {
402-
var tmpSketch hllNew.Sketch
403-
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
404-
return err
405-
}
406-
if err := sketch.sketchNew.Merge(&tmpSketch); err != nil {
407-
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
408-
}
409-
} else {
410-
var tmpSketch hllOld.Sketch
411-
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
412-
return err
413-
}
414-
if err := sketch.sketchOld.Merge(&tmpSketch); err != nil {
415-
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
416-
}
392+
if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
393+
return err
394+
}
395+
if err := sketch.sketch.Merge(&tmpSketch); err != nil {
396+
return errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
417397
}
418398
return nil
419399
}
@@ -631,12 +611,7 @@ func (s *sampleAggregator) getAvgSize(si *sketchInfo) int64 {
631611
// getDistinctCount returns the number of distinct values in the given sketch,
632612
// optionally including null values.
633613
func (s *sampleAggregator) getDistinctCount(si *sketchInfo, includeNulls bool) int64 {
634-
var distinctCount int64
635-
if si.sketchNew != nil {
636-
distinctCount = int64(si.sketchNew.Estimate())
637-
} else {
638-
distinctCount = int64(si.sketchOld.Estimate())
639-
}
614+
distinctCount := int64(si.sketch.Estimate())
640615
if si.numNulls > 0 && !includeNulls {
641616
// Nulls are included in the estimate, so reduce the count by 1 if nulls are
642617
// not requested.

pkg/sql/rowexec/sampler.go

Lines changed: 9 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import (
1111
"math/rand"
1212
"time"
1313

14-
hllNew "github.com/axiomhq/hyperloglog"
15-
hllOld "github.com/axiomhq/hyperloglog/000"
14+
"github.com/axiomhq/hyperloglog"
1615
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
1716
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catenumpb"
1817
"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
@@ -36,10 +35,8 @@ import (
3635

3736
// sketchInfo contains the specification and run-time state for each sketch.
3837
type sketchInfo struct {
39-
spec execinfrapb.SketchSpec
40-
// Exactly one of sketchOld and sketchNew will be set.
41-
sketchOld *hllOld.Sketch
42-
sketchNew *hllNew.Sketch
38+
spec execinfrapb.SketchSpec
39+
sketch *hyperloglog.Sketch
4340
numNulls int64
4441
numRows int64
4542
size int64
@@ -103,7 +100,6 @@ func newSamplerProcessor(
103100
input execinfra.RowSource,
104101
post *execinfrapb.PostProcessSpec,
105102
) (*samplerProcessor, error) {
106-
useNewHLL := execversion.FromContext(ctx) >= execversion.V25_1
107103
legacyFingerprinting := execversion.FromContext(ctx) < execversion.V25_2
108104

109105
// Limit the memory use by creating a child monitor with a hard limit.
@@ -133,15 +129,11 @@ func newSamplerProcessor(
133129
for i := range spec.Sketches {
134130
s.sketches[i] = sketchInfo{
135131
spec: spec.Sketches[i],
132+
sketch: hyperloglog.New14(),
136133
numNulls: 0,
137134
numRows: 0,
138135
legacyFingerprinting: legacyFingerprinting,
139136
}
140-
if useNewHLL {
141-
s.sketches[i].sketchNew = hllNew.New14()
142-
} else {
143-
s.sketches[i].sketchOld = hllOld.New14()
144-
}
145137
if spec.Sketches[i].GenerateHistogram {
146138
sampleCols.Add(int(spec.Sketches[i].Columns[0]))
147139
}
@@ -160,14 +152,10 @@ func newSamplerProcessor(
160152
sketchSpec.Columns = []uint32{0}
161153
s.invSketch[col] = &sketchInfo{
162154
spec: sketchSpec,
155+
sketch: hyperloglog.New14(),
163156
numNulls: 0,
164157
numRows: 0,
165158
}
166-
if useNewHLL {
167-
s.invSketch[col].sketchNew = hllNew.New14()
168-
} else {
169-
s.invSketch[col].sketchOld = hllOld.New14()
170-
}
171159
}
172160

173161
s.sr.Init(int(spec.SampleSize), int(spec.MinSampleSize), inTypes, &s.memAcc, sampleCols)
@@ -437,12 +425,7 @@ func (s *samplerProcessor) emitSketchRow(
437425
outRow[s.numRowsCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numRows))}
438426
outRow[s.numNullsCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numNulls))}
439427
outRow[s.sizeCol] = rowenc.EncDatum{Datum: tree.NewDInt(tree.DInt(si.size))}
440-
var data []byte
441-
if si.sketchNew != nil {
442-
data, err = si.sketchNew.MarshalBinary()
443-
} else {
444-
data, err = si.sketchOld.MarshalBinary()
445-
}
428+
data, err := si.sketch.MarshalBinary()
446429
if err != nil {
447430
return false, err
448431
}
@@ -603,11 +586,7 @@ func (s *sketchInfo) addRow(
603586
if allNulls {
604587
s.numNulls++
605588
}
606-
if s.sketchNew != nil {
607-
s.sketchNew.Insert(*buf)
608-
} else {
609-
s.sketchOld.Insert(*buf)
610-
}
589+
s.sketch.Insert(*buf)
611590
return nil
612591
}
613592

@@ -678,11 +657,7 @@ func (s *sketchInfo) addRowLegacy(
678657
// be uniformly distributed in the 2^64 range). Experiments (on tpcc
679658
// order_line) with simplistic functions yielded bad results.
680659
binary.LittleEndian.PutUint64(*buf, uint64(val))
681-
if s.sketchNew != nil {
682-
s.sketchNew.Insert(*buf)
683-
} else {
684-
s.sketchOld.Insert(*buf)
685-
}
660+
s.sketch.Insert(*buf)
686661
return nil
687662
}
688663
isNull := true
@@ -715,10 +690,6 @@ func (s *sketchInfo) addRowLegacy(
715690
if isNull {
716691
s.numNulls++
717692
}
718-
if s.sketchNew != nil {
719-
s.sketchNew.Insert(*buf)
720-
} else {
721-
s.sketchOld.Insert(*buf)
722-
}
693+
s.sketch.Insert(*buf)
723694
return nil
724695
}

0 commit comments

Comments
 (0)