Skip to content

Commit 5c4f3ee

Browse files
craig[bot]mgartnerstevendannamw5h
committed
155064: opt: normalize LIKE ... ESCAPE r=mgartner a=mgartner #### logictest: make test case deterministic This commit makes a test case in `distsql_agg` deterministic no matter how many optimizer rules have been defined. It now explicitly disables an optimizer rule, which is required to reproduce the behavior, rather than using the `testing_optimizer_random_seed` and `testing_optimizer_disable_rule_probability` session settings. Release note: None #### opt: make scalar list custom func accessor safer The custom functions `FirstScalarListExpr` and `SecondScalarListExpr` have been replaced by `ScalarExprAt`. This function is safer because it returns ok=false if the given index is out-of-bounds instead of panicking. This function has been added to `general_funcs.go` instead of `comp_funcs.go` in preparation for usage outside of `comp.opt`. Release note: None #### opt: normalize LIKE ... ESCAPE An expression in the form `a LIKE b ESCAPE '/'`, which is parsed as `like_escape(a, b, '/')`, is now normalized to `a LIKE b`. This is valid because `\` is the default escape character, so the expressions are equivalent. This normalization allows for further optimization, for example: ```sql CREATE TABLE t (s STRING, INDEX (s)); EXPLAIN SELECT s FROM t WHERE s LIKE 'foo\_bar' ESCAPE '\'; -- BEFORE: -- • filter -- │ filter: like_escape(s, e'foo\\_bar', e'\\') -- │ -- └── • scan -- table: t@t_pkey -- spans: FULL SCAN -- AFTER: -- • scan -- table: t@t_s_idx -- spans: [/'foo_bar' - /'foo_bar'] ``` Informs #30192 Release note (performance improvement): Queries with filters in the form `a LIKE b ESCAPE '\'` are now index-accelerated in some cases which they were not before. 157908: kvpb: add (*RangeFeedEvent).EventType method r=wenyihu6 a=stevendanna A small helper for printing the type of an event since this type is enum-like. Informs #135332 Release note: None 157956: roachtest/vecindex: Always download datasets when testing r=mw5h a=mw5h Previously, the vecindex roachtest would use the vecann default of attempting to reuse cached dataset files. This led to a situation where a test runner apparently cached a truncated file, causing the test to fail repeatedly when run from that runner. This patch changes the behavior to always download the dataset, so the test doesn't repeatedly flake. Fixes: #157119 Release note: None Co-authored-by: Marcus Gartner <[email protected]> Co-authored-by: Steven Danna <[email protected]> Co-authored-by: Matt White <[email protected]>
4 parents 1b9970e + 4497042 + 48ee987 + 2794f07 commit 5c4f3ee

File tree

10 files changed

+131
-43
lines changed

10 files changed

+131
-43
lines changed

pkg/cmd/roachtest/tests/vecindex.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ func runVectorIndex(ctx context.Context, t test.Test, c cluster.Cluster, opts ve
253253
t.L().Printf("Loading dataset %s", opts.dataset)
254254
loader := vecann.DatasetLoader{
255255
DatasetName: opts.dataset,
256+
ResetCache: true,
256257
OnProgress: func(ctx context.Context, format string, args ...any) {
257258
t.L().Printf(format, args...)
258259
},

pkg/kv/kvpb/api.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2185,6 +2185,28 @@ func (e *RangeFeedEvent) ShallowCopy() *RangeFeedEvent {
21852185
return &cpy
21862186
}
21872187

2188+
// EventType returns a string description of the type of event..
2189+
func (e *RangeFeedEvent) EventType() string {
2190+
switch {
2191+
case e.Val != nil:
2192+
return "Value"
2193+
case e.Checkpoint != nil:
2194+
return "Checkpoint"
2195+
case e.SST != nil:
2196+
return "SST"
2197+
case e.DeleteRange != nil:
2198+
return "DeleteRange"
2199+
case e.Metadata != nil:
2200+
return "Metadata"
2201+
case e.Error != nil:
2202+
return "Error"
2203+
case e.BulkEvents != nil:
2204+
return "BulkEvents"
2205+
default:
2206+
return "Unknown"
2207+
}
2208+
}
2209+
21882210
// Timestamp is part of rangefeedbuffer.Event.
21892211
func (e *RangeFeedValue) Timestamp() hlc.Timestamp {
21902212
return e.Value.Timestamp

pkg/kv/kvserver/rangefeed/sender_helper_test.go

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -97,22 +97,7 @@ func (s *testServerStream) String() string {
9797
for streamID, eventList := range s.streamEvents {
9898
fmt.Fprintf(&str, "\tStreamID:%d, Len:%d", streamID, len(eventList))
9999
for _, ev := range eventList {
100-
switch {
101-
case ev.Val != nil:
102-
fmt.Fprintf(&str, "\t\tvalue")
103-
case ev.Checkpoint != nil:
104-
fmt.Fprintf(&str, "\t\tcheckpoint")
105-
case ev.SST != nil:
106-
fmt.Fprintf(&str, "\t\tsst")
107-
case ev.DeleteRange != nil:
108-
fmt.Fprintf(&str, "\t\tdelete")
109-
case ev.Metadata != nil:
110-
fmt.Fprintf(&str, "\t\tmetadata")
111-
case ev.Error != nil:
112-
fmt.Fprintf(&str, "\t\terror")
113-
default:
114-
panic("unknown event type")
115-
}
100+
fmt.Fprintf(&str, "\t\t%s", ev.EventType())
116101
}
117102
fmt.Fprintf(&str, "\n")
118103
}

pkg/sql/logictest/testdata/logic_test/distsql_agg

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -730,8 +730,7 @@ statement ok
730730
CREATE TABLE t108901 AS SELECT g::FLOAT8 AS _float8 FROM generate_series(1, 5) AS g;
731731
ALTER TABLE t108901 SPLIT AT VALUES (1), (2), (3);
732732
ALTER TABLE t108901 RELOCATE VOTERS VALUES (ARRAY[1], 1), (ARRAY[2], 2), (ARRAY[3], 3);
733-
SET testing_optimizer_random_seed = 1613845022891698972;
734-
SET testing_optimizer_disable_rule_probability = 0.500000;
733+
SET disable_optimizer_rules = PruneWindowOutputCols;
735734

736735
statement error integer out of range
737736
SELECT 1 FROM t108901 WHERE

pkg/sql/opt/norm/comp_funcs.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,16 +76,6 @@ func (c *CustomFuncs) NormalizeTupleEquality(left, right memo.ScalarListExpr) op
7676
return result
7777
}
7878

79-
// FirstScalarListExpr returns the first ScalarExpr in the given list.
80-
func (c *CustomFuncs) FirstScalarListExpr(list memo.ScalarListExpr) opt.ScalarExpr {
81-
return list[0]
82-
}
83-
84-
// SecondScalarListExpr returns the second ScalarExpr in the given list.
85-
func (c *CustomFuncs) SecondScalarListExpr(list memo.ScalarListExpr) opt.ScalarExpr {
86-
return list[1]
87-
}
88-
8979
// MakeTimeZoneFunction constructs a new timezone() function with the given zone
9080
// and timestamp as arguments. The type of the function result is TIMESTAMPTZ if
9181
// ts is of type TIMESTAMP, or TIMESTAMP if is of type TIMESTAMPTZ.

pkg/sql/opt/norm/general_funcs.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,22 @@ func (c *CustomFuncs) IsLeakproof(expr memo.RelExpr) bool {
658658
return expr.Relational().VolatilitySet.IsLeakproof()
659659
}
660660

661+
// ----------------------------------------------------------------------
662+
//
663+
// ScalarListExpr functions
664+
// General functions related to ScalarListExpr.
665+
//
666+
// ----------------------------------------------------------------------
667+
668+
// ScalarExprAt returns the ScalarExpr in the i-th position in the given list.
669+
// Returns ok=false if i is out of bounds.
670+
func (c *CustomFuncs) ScalarExprAt(list memo.ScalarListExpr, i int) (_ opt.ScalarExpr, ok bool) {
671+
if i >= 0 && i < len(list) {
672+
return list[i], true
673+
}
674+
return nil, false
675+
}
676+
661677
// ----------------------------------------------------------------------
662678
//
663679
// Ordering functions

pkg/sql/opt/norm/rules/comp.opt

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -220,14 +220,13 @@
220220
(Function $args:* $private:(FunctionPrivate "timezone"))
221221
$right:(ConstValue) &
222222
(IsTimestampTZ $right) &
223-
(IsTimestamp $ts:(SecondScalarListExpr $args)) &
224-
^(IsConstValueOrGroupOfConstValues $ts)
223+
(Let ($ts $tsOk):(ScalarExprAt $args 1) $tsOk) &
224+
(IsTimestamp $ts) &
225+
^(IsConstValueOrGroupOfConstValues $ts) &
226+
(Let ($zone $zoneOk):(ScalarExprAt $args 0) $zoneOk)
225227
)
226228
=>
227-
((OpName)
228-
$ts
229-
(MakeTimeZoneFunction (FirstScalarListExpr $args) $right)
230-
)
229+
((OpName) $ts (MakeTimeZoneFunction $zone $right))
231230

232231
# NormalizeCmpTimeZoneFunctionTZ normalizes timezone functions within
233232
# comparison operators. It only matches expressions when:
@@ -249,14 +248,13 @@
249248
(Function $args:* $private:(FunctionPrivate "timezone"))
250249
$right:(ConstValue) &
251250
(IsTimestamp $right) &
252-
(IsTimestampTZ $tz:(SecondScalarListExpr $args)) &
253-
^(IsConstValueOrGroupOfConstValues $tz)
251+
(Let ($tz $tzOk):(ScalarExprAt $args 1) $tzOk) &
252+
(IsTimestampTZ $tz) &
253+
^(IsConstValueOrGroupOfConstValues $tz) &
254+
(Let ($zone $zoneOk):(ScalarExprAt $args 0) $zoneOk)
254255
)
255256
=>
256-
((OpName)
257-
$tz
258-
(MakeTimeZoneFunction (FirstScalarListExpr $args) $right)
259-
)
257+
((OpName) $tz (MakeTimeZoneFunction $zone $right))
260258

261259
# FoldEqTrue replaces x = True with x.
262260
[FoldEqTrue, Normalize]

pkg/sql/opt/norm/rules/scalar.opt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,3 +452,21 @@ $udf
452452
(Indirection $input:* $index:* & (IsJSON $input))
453453
=>
454454
(FetchVal $input $index)
455+
456+
# ConvertLikeEscapeToLike converts a LIKE x ESCAPE '\' expression, which is
457+
# parsed as a like_escape function call, to a LIKE expression. This is valid
458+
# because the default escape character is '\'.
459+
#
460+
# Note that the FunctionExpr match pattern is enough to ensure that we are not
461+
# transforming a UDF because UDF invocations are always built as UDFCallExprs.
462+
[ConvertLikeEscapeToLike, Normalize]
463+
(Function
464+
$args:*
465+
$private:(FunctionPrivate "like_escape") &
466+
(Let ($input $iOk):(ScalarExprAt $args 0) $iOk) &
467+
(Let ($pattern $pOk):(ScalarExprAt $args 1) $pOk) &
468+
(Let ($escape $eOk):(ScalarExprAt $args 2) $eOk) &
469+
(ConstStringEquals $escape "\\")
470+
)
471+
=>
472+
(Like $input $pattern)

pkg/sql/opt/norm/testdata/rules/scalar

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2689,3 +2689,60 @@ project
26892689
└── projections
26902690
├── b.arr:5[1] [as=arr:8, outer=(5)]
26912691
└── b.arr:5[2] [as=arr:9, outer=(5)]
2692+
2693+
# --------------------------------------------------
2694+
# ConvertLikeEscapeToLike
2695+
# --------------------------------------------------
2696+
2697+
norm expect=ConvertLikeEscapeToLike
2698+
SELECT s LIKE 'foo%' ESCAPE '\' FROM a
2699+
----
2700+
project
2701+
├── columns: like_escape:8
2702+
├── scan a
2703+
│ └── columns: s:4
2704+
└── projections
2705+
└── s:4 LIKE 'foo%' [as=like_escape:8, outer=(4)]
2706+
2707+
norm expect=ConvertLikeEscapeToLike
2708+
SELECT like_escape(s, 'foo%', '\') FROM a
2709+
----
2710+
project
2711+
├── columns: like_escape:8
2712+
├── scan a
2713+
│ └── columns: s:4
2714+
└── projections
2715+
└── s:4 LIKE 'foo%' [as=like_escape:8, outer=(4)]
2716+
2717+
norm expect-not=ConvertLikeEscapeToLike
2718+
SELECT s LIKE 'foo%' ESCAPE '/' FROM a
2719+
----
2720+
project
2721+
├── columns: like_escape:8
2722+
├── immutable
2723+
├── scan a
2724+
│ └── columns: s:4
2725+
└── projections
2726+
└── like_escape(s:4, 'foo%', '/') [as=like_escape:8, outer=(4), immutable]
2727+
2728+
norm expect-not=ConvertLikeEscapeToLike
2729+
SELECT s LIKE 'foo%' ESCAPE 'f' FROM a
2730+
----
2731+
project
2732+
├── columns: like_escape:8
2733+
├── immutable
2734+
├── scan a
2735+
│ └── columns: s:4
2736+
└── projections
2737+
└── like_escape(s:4, 'foo%', 'f') [as=like_escape:8, outer=(4), immutable]
2738+
2739+
norm expect-not=ConvertLikeEscapeToLike
2740+
SELECT not_like_escape(s, 'foo%', '\') FROM a
2741+
----
2742+
project
2743+
├── columns: not_like_escape:8
2744+
├── immutable
2745+
├── scan a
2746+
│ └── columns: s:4
2747+
└── projections
2748+
└── not_like_escape(s:4, 'foo%', e'\\') [as=not_like_escape:8, outer=(4), immutable]

pkg/workload/vecann/datasets.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ type DatasetLoader struct {
9090
// CacheFolder is the path to the temporary folder where datasets will be
9191
// cached. It defaults to ~/.cache/workload-datasets.
9292
CacheFolder string
93+
// ResetCache indicates that the cache should be re-populated.
94+
ResetCache bool
9395

9496
// OnProgress logs the progress of the loading process.
9597
OnProgress func(ctx context.Context, format string, args ...any)
@@ -128,12 +130,12 @@ func (dl *DatasetLoader) loadFiles(ctx context.Context) error {
128130
neighbors := fmt.Sprintf("%s/%s-neighbors-%s.ibin", baseDir, baseName, metric)
129131

130132
// Download test and neighbors files if missing.
131-
if !fileExists(test) {
133+
if dl.ResetCache || !fileExists(test) {
132134
if err := dl.downloadAndUnzip(ctx, baseName, baseName+"-test.fbin.zip", test); err != nil {
133135
return err
134136
}
135137
}
136-
if !fileExists(neighbors) {
138+
if dl.ResetCache || !fileExists(neighbors) {
137139
fileName := baseName + "-neighbors-" + metric + ".ibin.zip"
138140
if err := dl.downloadAndUnzip(ctx, baseName, fileName, neighbors); err != nil {
139141
return err
@@ -179,7 +181,7 @@ func (dl *DatasetLoader) downloadTrainFiles(
179181
// First, check for files in the cache.
180182
onlyFileName := fmt.Sprintf("%s/%s.fbin", baseDir, baseName)
181183
firstPartName := fmt.Sprintf("%s/%s-1.fbin", baseDir, baseName)
182-
if !fileExists(onlyFileName) && !fileExists(firstPartName) {
184+
if dl.ResetCache || (!fileExists(onlyFileName) && !fileExists(firstPartName)) {
183185
// No files in cache, download them.
184186
partNum := 0
185187
for {

0 commit comments

Comments
 (0)