Skip to content

Commit 13f6f04

Browse files
committed
Added regexp_instr and regexp_substr
1 parent e1e2733 commit 13f6f04

File tree

6 files changed

+540
-7
lines changed

6 files changed

+540
-7
lines changed

enginetest/queries/regex_queries.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2107,4 +2107,52 @@ var RegexTests = []RegexTest{
21072107
Query: `SELECT REGEXP_LIKE("abc", "^([ab]*?)(?<!(a))c");`,
21082108
Expected: []sql.Row{{1}},
21092109
},
2110+
{
2111+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'dog');",
2112+
Expected: []sql.Row{{1}},
2113+
},
2114+
{
2115+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'doggo');",
2116+
Expected: []sql.Row{{0}},
2117+
},
2118+
{
2119+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'dog', 2);",
2120+
Expected: []sql.Row{{9}},
2121+
},
2122+
{
2123+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'dog', 1, 2);",
2124+
Expected: []sql.Row{{9}},
2125+
},
2126+
{
2127+
Query: "SELECT REGEXP_INSTR('aa aaa aaaa', 'a{2}');",
2128+
Expected: []sql.Row{{1}},
2129+
},
2130+
{
2131+
Query: "SELECT REGEXP_INSTR('aa aaa aaaa', 'a{4}');",
2132+
Expected: []sql.Row{{8}},
2133+
},
2134+
{
2135+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'dog', 1, -1, 0);",
2136+
Expected: []sql.Row{{1}},
2137+
},
2138+
{
2139+
Query: "SELECT REGEXP_INSTR('dog cat dog', 'dog', 1, 1, 1);",
2140+
Expected: []sql.Row{{4}},
2141+
},
2142+
{
2143+
Query: "SELECT REGEXP_SUBSTR('abc def ghi', '[a-z]+');",
2144+
Expected: []sql.Row{{"abc"}},
2145+
},
2146+
{
2147+
Query: "SELECT REGEXP_SUBSTR('abc def ghi', '[a-z]+', 1, 3);",
2148+
Expected: []sql.Row{{"ghi"}},
2149+
},
2150+
{
2151+
Query: "SELECT REGEXP_SUBSTR('abc def ghi', '[a-z]+', 2, 2);",
2152+
Expected: []sql.Row{{"def"}},
2153+
},
2154+
{
2155+
Query: "SELECT REGEXP_SUBSTR('abc def ghi', '[j-z]+');",
2156+
Expected: []sql.Row{{nil}},
2157+
},
21102158
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/dolthub/go-mysql-server
33
require (
44
github.com/cespare/xxhash/v2 v2.2.0
55
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
6-
github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90
6+
github.com/dolthub/go-icu-regex v0.0.0-20250228125923-c1fa04750a0f
77
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71
88
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81
99
github.com/dolthub/vitess v0.0.0-20250228011932-c4f6bba87730

go.sum

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,12 @@ github.com/denisenkom/go-mssqldb v0.10.0/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27
5252
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
5353
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 h1:u3PMzfF8RkKd3lB9pZ2bfn0qEG+1Gms9599cr0REMww=
5454
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2/go.mod h1:mIEZOHnFx4ZMQeawhw9rhsj+0zwQj7adVsnBX7t+eKY=
55-
github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90 h1:Sni8jrP0sy/w9ZYXoff4g/ixe+7bFCZlfCqXKJSU+zM=
56-
github.com/dolthub/go-icu-regex v0.0.0-20241215010122-db690dd53c90/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA=
55+
github.com/dolthub/go-icu-regex v0.0.0-20250228125923-c1fa04750a0f h1:nCfSUnIviI4c7NY1qcs/XUu7SMy/OWwaEg2H4jp/H5Q=
56+
github.com/dolthub/go-icu-regex v0.0.0-20250228125923-c1fa04750a0f/go.mod h1:ylU4XjUpsMcvl/BKeRRMXSH7e7WBrPXdSLvnRJYrxEA=
5757
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTEtT5tOBsCuCrlYnLRKpbJVJkDbrTRhwQ=
5858
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71/go.mod h1:2/2zjLQ/JOOSbbSboojeg+cAwcRV0fDLzIiWch/lhqI=
5959
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
6060
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
61-
github.com/dolthub/vitess v0.0.0-20250123002143-3b45b8cacbfa h1:kyoPzxViSXAyqfO0Mab7Qo1UogFIrxZKKyBU6kBOl+E=
62-
github.com/dolthub/vitess v0.0.0-20250123002143-3b45b8cacbfa/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70=
63-
github.com/dolthub/vitess v0.0.0-20250214225328-a0ed4612b41c h1:YsZuBsU5wKmwrXGfzhW6/a+XzP/LWfzayXC3nCz/kqQ=
64-
github.com/dolthub/vitess v0.0.0-20250214225328-a0ed4612b41c/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70=
6561
github.com/dolthub/vitess v0.0.0-20250228011932-c4f6bba87730 h1:GtlMVB7+Z7fZZj7BHRFd2rzxZ574dJ8cB/EHWdq1kbY=
6662
github.com/dolthub/vitess v0.0.0-20250228011932-c4f6bba87730/go.mod h1:1gQZs/byeHLMSul3Lvl3MzioMtOW1je79QYGyi2fd70=
6763
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
// Copyright 2025 Dolthub, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package function
16+
17+
import (
18+
"fmt"
19+
"strings"
20+
"sync"
21+
"sync/atomic"
22+
23+
regex "github.com/dolthub/go-icu-regex"
24+
25+
"github.com/dolthub/go-mysql-server/sql"
26+
"github.com/dolthub/go-mysql-server/sql/expression"
27+
"github.com/dolthub/go-mysql-server/sql/types"
28+
)
29+
30+
// RegexpInstr implements the REGEXP_INSTR function.
31+
// https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr
32+
type RegexpInstr struct {
33+
Text sql.Expression
34+
Pattern sql.Expression
35+
Position sql.Expression
36+
Occurrence sql.Expression
37+
ReturnOption sql.Expression
38+
Flags sql.Expression
39+
40+
cachedVal atomic.Value
41+
re regex.Regex
42+
compileOnce sync.Once
43+
compileErr error
44+
}
45+
46+
var _ sql.FunctionExpression = (*RegexpInstr)(nil)
47+
var _ sql.CollationCoercible = (*RegexpInstr)(nil)
48+
var _ sql.Closer = (*RegexpInstr)(nil)
49+
50+
// NewRegexpInstr creates a new RegexpInstr expression.
51+
func NewRegexpInstr(args ...sql.Expression) (sql.Expression, error) {
52+
var r *RegexpInstr
53+
switch len(args) {
54+
case 6:
55+
r = &RegexpInstr{
56+
Text: args[0],
57+
Pattern: args[1],
58+
Position: args[2],
59+
Occurrence: args[3],
60+
ReturnOption: args[4],
61+
Flags: args[5],
62+
}
63+
case 5:
64+
r = &RegexpInstr{
65+
Text: args[0],
66+
Pattern: args[1],
67+
Position: args[2],
68+
Occurrence: args[3],
69+
ReturnOption: args[4],
70+
}
71+
case 4:
72+
r = &RegexpInstr{
73+
Text: args[0],
74+
Pattern: args[1],
75+
Position: args[2],
76+
Occurrence: args[3],
77+
ReturnOption: expression.NewLiteral(0, types.Int32),
78+
}
79+
case 3:
80+
r = &RegexpInstr{
81+
Text: args[0],
82+
Pattern: args[1],
83+
Position: args[2],
84+
Occurrence: expression.NewLiteral(1, types.Int32),
85+
ReturnOption: expression.NewLiteral(0, types.Int32),
86+
}
87+
case 2:
88+
r = &RegexpInstr{
89+
Text: args[0],
90+
Pattern: args[1],
91+
Position: expression.NewLiteral(1, types.Int32),
92+
Occurrence: expression.NewLiteral(1, types.Int32),
93+
ReturnOption: expression.NewLiteral(0, types.Int32),
94+
}
95+
default:
96+
return nil, sql.ErrInvalidArgumentNumber.New("regexp_instr", "2 to 6", len(args))
97+
}
98+
return r, nil
99+
}
100+
101+
// FunctionName implements sql.FunctionExpression
102+
func (r *RegexpInstr) FunctionName() string {
103+
return "regexp_instr"
104+
}
105+
106+
// Description implements sql.FunctionExpression
107+
func (r *RegexpInstr) Description() string {
108+
return "returns the starting index of the substring."
109+
}
110+
111+
// Type implements the sql.Expression interface.
112+
func (r *RegexpInstr) Type() sql.Type { return types.Int32 }
113+
114+
// CollationCoercibility implements the interface sql.CollationCoercible.
115+
func (r *RegexpInstr) CollationCoercibility(ctx *sql.Context) (collation sql.CollationID, coercibility byte) {
116+
leftCollation, leftCoercibility := sql.GetCoercibility(ctx, r.Text)
117+
rightCollation, rightCoercibility := sql.GetCoercibility(ctx, r.Pattern)
118+
return sql.ResolveCoercibility(leftCollation, leftCoercibility, rightCollation, rightCoercibility)
119+
}
120+
121+
// IsNullable implements the sql.Expression interface.
122+
func (r *RegexpInstr) IsNullable() bool { return true }
123+
124+
// Children implements the sql.Expression interface.
125+
func (r *RegexpInstr) Children() []sql.Expression {
126+
var result = []sql.Expression{r.Text, r.Pattern, r.Position, r.Occurrence, r.ReturnOption}
127+
if r.Flags != nil {
128+
result = append(result, r.Flags)
129+
}
130+
return result
131+
}
132+
133+
// Resolved implements the sql.Expression interface.
134+
func (r *RegexpInstr) Resolved() bool {
135+
return r.Text.Resolved() && r.Pattern.Resolved() && r.Position.Resolved() && r.Occurrence.Resolved() &&
136+
r.ReturnOption.Resolved() && (r.Flags == nil || r.Flags.Resolved())
137+
}
138+
139+
// WithChildren implements the sql.Expression interface.
140+
func (r *RegexpInstr) WithChildren(children ...sql.Expression) (sql.Expression, error) {
141+
required := 5
142+
if r.Flags != nil {
143+
required = 6
144+
}
145+
if len(children) != required {
146+
return nil, sql.ErrInvalidChildrenNumber.New(r, len(children), required)
147+
}
148+
return NewRegexpInstr(children...)
149+
}
150+
151+
// String implements the sql.Expression interface.
152+
func (r *RegexpInstr) String() string {
153+
var args []string
154+
for _, e := range r.Children() {
155+
args = append(args, e.String())
156+
}
157+
return fmt.Sprintf("%s(%s)", r.FunctionName(), strings.Join(args, ","))
158+
}
159+
160+
// compile handles compilation of the regex.
161+
func (r *RegexpInstr) compile(ctx *sql.Context) {
162+
r.compileOnce.Do(func() {
163+
r.re, r.compileErr = compileRegex(ctx, r.Pattern, r.Text, r.Flags, r.FunctionName(), nil)
164+
})
165+
}
166+
167+
// Eval implements the sql.Expression interface.
168+
func (r *RegexpInstr) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
169+
span, ctx := ctx.Span("function.RegexpInstr")
170+
defer span.End()
171+
172+
cached := r.cachedVal.Load()
173+
if cached != nil {
174+
return cached, nil
175+
}
176+
177+
r.compile(ctx)
178+
if r.compileErr != nil {
179+
return nil, r.compileErr
180+
}
181+
if r.re == nil {
182+
return nil, nil
183+
}
184+
185+
text, err := r.Text.Eval(ctx, row)
186+
if err != nil {
187+
return nil, err
188+
}
189+
if text == nil {
190+
return nil, nil
191+
}
192+
text, _, err = types.LongText.Convert(text)
193+
if err != nil {
194+
return nil, err
195+
}
196+
197+
pos, err := r.Position.Eval(ctx, row)
198+
if err != nil {
199+
return nil, err
200+
}
201+
if pos == nil {
202+
return nil, nil
203+
}
204+
pos, _, err = types.Int32.Convert(pos)
205+
if err != nil {
206+
return nil, err
207+
}
208+
209+
occurrence, err := r.Occurrence.Eval(ctx, row)
210+
if err != nil {
211+
return nil, err
212+
}
213+
if occurrence == nil {
214+
return nil, nil
215+
}
216+
occurrence, _, err = types.Int32.Convert(occurrence)
217+
if err != nil {
218+
return nil, err
219+
}
220+
221+
returnOption, err := r.ReturnOption.Eval(ctx, row)
222+
if err != nil {
223+
return nil, err
224+
}
225+
if returnOption == nil {
226+
return nil, nil
227+
}
228+
returnOption, _, err = types.Int32.Convert(returnOption)
229+
if err != nil {
230+
return nil, err
231+
}
232+
233+
err = r.re.SetMatchString(ctx, text.(string))
234+
if err != nil {
235+
return nil, err
236+
}
237+
index, err := r.re.IndexOf(ctx, int(pos.(int32)), int(occurrence.(int32)), returnOption.(int32) == 1)
238+
if err != nil {
239+
return nil, err
240+
}
241+
242+
outVal := int32(index + 1)
243+
if canBeCached(r.Text) {
244+
r.cachedVal.Store(outVal)
245+
}
246+
return outVal, nil
247+
}
248+
249+
// Close implements the sql.Closer interface.
250+
func (r *RegexpInstr) Close(ctx *sql.Context) error {
251+
if r.re != nil {
252+
return r.re.Close()
253+
}
254+
return nil
255+
}

0 commit comments

Comments
 (0)