Skip to content

Commit a99943d

Browse files
Copilot and mathiasrw authored
Let REGEXP support MySQL style word boundaries to fix #1384 (#2308)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mathiasrw <[email protected]> Co-authored-by: Mathias Wulff <[email protected]>
1 parent 182256c commit a99943d

File tree

2 files changed

+108
-1
lines changed

2 files changed

+108
-1
lines changed

src/55functions.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,11 @@ stdlib.SUBSTRING =
192192

193193
stdfn.REGEXP_LIKE = function (a, b, c) {
194194
// console.log(a,b,c);
195-
return (a || '').search(RegExp(b, c)) > -1;
195+
// Convert MySQL word boundaries to JavaScript word boundaries
196+
// [[:<:]] -> \b (start of word)
197+
// [[:>:]] -> \b (end of word)
198+
var pattern = b.replace(/\[\[:<:\]\]/g, '\\b').replace(/\[\[:>:\]\]/g, '\\b');
199+
return (a || '').search(RegExp(pattern, c)) > -1;
196200
};
197201

198202
// Here we use undefined instead of null

test/test1384.js

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
if (typeof exports === 'object') {
2+
var assert = require('assert');
3+
var alasql = require('..');
4+
}
5+
6+
describe('Test 1384 - REGEXP with MySQL word boundaries', function () {
7+
const test = '1384';
8+
9+
before(function () {
10+
alasql('create database test' + test);
11+
alasql('use test' + test);
12+
});
13+
14+
after(function () {
15+
alasql('drop database test' + test);
16+
});
17+
18+
it('A) Test REGEXP with [[:<:]] and [[:>:]] word boundaries', function () {
19+
var sql =
20+
'CREATE TABLE cities (city string, population number);' +
21+
"INSERT INTO cities VALUES ('Rome,madrid',2863223),('Paris',2249975),('Berlin,rid',3517424), ('Madrid',3041579);" +
22+
"SELECT * FROM cities WHERE city REGEXP '[[:<:]]rid[[:>:]]';";
23+
24+
var res = alasql(sql);
25+
// When multiple statements are executed, result is an array with each statement's result
26+
// [1, 4, [{matching_rows}]] where 1 = CREATE result, 4 = INSERT count, last = SELECT result
27+
var selectResult = res[2];
28+
assert.equal(selectResult.length, 1, 'Should match exactly one city');
29+
assert.equal(selectResult[0].city, 'Berlin,rid', 'Should match Berlin,rid');
30+
assert.equal(selectResult[0].population, 3517424, 'Should have correct population');
31+
});
32+
33+
it('B) Test that REGEXP does not match partial words', function () {
34+
alasql('CREATE TABLE test_words (word string)');
35+
alasql(
36+
"INSERT INTO test_words VALUES ('rid'),('Madrid'),('riddle'),('pride'),('ридда'),('Berlin,rid')"
37+
);
38+
39+
// Should only match 'rid' and 'Berlin,rid' where 'rid' appears as a complete word
40+
var res = alasql("SELECT * FROM test_words WHERE word REGEXP '[[:<:]]rid[[:>:]]'");
41+
assert.equal(res.length, 2, 'Should match exactly two words');
42+
43+
var matched = res.map(r => r.word).sort();
44+
assert.deepEqual(matched, ['Berlin,rid', 'rid'], 'Should match only complete word rid');
45+
});
46+
47+
it('C) Test REGEXP_LIKE with word boundaries', function () {
48+
assert.equal(
49+
alasql("= REGEXP_LIKE('Berlin,rid', '[[:<:]]rid[[:>:]]')"),
50+
true,
51+
'Should match rid in Berlin,rid'
52+
);
53+
assert.equal(
54+
alasql("= REGEXP_LIKE('Madrid', '[[:<:]]rid[[:>:]]')"),
55+
false,
56+
'Should not match rid in Madrid'
57+
);
58+
assert.equal(
59+
alasql("= REGEXP_LIKE('riddle', '[[:<:]]rid[[:>:]]')"),
60+
false,
61+
'Should not match rid in riddle'
62+
);
63+
assert.equal(
64+
alasql("= REGEXP_LIKE('rid', '[[:<:]]rid[[:>:]]')"),
65+
true,
66+
'Should match standalone rid'
67+
);
68+
});
69+
70+
it('D) Test mixed word boundaries', function () {
71+
// Test start boundary only
72+
assert.equal(
73+
alasql("= REGEXP_LIKE('riddle', '[[:<:]]rid')"),
74+
true,
75+
'Should match rid at start of riddle'
76+
);
77+
assert.equal(
78+
alasql("= REGEXP_LIKE('Madrid', '[[:<:]]rid')"),
79+
false,
80+
'Should not match rid in Madrid (not at word start)'
81+
);
82+
83+
// Test end boundary only
84+
assert.equal(
85+
alasql("= REGEXP_LIKE('Madrid', 'rid[[:>:]]')"),
86+
true,
87+
'Should match rid at end of Madrid'
88+
);
89+
assert.equal(
90+
alasql("= REGEXP_LIKE('riddle', 'rid[[:>:]]')"),
91+
false,
92+
'Should not match rid in riddle (not at word end)'
93+
);
94+
});
95+
96+
it('E) Test backward compatibility - normal REGEXP still works', function () {
97+
// Ensure existing REGEXP functionality is not broken
98+
assert.equal(alasql("= REGEXP_LIKE('abcdef', 'a.*')"), true);
99+
assert.equal(alasql("= REGEXP_LIKE('abcdef', '[aq]')"), true);
100+
assert.equal(alasql("= REGEXP_LIKE('abcdef', '[^qw]')"), true);
101+
assert.equal(alasql("= REGEXP_LIKE('abcdef', '[qw]')"), false);
102+
});
103+
});

0 commit comments

Comments
 (0)