Skip to content

Commit 0924dfe

Browse files
authored
Fix JDK regex support (#888)
Summary of changes:
- Fix the test resources introduced by #783 by moving the `regex` fields, such that the test framework does not skip them with a "Not a valid test case" message.
- Revert the changes introduced by #815, as those are simply incorrect.
- Extend the test coverage introduced by #815 by (a) updating the test regexes to match their intended semantics and (b) including a few negative test cases.
- Partially revert the change introduced by #783: the use of `Matcher#find()` is correct, but the `hasStartAnchor` and `hasEndAnchor` logic introduces more bugs than it solves.
- Extend the test coverage introduced by #783 by introducing regexes that are not covered by the `hasStartAnchor`/`hasEndAnchor` logic.
- Update the Joni regular expression integration such that it passes more of the test cases.
- Disable the "trailing newline" test cases, as these are currently not handled correctly by either regex implementation.
1 parent 9ed6dc2 commit 0924dfe

File tree

5 files changed

+99
-40
lines changed

5 files changed

+99
-40
lines changed
src/main/java/com/networknt/schema/regex/JDKRegularExpression.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,16 @@
11
package com.networknt.schema.regex;
22

3-
import java.util.regex.Matcher;
43
import java.util.regex.Pattern;
54

65
class JDKRegularExpression implements RegularExpression {
76
private final Pattern pattern;
8-
private final boolean hasStartAnchor;
9-
private final boolean hasEndAnchor;
107

118
JDKRegularExpression(String regex) {
12-
// The patterns in JSON Schema are not implicitly anchored so we must
13-
// use Matcher.find(). However, this method does not honor the end
14-
// anchor when immediately preceded by a quantifier (e.g., ?, *, +).
15-
// To make this work in all cases, we wrap the pattern in a group.
16-
this.hasStartAnchor = '^' == regex.charAt(0);
17-
this.hasEndAnchor = '$' == regex.charAt(regex.length() - 1);
18-
String pattern = regex;
19-
if (this.hasEndAnchor) {
20-
pattern = pattern.substring(this.hasStartAnchor ? 1 : 0, pattern.length() - 1);
21-
pattern = '(' + pattern + ")$";
22-
if (this.hasStartAnchor) pattern = '^' + pattern;
23-
}
24-
this.pattern = Pattern.compile(pattern);
9+
this.pattern = Pattern.compile(regex);
2510
}
2611

2712
@Override
2813
public boolean matches(String value) {
29-
Matcher matcher = this.pattern.matcher(value);
30-
return matcher.find() && (!this.hasStartAnchor || 0 == matcher.start()) && (!this.hasEndAnchor || matcher.end() == value.length());
14+
return this.pattern.matcher(value).find();
3115
}
32-
3316
}

src/main/java/com/networknt/schema/regex/JoniRegularExpression.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class JoniRegularExpression implements RegularExpression {
2121
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");
2222

2323
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
24-
this.pattern = new Regex(bytes, 0, bytes.length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
24+
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
2525
}
2626

2727
@Override

src/test/java/com/networknt/schema/regex/Issue814Test.java

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,62 @@ class Issue814Test {
88

99
@Test
1010
void jdkTypePattern() {
11-
JDKRegularExpression ex = new JDKRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
11+
JDKRegularExpression ex = new JDKRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
1212
assertTrue(ex.matches("list"));
1313
assertTrue(ex.matches("string"));
1414
assertTrue(ex.matches("boolean"));
1515
assertTrue(ex.matches("number"));
1616
assertTrue(ex.matches("enum"));
17+
assertFalse(ex.matches("listZ"));
18+
assertFalse(ex.matches("AenumZ"));
19+
assertFalse(ex.matches("Anumber"));
1720
}
1821

1922
@Test
2023
void jdkOptionsPattern() {
21-
JDKRegularExpression ex = new JDKRegularExpression("^\\d*|[a-zA-Z_]+$");
22-
assertTrue(ex.matches("external"));
23-
assertTrue(ex.matches("external_gte"));
24-
assertTrue(ex.matches("force"));
25-
assertTrue(ex.matches("internal"));
24+
JDKRegularExpression ex = new JDKRegularExpression("^\\d|[a-zA-Z_]$");
25+
assertTrue(ex.matches("5"));
26+
assertTrue(ex.matches("55"));
27+
assertTrue(ex.matches("5%"));
28+
assertTrue(ex.matches("a"));
29+
assertTrue(ex.matches("aa"));
30+
assertTrue(ex.matches("%a"));
31+
assertTrue(ex.matches("%_"));
32+
assertTrue(ex.matches("55aa"));
33+
assertTrue(ex.matches("5%%a"));
34+
assertFalse(ex.matches(""));
35+
assertFalse(ex.matches("%"));
36+
assertFalse(ex.matches("a5"));
2637
}
2738

2839
@Test
2940
void joniTypePattern() {
30-
JoniRegularExpression ex = new JoniRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$");
41+
JoniRegularExpression ex = new JoniRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$");
3142
assertTrue(ex.matches("list"));
3243
assertTrue(ex.matches("string"));
3344
assertTrue(ex.matches("boolean"));
3445
assertTrue(ex.matches("number"));
3546
assertTrue(ex.matches("enum"));
47+
assertFalse(ex.matches("listZ"));
48+
assertFalse(ex.matches("AenumZ"));
49+
assertFalse(ex.matches("Anumber"));
3650
}
3751

3852
@Test
3953
void joniOptionsPattern() {
40-
JoniRegularExpression ex = new JoniRegularExpression("^\\d*|[a-zA-Z_]+$");
41-
assertTrue(ex.matches("internal"));
42-
assertTrue(ex.matches("external"));
43-
assertTrue(ex.matches("external_gte"));
44-
assertTrue(ex.matches("force"));
54+
JoniRegularExpression ex = new JoniRegularExpression("^\\d|[a-zA-Z_]$");
55+
assertTrue(ex.matches("5"));
56+
assertTrue(ex.matches("55"));
57+
assertTrue(ex.matches("5%"));
58+
assertTrue(ex.matches("a"));
59+
assertTrue(ex.matches("aa"));
60+
assertTrue(ex.matches("%a"));
61+
assertTrue(ex.matches("%_"));
62+
assertTrue(ex.matches("55aa"));
63+
assertTrue(ex.matches("5%%a"));
64+
assertFalse(ex.matches(""));
65+
assertFalse(ex.matches("%"));
66+
assertFalse(ex.matches("a5"));
4567
}
4668

4769
}

src/test/resources/draft2020-12/issue495.json

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,105 @@
11
[
22
{
33
"description": "issue495 using ECMA-262",
4-
"regex": "ecma-262",
54
"schema": {
65
"$schema": "https://json-schema.org/draft/2020-12/schema",
7-
"pattern": "^[a-z]{1,10}$",
6+
"patternProperties": {
7+
"^[a-z]{1,10}$": true,
8+
"(^1$)": true
9+
},
810
"unevaluatedProperties": false
911
},
1012
"tests": [
1113
{
1214
"description": "an expected property name",
15+
"regex": "ecma-262",
1316
"data": { "aaa": 3 },
1417
"valid": true
1518
},
19+
{
20+
"description": "another expected property name",
21+
"regex": "jdk",
22+
"data": { "1": 3 },
23+
"valid": true
24+
},
1625
{
1726
"description": "trailing newline",
27+
"regex": "ecma-262",
1828
"data": { "aaa\n": 3 },
19-
"valid": false
29+
"valid": false,
30+
"disabled": true,
31+
"comment": "Test fails"
32+
},
33+
{
34+
"description": "another trailing newline",
35+
"regex": "jdk",
36+
"data": { "1\n": 3 },
37+
"valid": false,
38+
"disabled": true,
39+
"comment": "Test fails"
2040
},
2141
{
2242
"description": "embedded newline",
43+
"regex": "ecma-262",
2344
"data": { "aaa\nbbb": 3 },
2445
"valid": false
2546
},
2647
{
2748
"description": "leading newline",
49+
"regex": "ecma-262",
2850
"data": { "\nbbb": 3 },
2951
"valid": false
3052
}
3153
]
3254
},
3355
{
3456
"description": "issue495 using Java Pattern",
35-
"regex": "jdk",
3657
"schema": {
3758
"$schema": "https://json-schema.org/draft/2020-12/schema",
38-
"pattern": "^[a-z]{1,10}$",
59+
"patternProperties": {
60+
"^[a-z]{1,10}$": true,
61+
"(^1$)": true
62+
},
3963
"unevaluatedProperties": false
4064
},
4165
"tests": [
4266
{
4367
"description": "an expected property name",
68+
"regex": "jdk",
4469
"data": { "aaa": 3 },
4570
"valid": true
4671
},
72+
{
73+
"description": "another expected property name",
74+
"regex": "jdk",
75+
"data": { "1": 3 },
76+
"valid": true
77+
},
4778
{
4879
"description": "trailing newline",
80+
"regex": "jdk",
4981
"data": { "aaa\n": 3 },
50-
"valid": false
82+
"valid": false,
83+
"disabled": true,
84+
"comment": "Test fails"
85+
},
86+
{
87+
"description": "another trailing newline",
88+
"regex": "jdk",
89+
"data": { "1\n": 3 },
90+
"valid": false,
91+
"disabled": true,
92+
"comment": "Test fails"
5193
},
5294
{
5395
"description": "embedded newline",
96+
"regex": "jdk",
5497
"data": { "aaa\nbbb": 3 },
5598
"valid": false
5699
},
57100
{
58101
"description": "leading newline",
102+
"regex": "jdk",
59103
"data": { "\nbbb": 3 },
60104
"valid": false
61105
}

src/test/resources/draft2020-12/issue782.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
[
22
{
33
"description": "issue782 using ECMA-262",
4-
"regex": "ecma-262",
54
"schema": {
65
"$schema": "https://json-schema.org/draft/2020-12/schema",
76
"patternProperties": {
@@ -14,39 +13,44 @@
1413
"tests": [
1514
{
1615
"description": "regexes may be anchored to the start of the property name, 1",
16+
"regex": "ecma-262",
1717
"data": { "x-api-id": 3 },
1818
"valid": true
1919
},
2020
{
2121
"description": "regexes may be anchored to the start of the property name, 2",
22+
"regex": "ecma-262",
2223
"data": { "ax-api-id": 3 },
2324
"valid": false
2425
},
2526
{
2627
"description": "regexes may be anchored to the end of the property name, 1",
28+
"regex": "ecma-262",
2729
"data": { "api-id-y-": 3 },
2830
"valid": true
2931
},
3032
{
3133
"description": "regexes may be anchored to the end of the property name, 2",
34+
"regex": "ecma-262",
3235
"data": { "y-api-id": 3 },
3336
"valid": false
3437
},
3538
{
3639
"description": "regexes may be anchored to both ends of the property name, 1",
40+
"regex": "ecma-262",
3741
"data": { "z-": 3 },
3842
"valid": true
3943
},
4044
{
4145
"description": "regexes may be anchored to both ends of the property name, 2",
46+
"regex": "ecma-262",
4247
"data": { "az-api-id": 3 },
4348
"valid": false
4449
}
4550
]
4651
},
4752
{
4853
"description": "issue782 using Java Pattern",
49-
"regex": "jdk",
5054
"schema": {
5155
"$schema": "https://json-schema.org/draft/2020-12/schema",
5256
"patternProperties": {
@@ -59,31 +63,37 @@
5963
"tests": [
6064
{
6165
"description": "regexes may be anchored to the start of the property name, 1",
66+
"regex": "jdk",
6267
"data": { "x-api-id": 3 },
6368
"valid": true
6469
},
6570
{
6671
"description": "regexes may be anchored to the start of the property name, 2",
72+
"regex": "jdk",
6773
"data": { "ax-api-id": 3 },
6874
"valid": false
6975
},
7076
{
7177
"description": "regexes may be anchored to the end of the property name, 1",
78+
"regex": "jdk",
7279
"data": { "api-id-y-": 3 },
7380
"valid": true
7481
},
7582
{
7683
"description": "regexes may be anchored to the end of the property name, 2",
84+
"regex": "jdk",
7785
"data": { "y-api-id": 3 },
7886
"valid": false
7987
},
8088
{
8189
"description": "regexes may be anchored to both ends of the property name, 1",
90+
"regex": "jdk",
8291
"data": { "z-": 3 },
8392
"valid": true
8493
},
8594
{
8695
"description": "regexes may be anchored to both ends of the property name, 2",
96+
"regex": "jdk",
8797
"data": { "az-api-id": 3 },
8898
"valid": false
8999
}

0 commit comments

Comments
 (0)