Skip to content

Commit 151edd1

Browse files
lrhnCommit Queue
authored andcommitted
Make Dart2JS use the y RegExp flag for matchAsPrefix.
The implementation for `matchAsPrefix` predates the sticky `y` flag for JS regexps, so it had a hack that modified the RegExp pattern to ensure that it only matched at a particular position. This hack is no longer necessary, so the anchored RegExp can use the same RegExp source as the plain and global RegExp, which may let the JS engine better reuse RegExp compilation. Change-Id: I332ebb41cd733346a204345869da16637d775245 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/412420 Auto-Submit: Lasse Nielsen <[email protected]> Commit-Queue: Lasse Nielsen <[email protected]> Reviewed-by: Stephen Adams <[email protected]>
1 parent 465ca14 commit 151edd1

File tree

3 files changed

+145
-74
lines changed

3 files changed

+145
-74
lines changed

sdk/lib/_internal/js_runtime/lib/interceptors.dart

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import 'dart:_js_helper'
3737
initNativeDispatch,
3838
initNativeDispatchFlag,
3939
regExpGetNative,
40-
regExpCaptureCount,
40+
regExpHasCaptures,
4141
stringContainsUnchecked,
4242
stringIndexOfStringUnchecked,
4343
stringLastIndexOfUnchecked,
@@ -74,17 +74,25 @@ part 'js_array.dart';
7474
part 'js_number.dart';
7575
part 'js_string.dart';
7676

77-
final String DART_CLOSURE_PROPERTY_NAME =
78-
getIsolateAffinityTag(r'_$dart_dartClosure');
77+
final String DART_CLOSURE_PROPERTY_NAME = getIsolateAffinityTag(
78+
r'_$dart_dartClosure',
79+
);
7980

8081
getDispatchProperty(object) {
8182
return JS(
82-
'', '#[#]', object, JS_EMBEDDED_GLOBAL('String', DISPATCH_PROPERTY_NAME));
83+
'',
84+
'#[#]',
85+
object,
86+
JS_EMBEDDED_GLOBAL('String', DISPATCH_PROPERTY_NAME),
87+
);
8388
}
8489

8590
setDispatchProperty(object, value) {
8691
defineProperty(
87-
object, JS_EMBEDDED_GLOBAL('String', DISPATCH_PROPERTY_NAME), value);
92+
object,
93+
JS_EMBEDDED_GLOBAL('String', DISPATCH_PROPERTY_NAME),
94+
value,
95+
);
8896
}
8997

9098
// Avoid inlining this method because inlining gives us multiple allocation
@@ -119,8 +127,14 @@ makeDispatchRecord(interceptor, proto, extension, indexability) {
119127
// P I if object's prototype is P, use I
120128
// F - P if object's prototype is P, call F
121129

122-
return JS('', '{i: #, p: #, e: #, x: #}', interceptor, proto, extension,
123-
indexability);
130+
return JS(
131+
'',
132+
'{i: #, p: #, e: #, x: #}',
133+
interceptor,
134+
proto,
135+
extension,
136+
indexability,
137+
);
124138
}
125139

126140
dispatchRecordInterceptor(record) => JS('', '#.i', record);
@@ -215,8 +229,10 @@ void cacheInterceptorOnConstructor(constructor, interceptor) {
215229
defineProperty(constructor, JS_INTEROP_INTERCEPTOR_TAG, interceptor);
216230
}
217231

218-
var constructorToInterceptor =
219-
JS('', 'typeof(self.WeakMap) == "undefined" ? new Map() : new WeakMap()');
232+
var constructorToInterceptor = JS(
233+
'',
234+
'typeof(self.WeakMap) == "undefined" ? new Map() : new WeakMap()',
235+
);
220236

221237
XlookupInterceptorByConstructor(constructor) {
222238
return JS('', '#.get(#)', constructorToInterceptor, constructor);

sdk/lib/_internal/js_runtime/lib/js_string.dart

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,11 @@ final class JSString extends Interceptor
6767
return this.splitMapJoin(from, onMatch: convert);
6868
}
6969

70-
String splitMapJoin(Pattern from,
71-
{String Function(Match)? onMatch, String Function(String)? onNonMatch}) {
70+
String splitMapJoin(
71+
Pattern from, {
72+
String Function(Match)? onMatch,
73+
String Function(String)? onNonMatch,
74+
}) {
7275
return stringReplaceAllFuncUnchecked(this, from, onMatch, onNonMatch);
7376
}
7477

@@ -79,8 +82,11 @@ final class JSString extends Interceptor
7982
return stringReplaceFirstUnchecked(this, from, to, startIndex);
8083
}
8184

82-
String replaceFirstMapped(Pattern from, String replace(Match match),
83-
[int startIndex = 0]) {
85+
String replaceFirstMapped(
86+
Pattern from,
87+
String replace(Match match), [
88+
int startIndex = 0,
89+
]) {
8490
checkNull(replace);
8591
checkInt(startIndex);
8692
RangeError.checkValueInInterval(startIndex, 0, this.length, "startIndex");
@@ -91,7 +97,7 @@ final class JSString extends Interceptor
9197
checkNull(pattern);
9298
if (pattern is String) {
9399
return stringSplitUnchecked(this, pattern);
94-
} else if (pattern is JSSyntaxRegExp && regExpCaptureCount(pattern) == 0) {
100+
} else if (pattern is JSSyntaxRegExp && !regExpHasCaptures(pattern)) {
95101
var re = regExpGetNative(pattern);
96102
return stringSplitUnchecked(this, re);
97103
} else {
@@ -158,13 +164,19 @@ final class JSString extends Interceptor
158164
}
159165

160166
String toLowerCase() {
161-
return JS('returns:String;effects:none;depends:none;throws:null(1)',
162-
r'#.toLowerCase()', this);
167+
return JS(
168+
'returns:String;effects:none;depends:none;throws:null(1)',
169+
r'#.toLowerCase()',
170+
this,
171+
);
163172
}
164173

165174
String toUpperCase() {
166-
return JS('returns:String;effects:none;depends:none;throws:null(1)',
167-
r'#.toUpperCase()', this);
175+
return JS(
176+
'returns:String;effects:none;depends:none;throws:null(1)',
177+
r'#.toUpperCase()',
178+
this,
179+
);
168180
}
169181

170182
// Characters with Whitespace property (Unicode 6.3).
@@ -409,8 +421,8 @@ final class JSString extends Interceptor
409421
return this == other
410422
? 0
411423
: JS('bool', r'# < #', this, other)
412-
? -1
413-
: 1;
424+
? -1
425+
: 1;
414426
}
415427

416428
// Note: if you change this, also change the function [S].

sdk/lib/_internal/js_runtime/lib/regexp_helper.dart

Lines changed: 97 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,99 +20,139 @@ regExpGetGlobalNative(JSSyntaxRegExp regexp) {
2020
return nativeRegexp;
2121
}
2222

23-
/// Computes the number of captures in a regexp.
24-
///
25-
/// This currently involves creating a new RegExp object with a different
26-
/// source and running it against the empty string (the last part is usually
27-
/// fast).
28-
///
29-
/// The JSSyntaxRegExp could cache the result, and set the cache any time
30-
/// it finds a match.
31-
int regExpCaptureCount(JSSyntaxRegExp regexp) {
32-
var nativeAnchoredRegExp = regexp._nativeAnchoredVersion;
33-
JS('void', '#.lastIndex = #', nativeAnchoredRegExp, 0);
34-
JSArray match = JS('JSExtendableArray', '#.exec("")', nativeAnchoredRegExp);
35-
// The native-anchored regexp always have one capture more than the original,
36-
// and always matches the empty string.
37-
return match.length - 2;
38-
}
23+
// Helper method used by internal libraries ([String.split])
24+
bool regExpHasCaptures(JSSyntaxRegExp regexp) => regexp._hasCaptures;
3925

4026
class JSSyntaxRegExp implements RegExp {
4127
final String pattern;
4228
final _nativeRegExp;
4329
var _nativeGlobalRegExp;
4430
var _nativeAnchoredRegExp;
31+
bool? _hasCapturesCache;
4532

4633
String toString() =>
4734
'RegExp/$pattern/' + JS('String', '#.flags', _nativeRegExp);
4835

49-
JSSyntaxRegExp(String source,
50-
{bool multiLine = false,
51-
bool caseSensitive = true,
52-
bool unicode = false,
53-
bool dotAll = false})
54-
: this.pattern = source,
55-
this._nativeRegExp = makeNative(
56-
source, multiLine, caseSensitive, unicode, dotAll, false);
36+
JSSyntaxRegExp(
37+
String source, {
38+
bool multiLine = false,
39+
bool caseSensitive = true,
40+
bool unicode = false,
41+
bool dotAll = false,
42+
}) : this.pattern = source,
43+
this._nativeRegExp = makeNative(
44+
source,
45+
multiLine,
46+
caseSensitive,
47+
unicode,
48+
dotAll,
49+
'',
50+
);
5751

5852
get _nativeGlobalVersion {
5953
if (_nativeGlobalRegExp != null) return _nativeGlobalRegExp;
6054
return _nativeGlobalRegExp = makeNative(
61-
pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true);
55+
pattern,
56+
_isMultiLine,
57+
_isCaseSensitive,
58+
_isUnicode,
59+
_isDotAll,
60+
'g',
61+
);
6262
}
6363

6464
get _nativeAnchoredVersion {
6565
if (_nativeAnchoredRegExp != null) return _nativeAnchoredRegExp;
66-
// An "anchored version" of a regexp is created by adding "|()" to the
67-
// source. This means that the regexp always matches at the first position
68-
// that it tries, and you can see if the original regexp matched, or it
69-
// was the added zero-width match that matched, by looking at the last
70-
// capture. If it is a String, the match participated, otherwise it didn't.
71-
return _nativeAnchoredRegExp = makeNative('$pattern|()', _isMultiLine,
72-
_isCaseSensitive, _isUnicode, _isDotAll, true);
66+
// A sticky regexp is created by adding the "sticky" `y` flag.
67+
// Such a RegExp can only match exactly at the `lastIndex` position.
68+
return _nativeAnchoredRegExp = makeNative(
69+
pattern,
70+
_isMultiLine,
71+
_isCaseSensitive,
72+
_isUnicode,
73+
_isDotAll,
74+
'y',
75+
);
7376
}
7477

7578
bool get _isMultiLine => JS('bool', '#.multiline', _nativeRegExp);
7679
bool get _isCaseSensitive => JS('bool', '!#.ignoreCase', _nativeRegExp);
7780
bool get _isUnicode => JS('bool', '#.unicode', _nativeRegExp);
7881
bool get _isDotAll => JS('bool', '#.dotAll', _nativeRegExp);
7982

80-
static makeNative(String source, bool multiLine, bool caseSensitive,
81-
bool unicode, bool dotAll, bool global) {
83+
/// Used by [String.split] to determine whether it can use the JavaScript
84+
/// `String.prototype.split` directly. It cannot if the RegExp has capture
85+
/// groups.
86+
bool get _hasCaptures => _hasCapturesCache ??= _computeHasCaptures();
87+
88+
static makeNative(
89+
String source,
90+
bool multiLine,
91+
bool caseSensitive,
92+
bool unicode,
93+
bool dotAll,
94+
String extraFlags,
95+
) {
8296
checkString(source);
83-
String m = multiLine == true ? 'm' : '';
84-
String i = caseSensitive == true ? '' : 'i';
97+
String m = multiLine ? 'm' : '';
98+
String i = caseSensitive ? '' : 'i';
8599
String u = unicode ? 'u' : '';
86100
String s = dotAll ? 's' : '';
87-
String g = global ? 'g' : '';
88101
// We're using the JavaScript's try catch instead of the Dart one to avoid
89102
// dragging in Dart runtime support just because of using RegExp.
90103
var regexp = JS(
91-
'',
92-
r'''
104+
'',
105+
r'''
93106
(function(source, modifiers) {
94107
try {
95108
return new RegExp(source, modifiers);
96109
} catch (e) {
97110
return e;
98111
}
99112
})(#, # + # + # + # + #)''',
100-
source,
101-
m,
102-
i,
103-
u,
104-
s,
105-
g);
113+
source,
114+
m,
115+
i,
116+
u,
117+
s,
118+
extraFlags,
119+
);
106120
if (JS('bool', '# instanceof RegExp', regexp)) return regexp;
107121
// The returned value is the JavaScript exception. Turn it into a
108122
// Dart exception.
109123
String errorMessage = JS('String', r'String(#)', regexp);
110124
throw FormatException('Illegal RegExp pattern ($errorMessage)', source);
111125
}
112126

127+
bool _computeHasCaptures() {
128+
// Should only be called once, the first time [_hasCaptures] is read.
129+
assert(_hasCapturesCache == null);
130+
// No captures without parentheses.
131+
if (!pattern.contains('(')) return false;
132+
// Do not try to parse the RegExp, leave that to the RegExp compiler.
133+
// It's more future proof.
134+
135+
// Create RegExp with same capture groups, which always (and quickly)
136+
// matches the empty string, and do that match to get a match array.
137+
// Currently supported flags do not affect capture syntax, and only the
138+
// unicode flag affects parsing at all, so use no other flags.
139+
// The length of the match array is the number of capture groups plus one.
140+
JSArray match = JS(
141+
'JSExtendableArray',
142+
r'new RegExp("(?:)|" + #, #).exec("")',
143+
pattern,
144+
_isUnicode ? 'u' : '',
145+
);
146+
return match.length > 1;
147+
}
148+
113149
RegExpMatch? firstMatch(String string) {
114-
JSArray? m = JS('JSExtendableArray|Null', r'#.exec(#)', _nativeRegExp,
115-
checkString(string));
150+
JSArray? m = JS(
151+
'JSExtendableArray|Null',
152+
r'#.exec(#)',
153+
_nativeRegExp,
154+
checkString(string),
155+
);
116156
if (m == null) return null;
117157
return _MatchImplementation(this, m);
118158
}
@@ -149,9 +189,6 @@ class JSSyntaxRegExp implements RegExp {
149189
JS('void', '#.lastIndex = #', regexp, start);
150190
JSArray? match = JS('JSExtendableArray|Null', '#.exec(#)', regexp, string);
151191
if (match == null) return null;
152-
// If the last capture group participated, the original regexp did not
153-
// match at the start position.
154-
if (match.removeLast() != null) return null;
155192
return _MatchImplementation(this, match);
156193
}
157194

@@ -189,9 +226,14 @@ class _MatchImplementation implements RegExpMatch {
189226
int get start =>
190227
JS('returns:int;depends:none;effects:none;gvn:true', '#.index', _match);
191228

192-
int get end => (start +
193-
JS('returns:int;depends:none;effects:none;gvn:true', '#[0].length',
194-
_match)) as int;
229+
int get end =>
230+
(start +
231+
JS(
232+
'returns:int;depends:none;effects:none;gvn:true',
233+
'#[0].length',
234+
_match,
235+
))
236+
as int;
195237

196238
// The JS below changes the static type to avoid an implicit cast.
197239
// TODO(sra): Find a nicer way to do this, e.g. unsafeCast.
@@ -224,7 +266,8 @@ class _MatchImplementation implements RegExpMatch {
224266
var groups = JS('=Object|Null', '#.groups', _match);
225267
if (groups != null) {
226268
var keys = JSArray<String>.markGrowable(
227-
JS('returns:JSExtendableArray;new:true', 'Object.keys(#)', groups));
269+
JS('returns:JSExtendableArray;new:true', 'Object.keys(#)', groups),
270+
);
228271
return SubListIterable(keys, 0, null);
229272
}
230273
return Iterable.empty();

0 commit comments

Comments
 (0)