@@ -20,99 +20,139 @@ regExpGetGlobalNative(JSSyntaxRegExp regexp) {
2020 return nativeRegexp;
2121}
2222
23- /// Computes the number of captures in a regexp.
24- ///
25- /// This currently involves creating a new RegExp object with a different
26- /// source and running it against the empty string (the last part is usually
27- /// fast).
28- ///
29- /// The JSSyntaxRegExp could cache the result, and set the cache any time
30- /// it finds a match.
31- int regExpCaptureCount (JSSyntaxRegExp regexp) {
32- var nativeAnchoredRegExp = regexp._nativeAnchoredVersion;
33- JS ('void' , '#.lastIndex = #' , nativeAnchoredRegExp, 0 );
34- JSArray match = JS ('JSExtendableArray' , '#.exec("")' , nativeAnchoredRegExp);
35- // The native-anchored regexp always have one capture more than the original,
36- // and always matches the empty string.
37- return match.length - 2 ;
38- }
23+ // Helper method used by internal libraries ([String.split])
24+ bool regExpHasCaptures (JSSyntaxRegExp regexp) => regexp._hasCaptures;
3925
4026class JSSyntaxRegExp implements RegExp {
4127 final String pattern;
4228 final _nativeRegExp;
4329 var _nativeGlobalRegExp;
4430 var _nativeAnchoredRegExp;
31+ bool ? _hasCapturesCache;
4532
4633 String toString () =>
4734 'RegExp/$pattern /' + JS ('String' , '#.flags' , _nativeRegExp);
4835
49- JSSyntaxRegExp (String source,
50- {bool multiLine = false ,
51- bool caseSensitive = true ,
52- bool unicode = false ,
53- bool dotAll = false })
54- : this .pattern = source,
55- this ._nativeRegExp = makeNative (
56- source, multiLine, caseSensitive, unicode, dotAll, false );
36+ JSSyntaxRegExp (
37+ String source, {
38+ bool multiLine = false ,
39+ bool caseSensitive = true ,
40+ bool unicode = false ,
41+ bool dotAll = false ,
42+ }) : this .pattern = source,
43+ this ._nativeRegExp = makeNative (
44+ source,
45+ multiLine,
46+ caseSensitive,
47+ unicode,
48+ dotAll,
49+ '' ,
50+ );
5751
5852 get _nativeGlobalVersion {
5953 if (_nativeGlobalRegExp != null ) return _nativeGlobalRegExp;
6054 return _nativeGlobalRegExp = makeNative (
61- pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true );
55+ pattern,
56+ _isMultiLine,
57+ _isCaseSensitive,
58+ _isUnicode,
59+ _isDotAll,
60+ 'g' ,
61+ );
6262 }
6363
6464 get _nativeAnchoredVersion {
6565 if (_nativeAnchoredRegExp != null ) return _nativeAnchoredRegExp;
66- // An "anchored version" of a regexp is created by adding "|()" to the
67- // source. This means that the regexp always matches at the first position
68- // that it tries, and you can see if the original regexp matched, or it
69- // was the added zero-width match that matched, by looking at the last
70- // capture. If it is a String, the match participated, otherwise it didn't.
71- return _nativeAnchoredRegExp = makeNative ('$pattern |()' , _isMultiLine,
72- _isCaseSensitive, _isUnicode, _isDotAll, true );
66+ // A sticky regexp is created by adding the "sticky" `y` flag.
67+ // Such a RegExp can only match exactly at the `lastIndex` position.
68+ return _nativeAnchoredRegExp = makeNative (
69+ pattern,
70+ _isMultiLine,
71+ _isCaseSensitive,
72+ _isUnicode,
73+ _isDotAll,
74+ 'y' ,
75+ );
7376 }
7477
7578 bool get _isMultiLine => JS ('bool' , '#.multiline' , _nativeRegExp);
7679 bool get _isCaseSensitive => JS ('bool' , '!#.ignoreCase' , _nativeRegExp);
7780 bool get _isUnicode => JS ('bool' , '#.unicode' , _nativeRegExp);
7881 bool get _isDotAll => JS ('bool' , '#.dotAll' , _nativeRegExp);
7982
80- static makeNative (String source, bool multiLine, bool caseSensitive,
81- bool unicode, bool dotAll, bool global) {
83+ /// Used by [String.split] to determine whether it can use the JavaScript
84+ /// `String.prototype.split` directly. It cannot if the RegExp has capture
85+ /// groups.
86+ bool get _hasCaptures => _hasCapturesCache ?? = _computeHasCaptures ();
87+
88+ static makeNative (
89+ String source,
90+ bool multiLine,
91+ bool caseSensitive,
92+ bool unicode,
93+ bool dotAll,
94+ String extraFlags,
95+ ) {
8296 checkString (source);
83- String m = multiLine == true ? 'm' : '' ;
84- String i = caseSensitive == true ? '' : 'i' ;
97+ String m = multiLine ? 'm' : '' ;
98+ String i = caseSensitive ? '' : 'i' ;
8599 String u = unicode ? 'u' : '' ;
86100 String s = dotAll ? 's' : '' ;
87- String g = global ? 'g' : '' ;
88101 // We're using the JavaScript's try catch instead of the Dart one to avoid
89102 // dragging in Dart runtime support just because of using RegExp.
90103 var regexp = JS (
91- '' ,
92- r'''
104+ '' ,
105+ r'''
93106 (function(source, modifiers) {
94107 try {
95108 return new RegExp(source, modifiers);
96109 } catch (e) {
97110 return e;
98111 }
99112 })(#, # + # + # + # + #)''' ,
100- source,
101- m,
102- i,
103- u,
104- s,
105- g);
113+ source,
114+ m,
115+ i,
116+ u,
117+ s,
118+ extraFlags,
119+ );
106120 if (JS ('bool' , '# instanceof RegExp' , regexp)) return regexp;
107121 // The returned value is the JavaScript exception. Turn it into a
108122 // Dart exception.
109123 String errorMessage = JS ('String' , r'String(#)' , regexp);
110124 throw FormatException ('Illegal RegExp pattern ($errorMessage )' , source);
111125 }
112126
127+ bool _computeHasCaptures () {
128+ // Should only be called once, the first time [_hasCaptures] is read.
129+ assert (_hasCapturesCache == null );
130+ // No captures without parentheses.
131+ if (! pattern.contains ('(' )) return false ;
132+ // Do not try to parse the RegExp, leave that to the RegExp compiler.
133+ // It's more future proof.
134+
135+ // Create RegExp with same capture groups, which always (and quickly)
136+ // matches the empty string, and do that match to get a match array.
137+ // Currently supported flags do not affect capture syntax, and only the
138+ // unicode flag affects parsing at all, so use no other flags.
139+ // The length of the match array is the number of capture groups plus one.
140+ JSArray match = JS (
141+ 'JSExtendableArray' ,
142+ r'new RegExp("(?:)|" + #, #).exec("")' ,
143+ pattern,
144+ _isUnicode ? 'u' : '' ,
145+ );
146+ return match.length > 1 ;
147+ }
148+
113149 RegExpMatch ? firstMatch (String string) {
114- JSArray ? m = JS ('JSExtendableArray|Null' , r'#.exec(#)' , _nativeRegExp,
115- checkString (string));
150+ JSArray ? m = JS (
151+ 'JSExtendableArray|Null' ,
152+ r'#.exec(#)' ,
153+ _nativeRegExp,
154+ checkString (string),
155+ );
116156 if (m == null ) return null ;
117157 return _MatchImplementation (this , m);
118158 }
@@ -149,9 +189,6 @@ class JSSyntaxRegExp implements RegExp {
149189 JS ('void' , '#.lastIndex = #' , regexp, start);
150190 JSArray ? match = JS ('JSExtendableArray|Null' , '#.exec(#)' , regexp, string);
151191 if (match == null ) return null ;
152- // If the last capture group participated, the original regexp did not
153- // match at the start position.
154- if (match.removeLast () != null ) return null ;
155192 return _MatchImplementation (this , match);
156193 }
157194
@@ -189,9 +226,14 @@ class _MatchImplementation implements RegExpMatch {
189226 int get start =>
190227 JS ('returns:int;depends:none;effects:none;gvn:true' , '#.index' , _match);
191228
192- int get end => (start +
193- JS ('returns:int;depends:none;effects:none;gvn:true' , '#[0].length' ,
194- _match)) as int ;
229+ int get end =>
230+ (start +
231+ JS (
232+ 'returns:int;depends:none;effects:none;gvn:true' ,
233+ '#[0].length' ,
234+ _match,
235+ ))
236+ as int ;
195237
196238 // The JS below changes the static type to avoid an implicit cast.
197239 // TODO(sra): Find a nicer way to do this, e.g. unsafeCast.
@@ -224,7 +266,8 @@ class _MatchImplementation implements RegExpMatch {
224266 var groups = JS ('=Object|Null' , '#.groups' , _match);
225267 if (groups != null ) {
226268 var keys = JSArray <String >.markGrowable (
227- JS ('returns:JSExtendableArray;new:true' , 'Object.keys(#)' , groups));
269+ JS ('returns:JSExtendableArray;new:true' , 'Object.keys(#)' , groups),
270+ );
228271 return SubListIterable (keys, 0 , null );
229272 }
230273 return Iterable .empty ();
0 commit comments