38
38
# SOFTWARE.
39
39
40
40
from array import array
from mmap import mmap
41
42
42
43
_mappingproxy = type (type .__dict__ )
43
44
@@ -76,7 +77,7 @@ def __init__(self, compiled_pattern, flags, sticky):
76
77
self .__compiled_pattern__ = compiled_pattern
77
78
self .__sticky__ = sticky
78
79
self .pattern = compiled_pattern .pattern
79
- self .flags = flags
80
+ self .flags = { name : bool ( flags & flag ) for flag , name in FLAG_NAMES }
80
81
self .groupCount = 1 + compiled_pattern .groups
81
82
self .groups = _NamedCaptureGroups (compiled_pattern .groupindex )
82
83
@@ -110,7 +111,7 @@ def fallback_compiler(pattern, flags):
110
111
111
112
compiled_pattern = _sre_compile (pattern , bit_flags )
112
113
113
- return _ExecutablePattern (compiled_pattern , flags , sticky )
114
+ return _ExecutablePattern (compiled_pattern , bit_flags , sticky )
114
115
115
116
def _new_compile (p , flags = 0 ):
116
117
if _with_tregex and isinstance (p , (str , bytes )):
@@ -145,19 +146,19 @@ def setup(sre_compiler, error_class, flags_table):
145
146
FLAG_DEBUG = 128
146
147
FLAG_ASCII = 256
147
148
# (flag bit, bare flag name) pairs. The names carry no "re." prefix: they are
# also used as lookup keys into per-regex flag mappings, and code that needs
# the qualified spelling prepends "re." itself.
FLAG_NAMES = [
    (FLAG_TEMPLATE, "TEMPLATE"),
    (FLAG_IGNORECASE, "IGNORECASE"),
    (FLAG_LOCALE, "LOCALE"),
    (FLAG_MULTILINE, "MULTILINE"),
    (FLAG_DOTALL, "DOTALL"),
    (FLAG_UNICODE, "UNICODE"),
    (FLAG_VERBOSE, "VERBOSE"),
    (FLAG_DEBUG, "DEBUG"),
    (FLAG_ASCII, "ASCII"),
]
158
159
159
160
160
- class SRE_Match ():
161
+ class Match ():
161
162
def __init__ (self , pattern , pos , endpos , result , input_str , compiled_regex ):
162
163
self .__result = result
163
164
self .__compiled_regex = compiled_regex
@@ -191,10 +192,12 @@ def __getitem__(self, item):
191
192
192
193
def __groupidx(self, idx):
    """Translate a group reference into a numeric group index.

    ``idx`` is treated as a number when it exposes ``__index__``; it must
    then satisfy ``0 <= idx < groupCount`` of the compiled regex. Any other
    object is looked up in the compiled regex's ``groups`` mapping.

    Raises:
        IndexError: if the number is out of range, the lookup fails, or any
            other exception occurs while resolving the reference.
    """
    try:
        if hasattr(idx, '__index__'):
            int_idx = int(idx)
            if 0 <= int_idx < self.__compiled_regex.groupCount:
                return int_idx
        else:
            return self.__compiled_regex.groups[idx]
    except Exception:
        # Deliberately best-effort: every failure is reported uniformly as
        # "no such group" below.
        pass
    raise IndexError("no such group")
@@ -204,8 +207,10 @@ def __group(self, idx, default=None):
204
207
start = self .__result .getStart (idxarg )
205
208
if start < 0 :
206
209
return default
207
- else :
210
+ elif isinstance ( self . __input_str , str ) :
208
211
return self .__input_str [start :self .__result .getEnd (idxarg )]
212
+ else :
213
+ return bytes (self .__input_str [start :self .__result .getEnd (idxarg )])
209
214
210
215
def groupdict (self , default = None ):
211
216
groups = self .__compiled_regex .groups
@@ -221,6 +226,14 @@ def start(self, groupnum=0):
221
226
idxarg = self .__groupidx (groupnum )
222
227
return self .__result .getStart (idxarg )
223
228
229
def expand(self, template):
    """Expand ``template`` against this match by delegating to ``re._expand``.

    ``re._expand`` receives this match's ``__re`` attribute, the match itself
    and ``template``; its result is returned unchanged.
    """
    # Imported at call time rather than at module import time.
    import re
    return re._expand(self.__re, self, template)
232
+
233
@property
def regs(self):
    """Tuple holding ``self.span(i)`` for every group index ``i``.

    Indices run from 0 up to (but excluding) the compiled regex's
    ``groupCount``.
    """
    group_count = self.__compiled_regex.groupCount
    return tuple(self.span(i) for i in range(group_count))
236
+
224
237
@property
def string(self):
    """Return the input object stored on this match as ``__input_str``."""
    return self.__input_str
@@ -252,7 +265,13 @@ def lastindex(self):
252
265
return lastindex
253
266
254
267
def __repr__(self):
    """Return ``<TypeName object; span=..., match=...>`` for this match.

    The type name is taken from ``type(self).__name__``; ``span`` and
    ``match`` are the ``repr`` of ``self.span()`` and ``self.group()``.
    """
    return "<%s object; span=%r, match=%r>" % (type(self).__name__, self.span(), self.group())
269
+
270
def __copy__(self):
    """Shallow-copy hook: returns this very object, no copy is made."""
    return self
272
+
273
def __deepcopy__(self, memo):
    """Deep-copy hook: returns this very object; ``memo`` is not used."""
    return self
256
275
257
276
def _append_end_assert (pattern ):
258
277
if isinstance (pattern , str ):
@@ -261,18 +280,18 @@ def _append_end_assert(pattern):
261
280
return pattern if pattern .endswith (rb"\Z" ) else pattern + rb"\Z"
262
281
263
282
def _is_bytes_like(object):
    """Return True if ``object`` is one of the accepted binary buffer types.

    Accepted types are ``bytes``, ``bytearray``, ``memoryview``,
    ``array.array`` and ``mmap.mmap`` (including subclasses).
    """
    return isinstance(object, (bytes, bytearray, memoryview, array, mmap))
265
284
266
- class SRE_Pattern ():
285
+ class Pattern ():
267
286
def __init__ (self , pattern , flags ):
268
287
self .__binary = isinstance (pattern , bytes )
269
288
self .pattern = pattern
270
- self .flags = flags
289
+ self .__input_flags = flags
271
290
flags_str = []
272
- for char ,flag in FLAGS .items ():
291
+ for char , flag in FLAGS .items ():
273
292
if flags & flag :
274
293
flags_str .append (char )
275
- self .flags_str = "" .join (flags_str )
294
+ self .__flags_str = "" .join (flags_str )
276
295
self .__compiled_regexes = {}
277
296
compiled_regex = self .__tregex_compile (self .pattern )
278
297
self .groups = compiled_regex .groupCount - 1
@@ -283,6 +302,19 @@ def __init__(self, pattern, flags):
283
302
group_names = dir (groups )
284
303
self .groupindex = _mappingproxy ({name : groups [name ] for name in group_names })
285
304
305
@property
def flags(self):
    """Return the flag bits of this pattern as a single integer.

    Flags can be specified both in the flags argument and inline in the
    regex, so the flags reported by the compiled regex are merged back into
    the flags that were passed in.
    """
    combined = self.__input_flags
    regex_flags = self.__tregex_compile(self.pattern).flags
    for flag, name in FLAG_NAMES:
        try:
            is_set = regex_flags[name]
        except KeyError:
            # The compiled regex does not report this flag; leave it as is.
            continue
        if is_set:
            combined |= flag
    return combined
317
+
286
318
def __check_input_type (self , input ):
287
319
if not isinstance (input , str ) and not _is_bytes_like (input ):
288
320
raise TypeError ("expected string or bytes-like object" )
@@ -298,7 +330,7 @@ def __check_pos(pos):
298
330
299
331
def __tregex_compile (self , pattern , flags = None ):
300
332
if flags is None :
301
- flags = self .flags_str
333
+ flags = self .__flags_str
302
334
if (pattern , flags ) not in self .__compiled_regexes :
303
335
try :
304
336
self .__compiled_regexes [(pattern , flags )] = tregex_compile_internal (pattern , flags , fallback_compiler )
@@ -317,7 +349,7 @@ def __repr__(self):
317
349
for code , name in FLAG_NAMES :
318
350
if flags & code :
319
351
flags -= code
320
- flag_items .append (name )
352
+ flag_items .append (f're. { name } ' )
321
353
if flags != 0 :
322
354
flag_items .append ("0x%x" % flags )
323
355
if len (flag_items ) == 0 :
@@ -331,15 +363,21 @@ def __repr__(self):
331
363
def __eq__(self, other):
    """Compare two patterns by their ``pattern`` and ``flags``.

    Returns ``True`` for the same object, ``NotImplemented`` when ``other``
    is not exactly a ``Pattern`` (subclasses are not accepted), and otherwise
    whether both ``pattern`` and ``flags`` are equal.
    """
    if self is other:
        return True
    if type(other) is not Pattern:
        return NotImplemented
    return self.pattern == other.pattern and self.flags == other.flags
337
369
338
370
def __hash__(self):
    """Hash derived from ``pattern`` and ``flags``, the fields ``__eq__`` compares."""
    # Parentheses only spell out the existing precedence: "*" binds tighter than "^".
    return (hash(self.pattern) * 31) ^ hash(self.flags)
340
372
373
def __copy__(self):
    """Shallow-copy hook: returns this very object, no copy is made."""
    return self
375
+
376
def __deepcopy__(self, memo):
    """Deep-copy hook: returns this very object; ``memo`` is not used."""
    return self
378
+
341
379
def _search (self , pattern , string , pos , endpos , sticky = False ):
342
- pattern = self .__tregex_compile (pattern , self .flags_str + ("y" if sticky else "" ))
380
+ pattern = self .__tregex_compile (pattern , self .__flags_str + ("y" if sticky else "" ))
343
381
input_str = string
344
382
if endpos == - 1 or endpos >= len (string ):
345
383
endpos = len (string )
@@ -348,7 +386,7 @@ def _search(self, pattern, string, pos, endpos, sticky=False):
348
386
input_str = string [:endpos ]
349
387
result = tregex_call_exec (pattern .exec , input_str , min (pos , endpos % len (string ) + 1 ))
350
388
if result .isMatch :
351
- return SRE_Match (self , pos , endpos , result , input_str , pattern )
389
+ return Match (self , pos , endpos , result , input_str , pattern )
352
390
else :
353
391
return None
354
392
@@ -389,7 +427,7 @@ def finditer(self, string, pos=0, endpos=-1):
389
427
if not result .isMatch :
390
428
break
391
429
else :
392
- yield SRE_Match (self , pos , endpos , result , string , compiled_regex )
430
+ yield Match (self , pos , endpos , result , string , compiled_regex )
393
431
no_progress = (result .getStart (0 ) == result .getEnd (0 ))
394
432
pos = result .getEnd (0 ) + no_progress
395
433
return
@@ -411,7 +449,7 @@ def findall(self, string, pos=0, endpos=-1):
411
449
elif compiled_regex .groupCount == 2 :
412
450
matchlist .append (self .__sanitize_out_type (string [result .getStart (1 ):result .getEnd (1 )]))
413
451
else :
414
- matchlist .append (tuple (map (self .__sanitize_out_type , SRE_Match (self , pos , endpos , result , string , compiled_regex ).groups ())))
452
+ matchlist .append (tuple (map (self .__sanitize_out_type , Match (self , pos , endpos , result , string , compiled_regex ).groups ())))
415
453
no_progress = (result .getStart (0 ) == result .getEnd (0 ))
416
454
pos = result .getEnd (0 ) + no_progress
417
455
return matchlist
@@ -433,11 +471,10 @@ def subn(self, repl, string, count=0):
433
471
else :
434
472
literal = b'\\ ' not in repl
435
473
if not literal :
436
- import sre_parse
437
- template = sre_parse .parse_template (repl , self )
438
-
439
- def repl (match ):
440
- return sre_parse .expand_template (template , match )
474
+ import re
475
+ repl = re ._subx (self , repl )
476
+ if not callable (repl ):
477
+ literal = True
441
478
442
479
while (count == 0 or n < count ) and pos <= len (string ):
443
480
match_result = tregex_call_exec (pattern .exec , string , pos )
@@ -450,7 +487,7 @@ def repl(match):
450
487
if literal :
451
488
result .append (repl )
452
489
else :
453
- _srematch = SRE_Match (self , pos , - 1 , match_result , string , pattern )
490
+ _srematch = Match (self , pos , - 1 , match_result , string , pattern )
454
491
_repl = repl (_srematch )
455
492
result .append (_repl )
456
493
pos = end
@@ -492,8 +529,37 @@ def split(self, string, maxsplit=0):
492
529
result .append (self .__sanitize_out_type (string [collect_pos :]))
493
530
return result
494
531
532
def scanner(self, string, pos=0, endpos=None):
    """Create an ``SREScanner`` for this pattern over ``string``.

    ``pos`` and ``endpos`` are handed to the scanner as its start and end
    positions without any adjustment.
    """
    return SREScanner(self, string, pos, endpos)
534
+
535
+
536
class SREScanner(object):
    """Stateful helper that applies a pattern's ``match``/``search`` repeatedly.

    The scanner remembers the position at which the next attempt starts and
    advances it after every call, so successive calls walk through the input.
    """

    def __init__(self, pattern, string, start, end):
        """Store the pattern, the input and the start/end positions."""
        self.pattern = pattern
        self._string = string
        self._start = start
        self._end = end

    def _match_search(self, matcher):
        """Run ``matcher(string, start, end)`` once and advance the start position.

        Returns ``None`` without calling ``matcher`` once the start position
        is past the end of the input; otherwise returns whatever ``matcher``
        returned.
        """
        if self._start > len(self._string):
            return None
        match = matcher(self._string, self._start, self._end)
        if match is None:
            self._start += 1
        else:
            self._start = match.end()
            # An empty match would make no progress; step one position ahead.
            if match.start() == self._start:
                self._start += 1
        return match

    def match(self):
        """Try the pattern's ``match`` at the current position."""
        return self._match_search(self.pattern.match)

    def search(self):
        """Try the pattern's ``search`` from the current position."""
        return self._match_search(self.pattern.search)
560
+
495
561
496
- _t_compile = SRE_Pattern
562
+ _t_compile = Pattern
497
563
498
564
def compile (pattern , flags , code , groups , groupindex , indexgroup ):
499
565
import _cpython_sre
0 commit comments