2121# - MONTH ZZZZ, with ZZZZ being 4 digits
2222# - MONTH XX, ZZZZ with XX being 1 or 2 digits and ZZZZ being 4 digits
2323DATE_REGEX = re .compile (
24- r'(\b|(?<=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
25- r'(\b|(?<=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
26- r'(\b|(?<=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
27- r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
28- r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
24+ r'(?:\b|[_-])(' +
25+ r'[0-9]{1,2}[\.\/-][0-9]{1,2}[\.\/-](?:[0-9]{4}|[0-9]{2})|' +
26+ r'(?:[0-9]{4}|[0-9]{2})[\.\/-][0-9]{1,2}[\.\/-][0-9]{1,2}|' +
27+ r'[0-9]{1,2}[\. ]+[^ ]{3,9} (?:[0-9]{4}|[0-9]{2})|' +
28+ r'[^\W\d_]{3,9} [0-9]{1,2}, [0-9]{4}|' +
29+ r'[^\W\d_]{3,9} [0-9]{4}' +
30+ r')(?:\b|[_-])'
2931)
3032
3133
@@ -104,7 +106,7 @@ def __parser(ds, date_order):
104106 if self .FILENAME_DATE_ORDER :
105107 self .log ("info" , "Checking document title for date" )
106108 for m in re .finditer (DATE_REGEX , title ):
107- date_string = m .group (0 )
109+ date_string = m .group (1 )
108110
109111 try :
110112 date = __parser (date_string , self .FILENAME_DATE_ORDER )
@@ -130,7 +132,7 @@ def __parser(ds, date_order):
130132
131133 # Iterate through all regex matches in text and try to parse the date
132134 for m in re .finditer (DATE_REGEX , text ):
133- date_string = m .group (0 )
135+ date_string = m .group (1 )
134136
135137 try :
136138 date = __parser (date_string , self .DATE_ORDER )
0 commit comments