1414import time
1515import locale
1616import calendar
17+ import re
1718from re import compile as re_compile
1819from re import sub as re_sub
1920from re import IGNORECASE
@@ -41,6 +42,21 @@ def _findall(haystack, needle):
4142 yield i
4243 i += len (needle )
4344
45+
# Alternative digits used by the lzh_TW locale, indexed by numeric value
# (0 through 31).  These numerals are not positional decimal digits, so
# they cannot be derived from ten digit characters and are spelled out
# explicitly.  Each entry must be the exact string without any padding,
# because values are looked up by index and by equality.
lzh_TW_alt_digits = (
    # 〇:一:二:三:四:五:六:七:八:九
    '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db',
    '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d',
    # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九
    '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db',
    '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d',
    # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九
    '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db',
    '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d',
    # 卅:卅一
    '\u5345', '\u5345\u4e00')
58+
59+
4460class LocaleTime (object ):
4561 """Stores and handles locale-specific information related to time.
4662
@@ -84,6 +100,7 @@ def __init__(self):
84100 self .__calc_weekday ()
85101 self .__calc_month ()
86102 self .__calc_am_pm ()
103+ self .__calc_alt_digits ()
87104 self .__calc_timezone ()
88105 self .__calc_date_time ()
89106 if _getlang () != self .lang :
@@ -119,36 +136,76 @@ def __calc_am_pm(self):
119136 am_pm .append (time .strftime ("%p" , time_tuple ).lower ().strip ())
120137 self .am_pm = am_pm
121138
def __calc_alt_digits(self):
    # Set self.LC_alt_digits by using time.strftime().
    #
    # Possible results:
    #   ()    -- numbers are formatted with ASCII digits only;
    #   list  -- 100 strings built from another set of ten consecutive
    #            decimal digit characters; item n represents number n;
    #   lzh_TW_alt_digits -- the explicit table for the lzh_TW locale;
    #   None  -- the digits could not be classified (either a
    #            non-Gregorian calendar or non-decimal numbers).

    # The magic data should contain all decimal digits.
    time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0))
    s = time.strftime("%x%X", time_tuple)
    if s.isascii():
        # Fast path -- all digits are ASCII.
        self.LC_alt_digits = ()
        return

    digits = ''.join(sorted(set(re.findall(r'\d', s))))
    if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9:
        # All 10 decimal digits from the same set.
        if digits.isascii():
            # All digits are ASCII.
            self.LC_alt_digits = ()
            return

        # Two-character representation of every number from 0 to 99;
        # digits is sorted, so item n is the representation of n.
        self.LC_alt_digits = [a + b for a in digits for b in digits]
        # Test whether the numbers contain leading zero.
        time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0))
        if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2):
            # No leading zero: numbers below 10 are a single digit.
            self.LC_alt_digits[:10] = digits
        return

    # Either non-Gregorian calendar or non-decimal numbers.
    # issubset() compares against the individual characters of s, so
    # every element of the set must be exactly one character.
    if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s):
        # lzh_TW
        self.LC_alt_digits = lzh_TW_alt_digits
        return

    self.LC_alt_digits = None
172+
122173 def __calc_date_time (self ):
123- # Set self.date_time , self.date, & self.time by using
124- # time.strftime().
174+ # Set self.LC_date_time , self.LC_date, self.LC_time and
175+ # self.LC_time_ampm by using time.strftime().
125176
126177 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
127178 # overloaded numbers is minimized. The order in which searches for
128179 # values within the format string is very important; it eliminates
129180 # possible ambiguity for what something represents.
130181 time_tuple = time .struct_time ((1999 ,3 ,17 ,22 ,44 ,55 ,2 ,76 ,0 ))
131182 time_tuple2 = time .struct_time ((1999 ,1 ,3 ,1 ,1 ,1 ,6 ,3 ,0 ))
132- replacement_pairs = [
183+ replacement_pairs = []
184+
185+ # Non-ASCII digits
186+ if self .LC_alt_digits or self .LC_alt_digits is None :
187+ for n , d in [(19 , '%OC' ), (99 , '%Oy' ), (22 , '%OH' ),
188+ (44 , '%OM' ), (55 , '%OS' ), (17 , '%Od' ),
189+ (3 , '%Om' ), (2 , '%Ow' ), (10 , '%OI' )]:
190+ if self .LC_alt_digits is None :
191+ s = chr (0x660 + n // 10 ) + chr (0x660 + n % 10 )
192+ replacement_pairs .append ((s , d ))
193+ if n < 10 :
194+ replacement_pairs .append ((s [1 ], d ))
195+ elif len (self .LC_alt_digits ) > n :
196+ replacement_pairs .append ((self .LC_alt_digits [n ], d ))
197+ else :
198+ replacement_pairs .append ((time .strftime (d , time_tuple ), d ))
199+ replacement_pairs += [
133200 ('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
134201 ('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
135202 ('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
136203 # '3' needed for when no leading zero.
137204 ('2' , '%w' ), ('10' , '%I' ),
138- # Non-ASCII digits
139- ('\u0661 \u0669 \u0669 \u0669 ' , '%Y' ),
140- ('\u0669 \u0669 ' , '%Oy' ),
141- ('\u0662 \u0662 ' , '%OH' ),
142- ('\u0664 \u0664 ' , '%OM' ),
143- ('\u0665 \u0665 ' , '%OS' ),
144- ('\u0661 \u0667 ' , '%Od' ),
145- ('\u0660 \u0663 ' , '%Om' ),
146- ('\u0663 ' , '%Om' ),
147- ('\u0662 ' , '%Ow' ),
148- ('\u0661 \u0660 ' , '%OI' ),
149205 ]
206+
150207 date_time = []
151- for directive in ('%c' , '%x' , '%X' ):
208+ for directive in ('%c' , '%x' , '%X' , '%r' ):
152209 current_format = time .strftime (directive , time_tuple ).lower ()
153210 current_format = current_format .replace ('%' , '%%' )
154211 # The month and the day of the week formats are treated specially
@@ -172,9 +229,10 @@ def __calc_date_time(self):
172229 if tz :
173230 current_format = current_format .replace (tz , "%Z" )
174231 # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
175- current_format = re_sub (r'\d(?<![0-9])' ,
176- lambda m : chr (0x0660 + int (m [0 ])),
177- current_format )
232+ if not current_format .isascii () and self .LC_alt_digits is None :
233+ current_format = re_sub (r'\d(?<![0-9])' ,
234+ lambda m : chr (0x0660 + int (m [0 ])),
235+ current_format )
178236 for old , new in replacement_pairs :
179237 current_format = current_format .replace (old , new )
180238 # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -189,6 +247,7 @@ def __calc_date_time(self):
189247 self .LC_date_time = date_time [0 ]
190248 self .LC_date = date_time [1 ]
191249 self .LC_time = date_time [2 ]
250+ self .LC_time_ampm = date_time [3 ]
192251
193252 def __find_month_format (self , directive ):
194253 """Find the month format appropriate for the current locale.
@@ -213,7 +272,7 @@ def __find_month_format(self, directive):
213272 full_indices &= indices
214273 indices = set (_findall (datetime , self .a_month [m ]))
215274 if abbr_indices is None :
216- abbr_indices = indices
275+ abbr_indices = set ( indices )
217276 else :
218277 abbr_indices &= indices
219278 if not full_indices and not abbr_indices :
@@ -241,7 +300,7 @@ def __find_weekday_format(self, directive):
241300 if self .f_weekday [wd ] != self .a_weekday [wd ]:
242301 indices = set (_findall (datetime , self .a_weekday [wd ]))
243302 if abbr_indices is None :
244- abbr_indices = indices
303+ abbr_indices = set ( indices )
245304 else :
246305 abbr_indices &= indices
247306 if not full_indices and not abbr_indices :
@@ -288,8 +347,10 @@ def __init__(self, locale_time=None):
288347 # The " [1-9]" part of the regex is to make %c from ANSI C work
289348 'd' : r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])" ,
290349 'f' : r"(?P<f>[0-9]{1,6})" ,
291- 'H' : r"(?P<H>2[0-3]|[0-1]\d|\d)" ,
350+ 'H' : r"(?P<H>2[0-3]|[0-1]\d|\d| \d)" ,
351+ 'k' : r"(?P<H>2[0-3]|[0-1]\d|\d| \d)" ,
292352 'I' : r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])" ,
353+ 'l' : r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])" ,
293354 'G' : r"(?P<G>\d\d\d\d)" ,
294355 'j' : r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])" ,
295356 'm' : r"(?P<m>1[0-2]|0[1-9]|[1-9])" ,
@@ -312,16 +373,49 @@ def __init__(self, locale_time=None):
312373 for tz in tz_names ),
313374 'Z' ),
314375 '%' : '%' }
315- for d in 'dmyHIMS' :
316- mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
317- mapping ['Ow' ] = r'(?P<w>\d)'
376+ if self .locale_time .LC_alt_digits is None :
377+ for d in 'dmyCHIMS' :
378+ mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
379+ mapping ['Ow' ] = r'(?P<w>\d)'
380+ else :
381+ mapping .update ({
382+ 'Od' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :32 ], 'd' ,
383+ '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]' ),
384+ 'Om' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :13 ], 'm' ,
385+ '1[0-2]|0[1-9]|[1-9]' ),
386+ 'Ow' : self .__seqToRE (self .locale_time .LC_alt_digits [:7 ], 'w' ,
387+ '[0-6]' ),
388+ 'Oy' : self .__seqToRE (self .locale_time .LC_alt_digits , 'y' ,
389+ '[0-9][0-9]' ),
390+ 'OC' : self .__seqToRE (self .locale_time .LC_alt_digits , 'C' ,
391+ '[0-9][0-9]' ),
392+ 'OH' : self .__seqToRE (self .locale_time .LC_alt_digits [:24 ], 'H' ,
393+ '2[0-3]|[0-1][0-9]|[0-9]' ),
394+ 'OI' : self .__seqToRE (self .locale_time .LC_alt_digits [1 :13 ], 'I' ,
395+ '1[0-2]|0[1-9]|[1-9]' ),
396+ 'OM' : self .__seqToRE (self .locale_time .LC_alt_digits [:60 ], 'M' ,
397+ '[0-5][0-9]|[0-9]' ),
398+ 'OS' : self .__seqToRE (self .locale_time .LC_alt_digits [:62 ], 'S' ,
399+ '6[0-1]|[0-5][0-9]|[0-9]' ),
400+ })
401+ mapping .update ({
402+ 'e' : mapping ['d' ],
403+ 'Oe' : mapping ['Od' ],
404+ 'P' : mapping ['p' ],
405+ 'Op' : mapping ['p' ],
406+ 'W' : mapping ['U' ].replace ('U' , 'W' ),
407+ })
318408 mapping ['W' ] = mapping ['U' ].replace ('U' , 'W' )
409+
319410 base .__init__ (mapping )
411+ base .__setitem__ ('T' , self .pattern ('%H:%M:%S' ))
412+ base .__setitem__ ('R' , self .pattern ('%H:%M' ))
413+ base .__setitem__ ('r' , self .pattern (self .locale_time .LC_time_ampm ))
320414 base .__setitem__ ('X' , self .pattern (self .locale_time .LC_time ))
321415 base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
322416 base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
323417
324- def __seqToRE (self , to_convert , directive ):
418+ def __seqToRE (self , to_convert , directive , altregex = None ):
325419 """Convert a list to a regex string for matching a directive.
326420
327421 Want possible matching values to be from longest to shortest. This
@@ -337,8 +431,9 @@ def __seqToRE(self, to_convert, directive):
337431 else :
338432 return ''
339433 regex = '|' .join (re_escape (stuff ) for stuff in to_convert )
340- regex = '(?P<%s>%s' % (directive , regex )
341- return '%s)' % regex
434+ if altregex is not None :
435+ regex += '|' + altregex
436+ return '(?P<%s>%s)' % (directive , regex )
342437
343438 def pattern (self , format ):
344439 """Return regex pattern for the format string.
@@ -365,7 +460,7 @@ def repl(m):
365460 nonlocal day_of_month_in_format
366461 day_of_month_in_format = True
367462 return self [format_char ]
368- format = re_sub (r'%([OE]?\\?.?)' , repl , format )
463+ format = re_sub (r'%[-_0^#]*[0-9]* ([OE]?\\?.?)' , repl , format )
369464 if day_of_month_in_format and not year_in_format :
370465 import warnings
371466 warnings .warn ("""\
@@ -467,37 +562,50 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
467562 # values
468563 weekday = julian = None
469564 found_dict = found .groupdict ()
565+ if locale_time .LC_alt_digits :
566+ def parse_int (s ):
567+ try :
568+ return locale_time .LC_alt_digits .index (s )
569+ except ValueError :
570+ return int (s )
571+ else :
572+ parse_int = int
573+
470574 for group_key in found_dict .keys ():
471575 # Directives not explicitly handled below:
472576 # c, x, X
473577 # handled by making out of other directives
474578 # U, W
475579 # worthless without day of the week
476580 if group_key == 'y' :
477- year = int (found_dict ['y' ])
478- # Open Group specification for strptime() states that a %y
479- #value in the range of [00, 68] is in the century 2000, while
480- #[69,99] is in the century 1900
481- if year <= 68 :
482- year += 2000
581+ year = parse_int (found_dict ['y' ])
582+ if 'C' in found_dict :
583+ century = parse_int (found_dict ['C' ])
584+ year += century * 100
483585 else :
484- year += 1900
586+ # Open Group specification for strptime() states that a %y
587+ #value in the range of [00, 68] is in the century 2000, while
588+ #[69,99] is in the century 1900
589+ if year <= 68 :
590+ year += 2000
591+ else :
592+ year += 1900
485593 elif group_key == 'Y' :
486594 year = int (found_dict ['Y' ])
487595 elif group_key == 'G' :
488596 iso_year = int (found_dict ['G' ])
489597 elif group_key == 'm' :
490- month = int (found_dict ['m' ])
598+ month = parse_int (found_dict ['m' ])
491599 elif group_key == 'B' :
492600 month = locale_time .f_month .index (found_dict ['B' ].lower ())
493601 elif group_key == 'b' :
494602 month = locale_time .a_month .index (found_dict ['b' ].lower ())
495603 elif group_key == 'd' :
496- day = int (found_dict ['d' ])
604+ day = parse_int (found_dict ['d' ])
497605 elif group_key == 'H' :
498- hour = int (found_dict ['H' ])
606+ hour = parse_int (found_dict ['H' ])
499607 elif group_key == 'I' :
500- hour = int (found_dict ['I' ])
608+ hour = parse_int (found_dict ['I' ])
501609 ampm = found_dict .get ('p' , '' ).lower ()
502610 # If there was no AM/PM indicator, we'll treat this like AM
503611 if ampm in ('' , locale_time .am_pm [0 ]):
@@ -513,9 +621,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
513621 if hour != 12 :
514622 hour += 12
515623 elif group_key == 'M' :
516- minute = int (found_dict ['M' ])
624+ minute = parse_int (found_dict ['M' ])
517625 elif group_key == 'S' :
518- second = int (found_dict ['S' ])
626+ second = parse_int (found_dict ['S' ])
519627 elif group_key == 'f' :
520628 s = found_dict ['f' ]
521629 # Pad to always return microseconds.
0 commit comments