@@ -38,7 +38,11 @@ class InvalidCodepointContext(IDNAError):
3838
3939
4040def _combining_class (cp ):
41- return unicodedata .combining (unichr (cp ))
41+ v = unicodedata .combining (unichr (cp ))
42+ if v == 0 :
43+ if not unicodedata .name (unichr (cp )):
44+ raise ValueError ("Unknown character in unicodedata" )
45+ return v
4246
4347def _is_script (cp , script ):
4448 return intranges_contain (ord (cp ), idnadata .scripts [script ])
@@ -75,7 +79,6 @@ def check_bidi(label, check_ltr=False):
7579 raise IDNABidiError ('Unknown directionality in label {0} at position {1}' .format (repr (label ), idx ))
7680 if direction in ['R' , 'AL' , 'AN' ]:
7781 bidi_label = True
78- break
7982 if not bidi_label and not check_ltr :
8083 return True
8184
@@ -248,8 +251,13 @@ def check_label(label):
248251 if intranges_contain (cp_value , idnadata .codepoint_classes ['PVALID' ]):
249252 continue
250253 elif intranges_contain (cp_value , idnadata .codepoint_classes ['CONTEXTJ' ]):
251- if not valid_contextj (label , pos ):
252- raise InvalidCodepointContext ('Joiner {0} not allowed at position {1} in {2}' .format (_unot (cp_value ), pos + 1 , repr (label )))
254+ try :
255+ if not valid_contextj (label , pos ):
256+ raise InvalidCodepointContext ('Joiner {0} not allowed at position {1} in {2}' .format (
257+ _unot (cp_value ), pos + 1 , repr (label )))
258+ except ValueError :
259+ raise IDNAError ('Unknown codepoint adjacent to joiner {0} at position {1} in {2}' .format (
260+ _unot (cp_value ), pos + 1 , repr (label )))
253261 elif intranges_contain (cp_value , idnadata .codepoint_classes ['CONTEXTO' ]):
254262 if not valid_contexto (label , pos ):
255263 raise InvalidCodepointContext ('Codepoint {0} not allowed at position {1} in {2}' .format (_unot (cp_value ), pos + 1 , repr (label )))
@@ -263,10 +271,7 @@ def alabel(label):
263271
264272 try :
265273 label = label .encode ('ascii' )
266- try :
267- ulabel (label )
268- except IDNAError :
269- raise IDNAError ('The label {0} is not a valid A-label' .format (label ))
274+ ulabel (label )
270275 if not valid_label_length (label ):
271276 raise IDNAError ('Label too long' )
272277 return label
@@ -321,10 +326,10 @@ def uts46_remap(domain, std3_rules=True, transitional=False):
321326 replacement = uts46row [2 ] if len (uts46row ) == 3 else None
322327 if (status == "V" or
323328 (status == "D" and not transitional ) or
324- (status == "3" and std3_rules and replacement is None )):
329+ (status == "3" and not std3_rules and replacement is None )):
325330 output += char
326331 elif replacement is not None and (status == "M" or
327- (status == "3" and std3_rules ) or
332+ (status == "3" and not std3_rules ) or
328333 (status == "D" and transitional )):
329334 output += replacement
330335 elif status != "I" :
@@ -348,15 +353,17 @@ def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
348353 labels = s .split ('.' )
349354 else :
350355 labels = _unicode_dots_re .split (s )
351- while labels and not labels [0 ]:
352- del labels [0 ]
353- if not labels :
356+ if not labels or labels == ['' ]:
354357 raise IDNAError ('Empty domain' )
355358 if labels [- 1 ] == '' :
356359 del labels [- 1 ]
357360 trailing_dot = True
358361 for label in labels :
359- result .append (alabel (label ))
362+ s = alabel (label )
363+ if s :
364+ result .append (s )
365+ else :
366+ raise IDNAError ('Empty label' )
360367 if trailing_dot :
361368 result .append (b'' )
362369 s = b'.' .join (result )
@@ -377,15 +384,17 @@ def decode(s, strict=False, uts46=False, std3_rules=False):
377384 labels = _unicode_dots_re .split (s )
378385 else :
379386 labels = s .split (u'.' )
380- while labels and not labels [0 ]:
381- del labels [0 ]
382- if not labels :
387+ if not labels or labels == ['' ]:
383388 raise IDNAError ('Empty domain' )
384389 if not labels [- 1 ]:
385390 del labels [- 1 ]
386391 trailing_dot = True
387392 for label in labels :
388- result .append (ulabel (label ))
393+ s = ulabel (label )
394+ if s :
395+ result .append (s )
396+ else :
397+ raise IDNAError ('Empty label' )
389398 if trailing_dot :
390399 result .append (u'' )
391400 return u'.' .join (result )
0 commit comments