@@ -361,22 +361,56 @@ def pascal_case(word: str) -> str:
361361 return "" .join (items )
362362
363363
# Camel-case boundaries are inserted in two passes: first before a capitalised
# word (e.g. the "S" in "HTTPServer"), then between a lowercase letter or digit
# and a following uppercase letter (e.g. the "W" in "someWord").
_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)")  # Xy boundary
_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])")  # aA/0A boundary
_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")

_snakecase_re_multi_us = re.compile(r"__+")
_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
_snakecase_re_leading_us = re.compile(r"^_+")
# Anchored with \Z rather than $: "$" also matches just before a final newline,
# which would report a suffix that is not actually at the end of the string and
# make the length-based slice in snake_case() drop the wrong character.
_snakecase_re_trailing_us = re.compile(r"_+\Z")


def snake_case(word: str) -> str:
    """Convert an identifier to snake_case.

    Rules, in the order they are applied:

    - Hyphens are treated as underscores (so a leading or trailing hyphen
      comes out as an underscore).
    - Leading and trailing underscores are preserved exactly.
    - Runs of interior underscores collapse to a single separator.
    - Each underscore-separated token is split at camelCase boundaries.
    - Letter<->digit boundaries are split as well, except when
      * ``word`` contains an underscore anywhere AND mixes upper and lower
        case (the author's own digit placement is kept), or
      * the token consists only of uppercase letters and digits.
    - The result is lowercased.

    Args:
        word: The identifier to convert.

    Returns:
        The snake_case form of ``word``; ``""`` for an empty input.

    >>> snake_case("someWord")
    'some_word'
    >>> snake_case("HTTPServer")
    'http_server'
    >>> snake_case("version2")
    'version_2'
    >>> snake_case("some_Word2")
    'some_word2'
    >>> snake_case("__init__")
    '__init__'
    """
    if not word:
        return ""

    s = word.replace("-", "_")

    # Peel off edge underscores (e.g. '__init__') so they survive untouched.
    pre_m = _snakecase_re_leading_us.match(s)
    pre = pre_m.group(0) if pre_m else ""
    # Search the remainder only, so an all-underscore input is counted once
    # (as prefix) instead of as both prefix and suffix.
    rest = s[len(pre):]
    suf_m = _snakecase_re_trailing_us.search(rest)
    suf = suf_m.group(0) if suf_m else ""
    core = rest[: len(rest) - len(suf)]

    # Deliberately evaluated on the original `word`: hyphens do not count as
    # author-placed underscores, while leading/trailing underscores do.
    respect_author_digits = ("_" in word) and bool(
        _snakecase_re_mixed_case.search(word)
    )

    parts: List[str] = []
    for t in core.split("_"):
        if not t:
            # Empty token produced by consecutive interior underscores.
            continue
        # camel splits (two-pass)
        t = _snakecase_re_camel_b1.sub(r"\1_\2", t)
        t = _snakecase_re_camel_b2.sub(r"\1_\2", t)
        # letter<->digit split when allowed
        if not respect_author_digits and not _snakecase_re_upper_or_digits.fullmatch(t):
            t = _snakecase_re_alpha_to_digit.sub(r"\1_\2", t)
            t = _snakecase_re_digit_to_alpha.sub(r"\1_\2", t)
        parts.append(t)

    core_snake = "_".join(parts)
    # Defensive normalisation before lowercasing.
    core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower()
    return f"{pre}{core_snake}{suf}"
0 commit comments