@@ -360,39 +360,56 @@ def pascal_case(word: str) -> str:
360360 return "" .join (items )
361361
362362
# Camel-case boundaries, applied in two passes so that acronym runs such as
# "HTTPResponse" and plain "someWord" are both handled.
_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)")  # Xy boundary
_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])")  # aA/0A boundary
# Letter <-> digit boundaries (only applied when snake_case allows it).
_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")

_snakecase_re_multi_us = re.compile(r"__+")
_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
_snakecase_re_leading_us = re.compile(r"^_+")
# \Z (not $) so the run must sit at the very end of the string; "$" would
# also match just before a final newline and the newline would be lost.
_snakecase_re_trailing_us = re.compile(r"_+\Z")


def snake_case(word: str) -> str:
    """Convert an identifier to snake_case.

    Rules, in the order they are applied:

    - Hyphens are treated as underscores.
    - Leading and trailing underscore runs are kept as they are.
    - Interior underscore runs collapse to a single underscore.
    - Each underscore-separated token is split at camel-case boundaries
      (including a digit followed by an uppercase letter).
    - Each token is also split at letter<->digit boundaries, unless either
      * the given word contains an underscore and has both uppercase and
        lowercase letters (the digit placement is taken as deliberate), or
      * the token, after camel splitting, consists only of uppercase
        letters and digits.
    - The result is lower-cased.

    Args:
        word: The identifier to convert.

    Returns:
        The snake_case form of ``word``; an empty string for empty input.

    >>> snake_case("someWord")
    'some_word'
    >>> snake_case("__init__")
    '__init__'
    >>> snake_case("HTTP2")
    'http2'
    >>> snake_case("version2")
    'version_2'
    >>> snake_case("some_Word2")
    'some_word2'
    """
    if not word:
        return ""

    normalized = word.replace("-", "_")

    # Peel off the edge underscores (e.g. '__init__') so they survive the
    # normalisation applied to the core below.
    leading = _snakecase_re_leading_us.match(normalized)
    prefix = leading.group(0) if leading else ""
    remainder = normalized[len(prefix):]
    trailing = _snakecase_re_trailing_us.search(remainder)
    suffix = trailing.group(0) if trailing else ""
    core = remainder[: len(remainder) - len(suffix)]

    # Evaluated on the word as given (before hyphen conversion).
    keep_digits_attached = "_" in word and bool(
        _snakecase_re_mixed_case.search(word)
    )

    parts: List[str] = []
    for token in core.split("_"):
        if not token:
            # Empty pieces come from consecutive interior underscores.
            continue
        # Camel splits (two passes).
        token = _snakecase_re_camel_b1.sub(r"\1_\2", token)
        token = _snakecase_re_camel_b2.sub(r"\1_\2", token)
        # Letter<->digit splits, when allowed. The all-upper check sees the
        # token after camel splitting, so a token that already received an
        # underscore above no longer counts as all-upper.
        if not keep_digits_attached and not _snakecase_re_upper_or_digits.fullmatch(token):
            token = _snakecase_re_alpha_to_digit.sub(r"\1_\2", token)
            token = _snakecase_re_digit_to_alpha.sub(r"\1_\2", token)
        parts.append(token)

    core_snake = _snakecase_re_multi_us.sub("_", "_".join(parts)).strip("_").lower()
    # The three pieces are concatenated with no separator.
    return f"{prefix}{core_snake}{suffix}"
0 commit comments