Skip to content

Commit 1144412

Browse files
committed
improve the util:snake_case
1 parent c970668 commit 1144412

File tree

2 files changed

+73
-14
lines changed

2 files changed

+73
-14
lines changed

compiler/bitproto/utils.py

Lines changed: 48 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -361,22 +361,56 @@ def pascal_case(word: str) -> str:
361361
return "".join(items)
362362

363363

364-
# Uppercase preceded by a lowercase marks the start of a new camelCase word
365-
_snake_case_regex_camel_match = re.compile(r"(?<=[a-z])([A-Z]+[a-z0-9]*)")
364+
_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)") # Xy boundary
365+
_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])") # aA/0A boundary
366+
_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
367+
_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")
366368

369+
_snakecase_re_multi_us = re.compile(r"__+")
370+
_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
371+
_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
372+
_snakecase_re_leading_us = re.compile(r"^_+")
373+
_snakecase_re_trailing_us = re.compile(r"_+$")
367374

368-
def snake_case(word: str) -> str:
369-
"""Converts given word to snake case.
370375

371-
>>> snake_case("someWord")
372-
"some_word"
376+
def snake_case(word: str) -> str:
373377
"""
374-
snake_case_split: List[str] = word.split("_")
375-
376-
camel_case_split: List[str] = list(
377-
itertools.chain.from_iterable(
378-
filter(None, _snake_case_regex_camel_match.split(w))
379-
for w in snake_case_split
380-
)
378+
Convert identifier to snake_case with common-sense rules:
379+
- Preserve leading/trailing underscores exactly.
380+
- Normalize interior underscores.
381+
- Default: split at camel boundaries and letter<->digit boundaries.
382+
- If original has both '_' and mixed case, do NOT split letter<->digit.
383+
- Do NOT split letter<->digit inside ALL-UPPER tokens.
384+
"""
385+
if not word:
386+
return ""
387+
388+
# Preserve edge underscores (e.g., '__init__')
389+
s = word.replace("-", "_")
390+
pre_m = _snakecase_re_leading_us.match(s)
391+
pre = pre_m.group(0) if pre_m else ""
392+
rest = s[len(pre) :] # use the remainder to find suffix
393+
suf_m = _snakecase_re_trailing_us.search(rest)
394+
suf = suf_m.group(0) if suf_m else ""
395+
core = rest[: len(rest) - len(suf)] # core = s - pre - suf
396+
397+
respect_author_digits = ("_" in word) and bool(
398+
_snakecase_re_mixed_case.search(word)
381399
)
382-
return "_".join(camel_case_split).lower()
400+
401+
parts: List[str] = []
402+
for t in core.split("_"):
403+
if not t:
404+
continue
405+
# camel splits (two-pass)
406+
t = _snakecase_re_camel_b1.sub(r"\1_\2", t)
407+
t = _snakecase_re_camel_b2.sub(r"\1_\2", t)
408+
# letter<->digit split when allowed
409+
if not respect_author_digits and not _snakecase_re_upper_or_digits.fullmatch(t):
410+
t = _snakecase_re_alpha_to_digit.sub(r"\1_\2", t)
411+
t = _snakecase_re_digit_to_alpha.sub(r"\1_\2", t)
412+
parts.append(t)
413+
414+
core_snake = "_".join(parts)
415+
core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower()
416+
return f"{pre}{core_snake}{suf}"

tests/test_compiler/test_util.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,11 +129,36 @@ def test_pascal_case() -> None:
129129

130130

131131
def test_snake_case() -> None:
132+
assert snake_case("") == ""
133+
assert snake_case("123") == "123"
134+
assert snake_case("A") == "a"
132135
assert snake_case("snake_case") == "snake_case"
133136
assert snake_case("SnakeCase") == "snake_case"
134137
assert snake_case("snakeCase") == "snake_case"
135138
assert snake_case("SNAKE_CASE") == "snake_case"
136139
assert snake_case("SNAKE_42_CASE") == "snake_42_case"
140+
assert snake_case("HTTPServer") == "http_server"
141+
assert snake_case("getHTTPResponseCode") == "get_http_response_code"
142+
assert snake_case("Mixed_SnakeCase") == "mixed_snake_case"
143+
assert snake_case("Snake42Case") == "snake_42_case"
144+
assert snake_case("xY") == "x_y"
145+
assert snake_case("Xy") == "xy"
146+
assert snake_case("Id") == "id"
147+
assert snake_case("__Init__") == "__init__"
148+
assert snake_case("__") == "__"
149+
assert snake_case("foo__bar") == "foo_bar"
150+
assert snake_case("already_snake_case") == "already_snake_case"
151+
assert snake_case("kebab-case-here") == "kebab_case_here"
152+
assert snake_case("Ipv6Address") == "ipv_6_address"
153+
assert snake_case("Ipv6_Address") == "ipv6_address"
154+
assert snake_case("MyMessage_v1") == "my_message_v1"
155+
assert snake_case("camelCase123") == "camel_case_123"
156+
assert snake_case("_privateVariable") == "_private_variable"
157+
assert snake_case("GPU3DModel") == "gpu_3_d_model"
158+
assert snake_case("TI82") == "ti82"
159+
assert snake_case("TI82_PLUS") == "ti82_plus"
160+
assert snake_case("MyMessage_mk2") == "my_message_mk2"
161+
assert snake_case("MY_VALUE1") == "my_value1"
137162

138163

139164
def test_cast_or_raise() -> None:

0 commit comments

Comments
 (0)