Skip to content

Commit b3c4674

Browse files
authored
Merge pull request #75 from hit9/D-Walther-master
improve the snake_case()
2 parents 0bfe128 + 7921cdc commit b3c4674

File tree

4 files changed

+85
-33
lines changed

4 files changed

+85
-33
lines changed

changes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
.. currentmodule:: bitproto
22

3+
Version 1.2.2
4+
-------------
5+
6+
.. _version-1.2.2:
7+
8+
Warning: May break some existing projects' generated names:
9+
10+
- Improve `snake_case` function. #74, #75
11+
312
Version 1.2.1
413
-------------
514

compiler/bitproto/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@
88
99
"""
1010

11-
__version__ = "1.2.1"
11+
__version__ = "1.2.2"
1212
__description__ = "bit level data interchange format."

compiler/bitproto/utils.py

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -360,39 +360,56 @@ def pascal_case(word: str) -> str:
360360
return "".join(items)
361361

362362

# Camel-case boundaries, applied in two passes.
_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)")  # Xy boundary
_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])")  # aA/0A boundary
# Letter <-> digit boundaries.
_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")

_snakecase_re_multi_us = re.compile(r"__+")
_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
_snakecase_re_leading_us = re.compile(r"^_+")
_snakecase_re_trailing_us = re.compile(r"_+$")


def snake_case(word: str) -> str:
    """Convert an identifier to snake_case.

    Rules:

    - Hyphens are treated as underscores.
    - Leading and trailing underscores are preserved exactly.
    - Interior underscore runs collapse to a single underscore.
    - By default, split at camel-case boundaries and at letter<->digit
      boundaries.
    - If the original word has both ``_`` and mixed case, letter<->digit
      boundaries are NOT split (the author's own grouping is kept).
    - Tokens made only of upper-case letters and digits are never split.

    >>> snake_case("getHTTPResponseCode")
    'get_http_response_code'
    >>> snake_case("Ipv6Address")
    'ipv_6_address'
    >>> snake_case("Ipv6_Address")
    'ipv6_address'
    >>> snake_case("TI82A")
    'ti82a'

    :param word: The identifier to convert.
    :return: The snake_case form of ``word``; ``""`` for empty input.
    """
    if not word:
        return ""

    # Preserve edge underscores (e.g., '__init__').
    s = word.replace("-", "_")
    pre_m = _snakecase_re_leading_us.match(s)
    pre = pre_m.group(0) if pre_m else ""
    # Search the suffix in the remainder only, so an all-underscore word
    # is not counted as both prefix and suffix.
    rest = s[len(pre) :]
    suf_m = _snakecase_re_trailing_us.search(rest)
    suf = suf_m.group(0) if suf_m else ""
    core = rest[: len(rest) - len(suf)]  # core = s - pre - suf

    respect_author_digits = ("_" in word) and bool(
        _snakecase_re_mixed_case.search(word)
    )

    parts: List[str] = []
    for token in core.split("_"):
        if not token:
            continue
        # Test the untouched token: the camel pass inserts '_' between a
        # digit and a following capital, which would defeat this check
        # and split e.g. 'TI82A' despite it being all-upper.
        if _snakecase_re_upper_or_digits.fullmatch(token):
            parts.append(token)
            continue
        # Camel splits (two-pass).
        token = _snakecase_re_camel_b1.sub(r"\1_\2", token)
        token = _snakecase_re_camel_b2.sub(r"\1_\2", token)
        # Letter<->digit split when allowed.
        if not respect_author_digits:
            token = _snakecase_re_alpha_to_digit.sub(r"\1_\2", token)
            token = _snakecase_re_digit_to_alpha.sub(r"\1_\2", token)
        parts.append(token)

    core_snake = "_".join(parts)
    core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower()
    return f"{pre}{core_snake}{suf}"

tests/test_compiler/test_util.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,36 @@ def test_pascal_case() -> None:
129129

130130

131131
def test_snake_case() -> None:
    """Check snake_case() against a table of (input, expected) pairs."""
    expectations = (
        # Trivial inputs.
        ("", ""),
        ("123", "123"),
        ("A", "a"),
        # Basic snake / camel / pascal / upper-snake forms.
        ("snake_case", "snake_case"),
        ("SnakeCase", "snake_case"),
        ("snakeCase", "snake_case"),
        ("SNAKE_CASE", "snake_case"),
        ("SNAKE_42_CASE", "snake_42_case"),
        # Acronyms and mixed styles.
        ("HTTPServer", "http_server"),
        ("getHTTPResponseCode", "get_http_response_code"),
        ("Mixed_SnakeCase", "mixed_snake_case"),
        ("Snake42Case", "snake_42_case"),
        ("xY", "x_y"),
        ("Xy", "xy"),
        ("Id", "id"),
        # Underscore and hyphen handling.
        ("__Init__", "__init__"),
        ("__", "__"),
        ("foo__bar", "foo_bar"),
        ("already_snake_case", "already_snake_case"),
        ("kebab-case-here", "kebab_case_here"),
        # Letter <-> digit boundaries.
        ("Ipv6Address", "ipv_6_address"),
        ("Ipv6_Address", "ipv6_address"),
        ("MyMessage_v1", "my_message_v1"),
        ("camelCase123", "camel_case_123"),
        ("_privateVariable", "_private_variable"),
        ("GPU3DModel", "gpu_3_d_model"),
        ("TI82", "ti82"),
        ("TI82_PLUS", "ti82_plus"),
        ("MyMessage_mk2", "my_message_mk2"),
        ("MY_VALUE1", "my_value1"),
    )
    for raw, expected in expectations:
        assert snake_case(raw) == expected, raw
136162

137163

138164
def test_cast_or_raise() -> None:

0 commit comments

Comments
 (0)