Skip to content

Commit d845b95

Browse files
authored
fix: correct snake_case conversion for consecutive uppercase letters in Python (#3587)
`mangleIdentifier()` in `src/languages/python.ts` incorrectly converted camelCase identifiers with consecutive uppercase letters. For example, `enforceSSL` became `enforce_sSL` instead of `enforce_ssl`.

The root cause was a single regex (`/[^A-Z][A-Z]/g`) that only handled simple camelCase boundaries. It couldn't detect where an uppercase acronym ends and the next word begins.

Replaced with a two-step regex approach:

- `([a-z0-9])([A-Z])` — lowercase/digit to uppercase boundary
- `([A-Z]+)([A-Z][a-z])` — end of uppercase run before a new word

| Input | Before | After |
|-------|--------|-------|
| `enforceSSL` | `enforce_sSL` | `enforce_ssl` |
| `myVPCId` | `my_vPCId` | `my_vpc_id` |
| `getHTTPSUrl` | `get_hTTPSUrl` | `get_https_url` |
| `parseJSON` | `parse_jSON` | `parse_json` |

Added 12 test cases covering acronyms, digit boundaries, single uppercase letters, simple camelCase regression guards, leading underscores, already-snake_case passthrough, and multiple acronyms.

---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
1 parent e372215 commit d845b95

File tree

3 files changed

+55
-2
lines changed

3 files changed

+55
-2
lines changed

src/languages/python.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -842,8 +842,13 @@ function mangleIdentifier(originalIdentifier: string) {
842842
// Probably a class, leave as-is
843843
return originalIdentifier;
844844
}
845-
// Turn into snake-case
846-
const cased = originalIdentifier.replace(/[^A-Z][A-Z]/g, (m) => `${m[0].slice(0, 1)}_${m.slice(1).toLowerCase()}`);
845+
// Turn into snake_case by inserting '_' at two kinds of boundary, then lowercasing:
846+
// 1. lowercase/digit → uppercase: e.g. myVPCId → my_VPCId, getHTTPSUrl → get_HTTPSUrl
847+
// 2. end of uppercase run → new word: e.g. my_VPCId → my_VPC_Id, get_HTTPSUrl → get_HTTPS_Url
848+
const cased = originalIdentifier
849+
.replace(/([a-z0-9])([A-Z])/g, '$1_$2')
850+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1_$2')
851+
.toLowerCase();
847852
return IDENTIFIER_KEYWORDS.includes(cased) ? `${cased}_` : cased;
848853
}
849854

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# enforce_ssl: bool
2+
# my_vpc_id: str
3+
# parse_json: str
4+
# to_json: str
5+
# ec2_instance_id: str
6+
# x509_certificate: str
7+
# get_x: bool
8+
# some_method: str
9+
# arn_value: str
10+
# _private_field: str
11+
# already_snake: str
12+
# from_https_to_json: str
13+
print(enforce_ssl)
14+
print(my_vpc_id)
15+
print(parse_json)
16+
print(to_json)
17+
print(ec2_instance_id)
18+
print(x509_certificate)
19+
print(get_x)
20+
print(some_method)
21+
print(arn_value)
22+
print(_private_field)
23+
print(already_snake)
24+
print(from_https_to_json)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
declare const enforceSSL: boolean;
2+
declare const myVPCId: string;
3+
declare const parseJSON: string;
4+
declare const toJSON: string;
5+
declare const ec2InstanceId: string;
6+
declare const x509Certificate: string;
7+
declare const getX: boolean;
8+
declare const someMethod: string;
9+
declare const arnValue: string;
10+
declare const _privateField: string;
11+
declare const already_snake: string;
12+
declare const fromHTTPSToJSON: string;
13+
console.log(enforceSSL);
14+
console.log(myVPCId);
15+
console.log(parseJSON);
16+
console.log(toJSON);
17+
console.log(ec2InstanceId);
18+
console.log(x509Certificate);
19+
console.log(getX);
20+
console.log(someMethod);
21+
console.log(arnValue);
22+
console.log(_privateField);
23+
console.log(already_snake);
24+
console.log(fromHTTPSToJSON);

0 commit comments

Comments
 (0)