Skip to content

Commit 884d17f

Browse files
committed
Unify encoding name normalization
1 parent 9ef7390 commit 884d17f

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ public static Charset getCharset(String encoding) {
2626
}
2727

2828
private static String normalize(String encoding) {
29-
return encoding.toLowerCase(Locale.ENGLISH).replaceAll("\\W+", "_");
29+
return encoding.toLowerCase(Locale.ENGLISH).replaceAll("[^\\w.]+", "_");
3030
}
3131

3232
private static Charset getJavaCharset(String name) {

graalpython/lib-graalpython/_codecs.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,22 @@ def register(search_function):
5050
__codec_search_path__.append(search_function)
5151

5252

53-
def __normalizestring(string):
54-
return string.replace(' ', '-').lower()
53+
def __normalizestring(encoding):
    """Return a normalized, lowercased form of an encoding name.

    The algorithm follows ``encodings.normalize_encoding`` with lowercasing
    added on top:

    * ``bytes`` input is first decoded with ``ascii_decode``;
    * alphanumeric characters and ``'.'`` are kept (lowercased);
    * every run of any other characters between kept characters becomes a
      single ``'_'``; leading and trailing runs are dropped entirely.
    """
    if isinstance(encoding, bytes):
        encoding = ascii_decode(encoding)[0]

    pieces = []
    pending_separator = False
    for ch in encoding:
        if not (ch.isalnum() or ch == '.'):
            # Remember that a separator run was seen; it is only emitted
            # once another kept character follows.
            pending_separator = True
            continue
        # Emit one underscore for the whole preceding run, but never as the
        # very first character of the result.
        if pending_separator and pieces:
            pieces.append('_')
        pieces.append(ch.lower())
        pending_separator = False
    return ''.join(pieces)
5569

5670

5771
@__graalpython__.builtin

0 commit comments

Comments
 (0)