Skip to content

Commit c34cb82

Browse files
committed
makeqstrdata: correct range of low code points to 0x80..0x9f inclusive
The previous range was unintentionally large and overlapped some characters we'd like to use (as well as 0xa0, which we don't intentionally use).
1 parent f1c7389 commit c34cb82

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

py/makeqstrdata.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def pua_to_ngrams(compressed, ngrams):
116116
if len(ngrams) > 32:
117117
start, end = 0xe000, 0xf8ff
118118
else:
119-
start, end = 0x80, 0xbf
119+
start, end = 0x80, 0x9f
120120
return "".join(ngrams[ord(c) - start] if (start <= ord(c) <= end) else c for c in compressed)
121121

122122
def compute_huffman_coding(translations, qstrs, compression_filename):
@@ -146,6 +146,7 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
146146
last_l = l
147147
lengths = bytearray()
148148
print("// length count", length_count)
149+
print("// bigrams", ngrams)
149150
for i in range(1, max(length_count) + 2):
150151
lengths.append(length_count.get(i, 0))
151152
print("// values", values, "lengths", len(lengths), lengths)

0 commit comments

Comments
 (0)