Skip to content

Commit cbfd38d

Browse files
committed
Rename functions to encode_ngrams / decode_ngrams
1 parent c34cb82 commit cbfd38d

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

py/makeqstrdata.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ def translate(translation_file, i18ns):
103103
def frequent_ngrams(corpus, sz, n):
104104
return collections.Counter(corpus[i:i+sz] for i in range(len(corpus)-sz)).most_common(n)
105105

106-
def ngrams_to_pua(translation, ngrams):
106+
def encode_ngrams(translation, ngrams):
107107
if len(ngrams) > 32:
108108
start = 0xe000
109109
else:
@@ -112,7 +112,7 @@ def ngrams_to_pua(translation, ngrams):
112112
translation = translation.replace(g, chr(start + i))
113113
return translation
114114

115-
def pua_to_ngrams(compressed, ngrams):
115+
def decode_ngrams(compressed, ngrams):
116116
if len(ngrams) > 32:
117117
start, end = 0xe000, 0xf8ff
118118
else:
@@ -123,7 +123,7 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
123123
all_strings = [x[1] for x in translations]
124124
all_strings_concat = "".join(all_strings)
125125
ngrams = [i[0] for i in frequent_ngrams(all_strings_concat, 2, 32)]
126-
all_strings_concat = ngrams_to_pua(all_strings_concat, ngrams)
126+
all_strings_concat = encode_ngrams(all_strings_concat, ngrams)
127127
counts = collections.Counter(all_strings_concat)
128128
cb = huffman.codebook(counts.items())
129129
values = []
@@ -211,7 +211,7 @@ def decompress(encoding_table, encoded, encoded_length_bits):
211211
searched_length += lengths[bit_length]
212212

213213
v = values[searched_length + bits - max_code]
214-
v = pua_to_ngrams(v, ngrams)
214+
v = decode_ngrams(v, ngrams)
215215
i += len(v.encode('utf-8'))
216216
dec.append(v)
217217
return ''.join(dec)
@@ -220,7 +220,7 @@ def compress(encoding_table, decompressed, encoded_length_bits, len_translation_
220220
if not isinstance(decompressed, str):
221221
raise TypeError()
222222
values, lengths, ngrams = encoding_table
223-
decompressed = ngrams_to_pua(decompressed, ngrams)
223+
decompressed = encode_ngrams(decompressed, ngrams)
224224
enc = bytearray(len(decompressed) * 3)
225225
#print(decompressed)
226226
#print(lengths)

0 commit comments

Comments (0)