
Commit fb2dc8d

Author: Anna Grebneva
Fixed BPE initialization for gpt-2 model (#3331)
1 parent 8645f95 commit fb2dc8d

File tree: 2 files changed, +2 −2 lines changed


demos/gpt2_text_prediction_demo/python/gpt2_text_prediction_demo.py

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ def main():
     log.debug("Loaded vocab file from {}, get {} tokens".format(args.vocab, len(vocab)))

     # create tokenizer
-    tokenizer = Tokenizer(BPE(str(args.vocab), str(args.merges)))
+    tokenizer = Tokenizer(BPE.from_file(str(args.vocab), str(args.merges)))
     tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
     tokenizer.decoder = decoders.ByteLevel()

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/wikitext2raw.py

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ def configure(self):
         self.vocab_file = self.get_value_from_config('vocab_file')
         self.merges_file = self.get_value_from_config('merges_file')
         self.max_seq_length = int(self.get_value_from_config('max_seq_length'))
-        self.tokenizer = Tokenizer(BPE(str(self.vocab_file), str(self.merges_file)))
+        self.tokenizer = Tokenizer(BPE.from_file(str(self.vocab_file), str(self.merges_file)))
         self.tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
         self.tokenizer.decoder = decoders.ByteLevel()
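
Note on the fix: in recent releases of the HuggingFace tokenizers library, a BPE model is loaded from vocabulary and merges files with BPE.from_file(vocab, merges) rather than by passing the file paths directly to the BPE(...) constructor, which is why both call sites above were updated the same way. Below is a minimal, self-contained sketch of the corrected GPT-2 tokenizer setup; the vocab.json and merges.txt paths are placeholders and assume the GPT-2 vocabulary files have already been downloaded separately.

    # Minimal sketch of the corrected setup, assuming the `tokenizers` package
    # is installed and GPT-2 vocab/merges files exist at the (hypothetical)
    # paths below.
    from tokenizers import Tokenizer, decoders, pre_tokenizers
    from tokenizers.models import BPE

    vocab_path = "gpt2/vocab.json"   # assumed path to the GPT-2 vocabulary
    merges_path = "gpt2/merges.txt"  # assumed path to the GPT-2 merges file

    # BPE.from_file loads the model from the two files; passing the paths
    # straight to BPE(...) is what the old code did and fails on newer
    # tokenizers releases.
    tokenizer = Tokenizer(BPE.from_file(vocab_path, merges_path))
    tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel(add_prefix_space=False)
    tokenizer.decoder = decoders.ByteLevel()

    encoded = tokenizer.encode("Hello, world!")
    print(encoded.ids)                     # token ids produced by byte-level BPE
    print(tokenizer.decode(encoded.ids))   # round-trips back to the input text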
