Skip to content

Commit 10d73fa

Browse files
authored
Merge pull request #47 from todtom/patch-1
Update nlp.py --> Support Chinese
2 parents fbd2d3a + 8dd401a commit 10d73fa

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

tensorlayer/nlp.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -744,6 +744,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
744744
- Code from ``/tensorflow/models/rnn/translation/data_utils.py``
745745
"""
746746
words = []
747+
sentence = tf.compat.as_bytes(sentence)
747748
for space_separated_fragment in sentence.strip().split():
748749
words.extend(re.split(_WORD_SPLIT, space_separated_fragment))
749750
return [w for w in words if w]
@@ -840,7 +841,7 @@ def initialize_vocabulary(vocabulary_path):
840841
rev_vocab = []
841842
with gfile.GFile(vocabulary_path, mode="rb") as f:
842843
rev_vocab.extend(f.readlines())
843-
rev_vocab = [line.strip() for line in rev_vocab]
844+
rev_vocab = [tf.compat.as_bytes(line.strip()) for line in rev_vocab]
844845
vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)])
845846
return vocab, rev_vocab
846847
else:

0 commit comments

Comments
 (0)