diff --git a/LanguageModel.lua b/LanguageModel.lua
index d6248184..ae6cce8b 100644
--- a/LanguageModel.lua
+++ b/LanguageModel.lua
@@ -5,6 +5,7 @@ require 'VanillaRNN'
 require 'LSTM'
 
 local utils = require 'util.utils'
+local utf8 = require 'lua-utf8'
 
 
 local LM, parent = torch.class('nn.LanguageModel', 'nn.Module')
@@ -122,9 +123,14 @@ end
 
 
 function LM:encode_string(s)
-  local encoded = torch.LongTensor(#s)
-  for i = 1, #s do
-    local token = s:sub(i, i)
+  -- Encode string s as a LongTensor of vocabulary indices, one entry per
+  -- UTF-8 character (not per byte), so multi-byte tokens are looked up whole.
+  -- utf8.len may return nil for malformed UTF-8; fail early with a clear
+  -- message instead of handing nil to torch.LongTensor and the loop bound.
+  local num_chars = assert(utf8.len(s), 'Got invalid UTF-8 string')
+  local encoded = torch.LongTensor(num_chars)
+  for i = 1, num_chars do
+    local token = utf8.sub(s, i, i)
     local idx = self.token_to_idx[token]
     assert(idx ~= nil, 'Got invalid idx')
     encoded[i] = idx
diff --git a/sample.lua b/sample.lua
index 4e6ebae0..18a8a668 100644
--- a/sample.lua
+++ b/sample.lua
@@ -15,7 +15,6 @@ cmd:option('-gpu_backend', 'cuda')
 cmd:option('-verbose', 0)
 local opt = cmd:parse(arg)
 
-
 local checkpoint = torch.load(opt.checkpoint)
 local model = checkpoint.model
 
diff --git a/torch-rnn-scm-1.rockspec b/torch-rnn-scm-1.rockspec
index 003d1f22..ac1ffefc 100644
--- a/torch-rnn-scm-1.rockspec
+++ b/torch-rnn-scm-1.rockspec
@@ -14,6 +14,7 @@ description = {
 dependencies = {
   "torch >= 7.0",
   "nn >= 1.0",
+  "luautf8 >= 1.2",
 }
 build = {
   type = "builtin",
@@ -23,4 +24,4 @@ build = {
     ["torch-rnn.VanillaRNN"] = "VanillaRNN.lua",
     ["torch-rnn.TemporalCrossEntropyCriterion"] = "TemporalCrossEntropyCriterion.lua",
   }
-}
\ No newline at end of file
+}