Fix growing seq_len bug: size the returned output tensor from the decoded results

Prikhodko Stanislav · Prikhodko Stanislav · commit 9c86510f043d · 2021-05-27T09:03:19.000Z
diff --git a/ctcdecode/__init__.py b/ctcdecode/__init__.py
@@ -225,7 +225,7 @@ def decode(self, probs, states, is_eos_s, seq_lens=None):
         out_seq_len = torch.zeros(batch_size, self._beam_width).cpu().int()
 
         decode_fn = ctc_decode.paddle_beam_decode_with_given_state
-        decode_fn(
+        res = decode_fn(
             probs,
             seq_lens,
             self._num_processes,
@@ -234,10 +234,11 @@ def decode(self, probs, states, is_eos_s, seq_lens=None):
             output,
             timesteps,
             scores,
-            out_seq_len,
+            out_seq_len
         )
+        res = res.int()
 
-        return output, scores, timesteps, out_seq_len
+        return res, scores, timesteps, out_seq_len
 
     def character_based(self):
         return ctc_decode.is_character_based(self._scorer) if self._scorer else None
diff --git a/ctcdecode/src/binding.cpp b/ctcdecode/src/binding.cpp
@@ -150,7 +150,7 @@ void* paddle_get_scorer(double alpha,
 }
 
 
-int beam_decode_with_given_state(at::Tensor th_probs,
+torch::Tensor beam_decode_with_given_state(at::Tensor th_probs,
                 at::Tensor th_seq_lens,
                 size_t num_processes,
                 std::vector<void*> &states,
@@ -185,6 +185,31 @@ int beam_decode_with_given_state(at::Tensor th_probs,
     std::vector<std::vector<std::pair<double, Output>>> batch_results =
     ctc_beam_search_decoder_batch_with_states(inputs, num_processes, states, is_eos_s);
     auto outputs_accessor = th_output.accessor<int, 3>();
+    
+    int max_result_size = 0;
+    int max_output_tokens_size = 0;
+    for (int b = 0; b < batch_results.size(); ++b){
+        std::vector<std::pair<double, Output>> results = batch_results[b];
+        if (batch_results[b].size() > max_result_size) {
+            max_result_size = batch_results[b].size();
+        }
+        for (int p = 0; p < results.size();++p){
+            std::pair<double, Output> n_path_result = results[p];
+            Output output = n_path_result.second;
+            std::vector<int> output_tokens = output.tokens;
+            std::vector<int> output_timesteps = output.timesteps;
+            
+            if (output_tokens.size() > max_output_tokens_size) {
+            max_output_tokens_size = output_tokens.size();
+        }
+        }
+        }
+    
+    torch::Tensor tensor = torch::randint(1, {batch_results.size(), max_result_size, max_output_tokens_size});
+    // cout << batch_results.size() << endl;
+    // cout << max_result_size << endl; 
+    // cout << max_output_tokens_size << endl; 
+
     auto timesteps_accessor =  th_timesteps.accessor<int, 3>();
     auto scores_accessor =  th_scores.accessor<float, 2>();
     auto out_length_accessor =  th_out_length.accessor<int, 2>();
@@ -205,11 +230,11 @@ int beam_decode_with_given_state(at::Tensor th_probs,
             std::vector<int> output_tokens = output.tokens;
             std::vector<int> output_timesteps = output.timesteps;
             for (int t = 0; t < output_tokens.size(); ++t) {
-                if (t < outputs_accessor.size(2)) {
-                    outputs_accessor[b][p][t] =  output_tokens[t]; // fill output tokens
+                if (t < tensor.size(2)) {
+                    tensor[b][p][t] =  output_tokens[t]; // fill output tokens
                 }
                 // else {
-                //     std::cerr << "Unsupported size: t >= outputs_accessor.size(2)\n";
+                //     std::cerr << "Unsupported size: t >= tensor.size(2)\n";
                 // }
 
                 if  (t < timesteps_accessor.size(2)) {
@@ -225,12 +250,13 @@ int beam_decode_with_given_state(at::Tensor th_probs,
         }
     }
 
-    
-    return 1;
+    // torch::Tensor int_tensor = tensor.to(torch::kInt32);
+
+    return tensor;
 }
 
 
-int paddle_beam_decode_with_given_state(at::Tensor th_probs,
+torch::Tensor paddle_beam_decode_with_given_state(at::Tensor th_probs,
                           at::Tensor th_seq_lens,
                           size_t num_processes,
                           std::vector<void*> states,
diff --git a/setup.py b/setup.py
@@ -131,7 +131,7 @@ def _single_compile(obj):
 
 setup(
     name="ctcdecode",
-    version="1.1.0",
+    version="1.0.3",
     description="CTC Decoder for PyTorch based on Paddle Paddle's implementation",
     url="https://github.com/parlance/ctcdecode",
     author="Ryan Leary",
diff --git a/tests/test_decode.py b/tests/test_decode.py
@@ -179,12 +179,36 @@ def test_online_decoder_decoding_with_two_calls_no_lm(self):
         )
 
         del state1, state2
-
+        size = beam_results.shape
         output_str1 = self.convert_to_string(beam_results[0][0], self.vocab_list, out_seq_len[0][0])
         output_str2 = self.convert_to_string(beam_results[1][0], self.vocab_list, out_seq_len[1][0])
 
         self.assertEqual(output_str1, self.beam_search_result[0])
         self.assertEqual(output_str2, self.beam_search_result[1])
+    
+    def test_online_decoder_decoding_with_a_lot_calls_no_lm_check_size(self):
+        decoder = ctcdecode.OnlineCTCBeamDecoder(
+            self.vocab_list,
+            beam_width=self.beam_size,
+            blank_id=self.vocab_list.index("_"),
+            log_probs_input=True,
+            num_processes=24,
+        )
+        state1 = ctcdecode.DecoderState(decoder)
+
+        probs_seq = torch.FloatTensor([self.probs_seq1]).log()
+
+        for i in range(1000):
+            beam_results, beam_scores, timesteps, out_seq_len = decoder.decode(
+                probs_seq, [state1], [False, False]
+            )
+
+        beam_results, beam_scores, timesteps, out_seq_len = decoder.decode(
+            probs_seq, [state1], [True, True]
+        )
+
+        del state1
+        self.assertGreaterEqual(beam_results.shape[2], out_seq_len.max())
 
 
 if __name__ == "__main__":