repro unbounded beam_idx

joecummings · joecummings · commit 208f2eadb2dc · 2023-02-10T11:19:12.000-05:00
diff --git a/torchtext/prototype/generate.py b/torchtext/prototype/generate.py
@@ -295,7 +295,27 @@ def update_func(emissions, N, T, prev_step_token_idxs, prev_step_hyp_idxs, prev_
                     model_inputs.update(self._huggingface_model_input_values)
                     if len(prev_step_hyp_idxs) > 1 and model_kwargs["past"] is not None:
                         beam_idxs = torch.Tensor(prev_step_hyp_idxs).to(dtype=torch.int32)
-                        model_inputs["past_key_values"] = self.model._reorder_cache(model_kwargs["past"], beam_idxs)
+
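+                        # NOTE: the previous step's hypothesis count is read from the cached state's first dimension; it could be stored in model_kwargs instead.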
+                        num_hyps_in_prev_step = model_kwargs["past"][0][0].shape[0]
+                        
+                        num_finished_hyps_in_step = num_hyps_in_prev_step - len(prev_step_hyp_idxs)
+                        if num_finished_hyps_in_step > 0:
+                            beam_idxs = F.pad(beam_idxs, (0, num_finished_hyps_in_step), "constant", 0)
+                        
+                        reordered_cached = model_kwargs["past"] #self.model._reorder_cache(model_kwargs["past"], beam_idxs)
+
+                        if num_finished_hyps_in_step > 0:
+                            sliced_cache = ()
+                            for states in reordered_cached:
+                                sliced_state = ()
+                                for state in states:
+                                    sliced_state = sliced_state + (state[:len(prev_step_hyp_idxs)],)
+                                sliced_cache = sliced_cache + (sliced_state,)
+                            reordered_cached = sliced_cache
+
+                        model_inputs["past_key_values"] = reordered_cached
+
 
                 # Forward pass
                 outputs = self.model(**model_inputs)