wip feb 8/2

joecummings · joecummings · commit 7ba41ca18099 · 2023-02-08T23:35:54.000-05:00
diff --git a/torchtext/prototype/generate.py b/torchtext/prototype/generate.py
@@ -295,6 +295,11 @@ def update_func(emissions, N, T, prev_step_token_idxs, prev_step_hyp_idxs, prev_
                 model_inputs = self.model.prepare_inputs_for_generation(state_and_tokens, **new_model_kwargs)
                 if self.is_huggingface_model:
                     model_inputs.update(self._huggingface_model_input_values)
+                    if len(prev_step_hyp_idxs) > 1 and model_inputs["past_key_values"] is not None:
+                        model_inputs["past_key_values"] = self.model._reorder_cache(
+                            model_inputs["past_key_values"],
+                            torch.Tensor(prev_step_hyp_idxs).to(dtype=torch.int32),  # TODO: verify int32 is the index dtype _reorder_cache expects
+                        )
 
                 # Forward pass
                 outputs = self.model(**model_inputs)
@@ -306,14 +311,6 @@ def update_func(emissions, N, T, prev_step_token_idxs, prev_step_hyp_idxs, prev_
                 # HF optimizations to reduce overhead in future `forward` calls
                 if self.is_huggingface_model:
                     new_model_kwargs = self._update_model_kwargs_for_generation(outputs, new_model_kwargs)
-                    if new_model_kwargs["past"] is not None and len(prev_step_hyp_idxs) > 1:
-                        if len(prev_step_hyp_idxs) == 9:
-                            import pdb
-                            pdb.set_trace()
-                        new_model_kwargs["past"] = self.model._reorder_cache(
-                            new_model_kwargs["past"],
-                            torch.Tensor(prev_step_hyp_idxs).to(dtype=torch.int32),  # I think this is correct?
-                        )
 
                 # Keep track of probabilities over vocab for this pairing
                 # TODO: clean up duplicate code in these branches