|
@@ -40,7 +40,6 @@
     logging,
     replace_example_docstring,
 )
-from ...utils.import_utils import is_transformers_version
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
 from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
@@ -313,19 +312,8 @@ def generate_language_model( |
             `inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
                 The sequence of generated hidden-states.
         """
-        cache_position_kwargs = {}
-        if is_transformers_version("<", "4.52.0.dev0"):
-            cache_position_kwargs["input_ids"] = inputs_embeds
-            cache_position_kwargs["model_kwargs"] = model_kwargs
-        else:
-            cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
-            cache_position_kwargs["device"] = (
-                self.language_model.device if getattr(self, "language_model", None) is not None else self.device
-            )
-            cache_position_kwargs["model_kwargs"] = model_kwargs
         max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
-        model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
-
+        model_kwargs = self.language_model._get_initial_cache_position(inputs_embeds, model_kwargs)
         for _ in range(max_new_tokens):
             # prepare model inputs
             model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
|