|
30 | 30 |     DynamicCache, |
31 | 31 |     EncoderDecoderCache, |
32 | 32 |     OffloadedCache, |
33 | | -    QuantizedCacheConfig, |
| 33 | +    QuantizedCache, |
34 | 34 |     StaticCache, |
| 35 | +    SlidingWindowCache, |
| 36 | +    SinkCache, |
| 37 | +    HybridCache, |
| 38 | +    HybridChunkedCache, |
35 | 39 | ) |
36 | 40 | from transformers.configuration_utils import PretrainedConfig |
37 | 41 | from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled |
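Not every one of these cache classes ships in every transformers release, so code that has to span several versions may prefer guarded imports over a hard dependency on the full list. A minimal sketch, not part of the patch, assuming the surrounding code checks for `None` before using a class:

```python
# Guarded imports for cache classes whose availability varies by transformers version.
try:
    from transformers import HybridChunkedCache
except ImportError:  # releases that predate chunked hybrid caches
    HybridChunkedCache = None

try:
    from transformers import SinkCache
except ImportError:  # releases where SinkCache is not exported
    SinkCache = None
```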
|
55 | 59 |     AssistedCandidateGeneratorDifferentTokenizers, |
56 | 60 |     CandidateGenerator, |
57 | 61 |     PromptLookupCandidateGenerator, |
58 | | -    _crop_past_key_values, |
59 | 62 |     _prepare_attention_mask, |
60 | 63 |     _prepare_token_type_ids, |
61 | 64 | ) |
62 | 65 | from transformers.generation.configuration_utils import ( |
63 | | -    NEED_SETUP_CACHE_CLASSES_MAPPING, |
64 | | -    QUANT_BACKEND_CLASSES_MAPPING, |
65 | 66 |     GenerationConfig, |
66 | 67 |     GenerationMode, |
67 | 68 | ) |
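Since `_crop_past_key_values` and the two cache mappings no longer exist upstream, an alternative to dropping the imports outright is to try them first and fall back to the local shims defined further down. A hedged sketch, not from the patch; the `_HAS_UPSTREAM_*` flags are hypothetical names:

```python
# Prefer the upstream symbols when the installed transformers still provides them.
try:
    from transformers.generation.candidate_generator import _crop_past_key_values  # noqa: F401
    _HAS_UPSTREAM_CROP = True
except ImportError:  # removed in newer transformers releases
    _HAS_UPSTREAM_CROP = False

try:
    from transformers.generation.configuration_utils import (  # noqa: F401
        NEED_SETUP_CACHE_CLASSES_MAPPING,
        QUANT_BACKEND_CLASSES_MAPPING,
    )
    _HAS_UPSTREAM_CACHE_MAPPINGS = True
except ImportError:  # removed in newer transformers releases
    _HAS_UPSTREAM_CACHE_MAPPINGS = False
```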
|
111 | 112 |
112 | 113 | logger = logging.get_logger(__name__) |
113 | 114 |
| 115 | +# Compatibility with transformers 4.57.1+ |
| 116 | +# These mappings stand in for constants removed from transformers.generation.configuration_utils |
| 117 | +NEED_SETUP_CACHE_CLASSES_MAPPING = { |
| 118 | +    "auto": Cache, |
| 119 | +    "dynamic": DynamicCache, |
| 120 | +    "static": StaticCache, |
| 121 | +    "offloaded": OffloadedCache, |
| 122 | +    "sliding_window": SlidingWindowCache, |
| 123 | +    "sink": SinkCache, |
| 124 | +    "hybrid": HybridCache, |
| 125 | +    "hybrid_chunked": HybridChunkedCache, |
| 126 | +} |
| 127 | + |
| 128 | +# Mapping for quantized cache backends |
| 129 | +QUANT_BACKEND_CLASSES_MAPPING = { |
| 130 | + "quanto": QuantizedCache, |
| 131 | + "hqq": QuantizedCache, |
| 132 | +} |
| 133 | + |
| 134 | +# Compatibility class for removed QuantizedCacheConfig |
| 135 | +class QuantizedCacheConfig: |
| 136 | +    def __init__(self, backend: str = "quanto", nbits: int = 4, |
| 137 | +                 axis_key: int = 0, axis_value: int = 0, |
| 138 | +                 q_group_size: int = 64, residual_length: int = 128): |
| 139 | +        self.backend = backend |
| 140 | +        self.nbits = nbits |
| 141 | +        self.axis_key = axis_key |
| 142 | +        self.axis_value = axis_value |
| 143 | +        self.q_group_size = q_group_size |
| 144 | +        self.residual_length = residual_length |
| 145 | + |
| 146 | +# Compatibility function for removed _crop_past_key_values |
| 147 | +def _crop_past_key_values(model, past_key_values, max_length): |
| 148 | + """ |
| 149 | + Crop past key values to a maximum length. |
| 150 | + This is a compatibility function for the removed _crop_past_key_values. |
| 151 | + """ |
| 152 | + if past_key_values is None: |
| 153 | + return past_key_values |
| 154 | + |
| 155 | + # If past_key_values is a Cache object |
| 156 | +    if hasattr(past_key_values, 'crop'): |
| 157 | +        past_key_values.crop(max_length)  # Cache.crop() trims in place and returns None |
| 158 | +        return past_key_values |
| 159 | +    # If it's a tuple of tensors (legacy format) |
| 160 | +    if isinstance(past_key_values, tuple): |
| 161 | +        cropped_past_key_values = [] |
| 162 | +        for layer_past_key_values in past_key_values: |
| 163 | +            if isinstance(layer_past_key_values, tuple) and len(layer_past_key_values) == 2: |
| 164 | +                # Standard format: (key, value) |
| 165 | +                key, value = layer_past_key_values |
| 166 | +                if key.shape[-2] > max_length: |
| 167 | +                    key = key[..., :max_length, :] |
| 168 | +                if value.shape[-2] > max_length: |
| 169 | +                    value = value[..., :max_length, :] |
| 170 | +                cropped_past_key_values.append((key, value)) |
| 171 | +            else: |
| 172 | +                # Other formats, just append as is |
| 173 | +                cropped_past_key_values.append(layer_past_key_values) |
| 174 | +        return tuple(cropped_past_key_values) |
| 175 | + |
| 176 | +    # For other cache types, return as is |
| 177 | +    return past_key_values |
| 178 | + |
114 | 179 | if is_accelerate_available(): |
115 | 180 |     from accelerate.hooks import AlignDevicesHook, add_hook_to_module |
116 | 181 |
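A short usage sketch of the shims above, not part of the patch: it assumes `torch` is installed, that the shim definitions are in scope, and that the installed `DynamicCache` still exposes a no-argument constructor, `update()`, `crop()`, and `get_seq_length()`.

```python
import torch
from transformers import DynamicCache

# Resolve cache classes by configuration name, as the removed constants allowed.
static_cls = NEED_SETUP_CACHE_CLASSES_MAPPING["static"]          # StaticCache
quant_config = QuantizedCacheConfig(backend="quanto", nbits=4)
quant_cls = QUANT_BACKEND_CLASSES_MAPPING[quant_config.backend]  # QuantizedCache

# Crop a Cache object back from 10 to 6 positions, as assisted decoding
# does after some draft tokens are rejected. The shim ignores `model`.
cache = DynamicCache()
key = torch.randn(1, 2, 10, 8)    # (batch, num_heads, seq_len, head_dim)
value = torch.randn(1, 2, 10, 8)
cache.update(key, value, layer_idx=0)
cache = _crop_past_key_values(model=None, past_key_values=cache, max_length=6)
assert cache.get_seq_length() == 6

# Legacy tuple caches are cropped tensor-by-tensor.
legacy = _crop_past_key_values(model=None, past_key_values=((key, value),), max_length=6)
assert legacy[0][0].shape[-2] == 6
```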
|
@@ -1002,7 +1067,8 @@ def _get_logits_processor( |
1002 | 1067 | device=device, |
1003 | 1068 | ) |
1004 | 1069 | ) |
1005 | | - if generation_config.forced_decoder_ids is not None: |
| 1070 | + # Compatibility with transformers 4.57.1+: forced_decoder_ids has been removed |
| 1071 | + if hasattr(generation_config, 'forced_decoder_ids') and generation_config.forced_decoder_ids is not None: |
1006 | 1072 | # TODO (sanchit): move this exception to GenerationConfig.validate() when TF & FLAX are aligned with PT |
1007 | 1073 | raise ValueError( |
1008 | 1074 | "You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument " |