14 changes: 7 additions & 7 deletions examples/legacy/run_chinese_ref.py
@@ -19,14 +19,14 @@ def _is_chinese_char(cp):
     # like the all of the other languages.
     if (
         (cp >= 0x4E00 and cp <= 0x9FFF)
-        or (cp >= 0x3400 and cp <= 0x4DBF)  #
-        or (cp >= 0x20000 and cp <= 0x2A6DF)  #
-        or (cp >= 0x2A700 and cp <= 0x2B73F)  #
-        or (cp >= 0x2B740 and cp <= 0x2B81F)  #
-        or (cp >= 0x2B820 and cp <= 0x2CEAF)  #
+        or (cp >= 0x3400 and cp <= 0x4DBF)
+        or (cp >= 0x20000 and cp <= 0x2A6DF)
+        or (cp >= 0x2A700 and cp <= 0x2B73F)
+        or (cp >= 0x2B740 and cp <= 0x2B81F)
+        or (cp >= 0x2B820 and cp <= 0x2CEAF)
         or (cp >= 0xF900 and cp <= 0xFAFF)
-        or (cp >= 0x2F800 and cp <= 0x2FA1F)  #
-    ):  #
+        or (cp >= 0x2F800 and cp <= 0x2FA1F)
+    ):
         return True

     return False
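The trailing `#` markers deleted above carried no information; they appear to have been left in place to keep an auto-formatter from re-wrapping the condition. As a quick sanity check of the ranges (hypothetical usage, not part of the diff):

```python
# U+4E2D "中" sits in the CJK Unified Ideographs block (0x4E00-0x9FFF).
assert _is_chinese_char(ord("中"))
# Latin letters fall outside every listed range.
assert not _is_chinese_char(ord("A"))
```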
2 changes: 1 addition & 1 deletion src/transformers/commands/add_new_model_like.py
@@ -91,7 +91,7 @@ def visit_SimpleStatementLine(self, node: cst.SimpleStatementLine):
 """.lstrip()


-class ModelInfos(object):
+class ModelInfos:
     """
     Retrieve the basic informations about an existing model classes.
     """
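Inheriting explicitly from `object` is a Python 2 holdover; in Python 3 every class is already new-style, so the base can be dropped with no behavior change. A minimal illustration (hypothetical names):

```python
class OldStyle(object):  # redundant base in Python 3
    pass

class NewStyle:  # equivalent
    pass

# Both resolve their MRO through object either way.
assert NewStyle.__mro__ == (NewStyle, object)
```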
3 changes: 2 additions & 1 deletion src/transformers/commands/chat.py
@@ -22,9 +22,10 @@
 import string
 import time
 from argparse import ArgumentParser, Namespace
+from collections.abc import AsyncIterator
 from dataclasses import dataclass, field
 from threading import Thread
-from typing import AsyncIterator, Optional
+from typing import Optional

 import yaml
 from huggingface_hub import AsyncInferenceClient, ChatCompletionStreamOutput
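Since Python 3.9, the collection ABCs in `typing` (`AsyncIterator`, `Generator`, `Iterable`, and friends) are deprecated aliases of their `collections.abc` counterparts, which is what this PR switches to here and in several files below. A minimal sketch of the preferred spelling (hypothetical function):

```python
from collections.abc import AsyncIterator

async def stream_tokens(text: str) -> AsyncIterator[str]:
    # An async generator satisfies the AsyncIterator protocol.
    for token in text.split():
        yield token
```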
3 changes: 2 additions & 1 deletion src/transformers/commands/serving.py
@@ -24,10 +24,11 @@
 import threading
 import time
 from argparse import ArgumentParser, Namespace
+from collections.abc import Generator, Iterable
 from dataclasses import dataclass, field
 from io import BytesIO
 from threading import Thread
-from typing import Generator, Iterable, Optional, Union
+from typing import Optional, Union

 from huggingface_hub import model_info
 from huggingface_hub.constants import HF_HUB_OFFLINE
4 changes: 1 addition & 3 deletions src/transformers/modeling_utils.py
@@ -1615,9 +1615,7 @@ def _find_mismatched_keys(
         # This skips size mismatches for 4-bit weights. Two 4-bit values share an 8-bit container, causing size differences.
         # Without matching with module type or parameter type it seems like a practical way to detect valid 4bit weights.
         if not (
-            is_quantized
-            and new_state_dict[key].shape[-1] == 1
-            and new_state_dict[key].numel() * 2 == model_state_dict[key].numel()
+            is_quantized and tensor.shape[-1] == 1 and tensor.numel() * 2 == model_state_dict[key].numel()
         ):
             mismatched_keys.append(key)
             mismatched_shapes.append((tensor.shape, model_state_dict[key].shape))
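The surviving one-line check encodes how bitsandbytes-style 4-bit quantization stores weights: two 4-bit values are packed per uint8 and the container is flattened to a column vector. A sketch of that shape relationship (hypothetical sizes, not the library's actual packing code):

```python
import torch

weight = torch.randn(16, 32)  # full-precision weight expected by the model
# Two 4-bit values per byte: half the elements, flattened to (N // 2, 1).
packed = torch.empty(weight.numel() // 2, 1, dtype=torch.uint8)

assert packed.shape[-1] == 1                 # the column-vector container
assert packed.numel() * 2 == weight.numel()  # the size rule tested above
```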
2 changes: 1 addition & 1 deletion src/transformers/models/convnext/modeling_tf_convnext.py
@@ -391,7 +391,7 @@ def call(

         # Change the other hidden state outputs to NCHW as well
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1]])
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1])

         if not return_dict:
             hidden_states = hidden_states if output_hidden_states else ()
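This is the micro-cleanup repeated through the rest of the PR: `tuple()` can consume a generator expression directly, so the intermediate list that `tuple([...])` builds is never allocated. In sketch form:

```python
nums = [1, 2, 3]
old = tuple([x * 2 for x in nums])  # materializes a throwaway list first
new = tuple(x * 2 for x in nums)    # streams values straight into the tuple
assert old == new == (2, 4, 6)
```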
@@ -439,7 +439,7 @@ def call(

         # Change the other hidden state outputs to NCHW as well
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1]])
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1])

         if not return_dict:
             hidden_states = hidden_states if output_hidden_states else ()
2 changes: 1 addition & 1 deletion src/transformers/models/cvt/modeling_tf_cvt.py
@@ -784,7 +784,7 @@ def call(
         # Change back to (batch_size, num_channels, height, width) format to have uniformity in the modules
         hidden_state = tf.transpose(hidden_state, perm=(0, 3, 1, 2))
         if output_hidden_states:
-            all_hidden_states = tuple([tf.transpose(hs, perm=(0, 3, 1, 2)) for hs in all_hidden_states])
+            all_hidden_states = tuple(tf.transpose(hs, perm=(0, 3, 1, 2)) for hs in all_hidden_states)

         if not return_dict:
             return tuple(v for v in [hidden_state, cls_token, all_hidden_states] if v is not None)
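For reference, `perm=(0, 3, 1, 2)` is the NHWC-to-NCHW permutation used by all of these TF vision models: the channel axis moves from last position to position 1. A small check (hypothetical shape):

```python
import tensorflow as tf

h = tf.zeros((2, 32, 32, 64))  # (batch, height, width, channels)
nchw = tf.transpose(h, perm=(0, 3, 1, 2))
assert nchw.shape == (2, 64, 32, 32)  # (batch, channels, height, width)
```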
@@ -125,7 +125,7 @@ def __init__(
         if image_mean is None:
             self.background_color = (127, 127, 127)
         else:
-            self.background_color = tuple([int(x * 255) for x in image_mean])
+            self.background_color = tuple(int(x * 255) for x in image_mean)

     def resize(
         self,
@@ -69,7 +69,7 @@ def __init__(self, **kwargs: Unpack[DeepseekVLFastImageProcessorKwargs]):
         if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
-            background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
+            background_color = tuple(int(x * 255) for x in kwargs.get("image_mean"))
         self.background_color = tuple(background_color)

     def resize(
@@ -151,12 +151,12 @@ def __init__(
         if image_mean is None:
             self.background_color = (127, 127, 127)
         else:
-            self.background_color = tuple([int(x * 255) for x in image_mean])
+            self.background_color = tuple(int(x * 255) for x in image_mean)

         if high_res_image_mean is None:
             self.high_res_background_color = (127, 127, 127)
         else:
-            self.high_res_background_color = tuple([int(x * 255) for x in high_res_image_mean])
+            self.high_res_background_color = tuple(int(x * 255) for x in high_res_image_mean)

     def resize(
         self,
@@ -100,7 +100,7 @@ def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
         if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
-            high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
+            high_res_background_color = tuple(int(x * 255) for x in kwargs.get("high_res_image_mean"))
         super().__init__(**kwargs)
         self.background_color = tuple(background_color)
         self.high_res_background_color = tuple(high_res_background_color)
@@ -535,7 +535,7 @@ def __init__(
         if high_res_image_mean is None:
             self.high_res_background_color = (127, 127, 127)
         else:
-            self.high_res_background_color = tuple([int(x * 255) for x in high_res_image_mean])
+            self.high_res_background_color = tuple(int(x * 255) for x in high_res_image_mean)

     @filter_out_non_signature_kwargs()
     def preprocess(
@@ -756,7 +756,7 @@ def __init__(self, **kwargs: Unpack[DeepseekVLHybridFastImageProcessorKwargs]):
         if kwargs.get("high_res_image_mean") is None:
             high_res_background_color = (127, 127, 127)
         else:
-            high_res_background_color = tuple([int(x * 255) for x in kwargs.get("high_res_image_mean")])
+            high_res_background_color = tuple(int(x * 255) for x in kwargs.get("high_res_image_mean"))
         DeepseekVLImageProcessorFast().__init__(**kwargs)
         self.background_color = tuple(background_color)
         self.high_res_background_color = tuple(high_res_background_color)
@@ -867,7 +867,7 @@ def call(
         # The hidden states are in (batch_size, height, width, num_channels)
         # shape after all stages except the MB3D blocks.
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1][:-1]]) + (
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1][:-1]) + (
                 encoder_outputs[1][-1],
             )

@@ -967,7 +967,7 @@ def forward(
             pasts_or_spout_value = torch.split(pasts_or_spout_value, [1] * self.config.num_layers, dim=1)
             # make same shape as past_key_values
             pasts_or_spout_value = tuple(
-                tuple([b.squeeze(1) for b in torch.split(a.squeeze(1), [1, 1], dim=1)]) for a in pasts_or_spout_value
+                tuple(b.squeeze(1) for b in torch.split(a.squeeze(1), [1, 1], dim=1)) for a in pasts_or_spout_value
             )
         else:
             pasts_or_spout_value = [None] * self.config.num_layers
@@ -597,7 +597,7 @@ def call(
             if not isinstance(encoder_outputs, tuple):
                 encoder_outputs = encoder_outputs.to_tuple()
             output = (loss, logits, past_key_values) + decoder_outputs[start_index:] + encoder_outputs
-            output = tuple([x for x in output if x is not None])
+            output = tuple(x for x in output if x is not None)
             return output

         return TFSeq2SeqLMOutput(
2 changes: 1 addition & 1 deletion src/transformers/models/esm/openfold_utils/chunk_utils.py
@@ -229,7 +229,7 @@ def chunk_layer(
         raise ValueError("Must provide at least one input")

     initial_dims = [shape[:no_batch_dims] for shape in _fetch_dims(inputs)]
-    orig_batch_dims = tuple([max(s) for s in zip(*initial_dims)])
+    orig_batch_dims = tuple(max(s) for s in zip(*initial_dims))

     def _prep_inputs(t: torch.Tensor) -> torch.Tensor:
         if not low_mem:
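`orig_batch_dims` is the elementwise maximum over every input's leading batch dimensions, i.e. the broadcast batch shape the chunking loop iterates over. In sketch form (hypothetical dims):

```python
initial_dims = [(1, 4), (3, 1), (3, 4)]
orig_batch_dims = tuple(max(s) for s in zip(*initial_dims))
assert orig_batch_dims == (3, 4)  # broadcast of all three leading shapes
```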
2 changes: 1 addition & 1 deletion src/transformers/models/janus/image_processing_janus.py
@@ -128,7 +128,7 @@ def __init__(
         if image_mean is None:
             self.background_color = (127, 127, 127)
         else:
-            self.background_color = tuple([int(x * 255) for x in image_mean])
+            self.background_color = tuple(int(x * 255) for x in image_mean)

     def resize(
         self,
@@ -74,7 +74,7 @@ def __init__(self, **kwargs: Unpack[JanusFastImageProcessorKwargs]):
         if kwargs.get("image_mean") is None:
             background_color = (127, 127, 127)
         else:
-            background_color = tuple([int(x * 255) for x in kwargs.get("image_mean")])
+            background_color = tuple(int(x * 255) for x in kwargs.get("image_mean"))
         super().__init__(**kwargs)
         self.background_color = tuple(background_color)
2 changes: 1 addition & 1 deletion src/transformers/models/janus/modular_janus.py
@@ -1356,7 +1356,7 @@ def __init__(
         if image_mean is None:
             self.background_color = (127, 127, 127)
         else:
-            self.background_color = tuple([int(x * 255) for x in image_mean])
+            self.background_color = tuple(int(x * 255) for x in image_mean)

     def pad_to_square(
         self,
4 changes: 2 additions & 2 deletions src/transformers/models/led/modeling_led.py
@@ -1592,10 +1592,10 @@ def forward(
             # unpad `hidden_states` because the calling function is expecting a length == input_ids.size(1)
             hidden_states = hidden_states[:, :-padding_len]
             if output_hidden_states:
-                encoder_states = tuple([state[:, :-padding_len] for state in encoder_states])
+                encoder_states = tuple(state[:, :-padding_len] for state in encoder_states)

             if output_attentions:
-                all_attentions = tuple([state[:, :, :-padding_len, :] for state in all_attentions])
+                all_attentions = tuple(state[:, :, :-padding_len, :] for state in all_attentions)

         if not return_dict:
             return tuple(
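LED pads the input up to a multiple of the attention window before encoding, then slices the padding back off every accumulated output, as above. A minimal sketch of the unpadding (hypothetical sizes):

```python
import torch

padding_len = 2
# Three accumulated hidden states of shape (batch, padded_seq_len, hidden).
encoder_states = tuple(torch.randn(1, 10, 4) for _ in range(3))
encoder_states = tuple(state[:, :-padding_len] for state in encoder_states)
assert all(state.shape[1] == 8 for state in encoder_states)
```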
4 changes: 1 addition & 3 deletions src/transformers/models/led/modeling_tf_led.py
@@ -1907,9 +1907,7 @@ def call(
         # undo padding
         if output_attentions:
             all_attentions = (
-                tuple([state[:, :, :-padding_len, :] for state in all_attentions])
-                if padding_len > 0
-                else all_attentions
+                tuple(state[:, :, :-padding_len, :] for state in all_attentions) if padding_len > 0 else all_attentions
             )

         if output_hidden_states:
4 changes: 2 additions & 2 deletions src/transformers/models/longformer/modeling_longformer.py
@@ -1283,10 +1283,10 @@ def forward(
         # unpad `hidden_states` because the calling function is expecting a length == input_ids.size(1)
         hidden_states = hidden_states[:, : hidden_states.shape[1] - padding_len]
         if output_hidden_states:
-            all_hidden_states = tuple([state[:, : state.shape[1] - padding_len] for state in all_hidden_states])
+            all_hidden_states = tuple(state[:, : state.shape[1] - padding_len] for state in all_hidden_states)

         if output_attentions:
-            all_attentions = tuple([state[:, :, : state.shape[2] - padding_len, :] for state in all_attentions])
+            all_attentions = tuple(state[:, :, : state.shape[2] - padding_len, :] for state in all_attentions)

         if not return_dict:
             return tuple(
4 changes: 1 addition & 3 deletions src/transformers/models/longformer/modeling_tf_longformer.py
@@ -1706,9 +1706,7 @@ def call(
         hidden_states = hidden_states[:, :-padding_len] if padding_len > 0 else hidden_states
         if output_attentions:
             all_attentions = (
-                tuple([state[:, :, :-padding_len, :] for state in all_attentions])
-                if padding_len > 0
-                else all_attentions
+                tuple(state[:, :, :-padding_len, :] for state in all_attentions) if padding_len > 0 else all_attentions
             )

         if not return_dict:
4 changes: 2 additions & 2 deletions src/transformers/models/mobilevit/modeling_tf_mobilevit.py
@@ -869,7 +869,7 @@ def call(
         if not self.expand_output:
             remaining_encoder_outputs = encoder_outputs[1:]
             remaining_encoder_outputs = tuple(
-                [tf.transpose(h, perm=(0, 3, 1, 2)) for h in remaining_encoder_outputs[0]]
+                tf.transpose(h, perm=(0, 3, 1, 2)) for h in remaining_encoder_outputs[0]
             )
             remaining_encoder_outputs = (remaining_encoder_outputs,)
             return output + remaining_encoder_outputs
@@ -878,7 +878,7 @@ def call(

         # Change the other hidden state outputs to NCHW as well
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1]])
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1])

         return TFBaseModelOutputWithPooling(
             last_hidden_state=last_hidden_state,
@@ -2044,7 +2044,7 @@ def forward(
                     raise Exception("target_values cannot be negative for negative_binomial distribution.")
                 distribution = self.distribution_output.distribution(y_hat)
                 # y_hat should be a 2-tuple, each with dimension [bs, num_targets]
-                y_hat = tuple([item.view(-1, self.config.num_targets) for item in y_hat])
+                y_hat = tuple(item.view(-1, self.config.num_targets) for item in y_hat)
                 loss_val = loss(distribution, target_values)
                 # take average of the loss
                 loss_val = weighted_average(loss_val)
2 changes: 1 addition & 1 deletion src/transformers/models/patchtst/modeling_patchtst.py
@@ -1884,7 +1884,7 @@ def forward(
             if self.distribution_output:
                 distribution = self.distribution_output.distribution(y_hat)
                 # y_hat should be a 2-tuple, each with dimension [bs, num_targets]
-                y_hat = tuple([item.view(-1, self.config.num_targets) for item in y_hat])
+                y_hat = tuple(item.view(-1, self.config.num_targets) for item in y_hat)
                 loss = nll(distribution, target_values)
                 # take average of the loss
                 loss = weighted_average(loss)
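In both time-series heads, `y_hat` holds the distribution parameters as a 2-tuple whose elements are flattened to `(batch_size, num_targets)` before the negative log-likelihood is evaluated. A sketch with hypothetical sizes:

```python
import torch

num_targets = 3
y_hat = tuple(torch.randn(4, 1, num_targets) for _ in range(2))  # two parameter tensors
y_hat = tuple(item.view(-1, num_targets) for item in y_hat)
assert all(t.shape == (4, num_targets) for t in y_hat)
```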
@@ -15,7 +15,8 @@
 Processor class for PerceptionLM.
 """

-from typing import Iterable, Union
+from collections.abc import Iterable
+from typing import Union

 import numpy as np
3 changes: 2 additions & 1 deletion src/transformers/models/reformer/modeling_reformer.py
@@ -17,10 +17,11 @@

 import sys
 from collections import namedtuple
+from collections.abc import Iterable
 from dataclasses import dataclass
 from functools import reduce
 from operator import mul
-from typing import Any, Iterable, Optional, Union
+from typing import Any, Optional, Union

 import numpy as np
 import torch
2 changes: 1 addition & 1 deletion src/transformers/models/regnet/modeling_tf_regnet.py
@@ -415,7 +415,7 @@ def call(

         # Change the other hidden state outputs to NCHW as well
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1]])
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1])

         if not return_dict:
             return (last_hidden_state, pooled_output) + encoder_outputs[1:]
2 changes: 1 addition & 1 deletion src/transformers/models/segformer/modeling_tf_segformer.py
@@ -608,7 +608,7 @@ def call(

         # Change the other hidden state outputs to NCHW as well
         if output_hidden_states:
-            hidden_states = tuple([tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1]])
+            hidden_states = tuple(tf.transpose(h, perm=(0, 3, 1, 2)) for h in encoder_outputs[1])

         if not return_dict:
             if tf.greater(len(encoder_outputs[1:]), 0):
2 changes: 1 addition & 1 deletion src/transformers/models/superglue/modeling_superglue.py
@@ -44,7 +44,7 @@ def concat_pairs(tensor_tuple0: tuple[torch.Tensor], tensor_tuple1: tuple[torch.
     Returns:
         (`tuple[torch.Tensor]`): Tuple of concatenated tensors.
     """
-    return tuple([torch.cat([tensor0, tensor1]) for tensor0, tensor1 in zip(tensor_tuple0, tensor_tuple1)])
+    return tuple(torch.cat([tensor0, tensor1]) for tensor0, tensor1 in zip(tensor_tuple0, tensor_tuple1))


 def normalize_keypoints(keypoints: torch.Tensor, height: int, width: int) -> torch.Tensor:
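`concat_pairs` zips two tuples of tensors and concatenates each pair along dim 0, batching the two images of a pair together. A sketch (hypothetical shapes):

```python
import torch

t0 = (torch.zeros(2, 3),)
t1 = (torch.ones(2, 3),)
out = tuple(torch.cat([a, b]) for a, b in zip(t0, t1))
assert out[0].shape == (4, 3)  # the pair stacked along the batch axis
```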
@@ -612,7 +612,7 @@ def call(
             if not isinstance(encoder_outputs, tuple):
                 encoder_outputs = encoder_outputs.to_tuple()
             output = (loss, logits, past_key_values) + decoder_outputs[start_index:] + encoder_outputs
-            output = tuple([x for x in output if x is not None])
+            output = tuple(x for x in output if x is not None)
             return output

         return TFSeq2SeqLMOutput(
10 changes: 5 additions & 5 deletions src/transformers/models/zamba2/modeling_zamba2.py
@@ -726,13 +726,13 @@ def torch_forward(self, input_states, cache_params: Optional[Zamba2HybridDynamic
         dtype = input_states.dtype
         # Gated MLP's linear projection
         if cache_params is not None and cache_params.has_previous_state:
-             projected_states = self.in_proj(input_states.squeeze(1))
+            projected_states = self.in_proj(input_states.squeeze(1))
         else:
             if attention_mask is not None and not torch.all(attention_mask==1):
-                 # tune out hidden states for pad tokens, see https://github.com/state-spaces/mamba/issues/66
-                 input_states = (input_states * attention_mask[:, :, None]).to(dtype)
-             projected_states = self.in_proj(input_states)
-         d_mlp = (projected_states.shape[-1] - 2 * self.intermediate_size - 2 * self.n_groups * self.ssm_state_size- self.num_heads) // 2
+                # tune out hidden states for pad tokens, see https://github.com/state-spaces/mamba/issues/66
+                input_states = (input_states * attention_mask[:, :, None]).to(dtype)
+            projected_states = self.in_proj(input_states)
+        d_mlp = (projected_states.shape[-1] - 2 * self.intermediate_size - 2 * self.n_groups * self.ssm_state_size- self.num_heads) // 2
         _, _, gate, hidden_states, dt = projected_states.split(
             [d_mlp, d_mlp, self.intermediate_size, self.conv_dim, self.num_heads], dim=-1
         )
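The deleted and re-added lines above differ only in leading whitespace, which this page strips: the block was indented with stray extra spaces, and the PR realigns it to standard levels without changing behavior (the same fix is mirrored in modular_zamba2.py below). For context, the `split` that follows `in_proj` carves the projection into two MLP chunks, the gate, the convolution input, and the per-head dt; a sketch with hypothetical sizes:

```python
import torch

d_mlp, intermediate_size, conv_dim, num_heads = 3, 8, 10, 4
projected = torch.randn(2, 2 * d_mlp + intermediate_size + conv_dim + num_heads)
_, _, gate, hidden_states, dt = projected.split(
    [d_mlp, d_mlp, intermediate_size, conv_dim, num_heads], dim=-1
)
assert gate.shape[-1] == intermediate_size and dt.shape[-1] == num_heads
```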
10 changes: 5 additions & 5 deletions src/transformers/models/zamba2/modular_zamba2.py
@@ -510,13 +510,13 @@ def torch_forward(self, input_states, cache_params: Optional[Zamba2HybridDynamic
         dtype = input_states.dtype
         # Gated MLP's linear projection
         if cache_params is not None and cache_params.has_previous_state:
-             projected_states = self.in_proj(input_states.squeeze(1))
+            projected_states = self.in_proj(input_states.squeeze(1))
         else:
             if attention_mask is not None and not torch.all(attention_mask==1):
-                 # tune out hidden states for pad tokens, see https://github.com/state-spaces/mamba/issues/66
-                 input_states = (input_states * attention_mask[:, :, None]).to(dtype)
-             projected_states = self.in_proj(input_states)
-         d_mlp = (projected_states.shape[-1] - 2 * self.intermediate_size - 2 * self.n_groups * self.ssm_state_size- self.num_heads) // 2
+                # tune out hidden states for pad tokens, see https://github.com/state-spaces/mamba/issues/66
+                input_states = (input_states * attention_mask[:, :, None]).to(dtype)
+            projected_states = self.in_proj(input_states)
+        d_mlp = (projected_states.shape[-1] - 2 * self.intermediate_size - 2 * self.n_groups * self.ssm_state_size- self.num_heads) // 2
         _, _, gate, hidden_states, dt = projected_states.split(
             [d_mlp, d_mlp, self.intermediate_size, self.conv_dim, self.num_heads], dim=-1
         )