Skip to content

Commit 1a6b4e6

Browse files
committed
[compat] Introduce Transformers v5.2 compatibility: trainer _nested_gather moved (#3664)
* Introduce Transformers v5.2 compatibility: the trainer's `_nested_gather` moved. * Replace prajjwal1/bert-tiny due to issues loading it with AutoConfig. * Disable the transformers progress bars in the CI, since the Transformers v5 weight-loading progress bars heavily expand the logs.
1 parent f7f7506 commit 1a6b4e6

File tree

5 files changed

+23
-11
lines changed

5 files changed

+23
-11
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ on:
1313

1414
env:
1515
TRANSFORMERS_IS_CI: 1
16+
HF_HUB_DISABLE_PROGRESS_BARS: 1 # The Transformers v5 weight loading progress bars heavily expand the logs
1617

1718
jobs:
1819
test_sampling:

sentence_transformers/trainer.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,16 @@ def log(self, logs: dict[str, float], start_time: float | None = None) -> None:
475475
# If we don't copy the logs, we'll include the loss components in the on_evaluate as well,
476476
# whereas we prefer to have them only in the on_log
477477
logs = logs.copy()
478-
accum_losses = self._nested_gather(self.accum_loss_components[training_type])
478+
# Transformers v4/v5 compatibility: v5.2 moves _nested_gather to `transformers.trainer_pt_utils`,
479+
# see https://github.com/huggingface/transformers/pull/43744
480+
if hasattr(self, "_nested_gather"):
481+
accum_losses = self._nested_gather(self.accum_loss_components[training_type])
482+
else:
483+
from transformers.trainer_pt_utils import nested_gather
484+
485+
accum_losses = nested_gather(
486+
self.accum_loss_components[training_type], parallel_mode=self.args.parallel_mode
487+
)
479488
if "steps" in accum_losses:
480489
steps = accum_losses.get("steps").sum().item()
481490
self.accum_loss_components[training_type]["steps"] *= 0

tests/cross_encoder/test_cross_encoder.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def test_target_device_backwards_compat():
258258

259259

260260
def test_num_labels_fresh_model():
261-
model = CrossEncoder("prajjwal1/bert-tiny")
261+
model = CrossEncoder("sentence-transformers-testing/stsb-bert-tiny-safetensors")
262262
assert model.num_labels == 1
263263

264264

@@ -542,7 +542,9 @@ def test_logger_warning(caplog):
542542
],
543543
)
544544
def test_load_activation_fn_from_kwargs(num_labels: int, activation_fn: str, saved_activation_fn: str, tmp_path: Path):
545-
model = CrossEncoder("prajjwal1/bert-tiny", num_labels=num_labels, activation_fn=activation_fn)
545+
model = CrossEncoder(
546+
"sentence-transformers-testing/stsb-bert-tiny-safetensors", num_labels=num_labels, activation_fn=activation_fn
547+
)
546548
assert fullname(model.activation_fn) == saved_activation_fn
547549

548550
model.save_pretrained(tmp_path)

tests/cross_encoder/test_model_card.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def dummy_dataset():
4646
"- sentence-transformers",
4747
"- cross-encoder",
4848
"pipeline_tag: text-ranking",
49-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny)",
49+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors)",
5050
"[sentence-transformers](https://www.SBERT.net) library",
5151
"It computes scores for pairs of texts, which can be used for text reranking and semantic search.",
5252
"**Maximum Sequence Length:** 512 tokens",
@@ -71,7 +71,7 @@ def dummy_dataset():
7171
"- sentence-transformers",
7272
"- cross-encoder",
7373
"pipeline_tag: text-classification",
74-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny)",
74+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors)",
7575
"[sentence-transformers](https://www.SBERT.net) library",
7676
"It computes scores for pairs of texts, which can be used for text pair classification.",
7777
"**Maximum Sequence Length:** 512 tokens",
@@ -91,15 +91,15 @@ def dummy_dataset():
9191
1,
9292
1,
9393
[
94-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) on the train_0 dataset using the [sentence-transformers](https://www.SBERT.net) library.",
94+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors) on the train_0 dataset using the [sentence-transformers](https://www.SBERT.net) library.",
9595
"#### train_0",
9696
],
9797
),
9898
(
9999
2,
100100
1,
101101
[
102-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) on the train_0 and train_1 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
102+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors) on the train_0 and train_1 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
103103
"#### train_0",
104104
"#### train_1",
105105
],
@@ -108,7 +108,7 @@ def dummy_dataset():
108108
10,
109109
1,
110110
[
111-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) on the train_0, train_1, train_2, train_3, train_4, train_5, train_6, train_7, train_8 and train_9 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
111+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors) on the train_0, train_1, train_2, train_3, train_4, train_5, train_6, train_7, train_8 and train_9 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
112112
"<details><summary>train_0</summary>", # We start using <details><summary> if we have more than 3 datasets
113113
"#### train_0",
114114
"</details>\n<details><summary>train_9</summary>",
@@ -120,7 +120,7 @@ def dummy_dataset():
120120
50,
121121
1,
122122
[
123-
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) on 50 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
123+
"This is a [Cross Encoder](https://www.sbert.net/docs/cross_encoder/usage/usage.html) model finetuned from [sentence-transformers-testing/stsb-bert-tiny-safetensors](https://huggingface.co/sentence-transformers-testing/stsb-bert-tiny-safetensors) on 50 datasets using the [sentence-transformers](https://www.SBERT.net) library.",
124124
"<details><summary>train_0</summary>",
125125
"#### train_0",
126126
"</details>\n<details><summary>train_49</summary>",
@@ -135,7 +135,7 @@ def test_model_card_base(
135135
num_labels: int,
136136
expected_substrings: list[str],
137137
) -> None:
138-
model = CrossEncoder("prajjwal1/bert-tiny", num_labels=num_labels)
138+
model = CrossEncoder("sentence-transformers-testing/stsb-bert-tiny-safetensors", num_labels=num_labels)
139139

140140
# Let's avoid requesting the Hub for e.g. checking if a base model exists there
141141
model.model_card_data.local_files_only = True

tests/sparse_encoder/test_sparse_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_detect_mlm():
472472
def test_default_to_csr():
473473
# NOTE: bert-tiny is actually MLM-based, but the config isn't modern enough to allow us to detect it,
474474
# so we should default to CSR here.
475-
model = SparseEncoder("prajjwal1/bert-tiny")
475+
model = SparseEncoder("sentence-transformers-testing/stsb-bert-tiny-safetensors")
476476
assert isinstance(model[0], Transformer)
477477
assert isinstance(model[1], Pooling)
478478
assert isinstance(model[2], SparseAutoEncoder)

0 commit comments

Comments (0)