switch to 4.53.3 on CI (#193)

xadupre · web-flow · commit 1158ecae9bc6 · 2025-07-25T18:59:31.000+02:00
* switch to 4.53.3 on CI

* cache the configuration of fxmarty/sam-vit-tiny-random

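* documentation workflow: install graphviz with ts-graphviz/setup-graphviz@v2 instead of tlylt/install-graphviz@v1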

* relax the test tolerance (atol and rtol from 1e-5 to 1e-4) in test_sbs.py
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
       matrix:
         os: [ubuntu-latest]
         python: ['3.10', '3.11', '3.12', '3.13']
-        transformers: ['4.48.3', '4.51.3', '4.52.4', '4.53.1', 'main']
+        transformers: ['4.48.3', '4.51.3', '4.52.4', '4.53.3', 'main']
         torch: ['2.7', 'main']
         exclude:
           - python: '3.10'
@@ -28,7 +28,7 @@ jobs:
           - python: '3.10'
             transformers: 'main'
           - python: '3.11'
-            transformers: '4.53.1'
+            transformers: '4.53.3'
           - python: '3.11'
             transformers: 'main'
           - python: '3.13'
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -24,7 +24,8 @@ jobs:
         with:
           python-version: ${{ matrix.python }}
 
-      - uses: tlylt/install-graphviz@v1
+      - name: install graphviz
+        uses: ts-graphviz/setup-graphviz@v2
 
       - name: Install pandoc
         run: sudo apt-get install -y pandoc
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
 0.7.6
 +++++
 
+* :pr:`193`: validates with 4.53.3 
+* :pr:`189`: support for task mask-generation
 * :pr:`192`: add support for Gemma-3, add serialization for HybridCache,
   changes to support ``transformers>=4.54``
 
diff --git a/_doc/conf.py b/_doc/conf.py
@@ -119,6 +119,7 @@ def linkcode_resolve(domain, info):
     ("py:class", "False"),
     ("py:class", "True"),
     ("py:class", "Argument"),
+    ("py:class", "CacheProcessor"),
     ("py:class", "default=sklearn.utils.metadata_routing.UNCHANGED"),
     ("py:class", "diffusers.models.unets.unet_2d_condition.UNet2DConditionOutput"),
     ("py:class", "ModelProto"),
@@ -142,6 +143,7 @@ def linkcode_resolve(domain, info):
     ("py:class", "transformers.cache_utils.EncoderDecoderCache"),
     ("py:class", "transformers.cache_utils.HybridCache"),
     ("py:class", "transformers.cache_utils.MambaCache"),
+    ("py:class", "transformers.models.mamba.modeling_mamba.MambaCache"),
     ("py:class", "transformers.cache_utils.SlidingWindowCache"),
     ("py:class", "transformers.cache_utils.StaticCache"),
     ("py:class", "transformers.configuration_utils.PretrainedConfig"),
diff --git a/_unittests/ut_torch_onnx/test_sbs.py b/_unittests/ut_torch_onnx/test_sbs.py
@@ -78,7 +78,7 @@ def forward(self, x):
                 onx,
                 (x,),
                 check_conversion_cls=dict(
-                    cls=ExtendedReferenceEvaluator, atol=1e-5, rtol=1e-5
+                    cls=ExtendedReferenceEvaluator, atol=1e-4, rtol=1e-4
                 ),
                 verbose=1,
             ),
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py b/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py
@@ -1366,6 +1366,236 @@ def _ccached_fxmarty_tiny_random_gemmaforcausallm():
     )
 
 
+def _ccached_fxmarty_sam_vit_tiny_random():
+    "fxmarty/sam-vit-tiny-random"
+    return transformers.SamConfig(
+        **{
+            "_commit_hash": "a7c34ea5d2b33a3bc34d34dc9a7b2417c0eaa809",
+            "_name_or_path": "facebook/sam-vit-base",
+            "architectures": ["SamModel"],
+            "initializer_range": 0.02,
+            "mask_decoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_downsample_rate": 2,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "relu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "iou_head_depth": 3,
+                "iou_head_hidden_dim": 256,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 2048,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 8,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_hidden_layers": 2,
+                "num_multimask_outputs": 3,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "model_type": "sam",
+            "prompt_encoder_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "hidden_act": "gelu",
+                "hidden_size": 32,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_embedding_size": 64,
+                "image_size": 1024,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "mask_input_channels": 16,
+                "max_length": 20,
+                "min_length": 0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_point_embeddings": 4,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "pruned_heads": {},
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_bfloat16": false,
+            },
+            "torch_dtype": "float32",
+            "transformers_version": null,
+            "vision_config": {
+                "_name_or_path": "",
+                "add_cross_attention": false,
+                "architectures": null,
+                "attention_dropout": 0.0,
+                "bad_words_ids": null,
+                "begin_suppress_tokens": null,
+                "bos_token_id": null,
+                "chunk_size_feed_forward": 0,
+                "cross_attention_hidden_size": null,
+                "decoder_start_token_id": null,
+                "diversity_penalty": 0.0,
+                "do_sample": false,
+                "dropout": 0.0,
+                "early_stopping": false,
+                "encoder_no_repeat_ngram_size": 0,
+                "eos_token_id": null,
+                "exponential_decay_length_penalty": null,
+                "finetuning_task": null,
+                "forced_bos_token_id": null,
+                "forced_eos_token_id": null,
+                "global_attn_indexes": [2, 5, 8, 11],
+                "hidden_act": "gelu",
+                "hidden_size": 96,
+                "id2label": {"0": "LABEL_0", "1": "LABEL_1"},
+                "image_size": 1024,
+                "initializer_factor": 1.0,
+                "initializer_range": 1e-10,
+                "intermediate_size": 768,
+                "is_decoder": false,
+                "is_encoder_decoder": false,
+                "label2id": {"LABEL_0": 0, "LABEL_1": 1},
+                "layer_norm_eps": 1e-06,
+                "length_penalty": 1.0,
+                "max_length": 20,
+                "min_length": 0,
+                "mlp_dim": 384,
+                "mlp_ratio": 4.0,
+                "model_type": "",
+                "no_repeat_ngram_size": 0,
+                "num_attention_heads": 1,
+                "num_beam_groups": 1,
+                "num_beams": 1,
+                "num_channels": 3,
+                "num_hidden_layers": 12,
+                "num_pos_feats": 16,
+                "num_return_sequences": 1,
+                "output_attentions": false,
+                "output_channels": 32,
+                "output_hidden_states": false,
+                "output_scores": false,
+                "pad_token_id": null,
+                "patch_size": 16,
+                "prefix": null,
+                "problem_type": null,
+                "projection_dim": 64,
+                "pruned_heads": {},
+                "qkv_bias": true,
+                "remove_invalid_values": false,
+                "repetition_penalty": 1.0,
+                "return_dict": true,
+                "return_dict_in_generate": false,
+                "sep_token_id": null,
+                "suppress_tokens": null,
+                "task_specific_params": null,
+                "temperature": 1.0,
+                "tf_legacy_loss": false,
+                "tie_encoder_decoder": false,
+                "tie_word_embeddings": true,
+                "tokenizer_class": null,
+                "top_k": 50,
+                "top_p": 1.0,
+                "torch_dtype": null,
+                "torchscript": false,
+                "transformers_version": "4.29.0.dev0",
+                "typical_p": 1.0,
+                "use_abs_pos": true,
+                "use_bfloat16": false,
+                "use_rel_pos": true,
+                "window_size": 14,
+            },
+        }
+    )
+
+
 def _ccached_hf_internal_testing_tiny_random_gptneoxforcausallm():
     "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"
     return transformers.GPTNeoXConfig(