sdpython · sdpython · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -75,18 +75,19 @@ jobs:
         run: |
           export PYTHONPATH=.
           python _unittests/ut_torch_models/test_tiny_llms_onnx.py
-        continue-on-error: true
+        continue-on-error: true  # connectivity issues
 
       - name: tiny-llm example
         run: |
           export PYTHONPATH=.
           python _doc/examples/plot_export_tiny_llm.py
-        continue-on-error: true
+        continue-on-error: true  # connectivity issues
 
       - name: tiny-llm bypass
         run: |
           export PYTHONPATH=.
           python _doc/examples/plot_export_tiny_llm_patched.py
+        continue-on-error: true  # connectivity issues
 
       - name: run tests
         run: |

diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py
@@ -44,7 +44,7 @@
     data["n_weights"],
 )
 
-print(f"model {size / 2**10:1.3f} Kb with {n_weights} parameters.")
+print(f"model {size / 2**20:1.3f} Mb with {n_weights // 1000} thousand parameters.")
 # %%
 # The original model has 2.7 billion parameters. It was divided by more than 10.
 # Let's see the configuration.
@@ -156,4 +156,4 @@
 # It looks good.
 
 # %%
-doc.plot_legend("untrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "green")
+doc.plot_legend("untrained smaller\nmicrosoft/phi-2", "torch.onnx.export", "orange")
diff --git a/_unittests/ut_torch_models/test_hghub_api.py b/_unittests/ut_torch_models/test_hghub_api.py
@@ -127,6 +127,14 @@ def test_model_testings_and_architectures(self):
             task = task_from_id(mid)
             self.assertNotEmpty(task)
 
+    def test__ccached_config_64(self):
+        from onnx_diagnostic.torch_models.hghub.hub_data_cached_configs import (
+            _ccached_hf_internal_testing_tiny_random_beitforimageclassification,
+        )
+        # the ``_ccached_`` prefix is what hub_api uses to discover cached configurations
+        conf = _ccached_hf_internal_testing_tiny_random_beitforimageclassification()
+        self.assertEqual(conf.auxiliary_channels, 256)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py
@@ -1,12 +1,58 @@
 import functools
-from typing import List, Optional, Union
+from typing import Callable, Dict, List, Optional, Union
 import transformers
 from huggingface_hub import HfApi, model_info
+from . import hub_data_cached_configs
 from .hub_data import __date__, __data_tasks__, load_architecture_task
 
 
-def get_pretrained_config(model_id: str, trust_remote_code: bool = True) -> str:
-    """Returns the config for a model_id."""
+@functools.cache
+def _retrieve_cached_configurations() -> Dict[str, Callable]:
+    """Maps the docstring of every ``_ccached_*`` function to the function itself."""
+    return {
+        fct.__doc__: fct
+        for name, fct in hub_data_cached_configs.__dict__.items()
+        if name.startswith("_ccached_")
+    }
+
+
+def get_cached_configuration(name: str) -> Optional[transformers.PretrainedConfig]:
+    """
+    Returns the cached configuration for model *name* to avoid too many accesses
+    to internet, or None if it is not cached. The list of cached models follows.
+
+    .. runpython::
+
+        import pprint
+        from onnx_diagnostic.torch_models.hghub.hub_api import _retrieve_cached_configurations
+
+        configs = _retrieve_cached_configurations()
+        pprint.pprint(sorted(configs))
+    """
+    cached = _retrieve_cached_configurations()
+    assert cached, "no cached configuration, which is weird"
+    builder = cached.get(name)
+    return None if builder is None else builder()
+
+
+def get_pretrained_config(
+    model_id: str, trust_remote_code: bool = True, use_cached: bool = True
+) -> transformers.PretrainedConfig:
+    """
+    Returns the config for a model_id.
+
+    :param model_id: model id
+    :param trust_remote_code: trust_remote_code,
+        see :meth:`transformers.AutoConfig.from_pretrained`
+    :param use_cached: if True, tries :func:`get_cached_configuration` first
+        to avoid accessing the network (the cached list is mostly for unit tests)
+    :return: the cached configuration if there is one, otherwise the one
+        loaded by :meth:`transformers.AutoConfig.from_pretrained`
+    """
+    if use_cached:
+        conf = get_cached_configuration(model_id)
+        if conf is not None:
+            return conf
     return transformers.AutoConfig.from_pretrained(
         model_id, trust_remote_code=trust_remote_code
     )

diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -1,127 +1,129 @@
 import io
 import functools
+import textwrap
 from typing import Dict, List
 
 __date__ = "2025-03-26"
 
-__data_arch__ = """
-architecture,task
-ASTModel,feature-extraction
-AlbertModel,feature-extraction
-BeitForImageClassification,image-classification
-BigBirdModel,feature-extraction
-BlenderbotModel,feature-extraction
-BloomModel,feature-extraction
-CLIPModel,zero-shot-image-classification
-CLIPVisionModel,feature-extraction
-CamembertModel,feature-extraction
-CodeGenModel,feature-extraction
-ConvBertModel,feature-extraction
-ConvNextForImageClassification,image-classification
-ConvNextV2Model,image-feature-extraction
-CvtModel,feature-extraction
-DPTModel,image-feature-extraction
-Data2VecAudioModel,feature-extraction
-Data2VecTextModel,feature-extraction
-Data2VecVisionModel,image-feature-extraction
-DebertaModel,feature-extraction
-DebertaV2Model,feature-extraction
-DecisionTransformerModel,reinforcement-learning
-DeiTModel,image-feature-extraction
-DetrModel,image-feature-extraction
-Dinov2Model,image-feature-extraction
-DistilBertModel,feature-extraction
-DonutSwinModel,feature-extraction
-ElectraModel,feature-extraction
-EsmModel,feature-extraction
-GLPNModel,image-feature-extraction
-GPTBigCodeModel,feature-extraction
-GPTJModel,feature-extraction
-GPTNeoModel,feature-extraction
-GPTNeoXForCausalLM,text-generation
-GemmaForCausalLM,text-generation
-GraniteForCausalLM,text-generation
-GroupViTModel,feature-extraction
-HieraForImageClassification,image-classification
-HubertModel,feature-extraction
-IBertModel,feature-extraction
-ImageGPTModel,image-feature-extraction
-LayoutLMModel,feature-extraction
-LayoutLMv3Model,feature-extraction
-LevitModel,image-feature-extraction
-LiltModel,feature-extraction
-LlamaForCausalLM,text-generation
-LongT5Model,feature-extraction
-LongformerModel,feature-extraction
-MCTCTModel,feature-extraction
-MPNetModel,feature-extraction
-MT5Model,feature-extraction
-MarianMTModel,text2text-generation
-MarkupLMModel,feature-extraction
-MaskFormerForInstanceSegmentation,image-segmentation
-MegatronBertModel,feature-extraction
-MgpstrForSceneTextRecognition,feature-extraction
-MistralForCausalLM,text-generation
-MobileBertModel,feature-extraction
-MobileNetV1Model,image-feature-extraction
-MobileNetV2Model,image-feature-extraction
-MobileViTForImageClassification,image-classification
-ModernBertForMaskedLM,fill-mask
-MoonshineForConditionalGeneration,automatic-speech-recognition
-MptForCausalLM,text-generation
-MusicgenForConditionalGeneration,text-to-audio
-NystromformerModel,feature-extraction
-OPTModel,feature-extraction
-Olmo2ForCausalLM,text-generation
-OlmoForCausalLM,text-generation
-OwlViTModel,feature-extraction
-Owlv2Model,feature-extraction
-PatchTSMixerForPrediction,no-pipeline-tag
-PatchTSTForPrediction,no-pipeline-tag
-PegasusModel,feature-extraction
-Phi3ForCausalLM,text-generation
-PhiForCausalLM,text-generation
-Pix2StructForConditionalGeneration,image-to-text
-PoolFormerModel,image-feature-extraction
-PvtForImageClassification,image-classification
-Qwen2ForCausalLM,text-generation
-RTDetrForObjectDetection,object-detection
-RegNetModel,image-feature-extraction
-RemBertModel,feature-extraction
-ResNetForImageClassification,image-classification
-RoFormerModel,feature-extraction
-RobertaModel,feature-extraction
-RtDetrV2ForObjectDetection,object-detection
-SEWDModel,feature-extraction
-SEWModel,feature-extraction
-SamModel,mask-generation
-SegformerModel,image-feature-extraction
-SiglipModel,zero-shot-image-classification
-SiglipVisionModel,image-feature-extraction
-Speech2TextModel,feature-extraction
-SpeechT5ForTextToSpeech,text-to-audio
-SplinterModel,feature-extraction
-SqueezeBertModel,feature-extraction
-Swin2SRModel,image-feature-extraction
-SwinModel,image-feature-extraction
-Swinv2Model,image-feature-extraction
-T5ForConditionalGeneration,text2text-generation
-TableTransformerModel,image-feature-extraction
-UniSpeechForSequenceClassification,audio-classification
-ViTForImageClassification,image-classification
-ViTMAEModel,image-feature-extraction
-ViTMSNForImageClassification,image-classification
-VisionEncoderDecoderModel,document-question-answering
-VitPoseForPoseEstimation,keypoint-detection
-VitsModel,text-to-audio
-Wav2Vec2ConformerForCTC,automatic-speech-recognition
-Wav2Vec2Model,feature-extraction
-WhisperForConditionalGeneration,no-pipeline-tag
-XLMModel,feature-extraction
-XLMRobertaForCausalLM,text-generation
-YolosForObjectDetection,object-detection
-YolosModel,image-feature-extraction
-"""
+__data_arch__ = textwrap.dedent(
+    """
+    architecture,task
+    ASTModel,feature-extraction
+    AlbertModel,feature-extraction
+    BeitForImageClassification,image-classification
+    BigBirdModel,feature-extraction
+    BlenderbotModel,feature-extraction
+    BloomModel,feature-extraction
+    CLIPModel,zero-shot-image-classification
+    CLIPVisionModel,feature-extraction
+    CamembertModel,feature-extraction
+    CodeGenModel,feature-extraction
+    ConvBertModel,feature-extraction
+    ConvNextForImageClassification,image-classification
+    ConvNextV2Model,image-feature-extraction
+    CvtModel,feature-extraction
+    DPTModel,image-feature-extraction
+    Data2VecAudioModel,feature-extraction
+    Data2VecTextModel,feature-extraction
+    Data2VecVisionModel,image-feature-extraction
+    DebertaModel,feature-extraction
+    DebertaV2Model,feature-extraction
+    DecisionTransformerModel,reinforcement-learning
+    DeiTModel,image-feature-extraction
+    DetrModel,image-feature-extraction
+    Dinov2Model,image-feature-extraction
+    DistilBertModel,feature-extraction
+    DonutSwinModel,feature-extraction
+    ElectraModel,feature-extraction
+    EsmModel,feature-extraction
+    GLPNModel,image-feature-extraction
+    GPTBigCodeModel,feature-extraction
+    GPTJModel,feature-extraction
+    GPTNeoModel,feature-extraction
+    GPTNeoXForCausalLM,text-generation
+    GemmaForCausalLM,text-generation
+    GraniteForCausalLM,text-generation
+    GroupViTModel,feature-extraction
+    HieraForImageClassification,image-classification
+    HubertModel,feature-extraction
+    IBertModel,feature-extraction
+    ImageGPTModel,image-feature-extraction
+    LayoutLMModel,feature-extraction
+    LayoutLMv3Model,feature-extraction
+    LevitModel,image-feature-extraction
+    LiltModel,feature-extraction
+    LlamaForCausalLM,text-generation
+    LongT5Model,feature-extraction
+    LongformerModel,feature-extraction
+    MCTCTModel,feature-extraction
+    MPNetModel,feature-extraction
+    MT5Model,feature-extraction
+    MarianMTModel,text2text-generation
+    MarkupLMModel,feature-extraction
+    MaskFormerForInstanceSegmentation,image-segmentation
+    MegatronBertModel,feature-extraction
+    MgpstrForSceneTextRecognition,feature-extraction
+    MistralForCausalLM,text-generation
+    MobileBertModel,feature-extraction
+    MobileNetV1Model,image-feature-extraction
+    MobileNetV2Model,image-feature-extraction
+    MobileViTForImageClassification,image-classification
+    ModernBertForMaskedLM,fill-mask
+    MoonshineForConditionalGeneration,automatic-speech-recognition
+    MptForCausalLM,text-generation
+    MusicgenForConditionalGeneration,text-to-audio
+    NystromformerModel,feature-extraction
+    OPTModel,feature-extraction
+    Olmo2ForCausalLM,text-generation
+    OlmoForCausalLM,text-generation
+    OwlViTModel,feature-extraction
+    Owlv2Model,feature-extraction
+    PatchTSMixerForPrediction,no-pipeline-tag
+    PatchTSTForPrediction,no-pipeline-tag
+    PegasusModel,feature-extraction
+    Phi3ForCausalLM,text-generation
+    PhiForCausalLM,text-generation
+    Pix2StructForConditionalGeneration,image-to-text
+    PoolFormerModel,image-feature-extraction
+    PvtForImageClassification,image-classification
+    Qwen2ForCausalLM,text-generation
+    RTDetrForObjectDetection,object-detection
+    RegNetModel,image-feature-extraction
+    RemBertModel,feature-extraction
+    ResNetForImageClassification,image-classification
+    RoFormerModel,feature-extraction
+    RobertaModel,feature-extraction
+    RtDetrV2ForObjectDetection,object-detection
+    SEWDModel,feature-extraction
+    SEWModel,feature-extraction
+    SamModel,mask-generation
+    SegformerModel,image-feature-extraction
+    SiglipModel,zero-shot-image-classification
+    SiglipVisionModel,image-feature-extraction
+    Speech2TextModel,feature-extraction
+    SpeechT5ForTextToSpeech,text-to-audio
+    SplinterModel,feature-extraction
+    SqueezeBertModel,feature-extraction
+    Swin2SRModel,image-feature-extraction
+    SwinModel,image-feature-extraction
+    Swinv2Model,image-feature-extraction
+    T5ForConditionalGeneration,text2text-generation
+    TableTransformerModel,image-feature-extraction
+    UniSpeechForSequenceClassification,audio-classification
+    ViTForImageClassification,image-classification
+    ViTMAEModel,image-feature-extraction
+    ViTMSNForImageClassification,image-classification
+    VisionEncoderDecoderModel,document-question-answering
+    VitPoseForPoseEstimation,keypoint-detection
+    VitsModel,text-to-audio
+    Wav2Vec2ConformerForCTC,automatic-speech-recognition
+    Wav2Vec2Model,feature-extraction
+    WhisperForConditionalGeneration,no-pipeline-tag
+    XLMModel,feature-extraction
+    XLMRobertaForCausalLM,text-generation
+    YolosForObjectDetection,object-detection
+    YolosModel,image-feature-extraction"""
+)
 
 __data_tasks__ = [
     "automatic-speech-recognition",