add summarization

xadupre · xadupre · commit 63abbbb72d2d · 2025-05-19T14:57:58.000+02:00
diff --git a/_doc/api/tasks/index.rst b/_doc/api/tasks/index.rst
@@ -43,6 +43,7 @@ Or:
     mixture_of_expert
     object_detection
     sentence_similarity
+    summarization
     text_classification
     text_generation
     text2text_generation
diff --git a/_doc/api/tasks/summarization.rst b/_doc/api/tasks/summarization.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.tasks.summarization
+===================================
+
+.. automodule:: onnx_diagnostic.tasks.summarization
+    :members:
+    :no-undoc-members:
diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py
@@ -150,6 +150,20 @@ def test_feature_extraction_tiny_bart(self):
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )
 
+    @hide_stdout()
+    def test_summarization(self):
+        # builds an untrained model for this id and runs it on both sets of inputs
+        model_id = "facebook/bart-large-cnn"
+        data = get_untrained_model_with_inputs(model_id, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "summarization")
+        footprint = (data["size"], data["n_weights"])
+        self.assertIn(footprint, [(1625161728, 406290432)])
+        model = data["model"]
+        inputs = data["inputs"]
+        _ds = data["dynamic_shapes"]  # only read for now, see the disabled export below
+        model(**inputs)
+        model(**data["inputs2"])
+        # the export is not checked yet:
+        # with torch_export_patches(patch_transformers=True, verbose=10):
+        #    torch.export.export(
+        #        model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+        #    )
+
     @hide_stdout()
     def test_text_classification(self):
         mid = "Intel/bert-base-uncased-mrpc"
diff --git a/onnx_diagnostic/tasks/__init__.py b/onnx_diagnostic/tasks/__init__.py
@@ -8,6 +8,7 @@
     mixture_of_expert,
     object_detection,
     sentence_similarity,
+    summarization,
     text_classification,
     text_generation,
     text2text_generation,
@@ -23,6 +24,7 @@
     mixture_of_expert,
     object_detection,
     sentence_similarity,
+    summarization,
     text_classification,
     text_generation,
     text2text_generation,
diff --git a/onnx_diagnostic/tasks/summarization.py b/onnx_diagnostic/tasks/summarization.py
@@ -0,0 +1,221 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
+from ..helpers.config_helper import update_config, check_hasattr, _pick
+
+__TASK__ = "summarization"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """
+    Reduces a model size by capping the number of layers to 2.
+
+    :param config: configuration, modified in place
+    :return: dictionary handed to ``update_config``, it is empty
+        because the layer counts are written directly on *config*
+    """
+    for name in ("num_decoder_layers", "num_hidden_layers"):
+        # attributes missing from the configuration are left alone
+        if hasattr(config, name):
+            setattr(config, name, min(getattr(config, name), 2))
+    kwargs: Dict[str, Any] = {}
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    dummy_max_token_id: int,
+    num_key_value_heads_encoder: int,
+    num_key_value_heads_decoder: int,
+    num_hidden_layers: int,
+    head_dim_encoder: int,
+    head_dim_decoder: int,
+    batch_size: int = 2,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    add_second_input: bool = False,
+    **kwargs,  # unused
+):
+    """
+    Builds random inputs and their dynamic shapes for task ``summarization``.
+
+    :param model: model, only forwarded to the recursive call
+    :param config: configuration, only forwarded to the recursive call
+    :param dummy_max_token_id: upper bound given to ``torch.randint``
+        when drawing the token ids
+    :param num_key_value_heads_encoder: number of heads of the first cache
+    :param num_key_value_heads_decoder: number of heads of the second cache
+    :param num_hidden_layers: number of (key, value) pairs in every cache
+    :param head_dim_encoder: last dimension of the first cache
+    :param head_dim_decoder: last dimension of the second cache
+    :param batch_size: batch size
+    :param sequence_length: length of ``input_ids``, ``attention_mask``
+        and of the first cache
+    :param sequence_length2: length of ``decoder_input_ids`` and of the second cache
+    :param add_second_input: adds key ``inputs2``, a second set of inputs
+        where the batch size and both lengths are increased by one
+    :param kwargs: not used here, forwarded to the recursive call
+    :return: dictionary with keys ``inputs``, ``dynamic_shapes``
+        and ``inputs2`` if *add_second_input* is True
+
+    Inputs stolen from one model:
+
+    ::
+
+        cache_position:T7s1
+        past_key_values:EncoderDecoderCache(
+            self_attention_cache=DynamicCache(
+                key_cache=#6[T1s1x8x1x64,...],
+                value_cache=#6[T1s1x8x1x64,...]),
+            cross_attention_cache=DynamicCache(
+                key_cache=#6[T1s1x8x16x64,...],
+                value_cache=#6[T1s1x8x16x64,...])),
+        decoder_input_ids:T7s1x1,
+        encoder_outputs:dict(last_hidden_state:T1s1x16x512)
+    """
+    batch = torch.export.Dim("batch", min=1, max=1024)
+
+    def _cache_axes(length_name: str):
+        # keys then values, one fresh dictionary per layer
+        return [
+            [{0: batch, 2: length_name} for _ in range(num_hidden_layers)]
+            for _ in range(2)
+        ]
+
+    def _random_cache(n_heads: int, length: int, head_dim: int):
+        # one (key, value) pair per layer, the key is drawn before the value
+        cache_shape = (batch_size, n_heads, length, head_dim)
+        return make_dynamic_cache(
+            [
+                (torch.randn(*cache_shape), torch.randn(*cache_shape))
+                for _ in range(num_hidden_layers)
+            ]
+        )
+
+    # no entry for cache_position, encoder_last_hidden_state or encoder_outputs,
+    # dynamic dimensions other than the batch are given by name
+    shapes = {
+        "input_ids": {0: batch, 1: "seq_length"},
+        "decoder_input_ids": {0: batch, 1: "seq_ids"},
+        "attention_mask": {0: batch, 1: "seq_mask"},
+        "past_key_values": [
+            _cache_axes("cache_length_key"),
+            _cache_axes("cache_length_val"),
+        ],
+    }
+
+    # the random tensors are created in the same order as the keys below
+    input_ids = torch.randint(0, dummy_max_token_id, (batch_size, sequence_length))
+    decoder_input_ids = torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2))
+    inputs = {
+        "input_ids": input_ids.to(torch.int64),
+        "decoder_input_ids": decoder_input_ids.to(torch.int64),
+        "attention_mask": torch.ones((batch_size, sequence_length)).to(torch.int64),
+        "past_key_values": make_encoder_decoder_cache(
+            _random_cache(num_key_value_heads_encoder, sequence_length, head_dim_encoder),
+            _random_cache(num_key_value_heads_decoder, sequence_length2, head_dim_decoder),
+        ),
+    }
+
+    result = {"inputs": inputs, "dynamic_shapes": shapes}
+    if not add_second_input:
+        return result
+
+    # add_second_input is not forwarded, the recursion stops after one level
+    second = get_inputs(
+        model=model,
+        config=config,
+        dummy_max_token_id=dummy_max_token_id,
+        num_key_value_heads_encoder=num_key_value_heads_encoder,
+        num_key_value_heads_decoder=num_key_value_heads_decoder,
+        num_hidden_layers=num_hidden_layers,
+        head_dim_encoder=head_dim_encoder,
+        head_dim_decoder=head_dim_decoder,
+        batch_size=batch_size + 1,
+        sequence_length=sequence_length + 1,
+        sequence_length2=sequence_length2 + 1,
+        **kwargs,
+    )
+    result["inputs2"] = second["inputs"]
+    return result
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Returns the keyword arguments for :func:`get_inputs` and the function itself.
+
+    If the configuration is None, the function selects typical dimensions.
+    Otherwise, the dimensions are read from the configuration, both head
+    dimensions are the integer part of the square root of
+    ``encoder_ffn_dim`` and ``decoder_ffn_dim``.
+
+    :param config: configuration or None
+    :return: keyword arguments, function :func:`get_inputs`
+    """
+    kwargs: Dict[str, Any] = dict(
+        batch_size=2,
+        sequence_length=30,
+        sequence_length2=3,
+        head_dim_encoder=16,
+        head_dim_decoder=16,
+        dummy_max_token_id=31999,
+        num_hidden_layers=8,
+        num_key_value_heads_encoder=16,
+        num_key_value_heads_decoder=16,
+    )
+    if config is None:
+        return kwargs, get_inputs
+
+    check_hasattr(
+        config,
+        "vocab_size",
+        "hidden_size",
+        "num_attention_heads",
+        ("num_hidden_layers", "num_layers"),
+        ("n_positions", "d_model"),
+        (
+            "num_key_value_heads",
+            "num_heads",
+            ("decoder_attention_heads", "encoder_attention_heads"),
+        ),
+    )
+    # NOTE(review): PLBartForConditionalGeneration presumably needs a dedicated
+    # rule (encoder_attention_heads + decoder_attention_heads), not handled here
+    shared_head_names = ("num_key_value_heads", "num_heads")
+    kwargs.update(
+        head_dim_encoder=int(_pick(config, "encoder_ffn_dim") ** 0.5),
+        head_dim_decoder=int(_pick(config, "decoder_ffn_dim") ** 0.5),
+        dummy_max_token_id=config.vocab_size - 1,
+        num_hidden_layers=_pick(config, "num_hidden_layers", "num_layers"),
+        num_key_value_heads_encoder=_pick(
+            config, "encoder_attention_heads", *shared_head_names
+        ),
+        num_key_value_heads_decoder=_pick(
+            config, "decoder_attention_heads", *shared_head_names
+        ),
+    )
+    return kwargs, get_inputs
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -13,6 +13,7 @@
     ASTModel,feature-extraction
     AlbertModel,feature-extraction
     BeitForImageClassification,image-classification
+    BartForConditionalGeneration,summarization
     BartModel,feature-extraction
     BertForMaskedLM,fill-mask
     BertForSequenceClassification,text-classification
@@ -163,6 +164,7 @@
     "object-detection",
     "reinforcement-learning",
     "sentence-similarity",
+    "summarization",
     "text-classification",
     "text-generation",
     "text-to-image",
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py b/onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py
@@ -3852,7 +3852,7 @@ def _ccached_hustvl_yolos_tiny():
     )
 
 
-def _ccached_facebook_bart_base():
+def _ccached_tiny_random_plbart_for_conditioan_generation():
     "hf-tiny-model-private/tiny-random-PLBartForConditionalGeneration"
     return transformers.BartConfig(
         **{
@@ -3887,3 +3887,67 @@ def _ccached_facebook_bart_base():
             "vocab_size": 50005,
         }
     )
+
+
+def _ccached_facebook_bart_large_cnn():
+    "facebook/bart-large-cnn"
+    # Returns a ``transformers.BartConfig`` built from the literal settings below.
+    # NOTE(review): the bare string above is the function docstring and holds the
+    # model id; presumably it is read at runtime to index the cached
+    # configurations, keep it verbatim -- TODO confirm.
+    # NOTE(review): ``true`` and ``false`` are not Python literals, they are assumed
+    # to be module-level aliases defined outside of this hunk -- TODO confirm.
+    return transformers.BartConfig(
+        **{
+            "_num_labels": 3,
+            "activation_dropout": 0.0,
+            "activation_function": "gelu",
+            "add_final_layer_norm": false,
+            "architectures": ["BartForConditionalGeneration"],
+            "attention_dropout": 0.0,
+            "bos_token_id": 0,
+            "classif_dropout": 0.0,
+            "classifier_dropout": 0.0,
+            "d_model": 1024,
+            "decoder_attention_heads": 16,
+            "decoder_ffn_dim": 4096,
+            "decoder_layerdrop": 0.0,
+            "decoder_layers": 12,
+            "decoder_start_token_id": 2,
+            "dropout": 0.1,
+            "early_stopping": true,
+            "encoder_attention_heads": 16,
+            "encoder_ffn_dim": 4096,
+            "encoder_layerdrop": 0.0,
+            "encoder_layers": 12,
+            "eos_token_id": 2,
+            "force_bos_token_to_be_generated": true,
+            "forced_bos_token_id": 0,
+            "forced_eos_token_id": 2,
+            "gradient_checkpointing": false,
+            "id2label": {"0": "LABEL_0", "1": "LABEL_1", "2": "LABEL_2"},
+            "init_std": 0.02,
+            "is_encoder_decoder": true,
+            "label2id": {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2},
+            "length_penalty": 2.0,
+            "max_length": 142,
+            "max_position_embeddings": 1024,
+            "min_length": 56,
+            "model_type": "bart",
+            "no_repeat_ngram_size": 3,
+            "normalize_before": false,
+            "num_beams": 4,
+            "num_hidden_layers": 12,
+            "output_past": true,
+            "pad_token_id": 1,
+            "prefix": " ",
+            "scale_embedding": false,
+            # generation settings stored under the task name, the values repeat
+            # the top-level generation keys of this dictionary
+            "task_specific_params": {
+                "summarization": {
+                    "early_stopping": true,
+                    "length_penalty": 2.0,
+                    "max_length": 142,
+                    "min_length": 56,
+                    "no_repeat_ngram_size": 3,
+                    "num_beams": 4,
+                }
+            },
+            "transformers_version": "4.7.0.dev0",
+            "use_cache": true,
+            "vocab_size": 50264,
+        }
+    )
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -1,4 +1,5 @@
 import inspect
+import os
 from typing import Any, Dict, Optional, Tuple
 import torch
 import transformers
@@ -132,6 +133,11 @@ def get_untrained_model_with_inputs(
     kwargs, fct = random_input_kwargs(config, task)
     if verbose:
         print(f"[get_untrained_model_with_inputs] use fct={fct}")
+        if os.environ.get("PRINT_CONFIG") in (1, "1"):
+            import pprint
+
+            print(f"-- input kwargs for task {task!r}")
+            pprint.pprint(kwargs)
     if inputs_kwargs:
         kwargs.update(inputs_kwargs)
 
diff --git a/onnx_diagnostic/torch_models/test_helper.py b/onnx_diagnostic/torch_models/test_helper.py