```bash
(optimum-onnx) (base) ilyas@hf-dgx-01:~/optimum-onnx$ time optimum-cli export onnx -h
usage: optimum-cli export onnx [-h] -m MODEL [--task TASK] [--opset OPSET] [--device DEVICE] [--dtype {fp32,fp16,bf16}]
[--optimize {O1,O2,O3,O4}] [--monolith] [--no-post-process] [--variant VARIANT] [--framework {pt}] [--atol ATOL]
[--cache_dir CACHE_DIR] [--trust-remote-code] [--pad_token_id PAD_TOKEN_ID]
[--library-name {transformers,diffusers,timm,sentence_transformers}] [--model-kwargs MODEL_KWARGS]
[--no-dynamic-axes] [--no-constant-folding] [--slim] [--batch_size BATCH_SIZE] [--sequence_length SEQUENCE_LENGTH]
[--num_choices NUM_CHOICES] [--width WIDTH] [--height HEIGHT] [--num_channels NUM_CHANNELS]
[--feature_size FEATURE_SIZE] [--nb_max_frames NB_MAX_FRAMES] [--audio_sequence_length AUDIO_SEQUENCE_LENGTH]
[--point_batch_size POINT_BATCH_SIZE] [--nb_points_per_image NB_POINTS_PER_IMAGE]
[--visual_seq_length VISUAL_SEQ_LENGTH]
output
options:
-h, --help show this help message and exit
Required arguments:
-m MODEL, --model MODEL
Model ID on huggingface.co or path on disk to load model from.
output Path indicating the directory where to store the generated ONNX model.
Optional arguments:
--task TASK The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files.
For decoder models, use `xxx-with-past` to export the model using past key values in the decoder.Available tasks depend
on the model, but are among the following list: ['audio-classification', 'audio-frame-classification', 'audio-xvector',
'automatic-speech-recognition', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask',
'image-classification', 'image-segmentation', 'image-text-to-text', 'image-to-image', 'image-to-text', 'inpainting',
'keypoint-detection', 'mask-generation', 'masked-im', 'multiple-choice', 'object-detection', 'question-answering',
'reinforcement-learning', 'semantic-segmentation', 'sentence-similarity', 'text-classification', 'text-generation',
'text-to-audio', 'text-to-image', 'text2text-generation', 'time-series-forecasting', 'token-classification', 'visual-
question-answering', 'zero-shot-image-classification', 'zero-shot-object-detection'].
--opset OPSET If specified, ONNX opset version to export the model with. Otherwise, the default opset for the given model architecture
will be used.
--device DEVICE The device to use to do the export. Defaults to "cpu".
--dtype {fp32,fp16,bf16}
The floating point precision to use for the export. Supported options: fp32 (float32), fp16 (float16), bf16 (bfloat16).
--optimize {O1,O2,O3,O4}
Allows to run ONNX Runtime optimizations directly during the export. Some of these optimizations are specific to ONNX
Runtime, and the resulting ONNX will not be usable with other runtime as OpenVINO or TensorRT. Possible options: - O1:
Basic general optimizations - O2: Basic and extended general optimizations, transformers-specific fusions - O3: Same as
O2 with GELU approximation - O4: Same as O3 with mixed precision (fp16, GPU-only, requires `--device cuda`)
--monolith Forces to export the model as a single ONNX file. By default, the ONNX exporter may break the model in several ONNX
files, for example for encoder-decoder models where the encoder should be run only once while the decoder is looped over.
--no-post-process Allows to disable any post-processing done by default on the exported ONNX models. For example, the merging of decoder
and decoder-with-past models into a single ONNX model file to reduce memory usage.
--variant VARIANT Select a variant of the model to export.
--framework {pt} The framework to use for the ONNX export. If not provided, will attempt to use the local checkpoint's original framework
or what is available in the environment.
--atol ATOL If specified, the absolute difference tolerance when validating the model. Otherwise, the default atol for the model will
be used.
--cache_dir CACHE_DIR
Path indicating where to store cache.
--trust-remote-code Allows to use custom code for the modeling hosted in the model repository. This option should only be set for
repositories you trust and in which you have read the code, as it will execute on your local machine arbitrary code
present in the model repository.
--pad_token_id PAD_TOKEN_ID
This is needed by some models, for some tasks. If not provided, will attempt to use the tokenizer to guess it.
--library-name {transformers,diffusers,timm,sentence_transformers}
The library on the model. If not provided, will attempt to infer the local checkpoint's library
--model-kwargs MODEL_KWARGS
Any kwargs passed to the model forward, or used to customize the export for a given model.
--no-dynamic-axes Disable dynamic axes during ONNX export
--no-constant-folding
PyTorch-only argument. Disables PyTorch ONNX export constant folding.
--slim Enables onnxslim optimization.
Input shapes (if necessary, this allows to override the shapes of the input given to the ONNX exporter, that requires an example input).:
--batch_size BATCH_SIZE
Text tasks only. Batch size to use in the example input given to the ONNX export.
--sequence_length SEQUENCE_LENGTH
Text tasks only. Sequence length to use in the example input given to the ONNX export.
--num_choices NUM_CHOICES
Text tasks only. Num choices to use in the example input given to the ONNX export.
--width WIDTH Image tasks only. Width to use in the example input given to the ONNX export.
--height HEIGHT Image tasks only. Height to use in the example input given to the ONNX export.
--num_channels NUM_CHANNELS
Image tasks only. Number of channels to use in the example input given to the ONNX export.
--feature_size FEATURE_SIZE
Audio tasks only. Feature size to use in the example input given to the ONNX export.
--nb_max_frames NB_MAX_FRAMES
Audio tasks only. Maximum number of frames to use in the example input given to the ONNX export.
--audio_sequence_length AUDIO_SEQUENCE_LENGTH
Audio tasks only. Audio sequence length to use in the example input given to the ONNX export.
--point_batch_size POINT_BATCH_SIZE
For Segment Anything. It corresponds to how many segmentation masks we want the model to predict per input point.
--nb_points_per_image NB_POINTS_PER_IMAGE
For Segment Anything. It corresponds to the number of points per segmentation masks.
--visual_seq_length VISUAL_SEQ_LENGTH
Visual sequence length
real 0m0,086s
user 0m0,082s
sys 0m0,004s
```
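For reference, a minimal export needs only the model and an output directory; the sketch below sticks to flags shown in the help above (the model ID, opset value, and output path are placeholders, not taken from this PR):

```bash
# Export a Hub model to ONNX, letting the exporter infer the task.
optimum-cli export onnx --model distilbert-base-uncased distilbert_onnx/

# The same export, pinning the task and opset explicitly.
optimum-cli export onnx \
  --model distilbert-base-uncased \
  --task text-classification \
  --opset 17 \
  distilbert_onnx/
```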
README.md (+1 −1):

```diff
@@ -38,7 +38,7 @@ For more information on the ONNX export, please check the [documentation](https:
 
 #### Inference
 
-Once the model is exported to the ONNX format, we provide Python classes enabling you to run the exported ONNX model in a seemless manner using [ONNX Runtime](https://onnxruntime.ai/) in the backend:
+Once the model is exported to the ONNX format, we provide Python classes enabling you to run the exported ONNX model in a seamless manner using [ONNX Runtime](https://onnxruntime.ai/) in the backend:
```
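The sentence being corrected refers to the ORT wrapper classes; as a rough, hedged sketch of that usage (the task-specific class, checkpoint directory, and example text are illustrative assumptions, not part of this diff):

```python
# Minimal sketch: run an exported ONNX model through the optimum ORT classes.
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSequenceClassification

# Directory produced by `optimum-cli export onnx` (placeholder path).
model = ORTModelForSequenceClassification.from_pretrained("distilbert_onnx/")
tokenizer = AutoTokenizer.from_pretrained("distilbert_onnx/")

# The ORT model plugs into the regular transformers pipeline API.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("ONNX Runtime makes inference fast."))
```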
docs/source/onnx/usage_guides/export_a_model.mdx (+1 −1):

```diff
@@ -317,7 +317,7 @@ For tasks that require only a single ONNX file (e.g. encoder-only), an exported
 
 ### Customize the export of Transformers models with custom modeling
 
-Optimum supports the export of Transformers models with custom modeling that use [`trust_remote_code=True`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code), not officially supported in the Transormers library but usable with its functionality as [pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines) and [generation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationMixin.generate).
+Optimum supports the export of Transformers models with custom modeling that use [`trust_remote_code=True`](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code), not officially supported in the Transformers library but usable with its functionality as [pipelines](https://huggingface.co/docs/transformers/main_classes/pipelines) and [generation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationMixin.generate).
 
 Examples of such models are [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) and [mosaicml/mpt-30b](https://huggingface.co/mosaicml/mpt-30b).
```
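For context, exporting one of the custom-modeling checkpoints named in that paragraph would look roughly like this on the CLI; `--trust-remote-code` and `--task` appear in the help output above, while the task value and output path are assumptions for this model:

```bash
# Sketch: export a checkpoint whose modeling code lives in the model repo.
optimum-cli export onnx \
  --model THUDM/chatglm2-6b \
  --task text-generation-with-past \
  --trust-remote-code \
  chatglm2_onnx/
```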
docs/source/onnxruntime/usage_guides/gpu.mdx (+1 −1):

```diff
@@ -126,7 +126,7 @@ Due to current limitations in ONNX Runtime, it is not possible to use quantized
 
 [IOBinding](https://onnxruntime.ai/docs/api/python/api_summary.html#iobinding) is an efficient way to avoid expensive data copying when using GPUs. By default, ONNX Runtime will copy the input from the CPU (even if the tensors are already copied to the targeted device), and assume that outputs also need to be copied back to the CPU from GPUs after the run. These data copying overheads between the host and devices are expensive, and __can lead to worse inference latency than vanilla PyTorch__ especially for the decoding process.
 
-To avoid the slowdown, 🤗 Optimum adopts the IOBinding to copy inputs onto GPUs and pre-allocate memory for outputs prior the inference. When instanciating the `ORTModel`, set the value of the argument `use_io_binding` to choose whether to turn on the IOBinding during the inference. `use_io_binding` is set to `True` by default, if you choose CUDA as execution provider.
+To avoid the slowdown, 🤗 Optimum adopts the IOBinding to copy inputs onto GPUs and pre-allocate memory for outputs prior the inference. When instantiating the `ORTModel`, set the value of the argument `use_io_binding` to choose whether to turn on the IOBinding during the inference. `use_io_binding` is set to `True` by default, if you choose CUDA as execution provider.
```
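A minimal sketch of the `use_io_binding` argument that sentence describes; the class choice, checkpoint directory, and provider string are illustrative assumptions rather than part of this diff:

```python
from optimum.onnxruntime import ORTModelForCausalLM

model = ORTModelForCausalLM.from_pretrained(
    "gpt2_onnx/",                      # directory with the exported ONNX files (placeholder)
    provider="CUDAExecutionProvider",  # run inference on GPU with ONNX Runtime
    use_io_binding=True,               # the default when the CUDA provider is selected
)
```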
"The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among:"
48
-
f" {TasksManager.get_all_tasks()}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
51
+
"The task to export the model for. If not specified, the task will be auto-inferred from the model's metadata or files. "
52
+
"For tasks that generate text, add the `xxx-with-past` suffix to export the model using past key values caching. "
53
+
f"Available tasks depend on the model, but are among the following list: {ALL_TASKS}."
49
54
),
50
55
)
51
56
optional_group.add_argument(
@@ -107,12 +112,8 @@ def parse_args_onnx(parser):
107
112
"--framework",
108
113
type=str,
109
114
choices=["pt"],
110
-
default=None,
111
-
help=(
112
-
"The framework to use for the ONNX export."
113
-
" If not provided, will attempt to use the local checkpoint's original framework"
114
-
" or what is available in the environment."
115
-
),
115
+
default="pt",
116
+
help="The framework to use for the export. Defaults to 'pt' for PyTorch.",