diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2d9469f45..3da4e38fb 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,7 +8,7 @@ Model Optimizer Changelog (Linux)
 - Deprecated ModelOpt's custom docker images. Please use the PyTorch, TensorRT-LLM or TensorRT docker image directly or refer to the `installation guide `_ for more details.
 - Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strongly typing. Use ``engine_precision`` instead.
-- Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
+- Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. ``engine_dir`` is replaced with ``checkpoint_dir`` in ``examples/llm_ptq`` and ``examples/vlm_ptq``. For performance evaluation, please use ``trtllm-bench`` directly.
 - ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
 - Deprecated ``examples/vlm_eval`` as it depends on the deprecated TRT-LLM's TRT backend.
diff --git a/examples/llm_eval/README.md b/examples/llm_eval/README.md
index 0e1855d99..bad3ca477 100644
--- a/examples/llm_eval/README.md
+++ b/examples/llm_eval/README.md
@@ -93,7 +93,7 @@ If `trust_remote_code` needs to be true, please append the command with the `--t
 ### TensorRT-LLM
 
 ```sh
-python lm_eval_tensorrt_llm.py --model trt-llm --model_args tokenizer=<HF model folder or model card>,engine_dir=<engine dir> --tasks <comma-separated tasks> --batch_size <batch size>
+python lm_eval_tensorrt_llm.py --model trt-llm --model_args tokenizer=<HF model folder or model card>,checkpoint_dir=<checkpoint dir> --tasks <comma-separated tasks> --batch_size <batch size>
 ```
 
 ## MMLU
@@ -137,10 +137,10 @@ python mmlu.py --model_name causal --model_path <HF model folder or model card>
 python mmlu.py --model_name causal --model_path <HF model folder or model card> --quant_cfg $MODELOPT_QUANT_CFG_TO_SEARCH --auto_quantize_bits $EFFECTIVE_BITS --batch_size 4
 ```
 
-### Evaluate the TensorRT-LLM engine
+### Evaluate with TensorRT-LLM
 
 ```bash
-python mmlu.py --model_name causal --model_path <HF model folder or model card> --engine_dir <engine dir>
+python mmlu.py --model_name causal --model_path <HF model folder or model card> --checkpoint_dir <checkpoint dir>
 ```
 
 ## MT-Bench
@@ -160,7 +160,7 @@ bash run_fastchat.sh -h <HF model folder or model card>
 bash run_fastchat.sh -h <HF model folder or model card> --quant_cfg MODELOPT_QUANT_CFG
 ```
 
-### Evaluate the TensorRT-LLM engine
+### Evaluate with TensorRT-LLM
 
 ```bash
 bash run_fastchat.sh -h <HF model folder or model card>
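For context on the `--model_args` rename above: lm-evaluation-harness splits that comma-separated string into keyword arguments for the registered model class, so `checkpoint_dir=...` on the command line has to match the renamed `TRTLLM.__init__` parameter in `lm_eval_tensorrt_llm.py` below. A minimal sketch of that key=value convention (the parser here is an illustration, not the harness's own code, and the paths are hypothetical):

```python
def parse_model_args(model_args: str) -> dict[str, str]:
    """Split 'tokenizer=...,checkpoint_dir=...' into constructor kwargs."""
    kwargs = {}
    for pair in filter(None, model_args.split(",")):
        key, _, value = pair.partition("=")
        kwargs[key.strip()] = value.strip()
    return kwargs


# Hypothetical paths; these kwargs end up as TRTLLM(tokenizer=..., checkpoint_dir=..., ...)
print(parse_model_args("tokenizer=/models/llama-3-8b,checkpoint_dir=/export/llama-3-8b_fp8"))
```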
diff --git a/examples/llm_eval/gen_model_answer.py b/examples/llm_eval/gen_model_answer.py
index e0b752dcf..afa76e25b 100644
--- a/examples/llm_eval/gen_model_answer.py
+++ b/examples/llm_eval/gen_model_answer.py
@@ -118,7 +118,7 @@ def run_eval(
     max_gpu_memory,
     dtype,
     revision,
-    engine_dir,
+    checkpoint_dir,
     nim_model,
     args,
 ):
@@ -150,7 +150,7 @@ def run_eval(
             revision=revision,
             top_p=top_p,
             temperature=temperature,
-            engine_dir=engine_dir,
+            checkpoint_dir=checkpoint_dir,
             nim_model=nim_model,
         )
         for i in range(0, len(questions), chunk_size)
@@ -174,25 +174,22 @@ def get_model_answers(
     revision,
     top_p=None,
     temperature=None,
-    engine_dir=None,
+    checkpoint_dir=None,
     nim_model=None,
 ):
     # Model Optimizer modification
-    if engine_dir:
-        tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
-        if engine_dir:
-            # get model type
-            last_part = os.path.basename(engine_dir)
-            model_type = last_part.split("_")[0]
-            # Some models require to set pad_token and eos_token based on external config (e.g., qwen)
-            if model_type == "qwen":
-                tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
-                tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
-
-            assert LLM is not None, "tensorrt_llm APIs could not be imported."
-            model = LLM(engine_dir, tokenizer=tokenizer)
-        else:
-            raise ValueError("engine_dir is required for TensorRT LLM inference.")
+    tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
+    if checkpoint_dir:
+        # get model type
+        last_part = os.path.basename(checkpoint_dir)
+        model_type = last_part.split("_")[0]
+        # Some models require setting pad_token and eos_token based on an external config (e.g., qwen)
+        if model_type == "qwen":
+            tokenizer.pad_token = tokenizer.convert_ids_to_tokens(151643)
+            tokenizer.eos_token = tokenizer.convert_ids_to_tokens(151643)
+
+        assert LLM is not None, "tensorrt_llm APIs could not be imported."
+        model = LLM(checkpoint_dir, tokenizer=tokenizer)
     elif not nim_model:
         model, _ = load_model(
             model_path,
@@ -205,7 +202,6 @@ def get_model_answers(
             cpu_offloading=False,
             debug=False,
         )
-        tokenizer = get_tokenizer(model_path, trust_remote_code=args.trust_remote_code)
         if args.quant_cfg:
             quantize_model(
                 model,
@@ -259,7 +255,7 @@ def get_model_answers(
 
                 # some models may error out when generating long outputs
                 try:
-                    if not engine_dir:
+                    if not checkpoint_dir:
                         output_ids = model.generate(
                             torch.as_tensor(input_ids).cuda(),
                             do_sample=do_sample,
@@ -427,9 +423,9 @@ def reorg_answer_file(answer_file):
         help="The model revision to load.",
     )
     parser.add_argument(
-        "--engine-dir",
+        "--checkpoint-dir",
         type=str,
-        help="The path to the TensorRT LLM engine directory.",
+        help="The path to the model checkpoint directory.",
     )
     parser.add_argument(
         "--nim-model",
@@ -502,7 +498,7 @@ def reorg_answer_file(answer_file):
         max_gpu_memory=args.max_gpu_memory,
         dtype=str_to_torch_dtype(args.dtype),
         revision=args.revision,
-        engine_dir=args.engine_dir,
+        checkpoint_dir=args.checkpoint_dir,
         nim_model=args.nim_model,
         args=args,
     )
diff --git a/examples/llm_eval/lm_eval_tensorrt_llm.py b/examples/llm_eval/lm_eval_tensorrt_llm.py
index ffd716413..4b23be46f 100644
--- a/examples/llm_eval/lm_eval_tensorrt_llm.py
+++ b/examples/llm_eval/lm_eval_tensorrt_llm.py
@@ -42,7 +42,7 @@ class TRTLLM(TemplateAPI):
     def __init__(
         self,
         tokenizer: str,
-        engine_dir: str,
+        checkpoint_dir: str,
         batch_size: int = 1,
         **kwargs,
     ):
@@ -56,11 +56,11 @@ def __init__(
         if self.tokenizer.pad_token_id is None:
             self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
 
-        assert isinstance(engine_dir, str)
-
-        self.llm = LLM(checkpoint_dir=engine_dir, tokenizer=self.tokenizer)
+        assert isinstance(checkpoint_dir, str)
+
+        self.llm = LLM(checkpoint_dir=checkpoint_dir, tokenizer=self.tokenizer)
         self.max_length = self.llm.max_seq_len - 1
-        logger.info("Loaded TRT-LLM engine")
+        logger.info("Loaded TRT-LLM")
 
     def model_call(
         self,
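Both scripts above now share the same initialization shape: build a Hugging Face tokenizer from the model path, fall back to the EOS token when no pad token is defined, and hand the quantized checkpoint directory to the `LLM` wrapper. A minimal sketch of that path, assuming the caller passes in the same `LLM` class these scripts import (its import path is not part of this diff) and real tokenizer/checkpoint paths:

```python
from transformers import AutoTokenizer


def build_trtllm_from_checkpoint(checkpoint_dir: str, tokenizer_path: str, llm_cls):
    """Construct (tokenizer, llm) the way gen_model_answer.py and
    lm_eval_tensorrt_llm.py do after the engine_dir -> checkpoint_dir rename."""
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    # Same fallback as lm_eval_tensorrt_llm.py: reuse EOS as PAD when none is set.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    # Same call shape as the scripts above; llm_cls stands in for their LLM wrapper.
    llm = llm_cls(checkpoint_dir=checkpoint_dir, tokenizer=tokenizer)
    return tokenizer, llm
```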
diff --git a/examples/llm_eval/mmlu.py b/examples/llm_eval/mmlu.py
index 3e12abfe7..4b0f3b341 100755
--- a/examples/llm_eval/mmlu.py
+++ b/examples/llm_eval/mmlu.py
@@ -252,9 +252,9 @@ def main(
     mto.enable_huggingface_checkpointing()
     model_path = kwargs["model_path"]
     tokenizer = get_tokenizer(model_path, trust_remote_code=kwargs.get("trust_remote_code", False))
-    if kwargs.get("engine_dir"):
+    if kwargs.get("checkpoint_dir"):
         # get model type
-        last_part = os.path.basename(kwargs["engine_dir"])
+        last_part = os.path.basename(kwargs["checkpoint_dir"])
         model_type = last_part.split("_")[0]
         # Some models require to set pad_token and eos_token based on external config (e.g., qwen)
         if model_type == "qwen":
@@ -264,7 +264,9 @@ def main(
         assert LLM is not None, "tensorrt_llm APIs could not be imported."
         medusa_choices = kwargs.get("medusa_choices")
         model = LLM(
-            checkpoint_dir=kwargs["engine_dir"], tokenizer=tokenizer, medusa_choices=medusa_choices
+            checkpoint_dir=kwargs["checkpoint_dir"],
+            tokenizer=tokenizer,
+            medusa_choices=medusa_choices,
         )
     else:
         model = select_model(
diff --git a/examples/llm_eval/run_fastchat.sh b/examples/llm_eval/run_fastchat.sh
index 16aa54bc2..e7d53b80d 100644
--- a/examples/llm_eval/run_fastchat.sh
+++ b/examples/llm_eval/run_fastchat.sh
@@ -20,9 +20,9 @@
 # If you are using NIM, ensure that you export the NIM API key using:
 # export OPENAI_API_KEY=<NIM API key>
 #
-# Usage: bash run_fastchat.sh -h <model_name> -e <engine_dir> -n <nim_model_name>
+# Usage: bash run_fastchat.sh -h <model_name> -e <checkpoint_dir> -n <nim_model_name>
 # model_name: The HuggingFace handle or folder of the model to evaluate.
-# engine_dir: The directory where the TRT-LLM engine is stored.
+# checkpoint_dir: The directory where the quantized checkpoint is stored.
 # nim_model_name: The handle of the NIM model to be used for evaluation.
 #
 # Example commands:
@@ -30,8 +30,8 @@
 # Evaluate "meta-llama/Meta-Llama-3-8B-Instruct" HF model:
 # bash run_fastchat.sh -h meta-llama/Meta-Llama-3-8B-Instruct
 #
-# Evaluate "meta-llama/Meta-Llama-3-8B-Instruct" HF model with TRT-LLM engine:
-# bash run_fastchat.sh -h meta-llama/Meta-Llama-3-8B-Instruct -e /path/to/engine_dir
+# Evaluate "meta-llama/Meta-Llama-3-8B-Instruct" HF model with TRT-LLM:
+# bash run_fastchat.sh -h meta-llama/Meta-Llama-3-8B-Instruct -e /path/to/checkpoint_dir
 #
 # Evaluate "meta-llama/Meta-Llama-3-8B-Instruct" HF model with NIM:
 # bash run_fastchat.sh -h meta-llama/Meta-Llama-3-8B-Instruct -n meta-llama/Meta-Llama-3-8B-Instruct
@@ -41,7 +41,7 @@ set -e
 set -x
 
 hf_model_name=""
-engine_dir=""
+checkpoint_dir=""
 nim_model_name=""
 answer_file=""
 quant_cfg=""
@@ -56,9 +56,9 @@ while [[ "$1" != "" ]]; do
         shift
         hf_model_name=$1
         ;;
-    -e | --engine_dir )
+    -e | --checkpoint_dir )
         shift
-        engine_dir=$1
+        checkpoint_dir=$1
         ;;
     -n | --nim_model_name )
         shift
@@ -96,8 +96,8 @@ if [ "$hf_model_name" == "" ]; then
     exit 1
 fi
 
-if [ "$engine_dir" != "" ]; then
-    engine_dir=" --engine-dir $engine_dir "
+if [ "$checkpoint_dir" != "" ]; then
+    checkpoint_dir=" --checkpoint-dir $checkpoint_dir "
 fi
 
 if [ "$nim_model_name" != "" ]; then
@@ -143,7 +143,7 @@ PYTHONPATH=FastChat:$PYTHONPATH python gen_model_answer.py \
     --model-id $hf_model_name \
     --temperature 0.0001 \
     --top-p 0.0001 \
-    $engine_dir \
+    $checkpoint_dir \
    $nim_model_name \
    $answer_file \
    $quant_args
diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index 3ac167db2..7203e78c7 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -36,16 +36,6 @@ def is_speculative(hf_config):
     )
 
 
-def get_mode_type_from_engine_dir(engine_dir_str):
-    # Split the path by '/' and get the last part
-    last_part = os.path.basename(engine_dir_str)
-
-    # Split the last part by '_' and get the first segment
-    model_type = last_part.split("_")[0]
-
-    return model_type
-
-
 def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs):
     print(f"Initializing tokenizer from {ckpt_path}")
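The helper removed from `example_utils.py` above lives on as inlined logic in `mmlu.py` and `gen_model_answer.py`: the model type is still inferred from the first underscore-separated token of the checkpoint directory name, and Qwen checkpoints still get an explicit pad/eos token before the TRT-LLM wrapper is built. A small self-contained sketch of that heuristic (the directory name is hypothetical):

```python
import os


def model_type_from_checkpoint_dir(checkpoint_dir: str) -> str:
    """Mirror of the inlined logic: 'qwen_fp8' -> 'qwen'."""
    last_part = os.path.basename(checkpoint_dir)
    return last_part.split("_")[0]


if __name__ == "__main__":
    model_type = model_type_from_checkpoint_dir("/workspace/ckpts/qwen_fp8")
    print(model_type)  # -> "qwen"; the eval scripts special-case this value to set
    # pad_token / eos_token to token id 151643 before constructing the LLM wrapper.
```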
-"""An example script to run the tensorrt_llm engine.""" +"""An example script to run the tensorrt_llm inference.""" import argparse @@ -28,7 +28,7 @@ def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument("--tokenizer", type=str, default="") parser.add_argument("--max_output_len", type=int, default=100) - parser.add_argument("--engine_dir", type=str, default="/tmp/modelopt") + parser.add_argument("--checkpoint_dir", type=str) parser.add_argument( "--input_texts", type=str, @@ -49,8 +49,8 @@ def parse_arguments(): def run(args): if not args.tokenizer: - # Assume the tokenizer files are saved in the engine_dr. - args.tokenizer = args.engine_dir + # Assume the tokenizer files are saved in the checkpoint_dir. + args.tokenizer = args.checkpoint_dir if isinstance(args.tokenizer, PreTrainedTokenizerBase): tokenizer = args.tokenizer @@ -66,7 +66,7 @@ def run(args): print("TensorRT-LLM example outputs:") - llm = LLM(args.engine_dir, tokenizer=tokenizer, max_batch_size=len(input_texts)) + llm = LLM(args.checkpoint_dir, tokenizer=tokenizer, max_batch_size=len(input_texts)) torch.cuda.cudart().cudaProfilerStart() outputs = llm.generate_text(input_texts, args.max_output_len) torch.cuda.cudart().cudaProfilerStop() diff --git a/examples/llm_ptq/scripts/huggingface_example.sh b/examples/llm_ptq/scripts/huggingface_example.sh index 8878b824c..97d14ea03 100755 --- a/examples/llm_ptq/scripts/huggingface_example.sh +++ b/examples/llm_ptq/scripts/huggingface_example.sh @@ -158,7 +158,7 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH echo "Quantized model config $MODEL_CONFIG exists, skipping the quantization stage" fi - # for enc-dec model, users need to refer TRT-LLM example to build engines and deployment + # for enc-dec model, users need to refer TRT-LLM example for deployment if [[ -f "$SAVE_PATH/encoder/config.json" && -f "$SAVE_PATH/decoder/config.json" && ! -f $MODEL_CONFIG ]]; then echo "Please continue to deployment with the TRT-LLM enc_dec example, https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/models/core/enc_dec. Checkpoint export_path: $SAVE_PATH" exit 0 @@ -187,7 +187,7 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH RUN_ARGS+=" --trust_remote_code " fi - python run_tensorrt_llm.py --engine_dir=$SAVE_PATH $RUN_ARGS + python run_tensorrt_llm.py --checkpoint_dir=$SAVE_PATH $RUN_ARGS fi if [[ -d "${MODEL_PATH}" ]]; then @@ -229,7 +229,7 @@ if [[ $TASKS =~ "lm_eval" ]]; then python lm_eval_tensorrt_llm.py \ --model trt-llm \ - --model_args tokenizer=$MODEL_PATH,engine_dir=$SAVE_PATH,max_gen_toks=$BUILD_MAX_OUTPUT_LEN \ + --model_args tokenizer=$MODEL_PATH,checkpoint_dir=$SAVE_PATH,max_gen_toks=$BUILD_MAX_OUTPUT_LEN \ --tasks $LM_EVAL_TASKS \ --batch_size $BUILD_MAX_BATCH_SIZE $lm_eval_flags | tee $LM_EVAL_RESULT @@ -259,7 +259,7 @@ if [[ $TASKS =~ "mmlu" ]]; then python mmlu.py \ --model_name causal \ --model_path $MODEL_ABS_PATH \ - --engine_dir $SAVE_PATH \ + --checkpoint_dir $SAVE_PATH \ --data_dir $MMLU_DATA_PATH | tee $MMLU_RESULT popd diff --git a/examples/vlm_ptq/README.md b/examples/vlm_ptq/README.md index cdadb3374..1357ffc8f 100644 --- a/examples/vlm_ptq/README.md +++ b/examples/vlm_ptq/README.md @@ -56,7 +56,7 @@ Please refer to the [llm_ptq/README.md](../llm_ptq/README.md#current-out-of-the- Please refer to the [llm_ptq/README.md](../llm_ptq/README.md) about the details of model quantization. 
diff --git a/examples/vlm_ptq/README.md b/examples/vlm_ptq/README.md
index cdadb3374..1357ffc8f 100644
--- a/examples/vlm_ptq/README.md
+++ b/examples/vlm_ptq/README.md
@@ -56,7 +56,7 @@ Please refer to the [llm_ptq/README.md](../llm_ptq/README.md#current-out-of-the-
 
 Please refer to the [llm_ptq/README.md](../llm_ptq/README.md) about the details of model quantization.
 
-The following scripts provide an all-in-one and step-by-step model quantization example for Llava, VILA, Phi-3-vision and Qwen2.5-VL models. The quantization format and the number of GPUs will be supplied as inputs to these scripts. By default, we build the engine for the fp8 format and 1 GPU.
+The following scripts provide an all-in-one and step-by-step model quantization example for the supported Hugging Face multi-modal models. The quantization format and the number of GPUs are supplied as inputs to these scripts.
 
 ### Hugging Face Example [Script](./scripts/huggingface_example.sh)
diff --git a/examples/vlm_ptq/utils.py b/examples/vlm_ptq/utils.py
deleted file mode 100644
index 496ce236b..000000000
--- a/examples/vlm_ptq/utils.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-def add_common_args(parser):
-    parser.add_argument("--max_new_tokens", type=int, default=128)
-    parser.add_argument("--batch_size", type=int, default=1)
-    parser.add_argument("--log_level", type=str, default="info")
-    parser.add_argument(
-        "--visual_engine_name", type=str, default="model.engine", help="Name of visual TRT engine"
-    )
-    parser.add_argument(
-        "--engine_dir",
-        type=str,
-        default=None,
-        help="Directory containing visual and LLM TRT engines",
-    )
-    parser.add_argument(
-        "--hf_model_dir", type=str, default=None, help="Directory containing tokenizer"
-    )
-    parser.add_argument(
-        "--input_text", type=str, nargs="+", default=None, help="Text prompt to LLM"
-    )
-    parser.add_argument("--num_beams", type=int, help="Use beam search if num_beams >1", default=1)
-    parser.add_argument("--top_k", type=int, default=1)
-    parser.add_argument("--top_p", type=float, default=0.0)
-    parser.add_argument("--temperature", type=float, default=1.0)
-    parser.add_argument("--repetition_penalty", type=float, default=1.0)
-    parser.add_argument(
-        "--run_profiling", action="store_true", help="Profile runtime over several iterations"
-    )
-    parser.add_argument(
-        "--profiling_iterations", type=int, help="Number of iterations to run profiling", default=20
-    )
-    parser.add_argument(
-        "--check_accuracy", action="store_true", help="Check correctness of text output"
-    )
-    parser.add_argument(
-        "--video_path",
-        type=str,
-        default=None,
-        help=(
-            "Path to your local video file, using 'llava-onevision-accuracy' to check the"
-            "Llava-OneVision model accuracy"
-        ),
-    )
-    parser.add_argument(
-        "--video_num_frames",
-        type=int,
-        help="The number of frames sampled from the video in the Llava-OneVision model.",
-        default=None,
-    )
-    parser.add_argument(
-        "--image_path",
-        type=str,
-        nargs="+",
-        default=None,
-        help="List of input image paths, separated by symbol",
-    )
-    parser.add_argument("--path_sep", type=str, default=",", help="Path separator symbol")
-    parser.add_argument("--prompt_sep", type=str, default=",", help="Prompt separator symbol")
-    parser.add_argument(
-        "--enable_context_fmha_fp32_acc",
-        action="store_true",
-        default=None,
-        help="Enable FMHA runner FP32 accumulation.",
-    )
-    parser.add_argument(
-        "--enable_chunked_context",
-        action="store_true",
-        help="Enables chunked context (only available with cpp session).",
-    )
-    parser.add_argument(
-        "--mm_embedding_offloading",
-        action="store_true",
-        help=(
-            "Enable position table offloading. When not specified, defaults to True if using with "
-            "--enable_chunked_context."
-        ),
-    )
-    parser.add_argument(
-        "--use_py_session",
-        default=False,
-        action="store_true",
-        help="Whether or not to use Python runtime session. By default C++ runtime session is used for the LLM.",
-    )
-    parser.add_argument(
-        "--kv_cache_free_gpu_memory_fraction",
-        default=0.8,
-        type=float,
-        help="Specify the free gpu memory fraction.",
-    )
-    parser.add_argument(
-        "--cross_kv_cache_fraction",
-        default=0.5,
-        type=float,
-        help=(
-            "Specify the kv cache fraction reserved for cross attention. Only applicable for"
-            "encoder-decoder models. By default 0.5 for self and 0.5 for cross."
-        ),
-    )
-    parser.add_argument(
-        "--multi_block_mode",
-        type=lambda s: s.lower()
-        in ("yes", "true", "t", "1"),  # custom boolean function to convert input string to boolean
-        default=True,
-        help="Distribute the work across multiple CUDA thread-blocks on the GPU for masked MHA kernel.",
-    )
-    parser.add_argument(
-        "--session",
-        default="cpp_llm_only",
-        type=str,
-        choices=["python", "cpp_llm_only", "cpp"],
-        help="Runtime used to run the models.\n"
-        "`cpp_llm_only`: vision engine run in python runtime, but LLM in pybind cpp runtime\n"
-        "`python`: everything runs in python runtime\n"
-        "`cpp`: everything runs in C++ runtime",
-    )
-    parser.add_argument(
-        "--lora_task_uids",
-        type=str,
-        default=None,
-        nargs="+",
-        help="The list of LoRA task uids; use -1 to disable the LoRA module",
-    )
-    return parser