microsoft · xieofxie · Aug 27, 2025 · Aug 27, 2025 · Aug 27, 2025 · Aug 28, 2025
@@ -1,6 +1,6 @@
 {
     "configCheck": 128,
-    "copyCheck": 182,
+    "copyCheck": 179,
     "extensionCheck": 1,
     "gitignoreCheck": 38,
     "inferenceModelCheck": 25,
@@ -9,8 +9,8 @@
     "modelProjectCheck": 39,
     "oliveCheck": 36,
     "oliveJsonCheck": 128,
-    "pathCheck": 1105,
+    "pathCheck": 1080,
     "requirementsCheck": 37,
     "templateCheck": 1,
-    "venvRequirementsCheck": 11
+    "venvRequirementsCheck": 12
 }
@@ -17,7 +17,7 @@
             "architecture": "Transformer",
             "status": "Ready",
             "relativePath": "microsoft-Phi-3.5-mini-instruct/aitk",
-            "version": 4,
+            "version": 5,
             "p0": true
         },
         {
@@ -60,7 +60,7 @@
             "architecture": "Transformer",
             "status": "Ready",
             "relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk",
-            "version": 4,
+            "version": 5,
             "p0": true
         },
         {
@@ -164,7 +164,7 @@
             "architecture": "Transformer",
             "status": "Ready",
             "relativePath": "meta-llama-Llama-3.2-1B-Instruct/aitk",
-            "version": 4,
+            "version": 5,
             "p0": true
         },
         {
@@ -228,7 +228,7 @@
             "architecture": "Transformer",
             "status": "Ready",
             "relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk",
-            "version": 4,
+            "version": 5,
             "p0": true
         },
         {
@@ -680,7 +680,8 @@
         "wikipedia": "https://huggingface.co/datasets/wikimedia/wikipedia",
         "google-research-datasets/conceptual_captions": "https://huggingface.co/datasets/google-research-datasets/conceptual_captions",
         "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN",
-        "librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr"
+        "librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr",
+        "pileval_for_awq_benchmark": "https://huggingface.co/datasets/mit-han-lab/pile-val-backup"
     },
     "LoginRequiredDatasets": [
         "imagenet-1k"
@@ -759,6 +760,9 @@
             "uint8": "a8",
             "int16": "a16",
             "uint16": "a16"
+        },
+        "QuarkDataType": {
+            "bfloat16": "bf16"
         }
     }
 }
@@ -4,6 +4,7 @@
             "type": "LocalSystem",
             "accelerators": [
                 {
+                    "device": "cpu",
                     "execution_providers": [
                         "CPUExecutionProvider"
                     ]

@@ -9,6 +9,7 @@
             "type": "LocalSystem",
             "accelerators": [
                 {
+                    "device": "cpu",
                     "execution_providers": [
                         "CPUExecutionProvider"
                     ]

@@ -0,0 +1,104 @@
+--extra-index-url https://download.pytorch.org/whl/cu128
+--extra-index-url=https://pypi.amd.com/simple
+# accelerate==1.9.0
+accelerate==1.9.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+alembic==1.16.4
+annotated-types==0.7.0
+async-timeout==5.0.1
+attrs==25.3.0
+certifi==2025.8.3
+charset-normalizer==3.4.2
+colorama==0.4.6
+coloredlogs==15.0.1
+colorlog==6.9.0
+# datasets==3.5.0
+datasets==3.5.0
+dill==0.3.8
+# amd-quark==0.9
+# uvpip:install amd-quark==0.9;post;{"UV_INSECURE_NO_ZIP_VALIDATION":"1"}
+evaluate==0.4.5
+filelock==3.18.0
+flatbuffers==25.2.10
+frozenlist==1.7.0
+fsspec==2024.12.0
+greenlet==3.2.3
+# not in requires
+hf-xet==1.1.5
+huggingface-hub==0.34.4
+# huggingface-hub[hf_xet]==0.34.4
+huggingface-hub[hf_xet]==0.34.4
+humanfriendly==10.0
+idna==3.10
+jinja2==3.1.6
+lightning-utilities==0.15.1
+mako==1.3.10
+markdown-it-py==3.0.0
+markupsafe==3.0.2
+mdurl==0.1.2
+ml-dtypes==0.5.3
+# model-generate==1.5.1
+model-generate==1.5.1
+mpmath==1.3.0
+multidict==6.6.3
+multiprocess==0.70.16
+networkx==3.4.2
+ninja==1.11.1.4
+numpy==2.1.3
+# olive-ai@git+https://github.com/microsoft/Olive.git@8365802b68c32725418ae2c8999b9a90af0d41e0#egg=olive-ai
+olive-ai@git+https://github.com/microsoft/Olive.git@8b44cf49e03b8c4bf5d9f31ec3aa6a5f22d7677d#egg=olive-ai
+# onnx==1.17.0
+onnx==1.17.0
+onnx-ir==0.1.10
+onnx-tool==0.9.0
+onnxruntime==1.21.1
+onnxruntime-extensions==0.14.0
+onnxruntime-genai==0.7.1
+onnxscript==0.5.3
+onnxsim==0.4.36
+optuna==4.4.0
+packaging==25.0
+pandas==2.3.1
+propcache==0.3.2
+protobuf==6.31.1
+# psutil==7.0.0
+psutil==7.0.0
+pyarrow==21.0.0
+pydantic==2.11.7
+pydantic-core==2.33.2
+pygments==2.19.2
+pyreadline3==3.5.4
+python-dateutil==2.9.0.post0
+pytz==2025.2
+pyyaml==6.0.2
+regex==2025.7.34
+requests==2.32.4
+rich==14.1.0
+ryzenai-dynamic-dispatch==1.5.1
+ryzenai-onnx-utils==1.5.1
+safetensors==0.5.3
+scipy==1.15.3
+sentencepiece==0.2.0
+setuptools==80.9.0
+six==1.17.0
+sqlalchemy==2.0.42
+sympy==1.14.0
+# tabulate==0.9.0
+tabulate==0.9.0
+tokenizers==0.21.4
+tomli==2.2.1
+# torch==2.7.0+cu128
+torch==2.7.0+cu128
+torchmetrics==1.7.0
+tqdm==4.67.1
+# transformers==4.50.0
+transformers==4.50.0
+typing-extensions==4.14.1
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.5.0
+xxhash==3.5.0
+yarl==1.20.1
+zstandard==0.23.0
@@ -132,6 +132,7 @@ class OlivePropertyNames:
     DataConfigs = "data_configs"
     DataName = "data_name"
     Dataset = "dataset"
+    DataType = "data_type"
     Device = "device"
     Engine = "engine"
     EvaluateInputModel = "evaluate_input_model"

@@ -62,8 +62,8 @@ def checkSystem(oliveJsonFile: str, system):
         printError(f"{oliveJsonFile} should have only one accelerator")
         return False
     if OlivePropertyNames.Device not in accelerators[0]:
-        printWarning(f"{oliveJsonFile} accelerator should have device")
-        # return False
+        printError(f"{oliveJsonFile} accelerator should have device")
+        return False
     eps = accelerators[0][OlivePropertyNames.ExecutionProviders]
     if len(eps) != 1:
         printError(f"{oliveJsonFile} should have only one execution provider")

@@ -133,12 +133,13 @@ def generate_quantization_config(
     return None
 
 
-def generate_amd_quantization_config(configFile: Path, modelList: ModelList) -> Optional[Section]:
+def generate_amd_quantization_config(configFile: Path, modelList: ModelList, parameter: ModelParameter) -> Optional[Section]:
     with open_ex(configFile, "r") as f:
         content = json.load(f)
     parameters = []
     for k, v in content[OlivePropertyNames.Passes].items():
         if v[OlivePropertyNames.Type].lower() == OlivePassNames.QuarkQuantization:
+            # https://github.com/amd/Quark/blob/0a542692aa39181b7ab0ae77246cb537a0f97791/examples/onnx/accuracy_improvement/quarot/data_preparation.py#L78
             data_name = v.get(OlivePropertyNames.Dataset)
             if data_name:
                 parameters.append(
@@ -162,6 +163,14 @@ def generate_amd_quantization_config(configFile: Path, modelList: ModelList) ->
                         ),
                     )
                 )
+            data_type = v.get(OlivePropertyNames.DataType)
+            if data_type:
+                parameter.optimizationPaths.append(
+                    OptimizationPath(
+                        name="QuarkDataType",
+                        path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.DataType}",
+                    )
+                )
             break
 
     if parameters:
@@ -196,7 +205,7 @@ def generator_amd(id: str, recipe, folder: Path, modelList: ModelList):
     parameter = create_model_parameter(aitk, name, configFile)
     parameter.isLLM = isLLM
 
-    quantize = generate_amd_quantization_config(configFile, modelList)
+    quantize = generate_amd_quantization_config(configFile, modelList, parameter)
     if not quantize:
         quantize = generate_quantization_config(configFile, modelList, parameter)
     if quantize:

@@ -88,14 +88,15 @@ def Check(
         sectionId: int,
         oliveJson: Any,
         modelList: ModelList,
+        emptyAllowed: bool
     ):
         if not self.name:
             return False
         # if not self.description:
         #    return False
         # TODO add place holder for General?
-        if not self.parameters and self.phase != PhaseTypeEnum.Conversion:
-            printWarning(f"{_file} self.parameters is empty for {self.phase}.")
+        if not emptyAllowed and not self.parameters:
+            printError(f"{_file} self.parameters is empty for {self.phase}.")
 
         for i, parameter in enumerate(self.parameters):
             if parameter.template:
@@ -229,6 +230,8 @@ class ModelParameter(BaseModelClass):
     # For template using CUDA and no runtime overwrite, we need to set this so we know the target EP
     evalRuntime: Optional[RuntimeEnum] = None
     evalMetrics: Optional[Dict[str, str]] = None
+    # True when evaluation only measures latency using random data (no data config needed)
+    evalNoDataConfig: Optional[bool] = None
     debugInfo: Optional[DebugInfo] = None
     # A SHORTCUT FOR SEVERAL PARAMETERS
     # This kind of config will
@@ -406,7 +409,8 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
                 if not checkPath(f"{OlivePropertyNames.Evaluators}.{evaluatorName}", oliveJson):
                     printError(f"{self._file} does not have evaluator {evaluatorName}")
 
-            if not section.Check(templates, self._file or "", tmpDevice, oliveJson, modelList):
+            emptyAllowed = section.phase == PhaseTypeEnum.Conversion or (section.phase == PhaseTypeEnum.Evaluation and self.evalNoDataConfig)
+            if not section.Check(templates, self._file or "", tmpDevice, oliveJson, modelList, emptyAllowed):
                 printError(f"{self._file} section {tmpDevice} has error")
 
         if (
@@ -426,7 +430,7 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
         if self.evalMetrics and len(self.evalMetrics) > 2:
             printError(f"{self._file} evalMetrics should not have more than 2 metrics")
 
-        self.checkPhase(oliveJson)
+        self.checkPhase(oliveJson, self.evalNoDataConfig or False)
         self.CheckRuntimeInConversion(oliveJson, modelList, modelInfo)
         self.checkOliveFile(oliveJson, modelInfo)
         self.checkRequirements(modelList)
@@ -570,7 +574,7 @@ def addRuntimeInConversion(runtime: Parameter, path: str, values: List[Any]):
             if not self.runtimeInConversion.Check(False, oliveJson, modelList):
                 printError(f"{self._file} runtime in conversion has error")
 
-    def checkPhase(self, oliveJson: Any):
+    def checkPhase(self, oliveJson: Any, evalNoDataConfig: bool):
         allPhases = [section.phase for section in self.sections]
         if len(allPhases) == 1 and allPhases[0] == PhaseTypeEnum.Conversion:
             pass
@@ -593,22 +597,26 @@ def checkPhase(self, oliveJson: Any):
         if (
             PhaseTypeEnum.Evaluation in allPhases
             and PhaseTypeEnum.Quantization in allPhases
+            and not evalNoDataConfig
             and (OlivePropertyNames.DataConfigs not in oliveJson or len(oliveJson[OlivePropertyNames.DataConfigs]) != 2)
         ):
-            printWarning(f"{self._file}'s olive json should have two data configs for evaluation")
+            printError(f"{self._file}'s olive json should have two data configs for evaluation")
 
     def checkOliveFile(self, oliveJson: Any, modelInfo: ModelInfo):
         if modelInfo.extension:
             return
         if modelInfo.template:
             return
+        if self.aitkPython:
+            return
         if not self.oliveFile:
             if (
                 self.runtime
                 and self.runtime.displayNames
                 and self.runtime.displayNames[0]
                 in [
                     GlobalVars.RuntimeToDisplayName[RuntimeEnum.DML],
+                    # TODO: this warning is useless now
                     GlobalVars.RuntimeToDisplayName[RuntimeEnum.AMDGPU],
                     GlobalVars.RuntimeToDisplayName[RuntimeEnum.IntelCPU],
                     GlobalVars.RuntimeToDisplayName[RuntimeEnum.IntelGPU],

@@ -14,20 +14,6 @@
                 }
             ]
         },
-        {
-            "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json",
-            "dst": "qwen2_5_vitis_ai_config.json",
-            "replacements": [
-                {
-                    "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-                    "replace": "Qwen/Qwen2.5-1.5B-Instruct"
-                },
-                {
-                    "find": "model/deepseek",
-                    "replace": "model/qwen2_5"
-                }
-            ]
-        },
         {
             "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
             "dst": "qwen2_5_trtrtx_config.json",

@@ -12,10 +12,7 @@ recipes:
       ep: VitisAIExecutionProvider
       aitk:
         oliveFile: "VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json"
-        requirementsPatches:
-          - AutoGptq
-        runtimeOverwrite:
-          executeEp: CUDAExecutionProvider
+        requirements: AMD/Quark_py3.10.17
         evalRuntime: AMDNPU
     - file: "qwen2_5_ov_gpu_config.json"
       devices:
@@ -37,7 +34,7 @@ recipes:
 aitk:
     modelInfo:
         id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
-        version: 4
+        version: 5
         groupId: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
         groupItemName: "1.5B"
         p0: true
@@ -27,6 +27,6 @@
     ],
     "modelInfo": {
         "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct",
-        "version": 4
+        "version": 5
     }
 }
@@ -9,6 +9,7 @@
             "python_environment_path": "/path/to/qnn/env/bin",
             "accelerators": [
                 {
+                    "device": "npu",
                     "execution_providers": [
                         "QNNExecutionProvider"
                     ]