Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
9f17dfb
add two more
Aug 27, 2025
947139b
add p0 llm gpu
Aug 27, 2025
58c2112
big update for clip
Aug 27, 2025
30130e1
update inference sample
Aug 28, 2025
f512590
rename qnn_system to target_system
Aug 28, 2025
135938b
update Olive
Aug 28, 2025
4850651
align llm
Aug 28, 2025
9d754a9
update qnn
Aug 28, 2025
276a0e9
use qnn
Aug 28, 2025
a1fe058
update amd
Aug 28, 2025
c9f99fc
update
Aug 28, 2025
c50ca92
3 more amds
Aug 28, 2025
28aac18
add google bert qnn
Aug 28, 2025
dcd55c9
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Sep 1, 2025
5d2d1be
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Sep 1, 2025
24764b4
update script
Sep 1, 2025
46d7e23
remove amd llm check
Sep 1, 2025
22577f6
update qnn llm
Sep 1, 2025
5445764
update metrics
Sep 1, 2025
4a277cd
add UX
Sep 1, 2025
1d8ae56
unused
Sep 1, 2025
2c1a612
updates
Sep 1, 2025
5f8d270
revert qwen due to wmic not found
Sep 1, 2025
be6f116
update google bert qnn
Sep 1, 2025
2a17db2
nit
Sep 1, 2025
0a04f30
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Sep 1, 2025
4f859bb
revert for inference sample
Sep 1, 2025
9ce0ba6
revert
Sep 1, 2025
28aa7a1
fix dim
Sep 1, 2025
3fefaa6
nit
Sep 1, 2025
90461d3
update QNN_LLM
Sep 2, 2025
570470b
a
Sep 2, 2025
b6cd368
add AMD_Quark
Sep 2, 2025
a61b4f6
update reqs
Sep 2, 2025
a2d30f2
use amd quark
Sep 2, 2025
def95e8
revert version
Sep 2, 2025
49aaabe
remove copy_from_recipe
Sep 2, 2025
f5a630d
use evalRuntime
Sep 2, 2025
5bdda0c
Merge branch 'hualxie/update_p0' of https://github.com/microsoft/oliv…
Sep 2, 2025
6b729ba
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Sep 8, 2025
fa9b7ae
align
Sep 8, 2025
aa6c1e3
add relative check
Sep 8, 2025
0235643
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Dec 17, 2025
2cc1974
temp
Dec 17, 2025
5e06583
fix
Dec 17, 2025
6cc02ca
clean up
Dec 17, 2025
5ce2d7d
worked!
Dec 17, 2025
456a023
Merge remote-tracking branch 'origin/main' into hualxie/update_p0
Jan 27, 2026
4e60676
add venv
Jan 27, 2026
df1681f
fix device
Jan 27, 2026
71edc1b
update more
Jan 27, 2026
f4cfd9a
use CPU
Jan 27, 2026
115b961
fix requirements
Jan 27, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .aitk/configs/checks.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"configCheck": 128,
"copyCheck": 182,
"copyCheck": 179,
"extensionCheck": 1,
"gitignoreCheck": 38,
"inferenceModelCheck": 25,
Expand All @@ -9,8 +9,8 @@
"modelProjectCheck": 39,
"oliveCheck": 36,
"oliveJsonCheck": 128,
"pathCheck": 1105,
"pathCheck": 1080,
"requirementsCheck": 37,
"templateCheck": 1,
"venvRequirementsCheck": 11
"venvRequirementsCheck": 12
}
14 changes: 9 additions & 5 deletions .aitk/configs/model_list.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "microsoft-Phi-3.5-mini-instruct/aitk",
"version": 4,
"version": 5,
"p0": true
},
{
Expand Down Expand Up @@ -60,7 +60,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk",
"version": 4,
"version": 5,
"p0": true
},
{
Expand Down Expand Up @@ -164,7 +164,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "meta-llama-Llama-3.2-1B-Instruct/aitk",
"version": 4,
"version": 5,
"p0": true
},
{
Expand Down Expand Up @@ -228,7 +228,7 @@
"architecture": "Transformer",
"status": "Ready",
"relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk",
"version": 4,
"version": 5,
"p0": true
},
{
Expand Down Expand Up @@ -680,7 +680,8 @@
"wikipedia": "https://huggingface.co/datasets/wikimedia/wikipedia",
"google-research-datasets/conceptual_captions": "https://huggingface.co/datasets/google-research-datasets/conceptual_captions",
"AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN",
"librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr"
"librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr",
"pileval_for_awq_benchmark": "https://huggingface.co/datasets/mit-han-lab/pile-val-backup"
},
"LoginRequiredDatasets": [
"imagenet-1k"
Expand Down Expand Up @@ -759,6 +760,9 @@
"uint8": "a8",
"int16": "a16",
"uint16": "a16"
},
"QuarkDataType": {
"bfloat16": "bf16"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"type": "LocalSystem",
"accelerators": [
{
"device": "cpu",
"execution_providers": [
"CPUExecutionProvider"
]
Expand Down
1 change: 1 addition & 0 deletions .aitk/non_model_projects/templates/empty/sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"type": "LocalSystem",
"accelerators": [
{
"device": "cpu",
"execution_providers": [
"CPUExecutionProvider"
]
Expand Down
104 changes: 104 additions & 0 deletions .aitk/requirements/AMD/Quark_py3.10.17.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
--extra-index-url https://download.pytorch.org/whl/cu128
--extra-index-url=https://pypi.amd.com/simple
# accelerate==1.9.0
accelerate==1.9.0
aiohappyeyeballs==2.6.1
aiohttp==3.12.15
aiosignal==1.4.0
alembic==1.16.4
annotated-types==0.7.0
async-timeout==5.0.1
attrs==25.3.0
certifi==2025.8.3
charset-normalizer==3.4.2
colorama==0.4.6
coloredlogs==15.0.1
colorlog==6.9.0
# datasets==3.5.0
datasets==3.5.0
dill==0.3.8
# amd-quark==0.9
# uvpip:install amd-quark==0.9;post;{"UV_INSECURE_NO_ZIP_VALIDATION":"1"}
evaluate==0.4.5
filelock==3.18.0
flatbuffers==25.2.10
frozenlist==1.7.0
fsspec==2024.12.0
greenlet==3.2.3
# not in requires
hf-xet==1.1.5
huggingface-hub==0.34.4
# huggingface-hub[hf_xet]==0.34.4
huggingface-hub[hf_xet]==0.34.4
humanfriendly==10.0
idna==3.10
jinja2==3.1.6
lightning-utilities==0.15.1
mako==1.3.10
markdown-it-py==3.0.0
markupsafe==3.0.2
mdurl==0.1.2
ml-dtypes==0.5.3
# model-generate==1.5.1
model-generate==1.5.1
mpmath==1.3.0
multidict==6.6.3
multiprocess==0.70.16
networkx==3.4.2
ninja==1.11.1.4
numpy==2.1.3
# olive-ai@git+https://github.com/microsoft/Olive.git@8365802b68c32725418ae2c8999b9a90af0d41e0#egg=olive-ai
olive-ai@git+https://github.com/microsoft/Olive.git@8b44cf49e03b8c4bf5d9f31ec3aa6a5f22d7677d#egg=olive-ai
# onnx==1.17.0
onnx==1.17.0
onnx-ir==0.1.10
onnx-tool==0.9.0
onnxruntime==1.21.1
onnxruntime-extensions==0.14.0
onnxruntime-genai==0.7.1
onnxscript==0.5.3
onnxsim==0.4.36
optuna==4.4.0
packaging==25.0
pandas==2.3.1
propcache==0.3.2
protobuf==6.31.1
# psutil==7.0.0
psutil==7.0.0
pyarrow==21.0.0
pydantic==2.11.7
pydantic-core==2.33.2
pygments==2.19.2
pyreadline3==3.5.4
python-dateutil==2.9.0.post0
pytz==2025.2
pyyaml==6.0.2
regex==2025.7.34
requests==2.32.4
rich==14.1.0
ryzenai-dynamic-dispatch==1.5.1
ryzenai-onnx-utils==1.5.1
safetensors==0.5.3
scipy==1.15.3
sentencepiece==0.2.0
setuptools==80.9.0
six==1.17.0
sqlalchemy==2.0.42
sympy==1.14.0
# tabulate==0.9.0
tabulate==0.9.0
tokenizers==0.21.4
tomli==2.2.1
# torch==2.7.0+cu128
torch==2.7.0+cu128
torchmetrics==1.7.0
tqdm==4.67.1
# transformers==4.50.0
transformers==4.50.0
typing-extensions==4.14.1
typing-inspection==0.4.1
tzdata==2025.2
urllib3==2.5.0
xxhash==3.5.0
yarl==1.20.1
zstandard==0.23.0
1 change: 1 addition & 0 deletions .aitk/scripts/sanitize/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class OlivePropertyNames:
DataConfigs = "data_configs"
DataName = "data_name"
Dataset = "dataset"
DataType = "data_type"
Device = "device"
Engine = "engine"
EvaluateInputModel = "evaluate_input_model"
Expand Down
4 changes: 2 additions & 2 deletions .aitk/scripts/sanitize/file_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def checkSystem(oliveJsonFile: str, system):
printError(f"{oliveJsonFile} should have only one accelerator")
return False
if OlivePropertyNames.Device not in accelerators[0]:
printWarning(f"{oliveJsonFile} accelerator should have device")
# return False
printError(f"{oliveJsonFile} accelerator should have device")
return False
eps = accelerators[0][OlivePropertyNames.ExecutionProviders]
if len(eps) != 1:
printError(f"{oliveJsonFile} should have only one execution provider")
Expand Down
13 changes: 11 additions & 2 deletions .aitk/scripts/sanitize/generator_amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,13 @@ def generate_quantization_config(
return None


def generate_amd_quantization_config(configFile: Path, modelList: ModelList) -> Optional[Section]:
def generate_amd_quantization_config(configFile: Path, modelList: ModelList, parameter: ModelParameter) -> Optional[Section]:
with open_ex(configFile, "r") as f:
content = json.load(f)
parameters = []
for k, v in content[OlivePropertyNames.Passes].items():
if v[OlivePropertyNames.Type].lower() == OlivePassNames.QuarkQuantization:
# https://github.com/amd/Quark/blob/0a542692aa39181b7ab0ae77246cb537a0f97791/examples/onnx/accuracy_improvement/quarot/data_preparation.py#L78
data_name = v.get(OlivePropertyNames.Dataset)
if data_name:
parameters.append(
Expand All @@ -162,6 +163,14 @@ def generate_amd_quantization_config(configFile: Path, modelList: ModelList) ->
),
)
)
data_type = v.get(OlivePropertyNames.DataType)
if data_type:
parameter.optimizationPaths.append(
OptimizationPath(
name="QuarkDataType",
path=f"{OlivePropertyNames.Passes}.{k}.{OlivePropertyNames.DataType}",
)
)
break

if parameters:
Expand Down Expand Up @@ -196,7 +205,7 @@ def generator_amd(id: str, recipe, folder: Path, modelList: ModelList):
parameter = create_model_parameter(aitk, name, configFile)
parameter.isLLM = isLLM

quantize = generate_amd_quantization_config(configFile, modelList)
quantize = generate_amd_quantization_config(configFile, modelList, parameter)
if not quantize:
quantize = generate_quantization_config(configFile, modelList, parameter)
if quantize:
Expand Down
20 changes: 14 additions & 6 deletions .aitk/scripts/sanitize/model_parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,15 @@ def Check(
sectionId: int,
oliveJson: Any,
modelList: ModelList,
emptyAllowed: bool
):
if not self.name:
return False
# if not self.description:
# return False
# TODO add placeholder for General?
if not self.parameters and self.phase != PhaseTypeEnum.Conversion:
printWarning(f"{_file} self.parameters is empty for {self.phase}.")
if not emptyAllowed and not self.parameters:
printError(f"{_file} self.parameters is empty for {self.phase}.")

for i, parameter in enumerate(self.parameters):
if parameter.template:
Expand Down Expand Up @@ -229,6 +230,8 @@ class ModelParameter(BaseModelClass):
# For template using CUDA and no runtime overwrite, we need to set this so we know the target EP
evalRuntime: Optional[RuntimeEnum] = None
evalMetrics: Optional[Dict[str, str]] = None
# Set when evaluation only measures latency with random data, so no evaluation data config is required
evalNoDataConfig: Optional[bool] = None
debugInfo: Optional[DebugInfo] = None
# A SHORTCUT FOR SEVERAL PARAMETERS
# This kind of config will
Expand Down Expand Up @@ -406,7 +409,8 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
if not checkPath(f"{OlivePropertyNames.Evaluators}.{evaluatorName}", oliveJson):
printError(f"{self._file} does not have evaluator {evaluatorName}")

if not section.Check(templates, self._file or "", tmpDevice, oliveJson, modelList):
emptyAllowed = section.phase == PhaseTypeEnum.Conversion or (section.phase == PhaseTypeEnum.Evaluation and self.evalNoDataConfig)
if not section.Check(templates, self._file or "", tmpDevice, oliveJson, modelList, emptyAllowed):
printError(f"{self._file} section {tmpDevice} has error")

if (
Expand All @@ -426,7 +430,7 @@ def Check(self, templates: Dict[str, Parameter], oliveJson: Any, modelList: Mode
if self.evalMetrics and len(self.evalMetrics) > 2:
printError(f"{self._file} evalMetrics should not have more than 2 metrics")

self.checkPhase(oliveJson)
self.checkPhase(oliveJson, self.evalNoDataConfig or False)
self.CheckRuntimeInConversion(oliveJson, modelList, modelInfo)
self.checkOliveFile(oliveJson, modelInfo)
self.checkRequirements(modelList)
Expand Down Expand Up @@ -570,7 +574,7 @@ def addRuntimeInConversion(runtime: Parameter, path: str, values: List[Any]):
if not self.runtimeInConversion.Check(False, oliveJson, modelList):
printError(f"{self._file} runtime in conversion has error")

def checkPhase(self, oliveJson: Any):
def checkPhase(self, oliveJson: Any, evalNoDataConfig: bool):
allPhases = [section.phase for section in self.sections]
if len(allPhases) == 1 and allPhases[0] == PhaseTypeEnum.Conversion:
pass
Expand All @@ -593,22 +597,26 @@ def checkPhase(self, oliveJson: Any):
if (
PhaseTypeEnum.Evaluation in allPhases
and PhaseTypeEnum.Quantization in allPhases
and not evalNoDataConfig
and (OlivePropertyNames.DataConfigs not in oliveJson or len(oliveJson[OlivePropertyNames.DataConfigs]) != 2)
):
printWarning(f"{self._file}'s olive json should have two data configs for evaluation")
printError(f"{self._file}'s olive json should have two data configs for evaluation")

def checkOliveFile(self, oliveJson: Any, modelInfo: ModelInfo):
if modelInfo.extension:
return
if modelInfo.template:
return
if self.aitkPython:
return
if not self.oliveFile:
if (
self.runtime
and self.runtime.displayNames
and self.runtime.displayNames[0]
in [
GlobalVars.RuntimeToDisplayName[RuntimeEnum.DML],
# TODO: this warning is useless now
GlobalVars.RuntimeToDisplayName[RuntimeEnum.AMDGPU],
GlobalVars.RuntimeToDisplayName[RuntimeEnum.IntelCPU],
GlobalVars.RuntimeToDisplayName[RuntimeEnum.IntelGPU],
Expand Down
14 changes: 0 additions & 14 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,6 @@
}
]
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json",
"dst": "qwen2_5_vitis_ai_config.json",
"replacements": [
{
"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"replace": "Qwen/Qwen2.5-1.5B-Instruct"
},
{
"find": "model/deepseek",
"replace": "model/qwen2_5"
}
]
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
"dst": "qwen2_5_trtrtx_config.json",
Expand Down
7 changes: 2 additions & 5 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@ recipes:
ep: VitisAIExecutionProvider
aitk:
oliveFile: "VitisAI/Qwen2.5-1.5B-Instruct_quark_vitisai_llm.json"
requirementsPatches:
- AutoGptq
runtimeOverwrite:
executeEp: CUDAExecutionProvider
requirements: AMD/Quark_py3.10.17
evalRuntime: AMDNPU
- file: "qwen2_5_ov_gpu_config.json"
devices:
Expand All @@ -37,7 +34,7 @@ recipes:
aitk:
modelInfo:
id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
version: 4
version: 5
groupId: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
groupItemName: "1.5B"
p0: true
2 changes: 1 addition & 1 deletion Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@
],
"modelInfo": {
"id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct",
"version": 4
"version": 5
}
}
1 change: 1 addition & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"python_environment_path": "/path/to/qnn/env/bin",
"accelerators": [
{
"device": "npu",
"execution_providers": [
"QNNExecutionProvider"
]
Expand Down
Loading
Loading