Commits (28)
4bb42a4  add rtx recipe (chinazhangchao, Aug 6, 2025)
a5cfa12  add break (chinazhangchao, Aug 6, 2025)
b2a5196  Merge branch 'main' of https://github.com/microsoft/olive-recipes int… (chinazhangchao, Aug 7, 2025)
1db3630  merge main (chinazhangchao, Aug 8, 2025)
1935615  change req (chinazhangchao, Aug 8, 2025)
7becba2  merge main (chinazhangchao, Aug 15, 2025)
72a4bf4  add webgpu recipe (chinazhangchao, Aug 19, 2025)
38d6ff4  align with AITK (chinazhangchao, Aug 19, 2025)
60caa99  fix llm webgpu precision (chinazhangchao, Aug 19, 2025)
ecc64cf  add all webgpu recipes (chinazhangchao, Aug 20, 2025)
355b2fe  update olive, fix clip (chinazhangchao, Aug 20, 2025)
0a9345f  fix comments (chinazhangchao, Aug 20, 2025)
599c358  merge main (chinazhangchao, Aug 25, 2025)
ccbac48  update readme (chinazhangchao, Aug 26, 2025)
79b5e58  Merge branch 'main' into chao/trtrtx (chinazhangchao, Aug 26, 2025)
3696f21  merge main (chinazhangchao, Aug 28, 2025)
1cd193e  merge main (chinazhangchao, Oct 14, 2025)
025f9e9  sanitize (chinazhangchao, Oct 14, 2025)
3c8dfcd  fix conflict (chinazhangchao, Oct 14, 2025)
943491f  merge main (chinazhangchao, Oct 16, 2025)
7c361f8  fix merge issue (chinazhangchao, Oct 16, 2025)
47c0e63  fix lint (chinazhangchao, Oct 20, 2025)
f9c83e6  Merge branch 'main' of https://github.com/microsoft/olive-recipes int… (chinazhangchao, Oct 20, 2025)
a18458f  merge main (chinazhangchao, Oct 22, 2025)
5fee543  Merge branch 'main' into chao/webgpu (chinazhangchao, Oct 27, 2025)
86a4d15  Merge branch 'main' into chao/webgpu (chinazhangchao, Oct 28, 2025)
5ddeb45  Merge branch 'main' of https://github.com/microsoft/Olive-recipes int… (chinazhangchao, Oct 30, 2025)
e7ad1ba  merge main (chinazhangchao, Nov 24, 2025)
33 changes: 22 additions & 11 deletions .aitk/configs/model_list.json
@@ -12,7 +12,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -33,7 +34,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "CNN",
 "status": "Ready",
@@ -55,7 +57,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -76,7 +79,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -97,7 +101,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -118,7 +123,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -139,7 +145,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -159,7 +166,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -180,7 +188,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -201,7 +210,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -223,7 +233,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
4 changes: 2 additions & 2 deletions .aitk/docs/guide/ModelList.md
@@ -41,5 +41,5 @@
| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) |
| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) |
| [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) |
-| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) |
-| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) |
+| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) |
+| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json) |
2 changes: 1 addition & 1 deletion Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md
@@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt
 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs**
 - OpenVINO for Intel® CPU/GPU/NPU
 + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation`
-- Float downcasting for NVIDIA TRT for RTX GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU
 - DML for general GPU
 + This process uses AutoAWQ and ModelBuilder
 
19 changes: 19 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -61,6 +61,25 @@
"dst": "qwen2_5_dml_config.json.config",
"replacements": []
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json",
"dst": "qwen2_5_webgpu_config.json",
"replacements": [
{
"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"replace": "Qwen/Qwen2.5-1.5B-Instruct"
},
{
"find": "model/deepseek",
"replace": "model/qwen2_5"
}
]
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config",
"dst": "qwen2_5_webgpu_config.json.config",
"replacements": []
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
"dst": "README.md",
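The entries added above describe a simple copy-with-string-replacement step: the Qwen WebGPU recipe is generated from the DeepSeek one by swapping the model id and the output directory. The tooling that actually consumes `_copy.json.config` is not part of this diff, so the sketch below only illustrates the find/replace semantics the entries imply; the `apply_copy_entry` helper is hypothetical.

```python
# Hypothetical helper illustrating the copy/replace semantics of a _copy.json.config
# entry; the repo's real tooling for this file is not shown in this diff.
from pathlib import Path

def apply_copy_entry(entry: dict, base_dir: Path) -> None:
    """Copy entry["src"] to entry["dst"], applying each find/replace pair in order."""
    text = (base_dir / entry["src"]).read_text(encoding="utf-8")
    for rule in entry["replacements"]:
        text = text.replace(rule["find"], rule["replace"])
    (base_dir / entry["dst"]).write_text(text, encoding="utf-8")

# The entry added above: regenerate the Qwen WebGPU recipe from the DeepSeek one.
entry = {
    "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json",
    "dst": "qwen2_5_webgpu_config.json",
    "replacements": [
        {"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "replace": "Qwen/Qwen2.5-1.5B-Instruct"},
        {"find": "model/deepseek", "replace": "model/qwen2_5"},
    ],
}
apply_copy_entry(entry, base_dir=Path("Qwen-Qwen2.5-1.5B-Instruct/aitk"))
```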
6 changes: 6 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -34,6 +34,12 @@ recipes:
- file: "qwen2_5_dml_config.json"
device: gpu
ep: DmlExecutionProvider
- file: "qwen2_5_webgpu_config.json"
device: gpu
ep: WebGpuExecutionProvider
- file: "qwen2_5_migraphx_config.json"
device: gpu
ep: MIGraphXExecutionProvider
aitk:
modelInfo:
id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
8 changes: 8 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -23,6 +23,14 @@
{
"file": "qwen2_5_dml_config.json",
"templateName": "qwen2_5_dml_config"
},
{
"file": "qwen2_5_webgpu_config.json",
"templateName": "qwen2_5_webgpu_config"
},
{
"file": "qwen2_5_migraphx_config.json",
"templateName": "qwen2_5_migraphx_config"
}
],
"modelInfo": {
38 changes: 38 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json
@@ -0,0 +1,38 @@
{
"input_model": {
"type": "HfModel",
"model_path": "Qwen/Qwen2.5-1.5B-Instruct"
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train",
"type": "HuggingfaceContainer",
"load_dataset_config": {
"data_name": "wikitext",
"subset": "wikitext-2-raw-v1",
"split": "train"
},
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": false,
"max_samples": 128,
"max_seq_len": 512
}
}
],
"passes": {
"builder": { "type": "ModelBuilder", "precision": "int4" }
},
"target": "local_system",
"log_severity_level": 1,
"output_dir": "model/qwen2_5",
"cache_dir": "cache",
"no_artifacts": true,
"evaluate_input_model": false
}
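The recipe above is a standard Olive workflow config: a Hugging Face input model, a local gpu target using the WebGPU execution provider, and a single ModelBuilder pass that emits an int4 model under `model/qwen2_5`. It can be executed with the Olive CLI (`olive run --config qwen2_5_webgpu_config.json`) or programmatically; the sketch below assumes the `olive-ai` package is installed and the JSON is saved under the file name used in this PR.

```python
# Minimal sketch: run the WebGPU recipe above with Olive's programmatic entry point.
# Assumes olive-ai is installed and the config is saved as qwen2_5_webgpu_config.json;
# adjust if your installed Olive version exposes the API differently.
from olive.workflows import run as olive_run

# Executes the single "builder" pass (ModelBuilder, precision int4) against the
# gpu / WebGpuExecutionProvider target declared in systems.local_system and writes
# the result under model/qwen2_5.
olive_run("qwen2_5_webgpu_config.json")
```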
43 changes: 43 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,43 @@
{
"name": "Convert to WebGPU",
"oliveFile": "",
"isLLM": true,
"debugInfo": {
"autoGenerated": true,
"useModelBuilder": "builder"
},
"needHFLogin": true,
"addCpu": false,
"runtime": {
"autoGenerated": true,
"name": "Evaluate on",
"type": "enum",
"displayNames": [
"WebGPU"
],
"path": "systems.local_system.accelerators.0.execution_providers.0",
"values": [
"WebGpuExecutionProvider"
],
"readOnly": false
},
"sections": [
{
"autoGenerated": true,
"name": "Convert",
"phase": "Conversion",
"parameters": [],
"toggle": {
"autoGenerated": true,
"name": "Convert to ONNX format",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
}
]
}
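The template above is the AI Toolkit UI description that pairs with the recipe: its `runtime.path` points at `systems.local_system.accelerators.0.execution_providers.0` in the Olive config, and `values` constrains that field to `WebGpuExecutionProvider`. How AI Toolkit resolves these dotted paths is internal to the tool, so the following is only an illustrative sketch of the addressing scheme, with a hypothetical `set_by_path` helper.

```python
# Illustrative only: resolve a dotted path like the template's "runtime.path"
# against the recipe JSON. Integer segments index into lists, other segments
# index into objects. The helper name is hypothetical.
import json

def set_by_path(config, dotted_path: str, value) -> None:
    """Set the leaf addressed by a dotted path of dict keys and list indices."""
    keys = dotted_path.split(".")
    node = config
    for key in keys[:-1]:
        node = node[int(key)] if key.isdigit() else node[key]
    last = keys[-1]
    node[int(last) if last.isdigit() else last] = value

with open("qwen2_5_webgpu_config.json", encoding="utf-8") as f:
    cfg = json.load(f)

# The only value the template's "values" list allows for this recipe.
set_by_path(cfg, "systems.local_system.accelerators.0.execution_providers.0", "WebGpuExecutionProvider")
```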
2 changes: 1 addition & 1 deletion deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md
@@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1
 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs**
 - OpenVINO for Intel® CPU/GPU/NPU
 + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation`
-- Float downcasting for NVIDIA TRT for RTX GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU
 - DML for general GPU
 + This process uses AutoAWQ and ModelBuilder
 
38 changes: 38 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,38 @@
{
"input_model": {
"type": "HfModel",
"model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train",
"type": "HuggingfaceContainer",
"load_dataset_config": {
"data_name": "wikitext",
"subset": "wikitext-2-raw-v1",
"split": "train"
},
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": false,
"max_samples": 128,
"max_seq_len": 512
}
}
],
"passes": {
"builder": { "type": "ModelBuilder", "precision": "int4" }
},
"target": "local_system",
"log_severity_level": 1,
"output_dir": "model/deepseek",
"cache_dir": "cache",
"no_artifacts": true,
"evaluate_input_model": false
}
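ModelBuilder writes its output in the onnxruntime-genai layout (a folder containing the ONNX model plus `genai_config.json`), so the artifact produced under `model/deepseek` can be smoke-tested with the onnxruntime-genai Python bindings. The generator API has changed between onnxruntime-genai releases, and whether WebGPU is actually used at inference time depends on the onnxruntime build and the generated runtime config, so treat this as a hedged sketch rather than the recipe's own validation step.

```python
# Hedged smoke test of the ModelBuilder output. Point model_dir at the folder that
# contains genai_config.json under the recipe's output_dir; method names match
# recent onnxruntime-genai releases and may differ in older ones.
import onnxruntime_genai as og

model_dir = "model/deepseek"  # adjust to the actual generated sub-folder if needed
model = og.Model(model_dir)
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("What does the WebGPU execution provider do?"))
while not generator.is_done():
    generator.generate_next_token()

print(tokenizer.decode(generator.get_sequence(0)))
```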
43 changes: 43 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,43 @@
{
"name": "Convert to WebGPU",
"oliveFile": "",
"isLLM": true,
"debugInfo": {
"autoGenerated": true,
"useModelBuilder": "builder"
},
"needHFLogin": true,
"addCpu": false,
"runtime": {
"autoGenerated": true,
"name": "Evaluate on",
"type": "enum",
"displayNames": [
"WebGPU"
],
"path": "systems.local_system.accelerators.0.execution_providers.0",
"values": [
"WebGpuExecutionProvider"
],
"readOnly": false
},
"sections": [
{
"autoGenerated": true,
"name": "Convert",
"phase": "Conversion",
"parameters": [],
"toggle": {
"autoGenerated": true,
"name": "Convert to ONNX format",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
}
]
}
6 changes: 6 additions & 0 deletions deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -34,6 +34,12 @@ recipes:
- file: "deepseek_dml_config.json"
device: gpu
ep: DmlExecutionProvider
- file: "deepseek_webgpu_config.json"
device: gpu
ep: WebGpuExecutionProvider
- file: "deepseek_migraphx_config.json"
device: gpu
ep: MIGraphXExecutionProvider
aitk:
modelInfo:
id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
8 changes: 8 additions & 0 deletions (file name not captured in this view)
@@ -23,6 +23,14 @@
{
"file": "deepseek_dml_config.json",
"templateName": "deepseek_dml_config"
},
{
"file": "deepseek_webgpu_config.json",
"templateName": "deepseek_webgpu_config"
},
{
"file": "deepseek_migraphx_config.json",
"templateName": "deepseek_migraphx_config"
}
],
"modelInfo": {
2 changes: 1 addition & 1 deletion google-bert-bert-base-multilingual-cased/aitk/README.md
@@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows.
 
 - QDQ for Qualcomm NPU / AMD NPU
 - OpenVINO for Intel® CPU/GPU/NPU
-- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU
 
 ## QDQ for Qualcomm NPU / AMD NPU
 