Commits (28)
4bb42a4  add rtx recipe (chinazhangchao, Aug 6, 2025)
a5cfa12  add break (chinazhangchao, Aug 6, 2025)
b2a5196  Merge branch 'main' of https://github.com/microsoft/olive-recipes int… (chinazhangchao, Aug 7, 2025)
1db3630  merge main (chinazhangchao, Aug 8, 2025)
1935615  change req (chinazhangchao, Aug 8, 2025)
7becba2  merge main (chinazhangchao, Aug 15, 2025)
72a4bf4  add webgpu recipe (chinazhangchao, Aug 19, 2025)
38d6ff4  align with AITK (chinazhangchao, Aug 19, 2025)
60caa99  fix llm webgpu precision (chinazhangchao, Aug 19, 2025)
ecc64cf  add all webgpu recipes (chinazhangchao, Aug 20, 2025)
355b2fe  update olive, fix clip (chinazhangchao, Aug 20, 2025)
0a9345f  fix comments (chinazhangchao, Aug 20, 2025)
599c358  merge main (chinazhangchao, Aug 25, 2025)
ccbac48  update readme (chinazhangchao, Aug 26, 2025)
79b5e58  Merge branch 'main' into chao/trtrtx (chinazhangchao, Aug 26, 2025)
3696f21  merge main (chinazhangchao, Aug 28, 2025)
1cd193e  merge main (chinazhangchao, Oct 14, 2025)
025f9e9  sanitize (chinazhangchao, Oct 14, 2025)
3c8dfcd  fix conflict (chinazhangchao, Oct 14, 2025)
943491f  merge main (chinazhangchao, Oct 16, 2025)
7c361f8  fix merge issue (chinazhangchao, Oct 16, 2025)
47c0e63  fix lint (chinazhangchao, Oct 20, 2025)
f9c83e6  Merge branch 'main' of https://github.com/microsoft/olive-recipes int… (chinazhangchao, Oct 20, 2025)
a18458f  merge main (chinazhangchao, Oct 22, 2025)
5fee543  Merge branch 'main' into chao/webgpu (chinazhangchao, Oct 27, 2025)
86a4d15  Merge branch 'main' into chao/webgpu (chinazhangchao, Oct 28, 2025)
5ddeb45  Merge branch 'main' of https://github.com/microsoft/Olive-recipes int… (chinazhangchao, Oct 30, 2025)
e7ad1ba  merge main (chinazhangchao, Nov 24, 2025)
33 changes: 22 additions & 11 deletions .aitk/configs/model_list.json
@@ -12,7 +12,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -33,7 +34,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "CNN",
 "status": "Ready",
@@ -55,7 +57,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -76,7 +79,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -97,7 +101,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -118,7 +123,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -139,7 +145,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -159,7 +166,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -180,7 +188,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -201,7 +210,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
@@ -223,7 +233,8 @@
 "IntelCPU",
 "IntelGPU",
 "IntelNPU",
-"DML"
+"DML",
+"WebGPU"
 ],
 "architecture": "Transformer",
 "status": "Ready",
4 changes: 2 additions & 2 deletions .aitk/docs/guide/ModelList.md
@@ -41,5 +41,5 @@
| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) |
| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) |
| [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) |
-| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) |
-| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) |
+| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) |
+| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json) |
2 changes: 1 addition & 1 deletion Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md
@@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt
 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs**
 - OpenVINO for Intel® CPU/GPU/NPU
 + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation`
-- Float downcasting for NVIDIA TRT for RTX GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU
 - DML for general GPU
 + This process uses AutoAWQ and ModelBuilder
 
19 changes: 19 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -61,6 +61,25 @@
"dst": "qwen2_5_dml_config.json.config",
"replacements": []
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json",
"dst": "qwen2_5_webgpu_config.json",
"replacements": [
{
"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"replace": "Qwen/Qwen2.5-1.5B-Instruct"
},
{
"find": "model/deepseek",
"replace": "model/qwen2_5"
}
]
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config",
"dst": "qwen2_5_webgpu_config.json.config",
"replacements": []
},
{
"src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md",
"dst": "README.md",
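The entries added above describe a simple copy-with-string-replacement step: the Qwen WebGPU recipe is generated from the DeepSeek one by swapping the model id and the output directory. The tooling that actually consumes `_copy.json.config` is not part of this diff, so the sketch below only illustrates the find/replace semantics the entries imply; the `apply_copy_entry` helper is hypothetical.

```python
# Hypothetical helper illustrating the copy/replace semantics of a _copy.json.config
# entry; the repo's real tooling for this file is not shown in this diff.
from pathlib import Path

def apply_copy_entry(entry: dict, base_dir: Path) -> None:
    """Copy entry["src"] to entry["dst"], applying each find/replace pair in order."""
    text = (base_dir / entry["src"]).read_text(encoding="utf-8")
    for rule in entry["replacements"]:
        text = text.replace(rule["find"], rule["replace"])
    (base_dir / entry["dst"]).write_text(text, encoding="utf-8")

# The entry added above: regenerate the Qwen WebGPU recipe from the DeepSeek one.
entry = {
    "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json",
    "dst": "qwen2_5_webgpu_config.json",
    "replacements": [
        {"find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "replace": "Qwen/Qwen2.5-1.5B-Instruct"},
        {"find": "model/deepseek", "replace": "model/qwen2_5"},
    ],
}
apply_copy_entry(entry, base_dir=Path("Qwen-Qwen2.5-1.5B-Instruct/aitk"))
```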
6 changes: 6 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -34,6 +34,12 @@ recipes:
- file: "qwen2_5_dml_config.json"
device: gpu
ep: DmlExecutionProvider
- file: "qwen2_5_webgpu_config.json"
device: gpu
ep: WebGpuExecutionProvider
- file: "qwen2_5_migraphx_config.json"
device: gpu
ep: MIGraphXExecutionProvider
aitk:
modelInfo:
id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct"
8 changes: 8 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -23,6 +23,14 @@
{
"file": "qwen2_5_dml_config.json",
"templateName": "qwen2_5_dml_config"
},
{
"file": "qwen2_5_webgpu_config.json",
"templateName": "qwen2_5_webgpu_config"
},
{
"file": "qwen2_5_migraphx_config.json",
"templateName": "qwen2_5_migraphx_config"
}
],
"modelInfo": {
38 changes: 38 additions & 0 deletions Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json
@@ -0,0 +1,38 @@
{
"input_model": {
"type": "HfModel",
"model_path": "Qwen/Qwen2.5-1.5B-Instruct"
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train",
"type": "HuggingfaceContainer",
"load_dataset_config": {
"data_name": "wikitext",
"subset": "wikitext-2-raw-v1",
"split": "train"
},
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": false,
"max_samples": 128,
"max_seq_len": 512
}
}
],
"passes": {
"builder": { "type": "ModelBuilder", "precision": "int4" }
},
"target": "local_system",
"log_severity_level": 1,
"output_dir": "model/qwen2_5",
"cache_dir": "cache",
"no_artifacts": true,
"evaluate_input_model": false
}
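The recipe above is a standard Olive workflow config: a Hugging Face input model, a local gpu target using the WebGPU execution provider, and a single ModelBuilder pass that emits an int4 model under `model/qwen2_5`. It can be executed with the Olive CLI (`olive run --config qwen2_5_webgpu_config.json`) or programmatically; the sketch below assumes the `olive-ai` package is installed and the JSON is saved under the file name used in this PR.

```python
# Minimal sketch: run the WebGPU recipe above with Olive's programmatic entry point.
# Assumes olive-ai is installed and the config is saved as qwen2_5_webgpu_config.json;
# adjust if your installed Olive version exposes the API differently.
from olive.workflows import run as olive_run

# Executes the single "builder" pass (ModelBuilder, precision int4) against the
# gpu / WebGpuExecutionProvider target declared in systems.local_system and writes
# the result under model/qwen2_5.
olive_run("qwen2_5_webgpu_config.json")
```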
43 changes: 43 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,43 @@
{
"name": "Convert to WebGPU",
"oliveFile": "",
"isLLM": true,
"debugInfo": {
"autoGenerated": true,
"useModelBuilder": "builder"
},
"needHFLogin": true,
"addCpu": false,
"runtime": {
"autoGenerated": true,
"name": "Evaluate on",
"type": "enum",
"displayNames": [
"WebGPU"
],
"path": "systems.local_system.accelerators.0.execution_providers.0",
"values": [
"WebGpuExecutionProvider"
],
"readOnly": false
},
"sections": [
{
"autoGenerated": true,
"name": "Convert",
"phase": "Conversion",
"parameters": [],
"toggle": {
"autoGenerated": true,
"name": "Convert to ONNX format",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
}
]
}
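The template above is the AI Toolkit UI description that pairs with the recipe: its `runtime.path` points at `systems.local_system.accelerators.0.execution_providers.0` in the Olive config, and `values` constrains that field to `WebGpuExecutionProvider`. How AI Toolkit resolves these dotted paths is internal to the tool, so the following is only an illustrative sketch of the addressing scheme, with a hypothetical `set_by_path` helper.

```python
# Illustrative only: resolve a dotted path like the template's "runtime.path"
# against the recipe JSON. Integer segments index into lists, other segments
# index into objects. The helper name is hypothetical.
import json

def set_by_path(config, dotted_path: str, value) -> None:
    """Set the leaf addressed by a dotted path of dict keys and list indices."""
    keys = dotted_path.split(".")
    node = config
    for key in keys[:-1]:
        node = node[int(key)] if key.isdigit() else node[key]
    last = keys[-1]
    node[int(last) if last.isdigit() else last] = value

with open("qwen2_5_webgpu_config.json", encoding="utf-8") as f:
    cfg = json.load(f)

# The only value the template's "values" list allows for this recipe.
set_by_path(cfg, "systems.local_system.accelerators.0.execution_providers.0", "WebGpuExecutionProvider")
```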
2 changes: 1 addition & 1 deletion deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md
@@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1
 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs**
 - OpenVINO for Intel® CPU/GPU/NPU
 + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation`
-- Float downcasting for NVIDIA TRT for RTX GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU
 - DML for general GPU
 + This process uses AutoAWQ and ModelBuilder
 
38 changes: 38 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,38 @@
{
"input_model": {
"type": "HfModel",
"model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
},
"systems": {
"local_system": {
"type": "LocalSystem",
"accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train",
"type": "HuggingfaceContainer",
"load_dataset_config": {
"data_name": "wikitext",
"subset": "wikitext-2-raw-v1",
"split": "train"
},
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": false,
"max_samples": 128,
"max_seq_len": 512
}
}
],
"passes": {
"builder": { "type": "ModelBuilder", "precision": "int4" }
},
"target": "local_system",
"log_severity_level": 1,
"output_dir": "model/deepseek",
"cache_dir": "cache",
"no_artifacts": true,
"evaluate_input_model": false
}
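ModelBuilder writes its output in the onnxruntime-genai layout (a folder containing the ONNX model plus `genai_config.json`), so the artifact produced under `model/deepseek` can be smoke-tested with the onnxruntime-genai Python bindings. The generator API has changed between onnxruntime-genai releases, and whether WebGPU is actually used at inference time depends on the onnxruntime build and the generated runtime config, so treat this as a hedged sketch rather than the recipe's own validation step.

```python
# Hedged smoke test of the ModelBuilder output. Point model_dir at the folder that
# contains genai_config.json under the recipe's output_dir; method names match
# recent onnxruntime-genai releases and may differ in older ones.
import onnxruntime_genai as og

model_dir = "model/deepseek"  # adjust to the actual generated sub-folder if needed
model = og.Model(model_dir)
tokenizer = og.Tokenizer(model)

params = og.GeneratorParams(model)
params.set_search_options(max_length=128)

generator = og.Generator(model, params)
generator.append_tokens(tokenizer.encode("What does the WebGPU execution provider do?"))
while not generator.is_done():
    generator.generate_next_token()

print(tokenizer.decode(generator.get_sequence(0)))
```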
43 changes: 43 additions & 0 deletions (new file; name not captured in this view)
@@ -0,0 +1,43 @@
{
"name": "Convert to WebGPU",
"oliveFile": "",
"isLLM": true,
"debugInfo": {
"autoGenerated": true,
"useModelBuilder": "builder"
},
"needHFLogin": true,
"addCpu": false,
"runtime": {
"autoGenerated": true,
"name": "Evaluate on",
"type": "enum",
"displayNames": [
"WebGPU"
],
"path": "systems.local_system.accelerators.0.execution_providers.0",
"values": [
"WebGpuExecutionProvider"
],
"readOnly": false
},
"sections": [
{
"autoGenerated": true,
"name": "Convert",
"phase": "Conversion",
"parameters": [],
"toggle": {
"autoGenerated": true,
"name": "Convert to ONNX format",
"type": "bool",
"path": "passes.builder",
"actions": [
[],
[]
],
"readOnly": true
}
}
]
}
6 changes: 6 additions & 0 deletions deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -34,6 +34,12 @@ recipes:
- file: "deepseek_dml_config.json"
device: gpu
ep: DmlExecutionProvider
- file: "deepseek_webgpu_config.json"
device: gpu
ep: WebGpuExecutionProvider
- file: "deepseek_migraphx_config.json"
device: gpu
ep: MIGraphXExecutionProvider
aitk:
modelInfo:
id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
8 changes: 8 additions & 0 deletions (file name not captured in this view)
@@ -23,6 +23,14 @@
{
"file": "deepseek_dml_config.json",
"templateName": "deepseek_dml_config"
},
{
"file": "deepseek_webgpu_config.json",
"templateName": "deepseek_webgpu_config"
},
{
"file": "deepseek_migraphx_config.json",
"templateName": "deepseek_migraphx_config"
}
],
"modelInfo": {
2 changes: 1 addition & 1 deletion google-bert-bert-base-multilingual-cased/aitk/README.md
@@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows.
 
 - QDQ for Qualcomm NPU / AMD NPU
 - OpenVINO for Intel® CPU/GPU/NPU
-- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU
+- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU
 
 ## QDQ for Qualcomm NPU / AMD NPU
 