diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index ef6619ea..89a88148 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -12,7 +12,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -33,7 +34,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "CNN", "status": "Ready", @@ -55,7 +57,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -76,7 +79,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -97,7 +101,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -118,7 +123,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -139,7 +145,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -159,7 +166,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -180,7 +188,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -201,7 +210,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -223,7 +233,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md index 0ffc184b..d716a599 100644 --- a/.aitk/docs/guide/ModelList.md +++ b/.aitk/docs/guide/ModelList.md @@ -41,5 +41,5 @@ | [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) | | [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) | | [Clip Vit Large Patch14](https://huggingface.co/openai/clip-vit-large-patch14) | [Qualcomm NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qnn.json), [AMD 
NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_qdq_amd.json), [AMD GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_migraphx.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-large-patch14/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-large-patch14/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-large-patch14/aitk/openai_clip_dml.json) | -| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) | -| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) | +| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [AMD GPU](../../../microsoft-resnet-50/aitk/resnet_migraphx.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) | +| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [AMD GPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_migraphx.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), 
[DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json) | diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md index 13a61646..fec0500c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config index 8e9510e6..b61489fe 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -61,6 +61,25 @@ "dst": "qwen2_5_dml_config.json.config", "replacements": [] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "qwen2_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "qwen2_5_webgpu_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index 3a7e04e6..244f7b57 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -34,6 +34,12 @@ recipes: - file: "qwen2_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "qwen2_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider + - file: "qwen2_5_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index 10029125..ab06c67f 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -23,6 +23,14 @@ { "file": "qwen2_5_dml_config.json", "templateName": "qwen2_5_dml_config" + }, + { + "file": "qwen2_5_webgpu_config.json", + "templateName": "qwen2_5_webgpu_config" + }, + { + "file": "qwen2_5_migraphx_config.json", + "templateName": "qwen2_5_migraphx_config" } ], "modelInfo": { diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json new file mode 100644 index 00000000..d4ae439c --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ 
+ { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "int4" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config new file mode 100644 index 00000000..b3e6c90c --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -0,0 +1,43 @@ +{ + "name": "Convert to WebGPU", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "needHFLogin": true, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md index 50d6122d..ca8e0c48 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json new file mode 100644 index 00000000..fd193ded --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "int4" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + 
"cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config new file mode 100644 index 00000000..b3e6c90c --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -0,0 +1,43 @@ +{ + "name": "Convert to WebGPU", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "needHFLogin": true, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 1c560030..eb66c59f 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -34,6 +34,12 @@ recipes: - file: "deepseek_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "deepseek_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider + - file: "deepseek_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index 4c125a4b..6ec45979 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -23,6 +23,14 @@ { "file": "deepseek_dml_config.json", "templateName": "deepseek_dml_config" + }, + { + "file": "deepseek_webgpu_config.json", + "templateName": "deepseek_webgpu_config" + }, + { + "file": "deepseek_migraphx_config.json", + "templateName": "deepseek_migraphx_config" } ], "modelInfo": { diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md b/google-bert-bert-base-multilingual-cased/aitk/README.md index c6745c5c..867ec378 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/README.md +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. 
- QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json new file mode 100644 index 00000000..962aba68 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json @@ -0,0 +1,138 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config new file mode 100644 index 00000000..16d4d9bd --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config @@ -0,0 +1,123 @@ +{ + 
"name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml index 42234846..f37be1bc 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/info.yml +++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "bert-base-multilingual-cased_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert-base-multilingual-cased_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "bert-base-multilingual-cased_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config index b817eb1f..92911e8a 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/model_project.config +++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "bert-base-multilingual-cased_dml.json", "templateName": "bert-base-multilingual-cased_dml" }, + { + "file": "bert-base-multilingual-cased_webgpu.json", + "templateName": "bert-base-multilingual-cased_webgpu" + }, { "file": "bert-base-multilingual-cased_migraphx.json", "templateName": "bert-base-multilingual-cased_migraphx" diff --git 
a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md index 7bb4b160..f2a46bb2 100644 --- a/google-vit-base-patch16-224/aitk/README.md +++ b/google-vit-base-patch16-224/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of VIT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Optimization Workflows diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb index 62936bbe..ece8e99e 100644 --- a/google-vit-base-patch16-224/aitk/inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb @@ -42,6 +42,29 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml index 9e7ceb4f..50ea5f2c 100644 --- a/google-vit-base-patch16-224/aitk/info.yml +++ b/google-vit-base-patch16-224/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "vit-base-patch16-224_dml.json" device: gpu ep: DmlExecutionProvider + - file: "vit-base-patch16-224_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "vit-base-patch16-224_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config index 602d90f7..aeb99a06 100644 --- a/google-vit-base-patch16-224/aitk/model_project.config +++ b/google-vit-base-patch16-224/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "vit-base-patch16-224_dml.json", "templateName": "vit-base-patch16-224_dml" }, + { + "file": "vit-base-patch16-224_webgpu.json", + "templateName": "vit-base-patch16-224_webgpu" + }, { "file": "vit-base-patch16-224_migraphx.json", "templateName": "vit-base-patch16-224_migraphx" diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb new file mode 100644 index 00000000..19adf2a8 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"DmlExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in 
selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..7b3c0075 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", 
+ "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json new file mode 100644 index 00000000..c1b6866e --- /dev/null +++ 
b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json @@ -0,0 +1,142 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/vit" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, 
+ "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..4f3ab189 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = 
ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " \"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = 
session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/intel-bert-base-uncased-mrpc/aitk/README.md b/intel-bert-base-uncased-mrpc/aitk/README.md index 66283471..0d59c1da 100644 --- a/intel-bert-base-uncased-mrpc/aitk/README.md +++ b/intel-bert-base-uncased-mrpc/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. 
- QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json new file mode 100644 index 00000000..26a24bb8 --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json @@ -0,0 +1,130 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Intel/bert-base-uncased-mrpc", + "task": "text-classification", + "load_kwargs": { + "attn_implementation": "eager" + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "glue_mrpc_eval", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "glue", + "subset": "mrpc", + "split": "validation" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": [ + "sentence1", + "sentence2" + ], + "max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1 + }, + { + "name": "f1_score" + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/bert_webgpu" +} diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config new file mode 100644 index 00000000..b0f0ed25 --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config @@ -0,0 +1,102 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": 
false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index d9bb76a7..af232f23 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -23,6 +23,9 @@ recipes: - file: "bert_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "bert_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config index 95de4455..fb63a594 100644 --- a/intel-bert-base-uncased-mrpc/aitk/model_project.config +++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "bert_dml.json", "templateName": "bert_dml" }, + { + "file": "bert_webgpu.json", + "templateName": "bert_webgpu" + }, { "file": "bert_migraphx.json", "templateName": "bert_migraphx" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md index af5d33de..38216fb9 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Laion Clip optimization using different workflo - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Laion Clip optimization with QDQ for Qualcomm NPU / AMD NPU @@ -18,7 +18,7 @@ This workflow performs quantization with OpenVINO NNCF. 
It performs the optimiza - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/inference_sample.ipynb index 8a9b4f5a..670b0cd2 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/inference_sample.ipynb @@ -53,6 +53,34 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index c717da13..2286f064 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "laion_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "laion_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "laion_clip_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..e174c596 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + 
"from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..e09f24a8 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + 
"def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..0884b6ac --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import 
subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process the sentences one by one because, due to static quantization, the batch size is fixed to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}: {score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata":
{}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..fc5e4a5f --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = 
ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..aa8a8757 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if 
ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process the images one by one because, due to static quantization, the batch size is fixed to 1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}: {score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json new file mode 100644 index 00000000..bb76bded --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json @@ -0,0 +1,181 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [
+ "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + 
"nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..6938c9bb --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = 
ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config index f1670578..9f31e5be 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "laion_clip_dml.json", "templateName": "laion_clip_dml" }, + { + "file": "laion_clip_webgpu.json", + "templateName": "laion_clip_webgpu" + }, { "file": "laion_clip_migraphx.json", "templateName": "laion_clip_migraphx" diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md index 612cdf2b..8efa8cc5 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.1-8B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md index 573bf132..b8873094 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config index c59e4aa4..c49d0c2f 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -61,6 +61,25 @@ "dst": "llama3_2_dml_config.json.config", "replacements": [] }, + { + "src": 
"../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "llama3_2_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "llama3_2_webgpu_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index f159eae3..fb0a0e6c 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -34,6 +34,12 @@ recipes: - file: "llama3_2_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "llama3_2_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider + - file: "llama3_2_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json new file mode 100644 index 00000000..4a2e2a3f --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "int4" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config new file mode 100644 index 00000000..b3e6c90c --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -0,0 +1,43 @@ +{ + "name": "Convert to WebGPU", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "needHFLogin": true, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index 
e0a1d7cb..e96ca946 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -23,6 +23,14 @@ { "file": "llama3_2_dml_config.json", "templateName": "llama3_2_dml_config" + }, + { + "file": "llama3_2_webgpu_config.json", + "templateName": "llama3_2_webgpu_config" + }, + { + "file": "llama3_2_migraphx_config.json", + "templateName": "llama3_2_migraphx_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md index cd635e33..c0bcb905 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/README.md +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Ins + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config index 1142a71c..d02d1954 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -29,8 +29,8 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", - "dst": "phi3_5_dml_config.json", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json", + "dst": "phi3_5_ov_config.json", "replacements": [ { "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", @@ -47,6 +47,25 @@ "dst": "phi3_5_dml_config.json.config", "replacements": [] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "phi3_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "phi3_5_webgpu_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", "dst": "README.md", diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 890fb728..5eab37db 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -34,6 +34,12 @@ recipes: - file: "phi3_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "phi3_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider + - file: "phi3_5_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index 4b1a78b5..1444a95a 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -23,6 +23,14 @@ { "file": "phi3_5_dml_config.json", "templateName": "phi3_5_dml_config" + }, + { + "file": 
"phi3_5_webgpu_config.json", + "templateName": "phi3_5_webgpu_config" + }, + { + "file": "phi3_5_migraphx_config.json", + "templateName": "phi3_5_migraphx_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json new file mode 100644 index 00000000..3f1a4b36 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "int4" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config new file mode 100644 index 00000000..b3e6c90c --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -0,0 +1,43 @@ +{ + "name": "Convert to WebGPU", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "needHFLogin": true, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md index 37dd1448..ffd2ebc1 100644 --- a/microsoft-resnet-50/aitk/README.md +++ b/microsoft-resnet-50/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of ResNet optimization using different workflows. 
- QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/microsoft-resnet-50/aitk/inference_sample.ipynb b/microsoft-resnet-50/aitk/inference_sample.ipynb index e2a97c45..e9202f6a 100644 --- a/microsoft-resnet-50/aitk/inference_sample.ipynb +++ b/microsoft-resnet-50/aitk/inference_sample.ipynb @@ -45,6 +45,29 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml index 4978c8ff..343fb825 100644 --- a/microsoft-resnet-50/aitk/info.yml +++ b/microsoft-resnet-50/aitk/info.yml @@ -23,6 +23,9 @@ recipes: - file: "resnet_dml.json" device: gpu ep: DmlExecutionProvider + - file: "resnet_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/resnet-50" diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config index 2bd3c778..6c5defda 100644 --- a/microsoft-resnet-50/aitk/model_project.config +++ b/microsoft-resnet-50/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "resnet_dml.json", "templateName": "resnet_dml" + }, + { + "file": "resnet_webgpu.json", + "templateName": "resnet_webgpu" } ], "modelInfo": { diff --git a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb new file mode 100644 index 00000000..f1f634e1 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL 
import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # the float16 ONNX model produced by the DirectML workflow\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..a9bdf35d --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # the float16 ONNX model produced by the TensorRT for RTX workflow\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights =
ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json b/microsoft-resnet-50/aitk/resnet_webgpu.json new file mode 100644 index 00000000..e64119cf --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json @@ -0,0 +1,121 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_webgpu", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config b/microsoft-resnet-50/aitk/resnet_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": 
"systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..7cc8ad1c --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + 
"import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Current provider:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet50 v2...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md index 44468f65..45de6a35 100644 --- a/openai-clip-vit-base-patch16/aitk/README.md +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip optimization with QDQ for Qualcomm NPU / AMD NPU @@ -18,7 +18,7 @@ This workflow performs quantization with OpenVINO NNCF. 
It performs the optimiza - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/openai-clip-vit-base-patch16/aitk/inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/inference_sample.ipynb index cf26a73e..bc1a552f 100644 --- a/openai-clip-vit-base-patch16/aitk/inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/inference_sample.ipynb @@ -53,6 +53,34 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index cffa85db..d1124ea7 100644 --- a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "openai_clip_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config index b2e2714b..b3c86e66 100644 --- a/openai-clip-vit-base-patch16/aitk/model_project.config +++ b/openai-clip-vit-base-patch16/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" + }, { "file": "openai_clip_migraphx.json", "templateName": "openai_clip_migraphx" diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..83c5e565 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " 
worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..9ba20f48 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..46a0e8d6 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": 
[ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = 
calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..2343edf0 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..f90ea43a --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, 
device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..39b77871 --- /dev/null +++ 
b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json @@ -0,0 +1,181 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + 
"readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..9e2d7c1c --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is 
None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md index 44468f65..45de6a35 100644 --- a/openai-clip-vit-base-patch32/aitk/README.md +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel® CPU/GPU/NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip optimization with QDQ for Qualcomm NPU / AMD NPU @@ -18,7 +18,7 @@ This workflow performs quantization with OpenVINO NNCF. 
It performs the optimiza - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/openai-clip-vit-base-patch32/aitk/inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/inference_sample.ipynb index 86ccf8fe..325e9da9 100644 --- a/openai-clip-vit-base-patch32/aitk/inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/inference_sample.ipynb @@ -53,6 +53,34 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index fb8fef2e..a76e4301 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider - file: "openai_clip_migraphx.json" device: gpu ep: MIGraphXExecutionProvider diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config index 20bbc8f8..8ec217a7 100644 --- a/openai-clip-vit-base-patch32/aitk/model_project.config +++ b/openai-clip-vit-base-patch32/aitk/model_project.config @@ -20,6 +20,10 @@ "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" + }, { "file": "openai_clip_migraphx.json", "templateName": "openai_clip_migraphx" diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb new file mode 100644 index 00000000..7e2f5cc6 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"DmlExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " 
worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb new file mode 100644 index 00000000..03e0fc89 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/openvino_model_quant_st.onnx\"\n", + "ExecutionProvider=\"OpenVINOExecutionProvider\"" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values']\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb new file mode 100644 index 00000000..347c9d15 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "43751a72", + "metadata": {}, + "outputs": [], + "source": 
[ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "text_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_text_embedding(text):\n", + " inputs = processor(\n", + " text=text,\n", + " padding=\"max_length\",\n", + " max_length=77,#text_model.sequence_length,\n", + " truncation=True,\n", + " add_special_tokens=True,\n", + " return_tensors=\"np\",\n", + " )\n", + " output = text_model.run(None, {\n", + " \"input_ids\": inputs[\"input_ids\"].astype(np.int32),\n", + " \"attention_mask\": inputs[\"attention_mask\"].astype(np.int32),\n", + " })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one by one because to static quantization, we fixed the batch size to 1\n", + "def ask(source, targets):\n", + " source_emb = get_text_embedding(source)\n", + " scores = []\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_text_embedding(target)\n", + " score = 
calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of sentence {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(\"a photo containing two cats\", [\"a photo of tshirt\", \"a photo of two cats\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb new file mode 100644 index 00000000..4c1986a4 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb new file mode 100644 index 00000000..0863f581 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3c18a7d6", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"QNNExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "markdown", + "id": "897ffb42-3569-4d78-b99d-355a38fdce35", + "metadata": {}, + "source": [ + "### Data Processor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d84cd-4853-4746-bce3-b281bfc23d8b", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import CLIPProcessor\n", + "\n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\")" + ] + }, + { + "cell_type": "markdown", + "id": "5568eb71-5812-4c74-989c-c12271d33b12", + "metadata": {}, + "source": [ + "### Model Inference with ORT-QNN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02bad4ec-f477-4659-8584-00735f6ed5a9", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import torch\n", + "import numpy as np\n", + "\n", + "def add_ep_for_device(session_options, ep_name, 
device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "vision_model = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "def get_image_embedding(image):\n", + " inputs = processor(images=image, return_tensors=\"np\")\n", + " output = vision_model.run(None, { \"pixel_values\": inputs[\"pixel_values\"] })\n", + " return torch.from_numpy(output[0])\n", + "\n", + "def calculate_score(emb_1, emb_2):\n", + " emb_1 /= torch.norm(emb_1, dim=-1, keepdim=True)\n", + " emb_2 /= torch.norm(emb_2, dim=-1, keepdim=True)\n", + " return torch.matmul(emb_1, emb_2.T) * 100.0\n", + "\n", + "# Get source embedding and calculate the similarity score for each target\n", + "# We need to process one target at a time because, due to static quantization, the batch size is fixed to 1\n", + "def ask(source, targets):\n", + " source_emb = get_image_embedding(source)\n", + " for i, target in enumerate(targets):\n", + " target_emb = get_image_embedding(target)\n", + " score = calculate_score(source_emb, target_emb)\n", + " print(f\"Similarity score of image {i}:{score.item()}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3477e36c-2e72-432b-ae81-602073a3754c", + "metadata": {}, + "source": [ + "### Play with Samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16868fbd-e447-4866-af7d-eb6e49975bcc", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "\n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07076b9a", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000208833.jpg\"\n", + "image1 = Image.open(requests.get(url, stream=True).raw)\n", + "image1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c10de7cd", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/train2017/000000125690.jpg\"\n", + "image2 = Image.open(requests.get(url, stream=True).raw)\n", + "image2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8cdc2a6-4c81-4f93-8426-065ee4c2b013", + "metadata": {}, + "outputs": [], + "source": [ + "ask(image, [image1, image2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..6198b710 --- /dev/null +++ 
b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json @@ -0,0 +1,181 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + 
"readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..015cd8a1 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is 
None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-large-patch14/aitk/inference_sample.ipynb b/openai-clip-vit-large-patch14/aitk/inference_sample.ipynb index 96024cd4..f58feec6 100644 --- a/openai-clip-vit-large-patch14/aitk/inference_sample.ipynb +++ b/openai-clip-vit-large-patch14/aitk/inference_sample.ipynb @@ -53,6 +53,34 @@ "register_execution_providers()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null,