pytorch
diff --git a/‎.ci/docker/build.sh‎
Lines changed: 5 additions & 0 deletions b/‎.ci/docker/build.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.ci/docker/ubuntu/Dockerfile‎
Lines changed: 2 additions & 0 deletions b/‎.ci/docker/ubuntu/Dockerfile‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.ci/scripts/setup-vulkan-linux-deps.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/setup-vulkan-linux-deps.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.ci/scripts/test_llama.sh‎
Lines changed: 9 additions & 0 deletions b/‎.ci/scripts/test_llama.sh‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎.github/workflows/docker-builds.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/docker-builds.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 19 additions & 1 deletion b/‎.github/workflows/pull.yml‎
Lines changed: 19 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/trunk.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 3 additions & 3 deletions b/‎CMakeLists.txt‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/arm/arm_backend.py‎
Lines changed: 3 additions & 6 deletions b/‎backends/arm/arm_backend.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎backends/arm/quantizer/quantization_annotation/generic_annotator.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/quantizer/quantization_annotation/generic_annotator.py‎
Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,10 @@ case "${IMAGE_NAME}" in
     QNN_SDK=yes
     CLANG_VERSION=12
     ;;
+  executorch-ubuntu-22.04-mediatek-sdk)
+    MEDIATEK_SDK=yes
+    CLANG_VERSION=12
+    ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
     CLANG_VERSION=12
@@ -77,6 +81,7 @@ docker build \
   --build-arg "BUILD_DOCS=${BUILD_DOCS}" \
   --build-arg "ARM_SDK=${ARM_SDK:-}" \
   --build-arg "QNN_SDK=${QNN_SDK:-}" \
+  --build-arg "MEDIATEK_SDK=${MEDIATEK_SDK:-}" \
   --build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
   -f "${OS}"/Dockerfile \
   "$@" \
 
@@ -85,5 +85,7 @@ RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]
 
 ARG QNN_SDK
 
+ARG MEDIATEK_SDK
+
 USER ci-user
 CMD ["bash"]
@@ -27,7 +27,7 @@ install_swiftshader() {
 
 install_vulkan_sdk() {
   VULKAN_SDK_VERSION=$1
-  _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
+  _vulkan_sdk_url="https://sdk.lunarg.com/sdk/download/${VULKAN_SDK_VERSION}/linux/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.xz"
 
   _vulkan_sdk_dir=/tmp/vulkansdk
   mkdir -p $_vulkan_sdk_dir
@@ -37,12 +37,12 @@ install_vulkan_sdk() {
   curl --silent --show-error --location --fail --retry 3 \
     --output "${_tmp_archive}" "${_vulkan_sdk_url}"
 
-  tar -C "${_vulkan_sdk_dir}" -xzf "${_tmp_archive}"
+  tar -C "${_vulkan_sdk_dir}" -xJf "${_tmp_archive}"
 
   export PATH="${PATH}:${_vulkan_sdk_dir}/${VULKAN_SDK_VERSION}/x86_64/bin/"
 }
 
-VULKAN_SDK_VERSION="1.2.198.1"
+VULKAN_SDK_VERSION="1.3.296.0"
 
 install_swiftshader
 install_vulkan_sdk "${VULKAN_SDK_VERSION}"
@@ -110,6 +110,12 @@ else
   COREML=OFF
 fi
 
+if [[ "${MODE}" =~ .*quantize_kv.* ]]; then
+  QUANTIZE_KV_CACHE=ON
+else
+  QUANTIZE_KV_CACHE=OFF
+fi
+
 echo "COREML option ${COREML}"
 
 if [[ "${MODE}" =~ .*qnn.* ]]; then
@@ -249,6 +255,9 @@ if [[ "${QNN}" == "ON" ]]; then
     EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
   fi
 fi
+if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
+fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
 
 
@@ -42,6 +42,7 @@ jobs:
           - docker-image-name: executorch-ubuntu-22.04-linter
           - docker-image-name: executorch-ubuntu-22.04-arm-sdk
           - docker-image-name: executorch-ubuntu-22.04-qnn-sdk
+          - docker-image-name: executorch-ubuntu-22.04-mediatek-sdk
           - docker-image-name: executorch-ubuntu-22.04-clang12-android
     env:
       DOCKER_IMAGE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/executorch/${{ matrix.docker-image-name }}
 
@@ -86,7 +86,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+custom, xnnpack+custom+qe]
+        mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
@@ -504,3 +504,21 @@ jobs:
 
         # run llama runner in eager mode
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+
+  test-mediatek-models-linux:
+    name: test-mediatek-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-mediatek-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for mediatek to add more tests
@@ -225,7 +225,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        mode: [portable, xnnpack+kv+custom, mps, coreml]
+        mode: [portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv]
         include:
           - dtype: bf16
             mode: portable
 
@@ -742,9 +742,9 @@ if(EXECUTORCH_BUILD_PYBIND)
   endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
-    # need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
-    # from libtorch_cpu
-    list(APPEND _dep_libs xnnpack_backend XNNPACK)
+    # Need to explicitly specify XNNPACK and microkernels-prod here; otherwise
+    # the XNNPACK and microkernels-prod symbols from libtorch_cpu are used.
+    list(APPEND _dep_libs xnnpack_backend XNNPACK microkernels-prod)
   endif()
 
   # compile options for pybind
 
@@ -13,7 +13,7 @@
 
 import logging
 import os
-from typing import final, List, Optional
+from typing import cast, final, List, Optional
 
 import serializer.tosa_serializer as ts
 from executorch.backends.arm.arm_vela import vela_compile
@@ -32,6 +32,7 @@
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
+from torch.fx import Node
 
 # TOSA backend debug functionality
 logger = logging.getLogger(__name__)
@@ -269,6 +270,7 @@ def preprocess(  # noqa: C901
         node_visitors = get_node_visitors(edge_program, tosa_spec)
         input_count = 0
         for node in graph_module.graph.nodes:
+            node = cast(Node, node)
             if node.op == "call_function":
                 process_call_function(node, tosa_graph, node_visitors, tosa_spec)
             elif node.op == "placeholder":
@@ -288,9 +290,6 @@ def preprocess(  # noqa: C901
                     "The rank of the input order is not equal to amount of input tensors"
                 )
 
-        # TODO: It would be awesome if this dump could somehow be done on top level and not here.
-        # Problem is that the desc.json has to be created on the tosa_graph object, which we can't
-        # access from top level.
         if artifact_path:
             tag = _get_first_delegation_tag(graph_module)
             dbg_tosa_dump(
@@ -311,6 +310,4 @@ def preprocess(  # noqa: C901
         else:
             raise RuntimeError(f"Unknown format {output_format}")
 
-        # Continueing from above. Can I put tosa_graph into this function?
-        # debug_handle_map = ...
         return PreprocessResult(processed_bytes=binary)
@@ -53,6 +53,7 @@
     torch.ops.aten.tile.default,
     torch.ops.aten.flip.default,
     torch.ops.aten.cat.default,
+    torch.ops.aten.concatenate.default,
     torch.ops.aten.stack.default,
     torch.ops.aten.chunk.default,
     torch.ops.aten.contiguous.default,