Changes from all commits (59 commits)
- 198fc90 Cherry-pick 1st Round (#17308) (Lafi7e, Aug 28, 2023)
- 4296043 Cherry-pick 2nd Round (#17386) (Lafi7e, Sep 7, 2023)
- 2406e9c [rel-1.16.0] Use name of temporary provisioning profile. (#17456) (edgchen1, Sep 8, 2023)
- 196df08 [rel-1.16.0] Disable QNN QDQ test for release branch (#17463) (HectorSVC, Sep 8, 2023)
- a9df3ae Remove 52 from CMAKE_CUDA_ARCHITECTURES to reduce Nuget package size … (snnn, Sep 8, 2023)
- 0772d54 [rel-1.16.0] Cherry-pick 17507 (#17520) (chilo-ms, Sep 12, 2023)
- 06ea28b [rel-1.16.0] Cherry-pick 16940 and 17523 (#17506) (Lafi7e, Sep 14, 2023)
- e7a0495 Cherry-picks pipeline changes to 1.16.0 release branch (#17577) (snnn, Sep 18, 2023)
- 264a740 Cherry-picks for 1.16.1 release (#17741) (snnn, Oct 2, 2023)
- 6df4211 Cancel EP check in python for 1.16.1 (#17768) (RandySheriffH, Oct 3, 2023)
- f480a36 [hotfix] fix session option access in Node.js binding (#17762) (fs-eire, Oct 4, 2023)
- c3fd281 Fix onnx quantizer activation and weight type attribute (yufenglee, Oct 5, 2023)
- 2a1fd25 Upgrade transformers to fix CI (#17830) (snnn, Oct 9, 2023)
- c829550 Increase version number for preparing the 1.16.2 release (#18070) (snnn, Oct 26, 2023)
- 53cb942 [DML EP] Enable more MHA masks (#18120) (PatriceVignola, Oct 30, 2023)
- 99b0f62 [DML EP] Complete python IO binding implementation (#18124) (PatriceVignola, Oct 30, 2023)
- 6ae7c51 Revert "Disable dml stage in windows GPU pipeline temporarily. (#1803… (snnn, Oct 31, 2023)
- 749bcc7 [DML EP] Add subgraph fusion support (#18125) (PatriceVignola, Oct 31, 2023)
- 0240274 Add support for GCC 13 (#18178) (snnn, Nov 1, 2023)
- c273f7a Cherry-pick LLaMA/SDXL to rel-1.16.2 (#18202) (tianleiwu, Nov 1, 2023)
- bc533a6 [DML EP] Add dynamic graph compilation (#18199) (PatriceVignola, Nov 2, 2023)
- 2f57f1e Some cherry-picks for the 1.16.2 release (#18218) (snnn, Nov 2, 2023)
- 70b8cda Cherry pick LLaMA to rel-1.16.2 (round 2) (#18245) (tianleiwu, Nov 3, 2023)
- 27b0910 cherry pick resize grad pr (#18255) (askhade, Nov 3, 2023)
- 95c20d0 Cherry-pick two pipeline changes for the 1.16.2 patch release (#18249) (snnn, Nov 3, 2023)
- ad7cecb Update eigen's URL (#18301) (snnn, Nov 6, 2023)
- 0ccca88 Update eigen version (#18308) (snnn, Nov 7, 2023)
- 8f06330 Cherry pick LLaMA or SDXL to 1.16.2 release (round 3) (#18323) (tianleiwu, Nov 8, 2023)
- 0c5b95f Cherry-pick LLaMA GQA mask to rel-1.16.2 (round 4) (#18350) (tianleiwu, Nov 8, 2023)
- 96451b1 Always quantize global average pool (mapetre, Dec 7, 2022)
- 792873d added qlinearconvtranpose (xdrBogdan22, Jan 4, 2023)
- 8f6dd3d updated code for convtranspose2d to work for the quadric version of o… (xdrBogdan22, Feb 28, 2023)
- 92f3be0 fixed registry (xdrBogdan22, Feb 28, 2023)
- 6271d0a Adds shape inference for conv2d_transpose (mapetre, Mar 31, 2023)
- a17bd73 Add QLinearConvTranspose CPU implementation (mapetre, Apr 5, 2023)
- b0b734c Added int32_t templated version of Col2im (mapetre, Apr 6, 2023)
- fb57d97 ci: Create wheel and release upon each push to main (#1) (syassami, May 4, 2023)
- 83d722c Shape inference for QLinearAdd and QLinearConcat (mapetre, Apr 13, 2023)
- 1f2b1eb Shape inference for QLinearMul (mapetre, Apr 13, 2023)
- 5533b52 Shape inference for QLinearLeakyReLU (mapetre, Apr 18, 2023)
- 5ad0ee1 Adds shape inference for remaining QLinear operators (mapetre, Apr 19, 2023)
- 7b0fdc8 Add unittest for QLinear shape inference (mapetre, Apr 19, 2023)
- 846eb6a Updated shape inference to default to ONNX implementation (mapetre, Apr 21, 2023)
- b6f98f7 Applied python linter (mapetre, Apr 21, 2023)
- 66be256 Refactored QLinear shape inference test (mapetre, Apr 25, 2023)
- fab74f8 Addressed PR comments (mapetre, Apr 25, 2023)
- 9c27dce Add type inference support for custom operators (mapetre, May 17, 2023)
- fd2196f Removed formatting changes (mapetre, May 18, 2023)
- 0a347e6 Add shape inference for QLinearConvTranspose (mapetre, May 19, 2023)
- 9ae4f2e Add unnittest for QLinearConvTranspose shape inference (mapetre, May 19, 2023)
- e8f4122 ci: Enable workflow_call (syassami, Sep 21, 2023)
- 314b40b ci: Add mac python3.10 release (syassami, Sep 23, 2023)
- c70f411 ci: Create release on github.ref == refs/heads/main (syassami, Sep 28, 2023)
- 365a6db wheel.yaml: No warning as error during compile (ndrego, Sep 29, 2023)
- 3989509 QuadricCustomOp handling (#12) (ndrego, Oct 1, 2023)
- 4f4d738 ci: Use self hosted arm64 macos runner (#16) (syassami, Oct 2, 2023)
- 77c6035 QuadricCustomOp: Handle multiple outputs when shape inferencing (#17) (ndrego, Oct 3, 2023)
- 7845828 quadric_custom_op: Handle duplicated input names (#18) (ndrego, Nov 14, 2023)
- 190e428 Add calibrator option to add extra dtypes (mgleonard425, Jan 26, 2024)
5 files renamed without changes.
133 changes: 0 additions & 133 deletions .github/workflows/sca.yml

This file was deleted.

97 changes: 97 additions & 0 deletions .github/workflows/wheel.yaml
@@ -0,0 +1,97 @@
name: CI && Release & Upload Wheel

on:
  workflow_call:
    inputs:
      onnxruntime_branch:
        type: string
        default: "main"
  workflow_dispatch:
    inputs:
      onnxruntime_branch:
        type: string
        default: "main"
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build_and_upload_wheel_linux:
    runs-on: The_CTOs_Choice
    container:
      image: ghcr.io/quadric-io/tvm:devel
      options: "--mount type=bind,source=${{ github.workspace }},target=/workspace"
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: quadric-io/onnxruntime
          ref: ${{ inputs.onnxruntime_branch || github.ref }}
      - name: Build ONNX Runtime wheel
        working-directory: /workspace
        run: |
          python3 -m pip install cmake --upgrade
          ./build.sh --build_wheel --config Release --parallel ${{ github.event_name == 'pull_request' && ' ' || '--skip_tests'}} --skip_submodule_sync --allow_running_as_root --compile_no_warning_as_error
          wheel_path=$(find . -name '*.whl' | xargs readlink -f)
          echo "wheel_path=$wheel_path" >> $GITHUB_ENV
      - name: Upload Artifact
        uses: actions/upload-artifact@v3
        with:
          name: ort-wheel-linux
          path: ${{ env.wheel_path }}

  build_and_upload_wheel_mac:
    runs-on: [self-hosted, macOS, ARM64]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: quadric-io/onnxruntime
          ref: ${{ inputs.onnxruntime_branch || github.ref }}
      - name: Build ONNX Runtime wheel
        run: |
          ./build.sh --build_wheel --config Release --parallel ${{ github.event_name == 'pull_request' && ' ' || '--skip_tests'}} --skip_submodule_sync --compile_no_warning_as_error --apple_deploy_target 12
          wheel_path=$(find . -name '*.whl' | xargs readlink -f)
          echo "wheel_path=$wheel_path" >> $GITHUB_ENV
      - name: Upload Artifact
        uses: actions/upload-artifact@v3
        with:
          name: ort-wheel-mac
          path: ${{ env.wheel_path }}

  create_release:
    if: (github.ref == 'refs/heads/main') && (github.event_name != 'workflow_call' && github.event_name != 'workflow_dispatch')
    needs: [build_and_upload_wheel_mac, build_and_upload_wheel_linux]
    runs-on: ubuntu-latest
    steps:
      - name: Download ort-wheel-linux artifact
        uses: actions/download-artifact@v3
        with:
          name: ort-wheel-linux
          path: artifacts/
      - name: Download ort-wheel-mac artifact
        uses: actions/download-artifact@v3
        with:
          name: ort-wheel-mac
          path: artifacts/
      - name: Count releases
        id: count_releases
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          count=$(curl --request GET \
            --url https://api.github.com/repos/${{ github.repository }}/releases \
            --header "Authorization: Bearer $GITHUB_TOKEN" | jq length)
          echo "count=$count" >> $GITHUB_ENV
      - name: Create Release and Upload Both Assets
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: v${{ env.count }}
          name: Release v${{ env.count }}
          files: |
            artifacts/*.whl
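For reference, the `Count releases` step above derives the next tag from the length of the JSON array returned by the GitHub releases API (the `| jq length` pipe). That numbering scheme can be paraphrased as a small Python sketch; the function name is illustrative, not part of the workflow:

```python
import json

def next_release_tag(releases_json: str) -> str:
    """Mirror the workflow's 'Count releases' step: the next tag is
    'v' + the number of releases the API already reports."""
    releases = json.loads(releases_json)  # the releases endpoint returns a JSON array
    return f"v{len(releases)}"

# With two existing releases the next one is tagged v2; note the tag is the
# count itself (not count + 1), so the very first release is v0.
print(next_release_tag('[{"tag_name": "v0"}, {"tag_name": "v1"}]'))  # prints v2
```

One consequence of this design: if a release is ever deleted, the count drops and the workflow will try to reuse an existing tag.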
3 changes: 0 additions & 3 deletions .gitmodules
@@ -8,6 +8,3 @@
  path = cmake/external/emsdk
  url = https://github.com/emscripten-core/emsdk.git
  branch = 3.1.44
-[submodule "cmake/external/onnxruntime-extensions"]
-  path = cmake/external/onnxruntime-extensions
-  url = https://github.com/microsoft/onnxruntime-extensions.git
29 changes: 29 additions & 0 deletions README_EPU.md
@@ -0,0 +1,29 @@
# The Quadric Version of onnxruntime

This repository contains a distribution of onnxruntime with additional operator quantization capabilities.


## Prerequisites:
- python 3.9
- pip

## Clone repository and build:
```
git clone --recursive https://github.com/quadric-io/onnxruntime onnxruntime
cd onnxruntime
python3.9 -m venv venv
source venv/bin/activate
# Install required packages. numpy version is restricted by TVM
pip3 install wheel packaging numpy==1.24.4
# Build the python package
./build.sh --build_wheel --config Release --parallel
```

## Install
```
# Find the wheel you just created
$ find . -name '*.whl'
./build/MacOS/Release/dist/onnxruntime-1.16.0-cp39-cp39-macosx_13_0_arm64.whl
# Install it
pip3 install ./build/MacOS/Release/dist/onnxruntime-1.16.0-cp39-cp39-macosx_13_0_arm64.whl
```
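The README locates the freshly built wheel with `find . -name '*.whl'`; the same lookup can be done from Python, which is handy in scripts that install the wheel automatically. This is an illustrative helper, not part of the repository (the exact output directory varies by platform, e.g. `build/MacOS/Release/dist` vs `build/Linux/Release/dist`):

```python
from pathlib import Path

def find_wheel(build_root: str) -> Path:
    """Return the most recently built wheel under build_root,
    the Python equivalent of `find . -name '*.whl'`."""
    wheels = sorted(Path(build_root).rglob("*.whl"),
                    key=lambda p: p.stat().st_mtime)
    if not wheels:
        raise FileNotFoundError(f"no wheel found under {build_root}")
    return wheels[-1]  # newest wheel wins if several builds exist
```

The returned path can then be passed straight to `pip3 install`.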
34 changes: 34 additions & 0 deletions ThirdPartyNotices.txt
@@ -6230,3 +6230,37 @@ https://github.com/intel/neural-compressor
terms, and open source software license terms. These separate license terms
govern your use of the third party programs as set forth in the
"THIRD-PARTY-PROGRAMS" file.

_____

FlashAttention, https://github.com/Dao-AILab/flash-attention

BSD 3-Clause License

Copyright (c) 2022, the respective contributors, as shown by the AUTHORS file.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
-1.16.0
+1.16.2
2 changes: 1 addition & 1 deletion cgmanifests/cgmanifest.json
@@ -568,7 +568,7 @@
      "component": {
        "type": "git",
        "git": {
-         "commitHash": "d10b27fe37736d2944630ecd7557cefa95cf87c9",
+         "commitHash": "e7248b26a1ed53fa030c5c459f7ea095dfd276ac",
          "repositoryUrl": "https://gitlab.com/libeigen/eigen.git"
        }
      }
11 changes: 10 additions & 1 deletion cmake/CMakeLists.txt
@@ -84,7 +84,8 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)

-option(onnxruntime_USE_FLASH_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
+cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32; onnxruntime_USE_CUDA" OFF)
+option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)

option(onnxruntime_BUILD_FOR_NATIVE_MACHINE "Enable this option for turning on optimization specific to this machine" OFF)
option(onnxruntime_USE_AVX "Use AVX instructions" OFF)
@@ -666,13 +667,16 @@ if (onnxruntime_USE_CUDA)

  if (onnxruntime_DISABLE_CONTRIB_OPS)
    set(onnxruntime_USE_FLASH_ATTENTION OFF)
+   set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
  endif()
  if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
    message( STATUS "Turn off flash attention since CUDA compiler version < 11.6")
    set(onnxruntime_USE_FLASH_ATTENTION OFF)
+   set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
  endif()
else()
  set(onnxruntime_USE_FLASH_ATTENTION OFF)
+ set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()

if (onnxruntime_USE_CUDA)
@@ -685,6 +689,11 @@ if (onnxruntime_USE_CUDA)
    list(APPEND ORT_PROVIDER_FLAGS -DUSE_FLASH_ATTENTION=1)
    list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_FLASH_ATTENTION=1)
  endif()
+ if (onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
+   message( STATUS "Enable memory efficient attention for CUDA EP")
+   list(APPEND ORT_PROVIDER_FLAGS -DUSE_MEMORY_EFFICIENT_ATTENTION=1)
+   list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_MEMORY_EFFICIENT_ATTENTION=1)
+ endif()

endif()
if (onnxruntime_USE_VITISAI)
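The CMakeLists.txt hunks above gate the two attention kernels: flash attention becomes a `cmake_dependent_option` that can only be ON for CUDA builds on non-Windows hosts, and both kernels are forced OFF when contrib ops are disabled or the CUDA compiler is older than 11.6. The effective-flag logic can be paraphrased in Python (an illustrative sketch of the CMake conditions, not code from the repository):

```python
def attention_kernel_flags(use_cuda: bool,
                           is_windows: bool,
                           cuda_compiler_version: tuple,
                           disable_contrib_ops: bool) -> dict:
    """Compute the effective attention-kernel flags, mirroring the
    cmake_dependent_option and the if() blocks in the diff above."""
    # cmake_dependent_option: ON only when "NOT WIN32; onnxruntime_USE_CUDA" holds
    flash = use_cuda and not is_windows
    memory_efficient = True  # plain option(), defaults to ON
    if use_cuda:
        if disable_contrib_ops or cuda_compiler_version < (11, 6):
            flash = False
            memory_efficient = False
    else:
        # non-CUDA builds: both kernels are forced OFF
        flash = False
        memory_efficient = False
    return {"USE_FLASH_ATTENTION": flash,
            "USE_MEMORY_EFFICIENT_ATTENTION": memory_efficient}
```

So a Windows CUDA build with a new-enough compiler still gets the memory-efficient kernel but never the flash kernel, matching the `NOT WIN32` dependency.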
4 changes: 2 additions & 2 deletions cmake/deps.txt
@@ -11,6 +11,7 @@ abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20220623.1.zip
cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
date;https://github.com/HowardHinnant/date/archive/refs/tags/v2.4.1.zip;ea99f021262b1d804a872735c658860a6a13cc98
dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b31321e5549591d78aa7f377173445
+eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;ba0a75fd12dbef8f6557a74e611b7a3d0c5fe7bf
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
@@ -41,5 +42,4 @@ re2;https://github.com/google/re2/archive/refs/tags/2022-06-01.zip;aa77313b76e91
safeint;https://github.com/dcleblanc/SafeInt/archive/ff15c6ada150a5018c5ef2172401cb4529eac9c0.zip;913a4046e5274d329af2806cb53194f617d8c0ab
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.0.0.zip;0f95b3c1fc1bd1175c4a90b2c9e39074d1bccefd
-extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
-eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip;ee201b07085203ea7bd8eb97cbcb31b07cfa3efb
+extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c