intel
diff --git a/‎.github/ISSUE_TEMPLATE/bug.yml‎
Lines changed: 48 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/bug.yml‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/config.yml‎
Lines changed: 5 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/config.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/performance.yml‎
Lines changed: 44 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/performance.yml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 3 additions & 0 deletions b/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/integration-tests.yml‎
Lines changed: 0 additions & 4 deletions b/‎.github/workflows/integration-tests.yml‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎.github/workflows/integration-tests.yml.in‎
Lines changed: 0 additions & 4 deletions b/‎.github/workflows/integration-tests.yml.in‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎.github/workflows/llvm-build.yml‎
Lines changed: 30 additions & 3 deletions b/‎.github/workflows/llvm-build.yml‎
Lines changed: 30 additions & 3 deletions
diff --git a/‎.github/workflows/llvm-build/centos.Dockerfile‎
Lines changed: 56 additions & 0 deletions b/‎.github/workflows/llvm-build/centos.Dockerfile‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 0 deletions b/‎README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎cmake/AddTritonUnitTest.cmake‎
Lines changed: 1 addition & 1 deletion b/‎cmake/AddTritonUnitTest.cmake‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,48 @@
+name: Report a bug
+description: Report triton failing to compile a kernel, or giving incorrect results
+labels: ["bug"]
+
+body:
+- type: markdown
+  attributes:
+    value: |
+      #### Disclaimer
+      The core triton team is small and has very limited capacity. We may not have time to look into your report.
+      For the best results, please:
+        - Avoid submitting duplicates. Search through [the existing and past issues](https://github.com/triton-lang/triton/issues?q=is%3Aissue+sort%3Acreated-desc+) first to see if it's been reported previously.
+        - Check if the issue persists with a build from the latest source.
+        - Provide all relevant information in the initial report, to prevent unnecessary back and forth discussion.
+        - If you can, try to diagnose and/or fix the issue yourself. We welcome high quality contributions.
+- type: textarea
+  attributes:
+    label: Describe the bug
+    description: |
+      Please provide a clear and concise description of what the bug is.
+
+      If relevant, add a [minimal complete example](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the bug. It is very important for the snippet to be as simple as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did, so include both the kernel and launching code as well as any relevant imports.
+
+      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
+
+      Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
+    placeholder: |
+      A clear and concise description of what the bug is.
+
+      ```python
+      # Sample code to reproduce the problem
+      ```
+
+      ```
+      The error message you got, with the full traceback.
+      ```
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Environment details
+    description: |
+      Please include any relevant context about how you're running the reproducer e.g. which version of triton, and what GPU you are using.
+    placeholder: |
+        Triton: ...
+        GPU: ...
+  validations:
+    required: true
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+  - name: Community help
+    url: https://discord.gg/gpumode
+    about: GPU-mode discord community has a triton channel which is a great resource for help writing/learning triton
@@ -0,0 +1,44 @@
+name: Report a performance issue
+description: Report cases where triton is generating sub-optimal (but functionally correct) PTX/LLVM IR
+labels: ["performance"]
+
+body:
+- type: markdown
+  attributes:
+    value: |
+      #### Disclaimer
+      The core triton team is small and has very limited capacity. We may not have time to look into your report.
+      For the best results, please:
+        - Avoid submitting duplicates. Search through [the existing and past issues](https://github.com/triton-lang/triton/issues?q=is%3Aissue+sort%3Acreated-desc+) first to see if it's been reported previously.
+        - Check if the issue persists with a build from the latest source.
+        - Provide all relevant information in the initial report, to prevent unnecessary back and forth discussion.
+        - If you can, try to diagnose and/or fix the issue yourself. We welcome high quality contributions.
+- type: textarea
+  attributes:
+    label: Describe the issue
+    description: |
+      Please provide a clear and concise description of the issue.
+
+      Include a [minimal complete example](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the issue. It is very important for the snippet to be as simple as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did.
+
+      A reproducer could be a python program that runs a triton kernel and prints out the relevant suboptimal IR, or an IR file with an accompanying triton-opt command.
+
+      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
+    placeholder: |
+      A clear and concise description of the issue.
+
+      ```python
+      # Sample code to reproduce the problem
+      ```
+  validations:
+    required: true
+- type: textarea
+  attributes:
+    label: Environment details
+    description: |
+      Please include any relevant context about how you're running the reproducer e.g. which version of triton, and what GPU you are using.
+    placeholder: |
+        Triton: ...
+        GPU: ...
+  validations:
+    required: true
@@ -1,11 +1,14 @@
+<!---
 The core Triton is a small number of people, and we receive many PRs (thank
 you!).  To help us review your code more quickly, **if you are a new
 contributor (less than 3 PRs merged) we ask that you complete the following
 tasks and include the filled-out checklist in your PR description.**
 
 Complete the following tasks before sending your PR, and replace `[ ]` with
 `[x]` to indicate you have done them.
+-->
 
+# New contributor declaration
 - [ ] I am not making a trivial change, such as fixing a typo in a comment.
 
 - [ ] I have written a PR description following these
 
@@ -141,10 +141,6 @@ jobs:
       - name: Check pre-commit
         run: |
           python3 -m pip install --upgrade pre-commit
-          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
-          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
-          # If first run of yapf worked and made changes reset the tree to the original state
-          git reset --hard
           python3 -m pre_commit run --all-files --verbose
       - name: Print diff of changes if pre-commit failed
         if: failure()
 
@@ -155,10 +155,6 @@ jobs:
       - name: Check pre-commit
         run: |
           python3 -m pip install --upgrade pre-commit
-          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
-          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
-          # If first run of yapf worked and made changes reset the tree to the original state
-          git reset --hard
           python3 -m pre_commit run --all-files --verbose
 
       - name: Print diff of changes if pre-commit failed
 
@@ -28,6 +28,7 @@ jobs:
         config:
         - {runner: 'Ubuntu 20.04', runs_on: 'ubuntu-20.04', target-os: 'ubuntu', arch: 'x64'}
         - {runner: 'Ubuntu 20.04 ARM64', runs_on: 'ubuntu-20.04', target-os: 'ubuntu', arch: 'arm64'}
+        - {runner: 'CentOS 7', runs_on: ['self-hosted', 'CPU'], target-os: 'centos', arch: 'x64'}
         - {runner: 'AlmaLinux 8', runs_on: ['self-hosted', 'CPU'], target-os: 'almalinux', arch: 'x64'}
         - {runner: 'MacOS X64', runs_on: 'macos-12', target-os: 'macos', arch: 'x64'}
         - {runner: 'MacOS ARM64', runs_on: 'macos-12', target-os: 'macos', arch: 'arm64'}
@@ -233,15 +234,16 @@ jobs:
 
         tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
 
-    - name: Configure, Build, Test, and Install LLVM (AlmaLinux)
-      if: matrix.config.target-os == 'almalinux'
+
+    - name: Configure, Build, Test, and Install LLVM (CentOS)
+      if: matrix.config.target-os == 'centos'
       run: |
         # if this step crashes, it can leave behind a stale docker container
         docker container prune -f
         docker rmi -f $(docker images -q)
 
         docker build --tag llvm-build --build-arg llvm_dir=llvm-project \
-          -f llvm-build/.github/workflows/llvm-build/almalinux.Dockerfile .
+          -f llvm-build/.github/workflows/llvm-build/centos.Dockerfile .
 
         # Create temporary container to copy cache and installed artifacts.
         CONTAINER_ID=$(docker create llvm-build)
@@ -256,6 +258,31 @@ jobs:
 
         docker rm "${CONTAINER_ID}"
 
+    - name: Configure, Build, Test, and Install LLVM (AlmaLinux)
+      if: matrix.config.target-os == 'almalinux'
+      run: |
+        # if this step crashes, it can leave behind a stale docker container
+        docker container prune -f
+        docker images -q | xargs -r docker rmi -f  # -r: skip rmi when no images exist (bare `docker rmi -f` errors)
+
+        docker build --tag llvm-build --build-arg llvm_dir=llvm-project \
+          -f llvm-build/.github/workflows/llvm-build/almalinux.Dockerfile .
+
+        # Create temporary container to copy cache and installed artifacts.
+        CONTAINER_ID=$(docker create llvm-build)
+
+        # We remove the existing directories, otherwise docker cp will
+        # create a subdirectory inside the existing directory.
+        rm -rf "${{ env.SCCACHE_DIR }}" "${{ env.llvm_install_dir }}"
+
+        docker cp "${CONTAINER_ID}:/install" "${{ env.llvm_install_dir }}"
+        tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
+
+        docker cp "${CONTAINER_ID}:/sccache" "${{ env.SCCACHE_DIR }}"
+        sudo chown -R "$(id -u -n):$(id -g -n)" "${{ env.SCCACHE_DIR }}"
+
+        docker rm "${CONTAINER_ID}"
+
     - name: Upload Build Artifacts
       uses: actions/upload-artifact@v4
       with:
 
@@ -0,0 +1,56 @@
+FROM centos:7
+ARG llvm_dir=llvm-project
+# Add the cache artifacts and the LLVM source tree to the container
+ADD sccache /sccache
+ADD "${llvm_dir}" /source/llvm-project
+ENV SCCACHE_DIR="/sccache"
+ENV SCCACHE_CACHE_SIZE="2G"
+
+RUN echo -e "[llvmtoolset-build]\nname=LLVM Toolset 13.0 - Build\nbaseurl=https://buildlogs.centos.org/c7-llvm-toolset-13.0.x86_64/\ngpgcheck=0\nenabled=1" > /etc/yum.repos.d/llvmtoolset-build.repo
+
+# Note: This patch is required because CentOS 7 has reached EOL;
+# otherwise every yum install step will fail.
+RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
+RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
+RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
+
+# Install build dependencies
+RUN yum install --assumeyes centos-release-scl
+
+# Re-apply the repo patch: presumably centos-release-scl adds new .repo files that still point at the EOL mirrors.
+RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
+RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
+RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
+
+RUN yum install --assumeyes --nogpgcheck llvm-toolset-13.0
+RUN yum install --assumeyes rh-python38-python-devel rh-python38-python-pip
+SHELL [ "/usr/bin/scl", "enable", "llvm-toolset-13.0", "rh-python38" ]
+
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install --upgrade cmake ninja sccache
+
+# Install MLIR's Python Dependencies
+RUN python3 -m pip install -r /source/llvm-project/mlir/python/requirements.txt
+
+# Configure, Build, Test, and Install LLVM
+RUN cmake -GNinja -Bbuild \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_C_COMPILER=clang \
+  -DCMAKE_CXX_COMPILER=clang++ \
+  -DCMAKE_ASM_COMPILER=clang \
+  -DCMAKE_C_COMPILER_LAUNCHER=sccache \
+  -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
+  -DCMAKE_CXX_FLAGS="-Wno-everything" \
+  -DCMAKE_LINKER=lld \
+  -DCMAKE_INSTALL_PREFIX="/install" \
+  -DLLVM_BUILD_UTILS=ON \
+  -DLLVM_BUILD_TOOLS=ON \
+  -DLLVM_ENABLE_ASSERTIONS=ON \
+  -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
+  -DLLVM_ENABLE_PROJECTS=mlir \
+  -DLLVM_ENABLE_TERMINFO=OFF \
+  -DLLVM_INSTALL_UTILS=ON \
+  -DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU" \
+  /source/llvm-project/llvm
+
+RUN ninja -C build install
@@ -176,6 +176,9 @@ For detailed instructions on how to debug Triton's frontend, please refer to thi
    kernels. Use `MLIR_ENABLE_DUMP=kernelName` to dump for a specific kernel only.
   - Triton cache can interfere with the dump. In cases where `MLIR_ENABLE_DUMP=1` does not work, try cleaning your triton cache: `rm -r ~/.triton/cache/*`
 - `LLVM_IR_ENABLE_DUMP=1` dumps the IR before every pass run over the LLVM IR.
+- `TRITON_REPRODUCER_PATH=<reproducer_path>` will generate an MLIR reproducer file
+  at `<reproducer_path>` before each MLIR compiler stage. If any of the stages fail,
+  `<reproducer_path>` will be a local MLIR reproducer captured right before the failing pass.
 - `TRITON_INTERPRET=1` uses the Triton interpreter instead of running on the
   GPU.  You can insert Python breakpoints in your kernel code!
 - `TRITON_ENABLE_LLVM_DEBUG=1` passes `-debug` to LLVM, printing a lot of
 
@@ -35,5 +35,5 @@ function(add_triton_ut)
   # Without the TEST_DISCOVERY_TIMEOUT, the tests randomly time out on my mac
   # laptop.  I think the issue may be that the very first time you run a program
   # it's a bit slow.
-  gtest_discover_tests(${__NAME} PROPERTIES TEST_DISCOVERY_TIMEOUT 60)
+  gtest_discover_tests(${__NAME} DISCOVERY_TIMEOUT 60)
 endfunction()