llvm
diff --git a/‎.ci/premerge_advisor_explain.py‎
Lines changed: 63 additions & 0 deletions b/‎.ci/premerge_advisor_explain.py‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎.ci/utils.sh‎
Lines changed: 5 additions & 0 deletions b/‎.ci/utils.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 7 additions & 0 deletions b/‎.github/CODEOWNERS‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎.github/workflows/containers/github-action-ci-tooling/Dockerfile‎
Lines changed: 21 additions & 1 deletion b/‎.github/workflows/containers/github-action-ci-tooling/Dockerfile‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎.github/workflows/pr-code-lint.yml‎
Lines changed: 6 additions & 17 deletions b/‎.github/workflows/pr-code-lint.yml‎
Lines changed: 6 additions & 17 deletions
diff --git a/‎.github/workflows/premerge.yaml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/premerge.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎bolt/README.md‎
Lines changed: 5 additions & 4 deletions b/‎bolt/README.md‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎bolt/docs/Heatmaps.md‎
Lines changed: 4 additions & 4 deletions b/‎bolt/docs/Heatmaps.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎bolt/docs/OptimizingClang.md‎
Lines changed: 1 addition & 1 deletion b/‎bolt/docs/OptimizingClang.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bolt/docs/OptimizingLinux.md‎
Lines changed: 1 addition & 1 deletion b/‎bolt/docs/OptimizingLinux.md‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,63 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Script for getting explanations from the premerge advisor."""
+
+import argparse
+import os
+import platform
+import sys
+
+import requests
+
+import generate_test_report_lib
+
+# Endpoint that main() sends the explanation request to.
+# NOTE(review): the ".svc.cluster.local" host looks like a cluster-internal
+# service name, presumably reachable only from inside the CI cluster - confirm.
+PREMERGE_ADVISOR_URL = (
+    "http://premerge-advisor.premerge-advisor.svc.cluster.local:5000/explain"
+)
+
+
+def main(commit_sha: str, build_log_files: list[str]):
+    junit_objects, ninja_logs = generate_test_report_lib.load_info_from_files(
+        build_log_files
+    )
+    test_failures = generate_test_report_lib.get_failures(junit_objects)
+    current_platform = f"{platform.system()}-{platform.machine()}".lower()
+    explanation_request = {
+        "base_commit_sha": commit_sha,
+        "platform": current_platform,
+        "failures": [],
+    }
+    if test_failures:
+        for _, failures in test_failures.items():
+            for name, failure_messsage in failures:
+                explanation_request["failures"].append(
+                    {"name": name, "message": failure_messsage}
+                )
+    else:
+        ninja_failures = generate_test_report_lib.find_failure_in_ninja_logs(ninja_logs)
+        for name, failure_message in ninja_failures:
+            explanation_request["failures"].append(
+                {"name": name, "message": failure_message}
+            )
+    advisor_response = requests.get(PREMERGE_ADVISOR_URL, json=explanation_request)
+    if advisor_response.status_code == 200:
+        print(advisor_response.json())
+    else:
+        print(advisor_response.reason)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("commit_sha", help="The base commit SHA for the test.")
+    parser.add_argument(
+        "build_log_files", help="Paths to JUnit report files and ninja logs.", nargs="*"
+    )
+    args = parser.parse_args()
+
+    # Skip looking for results on AArch64 for now because the premerge advisor
+    # service is not available on AWS currently.
+    if platform.machine() == "arm64":
+        sys.exit(0)
+
+    main(args.commit_sha, args.build_log_files)
@@ -43,6 +43,11 @@ function at-exit {
     python "${MONOREPO_ROOT}"/.ci/premerge_advisor_upload.py \
       $(git rev-parse HEAD~1) $GITHUB_RUN_NUMBER \
       "${BUILD_DIR}"/test-results.*.xml "${MONOREPO_ROOT}"/ninja*.log
+    if [[ "$GITHUB_ACTIONS" != "" ]]; then
+      python "${MONOREPO_ROOT}"/.ci/premerge_advisor_explain.py \
+        $(git rev-parse HEAD~1) "${BUILD_DIR}"/test-results.*.xml \
+        "${MONOREPO_ROOT}"/ninja*.log
+    fi
   fi
 }
 trap at-exit EXIT
 
@@ -53,6 +53,13 @@
 /mlir/include/mlir/Interfaces/DestinationStyleOpInterface.* @matthias-springer
 /mlir/lib/Interfaces/DestinationStyleOpInterface.* @matthias-springer
 
+# AMDGPU and ROCDL dialects in MLIR.
+/mlir/include/mlir/Dialect/AMDGPU @krzysz00 @kuhar
+/mlir/lib/Dialect/AMDGPU @krzysz00 @kuhar
+/mlir/lib/Conversion/*AMDGPU* @krzysz00 @kuhar
+/mlir/lib/Conversion/*ToROCDL @krzysz00 @kuhar
+/mlir/include/mlir/Dialect/LLVMIR/ROCDL* @krzysz00 @kuhar
+
 # Bufferization Dialect in MLIR.
 /mlir/include/mlir/Dialect/Bufferization @matthias-springer
 /mlir/lib/Dialect/Bufferization @matthias-springer
 
@@ -1,14 +1,19 @@
 ARG LLVM_VERSION=21.1.0
+# FIXME: Use "${LLVM_VERSION%%.*}" instead of "LLVM_VERSION_MAJOR" once we update runners to Ubuntu-26.04 with Buildah >= 1.37
+ARG LLVM_VERSION_MAJOR=21
 
 FROM docker.io/library/ubuntu:24.04 AS llvm-downloader
 ARG LLVM_VERSION
+ARG LLVM_VERSION_MAJOR
 
 RUN apt-get update && \
     apt-get install -y wget xz-utils && \
     wget -O llvm.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-${LLVM_VERSION}/LLVM-${LLVM_VERSION}-Linux-X64.tar.xz && \
     mkdir -p /llvm-extract && \
     tar -xvJf llvm.tar.xz -C /llvm-extract \
         # Only unpack these tools to save space on Github runner.
+        LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-${LLVM_VERSION_MAJOR} \
+        LLVM-${LLVM_VERSION}-Linux-X64/lib/clang/${LLVM_VERSION_MAJOR}/include \
         LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-tidy \
         LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-format \
         LLVM-${LLVM_VERSION}-Linux-X64/bin/git-clang-format && \
@@ -50,12 +55,27 @@ RUN pip install -r requirements_formatting.txt --break-system-packages && \
 
 FROM base AS ci-container-code-lint
 ARG LLVM_VERSION
+ARG LLVM_VERSION_MAJOR
 
-COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-tidy ${LLVM_SYSROOT}/bin/
+COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-tidy \
+                            /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/bin/clang-${LLVM_VERSION_MAJOR} \
+                            ${LLVM_SYSROOT}/bin/
+COPY --from=llvm-downloader /llvm-extract/LLVM-${LLVM_VERSION}-Linux-X64/lib/clang/${LLVM_VERSION_MAJOR}/include \
+                            ${LLVM_SYSROOT}/lib/clang/${LLVM_VERSION_MAJOR}/include
 COPY clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py ${LLVM_SYSROOT}/bin/clang-tidy-diff.py
 
+RUN ln -s ${LLVM_SYSROOT}/bin/clang-${LLVM_VERSION_MAJOR} ${LLVM_SYSROOT}/bin/clang && \
+    ln -s ${LLVM_SYSROOT}/bin/clang ${LLVM_SYSROOT}/bin/clang++
+
 ENV PATH=${LLVM_SYSROOT}/bin:${PATH}
 
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    cmake \
+    ninja-build && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
 # Install dependencies for 'pr-code-lint.yml' job
 COPY llvm/utils/git/requirements_linting.txt requirements_linting.txt
 RUN pip install -r requirements_linting.txt --break-system-packages && \
 
@@ -20,7 +20,7 @@ jobs:
       run:
         shell: bash
     container:
-      image: 'ghcr.io/llvm/ci-ubuntu-24.04:latest'
+      image: 'ghcr.io/llvm/ci-ubuntu-24.04-lint'
     timeout-minutes: 60
     concurrency:
       group: ${{ github.workflow }}-${{ github.ref }}
@@ -31,6 +31,11 @@ jobs:
         with:
           fetch-depth: 2
 
+      # FIXME: same as in ".github/workflows/pr-code-format.yml"
+      - name: Set Safe Directory
+        run: |
+          chown -R root $(pwd)
+      
       - name: Get changed files
         id: changed-files
         uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
@@ -46,22 +51,6 @@ jobs:
         run: |
           echo "Changed files:"
           echo "$CHANGED_FILES"
-
-      # The clang tidy version should always be upgraded to the first version
-      # of a release cycle (x.1.0) or the last version of a release cycle, or
-      # if there have been relevant clang-format backports.
-      - name: Install clang-tidy
-        uses: aminya/setup-cpp@a276e6e3d1db9160db5edc458e99a30d3b109949 # v1.7.1
-        with:
-          clang-tidy: 21.1.0
-      
-      - name: Setup Python env
-        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
-        with:
-          python-version: '3.13'
-
-      - name: Install Python dependencies
-        run: python3 -m pip install -r llvm/utils/git/requirements_linting.txt
       
       # TODO: create special mapping for 'codegen' targets, for now build predefined set
       # TODO: add entrypoint in 'compute_projects.py' that only adds a project and its direct dependencies
 
@@ -62,6 +62,7 @@ jobs:
         with:
           fetch-depth: 2
       - name: Build and Test
+        timeout-minutes: 120
         continue-on-error: ${{ runner.arch == 'ARM64' }}
         run: |
           git config --global --add safe.directory '*'
@@ -149,6 +150,7 @@ jobs:
           echo "windows-runtimes=${runtimes_to_build}" >> $GITHUB_OUTPUT
           echo "windows-runtimes-check-targets=${runtimes_check_targets}" >> $GITHUB_OUTPUT
       - name: Build and Test
+        timeout-minutes: 180
         if: ${{ steps.vars.outputs.windows-projects != '' }}
         shell: cmd
         run: |
 
@@ -108,9 +108,10 @@ $ perf record -e cycles:u -j any,u -o perf.data -- <executable> <args> ...
 #### For Services
 
 Once you get the service deployed and warmed-up, it is time to collect perf
-data with LBR (branch information). The exact perf command to use will depend
-on the service. E.g., to collect the data for all processes running on the
-server for the next 3 minutes use:
+data with brstack (branch information). Different architectures implement this
+using different hardware units, for example LBR on X86, and BRBE on AArch64.
+The exact perf command to use will depend on the service. E.g., to collect the
+data for all processes running on the server for the next 3 minutes use:
 ```
 $ perf record -e cycles:u -j any,u -a -o perf.data -- sleep 180
 ```
@@ -163,7 +164,7 @@ $ perf2bolt -p perf.data -o perf.fdata <executable>
 This command will aggregate branch data from `perf.data` and store it in a
 format that is both more compact and more resilient to binary modifications.
 
-If the profile was collected without LBRs, you will need to add `-nl` flag to
+If the profile was collected without brstacks, you will need to add the `-nl` flag to
 the command line above.
 
 ### Step 3: Optimize with BOLT
 
@@ -1,7 +1,7 @@
 # Code Heatmaps
 
 BOLT has gained the ability to print code heatmaps based on
-sampling-based profiles generated by `perf`, either with `LBR` data or not.
+sampling-based profiles generated by `perf`, either with `brstack` data or not.
 The output is produced in colored ASCII to be displayed in a color-capable
 terminal. It looks something like this:
 
@@ -20,9 +20,9 @@ or if you want to monitor the existing process(es):
 $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
 ```
 
-Running with LBR (`-j any,u` or `-b`) is recommended. Heatmaps can be generated
-from basic events by using the llvm-bolt-heatmap option `-nl` (no LBR) but
-such heatmaps do not have the coverage provided by LBR and may only be useful
+Running with brstack (`-j any,u` or `-b`) is recommended. Heatmaps can be generated
+from basic events by using the llvm-bolt-heatmap option `-nl` (no brstack) but
+such heatmaps do not have the coverage provided by brstack and may only be useful
 for finding event hotspots at larger code block granularities.
 
 Once the run is complete, and `perf.data` is generated, run llvm-bolt-heatmap:
 
@@ -97,7 +97,7 @@ BOLT-INFO: basic block reordering modified layout of 7848 (10.32%) functions
            790053908 : all conditional branches (=)
 ...
 ```
-The statistics in the output is based on the LBR profile collected with `perf`, and since we were using
+The statistics in the output are based on the brstack profile (LBR) collected with `perf`, and since we were using
 the `cycles` counter, its accuracy is affected. However, the relative improvement in `taken conditional
  branches` is a good indication that BOLT was able to straighten out the code even after PGO.
 
 
@@ -5,7 +5,7 @@
 
 Many Linux applications spend a significant amount of their execution time in the kernel. Thus, when we consider code optimization for system performance, it is essential to improve the CPU utilization not only in the user-space applications and libraries but also in the kernel. BOLT has demonstrated double-digit gains while being applied to user-space programs. This guide shows how to apply BOLT to the x86-64 Linux kernel and enhance your system's performance. In our experiments, BOLT boosted database TPS by 2 percent when applied to the kernel compiled with the highest level optimizations, including PGO and LTO. The database spent ~40% of the time in the kernel and was quite sensitive to kernel performance.
 
-BOLT optimizes code layout based on a low-level execution profile collected with the Linux `perf` tool. The best quality profile should include branch history, such as Intel's last branch records (LBR). BOLT runs on a linked binary and reorders the code while combining frequently executed blocks of instructions in a manner best suited for the hardware. Other than branch instructions, most of the code is left unchanged. Additionally, BOLT updates all metadata associated with the modified code, including DWARF debug information and Linux ORC unwind information.
+BOLT optimizes code layout based on a low-level execution profile collected with the Linux `perf` tool. The best quality profile should include branch history (brstack), such as Intel's last branch records (LBR) or AArch64's Branch Record Buffer Extension (BRBE). BOLT runs on a linked binary and reorders the code while combining frequently executed blocks of instructions in a manner best suited for the hardware. Other than branch instructions, most of the code is left unchanged. Additionally, BOLT updates all metadata associated with the modified code, including DWARF debug information and Linux ORC unwind information.
 
 While BOLT optimizations are not specific to the Linux kernel, certain quirks distinguish the kernel from user-level applications.