
Commit 9a32209

Setup clang-format action and format the whole codebase. (#349)
This PR does the following:

* Clang-formats all C and C++ files within our repository using the LLVM style.
* Creates a GitHub workflow that verifies code formatting on PRs.
* Removes the symlink of clang-format in favor of a permanent copy. We do not need to keep it up to date with Triton: the likelihood of Triton updating their clang-format file is low enough for this not to be a problem, and even if it were updated, fully matching their style is not essential. It is more important to have a permanent copy that keeps the workflow simple.
1 parent b673908 commit 9a32209
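
For context, most of the diff below is mechanical: with BasedOnStyle: LLVM, clang-format enforces an 80-column limit and keeps short template declarations and bodies on a single line when they fit. A minimal illustrative C++ sketch of that rule (the long struct name is hypothetical and not from the repository; stdSort is the helper reformatted in CRunnerUtils.cpp below):

// Illustrative only -- how clang-format's LLVM style lays out templates.
#include <algorithm>
#include <cstdint>

namespace example {

// Fits within 80 columns, so the declaration and body stay on one line.
template <typename V> void stdSort(uint64_t n, V *p) { std::sort(p, p + n); }

// Exceeds 80 columns as a one-liner, so clang-format breaks after the
// template parameter list instead.
template <typename ElementType, int FirstDim, int SecondDim, int ThirdDim>
struct HypotheticalPaddedStorageBuffer;

} // namespace example

Most of the header hunks below are this same transformation, applied by the clang-format run described above.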

25 files changed: +175 additions, -172 deletions


.clang-format

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+BasedOnStyle: LLVM

.github/workflows/clang-format.yml

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+name: clang-format
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  clang-format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: |
+          pip install clang-format==20.1.8 ripgrep==14.1.0
+      - name: Running clang-format
+        run: |
+          rg . --type cpp --type c --files-with-matches \
+            | xargs clang-format --dry-run --Werror

.gitignore

Lines changed: 2 additions & 3 deletions
@@ -3,6 +3,5 @@
 compile_commands.json
 build/*
 .vscode/*
-/.clang-format
-test_core.py
-test_annotations.py
+/python/examples/test_core.py
+/python/examples/test_annotations.py

CMakeLists.txt

Lines changed: 2 additions & 5 deletions
@@ -27,15 +27,12 @@ if (TRITON_SHARED_BUILD_CPU_BACKEND)
 target_link_libraries(TritonShared PRIVATE Python3::Module pybind11::headers)
 endif()

-# Add symlinks to selected pytest files and the clang-format setting in triton. The tests are imported into triton-shared’s test folder to
-# run under triton-shared's conftest configuration, and the clang-format link ensures consistent code style enforcement across both repositories.
+# Add symlinks to selected pytest files in triton. The tests are imported into triton-shared’s test folder to
+# run under triton-shared's conftest configuration.
 cmake_path(APPEND CMAKE_CURRENT_SOURCE_DIR "python" "examples" "test_core.py" OUTPUT_VARIABLE TRITON_SHARED_TEST_CORE)
 cmake_path(APPEND CMAKE_CURRENT_SOURCE_DIR "python" "examples" "test_annotations.py" OUTPUT_VARIABLE TRITON_SHARED_TEST_ANNOTATIONS)
-cmake_path(APPEND CMAKE_CURRENT_SOURCE_DIR ".clang-format" OUTPUT_VARIABLE TRITON_SHARED_CLANG_FORMAT_SETTING)
 cmake_path(APPEND CMAKE_SOURCE_DIR "python" "test" "unit" "language" "test_core.py" OUTPUT_VARIABLE TRITON_TEST_CORE)
 cmake_path(APPEND CMAKE_SOURCE_DIR "python" "test" "unit" "language" "test_annotations.py" OUTPUT_VARIABLE TRITON_TEST_ANNOTATIONS)
-cmake_path(APPEND CMAKE_SOURCE_DIR ".clang-format" OUTPUT_VARIABLE TRITON_CLANG_FORMAT_SETTING)

 add_symlink(${TRITON_TEST_CORE} ${TRITON_SHARED_TEST_CORE})
 add_symlink(${TRITON_TEST_ANNOTATIONS} ${TRITON_SHARED_TEST_ANNOTATIONS})
-add_symlink(${TRITON_CLANG_FORMAT_SETTING} ${TRITON_SHARED_CLANG_FORMAT_SETTING})

backend/include/ExecutionEngine/CRunnerUtils.cpp

Lines changed: 2 additions & 5 deletions
@@ -16,7 +16,7 @@
 #include "Msan.h"

 #ifndef _WIN32
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
     defined(__DragonFly__)
 #include <cstdlib>
 #else
@@ -37,10 +37,7 @@
 #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS

 namespace {
-template <typename V>
-void stdSort(uint64_t n, V *p) {
-  std::sort(p, p + n);
-}
+template <typename V> void stdSort(uint64_t n, V *p) { std::sort(p, p + n); }

 } // namespace

backend/include/ExecutionEngine/CRunnerUtils.h

Lines changed: 17 additions & 34 deletions
@@ -50,11 +50,9 @@ constexpr unsigned nextPowerOf2(int n) {
   return (n <= 1) ? 1 : (isPowerOf2(n) ? n : (2 * nextPowerOf2((n + 1) / 2)));
 }

-template <typename T, int Dim, bool IsPowerOf2>
-struct Vector1D;
+template <typename T, int Dim, bool IsPowerOf2> struct Vector1D;

-template <typename T, int Dim>
-struct Vector1D<T, Dim, /*IsPowerOf2=*/true> {
+template <typename T, int Dim> struct Vector1D<T, Dim, /*IsPowerOf2=*/true> {
   Vector1D() {
     static_assert(detail::nextPowerOf2(sizeof(T[Dim])) == sizeof(T[Dim]),
                   "size error");
@@ -68,8 +66,7 @@ struct Vector1D<T, Dim, /*IsPowerOf2=*/true> {

 // 1-D vector, padded to the next power of 2 allocation.
 // Specialization occurs to avoid zero size arrays (which fail in -Werror).
-template <typename T, int Dim>
-struct Vector1D<T, Dim, /*IsPowerOf2=*/false> {
+template <typename T, int Dim> struct Vector1D<T, Dim, /*IsPowerOf2=*/false> {
   Vector1D() {
     static_assert(nextPowerOf2(sizeof(T[Dim])) > sizeof(T[Dim]), "size error");
     static_assert(nextPowerOf2(sizeof(T[Dim])) < 2 * sizeof(T[Dim]),
@@ -86,8 +83,7 @@ struct Vector1D<T, Dim, /*IsPowerOf2=*/false> {
 } // namespace mlir

 // N-D vectors recurse down to 1-D.
-template <typename T, int Dim, int... Dims>
-struct Vector {
+template <typename T, int Dim, int... Dims> struct Vector {
   inline Vector<T, Dims...> &operator[](unsigned i) { return vector[i]; }
   inline const Vector<T, Dims...> &operator[](unsigned i) const {
     return vector[i];
@@ -105,30 +101,25 @@ struct Vector<T, Dim>
                                     mlir::detail::isPowerOf2(sizeof(T[Dim]))> {
 };

-template <int D1, typename T>
-using Vector1D = Vector<T, D1>;
-template <int D1, int D2, typename T>
-using Vector2D = Vector<T, D1, D2>;
+template <int D1, typename T> using Vector1D = Vector<T, D1>;
+template <int D1, int D2, typename T> using Vector2D = Vector<T, D1, D2>;
 template <int D1, int D2, int D3, typename T>
 using Vector3D = Vector<T, D1, D2, D3>;
 template <int D1, int D2, int D3, int D4, typename T>
 using Vector4D = Vector<T, D1, D2, D3, D4>;

-template <int N>
-void dropFront(int64_t arr[N], int64_t *res) {
+template <int N> void dropFront(int64_t arr[N], int64_t *res) {
   for (unsigned i = 1; i < N; ++i)
     *(res + i - 1) = arr[i];
 }

 //===----------------------------------------------------------------------===//
 // Codegen-compatible structures for StridedMemRef type.
 //===----------------------------------------------------------------------===//
-template <typename T, int Rank>
-class StridedMemrefIterator;
+template <typename T, int Rank> class StridedMemrefIterator;

 /// StridedMemRef descriptor type with static rank.
-template <typename T, int N>
-struct StridedMemRefType {
+template <typename T, int N> struct StridedMemRefType {
   T *basePtr;
   T *data;
   int64_t offset;
@@ -165,8 +156,7 @@ struct StridedMemRefType {
 };

 /// StridedMemRef descriptor type specialized for rank 1.
-template <typename T>
-struct StridedMemRefType<T, 1> {
+template <typename T> struct StridedMemRefType<T, 1> {
   T *basePtr;
   T *data;
   int64_t offset;
@@ -188,8 +178,7 @@ struct StridedMemRefType<T, 1> {
 };

 /// StridedMemRef descriptor type specialized for rank 0.
-template <typename T>
-struct StridedMemRefType<T, 0> {
+template <typename T> struct StridedMemRefType<T, 0> {
   T *basePtr;
   T *data;
   int64_t offset;
@@ -207,8 +196,7 @@ struct StridedMemRefType<T, 0> {
 };

 /// Iterate over all elements in a strided memref.
-template <typename T, int Rank>
-class StridedMemrefIterator {
+template <typename T, int Rank> class StridedMemrefIterator {
 public:
   using iterator_category = std::forward_iterator_tag;
   using value_type = T;
@@ -261,8 +249,7 @@ class StridedMemrefIterator {
 };

 /// Iterate over all elements in a 0-ranked strided memref.
-template <typename T>
-class StridedMemrefIterator<T, 0> {
+template <typename T> class StridedMemrefIterator<T, 0> {
 public:
   using iterator_category = std::forward_iterator_tag;
   using value_type = T;
@@ -307,21 +294,18 @@ class StridedMemrefIterator<T, 0> {
 // Codegen-compatible structure for UnrankedMemRef type.
 //===----------------------------------------------------------------------===//
 // Unranked MemRef
-template <typename T>
-struct UnrankedMemRefType {
+template <typename T> struct UnrankedMemRefType {
   int64_t rank;
   void *descriptor;
 };

 //===----------------------------------------------------------------------===//
 // DynamicMemRefType type.
 //===----------------------------------------------------------------------===//
-template <typename T>
-class DynamicMemRefIterator;
+template <typename T> class DynamicMemRefIterator;

 // A reference to one of the StridedMemRef types.
-template <typename T>
-class DynamicMemRefType {
+template <typename T> class DynamicMemRefType {
 public:
   int64_t rank;
   T *basePtr;
@@ -388,8 +372,7 @@ class DynamicMemRefType {
 };

 /// Iterate over all elements in a dynamic memref.
-template <typename T>
-class DynamicMemRefIterator {
+template <typename T> class DynamicMemRefIterator {
 public:
   using iterator_category = std::forward_iterator_tag;
   using value_type = T;

include/triton-shared/Analysis/MaskAnalysis.h

Lines changed: 3 additions & 2 deletions
@@ -90,8 +90,9 @@ struct MaskState {
   LogicalResult addStates(const MaskState &lhsState, const MaskState &rhsState,
                           Location loc, OpBuilder &builder);

-  LogicalResult minStateScalar(const MaskState &lhsState, const MaskState &rhsState,
-                               Location loc, OpBuilder &builder);
+  LogicalResult minStateScalar(const MaskState &lhsState,
+                               const MaskState &rhsState, Location loc,
+                               OpBuilder &builder);

   LogicalResult minStates(const MaskState &lhsState, const MaskState &rhsState,
                           Location loc, OpBuilder &builder);

include/triton-shared/AnalysisStructured/PtrAnalysis.h

Lines changed: 9 additions & 7 deletions
@@ -46,9 +46,9 @@ const extern std::string ptrAnalysisAttr;
 // address, it will be collapsed to 1D. To support gather/scatter access, treat
 // the unstructured offset as a whole offset instead of decoding the pointer
 // arithmetic on it except scalar mul.
-// The stride is set to 1 when there's no scalar mul so it still matches the offset *
-// stride formula. When there're scalar muls, the stride is set to the multiplication
-// of all the scalar strides.
+// The stride is set to 1 when there's no scalar mul so it still matches the
+// offset * stride formula. When there're scalar muls, the stride is set to the
+// multiplication of all the scalar strides.
 struct PtrState {
   SmallVector<OpFoldResult> offsets;
   SmallVector<OpFoldResult> sizes;
@@ -321,14 +321,16 @@ class PtrAnalysis {
   // Operand is the result of tt.int_to_ptr.
   // Expected result:
   // Directly grab op result
-  LogicalResult visitOperandIntToPtr(triton::IntToPtrOp intToPtrOp, PtrState &state,
-                                     const Location loc, OpBuilder &builder);
+  LogicalResult visitOperandIntToPtr(triton::IntToPtrOp intToPtrOp,
+                                     PtrState &state, const Location loc,
+                                     OpBuilder &builder);

   // Operand is the result of tt.bitcast.
   // Expected result:
   // Directly grab op result
-  LogicalResult visitOperandBitcast(triton::BitcastOp bitcastOp, PtrState &state,
-                                    const Location loc, OpBuilder &builder);
+  LogicalResult visitOperandBitcast(triton::BitcastOp bitcastOp,
+                                    PtrState &state, const Location loc,
+                                    OpBuilder &builder);

   // Get the computed PtrState for the forOp's init-arg at the provided index.
   FailureOr<PtrState> getLoopInitArgPtrState(scf::ForOp forOp, size_t index);

include/triton-shared/Conversion/TritonArithToLinalg/ConversionPatterns.hpp

Lines changed: 22 additions & 22 deletions
@@ -813,40 +813,40 @@ struct AssertConverter : public OpConversionPattern<triton::AssertOp> {
     Value condVal = op.getCondition();

     auto assertMessage =
-        llvm::formatv("Assertion `{0}` failed", op.getMessage());
-
-    // The condition can only be I1 or I1Tensor (integer or tensor) from TritonOps.td.
-    // Tensors will always be RankedTensorType.
+        llvm::formatv("Assertion `{0}` failed", op.getMessage());
+
+    // The condition can only be I1 or I1Tensor (integer or tensor) from
+    // TritonOps.td. Tensors will always be RankedTensorType.
     if (isa<mlir::IntegerType>(condVal.getType())) {
       // handle scalar case
       rewriter.create<mlir::cf::AssertOp>(op.getLoc(), condVal,
                                           assertMessage.str());
-    } else if (auto tensorType = dyn_cast<RankedTensorType>(condVal.getType())) {
+    } else if (auto tensorType =
+                   dyn_cast<RankedTensorType>(condVal.getType())) {
       // handle tensor case
       int64_t rank = tensorType.getRank();

       // create identity mapping for access pattern
-      SmallVector<AffineMap, 3> indexingMaps{AffineMap::getMultiDimIdentityMap(rank, rewriter.getContext())};
+      SmallVector<AffineMap, 3> indexingMaps{
+          AffineMap::getMultiDimIdentityMap(rank, rewriter.getContext())};

       // loops do not depend on each other
-      SmallVector<utils::IteratorType, 3> iteratorTypes(rank, utils::IteratorType::parallel);
+      SmallVector<utils::IteratorType, 3> iteratorTypes(
+          rank, utils::IteratorType::parallel);

       rewriter.create<linalg::GenericOp>(
-          op.getLoc(),
-          TypeRange{},
-          condVal,
-          ValueRange{},
-          ArrayRef<AffineMap>{indexingMaps},
-          ArrayRef<utils::IteratorType>{iteratorTypes},
-          [&](OpBuilder &b, Location loc, ValueRange args) {
-            // obtain the element in the tensor
-            Value element = args[0];
-
-            // make a cf.assert for the current element
-            b.create<mlir::cf::AssertOp>(loc, element, assertMessage.str());
-
-            b.create<linalg::YieldOp>(loc);
-          });
+          op.getLoc(), TypeRange{}, condVal, ValueRange{},
+          ArrayRef<AffineMap>{indexingMaps},
+          ArrayRef<utils::IteratorType>{iteratorTypes},
+          [&](OpBuilder &b, Location loc, ValueRange args) {
+            // obtain the element in the tensor
+            Value element = args[0];
+
+            // make a cf.assert for the current element
+            b.create<mlir::cf::AssertOp>(loc, element, assertMessage.str());
+
+            b.create<linalg::YieldOp>(loc);
+          });
     } else {
       op.emitError("Unexpected type in triton::AssertOp");
       return failure();

include/triton-shared/Conversion/TritonArithToLinalg/ConversionTools.h

Lines changed: 2 additions & 1 deletion
@@ -6,7 +6,8 @@
 namespace mlir {
 namespace triton {

-static inline SmallVector<utils::IteratorType> getNParallelLoopsAttrs(unsigned n) {
+static inline SmallVector<utils::IteratorType>
+getNParallelLoopsAttrs(unsigned n) {
   return SmallVector<utils::IteratorType>(n, utils::IteratorType::parallel);
 }

0 commit comments
