From 8a4cab25e29a3d4bf10d34a1124c47f965e3b926 Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Wed, 25 Sep 2024 19:59:42 +0000 Subject: [PATCH 1/5] Use stride instead of order to determine block attr --- .../MaterializeBlockPointer.cpp | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp index f281764c63..1f3e4bf834 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp @@ -51,16 +51,28 @@ struct TritonIntelGPUMaterializeBlockPointerPass LDBG("Found make tensor ptr op: " << makeTensorPtrOp); auto ptrType = cast(makeTensorPtrOp.getType()); auto tensorType = cast(ptrType.getPointeeType()); - ArrayRef order = makeTensorPtrOp.getOrder(); - unsigned rank = order.size(); + Operation::operand_range shape = makeTensorPtrOp.getShape(); + unsigned rank = shape.size(); LDBG("Rank: " << rank); if (rank == 1) return; - unsigned fastChangeDim = order[0]; + Operation::operand_range strides = makeTensorPtrOp.getStrides(); + int fastChangeDim = -1; + for (size_t i = 0; i < strides.size(); i++) { + if (mlir::triton::gpu::intel::isConstant(strides[i], 1)) { + fastChangeDim = i; + break; + } + } LDBG("Fast change dim: " << fastChangeDim); + if (fastChangeDim < 0) { + return; + } + ArrayRef order = makeTensorPtrOp.getOrder(); + + // unsigned fastChangeDim = order[0]; if (fastChangeDim >= (rank - 2)) { - Operation::operand_range strides = makeTensorPtrOp.getStrides(); // HW 2D block read instruction only supports contiguous access. Value fastChangeStride = strides[fastChangeDim]; From c09e10e09997ee2d3b3ef8374c748b2cff1aad00 Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Thu, 26 Sep 2024 00:51:27 +0000 Subject: [PATCH 2/5] fix pitch restriction check + remove commented code --- .../lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp index 1f3e4bf834..9802724d8d 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp @@ -71,9 +71,7 @@ struct TritonIntelGPUMaterializeBlockPointerPass } ArrayRef order = makeTensorPtrOp.getOrder(); - // unsigned fastChangeDim = order[0]; if (fastChangeDim >= (rank - 2)) { - // HW 2D block read instruction only supports contiguous access. Value fastChangeStride = strides[fastChangeDim]; LLVM_DEBUG({ @@ -89,7 +87,8 @@ struct TritonIntelGPUMaterializeBlockPointerPass Value pitch = strides[(fastChangeDim == rank - 1) ? rank - 2 : rank - 1]; LDBG("Pitch: " << pitch); - if (!ttgi::isDivisible(pitch, 64 / tensorType.getElementTypeBitWidth())) + if (!ttgi::isDivisible(pitch, + 128 / tensorType.getElementTypeBitWidth())) return; loadOp->setAttr(ttgi::TritonIntelGPUDialect::getBlockIOAttrName(), From 9f7bfdc974b3bec2095d11be1d2adf80522c21f5 Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Wed, 25 Sep 2024 20:56:03 -0400 Subject: [PATCH 3/5] ++i Co-authored-by: Whitney Tsang --- .../lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp index 9802724d8d..aa08fc4cf3 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp @@ -59,7 +59,7 @@ struct TritonIntelGPUMaterializeBlockPointerPass Operation::operand_range strides = makeTensorPtrOp.getStrides(); int fastChangeDim = -1; - for (size_t i = 0; i < strides.size(); i++) { + for (size_t i = 0; i < strides.size(); ++i) { if (mlir::triton::gpu::intel::isConstant(strides[i], 1)) { fastChangeDim = i; break; From de99c71bd5aad859b4589c7a0470a9e06abf9ea8 Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Thu, 26 Sep 2024 00:57:51 +0000 Subject: [PATCH 4/5] remove unused variable --- .../lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp index aa08fc4cf3..c803cbb81f 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp @@ -65,11 +65,11 @@ struct TritonIntelGPUMaterializeBlockPointerPass break; } } + LDBG("Fast change dim: " << fastChangeDim); if (fastChangeDim < 0) { return; } - ArrayRef order = makeTensorPtrOp.getOrder(); if (fastChangeDim >= (rank - 2)) { // HW 2D block read instruction only supports contiguous access. From 5a23de486d810d26b074cbe8ecded1c67303d19e Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Thu, 26 Sep 2024 01:01:38 +0000 Subject: [PATCH 5/5] format --- .../lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp index c803cbb81f..8361675b55 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/MaterializeBlockPointer.cpp @@ -65,7 +65,7 @@ struct TritonIntelGPUMaterializeBlockPointerPass break; } } - + LDBG("Fast change dim: " << fastChangeDim); if (fastChangeDim < 0) { return;