Skip to content

Commit 3d8b0be

Browse files
ZelboK, Danial Javady, antiagainst
authored
[AMD] Refactor llStore and llLoad to use proper ops (triton-lang#8036)
Refactor `llLoad` and `llStore` so that they no longer emit specially named function calls that the `builtin-func-to-llvm` pass later recognizes by matching on the callee string; they now lower through their own dedicated AMDGPU ops. `builtin-func-to-llvm` is no longer responsible for these operations, so the corresponding code there was removed. --------- Co-authored-by: Danial Javady <[email protected]> Co-authored-by: Lei Zhang <[email protected]>
1 parent 8b792c8 commit 3d8b0be

File tree

15 files changed

+318
-271
lines changed

15 files changed

+318
-271
lines changed

test/Conversion/amd/async-ops-alias-scopes.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
// RUN: triton-opt %s -split-input-file --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch=gfx950 --convert-scf-to-cf --convert-builtin-func-to-llvm | FileCheck %s --check-prefixes=COMMON,GFX950
2-
// RUN: triton-opt %s -split-input-file --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch=gfx942 --convert-scf-to-cf --convert-builtin-func-to-llvm | FileCheck %s --check-prefixes=COMMON,GFX942
1+
// RUN: triton-opt %s -split-input-file --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch=gfx950 --convert-scf-to-cf | FileCheck %s --check-prefixes=COMMON,GFX950
2+
// RUN: triton-opt %s -split-input-file --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch=gfx942 --convert-scf-to-cf | FileCheck %s --check-prefixes=COMMON,GFX942
33

44
// COMMON: [[$ASYNC_COPY_SCOPE:#.*]] = #llvm.alias_scope<id = "amdgpu.AsyncCopies"
55
// COMMON: [[$LOCAL_LOAD_SCOPE:#.*]] = #llvm.alias_scope<id = "amdgpu.LocalLoads"

test/Conversion/amd/async_ops_to_llvm.mlir

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,22 +176,26 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.shar
176176
// CHECK: llvm.cond_br
177177
// CHECK: rocdl.global.load.lds
178178
// CHECK-NEXT: llvm.br
179-
// CHECK: _predicated_store
179+
// CHECK: llvm.cond_br
180+
// CHECK: llvm.store
180181

181182
// CHECK: llvm.cond_br
182183
// CHECK: rocdl.global.load.lds
183184
// CHECK-NEXT: llvm.br
184-
// CHECK: _predicated_store
185+
// CHECK: llvm.cond_br
186+
// CHECK: llvm.store
185187

186188
// CHECK: llvm.cond_br
187189
// CHECK: rocdl.global.load.lds
188190
// CHECK-NEXT: llvm.br
189-
// CHECK: _predicated_store
191+
// CHECK: llvm.cond_br
192+
// CHECK: llvm.store
190193

191194
// CHECK: llvm.cond_br
192195
// CHECK: rocdl.global.load.lds
193196
// CHECK-NEXT: llvm.br
194-
// CHECK: _predicated_store
197+
// CHECK: llvm.cond_br
198+
// CHECK: llvm.store
195199

196200
%2 = ttg.async_copy_global_to_local %1, %arg2 mask %67 other %cst_0 : tensor<32x32x!tt.ptr<f32>, #blocked> -> <32x32xf32, #shared, #smem, mutable>
197201
tt.return
@@ -236,28 +240,32 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.shar
236240
// CHECK: llvm.cond_br
237241
// CHECK: rocdl.global.load.lds
238242
// CHECK-NEXT: llvm.br
239-
// CHECK: _predicated_store
243+
// CHECK: llvm.cond_br
244+
// CHECK: llvm.store
240245

241246
// CHECK: rocdl.ds_bpermute
242247
// CHECK: rocdl.ballot
243248
// CHECK: llvm.cond_br
244249
// CHECK: rocdl.global.load.lds
245250
// CHECK-NEXT: llvm.br
246-
// CHECK: _predicated_store
251+
// CHECK: llvm.cond_br
252+
// CHECK: llvm.store
247253

248254
// CHECK: rocdl.ds_bpermute
249255
// CHECK: rocdl.ballot
250256
// CHECK: llvm.cond_br
251257
// CHECK: rocdl.global.load.lds
252258
// CHECK-NEXT: llvm.br
253-
// CHECK: _predicated_store
259+
// CHECK: llvm.cond_br
260+
// CHECK: llvm.store
254261

255262
// CHECK: rocdl.ds_bpermute
256263
// CHECK: rocdl.ballot
257264
// CHECK: llvm.cond_br
258265
// CHECK: rocdl.global.load.lds
259266
// CHECK-NEXT: llvm.br
260-
// CHECK: _predicated_store
267+
// CHECK: llvm.cond_br
268+
// CHECK: llvm.store
261269

262270
%2 = ttg.async_copy_global_to_local %1, %arg2 mask %67 other %cst_0 : tensor<32x32x!tt.ptr<f32>, #blocked> -> <32x32xf32, #shared, #smem, mutable>
263271
tt.return

test/Conversion/amd/buffer_load_to_local_to_llvm.mlir

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -147,19 +147,25 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.shar
147147
// Note that mask/other alignment is 1 so we need 4 conditionals
148148

149149
// COMMON: rocdl.raw.ptr.buffer.load.lds
150-
// COMMON: _predicated_store
150+
// COMMON: llvm.cond_br
151+
// COMMON: llvm.store
151152

152153
// COMMON: rocdl.raw.ptr.buffer.load.lds
153-
// COMMON: _predicated_store
154+
// COMMON: llvm.cond_br
155+
// COMMON: llvm.store
154156

155157
// COMMON: rocdl.raw.ptr.buffer.load.lds
156-
// COMMON: _predicated_store
158+
// COMMON: llvm.cond_br
159+
// COMMON: llvm.store
157160

158161
// COMMON: rocdl.raw.ptr.buffer.load.lds
159-
// COMMON: _predicated_store
162+
// COMMON: llvm.cond_br
163+
// COMMON: llvm.store
160164

161165
// COMMON-NOT: rocdl.raw.ptr.buffer.load.lds
162166
// COMMON-NOT: _predicated_store
167+
// COMMON-NOT: llvm.cond_br
168+
// COMMON-NOT: llvm.store
163169

164170
amdgpu.buffer_load_to_local %arg1[%arg2] mask=%67 other=%cst_0 into %arg3 : <f32>[tensor<32x32xi32, #blocked>] tensor<32x32xf32, #blocked> -> <32x32xf32, #shared, #smem, mutable>
165171
tt.return
@@ -257,22 +263,26 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, ttg.shar
257263
// COMMON: rocdl.ds_bpermute
258264
// COMMON: rocdl.ballot
259265
// COMMON: rocdl.raw.ptr.buffer.load.lds
260-
// COMMON: _predicated_store
266+
// COMMON: llvm.cond_br
267+
// COMMON: llvm.store
261268

262269
// COMMON: rocdl.ds_bpermute
263270
// COMMON: rocdl.ballot
264271
// COMMON: rocdl.raw.ptr.buffer.load.lds
265-
// COMMON: _predicated_store
272+
// COMMON: llvm.cond_br
273+
// COMMON: llvm.store
266274

267275
// COMMON: rocdl.ds_bpermute
268276
// COMMON: rocdl.ballot
269277
// COMMON: rocdl.raw.ptr.buffer.load.lds
270-
// COMMON: _predicated_store
278+
// COMMON: llvm.cond_br
279+
// COMMON: llvm.store
271280

272281
// COMMON: rocdl.ds_bpermute
273282
// COMMON: rocdl.ballot
274283
// COMMON: rocdl.raw.ptr.buffer.load.lds
275-
// COMMON: _predicated_store
284+
// COMMON: llvm.cond_br
285+
// COMMON: llvm.store
276286

277287
// COMMON-NOT: rocdl.ds_bpermute
278288
// COMMON-NOT: rocdl.ballot

test/Proton/amd/protongpu_to_llvm.mlir

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ module attributes {"ttg.num-warps" = 8 : i32, ttg.profile_scratch_memory_alignme
8282
// CHECK-DAG: rocdl.workgroup.id.z
8383
// CHECK-DAG: rocdl.grid.dim.x
8484
// CHECK-DAG: rocdl.grid.dim.y
85-
// CHECK-DAG: %[[PID:.*]] = llvm.trunc %15 : i64 to i32
85+
// CHECK-DAG: %[[PID:.*]] = llvm.trunc %{{.*}} : i64 to i32
8686
// CHECK-DAG: %[[SIZE:.*]] = llvm.mlir.constant(384 : i32)
8787
// CHECK-DAG: %{{.*}} = llvm.mul %[[PID]], %[[SIZE]] : i32
8888
%1 = proton_gpu.global_scratch_alloc {alignment = 128 : i32, nbytes = 384 : i32, offset = 0 : i32} : !tt.ptr<i32>
@@ -91,37 +91,24 @@ module attributes {"ttg.num-warps" = 8 : i32, ttg.profile_scratch_memory_alignme
9191
}
9292

9393
// -----
94-
9594
#shared = #ttg.swizzled_shared<{vec = 1, perPhase = 1, maxPhase = 1, order = [0]}>
9695
#smem = #ttg.shared_memory
9796
module attributes {"ttg.num-warps" = 8 : i32, ttg.profile_scratch_memory_alignment = 128 : i32, ttg.profile_scratch_memory_size = 384 : i32} {
9897
// CHECK-LABEL: convert_smem_finalize
9998
// CHECK: llvm.inline_asm asm_dialect = att operand_attrs = [] "s_getreg_b32 $0, hwreg(HW_REG_XCC_ID, 0, 3)", "=s" : () -> i32
10099
// CHECK: llvm.inline_asm asm_dialect = att operand_attrs = [] "s_getreg_b32 $0, hwreg(HW_REG_HW_ID, 8, 4)", "=s" : () -> i32
101100
// CHECK: llvm.inline_asm asm_dialect = att operand_attrs = [] "s_getreg_b32 $0, hwreg(HW_REG_HW_ID, 13, 3)", "=s" : () -> i32
102-
// CONVERT-BUILTIN: llvm.cond_br %{{.*}}, ^bb1, ^bb9
103-
// CONVERT-BUILTIN: ^bb1: // pred: ^bb0
101+
// CONVERT-BUILTIN: llvm.cond_br %{{.*}}, ^bb1, ^bb3
102+
// CONVERT-BUILTIN: ^bb1:
104103
// CONVERT-BUILTIN: llvm.store %{{.*}}, %{{.*}} : i32, !llvm.ptr<1>
105104
// CONVERT-BUILTIN: llvm.br ^bb2(%{{.*}} : i32)
106-
// CONVERT-BUILTIN: ^bb2(%{{.*}}: i32): // 2 preds: ^bb1, ^bb8
107-
// CONVERT-BUILTIN: llvm.cond_br %1, ^bb3, ^bb4
108-
// CONVERT-BUILTIN: bb3: // pred: ^bb2
109-
// CONVERT-BUILTIN: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr<3> -> i32
110-
// CONVERT-BUILTIN: llvm.br ^bb5(%{{.*}} : i32)
111-
// CONVERT-BUILTIN: ^bb4: // pred: ^bb2
112-
// CONVERT-BUILTIN: llvm.br ^bb5(%{{.*}} : i32)
113-
// CONVERT-BUILTIN: ^bb5(%{{.*}}: i32): // 2 preds: ^bb3, ^bb4
105+
// CONVERT-BUILTIN: ^bb2(%{{.*}}: i32):
106+
// CONVERT-BUILTIN: llvm.load %{{.*}} : !llvm.ptr<3> -> i32
114107
// CONVERT-BUILTIN: llvm.store %{{.*}}, %{{.*}} : i32, !llvm.ptr<1>
115-
// CONVERT-BUILTIN: llvm.cond_br %{{.*}}, ^bb6, ^bb7
116-
// CONVERT-BUILTIN: ^bb6: // pred: ^bb5
117-
// CONVERT-BUILTIN: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr<3> -> i32
118-
// CONVERT-BUILTIN: llvm.br ^bb8(%{{.*}} : i32)
119-
// CONVERT-BUILTIN: ^bb7: // pred: ^bb5
120-
// CONVERT-BUILTIN: llvm.br ^bb8(%{{.*}} : i32)
121-
// CONVERT-BUILTIN: ^bb8(%{{.*}}: i32): // 2 preds: ^bb6, ^bb7
108+
// CONVERT-BUILTIN: llvm.load %{{.*}} : !llvm.ptr<3> -> i32
122109
// CONVERT-BUILTIN: llvm.store %{{.*}}, %{{.*}} : i32, !llvm.ptr<1>
123-
// CONVERT-BUILTIN: llvm.cond_br %{{.*}}, ^bb2(%{{.*}} : i32), ^bb9
124-
// CONVERT-BUILTIN: ^bb9: // 2 preds: ^bb0, ^bb8
110+
// CONVERT-BUILTIN: llvm.cond_br %{{.*}}, ^bb2(%{{.*}} : i32), ^bb3
111+
// CONVERT-BUILTIN: ^bb3:
125112
// CHECK: llvm.return
126113
llvm.func @convert_smem_finalize(%arg: !llvm.ptr<1>) attributes {noinline = false, nvvm.kernel = 1 : ui1} {
127114
%0 = ttg.local_alloc : () -> !ttg.memdesc<512xi32, #shared, #smem, mutable>

third_party/amd/include/Dialect/TritonAMDGPU/IR/TritonAMDGPUOps.td

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,56 @@ def TTG_UpcastMXFPOp : TT_AMDGPU_Op<"upcast_mxfp", [Pure]> {
519519
}];
520520
}
521521

522+
//===----------------------------------------------------------------------===//
523+
// MaskedLoadOp
524+
//===----------------------------------------------------------------------===//
525+
def MaskedLoadOp : TT_AMDGPU_Op<"masked_load", []> {
526+
let summary = "Masked load operation";
527+
let description = [{
528+
Load operation with masking support. If the mask is true, loads from the given pointer; otherwise the result is `falseVal`. Works with LLVM types as a utility op for making LLVM conversion easier.
529+
}];
530+
let arguments = (ins
531+
LLVM_AnyPointer:$ptr,
532+
I1:$mask,
533+
LLVM_Type:$falseVal,
534+
DefaultValuedAttr<TT_CacheModifierAttr, "::mlir::triton::CacheModifier::NONE">:$cache,
535+
DefaultValuedAttr<BoolAttr, "false">:$forceNoAlias
536+
);
537+
538+
let results = (outs LLVM_Type:$result);
539+
540+
let assemblyFormat = [{
541+
$ptr `,` $mask `,` $falseVal
542+
oilist(`cacheModifier` `=` $cache)
543+
(`forceNoAlias` $forceNoAlias^)?
544+
attr-dict `:` functional-type(operands, results)
545+
}];
546+
}
547+
548+
//===----------------------------------------------------------------------===//
549+
// MaskedStoreOp
550+
//===----------------------------------------------------------------------===//
551+
def MaskedStoreOp : TT_AMDGPU_Op<"masked_store", []> {
552+
let summary = "Masked Store operation";
553+
let description = [{
554+
Store operation with masking support. If the mask is true, stores the given value to the given pointer; otherwise no store is performed. Works with LLVM types as a utility op for making LLVM conversion easier.
555+
}];
556+
let arguments = (ins
557+
LLVM_AnyPointer:$ptr,
558+
LLVM_Type:$value,
559+
I1:$mask,
560+
DefaultValuedAttr<TT_CacheModifierAttr, "::mlir::triton::CacheModifier::NONE">:$cache,
561+
DefaultValuedAttr<BoolAttr, "false">:$forceNoAlias
562+
);
563+
564+
let assemblyFormat = [{
565+
$ptr `,` $value `,` $mask
566+
oilist(`cacheModifier` `=` $cache)
567+
(`forceNoAlias` $forceNoAlias^)?
568+
attr-dict `:` type(operands)
569+
}];
570+
}
571+
522572
//===----------------------------------------------------------------------===//
523573
// ScaledUpcastFp4Op
524574
//===----------------------------------------------------------------------===//
@@ -579,7 +629,6 @@ def ScaledUpcastFp8Op : TT_AMDGPU_Op<"scaled_upcast_fp8", [
579629
`:` type($input) `,` type($scale) `->` type($output)
580630
}];
581631
}
582-
583632
//===----------------------------------------------------------------------===//
584633
// InThreadTransposeOp
585634
//===----------------------------------------------------------------------===//

third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp

Lines changed: 2 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
#include "AsyncUtility.h"
44
#include "Utility.h"
5+
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
56
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
67
#include "mlir/Pass/Pass.h"
78
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
89
#include "triton/Conversion/TritonGPUToLLVM/Utility.h"
9-
1010
namespace mlir::triton {
1111
#define GEN_PASS_DEF_CONVERTBUILTINFUNCTOLLVM
1212
#include "TritonAMDGPUToLLVM/Passes.h.inc"
@@ -24,27 +24,14 @@ class CallOpConversion : public OpRewritePattern<LLVM::CallOp> {
2424
LogicalResult
2525
matchAndRewrite(LLVM::CallOp callOp,
2626
mlir::PatternRewriter &rewriter) const override {
27-
if (isPredicatedLoad(callOp)) {
28-
return convertPredicatedLoad(callOp, rewriter);
29-
} else if (isPredicatedStore(callOp)) {
30-
return convertPredicatedStore(callOp, rewriter);
31-
} else if (isWrappedLLVMIntrinsic(callOp)) {
27+
if (isWrappedLLVMIntrinsic(callOp)) {
3228
return convertToLLVMIntrinsic(callOp, rewriter);
3329
} else {
3430
return failure();
3531
}
3632
}
3733

3834
private:
39-
bool isPredicatedLoad(LLVM::CallOp callOp) const {
40-
return callOp.getCallee().value().contains(mlir::LLVM::AMD::predicatedLoad);
41-
}
42-
43-
bool isPredicatedStore(LLVM::CallOp callOp) const {
44-
return callOp.getCallee().value().contains(
45-
mlir::LLVM::AMD::predicatedStore);
46-
}
47-
4835
bool isWrappedLLVMIntrinsic(LLVM::CallOp callOp) const {
4936
if (std::optional<StringRef> callee = callOp.getCallee()) {
5037
if (callee.value().starts_with("__triton_hip_")) {
@@ -54,91 +41,6 @@ class CallOpConversion : public OpRewritePattern<LLVM::CallOp> {
5441
return false;
5542
}
5643

57-
LogicalResult convertPredicatedStore(LLVM::CallOp callOp,
58-
mlir::PatternRewriter &rewriter) const {
59-
auto operands = callOp.getOperands();
60-
61-
auto loc = callOp.getLoc();
62-
auto ptr = operands[0];
63-
auto val = operands[1];
64-
auto pred = operands[2];
65-
66-
Block *currentBlock = rewriter.getInsertionBlock();
67-
Block *afterStore =
68-
rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint());
69-
Block *trueBlock = rewriter.createBlock(afterStore);
70-
rewriter.setInsertionPointToEnd(currentBlock);
71-
rewriter.create<LLVM::CondBrOp>(loc, pred, trueBlock, afterStore);
72-
rewriter.setInsertionPointToStart(trueBlock);
73-
// | vialatile | non-tmp | gcn instr gfx94
74-
// LLVM::StoreOp | 0 | 0 | (cg) global store
75-
// | 0 | 1 | (cs) global store nt
76-
// | 1 | 0/1 | (wt) global store sc0 sc1
77-
auto [volatileFlag, nonTmpFlag] =
78-
mlir::LLVM::AMD::getCacheModifierFlagsForPredicatedCall(callOp);
79-
int alignment = 0;
80-
if (auto vecTy = dyn_cast<VectorType>(val.getType())) {
81-
auto elemTy = vecTy.getElementType();
82-
auto elemSizeInBytes = elemTy.getIntOrFloatBitWidth() / 8;
83-
alignment = elemSizeInBytes * vecTy.getNumElements();
84-
}
85-
86-
auto storeOp = rewriter.create<LLVM::StoreOp>(loc, val, ptr, alignment,
87-
volatileFlag, nonTmpFlag);
88-
bool addAsyncAliasScopes =
89-
callOp.getCallee().value().contains(mlir::LLVM::AMD::noAliasAsyncLoads);
90-
if (addAsyncAliasScopes) {
91-
AMD::addLocalLoadNoAliasScope(storeOp);
92-
}
93-
rewriter.create<LLVM::BrOp>(loc, afterStore);
94-
rewriter.setInsertionPointToStart(afterStore);
95-
rewriter.eraseOp(callOp);
96-
return mlir::success();
97-
}
98-
99-
LogicalResult convertPredicatedLoad(LLVM::CallOp callOp,
100-
mlir::PatternRewriter &rewriter) const {
101-
auto operands = callOp.getOperands();
102-
auto result = callOp.getResult();
103-
104-
auto loc = callOp.getLoc();
105-
auto elemTy = result.getType();
106-
auto ptr = operands[0];
107-
auto pred = operands[1];
108-
auto falseVal = operands[2];
109-
110-
Block *currentBlock = rewriter.getInsertionBlock();
111-
Block *afterLoad =
112-
rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint());
113-
afterLoad->addArgument({elemTy}, {loc});
114-
Block *trueBlock = rewriter.createBlock(afterLoad);
115-
Block *falseBlock =
116-
rewriter.splitBlock(trueBlock, rewriter.getInsertionPoint());
117-
rewriter.setInsertionPointToEnd(currentBlock);
118-
rewriter.create<LLVM::CondBrOp>(loc, pred, trueBlock, falseBlock);
119-
rewriter.setInsertionPointToStart(trueBlock);
120-
// | vialatile | non-tmp | gcn instr gfx94
121-
// LLVM::LoadOp | 0 | 0 | (ca) global load
122-
// | 0/1 | 1 | (cg) global load nt
123-
// | 1 | 0 | (cv) flat load sc0 sc1
124-
auto [volatileFlag, nonTmpFlag] =
125-
mlir::LLVM::AMD::getCacheModifierFlagsForPredicatedCall(callOp);
126-
auto loadOp = rewriter.create<LLVM::LoadOp>(
127-
loc, elemTy, ptr, /*alignment=*/0, volatileFlag, nonTmpFlag);
128-
bool addAsyncNoAliasInfo =
129-
callOp.getCallee().value().contains(mlir::LLVM::AMD::noAliasAsyncLoads);
130-
if (addAsyncNoAliasInfo) {
131-
AMD::addLocalLoadNoAliasScope(loadOp);
132-
}
133-
rewriter.create<LLVM::BrOp>(loc, loadOp->getResult(0), afterLoad);
134-
rewriter.setInsertionPointToStart(falseBlock);
135-
rewriter.create<LLVM::BrOp>(loc, falseVal, afterLoad);
136-
rewriter.setInsertionPointToStart(afterLoad);
137-
Value loadVal = afterLoad->getArgument(0);
138-
rewriter.replaceOp(callOp, loadVal);
139-
return mlir::success();
140-
}
141-
14244
// Utility function to create fast exponential operation
14345
Operation *createFastExpf(mlir::PatternRewriter &rewriter, Location loc,
14446
Value input, Type returnType, bool ftz) const {
@@ -253,7 +155,6 @@ struct ConvertBuiltinFuncToLLVM
253155

254156
RewritePatternSet patterns(context);
255157
patterns.add<CallOpConversion>(context, this->ftz);
256-
257158
if (mlir::applyPatternsGreedily(mod, std::move(patterns), config)
258159
.failed()) {
259160
signalPassFailure();

0 commit comments

Comments
 (0)