-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[flang][cuda] Use nvvm operation for match any #134283
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: The string used for the intrinsic was not the correct one: "llvm.nvvm.match.any.sync.i32p". There was an extra `p` at the end. Use the NVVM operation instead so we don't duplicate it. Full diff: https://github.com/llvm/llvm-project/pull/134283.diff 3 Files Affected:
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index e999796c23718..caa5686dd7013 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -23,6 +23,7 @@
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Func/Extensions/InlinerExtension.h"
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/InitAllDialects.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"
@@ -37,7 +38,8 @@ namespace fir::support {
mlir::scf::SCFDialect, mlir::arith::ArithDialect, \
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
mlir::vector::VectorDialect, mlir::math::MathDialect, \
- mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect
+ mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect, \
+ mlir::NVVM::NVVMDialect
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 8aed288d128b6..cbbc24ec6ea50 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -48,6 +48,7 @@
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "llvm/Support/CommandLine.h"
@@ -6548,23 +6549,11 @@ IntrinsicLibrary::genMatchAnySync(mlir::Type resultType,
assert(args.size() == 2);
bool is32 = args[1].getType().isInteger(32) || args[1].getType().isF32();
- llvm::StringRef funcName =
- is32 ? "llvm.nvvm.match.any.sync.i32p" : "llvm.nvvm.match.any.sync.i64p";
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32Ty = builder.getI32Type();
- mlir::Type i64Ty = builder.getI64Type();
- mlir::Type valTy = is32 ? i32Ty : i64Ty;
+ mlir::Value arg1 = args[1];
+ if (arg1.getType().isF32() || arg1.getType().isF64())
+ arg1 = builder.create<fir::ConvertOp>(loc, is32 ? builder.getI32Type() : builder.getI64Type(), arg1);
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {i32Ty, valTy}, {i32Ty});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- llvm::SmallVector<mlir::Value> filteredArgs;
- filteredArgs.push_back(args[0]);
- if (args[1].getType().isF32() || args[1].getType().isF64())
- filteredArgs.push_back(builder.create<fir::ConvertOp>(loc, valTy, args[1]));
- else
- filteredArgs.push_back(args[1]);
- return builder.create<fir::CallOp>(loc, funcOp, filteredArgs).getResult(0);
+ return builder.create<mlir::NVVM::MatchSyncOp>(loc, resultType, args[0], arg1, mlir::NVVM::MatchSyncKind::any).getResult();
}
// MATMUL
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 6a7fee73f338a..ae86eec54ef32 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -143,12 +143,10 @@ attributes(device) subroutine testMatchAny()
end subroutine
! CHECK-LABEL: func.func @_QPtestmatchany()
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i32p
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i64p
-! CHECK: fir.convert %{{.*}} : (f32) -> i32
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i32p
-! CHECK: fir.convert %{{.*}} : (f64) -> i64
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i64p
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i32 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i64 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i32 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i64 -> i32
attributes(device) subroutine testAtomic(aa, n)
integer :: aa(*)
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
The string used for the intrinsic was not the correct one: "llvm.nvvm.match.any.sync.i32p". There was an extra `p` at the end.
Use the NVVM operation instead so we don't duplicate it.