[MLIR][OpenMP] Lower the simd nontemporal clause to !nontemporal metadata

OpenMPIRBuilder::applySimd accepts an optional list of nontemporal variables.
Every load or store in the loop's reachable blocks whose pointer operand is one
of those variables, either directly or through a single GEP, is tagged with
!nontemporal metadata. The MLIR translation forwards the operands of the
omp.simd nontemporal clause.

Review notes:
  * The variable list is passed as ArrayRef<Value *> so it is not copied once
    per basic block.
  * The !nontemporal node is !{i32 1}, the form LangRef requires.
  * Only one GEP level is looked through; deeper address computations are not
    matched.
  * The "index" lines below were carried over from the previous revision of
    this patch; their post-image blob ids are stale.

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 4be0159fb1dd9..83b24136d58a8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1187,7 +1187,8 @@ class OpenMPIRBuilder {
   void applySimd(CanonicalLoopInfo *Loop,
                  MapVector<Value *, Value *> AlignedVars, Value *IfCond,
                  omp::OrderKind Order, ConstantInt *Simdlen,
-                 ConstantInt *Safelen);
+                 ConstantInt *Safelen,
+                 ArrayRef<Value *> NontemporalVars = {});
 
   /// Generator for '#omp flush'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 515b74cbb7588..6a32f3aa7703a 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5183,10 +5183,32 @@ OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
   return 0;
 }
 
+/// Attach \p Nontemporal as !nontemporal metadata to every load and store in
+/// \p Block whose pointer operand is one of \p NontemporalVars, either
+/// directly or through a single GetElementPtrInst based on such a variable.
+static void addNonTemporalMetadata(BasicBlock *Block, MDNode *Nontemporal,
+                                   ArrayRef<Value *> NontemporalVars) {
+  for (Instruction &I : *Block) {
+    Value *MemPtr = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(&I))
+      MemPtr = LI->getPointerOperand();
+    else if (auto *SI = dyn_cast<StoreInst>(&I))
+      MemPtr = SI->getPointerOperand();
+    if (!MemPtr)
+      continue;
+    // Look through one GEP so an access based on a listed variable matches.
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(MemPtr))
+      MemPtr = GEP->getPointerOperand();
+    if (is_contained(NontemporalVars, MemPtr))
+      I.setMetadata(LLVMContext::MD_nontemporal, Nontemporal);
+  }
+}
+
 void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
                                 MapVector<Value *, Value *> AlignedVars,
                                 Value *IfCond, OrderKind Order,
-                                ConstantInt *Simdlen, ConstantInt *Safelen) {
+                                ConstantInt *Simdlen, ConstantInt *Safelen,
+                                ArrayRef<Value *> NontemporalVars) {
   LLVMContext &Ctx = Builder.getContext();
 
   Function *F = CanonicalLoop->getFunction();
@@ -5283,6 +5305,14 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
   }
 
   addLoopMetadata(CanonicalLoop, LoopMDList);
+  // Tag loads and stores of the nontemporal variables. LangRef requires the
+  // !nontemporal node to hold a single i32 entry of value 1.
+  if (!NontemporalVars.empty()) {
+    MDNode *NontemporalNode =
+        MDNode::get(Ctx, {ConstantAsMetadata::get(Builder.getInt32(1))});
+    for (BasicBlock *BB : Reachable)
+      addNonTemporalMetadata(BB, NontemporalNode, NontemporalVars);
+  }
 }
 
 /// Create the TargetMachine object to query the backend for optimization
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0cba8d80681f1..1a597a3608274 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1867,11 +1867,16 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
 
   llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
   llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
+
+  // Translate the nontemporal clause operands to their LLVM IR values.
+  llvm::SmallVector<llvm::Value *> nontemporalVars =
+      moduleTranslation.lookupValues(simdOp.getNontemporalVars());
+
   ompBuilder->applySimd(loopInfo, alignedVars,
                         simdOp.getIfExpr()
                             ? moduleTranslation.lookupValue(simdOp.getIfExpr())
                             : nullptr,
-                        order, simdlen, safelen);
+                        order, simdlen, safelen, nontemporalVars);
 
   builder.restoreIP(afterIP);
   return success();
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 95e12e5bc4e74..34e42ed037cc6 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -872,6 +872,29 @@ llvm.func @simd_order() {
 // CHECK-NEXT: llvm.loop.vectorize.width{{.*}}i64 2
 // -----
 
+// CHECK-LABEL: @simd_nontemporal
+llvm.func @simd_nontemporal() {
+  %0 = llvm.mlir.constant(10 : i64) : i64
+  %1 = llvm.mlir.constant(1 : i64) : i64
+  %2 = llvm.alloca %1 x i64 : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %1 x i64 : (i64) -> !llvm.ptr
+  //CHECK: %[[A_ADDR:.*]] = alloca i64, i64 1, align 8
+  //CHECK: %[[B_ADDR:.*]] = alloca i64, i64 1, align 8
+  //CHECK: %[[B:.*]] = load i64, ptr %[[B_ADDR]], align 4, !nontemporal ![[NT:[0-9]+]], !llvm.access.group ![[AG:[0-9]+]]
+  //CHECK: store i64 %[[B]], ptr %[[A_ADDR]], align 4, !nontemporal ![[NT]], !llvm.access.group ![[AG]]
+  omp.simd nontemporal(%2, %3 : !llvm.ptr, !llvm.ptr) {
+    omp.loop_nest (%arg0) : i64 = (%1) to (%0) inclusive step (%1) {
+      %4 = llvm.load %3 : !llvm.ptr -> i64
+      llvm.store %4, %2 : i64, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+// CHECK: ![[NT]] = !{i32 1}
+// -----
+
 llvm.func @body(i64)
 
 llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {