diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 77f19343653db..66869f71fbf5d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -540,32 +540,32 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   }
 
   // Now see if we can emit a target-specific builtin.
-  if (mlir::Value v = emitTargetBuiltinExpr(builtinID, e, returnValue)) {
-    switch (evalKind) {
-    case cir::TEK_Scalar:
-      if (mlir::isa<cir::VoidType>(v.getType()))
-        return RValue::get(nullptr);
-      return RValue::get(v);
-    case cir::TEK_Aggregate:
-      cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
-      return getUndefRValue(e->getType());
-    case cir::TEK_Complex:
-      llvm_unreachable("No current target builtin returns complex");
-    }
-    llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+  RValue value = emitTargetBuiltinExpr(builtinID, e, returnValue);
+
+  if (value.isScalar()) {
+    if (!value.getValue() ||
+        mlir::isa<cir::VoidType>(value.getValue().getType()))
+      return RValue::getIgnored();
+
+    return value;
   }
 
-  cgm.errorNYI(e->getSourceRange(),
-               std::string("unimplemented builtin call: ") +
-                   getContext().BuiltinInfo.getName(builtinID));
-  return getUndefRValue(e->getType());
+  if (value.isAggregate()) {
+    cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
+    return getUndefRValue(e->getType());
+  }
+
+  if (value.isComplex()) {
+    llvm_unreachable("No current target builtin returns complex");
+  }
+
+  llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
 }
 
-static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
-                                             unsigned builtinID,
-                                             const CallExpr *e,
-                                             ReturnValueSlot &returnValue,
-                                             llvm::Triple::ArchType arch) {
+static std::optional<mlir::Value>
+emitTargetArchBuiltinExpr(CIRGenFunction *cgf, unsigned builtinID,
+                          const CallExpr *e, ReturnValueSlot &returnValue,
+                          llvm::Triple::ArchType arch) {
   // When compiling in HipStdPar mode we have to be conservative in rejecting
   // target specific features in the FE, and defer the possible error to the
   // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
@@ -616,18 +616,28 @@ static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
   }
 }
 
-mlir::Value
-CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e,
-                                      ReturnValueSlot &returnValue) {
+RValue CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID,
+                                             const CallExpr *e,
+                                             ReturnValueSlot &returnValue) {
+  std::optional<mlir::Value> valueOpt;
   if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) {
     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
-    return emitTargetArchBuiltinExpr(
+    valueOpt = emitTargetArchBuiltinExpr(
         this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e,
         returnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
+  } else {
+    valueOpt = emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
+                                         getTarget().getTriple().getArch());
+  }
+
+  if (!valueOpt) {
+    cgm.errorNYI(e->getSourceRange(),
+                 std::string("unimplemented builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return getUndefRValue(e->getType());
   }
 
-  return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
-                                   getTarget().getTriple().getArch());
+  return RValue::get(*valueOpt);
 }
 
 mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(
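The refactor above changes the contract between the generic and target-specific emitters. Below is a minimal, standalone sketch of the resulting three-state protocol; it is illustrative only (`Value`, `Outcome`, `emitArchBuiltin`, and `classify` are stand-in names, not part of the patch): the per-architecture emitter returns `std::optional<mlir::Value>`, where `std::nullopt` means the builtin is not implemented yet (the caller reports `errorNYI`), an engaged-but-null value means the builtin was emitted and produces no result, and anything else is the scalar result.

```cpp
// Illustrative sketch only: mirrors the nullopt / null-value / value
// distinction used by emitTargetBuiltinExpr above, with a stand-in Value type.
#include <cassert>
#include <optional>

struct Value {                     // stand-in for mlir::Value
  void *impl = nullptr;
  explicit operator bool() const { return impl != nullptr; }
};

enum class Outcome { NotImplemented, EmittedNoResult, EmittedValue };

// Plays the role of emitTargetArchBuiltinExpr for three hypothetical builtins.
std::optional<Value> emitArchBuiltin(int builtinID) {
  static int token;                // something for a real result to point at
  switch (builtinID) {
  case 0:
    return std::nullopt;           // unknown builtin: caller emits errorNYI
  case 1:
    return Value{};                // e.g. a prefetch: side effect, no result
  default:
    return Value{&token};          // builtin that yields a scalar value
  }
}

Outcome classify(std::optional<Value> v) {
  if (!v)
    return Outcome::NotImplemented;
  return *v ? Outcome::EmittedValue : Outcome::EmittedNoResult;
}

int main() {
  assert(classify(emitArchBuiltin(0)) == Outcome::NotImplemented);
  assert(classify(emitArchBuiltin(1)) == Outcome::EmittedNoResult);
  assert(classify(emitArchBuiltin(2)) == Outcome::EmittedValue);
  return 0;
}
```

In the old path a null `mlir::Value` meant "not handled", so an emitter could not signal "handled, but no result" by returning a null value; with the optional, `emitPrefetch` can simply `return {}` and `emitBuiltinExpr` maps that to `RValue::getIgnored()`.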
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ee6900141647f..ad5742aef174a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -20,6 +20,11 @@
 using namespace clang;
 using namespace clang::CIRGen;
 
+/// Get integer from a mlir::Value that is an int constant or a constant op.
+static int64_t getIntValueFromConstOp(mlir::Value val) {
+  return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
+}
+
 template <typename... Operands>
 static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
                                        const std::string &str,
@@ -33,6 +38,32 @@ static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
       .getResult();
 }
 
+static mlir::Value emitPrefetch(CIRGenFunction &cgf, unsigned builtinID,
+                                const CallExpr *e,
+                                const SmallVector<mlir::Value> &ops) {
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  mlir::Location location = cgf.getLoc(e->getExprLoc());
+  mlir::Type voidTy = builder.getVoidTy();
+  mlir::Value address = builder.createPtrBitcast(ops[0], voidTy);
+  bool isWrite{};
+  int locality{};
+
+  assert((builtinID == X86::BI_mm_prefetch || builtinID == X86::BI_m_prefetchw ||
+          builtinID == X86::BI_m_prefetch) && "Expected prefetch builtin");
+
+  if (builtinID == X86::BI_mm_prefetch) {
+    int hint = getIntValueFromConstOp(ops[1]);
+    isWrite = (hint >> 2) & 0x1;
+    locality = hint & 0x3;
+  } else {
+    isWrite = (builtinID == X86::BI_m_prefetchw);
+    locality = 0x3;
+  }
+
+  cir::PrefetchOp::create(builder, location, address, locality, isWrite);
+  return {};
+}
+
 // OG has unordered comparison as a form of optimization in addition to
 // ordered comparison, while CIR doesn't.
 //
@@ -68,8 +99,8 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
   return bitCast;
 }
 
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
-                                               const CallExpr *expr) {
+std::optional<mlir::Value>
+CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
     cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
     return {};
@@ -120,6 +151,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI_mm_sfence:
     return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
   case X86::BI_mm_prefetch:
+  case X86::BI_m_prefetch:
+  case X86::BI_m_prefetchw:
+    return emitPrefetch(*this, builtinID, expr, ops);
   case X86::BI__rdtsc:
   case X86::BI__builtin_ia32_rdtscp:
   case X86::BI__builtin_ia32_lzcnt_u16:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index b22bf2d87fc10..db1eac4116d0d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1759,9 +1759,8 @@ class CIRGenFunction : public CIRGenTypeCache {
                                      bool buildingTopLevelCase);
   mlir::LogicalResult emitSwitchStmt(const clang::SwitchStmt &s);
 
-  mlir::Value emitTargetBuiltinExpr(unsigned builtinID,
-                                    const clang::CallExpr *e,
-                                    ReturnValueSlot &returnValue);
+  RValue emitTargetBuiltinExpr(unsigned builtinID, const clang::CallExpr *e,
+                               ReturnValueSlot &returnValue);
 
   /// Given a value and its clang type, returns the value casted to its memory
   /// representation.
@@ -1801,7 +1800,8 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);
 
-  mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
+  std::optional<mlir::Value> emitX86BuiltinExpr(unsigned builtinID,
+                                                const CallExpr *e);
 
   /// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
   /// nonnull, if \p LHS is marked _Nonnull.
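Before moving on to the tests, here is a standalone illustration of the hint decoding done in `emitPrefetch` above. It is illustrative only: `Hint`, `Prefetch`, and `decodeHint` are local stand-ins, and the `HINT_*` values are assumed to match the `_MM_HINT_*` macros from clang's `xmmintrin.h`. Bit 2 of the `_mm_prefetch` hint selects a write ("exclusive") prefetch and the low two bits give the temporal-locality level, which is why hint 7 lowers to `write locality(3)` while hints 0 and 3 lower to `read locality(0)` and `read locality(3)` in the tests below.

```cpp
// Standalone illustration of the _mm_prefetch hint encoding handled by
// emitPrefetch above; the HINT_* values mirror xmmintrin.h's _MM_HINT_* macros.
#include <cassert>

enum Hint { HINT_NTA = 0, HINT_T2 = 1, HINT_T1 = 2, HINT_T0 = 3,
            HINT_ET1 = 6, HINT_ET0 = 7 };

struct Prefetch { bool isWrite; int locality; };

Prefetch decodeHint(int hint) {
  return {
      static_cast<bool>((hint >> 2) & 0x1), // bit 2: exclusive/write prefetch
      hint & 0x3                            // bits 0-1: temporal locality level
  };
}

int main() {
  assert(!decodeHint(HINT_T0).isWrite && decodeHint(HINT_T0).locality == 3);
  assert(!decodeHint(HINT_NTA).isWrite && decodeHint(HINT_NTA).locality == 0);
  assert(decodeHint(HINT_ET0).isWrite && decodeHint(HINT_ET0).locality == 3);
  return 0;
}
```

`_m_prefetch` and `_m_prefetchw` take no hint argument, so the emitter hard-codes locality 3 and derives the write flag from the builtin ID alone.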
diff --git a/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c b/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c
new file mode 100644
index 0000000000000..7d7ce348b8d88
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/prefetchw-builtin.c
@@ -0,0 +1,36 @@
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+
+#include <x86intrin.h>
+
+void test_m_prefetch_w(void *p) {
+  // CIR-LABEL: test_m_prefetch_w
+  // LLVM-LABEL: test_m_prefetch_w
+  // OGCG-LABEL: test_m_prefetch_w
+  return _m_prefetchw(p);
+  // CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
+  // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
+}
+
+void test_m_prefetch(void *p) {
+  // CIR-LABEL: test_m_prefetch
+  // LLVM-LABEL: test_m_prefetch
+  // OGCG-LABEL: test_m_prefetch
+  return _m_prefetch(p);
+  // CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+  // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+}
diff --git a/clang/test/CIR/CodeGen/X86/sse-builtins.c b/clang/test/CIR/CodeGen/X86/sse-builtins.c
index 3a61018741958..e8203265f89f3 100644
--- a/clang/test/CIR/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/sse-builtins.c
@@ -26,3 +26,33 @@ void test_mm_sfence(void) {
   // LLVM: call void @llvm.x86.sse.sfence()
   // OGCG: call void @llvm.x86.sse.sfence()
 }
+
+void test_mm_prefetch(char const* p) {
+  // CIR-LABEL: test_mm_prefetch
+  // LLVM-LABEL: test_mm_prefetch
+  // OGCG-LABEL: test_mm_prefetch
+  _mm_prefetch(p, 0);
+  // CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
+  // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
+}
+
+void test_mm_prefetch_local(char const* p) {
+  // CIR-LABEL: test_mm_prefetch_local
+  // LLVM-LABEL: test_mm_prefetch_local
+  // OGCG-LABEL: test_mm_prefetch_local
+  _mm_prefetch(p, 3);
+  // CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+  // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
+}
+
+void test_mm_prefetch_write(char const* p) {
+  // CIR-LABEL: test_mm_prefetch_write
+  // LLVM-LABEL: test_mm_prefetch_write
+  // OGCG-LABEL: test_mm_prefetch_write
+  _mm_prefetch(p, 7);
+  // CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
+  // OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
+}
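One closing note on the expected LLVM output: the operands of `llvm.prefetch.p0` are the address, a read/write flag (0 = read, 1 = write), a locality level (0 through 3), and a cache-type flag (1 = data), which is exactly what the CHECK lines above encode. The snippet below is a purely illustrative usage sketch, not part of the patch, spelling the same hints with the named `_MM_HINT_*` constants rather than the raw 0, 3, and 7 used in the tests.

```cpp
// Illustrative user code only (x86 target); the hint macros and the _m_*
// intrinsics come from xmmintrin.h / prfchwintrin.h via x86intrin.h.
#include <x86intrin.h>

void warm_lines(const char *src, char *dst) {
  _mm_prefetch(src, _MM_HINT_T0);       // read prefetch, locality 3
  _mm_prefetch(src, _MM_HINT_NTA);      // read prefetch, locality 0 (non-temporal)
  _mm_prefetch(dst, _MM_HINT_ET0);      // write prefetch, locality 3
  _m_prefetch(const_cast<char *>(src)); // read prefetch, locality 3
  _m_prefetchw(dst);                    // write prefetch, locality 3
}
```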