66 changes: 38 additions & 28 deletions clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -540,32 +540,32 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
}

// Now see if we can emit a target-specific builtin.
if (mlir::Value v = emitTargetBuiltinExpr(builtinID, e, returnValue)) {
switch (evalKind) {
case cir::TEK_Scalar:
if (mlir::isa<cir::VoidType>(v.getType()))
return RValue::get(nullptr);
return RValue::get(v);
case cir::TEK_Aggregate:
cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
return getUndefRValue(e->getType());
case cir::TEK_Complex:
llvm_unreachable("No current target builtin returns complex");
}
llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
RValue value = emitTargetBuiltinExpr(builtinID, e, returnValue);

if (value.isScalar()) {
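// A null scalar value here means the target builtin was emitted but produces
// no result (e.g. a void intrinsic such as prefetch), so there is nothing to
// hand back to the caller.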
if (!value.getValue() ||
mlir::isa<cir::VoidType>(value.getValue().getType()))
return RValue::getIgnored();

return value;
}

cgm.errorNYI(e->getSourceRange(),
std::string("unimplemented builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return getUndefRValue(e->getType());
if (value.isAggregate()) {
cgm.errorNYI(e->getSourceRange(), "aggregate return value from builtin");
return getUndefRValue(e->getType());
}

if (value.isComplex()) {
llvm_unreachable("No current target builtin returns complex");
}

llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
}

static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
unsigned builtinID,
const CallExpr *e,
ReturnValueSlot &returnValue,
llvm::Triple::ArchType arch) {
static std::optional<mlir::Value>
emitTargetArchBuiltinExpr(CIRGenFunction *cgf, unsigned builtinID,
const CallExpr *e, ReturnValueSlot &returnValue,
llvm::Triple::ArchType arch) {
// When compiling in HipStdPar mode we have to be conservative in rejecting
// target specific features in the FE, and defer the possible error to the
// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
@@ -616,18 +616,28 @@ static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf,
}
}

mlir::Value
CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID, const CallExpr *e,
ReturnValueSlot &returnValue) {
RValue CIRGenFunction::emitTargetBuiltinExpr(unsigned builtinID,
const CallExpr *e,
ReturnValueSlot &returnValue) {
std::optional<mlir::Value> valueOpt;
if (getContext().BuiltinInfo.isAuxBuiltinID(builtinID)) {
assert(getContext().getAuxTargetInfo() && "Missing aux target info");
return emitTargetArchBuiltinExpr(
valueOpt = emitTargetArchBuiltinExpr(
this, getContext().BuiltinInfo.getAuxBuiltinID(builtinID), e,
returnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
} else {
valueOpt = emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
getTarget().getTriple().getArch());
}

if (!valueOpt) {
cgm.errorNYI(e->getSourceRange(),
std::string("unimplemented builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return getUndefRValue(e->getType());
}

return emitTargetArchBuiltinExpr(this, builtinID, e, returnValue,
getTarget().getTriple().getArch());
return RValue::get(*valueOpt);
}

mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(
38 changes: 36 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -20,6 +20,11 @@
using namespace clang;
using namespace clang::CIRGen;

/// Get the integer value from a mlir::Value that is defined by a
/// cir::ConstantOp holding an integer constant.
static int64_t getIntValueFromConstOp(mlir::Value val) {
return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
}

template <typename... Operands>
static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
const std::string &str,
@@ -33,6 +38,32 @@ static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
.getResult();
}

static mlir::Value emitPrefetch(CIRGenFunction &cgf, unsigned builtinID,
const CallExpr *e,
const SmallVector<mlir::Value> &ops) {
CIRGenBuilderTy &builder = cgf.getBuilder();
mlir::Location location = cgf.getLoc(e->getExprLoc());
mlir::Type voidTy = builder.getVoidTy();
mlir::Value address = builder.createPtrBitcast(ops[0], voidTy);
bool isWrite{};
int locality{};

assert((builtinID == X86::BI_mm_prefetch || builtinID == X86::BI_m_prefetchw ||
        builtinID == X86::BI_m_prefetch) &&
       "Expected prefetch builtin");

if (builtinID == X86::BI_mm_prefetch) {
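// The _MM_HINT_* constants encode the write bit in bit 2 and the temporal
// locality level in bits 0-1 (e.g. _MM_HINT_T0 == 3, _MM_HINT_ET0 == 7), so
// the hint operand can be decoded with the masks below.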
int hint = getIntValueFromConstOp(ops[1]);
isWrite = (hint >> 2) & 0x1;
locality = hint & 0x3;
} else {
isWrite = (builtinID == X86::BI_m_prefetchw);
locality = 0x3;
}

cir::PrefetchOp::create(builder, location, address, locality, isWrite);
return {};
Review thread on this return statement:

HendrikHuebner (author): Here I'm not sure if this is correct: can I somehow convert the PrefetchOp into an mlir::Value and return it?

Reviewer (contributor): I requested this change from the incubator because I thought we should be using CIR-specific operations when we have them, and in this case the CIR operation is exactly equivalent to the LLVM intrinsic. I see that our handling of BI__builtin_prefetch returns RValue::get(nullptr) from emitBuiltinExpr, which is what we'd get by returning {} here.

Reviewer (member): You'd have to add a result to the operation. The incubator has support for case X86::BI_mm_prefetch, which should be somewhat similar and could be shared here; it uses cir.llvm.intrinsic directly, which has a result. If there are reasons to deviate here, they should also be replayed in the incubator.

HendrikHuebner (author, Nov 15, 2025): Just returning {} (a.k.a. nullptr) does not work here, I think, because the caller assumes the builtin was not emitted and reports NYI. Instead, how about we make it return an optional<mlir::Value>? If the optional is present, the builtin was generated, and if the contained value is nullptr, there is no return value. I think void-typed values are also a thing in MLIR, so maybe we could use that as well? If you prefer that, how would I construct one?

Reviewer (contributor): I don't think this is going to work, because we return {} for all of the cases where we have emitted an NYI diagnostic and for the default, where we have done nothing and don't intend to do anything.

HendrikHuebner (author, Nov 18, 2025): I replaced the return type of the caller with std::optional<mlir::Value> to handle this case. I tested it locally and it still emits the NYI message for missing builtins, and as you can see the tests are passing. (A short sketch of the resulting contract follows this function.)

}
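For readers following the thread above, a minimal sketch of the return-value contract that was settled on. The wrapper name handleTargetBuiltin is hypothetical; the types and RValue helpers are the ones shown in the diff, and access levels are assumed for illustration:

// Sketch only, not the exact code from this change.
static RValue handleTargetBuiltin(CIRGenFunction &cgf, unsigned builtinID,
                                  const CallExpr *e) {
  std::optional<mlir::Value> v = cgf.emitX86BuiltinExpr(builtinID, e);
  if (!v)                  // std::nullopt: builtin not handled, report NYI.
    return cgf.getUndefRValue(e->getType());
  if (!*v)                 // engaged but null: emitted with no result (prefetch).
    return RValue::getIgnored();
  return RValue::get(*v);  // emitted with a result.
}

In the actual change, this logic is split between emitTargetBuiltinExpr and emitBuiltinExpr in CIRGenBuiltin.cpp, as shown above.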

// OG has unordered comparison as a form of optimization in addition to
// ordered comparison, while CIR doesn't.
//
@@ -68,8 +99,8 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
return bitCast;
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
std::optional<mlir::Value>
CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
return {};
@@ -120,6 +151,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI_mm_sfence:
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
case X86::BI_mm_prefetch:
case X86::BI_m_prefetch:
case X86::BI_m_prefetchw:
return emitPrefetch(*this, builtinID, expr, ops);
case X86::BI__rdtsc:
case X86::BI__builtin_ia32_rdtscp:
case X86::BI__builtin_ia32_lzcnt_u16:
8 changes: 4 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1759,9 +1759,8 @@ class CIRGenFunction : public CIRGenTypeCache {
bool buildingTopLevelCase);
mlir::LogicalResult emitSwitchStmt(const clang::SwitchStmt &s);

mlir::Value emitTargetBuiltinExpr(unsigned builtinID,
const clang::CallExpr *e,
ReturnValueSlot &returnValue);
RValue emitTargetBuiltinExpr(unsigned builtinID, const clang::CallExpr *e,
ReturnValueSlot &returnValue);

/// Given a value and its clang type, returns the value casted to its memory
/// representation.
@@ -1801,7 +1800,8 @@ class CIRGenFunction : public CIRGenTypeCache {

mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);

mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
std::optional<mlir::Value> emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *e);

/// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
/// nonnull, if 1\p LHS is marked _Nonnull.
36 changes: 36 additions & 0 deletions clang/test/CIR/CodeGen/X86/prefetchw-builtin.c
@@ -0,0 +1,36 @@

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG


#include <x86intrin.h>

void test_m_prefetch_w(void *p) {
// CIR-LABEL: test_m_prefetch_w
// LLVM-LABEL: test_m_prefetch_w
// OGCG-LABEL: test_m_prefetch_w
return _m_prefetchw(p);
// CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
}

void test_m_prefetch(void *p) {
// CIR-LABEL: test_m_prefetch
// LLVM-LABEL: test_m_prefetch
// OGCG-LABEL: test_m_prefetch
return _m_prefetch(p);
// CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
}
30 changes: 30 additions & 0 deletions clang/test/CIR/CodeGen/X86/sse-builtins.c
@@ -26,3 +26,33 @@ void test_mm_sfence(void) {
// LLVM: call void @llvm.x86.sse.sfence()
// OGCG: call void @llvm.x86.sse.sfence()
}

void test_mm_prefetch(char const* p) {
// CIR-LABEL: test_mm_prefetch
// LLVM-LABEL: test_mm_prefetch
// OGCG-LABEL: test_mm_prefetch
_mm_prefetch(p, 0);
// CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
}

void test_mm_prefetch_local(char const* p) {
// CIR-LABEL: test_mm_prefetch_local
// LLVM-LABEL: test_mm_prefetch_local
// OGCG-LABEL: test_mm_prefetch_local
_mm_prefetch(p, 3);
// CIR: cir.prefetch read locality(3) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 1)
}

void test_mm_prefetch_write(char const* p) {
// CIR-LABEL: test_mm_prefetch_write
// LLVM-LABEL: test_mm_prefetch_write
// OGCG-LABEL: test_mm_prefetch_write
_mm_prefetch(p, 7);
// CIR: cir.prefetch write locality(3) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
// OGCG: call void @llvm.prefetch.p0(ptr {{.*}}, i32 1, i32 3, i32 1)
}