Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,18 @@ def CIR_ConstantOp : CIR_Op<"const", [

template <typename T>
T getValueAttr() { return mlir::dyn_cast<T>(getValue()); }

llvm::APInt getIntValue() {
if (const auto intAttr = getValueAttr<cir::IntAttr>())
return intAttr.getValue();
llvm_unreachable("Expected an IntAttr in ConstantOp");
}

bool getBoolValue() {
if (const auto boolAttr = getValueAttr<cir::BoolAttr>())
return boolAttr.getValue();
llvm_unreachable("Expected a BoolAttr in ConstantOp");
}
}];

let hasFolder = 1;
Expand Down Expand Up @@ -2579,6 +2591,39 @@ def CIR_FuncOp : CIR_Op<"func", [
}];
}

//===----------------------------------------------------------------------===//
// LLVMIntrinsicCallOp
//===----------------------------------------------------------------------===//

def CIR_LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
let summary = "Call to llvm intrinsic functions that is not defined in CIR";
let description = [{
`cir.llvm.intrinsic` operation represents a call-like expression which has
return type and arguments that maps directly to a llvm intrinsic.
It only records intrinsic `intrinsic_name`.
}];

let results = (outs Optional<CIR_AnyType>:$result);
let arguments = (ins
StrAttr:$intrinsic_name, Variadic<CIR_AnyType>:$arg_ops);

let skipDefaultBuilders = 1;

let assemblyFormat = [{
$intrinsic_name $arg_ops `:` functional-type($arg_ops, $result) attr-dict
}];

let builders = [
OpBuilder<(ins "mlir::StringAttr":$intrinsic_name, "mlir::Type":$resType,
CArg<"mlir::ValueRange", "{}">:$operands), [{
$_state.addAttribute("intrinsic_name", intrinsic_name);
$_state.addOperands(operands);
if (resType)
$_state.addTypes(resType);
}]>,
];
}

//===----------------------------------------------------------------------===//
// CallOp
//===----------------------------------------------------------------------===//
Expand Down
59 changes: 59 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,48 @@
using namespace clang;
using namespace clang::CIRGen;

/// Get integer from a mlir::Value that is an int constant or a constant op.
static int64_t getIntValueFromConstOp(mlir::Value val) {
return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
}

static mlir::Value emitClFlush(CIRGenFunction& cgf,
const CallExpr* e,
mlir::Value& op) {
mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
mlir::Location location = cgf.getLoc(e->getExprLoc());
return cir::LLVMIntrinsicCallOp::create(
cgf.getBuilder(), location,
cgf.getBuilder().getStringAttr("x86.sse2.clflush"), voidTy, op)
.getResult();
}

static mlir::Value emitPrefetch(CIRGenFunction& cgf,
const CallExpr* e,
mlir::Value& addr,
int64_t hint) {
CIRGenBuilderTy& builder = cgf.getBuilder();
mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
mlir::Type sInt32Ty = cir::IntType::get(&cgf.getMLIRContext(), 32, true);
mlir::Value address = builder.createPtrBitcast(addr, voidTy);
mlir::Location location = cgf.getLoc(e->getExprLoc());
mlir::Value rw =
cir::ConstantOp::create(builder, location,
cir::IntAttr::get(sInt32Ty, (hint >> 2) & 0x1));
mlir::Value locality =
cir::ConstantOp::create(builder, location,
cir::IntAttr::get(sInt32Ty, hint & 0x3));
mlir::Value data = cir::ConstantOp::create(builder, location,
cir::IntAttr::get(sInt32Ty, 1));

return cir::LLVMIntrinsicCallOp::create(
builder, location,
builder.getStringAttr("prefetch"), voidTy,
mlir::ValueRange{address, rw, locality, data})
.getResult();
}


mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *e) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
Expand All @@ -43,11 +85,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
// Find out if any arguments are required to be integer constant expressions.
assert(!cir::MissingFeatures::handleBuiltinICEArguments());

// The operands of the builtin call
llvm::SmallVector<mlir::Value, 4> ops;

// `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
// is required to be a constant integer expression.
unsigned ICEArguments = 0;
ASTContext::GetBuiltinTypeError error;
getContext().GetBuiltinType(builtinID, error, &ICEArguments);
assert(error == ASTContext::GE_None && "Error while getting builtin type.");

const unsigned numArgs = e->getNumArgs();
for (unsigned i = 0; i != numArgs; i++) {
ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, e));
}

switch (builtinID) {
default:
return {};
case X86::BI_mm_prefetch:
return emitPrefetch(*this, e, ops[0], getIntValueFromConstOp(ops[1]));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure you're actually hitting this case. The test you added looks like it would get here, but the check in the test is checking for cir.prefetch being generated rather than cir.llvm.intrinsic as the code in this PR would generate.

I just checked and the prefetch test passes without this PR being applied. This is because in xmmintrin.h the definition of _mm_prefetch looks like this:

extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_prefetch(const void *__P, enum _mm_hint __I) {
  /* Current PowerPC will ignores the hint parameters.  */
  __builtin_prefetch(__P);
}

It's calling the general prefetch builtin rather than the X86-specific builtin. That's actually what we want. I'm not sure this handler is needed at all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, thanks for the review. Should we still keep the handler? Clangs codegen also provides a handler for X86::BI_mm_prefetch and I think they test it the same way here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see what's going on now. The version I showed above was from clang/lib/Headers/ppc_wrappers/xmmintrin.h and so isn't the definition we normally find. In the usual version (clang/lib/Headers/xmmintrin.h) I find this:

#ifndef _MSC_VER
// If _MSC_VER is defined, we use the builtin variant of _mm_prefetch.
// Otherwise, we provide this macro, which includes a cast, allowing the user
// to pass a pointer of any time. The _mm_prefetch accepts char to match MSVC.

/// Loads one cache line of data from the specified address to a location
///    closer to the processor.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// void _mm_prefetch(const void *a, const int sel);
/// \endcode
///
/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.
///
/// \param a
///    A pointer to a memory location containing a cache line of data.
/// \param sel
///    A predefined integer constant specifying the type of prefetch
///    operation: \n
///    _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The
///    PREFETCHNTA instruction will be generated. \n
///    _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will
///    be generated. \n
///    _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will
///    be generated. \n
///    _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
///    be generated.
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
                                                 ((sel) >> 2) & 1, (sel) & 0x3))
#endif

So, we hit the BI_mm_prefetch built-in if-and-only-if _MSC_VER is defined (otherwise, the macro above maps it to the general prefetch builtin).

Note that in the test you linked, several of the RUN lines contain -triple=x86_64-windows-msvc and -fms-compatibility, which will cause us to fall back on the BI_mm_prefetch builtin rather than mapping directly to builtin_prefetch.

I also noticed that there are _m_prefectch and _m_prefetchw builtins that were added here and are missing from the incubator.

We're missing a lot of support for Windows, but mostly ABI-related things so the builtin handling may work. However, we should be generating the cir.prefetch operation rather than a call to the LLVM intrinsic. I'd suggested moving this into a separate PR.

case X86::BI_mm_clflush:
return emitClFlush(*this, e, ops[0]);
case X86::BI_mm_lfence:
case X86::BI_mm_pause:
case X86::BI_mm_mfence:
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,28 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e));
}

mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also being added in #167125

See my comments there about using a range-for at the call sites and passing the argument expression directly to this function.

I'd prefer to have the other PR merged first, then rebase this one to use the definition of this function that will be added there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, you mentioned under another PR that you would like to merge this PR first after all. I addressed all the feedback and removed the prefetch builtin from this patch. Could you give it another look? Thanks.

unsigned index,
const CallExpr *e) {
mlir::Value arg{};

// The bit at the specified index indicates whether the argument is required
// to be a constant integer expression.
bool isArgRequiredToBeConstant = (ICEArguments & (1 << index));

if (!isArgRequiredToBeConstant) {
arg = emitScalarExpr(e->getArg(index));
} else {
// If this is required to be a constant, constant fold it so that we
// know that the generated intrinsic gets a ConstantInt.
std::optional<llvm::APSInt> result =
e->getArg(index)->getIntegerConstantExpr(getContext());
assert(result && "Expected argument to be a constant");
arg = builder.getConstInt(getLoc(e->getSourceRange()), *result);
}
return arg;
}

[[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
// If a null pointer expression's type is the C++0x nullptr_t and
// the expression is not a simple literal, it must be evaluated
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -1529,6 +1529,10 @@ class CIRGenFunction : public CIRGenTypeCache {
mlir::Value emitScalarExpr(const clang::Expr *e,
bool ignoreResultAssign = false);

mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments,
unsigned index,
const CallExpr *e);

mlir::Value emitScalarPrePostIncDec(const UnaryOperator *e, LValue lv,
cir::UnaryOpKind kind, bool isPre);

Expand Down
14 changes: 14 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,20 @@ static mlir::LLVM::CallIntrinsicOp replaceOpWithCallLLVMIntrinsicOp(
return callIntrinOp;
}

mlir::LogicalResult CIRToLLVMLLVMIntrinsicCallOpLowering::matchAndRewrite(
cir::LLVMIntrinsicCallOp op,
OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
mlir::Type llvmResTy =
getTypeConverter()->convertType(op->getResultTypes()[0]);
if (!llvmResTy)
return op.emitError("expected LLVM result type");
StringRef name = op.getIntrinsicName();
replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm." + name, llvmResTy,
adaptor.getOperands());
return mlir::success();
}

/// IntAttr visitor.
mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) {
mlir::Location loc = parentOp->getLoc();
Expand Down
23 changes: 23 additions & 0 deletions clang/test/CIR/CodeGen/X86/sse-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add the OGCG checks. It's useful to manually verify that we're generating equivalent code.

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

// This test mimics clang/test/CodeGen/X86/sse-builtins.c, which eventually
// CIR shall be able to support fully.

#include <immintrin.h>


void test_mm_prefetch(char const* p) {
// CIR-LABEL: test_mm_prefetch
// LLVM-LABEL: test_mm_prefetch
_mm_prefetch(p, 0);
// CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void>
// LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
}
23 changes: 23 additions & 0 deletions clang/test/CIR/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s

Can you replace all instances of CIR-CHECK with CIR and LLVM-CHECK with LLVM? I'm not sure what the person who implemented that in the incubator was trying to accomplish, but it's not necessary. As currently written, this test will ignore the CIR-LABEL and LLVM-LABEL checks. Those are label checks for the CIR and LLVM prefixes.

Also, please add OGCG checks here too.

Copy link
Contributor Author

@HendrikHuebner HendrikHuebner Nov 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. What is the point of adding CIR-LABEL instead of just CIR? I've seen CIR-NEXT and CIR-SAME as well, are there docs about what these do?

Edit: Ahh I found this: https://llvm.org/docs/CommandGuide/FileCheck.html

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andykaylor Hi, you mentioned you would like to see this PR merged before the other builtin upstreaming PRs, do you think we can land it now?

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s

// This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
// CIR shall be able to support fully.

#include <immintrin.h>


void test_mm_clflush(void* A) {
// CIR-LABEL: test_mm_clflush
// LLVM-LABEL: teh
_mm_clflush(A);
// CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.clflush" {{%.*}} : (!cir.ptr<!void>) -> !void
// LLVM-CHECK: call void @llvm.x86.sse2.clflush(ptr {{%.*}})
}
Loading