Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -400,22 +400,29 @@ def LoadOp : CIR_Op<"load", [
let summary = "Load value from memory adddress";
let description = [{
`cir.load` reads a value (lvalue to rvalue conversion) given an address
backed up by a `cir.ptr` type.
backed up by a `cir.ptr` type. A unit attribute `deref` can be used to
mark the resulting value as used by another operation to dereference
a pointer.

Example:

```mlir

// Read from local variable, address in %0.
%1 = cir.load %0 : !cir.ptr<i32>, i32

// Load address from memory at address %0. %3 is used by at least one
// operation that dereferences a pointer.
%3 = cir.load deref %0 : !cir.ptr<!cir.ptr<i32>>
```
}];

let arguments = (ins Arg<CIR_PointerType, "the address to load from",
[MemRead]>:$addr);
[MemRead]>:$addr, UnitAttr:$isDeref);
let results = (outs CIR_AnyType:$result);

let assemblyFormat = [{
(`deref` $isDeref^)?
$addr `:` qualified(type($addr)) `,` type($result) attr-dict
}];

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/CIR/MissingFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ struct MissingFeatures {
static bool cgFPOptionsRAII() { return false; }
static bool metaDataNode() { return false; }
static bool fastMathFlags() { return false; }
static bool lvalueBaseInfo() { return false; }
static bool alignCXXRecordDecl() { return false; }
static bool setNonGC() { return false; }

// Missing types
static bool dataMemberType() { return false; }
Expand Down
162 changes: 160 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,147 @@ using namespace clang;
using namespace clang::CIRGen;
using namespace cir;

/// Given an expression of pointer type, try to
/// derive a more accurate bound on the alignment of the pointer.
Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr) {
// We allow this with ObjC object pointers because of fragile ABIs.
assert(expr->getType()->isPointerType() ||
expr->getType()->isObjCObjectPointerType());
expr = expr->IgnoreParens();

// Casts:
if (auto const *ce = dyn_cast<CastExpr>(expr)) {
if (auto const *ece = dyn_cast<ExplicitCastExpr>(ce)) {
cgm.errorNYI(expr->getSourceRange(),
"emitPointerWithAlignment: explicit cast");
return Address::invalid();
}

switch (ce->getCastKind()) {
// Non-converting casts (but not C's implicit conversion from void*).
case CK_BitCast:
case CK_NoOp:
case CK_AddressSpaceConversion: {
cgm.errorNYI(expr->getSourceRange(),
"emitPointerWithAlignment: noop cast");
return Address::invalid();
} break;

// Array-to-pointer decay. TODO(cir): BaseInfo and TBAAInfo.
case CK_ArrayToPointerDecay: {
cgm.errorNYI(expr->getSourceRange(),
"emitPointerWithAlignment: array-to-pointer decay");
return Address::invalid();
}

case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
cgm.errorNYI(expr->getSourceRange(),
"emitPointerWithAlignment: derived-to-base cast");
return Address::invalid();
}

case CK_AnyPointerToBlockPointerCast:
case CK_BaseToDerived:
case CK_BaseToDerivedMemberPointer:
case CK_BlockPointerToObjCPointerCast:
case CK_BuiltinFnToFnPtr:
case CK_CPointerToObjCPointerCast:
case CK_DerivedToBaseMemberPointer:
case CK_Dynamic:
case CK_FunctionToPointerDecay:
case CK_IntegralToPointer:
case CK_LValueToRValue:
case CK_LValueToRValueBitCast:
case CK_NullToMemberPointer:
case CK_NullToPointer:
case CK_ReinterpretMemberPointer:
// Common pointer conversions, nothing to do here.
// TODO: Is there any reason to treat base-to-derived conversions
// specially?
break;

case CK_ARCConsumeObject:
case CK_ARCExtendBlockObject:
case CK_ARCProduceObject:
case CK_ARCReclaimReturnedObject:
case CK_AtomicToNonAtomic:
case CK_BooleanToSignedIntegral:
case CK_ConstructorConversion:
case CK_CopyAndAutoreleaseBlockObject:
case CK_Dependent:
case CK_FixedPointCast:
case CK_FixedPointToBoolean:
case CK_FixedPointToFloating:
case CK_FixedPointToIntegral:
case CK_FloatingCast:
case CK_FloatingComplexCast:
case CK_FloatingComplexToBoolean:
case CK_FloatingComplexToIntegralComplex:
case CK_FloatingComplexToReal:
case CK_FloatingRealToComplex:
case CK_FloatingToBoolean:
case CK_FloatingToFixedPoint:
case CK_FloatingToIntegral:
case CK_HLSLAggregateSplatCast:
case CK_HLSLArrayRValue:
case CK_HLSLElementwiseCast:
case CK_HLSLVectorTruncation:
case CK_IntToOCLSampler:
case CK_IntegralCast:
case CK_IntegralComplexCast:
case CK_IntegralComplexToBoolean:
case CK_IntegralComplexToFloatingComplex:
case CK_IntegralComplexToReal:
case CK_IntegralRealToComplex:
case CK_IntegralToBoolean:
case CK_IntegralToFixedPoint:
case CK_IntegralToFloating:
case CK_LValueBitCast:
case CK_MatrixCast:
case CK_MemberPointerToBoolean:
case CK_NonAtomicToAtomic:
case CK_ObjCObjectLValueCast:
case CK_PointerToBoolean:
case CK_PointerToIntegral:
case CK_ToUnion:
case CK_ToVoid:
case CK_UserDefinedConversion:
case CK_VectorSplat:
case CK_ZeroToOCLOpaqueType:
llvm_unreachable("unexpected cast for emitPointerWithAlignment");
}
}

// Unary &
if (const UnaryOperator *uo = dyn_cast<UnaryOperator>(expr)) {
// TODO(cir): maybe we should use cir.unary for pointers here instead.
if (uo->getOpcode() == UO_AddrOf) {
cgm.errorNYI(expr->getSourceRange(), "emitPointerWithAlignment: unary &");
return Address::invalid();
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the rest of the cases NYI or can we assert that Opcode is AddrOf?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neither. The rest of the opcodes don't require any special handling here. Neither the incubator nor the classic codegen do anything other than the special handling for AddrOf. Any others get handled by emitScalarExpr below.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah? so we still expect the others to get here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

}

// std::addressof and variants.
if (auto const *call = dyn_cast<CallExpr>(expr)) {
switch (call->getBuiltinCallee()) {
default:
break;
case Builtin::BIaddressof:
case Builtin::BI__addressof:
case Builtin::BI__builtin_addressof: {
cgm.errorNYI(expr->getSourceRange(),
"emitPointerWithAlignment: builtin addressof");
return Address::invalid();
}
}
}

// Otherwise, use the alignment of the type.
return makeNaturalAddressForPointer(
emitScalarExpr(expr), expr->getType()->getPointeeType(), CharUnits());
}

void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
bool isInit) {
if (!dst.isSimple()) {
Expand Down Expand Up @@ -193,8 +334,25 @@ LValue CIRGenFunction::emitUnaryOpLValue(const UnaryOperator *e) {

switch (op) {
case UO_Deref: {
cgm.errorNYI(e->getSourceRange(), "UnaryOp dereference");
return LValue();
QualType t = e->getSubExpr()->getType()->getPointeeType();
assert(!t.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");

assert(!cir::MissingFeatures::lvalueBaseInfo());
assert(!cir::MissingFeatures::opTBAA());
Address addr = emitPointerWithAlignment(e->getSubExpr());

// Tag 'load' with deref attribute.
// FIXME: This misses some derefence cases and has problematic interactions
// with other operators.
if (auto loadOp =
dyn_cast<cir::LoadOp>(addr.getPointer().getDefiningOp())) {
loadOp.setIsDerefAttr(mlir::UnitAttr::get(&getMLIRContext()));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm following correctly, the "deref" attribute is supposed to indicate the pointer is dereferenced... but the way it's implemented, this is a static property of the code structure, not a dynamic property of the execution. There might be some codepath where it doesn't actually get dereferenced... and in fact, the dereference might not be reachable at all.

Given that, what can you actually do with "deref" information?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question. You're correct that the intent is to describe a static property of the source code. Perhaps the dialect documentation needs some clarification on this point. At the time that the CIR is generated in the front end, I think there is likely to be a direct correspondence here. The pointer loaded is being dereferenced. We just emitted it in the call to emitPointerWithAlignment() on line 342. Of course, after some CIR-to-CIR transformation, this might not be the case.

In any event, I believe this is intended to be used for static analysis. For example, it distinguishes loads of alloca values to reference local variables, from loads of user pointers. For example, consider this code:

1  void foo(int *p1) {
2    int *p2;
3    *p1 = 2;
4    p2 = p1;
5  }

There are cir.load operations generated for lines 3 and 4. Both are immediately used in cir.store operations, but only the one for line 3 gets marked with the deref attribute.

https://godbolt.org/z/WP69hqvxj

I'm not sure exactly what the static analysis does with this information. Perhaps @bcardosolopes could provide more information.

BTW, I see that we miss applying the deref attribute if there is a cast on the pointer being dereferenced. I'll open an issue for that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure exactly what the static analysis does with this information.

We currently use this in the lifetime checker to decide whether the load is worth looking at:
https://github.com/llvm/clangir/blob/9f2da98faf36a3c88e61ad307d1261caa0c9d768/clang/lib/CIR/Dialect/Transforms/LifetimeCheck.cpp#L1314

Instead of directly tagging the load during CIRGen it's reasonable to argue that such information could instead be provided by the result on an analysis (versus encoded directly into the operation). The design choice at the moment this was created was to not spend extra compile time given the information is conveniently available. My take is to leave it as is until we have other passes that might take advantage of that information, where we could revisit it and work on the guarantees. FWIW, this is the only operation currently in CIR encoding this type of semantic.

BTW, I see that we miss applying the deref attribute if there is a cast on the pointer being dereferenced. I'll open an issue for that.

Awesome!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@efriedma-quic Are you satisfied with the explanations above?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it really accomplishes what you've set out to do in this context... it interacts weirdly with other operators. It sets &*x as deref, and it doesn't set *&x->y as deref.

That said, this isn't going to have a critical impact on anything; I'm okay with revisiting as needed. Maybe leave a FIXME in the code so anyone reading it later can refer to this discussion.

}

LValue lv = LValue::makeAddr(addr, t);
assert(!cir::MissingFeatures::addressSpace());
assert(!cir::MissingFeatures::setNonGC());
return lv;
}
case UO_Real:
case UO_Imag: {
Expand Down
27 changes: 25 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
return VisitCastExpr(e);
}

mlir::Value VisitCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *e) {
return cgf.cgm.emitNullConstant(e->getType(),
cgf.getLoc(e->getSourceRange()));
}

/// Perform a pointer to boolean conversion.
mlir::Value emitPointerToBoolConversion(mlir::Value v, QualType qt) {
// TODO(cir): comparing the ptr to null is done when lowering CIR to LLVM.
Expand Down Expand Up @@ -444,6 +449,22 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
llvm_unreachable("Unexpected signed overflow behavior kind");
}

mlir::Value VisitUnaryAddrOf(const UnaryOperator *e) {
if (llvm::isa<MemberPointerType>(e->getType())) {
cgf.cgm.errorNYI(e->getSourceRange(), "Address of member pointer");
return builder.getNullPtr(cgf.convertType(e->getType()),
cgf.getLoc(e->getExprLoc()));
}

return cgf.emitLValue(e->getSubExpr()).getPointer();
}

mlir::Value VisitUnaryDeref(const UnaryOperator *e) {
if (e->getType()->isVoidType())
return Visit(e->getSubExpr()); // the actual value should be unused
return emitLoadOfLValue(e);
}

mlir::Value VisitUnaryPlus(const UnaryOperator *e) {
return emitUnaryPlusOrMinus(e, cir::UnaryOpKind::Plus);
}
Expand Down Expand Up @@ -856,9 +877,11 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
}

[[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
// If a null pointer expression's type is the C++0x nullptr_t, then
// it's not necessarily a simple constant and it must be evaluated
// If a null pointer expression's type is the C++0x nullptr_t and
// the expression is not a simple literal, it must be evaluated
// for its potential side effects.
if (isa<IntegerLiteral>(e) || isa<CXXNullPtrLiteralExpr>(e))
return false;
return e->getType()->isNullPtrType();
}

Expand Down
23 changes: 23 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,17 @@ class CIRGenFunction : public CIRGenTypeCache {
// TODO: Add symbol table support
}

/// Construct an address with the natural alignment of T. If a pointer to T
/// is expected to be signed, the pointer passed to this function must have
/// been signed, and the returned Address will have the pointer authentication
/// information needed to authenticate the signed pointer.
Address makeNaturalAddressForPointer(mlir::Value ptr, QualType t,
CharUnits alignment) {
if (alignment.isZero())
alignment = cgm.getNaturalTypeAlignment(t);
return Address(ptr, convertTypeForMem(t), alignment);
}

cir::FuncOp generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
cir::FuncType funcType);

Expand Down Expand Up @@ -466,6 +477,18 @@ class CIRGenFunction : public CIRGenTypeCache {
/// FIXME: document this function better.
LValue emitLValue(const clang::Expr *e);

/// Given an expression with a pointer type, emit the value and compute our
/// best estimate of the alignment of the pointee.
///
/// One reasonable way to use this information is when there's a language
/// guarantee that the pointer must be aligned to some stricter value, and
/// we're simply trying to ensure that sufficiently obvious uses of under-
/// aligned objects don't get miscompiled; for example, a placement new
/// into the address of a local variable. In such a case, it's quite
/// reasonable to just ignore the returned alignment when it isn't from an
/// explicit source.
Address emitPointerWithAlignment(const clang::Expr *expr);

mlir::LogicalResult emitReturnStmt(const clang::ReturnStmt &s);

/// Emit a conversion from the specified type to the specified destination
Expand Down
51 changes: 51 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,57 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
builder.getStringAttr(getTriple().str()));
}

CharUnits CIRGenModule::getNaturalTypeAlignment(QualType t) {
assert(!cir::MissingFeatures::opTBAA());

// FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But
// that doesn't return the information we need to compute BaseInfo.

// Honor alignment typedef attributes even on incomplete types.
// We also honor them straight for C++ class types, even as pointees;
// there's an expressivity gap here.
if (const auto *tt = t->getAs<TypedefType>()) {
if (unsigned align = tt->getDecl()->getMaxAlignment()) {
assert(!cir::MissingFeatures::lvalueBaseInfo());
return astContext.toCharUnitsFromBits(align);
}
}

// Analyze the base element type, so we don't get confused by incomplete
// array types.
t = astContext.getBaseElementType(t);

if (t->isIncompleteType()) {
// We could try to replicate the logic from
// ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the
// type is incomplete, so it's impossible to test. We could try to reuse
// getTypeAlignIfKnown, but that doesn't return the information we need
// to set BaseInfo. So just ignore the possibility that the alignment is
// greater than one.
assert(!cir::MissingFeatures::lvalueBaseInfo());
return CharUnits::One();
}

assert(!cir::MissingFeatures::lvalueBaseInfo());

CharUnits alignment;
if (t.getQualifiers().hasUnaligned()) {
alignment = CharUnits::One();
} else {
assert(!cir::MissingFeatures::alignCXXRecordDecl());
alignment = astContext.getTypeAlignInChars(t);
}

// Cap to the global maximum type alignment unless the alignment
// was somehow explicit on the type.
if (unsigned maxAlign = astContext.getLangOpts().MaxTypeAlign) {
if (alignment.getQuantity() > maxAlign &&
!astContext.isAlignmentRequired(t))
alignment = CharUnits::fromQuantity(maxAlign);
}
return alignment;
}

mlir::Location CIRGenModule::getLoc(SourceLocation cLoc) {
assert(cLoc.isValid() && "expected valid source location");
const SourceManager &sm = astContext.getSourceManager();
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class CIRGenModule : public CIRGenTypeCache {
mlir::Location getLoc(clang::SourceLocation cLoc);
mlir::Location getLoc(clang::SourceRange cRange);

/// FIXME: this could likely be a common helper and not necessarily related
/// with codegen.
clang::CharUnits getNaturalTypeAlignment(clang::QualType t);

void emitTopLevelDecl(clang::Decl *decl);

bool verifyModule() const;
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
resultType = cgm.SInt32Ty;
break;

case BuiltinType::NullPtr:
// Add proper CIR type for it? this looks mostly useful for sema related
// things (like for overloads accepting void), for now, given that
// `sizeof(std::nullptr_t)` is equal to `sizeof(void *)`, model
// std::nullptr_t as !cir.ptr<!void>
resultType = builder.getVoidPtrTy();
break;

default:
cgm.errorNYI(SourceLocation(), "processing of built-in type", type);
resultType = cgm.SInt32Ty;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CIR/CodeGen/CIRGenTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class CIRGenTypes {

/// Return whether a type can be zero-initialized (in the C++ sense) with an
/// LLVM zeroinitializer.
bool isZeroInitializable(clang::QualType t);
bool isZeroInitializable(clang::QualType ty);
};

} // namespace clang::CIRGen
Expand Down
Loading