Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ CODEGENOPT(ImplicitMapSyms, 1, 0, Benign) ///< -Wa,-mmapsyms=implicit
CODEGENOPT(AsmVerbose , 1, 0, Benign) ///< -dA, -fverbose-asm.
CODEGENOPT(PreserveAsmComments, 1, 1, Benign) ///< -dA, -fno-preserve-as-comments.
CODEGENOPT(AssumeSaneOperatorNew , 1, 1, Benign) ///< implicit __attribute__((malloc)) operator new
CODEGENOPT(AssumeArrayBounds , 1, 0, Benign) ///< Generate llvm.assume for array bounds.
CODEGENOPT(AssumeUniqueVTables , 1, 1, Benign) ///< Assume a class has only one vtable.
CODEGENOPT(Autolink , 1, 1, Benign) ///< -fno-autolink
CODEGENOPT(AutoImport , 1, 1, Benign) ///< -fno-auto-import
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,11 @@ defm assume_unique_vtables : BoolFOption<"assume-unique-vtables",
BothFlags<[], [ClangOption, CLOption]>>;

def fassume_sane_operator_new : Flag<["-"], "fassume-sane-operator-new">, Group<f_Group>;
// Boolean flag pair -fassume-array-bounds / -fno-assume-array-bounds, exposed
// to both the clang driver and cc1. It is backed by the AssumeArrayBounds
// codegen option and is off by default (DefaultFalse), so the feature is
// opt-in. Only the positive form carries help text.
defm assume_array_bounds : BoolFOption<"assume-array-bounds",
CodeGenOpts<"AssumeArrayBounds">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option],
"Generate llvm.assume for array bounds to enable optimizations (may break code with intentional out-of-bounds access)">,
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
def fastcp : Flag<["-"], "fastcp">, Group<f_Group>;
def fastf : Flag<["-"], "fastf">, Group<f_Group>;
def fast : Flag<["-"], "fast">, Group<f_Group>;
Expand Down
59 changes: 48 additions & 11 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4589,8 +4589,25 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
/// array subscripts are within bounds, enabling better optimization without
/// duplicating side effects from the subscript expression. The IndexVal
/// parameter should be the already-emitted index value to avoid re-evaluation.
///
/// Code that intentionally indexes out of bounds (undefined behavior) may
/// break once these assumptions are emitted. Only applies to constant-size
/// arrays (not pointers, VLAs, or flexible arrays). Disabled when
/// -fsanitize=array-bounds is active.
///
void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
llvm::Value *IndexVal) {
// Disable with -fno-assume-array-bounds.
if (!CGM.getCodeGenOpts().AssumeArrayBounds)
return;

// Disable at -O0.
if (CGM.getCodeGenOpts().OptimizationLevel == 0)
return;

// Disable with array-bounds sanitizer.
if (SanOpts.has(SanitizerKind::ArrayBounds))
return;

const Expr *Base = E->getBase();
const Expr *Idx = E->getIdx();
QualType BaseType = Base->getType();
Expand All @@ -4610,6 +4627,26 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
if (ArraySize == 0)
return;

// Don't generate assumes for flexible array member pattern.
// Arrays of size 1 in structs are often used as placeholders for
// variable-length data (pre-C99 flexible array member idiom.)
if (ArraySize == 1) {
if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
const RecordDecl *RD = FD->getParent();
// Check if this field is the last field in the record.
// Only the last field can be a flexible array member.
const FieldDecl *LastField = nullptr;
for (const auto *Field : RD->fields())
LastField = Field;
if (LastField == FD)
// This is a size-1 array as the last field in a struct.
// Likely a flexible array member pattern - skip assumes.
return;
}
}
}

QualType IdxType = Idx->getType();
llvm::Type *IndexType = ConvertType(IdxType);
llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
Expand All @@ -4633,21 +4670,21 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
// This enforces the C18 standard requirement that array subscripts
// must be "greater than or equal to zero and less than the size of the
// array."
llvm::Value *LowerBound, *UpperBound;
if (IdxType->isSignedIntegerOrEnumerationType()) {
// For signed indices: index >= 0 && index < size.
LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
llvm::Value *LowerBound =
Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
llvm::Value *UpperBound =
Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
llvm::Value *BoundsConstraint =
Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
Builder.CreateAssumption(BoundsConstraint);
} else {
// For unsigned indices: index < size (>= 0 is implicit).
LowerBound = Builder.getTrue();
UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
// For unsigned indices: index < size (>= 0 is implicit.)
llvm::Value *UpperBound =
Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
Builder.CreateAssumption(UpperBound);
}

llvm::Value *BoundsConstraint =
Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
}

LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Expand Down
89 changes: 89 additions & 0 deletions clang/test/CodeGen/array-bounds-constraints-safety.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
// Test that array bounds constraints are NOT applied to cases that might
// break real-world code with intentional out-of-bounds access patterns.

// C18 standard allows one-past-the-end pointers, and some legacy code
// intentionally accesses out-of-bounds for performance or compatibility.
// This test verifies that bounds constraints are only applied to safe cases.

// CHECK-LABEL: define {{.*}} @test_flexible_array_member
// Record whose last field is a one-element array; used by the test below to
// model trailing storage that is really larger than the declared size.
struct Data {
int count;
int items[1]; // Flexible array member pattern (pre-C99 style)
};

// Reads d->items[i] with a runtime index, where items is the trailing
// one-element array of struct Data. The directive inside the body expects
// that no llvm.assume call is emitted for this function.
int test_flexible_array_member(struct Data *d, int i) {
// CHECK-NOT: call void @llvm.assume
// Flexible array member pattern (size 1 array as last field) should NOT
// generate bounds constraints because items[1] is just a placeholder
// for a larger array allocated with `malloc (sizeof (struct Data) + 42)`.
return d->items[i];
}

// CHECK-LABEL: define {{.*}} @test_not_flexible_array
// Counterpart to struct Data: the one-element array comes first and is
// followed by another field, so it is not the record's last member.
struct NotFlexible {
int items[1]; // Size 1 array but NOT the last field.
int count; // Something comes after it.
};

// Reads s->items[i] with a runtime index, where items is a one-element array
// that is not the last field of its struct. The directive inside the body
// expects an llvm.assume call to be emitted for this function.
int test_not_flexible_array(struct NotFlexible *s, int i) {
// CHECK: call void @llvm.assume
// This is NOT a flexible array pattern (not the last field),
// so we're fine generating `assume(i < 1)`.
return s->items[i];
}

// CHECK-LABEL: define {{.*}} @test_pointer_parameter
// Subscripts a plain pointer parameter with a runtime index. The directive
// inside the body expects that no llvm.assume call is emitted, since the
// pointer's type carries no array extent.
int test_pointer_parameter(int *arr, int i) {
// CHECK-NOT: call void @llvm.assume
// Pointer parameters should NOT generate bounds constraints
// because we don't know the actual array size.
return arr[i];
}

// CHECK-LABEL: define {{.*}} @test_vla
// Subscripts a variable-length array whose extent is the runtime value n.
// The directive inside the body expects that no llvm.assume call is emitted.
// NOTE(review): arr is read without ever being written; confirm that the
// result of this test does not depend on how the optimizer treats that load.
int test_vla(int n, int i) {
int arr[n]; // Variable-length array.
// CHECK-NOT: call void @llvm.assume
// VLAs should NOT generate bounds constraints
// because the size is dynamic.
return arr[i];
}

// CHECK-LABEL: define {{.*}} @test_one_past_end
extern int extern_array[100];
int *test_one_past_end(void) {
// CHECK-NOT: call void @llvm.assume
// Taking address of one-past-the-end is allowed by C standard.
// We should NOT assume anything about this access.
return &extern_array[100]; // Legal: one past the end.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried

extern int extern_array[100];
int *test_extern_array_val(int i) {
  return &extern_array[i];
}

with this PR and it generates

  %bounds.constraint = icmp ult i32 %i, 100
  tail call void @llvm.assume(i1 %bounds.constraint)

If &extern_array[100] is legal, then test_extern_array_val(100) must be legal too.

Did you consider C++ references?

int &test_extern_array_val(int i) {
  return extern_array[i];
}

I think a reference must always point to valid memory, so here one can apply the stricter i < 100.

}

// CHECK-LABEL: define {{.*}} @test_extern_array
// Loads extern_array[i] by value with a runtime index; extern_array is the
// 100-element global declared earlier in this file. The directive inside the
// body expects an llvm.assume call to be emitted for this function.
int test_extern_array(int i) {
// CHECK: call void @llvm.assume
// This will generate bounds constraints.
// The array is a constant-size global array.
// This is the safe case where we want optimization hints.
return extern_array[i];
}

// CHECK-LABEL: define {{.*}} @test_local_constant_array
// Loads arr[i] from a 10-element local array with a runtime index. The
// directive inside the body expects an llvm.assume call to be emitted.
// NOTE(review): arr is read without ever being written; confirm that the
// optimizer keeps the assume rather than folding the undefined load (and its
// index computation) away at -O2.
int test_local_constant_array(int i) {
int arr[10];
// CHECK: call void @llvm.assume
// This will generate bounds constraints.
// We know the exact size of this alloca array.
// This is the safe case where we want optimization hints.
return arr[i];
}

// CHECK-LABEL: define {{.*}} @test_malloc_array
int *my_malloc(int);
// Subscripts a pointer returned by my_malloc (a test-local declaration taking
// an int) with a runtime index. The directive inside the body expects that no
// llvm.assume call is emitted, because the access goes through a pointer
// rather than an array type.
int test_malloc_array(int i) {
// CHECK-NOT: call void @llvm.assume
// Dynamically allocated arrays accessed via pointers do not get bounds
// constraints.
int *x = my_malloc(100 * sizeof(int));
return x[i];
}
9 changes: 6 additions & 3 deletions clang/test/CodeGen/array-bounds-constraints.c
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
// Test that array bounds constraints generate llvm.assume statements for optimization hints.
// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s

// This test verifies that clang generates llvm.assume statements to inform the
// optimizer that array subscripts are within bounds to enable better optimization.
// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s

// Verify no assumes are generated.
// RUN: %clang_cc1 -emit-llvm -O2 -fno-assume-array-bounds %s -o - | FileCheck %s -check-prefix=NO-FLAG

// CHECK-LABEL: define {{.*}} @test_simple_array
// NO-FLAG-LABEL: define {{.*}} @test_simple_array
// Loads arr[i] from a 10-element local array with a runtime index. The
// default-prefix directives expect an unsigned compare of %i against 10
// followed by an llvm.assume call; the NO-FLAG prefix expects no llvm.assume
// call at all.
// NOTE(review): arr is read without ever being written; confirm that the
// optimizer keeps the compare and the assume rather than folding the
// undefined load away at -O2.
int test_simple_array(int i) {
int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
// CHECK: %{{.*}} = icmp ult i32 %i, 10
// CHECK: call void @llvm.assume(i1 %{{.*}})
// NO-FLAG-NOT: call void @llvm.assume
return arr[i];
}

Expand Down
Loading