Skip to content

Commit 702d9dd

Browse files
committed
Add -fassume-array-bounds flag (disabled by default for now)
Sanitizer interaction: assume generation is disabled when -fsanitize=array-bounds is active. Flexible array detection: skip size-1 arrays as last struct field.
1 parent 7fdec0a commit 702d9dd

File tree

5 files changed

+149
-14
lines changed

5 files changed

+149
-14
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ CODEGENOPT(ImplicitMapSyms, 1, 0, Benign) ///< -Wa,-mmapsyms=implicit
3333
CODEGENOPT(AsmVerbose , 1, 0, Benign) ///< -dA, -fverbose-asm.
3434
CODEGENOPT(PreserveAsmComments, 1, 1, Benign) ///< -dA, -fno-preserve-as-comments.
3535
CODEGENOPT(AssumeSaneOperatorNew , 1, 1, Benign) ///< implicit __attribute__((malloc)) operator new
36+
CODEGENOPT(AssumeArrayBounds , 1, 0, Benign) ///< Generate llvm.assume for array bounds.
3637
CODEGENOPT(AssumeUniqueVTables , 1, 1, Benign) ///< Assume a class has only one vtable.
3738
CODEGENOPT(Autolink , 1, 1, Benign) ///< -fno-autolink
3839
CODEGENOPT(AutoImport , 1, 1, Benign) ///< -fno-auto-import

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,11 @@ defm assume_unique_vtables : BoolFOption<"assume-unique-vtables",
16031603
BothFlags<[], [ClangOption, CLOption]>>;
16041604

16051605
def fassume_sane_operator_new : Flag<["-"], "fassume-sane-operator-new">, Group<f_Group>;
1606+
defm assume_array_bounds : BoolFOption<"assume-array-bounds",
1607+
CodeGenOpts<"AssumeArrayBounds">, DefaultFalse,
1608+
PosFlag<SetTrue, [], [ClangOption, CC1Option],
1609+
"Generate llvm.assume for array bounds to enable optimizations (may break code with intentional out-of-bounds access)">,
1610+
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
16061611
def fastcp : Flag<["-"], "fastcp">, Group<f_Group>;
16071612
def fastf : Flag<["-"], "fastf">, Group<f_Group>;
16081613
def fast : Flag<["-"], "fast">, Group<f_Group>;

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4589,8 +4589,25 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
45894589
/// array subscripts are within bounds, enabling better optimization without
45904590
/// duplicating side effects from the subscript expression. The IndexVal
45914591
/// parameter should be the already-emitted index value to avoid re-evaluation.
4592+
///
4593+
/// Code that intentionally accesses arrays out of bounds (UB) may break with
4594+
/// optimizations. Only applies to constant-size arrays (not pointers, VLAs, or
4595+
/// flexible arrays.) Disabled when -fsanitize=array-bounds is active.
4596+
///
45924597
void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
45934598
llvm::Value *IndexVal) {
4599+
// Off by default; only enabled with -fassume-array-bounds.
4600+
if (!CGM.getCodeGenOpts().AssumeArrayBounds)
4601+
return;
4602+
4603+
// Disable at -O0.
4604+
if (CGM.getCodeGenOpts().OptimizationLevel == 0)
4605+
return;
4606+
4607+
// Disable with array-bounds sanitizer.
4608+
if (SanOpts.has(SanitizerKind::ArrayBounds))
4609+
return;
4610+
45944611
const Expr *Base = E->getBase();
45954612
const Expr *Idx = E->getIdx();
45964613
QualType BaseType = Base->getType();
@@ -4610,6 +4627,26 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
46104627
if (ArraySize == 0)
46114628
return;
46124629

4630+
// Don't generate assumes for flexible array member pattern.
4631+
// Arrays of size 1 in structs are often used as placeholders for
4632+
// variable-length data (pre-C99 flexible array member idiom.)
4633+
if (ArraySize == 1) {
4634+
if (const auto *ME = dyn_cast<MemberExpr>(Base->IgnoreParenImpCasts())) {
4635+
if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
4636+
const RecordDecl *RD = FD->getParent();
4637+
// Check if this field is the last field in the record.
4638+
// Only the last field can be a flexible array member.
4639+
const FieldDecl *LastField = nullptr;
4640+
for (const auto *Field : RD->fields())
4641+
LastField = Field;
4642+
if (LastField == FD)
4643+
// This is a size-1 array as the last field in a struct.
4644+
// Likely a flexible array member pattern - skip assumes.
4645+
return;
4646+
}
4647+
}
4648+
}
4649+
46134650
QualType IdxType = Idx->getType();
46144651
llvm::Type *IndexType = ConvertType(IdxType);
46154652
llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
@@ -4633,21 +4670,21 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
46334670
// This enforces the C18 standard requirement that array subscripts
46344671
// must be "greater than or equal to zero and less than the size of the
46354672
// array."
4636-
llvm::Value *LowerBound, *UpperBound;
46374673
if (IdxType->isSignedIntegerOrEnumerationType()) {
46384674
// For signed indices: index >= 0 && index < size.
4639-
LowerBound = Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
4640-
UpperBound = Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
4675+
llvm::Value *LowerBound =
4676+
Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
4677+
llvm::Value *UpperBound =
4678+
Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
4679+
llvm::Value *BoundsConstraint =
4680+
Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
4681+
Builder.CreateAssumption(BoundsConstraint);
46414682
} else {
4642-
// For unsigned indices: index < size (>= 0 is implicit).
4643-
LowerBound = Builder.getTrue();
4644-
UpperBound = Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
4683+
// For unsigned indices: index < size (>= 0 is implicit.)
4684+
llvm::Value *UpperBound =
4685+
Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
4686+
Builder.CreateAssumption(UpperBound);
46454687
}
4646-
4647-
llvm::Value *BoundsConstraint =
4648-
Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
4649-
llvm::Function *AssumeIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::assume);
4650-
Builder.CreateCall(AssumeIntrinsic, BoundsConstraint);
46514688
}
46524689

46534690
LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
2+
// Test that array bounds constraints are NOT applied to cases that might
3+
// break real-world code with intentional out-of-bounds access patterns.
4+
5+
// The C18 standard allows one-past-the-end pointers, and some legacy code
6+
// intentionally accesses out-of-bounds for performance or compatibility.
7+
// This test verifies that bounds constraints are only applied to safe cases.
8+
9+
// CHECK-LABEL: define {{.*}} @test_flexible_array_member
10+
struct Data {
11+
int count;
12+
int items[1]; // Flexible array member pattern (pre-C99 style)
13+
};
14+
15+
int test_flexible_array_member(struct Data *d, int i) {
16+
// CHECK-NOT: call void @llvm.assume
17+
// Flexible array member pattern (size 1 array as last field) should NOT
18+
// generate bounds constraints because items[1] is just a placeholder
19+
// for a larger array allocated with `malloc (sizeof (struct Data) + 42)`.
20+
return d->items[i];
21+
}
22+
23+
// CHECK-LABEL: define {{.*}} @test_not_flexible_array
24+
struct NotFlexible {
25+
int items[1]; // Size 1 array but NOT the last field.
26+
int count; // Something comes after it.
27+
};
28+
29+
int test_not_flexible_array(struct NotFlexible *s, int i) {
30+
// CHECK: call void @llvm.assume
31+
// This is NOT a flexible array pattern (not the last field),
32+
// so we're fine generating `assume(i < 1)`.
33+
return s->items[i];
34+
}
35+
36+
// CHECK-LABEL: define {{.*}} @test_pointer_parameter
37+
int test_pointer_parameter(int *arr, int i) {
38+
// CHECK-NOT: call void @llvm.assume
39+
// Pointer parameters should NOT generate bounds constraints
40+
// because we don't know the actual array size.
41+
return arr[i];
42+
}
43+
44+
// CHECK-LABEL: define {{.*}} @test_vla
45+
int test_vla(int n, int i) {
46+
int arr[n]; // Variable-length array.
47+
// CHECK-NOT: call void @llvm.assume
48+
// VLAs should NOT generate bounds constraints
49+
// because the size is dynamic.
50+
return arr[i];
51+
}
52+
53+
// CHECK-LABEL: define {{.*}} @test_one_past_end
54+
extern int extern_array[100];
55+
int *test_one_past_end(void) {
56+
// CHECK-NOT: call void @llvm.assume
57+
// Taking the address of the one-past-the-end element is allowed by the C standard.
58+
// We should NOT assume anything about this access.
59+
return &extern_array[100]; // Legal: one past the end.
60+
}
61+
62+
// CHECK-LABEL: define {{.*}} @test_extern_array
63+
int test_extern_array(int i) {
64+
// CHECK: call void @llvm.assume
65+
// This will generate bounds constraints.
66+
// The array is a constant-size global array.
67+
// This is the safe case where we want optimization hints.
68+
return extern_array[i];
69+
}
70+
71+
// CHECK-LABEL: define {{.*}} @test_local_constant_array
72+
int test_local_constant_array(int i) {
73+
int arr[10];
74+
// CHECK: call void @llvm.assume
75+
// This will generate bounds constraints.
76+
// We know the exact size of this alloca array.
77+
// This is the safe case where we want optimization hints.
78+
return arr[i];
79+
}
80+
81+
// CHECK-LABEL: define {{.*}} @test_malloc_array
82+
int *my_malloc(int);
83+
int test_malloc_array(int i) {
84+
// CHECK-NOT: call void @llvm.assume
85+
// Dynamically allocated arrays accessed via pointers do not get bounds
86+
// constraints.
87+
int *x = my_malloc(100 * sizeof(int));
88+
return x[i];
89+
}

clang/test/CodeGen/array-bounds-constraints.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
1-
// Test that array bounds constraints generate llvm.assume statements for optimization hints.
2-
// RUN: %clang_cc1 -emit-llvm -O2 %s -o - | FileCheck %s
3-
41
// This test verifies that clang generates llvm.assume statements to inform the
52
// optimizer that array subscripts are within bounds to enable better optimization.
3+
// RUN: %clang_cc1 -emit-llvm -O2 -fassume-array-bounds %s -o - | FileCheck %s
4+
5+
// Verify that no assumes are generated when -fno-assume-array-bounds is passed.
6+
// RUN: %clang_cc1 -emit-llvm -O2 -fno-assume-array-bounds %s -o - | FileCheck %s -check-prefix=NO-FLAG
67

78
// CHECK-LABEL: define {{.*}} @test_simple_array
9+
// NO-FLAG-LABEL: define {{.*}} @test_simple_array
810
int test_simple_array(int i) {
911
int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
1012
// CHECK: %{{.*}} = icmp ult i32 %i, 10
1113
// CHECK: call void @llvm.assume(i1 %{{.*}})
14+
// NO-FLAG-NOT: call void @llvm.assume
1215
return arr[i];
1316
}
1417

0 commit comments

Comments
 (0)