Skip to content

Commit f46618d

Browse files
committed
use Accessed to generate strict vs. size+1 checks
1 parent 033a1ce commit f46618d

File tree

5 files changed

+51
-37
lines changed

5 files changed

+51
-37
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4585,17 +4585,18 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
45854585
/// considered to be equivalent to a pointer to a hypothetical element x[n]
45864586
/// for this purpose; see 6.9.2.
45874587
///
4588-
/// This function emits llvm.assume statements to inform the optimizer that
4589-
/// array subscripts are within bounds, enabling better optimization without
4590-
/// duplicating side effects from the subscript expression. The IndexVal
4591-
/// parameter should be the already-emitted index value to avoid re-evaluation.
4588+
4589+
/// C and C++ allow forming &arr[size] (one-past-the-end), e.g. for iterators,
4590+
/// but dereferencing one-past-the-end is UB. This function uses the Accessed
4591+
/// parameter to distinguish: Accessed=true uses strict bounds (index < size),
4592+
/// Accessed=false allows one-past-the-end (index <= size).
45924593
///
4593-
/// Code that intentionally accesses out-of-bounds (UB) may break with
4594-
/// optimizations. Only applies to constant-size arrays (not pointers, VLAs, or
4595-
/// flexible arrays.) Disabled when -fsanitize=array-bounds is active.
4594+
/// Code that intentionally dereferences out-of-bounds (UB) may break with
4595+
/// optimizations. Disabled when -fsanitize=array-bounds is active.
45964596
///
45974597
void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
4598-
llvm::Value *IndexVal) {
4598+
llvm::Value *IndexVal,
4599+
bool Accessed) {
45994600
// Disable with -fno-assume-array-bounds.
46004601
if (!CGM.getCodeGenOpts().AssumeArrayBounds)
46014602
return;
@@ -4682,25 +4683,29 @@ void CodeGenFunction::EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
46824683
IndexVal = Builder.CreateIntCast(IndexVal, IndexType, IsSigned, "idx.cast");
46834684
}
46844685

4685-
// Create bounds constraint: 0 <= index && index < size.
4686-
// C arrays are 0-based, so valid indices are [0, size-1].
4687-
// This enforces the C18 standard requirement that array subscripts
4688-
// must be "greater than or equal to zero and less than the size of the
4689-
// array."
4686+
// Create bounds constraint: 0 <= index && index (< or <=) size.
4687+
// The Accessed parameter indicates whether the array element will be
4688+
// dereferenced. Per C/C++ standards, &arr[size] (one-past-the-end) is legal
4689+
// for iterators.
4690+
// Accessed = true: element is dereferenced, strict bounds: 0 <= index < size
4691+
// Accessed = false: address only, allow one-past-the-end: 0 <= index <= size
4692+
46904693
if (IdxType->isSignedIntegerOrEnumerationType()) {
4691-
// For signed indices: index >= 0 && index < size.
4694+
// For signed indices: index >= 0 && index [<|<=] size.
46924695
llvm::Value *Zero = llvm::ConstantInt::get(IndexType, 0);
46934696
llvm::Value *LowerBound =
46944697
Builder.CreateICmpSGE(IndexVal, Zero, "idx.ge.zero");
4695-
llvm::Value *UpperBound =
4696-
Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.lt.size");
4698+
llvm::Value *UpperBound = Accessed
4699+
? Builder.CreateICmpSLT(IndexVal, ArraySizeVal, "idx.slt.size")
4700+
: Builder.CreateICmpSLE(IndexVal, ArraySizeVal, "idx.sle.size");
46974701
llvm::Value *BoundsConstraint =
46984702
Builder.CreateAnd(LowerBound, UpperBound, "bounds.constraint");
46994703
Builder.CreateAssumption(BoundsConstraint);
47004704
} else {
4701-
// For unsigned indices: index < size (>= 0 is implicit.)
4702-
llvm::Value *UpperBound =
4703-
Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.lt.size");
4705+
// For unsigned indices: index [<|<=] size. (>= 0 is implicit.)
4706+
llvm::Value *UpperBound = Accessed
4707+
? Builder.CreateICmpULT(IndexVal, ArraySizeVal, "idx.ult.size")
4708+
: Builder.CreateICmpULE(IndexVal, ArraySizeVal, "idx.ule.size");
47044709
Builder.CreateAssumption(UpperBound);
47054710
}
47064711
}
@@ -4746,7 +4751,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
47464751
auto *Idx = EmitIdxAfterBase(/*Promote*/false);
47474752

47484753
// Emit array bounds constraints for vector subscripts.
4749-
EmitArrayBoundsConstraints(E, Idx);
4754+
EmitArrayBoundsConstraints(E, Idx, Accessed);
47504755

47514756
assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
47524757
return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(),
@@ -4788,9 +4793,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
47884793
Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
47894794
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
47904795

4791-
// Emit array bounds constraints for VLA access (though VLAs typically don't
4792-
// have constant bounds).
4793-
EmitArrayBoundsConstraints(E, Idx);
4796+
// Emit array bounds constraints for VLA access.
4797+
EmitArrayBoundsConstraints(E, Idx, Accessed);
47944798

47954799
// The element count here is the total number of non-VLA elements.
47964800
llvm::Value *numElements = getVLASize(vla).NumElts;
@@ -4817,7 +4821,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
48174821
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
48184822

48194823
// Emit array bounds constraints for ObjC interface access.
4820-
EmitArrayBoundsConstraints(E, Idx);
4824+
EmitArrayBoundsConstraints(E, Idx, Accessed);
48214825

48224826
CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
48234827
llvm::Value *InterfaceSizeVal =
@@ -4855,7 +4859,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
48554859
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
48564860

48574861
// Emit array bounds constraints for optimization.
4858-
EmitArrayBoundsConstraints(E, Idx);
4862+
EmitArrayBoundsConstraints(E, Idx, Accessed);
48594863

48604864
if (SanOpts.has(SanitizerKind::ArrayBounds))
48614865
EmitCountedByBoundsChecking(Array, Idx, ArrayLV.getAddress(),
@@ -4902,7 +4906,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
49024906
auto *Idx = EmitIdxAfterBase(/*Promote*/true);
49034907

49044908
// Emit array bounds constraints for pointer-based array access.
4905-
EmitArrayBoundsConstraints(E, Idx);
4909+
EmitArrayBoundsConstraints(E, Idx, Accessed);
49064910

49074911
QualType ptrType = E->getBase()->getType();
49084912
Addr = emitArraySubscriptGEP(*this, BaseAddr, Idx, E->getType(),

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2101,7 +2101,7 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
21012101
CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true);
21022102

21032103
// Emit array bounds constraints for vector element access.
2104-
CGF.EmitArrayBoundsConstraints(E, Idx);
2104+
CGF.EmitArrayBoundsConstraints(E, Idx, /*Accessed=*/true);
21052105

21062106
return Builder.CreateExtractElement(Base, Idx, "vecext");
21072107
}

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3344,9 +3344,11 @@ class CodeGenFunction : public CodeGenTypeCache {
33443344
/// Emit array bounds constraints using llvm.assume for optimization hints.
33453345
/// Emits assume statements for array bounds without duplicating side effects.
33463346
/// Takes the already-emitted index value to avoid re-evaluating expressions
3347-
/// with side effects. Helps optimizer with vectorization and bounds analysis.
3347+
/// with side effects. The Accessed parameter distinguishes:
3348+
/// - Accessed=true (element is dereferenced): strict bound, index < size.
3349+
/// - Accessed=false (address only): one-past-the-end allowed, index <= size.
33483350
void EmitArrayBoundsConstraints(const ArraySubscriptExpr *E,
3349-
llvm::Value *IndexVal);
3351+
llvm::Value *IndexVal, bool Accessed);
33503352

33513353
/// Returns debug info, with additional annotation if
33523354
/// CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo[Ordinal] is enabled for

clang/test/CodeGen/array-bounds-constraints-safety.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,9 @@ int test_vla(int n, int i) {
6969
extern int extern_array[100];
7070
int *test_one_past_end(void) {
7171
// CHECK-NOT: call void @llvm.assume
72-
// Taking address of one-past-the-end is allowed by C standard.
73-
// We should NOT assume anything about this access.
72+
// Taking address of one-past-the-end is legal per C standard.
73+
// Used in iterators (e.g., arr + size, std::end(arr)).
74+
// No assumes are generated for address-only operations.
7475
return &extern_array[100]; // Legal: one past the end.
7576
}
7677

clang/test/CodeGen/array-bounds-constraints.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99
// NO-FLAG-LABEL: define {{.*}} @test_simple_array
1010
void init_array(int *arr);
1111
int test_simple_array(int i) {
12-
int arr[10]; // C arrays are 0-based: valid indices are [0, 9]
13-
init_array(arr); // Initialize to avoid UB from uninitialized read.
14-
// CHECK: %{{.*}} = icmp ult i32 %i, 10
12+
int arr[10];
13+
init_array(arr);
14+
// Single-dimension array subscript: Accessed defaults to false to support
15+
// C++ iterators that use &arr[size]. This generates index <= 10, emitted as
16+
// `icmp ult i32 %i, 11` (not 10), to allow one-past-the-end address formation.
17+
// CHECK: %{{.*}} = icmp ult i32 %i, 11
1518
// CHECK: call void @llvm.assume(i1 %{{.*}})
1619
// NO-FLAG-NOT: call void @llvm.assume
1720
return arr[i];
@@ -21,9 +24,11 @@ int test_simple_array(int i) {
2124
int test_multidimensional_array(int i, int j) {
2225
int arr[5][8]; // Valid indices: i in [0, 4], j in [0, 7]
2326
init_array(arr[0]); // Initialize to avoid UB from uninitialized read.
27+
// Multidimensional: the inner subscript (i) uses Accessed=true (strict i < 5);
28+
// the outer subscript (j) allows one-past-the-end (j <= 8, emitted as ult 9).
2429
// CHECK: %{{.*}} = icmp ult i32 %i, 5
2530
// CHECK: call void @llvm.assume(i1 %{{.*}})
26-
// CHECK: %{{.*}} = icmp ult i32 %j, 8
31+
// CHECK: %{{.*}} = icmp ult i32 %j, 9
2732
// CHECK: call void @llvm.assume(i1 %{{.*}})
2833
return arr[i][j];
2934
}
@@ -32,15 +37,17 @@ int test_multidimensional_array(int i, int j) {
3237
int test_unsigned_index(unsigned int i) {
3338
int arr[10];
3439
init_array(arr); // Initialize to avoid UB from uninitialized read.
35-
// CHECK: %{{.*}} = icmp ult i32 %i, 10
40+
// Accessed=false, allows one-past-the-end
41+
// CHECK: %{{.*}} = icmp ult i32 %i, 11
3642
// CHECK: call void @llvm.assume(i1 %{{.*}})
3743
return arr[i];
3844
}
3945

4046
// CHECK-LABEL: define {{.*}} @test_store_undef
4147
void test_store_undef(int i, int value) {
4248
int arr[10];
43-
// CHECK: %{{.*}} = icmp ult i32 %i, 10
49+
// Accessed=false, allows one-past-the-end
50+
// CHECK: %{{.*}} = icmp ult i32 %i, 11
4451
// CHECK: call void @llvm.assume(i1 %{{.*}})
4552
arr[i] = value;
4653
init_array(arr); // Avoid optimization of the above statement.

0 commit comments

Comments
 (0)