@@ -4559,6 +4559,97 @@ void CodeGenFunction::EmitCountedByBoundsChecking(
45594559 }
45604560}
45614561
4562+ // / Emit array bounds constraints using llvm.assume for optimization hints.
4563+ // /
4564+ // / C Standard (ISO/IEC 9899:2011 - C11)
4565+ // / Section J.2 (Undefined behavior): An array subscript is out of range, even
4566+ // / if an object is apparently accessible with the given subscript (as in the
4567+ // / lvalue expression a[1][7] given the declaration int a[4][5]) (6.5.6).
4568+ // /
4569+ // / Section 6.5.6 (Additive operators): If both the pointer operand and the
4570+ // / result point to elements of the same array object, or one past the last
4571+ // / element of the array object, the evaluation shall not produce an overflow;
4572+ // / otherwise, the behavior is undefined.
4573+ // /
4574+ // / C++ Standard (ISO/IEC 14882 - 2017)
4575+ // / Section 8.7 (Additive operators):
4576+ // / 4 When an expression that has integral type is added to or subtracted from a
4577+ // / pointer, the result has the type of the pointer operand. If the expression
4578+ // / P points to element x[i] of an array object x with n elements,^86 the
4579+ // / expressions P + J and J + P (where J has the value j) point to the
4580+ // / (possibly-hypothetical) element x[i + j] if 0 ≤ i + j ≤ n; otherwise, the
4581+ // / behavior is undefined. Likewise, the expression P - J points to the
4582+ // / (possibly-hypothetical) element x[i − j] if 0 ≤ i − j ≤ n; otherwise, the
4583+ // / behavior is undefined.
4584+ // / ^86 A pointer past the last element of an array x of n elements is
4585+ // / considered to be equivalent to a pointer to a hypothetical element x[n]
4586+ // / for this purpose; see 6.9.2.
4587+ // /
4588+ // / This function emits llvm.assume statements to inform the optimizer that
4589+ // / array subscripts are within bounds, enabling better optimization without
4590+ // / duplicating side effects from the subscript expression. The IndexVal
4591+ // / parameter should be the already-emitted index value to avoid re-evaluation.
4592+ void CodeGenFunction::EmitArrayBoundsConstraints (const ArraySubscriptExpr *E,
4593+ llvm::Value *IndexVal) {
4594+ const Expr *Base = E->getBase ();
4595+ const Expr *Idx = E->getIdx ();
4596+ QualType BaseType = Base->getType ();
4597+
4598+ if (const auto *ICE = dyn_cast<ImplicitCastExpr>(Base)) {
4599+ if (ICE->getCastKind () == CK_ArrayToPointerDecay) {
4600+ BaseType = ICE->getSubExpr ()->getType ();
4601+ }
4602+ }
4603+
4604+ // For now: only handle constant array types.
4605+ const ConstantArrayType *CAT = getContext ().getAsConstantArrayType (BaseType);
4606+ if (!CAT)
4607+ return ;
4608+
4609+ llvm::APInt ArraySize = CAT->getSize ();
4610+ if (ArraySize == 0 )
4611+ return ;
4612+
4613+ QualType IdxType = Idx->getType ();
4614+ llvm::Type *IndexType = ConvertType (IdxType);
4615+ llvm::Value *Zero = llvm::ConstantInt::get (IndexType, 0 );
4616+
4617+ uint64_t ArraySizeValue = ArraySize.getLimitedValue ();
4618+ llvm::Value *ArraySizeVal = llvm::ConstantInt::get (IndexType, ArraySizeValue);
4619+
4620+ // Use the provided IndexVal to avoid duplicating side effects.
4621+ // The caller has already emitted the index expression once.
4622+ if (!IndexVal)
4623+ return ;
4624+
4625+ // Ensure index value has the same type as our constants.
4626+ if (IndexVal->getType () != IndexType) {
4627+ bool IsSigned = IdxType->isSignedIntegerOrEnumerationType ();
4628+ IndexVal = Builder.CreateIntCast (IndexVal, IndexType, IsSigned, " idx.cast" );
4629+ }
4630+
4631+ // Create bounds constraint: 0 <= index && index < size.
4632+ // C arrays are 0-based, so valid indices are [0, size-1].
4633+ // This enforces the C18 standard requirement that array subscripts
4634+ // must be "greater than or equal to zero and less than the size of the
4635+ // array."
4636+ llvm::Value *LowerBound, *UpperBound;
4637+ if (IdxType->isSignedIntegerOrEnumerationType ()) {
4638+ // For signed indices: index >= 0 && index < size.
4639+ LowerBound = Builder.CreateICmpSGE (IndexVal, Zero, " idx.ge.zero" );
4640+ UpperBound = Builder.CreateICmpSLT (IndexVal, ArraySizeVal, " idx.lt.size" );
4641+ } else {
4642+ // For unsigned indices: index < size (>= 0 is implicit).
4643+ LowerBound = Builder.getTrue ();
4644+ UpperBound = Builder.CreateICmpULT (IndexVal, ArraySizeVal, " idx.lt.size" );
4645+ }
4646+
4647+ llvm::Value *BoundsConstraint =
4648+ Builder.CreateAnd (LowerBound, UpperBound, " bounds.constraint" );
4649+ llvm::Function *AssumeIntrinsic = CGM.getIntrinsic (llvm::Intrinsic::assume);
4650+ Builder.CreateCall (AssumeIntrinsic, BoundsConstraint);
4651+ }
4652+
45624653LValue CodeGenFunction::EmitArraySubscriptExpr (const ArraySubscriptExpr *E,
45634654 bool Accessed) {
45644655 // The index must always be an integer, which is not an aggregate. Emit it
@@ -4588,13 +4679,20 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
45884679 };
45894680 IdxPre = nullptr ;
45904681
4682+ // Array bounds constraints will be emitted after index evaluation to avoid
4683+ // duplicating side effects from the index expression.
4684+
45914685 // If the base is a vector type, then we are forming a vector element lvalue
45924686 // with this subscript.
45934687 if (E->getBase ()->getType ()->isSubscriptableVectorType () &&
45944688 !isa<ExtVectorElementExpr>(E->getBase ())) {
45954689 // Emit the vector as an lvalue to get its address.
45964690 LValue LHS = EmitLValue (E->getBase ());
45974691 auto *Idx = EmitIdxAfterBase (/* Promote*/ false );
4692+
4693+ // Emit array bounds constraints for vector subscripts.
4694+ EmitArrayBoundsConstraints (E, Idx);
4695+
45984696 assert (LHS.isSimple () && " Can only subscript lvalue vectors here!" );
45994697 return LValue::MakeVectorElt (LHS.getAddress (), Idx, E->getBase ()->getType (),
46004698 LHS.getBaseInfo (), TBAAAccessInfo ());
@@ -4635,6 +4733,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
46354733 Addr = EmitPointerWithAlignment (E->getBase (), &EltBaseInfo, &EltTBAAInfo);
46364734 auto *Idx = EmitIdxAfterBase (/* Promote*/ true );
46374735
4736+ // Emit array bounds constraints for VLA access (though VLAs typically don't
4737+ // have constant bounds).
4738+ EmitArrayBoundsConstraints (E, Idx);
4739+
46384740 // The element count here is the total number of non-VLA elements.
46394741 llvm::Value *numElements = getVLASize (vla).NumElts ;
46404742
@@ -4659,6 +4761,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
46594761 Addr = EmitPointerWithAlignment (E->getBase (), &EltBaseInfo, &EltTBAAInfo);
46604762 auto *Idx = EmitIdxAfterBase (/* Promote*/ true );
46614763
4764+ // Emit array bounds constraints for ObjC interface access.
4765+ EmitArrayBoundsConstraints (E, Idx);
4766+
46624767 CharUnits InterfaceSize = getContext ().getTypeSizeInChars (OIT);
46634768 llvm::Value *InterfaceSizeVal =
46644769 llvm::ConstantInt::get (Idx->getType (), InterfaceSize.getQuantity ());
@@ -4694,6 +4799,9 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
46944799 ArrayLV = EmitLValue (Array);
46954800 auto *Idx = EmitIdxAfterBase (/* Promote*/ true );
46964801
4802+ // Emit array bounds constraints for optimization.
4803+ EmitArrayBoundsConstraints (E, Idx);
4804+
46974805 if (SanOpts.has (SanitizerKind::ArrayBounds))
46984806 EmitCountedByBoundsChecking (Array, Idx, ArrayLV.getAddress (),
46994807 E->getIdx ()->getType (), Array->getType (),
@@ -4737,6 +4845,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
47374845 Address BaseAddr =
47384846 EmitPointerWithAlignment (E->getBase (), &EltBaseInfo, &EltTBAAInfo);
47394847 auto *Idx = EmitIdxAfterBase (/* Promote*/ true );
4848+
4849+ // Emit array bounds constraints for pointer-based array access.
4850+ EmitArrayBoundsConstraints (E, Idx);
4851+
47404852 QualType ptrType = E->getBase ()->getType ();
47414853 Addr = emitArraySubscriptGEP (*this , BaseAddr, Idx, E->getType (),
47424854 !getLangOpts ().PointerOverflowDefined ,
0 commit comments