Skip to content

Commit a32b79e

Browse files
committed
[SimplifyLibCalls] Add initial support for non-8-bit bytes
The patch makes the CharWidth argument of `getStringLength` mandatory and ensures the correct values are passed in most cases. This is *not* complete support for unusual byte widths in SimplifyLibCalls, since `getConstantStringInfo` returns false for those. The code guarded by `getConstantStringInfo` returning true is left unchanged because changes to it are currently not testable.
1 parent e5b70fc commit a32b79e

35 files changed

+928
-101
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,7 @@ struct ConstantDataArraySlice {
684684
/// If successful \p Slice will point to a ConstantDataArray info object
685685
/// with an appropriate offset.
686686
bool getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice,
687-
unsigned ElementSize, uint64_t Offset = 0);
687+
unsigned ElementBitWidth, uint64_t Offset = 0);
688688

689689
/// This function computes the length of a null-terminated C string pointed to
690690
/// by V. If successful, it returns true and returns the string in Str. If
@@ -697,7 +697,7 @@ bool getConstantStringInfo(const Value *V, StringRef &Str, unsigned CharWidth,
697697

698698
/// If we can compute the length of the string pointed to by the specified
699699
/// pointer, return 'len+1'. If we can't, return 0.
700-
uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
700+
uint64_t getStringLength(const Value *V, unsigned CharWidth);
701701

702702
/// This function returns call pointer argument that is considered the same by
703703
/// aliasing rules. You CAN'T use it to replace one value with another. If

llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,8 +253,8 @@ class LibCallSimplifier {
253253
bool hasFloatVersion(const Module *M, StringRef FuncName);
254254

255255
/// Shared code to optimize strlen+wcslen and strnlen+wcsnlen.
256-
Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize,
257-
Value *Bound = nullptr);
256+
Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B,
257+
unsigned CharWidth, Value *Bound = nullptr);
258258
};
259259
} // End llvm namespace
260260

llvm/lib/Analysis/MemoryBuiltins.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,8 @@ llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI,
380380

381381
// Handle strdup-like functions separately.
382382
if (FnData->AllocTy == StrDupLike) {
383-
APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0))));
383+
APInt Size(IntTyBits, getStringLength(Mapper(CB->getArgOperand(0)),
384+
DL.getByteWidth()));
384385
if (!Size)
385386
return std::nullopt;
386387

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6236,17 +6236,14 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
62366236
}
62376237

62386238
// If V refers to an initialized global constant, set Slice either to
6239-
// its initializer if the size of its elements equals ElementSize, or,
6240-
// for ElementSize == 8, to its representation as an array of unsiged
6241-
// char. Return true on success.
6242-
// Offset is in the unit "nr of ElementSize sized elements".
6239+
// its initializer if the bit width of its elements equals ElementBitWidth,
6240+
// or, for ElementBitWidth == CHAR_BIT, to its representation as an array
6241+
// of unsigned char. Return true on success.
6242+
// Offset is in the unit "nr of ElementBitWidth sized elements".
62436243
bool llvm::getConstantDataArrayInfo(const Value *V,
62446244
ConstantDataArraySlice &Slice,
6245-
unsigned ElementSize, uint64_t Offset) {
6245+
unsigned ElementBitWidth, uint64_t Offset) {
62466246
assert(V && "V should not be null.");
6247-
assert((ElementSize % 8) == 0 &&
6248-
"ElementSize expected to be a multiple of the size of a byte.");
6249-
unsigned ElementSizeInBytes = ElementSize / 8;
62506247

62516248
// Drill down into the pointer expression V, ignoring any intervening
62526249
// casts, and determine the identity of the object it references along
@@ -6258,6 +6255,11 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
62586255
return false;
62596256

62606257
const DataLayout &DL = GV->getDataLayout();
6258+
unsigned ByteWidth = DL.getByteWidth();
6259+
assert((ElementBitWidth % ByteWidth) == 0 &&
6260+
"ElementBitWidth is expected to be a multiple of the byte width");
6261+
unsigned ElementSizeInBytes = ElementBitWidth / ByteWidth;
6262+
62616263
APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
62626264

62636265
if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
@@ -6297,7 +6299,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
62976299
auto *Init = const_cast<Constant *>(GV->getInitializer());
62986300
if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
62996301
Type *InitElTy = ArrayInit->getElementType();
6300-
if (InitElTy->isIntegerTy(ElementSize)) {
6302+
if (InitElTy->isIntegerTy(ElementBitWidth)) {
63016303
// If Init is an initializer for an array of the expected type
63026304
// and size, use it as is.
63036305
Array = ArrayInit;
@@ -6306,7 +6308,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
63066308
}
63076309

63086310
if (!Array) {
6309-
if (ElementSize != 8)
6311+
if (ElementBitWidth != CHAR_BIT)
63106312
// TODO: Handle conversions to larger integral types.
63116313
return false;
63126314

@@ -6384,9 +6386,9 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
63846386

63856387
/// If we can compute the length of the string pointed to by
63866388
/// the specified pointer, return 'len+1'. If we can't, return 0.
6387-
static uint64_t GetStringLengthH(const Value *V,
6388-
SmallPtrSetImpl<const PHINode*> &PHIs,
6389-
unsigned CharSize) {
6389+
static uint64_t getStringLength(const Value *V,
6390+
SmallPtrSetImpl<const PHINode *> &PHIs,
6391+
unsigned CharWidth) {
63906392
// Look through noop bitcast instructions.
63916393
V = V->stripPointerCasts();
63926394

@@ -6399,7 +6401,7 @@ static uint64_t GetStringLengthH(const Value *V,
63996401
// If it was new, see if all the input strings are the same length.
64006402
uint64_t LenSoFar = ~0ULL;
64016403
for (Value *IncValue : PN->incoming_values()) {
6402-
uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
6404+
uint64_t Len = getStringLength(IncValue, PHIs, CharWidth);
64036405
if (Len == 0) return 0; // Unknown length -> unknown.
64046406

64056407
if (Len == ~0ULL) continue;
@@ -6415,9 +6417,9 @@ static uint64_t GetStringLengthH(const Value *V,
64156417

64166418
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
64176419
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
6418-
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
6420+
uint64_t Len1 = getStringLength(SI->getTrueValue(), PHIs, CharWidth);
64196421
if (Len1 == 0) return 0;
6420-
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
6422+
uint64_t Len2 = getStringLength(SI->getFalseValue(), PHIs, CharWidth);
64216423
if (Len2 == 0) return 0;
64226424
if (Len1 == ~0ULL) return Len2;
64236425
if (Len2 == ~0ULL) return Len1;
@@ -6427,7 +6429,7 @@ static uint64_t GetStringLengthH(const Value *V,
64276429

64286430
// Otherwise, see if we can read the string.
64296431
ConstantDataArraySlice Slice;
6430-
if (!getConstantDataArrayInfo(V, Slice, CharSize))
6432+
if (!getConstantDataArrayInfo(V, Slice, CharWidth))
64316433
return 0;
64326434

64336435
if (Slice.Array == nullptr)
@@ -6449,12 +6451,12 @@ static uint64_t GetStringLengthH(const Value *V,
64496451

64506452
/// If we can compute the length of the string pointed to by
64516453
/// the specified pointer, return 'len+1'. If we can't, return 0.
6452-
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6454+
uint64_t llvm::getStringLength(const Value *V, unsigned CharWidth) {
64536455
if (!V->getType()->isPointerTy())
64546456
return 0;
64556457

64566458
SmallPtrSet<const PHINode*, 32> PHIs;
6457-
uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6459+
uint64_t Len = ::getStringLength(V, PHIs, CharWidth);
64586460
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
64596461
// an empty string as a length.
64606462
return Len == ~0ULL ? 1 : Len;

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
156156
uint64_t Size = MemOpLength->getLimitedValue();
157157
assert(Size && "0-sized memory transferring should be removed already.");
158158

159-
if (Size > 8 || (Size&(Size-1)))
159+
uint64_t MemOpWidth = Size * DL.getByteWidth();
160+
if (MemOpWidth > 64 || (Size & (Size - 1)))
160161
return nullptr; // If not 1/2/4/8 bytes, exit.
161162

162163
// If it is an atomic and alignment is less than the size then we will
@@ -168,7 +169,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
168169
return nullptr;
169170

170171
// Use an integer load+store unless we can find something better.
171-
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
172+
IntegerType *IntType = IntegerType::get(MI->getContext(), MemOpWidth);
172173

173174
// If the memcpy has metadata describing the members, see if we can get the
174175
// TBAA, scope and noalias tags describing our copy.
@@ -244,7 +245,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
244245
// Extract the length and alignment and fill if they are constant.
245246
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
246247
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
247-
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
248+
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(DL.getByteWidth()))
248249
return nullptr;
249250
const uint64_t Len = LenC->getLimitedValue();
250251
assert(Len && "0-sized memory setting should be removed already.");
@@ -259,12 +260,13 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
259260
return nullptr;
260261

261262
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
262-
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
263+
uint64_t MemOpWidth = Len * DL.getByteWidth();
264+
if (MemOpWidth <= 64 && isPowerOf2_32((uint32_t)Len)) {
263265
Value *Dest = MI->getDest();
264266

265267
// Extract the fill value and store.
266268
Constant *FillVal = ConstantInt::get(
267-
MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
269+
MI->getContext(), APInt::getSplat(MemOpWidth, FillC->getValue()));
268270
StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
269271
S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
270272
auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2778,8 +2778,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
27782778
if (MadeChange)
27792779
return &GEP;
27802780

2781-
// Canonicalize constant GEPs to i8 type.
2782-
if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
2781+
// Canonicalize constant GEPs to byte type.
2782+
if (!GEPEltType->isIntegerTy(DL.getByteWidth()) &&
2783+
GEP.hasAllConstantIndices()) {
27832784
APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
27842785
if (GEP.accumulateConstantOffset(DL, Offset))
27852786
return replaceInstUsesWith(

0 commit comments

Comments
 (0)