Skip to content

Commit 73b1913

Browse files
committed
[SimplifyLibCalls] Add initial support for non-8-bit bytes
The patch makes the `CharWidth` argument of `getStringLength` mandatory and ensures that the correct value is passed in most cases. This is *not* complete support for unusual byte widths in SimplifyLibCalls, since `getConstantStringInfo` still returns false for those. The code guarded by `getConstantStringInfo` returning true is left unchanged because changes there are currently not testable.
1 parent 0d66f01 commit 73b1913

35 files changed

+929
-101
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ struct ConstantDataArraySlice {
389389
/// with an appropriate offset.
390390
LLVM_ABI bool getConstantDataArrayInfo(const Value *V,
391391
ConstantDataArraySlice &Slice,
392-
unsigned ElementSize,
392+
unsigned ElementBitWidth,
393393
uint64_t Offset = 0);
394394

395395
/// This function computes the length of a null-terminated C string pointed to
@@ -403,7 +403,7 @@ LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str,
403403

404404
/// If we can compute the length of the string pointed to by the specified
405405
/// pointer, return 'len+1'. If we can't, return 0.
406-
LLVM_ABI uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
406+
LLVM_ABI uint64_t getStringLength(const Value *V, unsigned CharWidth);
407407

408408
/// This function returns call pointer argument that is considered the same by
409409
/// aliasing rules. You CAN'T use it to replace one value with another. If

llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@ class LibCallSimplifier {
261261
bool hasFloatVersion(const Module *M, StringRef FuncName);
262262

263263
/// Shared code to optimize strlen+wcslen and strnlen+wcsnlen.
264-
Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B, unsigned CharSize,
265-
Value *Bound = nullptr);
264+
Value *optimizeStringLength(CallInst *CI, IRBuilderBase &B,
265+
unsigned CharWidth, Value *Bound = nullptr);
266266
};
267267
} // End llvm namespace
268268

llvm/lib/Analysis/MemoryBuiltins.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,8 @@ llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI,
379379

380380
// Handle strdup-like functions separately.
381381
if (FnData->AllocTy == StrDupLike) {
382-
APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0))));
382+
APInt Size(IntTyBits, getStringLength(Mapper(CB->getArgOperand(0)),
383+
DL.getByteWidth()));
383384
if (!Size)
384385
return std::nullopt;
385386

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6528,17 +6528,14 @@ llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
65286528
}
65296529

65306530
// If V refers to an initialized global constant, set Slice either to
6531-
// its initializer if the size of its elements equals ElementSize, or,
6532-
// for ElementSize == 8, to its representation as an array of unsiged
6533-
// char. Return true on success.
6534-
// Offset is in the unit "nr of ElementSize sized elements".
6531+
// its initializer if the bit width of its elements equals ElementBitWidth,
6532+
// or, for ElementBitWidth == CHAR_BIT, to its representation as an array
6533+
// of unsigned char. Return true on success.
6534+
// Offset is in the unit "nr of ElementBitWidth sized elements".
65356535
bool llvm::getConstantDataArrayInfo(const Value *V,
65366536
ConstantDataArraySlice &Slice,
6537-
unsigned ElementSize, uint64_t Offset) {
6537+
unsigned ElementBitWidth, uint64_t Offset) {
65386538
assert(V && "V should not be null.");
6539-
assert((ElementSize % 8) == 0 &&
6540-
"ElementSize expected to be a multiple of the size of a byte.");
6541-
unsigned ElementSizeInBytes = ElementSize / 8;
65426539

65436540
// Drill down into the pointer expression V, ignoring any intervening
65446541
// casts, and determine the identity of the object it references along
@@ -6550,6 +6547,11 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
65506547
return false;
65516548

65526549
const DataLayout &DL = GV->getDataLayout();
6550+
unsigned ByteWidth = DL.getByteWidth();
6551+
assert((ElementBitWidth % ByteWidth) == 0 &&
6552+
"ElementBitWidth is expected to be a multiple of the byte width");
6553+
unsigned ElementSizeInBytes = ElementBitWidth / ByteWidth;
6554+
65536555
APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
65546556

65556557
if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
@@ -6589,7 +6591,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
65896591
auto *Init = const_cast<Constant *>(GV->getInitializer());
65906592
if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
65916593
Type *InitElTy = ArrayInit->getElementType();
6592-
if (InitElTy->isIntegerTy(ElementSize)) {
6594+
if (InitElTy->isIntegerTy(ElementBitWidth)) {
65936595
// If Init is an initializer for an array of the expected type
65946596
// and size, use it as is.
65956597
Array = ArrayInit;
@@ -6598,7 +6600,7 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
65986600
}
65996601

66006602
if (!Array) {
6601-
if (ElementSize != 8)
6603+
if (ElementBitWidth != CHAR_BIT)
66026604
// TODO: Handle conversions to larger integral types.
66036605
return false;
66046606

@@ -6676,9 +6678,9 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
66766678

66776679
/// If we can compute the length of the string pointed to by
66786680
/// the specified pointer, return 'len+1'. If we can't, return 0.
6679-
static uint64_t GetStringLengthH(const Value *V,
6680-
SmallPtrSetImpl<const PHINode*> &PHIs,
6681-
unsigned CharSize) {
6681+
static uint64_t getStringLength(const Value *V,
6682+
SmallPtrSetImpl<const PHINode *> &PHIs,
6683+
unsigned CharWidth) {
66826684
// Look through noop bitcast instructions.
66836685
V = V->stripPointerCasts();
66846686

@@ -6691,7 +6693,7 @@ static uint64_t GetStringLengthH(const Value *V,
66916693
// If it was new, see if all the input strings are the same length.
66926694
uint64_t LenSoFar = ~0ULL;
66936695
for (Value *IncValue : PN->incoming_values()) {
6694-
uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
6696+
uint64_t Len = getStringLength(IncValue, PHIs, CharWidth);
66956697
if (Len == 0) return 0; // Unknown length -> unknown.
66966698

66976699
if (Len == ~0ULL) continue;
@@ -6707,9 +6709,9 @@ static uint64_t GetStringLengthH(const Value *V,
67076709

67086710
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
67096711
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
6710-
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
6712+
uint64_t Len1 = getStringLength(SI->getTrueValue(), PHIs, CharWidth);
67116713
if (Len1 == 0) return 0;
6712-
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
6714+
uint64_t Len2 = getStringLength(SI->getFalseValue(), PHIs, CharWidth);
67136715
if (Len2 == 0) return 0;
67146716
if (Len1 == ~0ULL) return Len2;
67156717
if (Len2 == ~0ULL) return Len1;
@@ -6719,7 +6721,7 @@ static uint64_t GetStringLengthH(const Value *V,
67196721

67206722
// Otherwise, see if we can read the string.
67216723
ConstantDataArraySlice Slice;
6722-
if (!getConstantDataArrayInfo(V, Slice, CharSize))
6724+
if (!getConstantDataArrayInfo(V, Slice, CharWidth))
67236725
return 0;
67246726

67256727
if (Slice.Array == nullptr)
@@ -6741,12 +6743,12 @@ static uint64_t GetStringLengthH(const Value *V,
67416743

67426744
/// If we can compute the length of the string pointed to by
67436745
/// the specified pointer, return 'len+1'. If we can't, return 0.
6744-
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6746+
uint64_t llvm::getStringLength(const Value *V, unsigned CharWidth) {
67456747
if (!V->getType()->isPointerTy())
67466748
return 0;
67476749

67486750
SmallPtrSet<const PHINode*, 32> PHIs;
6749-
uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6751+
uint64_t Len = ::getStringLength(V, PHIs, CharWidth);
67506752
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
67516753
// an empty string as a length.
67526754
return Len == ~0ULL ? 1 : Len;

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
158158
uint64_t Size = MemOpLength->getLimitedValue();
159159
assert(Size && "0-sized memory transferring should be removed already.");
160160

161-
if (Size > 8 || (Size&(Size-1)))
161+
uint64_t MemOpWidth = Size * DL.getByteWidth();
162+
if (MemOpWidth > 64 || (Size & (Size - 1)))
162163
return nullptr; // If not 1/2/4/8 bytes, exit.
163164

164165
// If it is an atomic and alignment is less than the size then we will
@@ -170,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
170171
return nullptr;
171172

172173
// Use an integer load+store unless we can find something better.
173-
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
174+
IntegerType *IntType = IntegerType::get(MI->getContext(), MemOpWidth);
174175

175176
// If the memcpy has metadata describing the members, see if we can get the
176177
// TBAA, scope and noalias tags describing our copy.
@@ -246,7 +247,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
246247
// Extract the length and alignment and fill if they are constant.
247248
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
248249
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
249-
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
250+
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(DL.getByteWidth()))
250251
return nullptr;
251252
const uint64_t Len = LenC->getLimitedValue();
252253
assert(Len && "0-sized memory setting should be removed already.");
@@ -260,12 +261,13 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
260261
return nullptr;
261262

262263
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
263-
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
264+
uint64_t MemOpWidth = Len * DL.getByteWidth();
265+
if (MemOpWidth <= 64 && isPowerOf2_32((uint32_t)Len)) {
264266
Value *Dest = MI->getDest();
265267

266268
// Extract the fill value and store.
267269
Constant *FillVal = ConstantInt::get(
268-
MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
270+
MI->getContext(), APInt::getSplat(MemOpWidth, FillC->getValue()));
269271
StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
270272
S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
271273
for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3252,8 +3252,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
32523252
if (MadeChange)
32533253
return &GEP;
32543254

3255-
// Canonicalize constant GEPs to i8 type.
3256-
if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3255+
// Canonicalize constant GEPs to byte type.
3256+
if (!GEPEltType->isIntegerTy(DL.getByteWidth()) &&
3257+
GEP.hasAllConstantIndices()) {
32573258
APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
32583259
if (GEP.accumulateConstantOffset(DL, Offset))
32593260
return replaceInstUsesWith(

0 commit comments

Comments
 (0)