Skip to content

Commit e810d55

Browse files
author
serge-sans-paille
committed
[ValueTracking] Make GetStringLength aware of strdup
During compile-time evaluation of strlen, make it possible to track the length of strings returned by strdup. Differential Revision: https://reviews.llvm.org/D123497
1 parent 0231a90 commit e810d55

File tree

5 files changed

+60
-26
lines changed

5 files changed

+60
-26
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,9 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
340340

341341
/// If we can compute the length of the string pointed to by the specified
342342
/// pointer, return 'len+1'. If we can't, return 0.
343-
uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
343+
uint64_t GetStringLength(const Value *V,
344+
const TargetLibraryInfo *TLI = nullptr,
345+
unsigned CharSize = 8);
344346

345347
/// This function returns call pointer argument that is considered the same by
346348
/// aliasing rules. You CAN'T use it to replace one value with another. If

llvm/lib/Analysis/MemoryBuiltins.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ llvm::getAllocSize(const CallBase *CB,
374374

375375
// Handle strdup-like functions separately.
376376
if (FnData->AllocTy == StrDupLike) {
377-
APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0))));
377+
APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0)), TLI));
378378
if (!Size)
379379
return None;
380380

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4199,7 +4199,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
41994199
/// If we can compute the length of the string pointed to by
42004200
/// the specified pointer, return 'len+1'. If we can't, return 0.
42014201
static uint64_t GetStringLengthH(const Value *V,
4202-
SmallPtrSetImpl<const PHINode*> &PHIs,
4202+
SmallPtrSetImpl<const PHINode *> &PHIs,
4203+
const TargetLibraryInfo *TLI,
42034204
unsigned CharSize) {
42044205
// Look through noop bitcast instructions.
42054206
V = V->stripPointerCasts();
@@ -4213,7 +4214,7 @@ static uint64_t GetStringLengthH(const Value *V,
42134214
// If it was new, see if all the input strings are the same length.
42144215
uint64_t LenSoFar = ~0ULL;
42154216
for (Value *IncValue : PN->incoming_values()) {
4216-
uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
4217+
uint64_t Len = GetStringLengthH(IncValue, PHIs, TLI, CharSize);
42174218
if (Len == 0) return 0; // Unknown length -> unknown.
42184219

42194220
if (Len == ~0ULL) continue;
@@ -4229,16 +4230,32 @@ static uint64_t GetStringLengthH(const Value *V,
42294230

42304231
// strlen(select(c,x,y)) -> the common length if strlen(x) == strlen(y), otherwise unknown (0)
42314232
if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
4232-
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
4233+
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, TLI, CharSize);
42334234
if (Len1 == 0) return 0;
4234-
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
4235+
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, TLI, CharSize);
42354236
if (Len2 == 0) return 0;
42364237
if (Len1 == ~0ULL) return Len2;
42374238
if (Len2 == ~0ULL) return Len1;
42384239
if (Len1 != Len2) return 0;
42394240
return Len1;
42404241
}
42414242

4243+
if (auto *CB = dyn_cast<CallBase>(V)) {
4244+
Function *Callee = CB->getCalledFunction();
4245+
if (!Callee)
4246+
return 0;
4247+
4248+
LibFunc TLIFn;
4249+
if (!TLI || !TLI->getLibFunc(*CB->getCalledFunction(), TLIFn) ||
4250+
!TLI->has(TLIFn))
4251+
return 0;
4252+
4253+
if (TLIFn == LibFunc_strdup || TLIFn == LibFunc_dunder_strdup)
4254+
return GetStringLengthH(CB->getArgOperand(0), PHIs, TLI, CharSize);
4255+
4256+
return 0;
4257+
}
4258+
42424259
// Otherwise, see if we can read the string.
42434260
ConstantDataArraySlice Slice;
42444261
if (!getConstantDataArrayInfo(V, Slice, CharSize))
@@ -4259,12 +4276,13 @@ static uint64_t GetStringLengthH(const Value *V,
42594276

42604277
/// If we can compute the length of the string pointed to by
42614278
/// the specified pointer, return 'len+1'. If we can't, return 0.
4262-
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
4279+
uint64_t llvm::GetStringLength(const Value *V, const TargetLibraryInfo *TLI,
4280+
unsigned CharSize) {
42634281
if (!V->getType()->isPointerTy())
42644282
return 0;
42654283

42664284
SmallPtrSet<const PHINode*, 32> PHIs;
4267-
uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
4285+
uint64_t Len = GetStringLengthH(V, PHIs, TLI, CharSize);
42684286
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
42694287
// an empty string as a length.
42704288
return Len == ~0ULL ? 1 : Len;

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilderBase &B) {
212212
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
213213

214214
// See if we can get the length of the input string.
215-
uint64_t Len = GetStringLength(Src);
215+
uint64_t Len = GetStringLength(Src, TLI);
216216
if (Len)
217217
annotateDereferenceableBytes(CI, 1, Len);
218218
else
@@ -269,7 +269,7 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
269269
}
270270

271271
// See if we can get the length of the input string.
272-
uint64_t SrcLen = GetStringLength(Src);
272+
uint64_t SrcLen = GetStringLength(Src, TLI);
273273
if (SrcLen) {
274274
annotateDereferenceableBytes(CI, 1, SrcLen);
275275
--SrcLen; // Unbias length.
@@ -300,7 +300,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
300300
// of the input string and turn this into memchr.
301301
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
302302
if (!CharC) {
303-
uint64_t Len = GetStringLength(SrcStr);
303+
uint64_t Len = GetStringLength(SrcStr, TLI);
304304
if (Len)
305305
annotateDereferenceableBytes(CI, 0, Len);
306306
else
@@ -387,10 +387,10 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
387387
CI->getType());
388388

389389
// strcmp(P, "x") -> memcmp(P, "x", 2)
390-
uint64_t Len1 = GetStringLength(Str1P);
390+
uint64_t Len1 = GetStringLength(Str1P, TLI);
391391
if (Len1)
392392
annotateDereferenceableBytes(CI, 0, Len1);
393-
uint64_t Len2 = GetStringLength(Str2P);
393+
uint64_t Len2 = GetStringLength(Str2P, TLI);
394394
if (Len2)
395395
annotateDereferenceableBytes(CI, 1, Len2);
396396

@@ -464,10 +464,10 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
464464
return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
465465
CI->getType());
466466

467-
uint64_t Len1 = GetStringLength(Str1P);
467+
uint64_t Len1 = GetStringLength(Str1P, TLI);
468468
if (Len1)
469469
annotateDereferenceableBytes(CI, 0, Len1);
470-
uint64_t Len2 = GetStringLength(Str2P);
470+
uint64_t Len2 = GetStringLength(Str2P, TLI);
471471
if (Len2)
472472
annotateDereferenceableBytes(CI, 1, Len2);
473473

@@ -496,7 +496,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
496496
Value *LibCallSimplifier::optimizeStrNDup(CallInst *CI, IRBuilderBase &B) {
497497
Value *Src = CI->getArgOperand(0);
498498
ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1));
499-
uint64_t SrcLen = GetStringLength(Src);
499+
uint64_t SrcLen = GetStringLength(Src, TLI);
500500
if (SrcLen && Size) {
501501
annotateDereferenceableBytes(CI, 0, SrcLen);
502502
if (SrcLen <= Size->getZExtValue() + 1)
@@ -513,7 +513,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
513513

514514
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
515515
// See if we can get the length of the input string.
516-
uint64_t Len = GetStringLength(Src);
516+
uint64_t Len = GetStringLength(Src, TLI);
517517
if (Len)
518518
annotateDereferenceableBytes(CI, 1, Len);
519519
else
@@ -544,7 +544,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
544544
}
545545

546546
// See if we can get the length of the input string.
547-
uint64_t Len = GetStringLength(Src);
547+
uint64_t Len = GetStringLength(Src, TLI);
548548
if (Len)
549549
annotateDereferenceableBytes(CI, 1, Len);
550550
else
@@ -584,7 +584,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
584584
return Dst;
585585

586586
// See if we can get the length of the input string.
587-
uint64_t SrcLen = GetStringLength(Src);
587+
uint64_t SrcLen = GetStringLength(Src, TLI);
588588
if (SrcLen) {
589589
annotateDereferenceableBytes(CI, 1, SrcLen);
590590
--SrcLen; // Unbias length.
@@ -633,7 +633,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
633633
Value *Src = CI->getArgOperand(0);
634634

635635
// Constant folding: strlen("xyz") -> 3
636-
if (uint64_t Len = GetStringLength(Src, CharSize))
636+
if (uint64_t Len = GetStringLength(Src, TLI, CharSize))
637637
return ConstantInt::get(CI->getType(), Len - 1);
638638

639639
// If s is a constant pointer pointing to a string literal, we can fold
@@ -688,8 +688,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
688688

689689
// strlen(x?"foo":"bars") --> x ? 3 : 4
690690
if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
691-
uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
692-
uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
691+
uint64_t LenTrue = GetStringLength(SI->getTrueValue(), TLI, CharSize);
692+
uint64_t LenFalse = GetStringLength(SI->getFalseValue(), TLI, CharSize);
693693
if (LenTrue && LenFalse) {
694694
ORE.emit([&]() {
695695
return OptimizationRemark("instcombine", "simplify-libcalls", CI)
@@ -2511,7 +2511,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
25112511
// sprintf(dest, "%s", str) -> strcpy(dest, str)
25122512
return copyFlags(*CI, emitStrCpy(Dest, CI->getArgOperand(2), B, TLI));
25132513

2514-
uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
2514+
uint64_t SrcLen = GetStringLength(CI->getArgOperand(2), TLI);
25152515
if (SrcLen) {
25162516
B.CreateMemCpy(
25172517
Dest, Align(1), CI->getArgOperand(2), Align(1),
@@ -2803,7 +2803,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
28032803
return nullptr;
28042804

28052805
// fputs(s,F) --> fwrite(s,strlen(s),1,F)
2806-
uint64_t Len = GetStringLength(CI->getArgOperand(0));
2806+
uint64_t Len = GetStringLength(CI->getArgOperand(0), TLI);
28072807
if (!Len)
28082808
return nullptr;
28092809

@@ -3247,7 +3247,7 @@ FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
32473247
if (OnlyLowerUnknownSize)
32483248
return false;
32493249
if (StrOp) {
3250-
uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
3250+
uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp), TLI);
32513251
// If the length is 0 we don't know how long it is and so we can't
32523252
// remove the check.
32533253
if (Len)
@@ -3351,7 +3351,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
33513351
return nullptr;
33523352

33533353
// Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
3354-
uint64_t Len = GetStringLength(Src);
3354+
uint64_t Len = GetStringLength(Src, TLI);
33553355
if (Len)
33563356
annotateDereferenceableBytes(CI, 1, Len);
33573357
else

llvm/test/Transforms/InstCombine/strlen-1.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
1414
@null_hello_mid = constant [13 x i8] c"hello wor\00ld\00"
1515

1616
declare i32 @strlen(i8*)
17+
declare noalias i8* @strdup(i8*)
1718

1819
; Check strlen(string constant) -> integer constant.
1920

@@ -280,4 +281,17 @@ define i1 @strlen0_after_write_to_second_byte(i8 *%ptr) {
280281
ret i1 %cmp
281282
}
282283

284+
; Check strlen(strdup(string constant)) -> integer constant.
285+
286+
define i32 @test_simplify_strduped_constant() {
287+
; CHECK-LABEL: @test_simplify_strduped_constant(
288+
; CHECK-NEXT: ret i32 5
289+
;
290+
%hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
291+
%hello_s = call i8* @strdup(i8* %hello_p)
292+
%hello_l = call i32 @strlen(i8* %hello_s)
293+
ret i32 %hello_l
294+
}
295+
296+
283297
attributes #0 = { null_pointer_is_valid }

0 commit comments

Comments
 (0)