Skip to content

Commit c7c6c0a

Browse files
authored
[AggressiveInstCombine] Fix memory location for alias analysis (#169953)
When LOps.RootInsert comes after LI2, since we use LI2 as the new insert point, we should make sure the memory region accessed by LOps isn't modified. However, the original implementation passes the bit width `LOps.LoadSize` as the number of bytes to be accessed, causing BasicAA to return NoAlias: https://github.com/llvm/llvm-project/blob/a941e150749650e6a75e948f10d46b0bedcc128b/llvm/lib/Analysis/BasicAliasAnalysis.cpp#L1658-L1667

With `-aa-trace`, we get:
```
End ptr getelementptr inbounds nuw (i8, ptr @g, i64 4) @ LocationSize::precise(1), %gep1 = getelementptr i8, ptr %p, i64 4 @ LocationSize::precise(32) = NoAlias
```

This patch uses `getTypeStoreSize` to compute the correct access size for LOps. Instead of modifying the MemoryLocation for End (i.e., `LOps.RootInsert`), it builds a new MemoryLocation from the computed base and AATags for correctness.

Closes #169921.
1 parent 97e0573 commit c7c6c0a

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -710,9 +710,17 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
710710
MemoryLocation Loc;
711711
if (!Start->comesBefore(End)) {
712712
std::swap(Start, End);
713-
Loc = MemoryLocation::get(End);
713+
// If LOps.RootInsert comes after LI2, since we use LI2 as the new insert
714+
// point, we should make sure that the memory region accessed by LOps
715+
// isn't modified.
714716
if (LOps.FoundRoot)
715-
Loc = Loc.getWithNewSize(LOps.LoadSize);
717+
Loc = MemoryLocation(
718+
LOps.Root->getPointerOperand(),
719+
LocationSize::precise(DL.getTypeStoreSize(
720+
IntegerType::get(LI1->getContext(), LOps.LoadSize))),
721+
LOps.AATags);
722+
else
723+
Loc = MemoryLocation::get(End);
716724
} else
717725
Loc = MemoryLocation::get(End);
718726
unsigned NumScanned = 0;

llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2505,3 +2505,56 @@ entry:
25052505
%or = or disjoint i32 %shl, %conv.2
25062506
ret i32 %or
25072507
}
2508+
2509+
@g = global i64 1060856922120
2510+
2511+
; Make sure we use the correct memory location for alias analysis.
2512+
define i64 @loadcombine_consecutive_mayalias(ptr %p) {
2513+
; LE-LABEL: @loadcombine_consecutive_mayalias(
2514+
; LE-NEXT: entry:
2515+
; LE-NEXT: [[LOAD3:%.*]] = load i32, ptr [[P:%.*]], align 4
2516+
; LE-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 4
2517+
; LE-NEXT: store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
2518+
; LE-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP1]], align 4
2519+
; LE-NEXT: [[TMP0:%.*]] = zext i32 [[LOAD2]] to i64
2520+
; LE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 32
2521+
; LE-NEXT: [[ZEXT3:%.*]] = zext i32 [[LOAD3]] to i64
2522+
; LE-NEXT: [[LOAD1:%.*]] = or i64 [[TMP1]], [[ZEXT3]]
2523+
; LE-NEXT: [[RES:%.*]] = lshr i64 [[LOAD1]], 32
2524+
; LE-NEXT: ret i64 [[RES]]
2525+
;
2526+
; BE-LABEL: @loadcombine_consecutive_mayalias(
2527+
; BE-NEXT: entry:
2528+
; BE-NEXT: [[LOAD1:%.*]] = load i32, ptr [[P:%.*]], align 4
2529+
; BE-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 4
2530+
; BE-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 5
2531+
; BE-NEXT: store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
2532+
; BE-NEXT: [[LOAD2:%.*]] = load i8, ptr [[GEP1]], align 4
2533+
; BE-NEXT: [[LOAD3:%.*]] = load i24, ptr [[GEP2]], align 1
2534+
; BE-NEXT: [[ZEXT1:%.*]] = zext i24 [[LOAD3]] to i64
2535+
; BE-NEXT: [[SHL1:%.*]] = shl i64 [[ZEXT1]], 40
2536+
; BE-NEXT: [[ZEXT2:%.*]] = zext i8 [[LOAD2]] to i64
2537+
; BE-NEXT: [[SHL2:%.*]] = shl i64 [[ZEXT2]], 32
2538+
; BE-NEXT: [[OR1:%.*]] = or i64 [[SHL1]], [[SHL2]]
2539+
; BE-NEXT: [[ZEXT3:%.*]] = zext i32 [[LOAD1]] to i64
2540+
; BE-NEXT: [[OR2:%.*]] = or i64 [[OR1]], [[ZEXT3]]
2541+
; BE-NEXT: [[RES:%.*]] = lshr i64 [[OR2]], 32
2542+
; BE-NEXT: ret i64 [[RES]]
2543+
;
2544+
entry:
2545+
%load1 = load i32, ptr %p, align 4
2546+
%gep1 = getelementptr i8, ptr %p, i64 4
2547+
%gep2 = getelementptr i8, ptr %p, i64 5
2548+
store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
2549+
%load2 = load i8, ptr %gep1, align 4
2550+
%load3 = load i24, ptr %gep2, align 1
2551+
%zext1 = zext i24 %load3 to i64
2552+
%shl1 = shl i64 %zext1, 40
2553+
%zext2 = zext i8 %load2 to i64
2554+
%shl2 = shl i64 %zext2, 32
2555+
%or1 = or i64 %shl1, %shl2
2556+
%zext3 = zext i32 %load1 to i64
2557+
%or2 = or i64 %or1, %zext3
2558+
%res = lshr i64 %or2, 32
2559+
ret i64 %res
2560+
}

0 commit comments

Comments
 (0)