Skip to content

Commit 2d3167f

Browse files
committed
[SeparateConstOffsetFromGEP] Avoid miscompiles related to trunc nuw/nsw (#154582)
Drop poison generating flags on trunc when distributing trunc over add/sub/or. We need to do this since for example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc nuw (add A, B))). In some situations it is pessimistic to drop the flags. Such as if the add in the example above also has the nuw flag. For now we keep it simple and always drop the flags. Worth mentioning is that we drop the flags when cloning instructions and rebuilding the chain. This is done after the "allowsPreservingNUW" checks in ConstantOffsetExtractor::Extract. So we still take the "trunc nuw" into consideration when determining if nuw can be preserved in the gep (which should be ok since that check also require that all the involved binary operations has nuw). Fixes #154116
1 parent 4ff7ac2 commit 2d3167f

File tree

3 files changed

+12
-9
lines changed

3 files changed

+12
-9
lines changed

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,13 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
642642

643643
Instruction *Ext = I->clone();
644644
Ext->setOperand(0, Current);
645+
// In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
646+
// we assume that it is ok to redistribute trunc over add/sub/or. But for
647+
// example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
648+
// nuw (add A, B))). To make such redistributions legal we drop all the
649+
// poison generating flags from cloned trunc instructions here.
650+
if (isa<TruncInst>(Ext))
651+
Ext->dropPoisonGeneratingFlags();
645652
Ext->insertBefore(*IP->getParent(), IP);
646653
Current = Ext;
647654
}

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ entry:
311311
define ptr @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(ptr %p, i128 %i) {
312312
; CHECK-LABEL: @nuw_inbounds_implies_nuw_inbounds_trunc_nuw(
313313
; CHECK-NEXT: entry:
314-
; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i128 [[I:%.*]] to i64
314+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[I:%.*]] to i64
315315
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[P:%.*]], i64 [[TMP0]]
316316
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
317317
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]

llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/rebuild-trunc.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=separate-const-offset-from-gep -S | FileCheck %s
33

4-
; FIXME: (add (trunc nuw A), (trunc nuw B)) is more poisonous than
5-
; (trunc nuw (add A, B))), so we should for example drop the
6-
; nuw on the trunc when doing the rewrite here.
4+
; Verify that we drop "nuw" from trunc.
75
define ptr @pr154116_nuw(ptr %p, i128 %i) {
86
; CHECK-LABEL: define ptr @pr154116_nuw(
97
; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
10-
; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i128 [[I]] to i64
8+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[I]] to i64
119
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
1210
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 80
1311
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]
@@ -18,13 +16,11 @@ define ptr @pr154116_nuw(ptr %p, i128 %i) {
1816
ret ptr %arrayidx
1917
}
2018

21-
; FIXME: (add (trunc nsw A), (trunc nsw B)) is more poisonous than
22-
; (trunc nsw (add A, B))), so we should for example drop the
23-
; nsw on the trunc when doing the rewrite here.
19+
; Verify that we drop "nsw" from trunc.
2420
define ptr @pr154116_nsw(ptr %p, i128 %i) {
2521
; CHECK-LABEL: define ptr @pr154116_nsw(
2622
; CHECK-SAME: ptr [[P:%.*]], i128 [[I:%.*]]) #[[ATTR0]] {
27-
; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i128 [[I]] to i64
23+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[I]] to i64
2824
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
2925
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 4
3026
; CHECK-NEXT: ret ptr [[ARRAYIDX2]]

0 commit comments

Comments
 (0)