Skip to content

Commit ec6cc8c

Browse files
committed
Split off NUW change, fix error test, add braces
1 parent 1cb9bb3 commit ec6cc8c

File tree

4 files changed

+26
-20
lines changed

4 files changed

+26
-20
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -728,10 +728,11 @@ Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
728728
if (!DL.typeSizeEqualsStoreSize(T))
729729
T = IRB.getIntNTy(Size.getFixedValue());
730730
Type *ElemTy = T->getScalarType();
731-
if (isa<PointerType, ScalableVectorType>(ElemTy))
731+
if (isa<PointerType, ScalableVectorType>(ElemTy)) {
732732
// Pointers are always big enough, and we'll let scalable vectors through to
733733
// fail in codegen.
734734
return T;
735+
}
735736
unsigned ElemSize = DL.getTypeSizeInBits(ElemTy).getFixedValue();
736737
if (isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {
737738
// [vectors of] anything that's 16/32/64/128 bits can be cast and split into
@@ -1809,7 +1810,7 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
18091810

18101811
auto [Rsrc, Off] = getPtrParts(Ptr);
18111812
const DataLayout &DL = GEP.getDataLayout();
1812-
bool IsNUW = GEP.hasNoUnsignedWrap();
1813+
bool InBounds = GEP.isInBounds();
18131814

18141815
// In order to call emitGEPOffset() and thus not have to reimplement it,
18151816
// we need the GEP result to have ptr addrspace(7) type.
@@ -1824,12 +1825,16 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
18241825
return {Rsrc, Off};
18251826
}
18261827

1828+
bool HasNonNegativeOff = false;
1829+
if (auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
1830+
HasNonNegativeOff = !CI->isNegative();
1831+
}
18271832
Value *NewOff;
18281833
if (match(Off, m_Zero())) {
18291834
NewOff = OffAccum;
18301835
} else {
18311836
NewOff = IRB.CreateAdd(Off, OffAccum, "",
1832-
/*hasNUW=*/IsNUW,
1837+
/*hasNUW=*/InBounds && HasNonNegativeOff,
18331838
/*hasNSW=*/false);
18341839
}
18351840
copyMetadata(NewOff, &GEP);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.nxv2i32.fail.ll

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
; Note: The exact error messages aren't important here, but are included to catch
22
; anything changing.
3-
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s 2>&1 \
4-
; RUN: | grep "LLVM ERROR: Scalarization of scalable vectors is not supported."
5-
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s 2>&1 \
6-
; RUN: | grep "LLVM ERROR: Invalid size request on a scalable vector."
3+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -filetype=null < %s 2>&1 | FileCheck %s --check-prefix=SDAG
4+
; SDAG: LLVM ERROR: Scalarization of scalable vectors is not supported.
5+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -filetype=null < %s 2>&1 | FileCheck %s --check-prefix=GISEL
6+
; GISEL: LLVM ERROR: Invalid size request on a scalable vector.
7+
78
define void @buffer_store_nxv2i32(ptr addrspace(8) inreg %rsrc, i32 %offset) {
89
call void @llvm.amdgcn.raw.ptr.buffer.store.nxv2i32(<vscale x 2 x i32> poison, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
910
ret void

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,12 @@ define void @caller(ptr addrspace(7) noundef nonnull %arg) {
9191
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[V_INT_RSRC]], 32
9292
; CHECK-NEXT: [[V_INT_OFF:%.*]] = zext i32 [[V_OFF]] to i160
9393
; CHECK-NEXT: [[V_INT:%.*]] = or i160 [[TMP1]], [[V_INT_OFF]]
94-
; CHECK-NEXT: [[V_INT_CAST:%.*]] = bitcast i160 [[V_INT]] to <5 x i32>
95-
; CHECK-NEXT: [[V_INT_CAST_SLICE_0:%.*]] = shufflevector <5 x i32> [[V_INT_CAST]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[V_INT_CAST_SLICE_0]], ptr addrspace(8) align 32 [[ARG_RSRC]], i32 [[ARG_OFF]], i32 0, i32 0)
97-
; CHECK-NEXT: [[ARG_PART_4:%.*]] = add nuw i32 [[ARG_OFF]], 16
98-
; CHECK-NEXT: [[V_INT_CAST_SLICE_4:%.*]] = extractelement <5 x i32> [[V_INT_CAST]], i64 4
99-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[V_INT_CAST_SLICE_4]], ptr addrspace(8) align 16 [[ARG_RSRC]], i32 [[ARG_PART_4]], i32 0, i32 0)
94+
; CHECK-NEXT: [[V_INT_LEGAL:%.*]] = bitcast i160 [[V_INT]] to <5 x i32>
95+
; CHECK-NEXT: [[V_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[V_INT_LEGAL]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[V_INT_SLICE_0]], ptr addrspace(8) align 32 [[ARG_RSRC]], i32 [[ARG_OFF]], i32 0, i32 0)
97+
; CHECK-NEXT: [[ARG_PART_4:%.*]] = add i32 [[ARG_OFF]], 16
98+
; CHECK-NEXT: [[V_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[V_INT_LEGAL]], i64 4
99+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[V_INT_SLICE_4]], ptr addrspace(8) align 16 [[ARG_RSRC]], i32 [[ARG_PART_4]], i32 0, i32 0)
100100
; CHECK-NEXT: ret void
101101
;
102102
%v = call ptr addrspace(7) @extern(ptr addrspace(7) %arg)
@@ -109,7 +109,7 @@ define internal noalias noundef nonnull ptr addrspace(7) @foo(ptr addrspace(7) n
109109
; CHECK-SAME: ({ ptr addrspace(8), i32 } noundef [[ARG:%.*]]) #[[ATTR0]] {
110110
; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0
111111
; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1
112-
; CHECK-NEXT: [[RET:%.*]] = add i32 [[ARG_OFF]], 4
112+
; CHECK-NEXT: [[RET:%.*]] = add nuw i32 [[ARG_OFF]], 4
113113
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[ARG_RSRC]], 0
114114
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1
115115
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -54,12 +54,12 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace
5454
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]]
5555
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_OFF:%.*]] = zext i32 [[BUF_PTR_4_PTR_OFF]] to i160, !dbg [[DBG33]]
5656
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT:%.*]] = or i160 [[TMP10]], [[BUF_PTR_4_PTR_INT_OFF]], !dbg [[DBG33]]
57-
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_CAST:%.*]] = bitcast i160 [[BUF_PTR_4_PTR_INT]] to <5 x i32>, !dbg [[DBG33]]
58-
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_CAST_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_PTR_INT_CAST]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, !dbg [[DBG33]]
59-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_PTR_INT_CAST_SLICE_0]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]]
60-
; CHECK-NEXT: [[AUX_PTR_2_PTR_PART_4:%.*]] = add nuw i32 [[AUX_PTR_2_PTR_OFF]], 16, !dbg [[DBG33]]
61-
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_CAST_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_PTR_INT_CAST]], i64 4, !dbg [[DBG33]]
62-
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_PTR_INT_CAST_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]]
57+
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_LEGAL:%.*]] = bitcast i160 [[BUF_PTR_4_PTR_INT]] to <5 x i32>, !dbg [[DBG33]]
58+
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, !dbg [[DBG33]]
59+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_PTR_INT_SLICE_0]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]]
60+
; CHECK-NEXT: [[AUX_PTR_2_PTR_PART_4:%.*]] = add i32 [[AUX_PTR_2_PTR_OFF]], 16, !dbg [[DBG33]]
61+
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], i64 4, !dbg [[DBG33]]
62+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_PTR_INT_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]]
6363
; CHECK-NEXT: ret float [[RET]], !dbg [[DBG34:![0-9]+]]
6464
;
6565
%buf.ptr.var = alloca ptr addrspace(7), align 32, addrspace(5), !dbg !20

0 commit comments

Comments
 (0)