Skip to content

Commit ec78750

Browse files
authored
Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas" (#171087)
Reverts #166132. It broke the libc on GPU tests; see https://lab.llvm.org/buildbot/#/builders/10/builds/18635
1 parent 95470b6 commit ec78750

File tree

2 files changed

+3
-162
lines changed

2 files changed

+3
-162
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
457457
const auto &VarOffset = VarOffsets.front();
458458
APInt OffsetQuot;
459459
APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
460-
Value *Offset = VarOffset.first;
461-
if (Rem != 0) {
462-
unsigned ElemSizeShift = Log2_64(VecElemSize);
463-
SimplifyQuery SQ(DL);
464-
SQ.CxtI = GEP;
465-
KnownBits KB = computeKnownBits(VarOffset.first, SQ);
466-
// Bail out if the index may point into the middle of an element.
467-
if (KB.countMinTrailingZeros() < ElemSizeShift)
468-
return nullptr;
469-
470-
Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
471-
if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
472-
NewInsts.push_back(NewInst);
473-
474-
Offset = Scaled;
475-
OffsetQuot = APInt(BW, 1);
476-
Rem = 0;
477-
}
460+
if (Rem != 0 || OffsetQuot.isZero())
461+
return nullptr;
478462

463+
Value *Offset = VarOffset.first;
479464
if (!isa<IntegerType>(Offset->getType()))
480465
return nullptr;
481466

llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll

Lines changed: 0 additions & 144 deletions
Original file line number | Diff line number | Diff line change
@@ -250,150 +250,6 @@ bb2:
250250
store i32 0, ptr addrspace(5) %extractelement
251251
ret void
252252
}
253-
254-
define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, float %data, i1 %idx_sel) {
255-
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
256-
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
257-
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
258-
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
259-
; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
260-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
261-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
262-
; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
263-
; CHECK-NEXT: ret void
264-
;
265-
%alloca = alloca <3 x float>, align 16, addrspace(5)
266-
%vec = load <3 x float>, ptr %buffer
267-
store <3 x float> %vec, ptr addrspace(5) %alloca
268-
%index = select i1 %idx_sel, i32 0, i32 4
269-
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
270-
store float %data, ptr addrspace(5) %elt, align 4
271-
%updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
272-
store <3 x float> %updated, ptr %buffer, align 16
273-
ret void
274-
}
275-
276-
define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
277-
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
278-
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
279-
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
280-
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
281-
; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
282-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
283-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
284-
; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
285-
; CHECK-NEXT: ret void
286-
;
287-
%alloca = alloca <3 x float>, align 16, addrspace(5)
288-
%vec = load <3 x float>, ptr %buffer
289-
store <3 x float> %vec, ptr addrspace(5) %alloca
290-
%index = select i1 %idx_sel, i32 4, i32 8
291-
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
292-
store float %data, ptr addrspace(5) %elt, align 4
293-
%updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
294-
store <3 x float> %updated, ptr %buffer, align 16
295-
ret void
296-
}
297-
298-
define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
299-
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(
300-
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
301-
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x float> poison
302-
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
303-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[VEC]], i64 0
304-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[ALLOCA]], float [[TMP1]], i32 0
305-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[VEC]], i64 1
306-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP2]], float [[TMP3]], i32 1
307-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[VEC]], i64 2
308-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP4]], float [[TMP5]], i32 2
309-
; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
310-
; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[INDEX]], 2
311-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP6]], float [[DATA]], i32 [[TMP7]]
312-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP8]], i32 0
313-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x float> poison, float [[TMP9]], i64 0
314-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[TMP8]], i32 1
315-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x float> [[TMP10]], float [[TMP11]], i64 1
316-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP8]], i32 2
317-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x float> [[TMP12]], float [[TMP13]], i64 2
318-
; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
319-
; CHECK-NEXT: ret void
320-
;
321-
%alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
322-
%row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
323-
%vec = load <3 x float>, ptr %buffer
324-
store <3 x float> %vec, ptr addrspace(5) %row, align 16
325-
%index = select i1 %idx_sel, i32 4, i32 8
326-
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
327-
store float %data, ptr addrspace(5) %elt, align 4
328-
%updated = load <3 x float>, ptr addrspace(5) %row, align 16
329-
store <3 x float> %updated, ptr %buffer, align 16
330-
ret void
331-
}
332-
333-
define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
334-
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(
335-
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
336-
; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
337-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
338-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]]
339-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
340-
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]]
341-
; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16
342-
; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
343-
; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
344-
; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
345-
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]]
346-
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]]
347-
; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]]
348-
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]]
349-
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]]
350-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x <3 x float>], ptr addrspace(3) @scalar_alloca_vector_gep_i8_4_or_5_no_promote.alloca, i32 0, i32 [[TMP14]]
351-
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
352-
; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(3) [[TMP15]], align 16
353-
; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
354-
; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[TMP15]], i32 [[INDEX]]
355-
; CHECK-NEXT: store float [[DATA]], ptr addrspace(3) [[ELT]], align 4
356-
; CHECK-NEXT: [[UPDATED:%.*]] = load <3 x float>, ptr addrspace(3) [[TMP15]], align 16
357-
; CHECK-NEXT: store <3 x float> [[UPDATED]], ptr [[BUFFER]], align 16
358-
; CHECK-NEXT: ret void
359-
;
360-
%alloca = alloca <3 x float>, align 16, addrspace(5)
361-
%vec = load <3 x float>, ptr %buffer
362-
store <3 x float> %vec, ptr addrspace(5) %alloca
363-
%index = select i1 %idx_sel, i32 4, i32 5
364-
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
365-
store float %data, ptr addrspace(5) %elt, align 4
366-
%updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
367-
store <3 x float> %updated, ptr %buffer, align 16
368-
ret void
369-
}
370-
371-
define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
372-
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(
373-
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
374-
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x <3 x float>], align 16, addrspace(5)
375-
; CHECK-NEXT: [[ROW:%.*]] = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
376-
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
377-
; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(5) [[ROW]], align 16
378-
; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
379-
; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[ROW]], i32 [[INDEX]]
380-
; CHECK-NEXT: store float [[DATA]], ptr addrspace(5) [[ELT]], align 4
381-
; CHECK-NEXT: [[TMP14:%.*]] = load <3 x float>, ptr addrspace(5) [[ROW]], align 16
382-
; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
383-
; CHECK-NEXT: ret void
384-
;
385-
%alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
386-
%row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
387-
%vec = load <3 x float>, ptr %buffer
388-
store <3 x float> %vec, ptr addrspace(5) %row, align 16
389-
%index = select i1 %idx_sel, i32 4, i32 5
390-
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
391-
store float %data, ptr addrspace(5) %elt, align 4
392-
%updated = load <3 x float>, ptr addrspace(5) %row, align 16
393-
store <3 x float> %updated, ptr %buffer, align 16
394-
ret void
395-
}
396-
397253
;.
398254
; CHECK: [[META0]] = !{}
399255
; CHECK: [[RNG1]] = !{i32 0, i32 1025}

0 commit comments

Comments
 (0)