Skip to content

Commit 2ec7739

Browse files
topperc authored and zmodem committed
[FastISel] Bail out of selectGetElementPtr for vector GEPs.
The code that decomposes the GEP into ADD/MUL doesn't work properly for vector GEPs. It can create bad COPY instructions or possibly assert. For now, just bail out to SelectionDAG.

Fixes PR45906.

(cherry picked from commit 4208ea3)
1 parent d754173 commit 2ec7739

File tree

2 files changed

+54
-0
lines changed

2 files changed

+54
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/FastISel.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -690,6 +690,12 @@ bool FastISel::selectGetElementPtr(const User *I) {
690 690
Register N = getRegForValue(I->getOperand(0));
691 691
if (!N) // Unhandled operand. Halt "fast" selection and bail.
692 692
return false;
693+
694+
// FIXME: The code below does not handle vector GEPs. Halt "fast" selection
695+
// and bail.
696+
if (isa<VectorType>(I->getType()))
697+
return false;
698+
693 699
bool NIsKill = hasTrivialKill(I->getOperand(0));
694 700

695 701
// Keep a running tab of the total offset to coalesce multiple N = N + Offset

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3319,3 +3319,51 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
3319 3319
call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %gep, i32 4, <16 x i1> %mask)
3320 3320
ret void
3321 3321
}
3322+
3323+
%struct.foo = type { i8*, i64, i16, i16, i32 }
3324+
3325+
; This used to cause fast-isel to generate bad copy instructions that would
3326+
; cause an error in copyPhysReg.
3327+
define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
3328+
; KNL_64-LABEL: pr45906:
3329+
; KNL_64: # %bb.0: # %bb
3330+
; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
3331+
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
3332+
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
3333+
; KNL_64-NEXT: retq
3334+
;
3335+
; KNL_32-LABEL: pr45906:
3336+
; KNL_32: # %bb.0: # %bb
3337+
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
3338+
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm1
3339+
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
3340+
; KNL_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
3341+
; KNL_32-NEXT: retl
3342+
;
3343+
; SKX_SMALL-LABEL: pr45906:
3344+
; SKX_SMALL: # %bb.0: # %bb
3345+
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
3346+
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
3347+
; SKX_SMALL-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
3348+
; SKX_SMALL-NEXT: retq
3349+
;
3350+
; SKX_LARGE-LABEL: pr45906:
3351+
; SKX_LARGE: # %bb.0: # %bb
3352+
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
3353+
; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
3354+
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
3355+
; SKX_LARGE-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
3356+
; SKX_LARGE-NEXT: retq
3357+
;
3358+
; SKX_32-LABEL: pr45906:
3359+
; SKX_32: # %bb.0: # %bb
3360+
; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
3361+
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
3362+
; SKX_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
3363+
; SKX_32-NEXT: retl
3364+
bb:
3365+
%tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1
3366+
%tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %tmp, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
3367+
ret <8 x i64> %tmp1
3368+
}
3369+
declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)

0 commit comments

Comments
 (0)