Skip to content

Commit 14a6115

Browse files
phoebewangtru
authored and committed
[X86][FP16] Fix vector_shuffle and lowering without f16c feature problems
The problem Alexander reported on D127982 was caused by an optimization intended for AVX512-FP16 instructions. We must restrict it to targets where that feature is enabled. During the investigation, I found that we did not expand fp_round/fp_extend when F16C is unavailable. This may result in a runtime crash, so change them too. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D130817 (cherry picked from commit 23021d4)
1 parent 9c4cab0 commit 14a6115

File tree

3 files changed

+4136
-473
lines changed

3 files changed

+4136
-473
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1362,6 +1362,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
13621362

13631363
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom);
13641364
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom);
1365+
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Expand);
1366+
setOperationAction(ISD::FP_ROUND, MVT::v8f16, Expand);
13651367
setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
13661368
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom);
13671369

@@ -16120,16 +16122,18 @@ static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1612016122
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
1612116123
int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
1612216124

16123-
if (NumV2Elements == 0) {
16124-
// Check for being able to broadcast a single element.
16125-
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
16126-
Mask, Subtarget, DAG))
16127-
return Broadcast;
16125+
if (Subtarget.hasFP16()) {
16126+
if (NumV2Elements == 0) {
16127+
// Check for being able to broadcast a single element.
16128+
if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
16129+
Mask, Subtarget, DAG))
16130+
return Broadcast;
16131+
}
16132+
if (NumV2Elements == 1 && Mask[0] >= 8)
16133+
if (SDValue V = lowerShuffleAsElementInsertion(
16134+
DL, MVT::v8f16, V1, V2, Mask, Zeroable, Subtarget, DAG))
16135+
return V;
1612816136
}
16129-
if (NumV2Elements == 1 && Mask[0] >= 8)
16130-
if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8f16, V1, V2, Mask,
16131-
Zeroable, Subtarget, DAG))
16132-
return V;
1613316137

1613416138
V1 = DAG.getBitcast(MVT::v8i16, V1);
1613516139
V2 = DAG.getBitcast(MVT::v8i16, V2);

llvm/test/CodeGen/X86/half.ll

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1297,4 +1297,30 @@ define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
12971297
ret <8 x half> %s
12981298
}
12991299

1300+
define <8 x half> @shuffle(ptr %p) {
1301+
; CHECK-LIBCALL-LABEL: shuffle:
1302+
; CHECK-LIBCALL: # %bb.0:
1303+
; CHECK-LIBCALL-NEXT: movdqu (%rdi), %xmm0
1304+
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1305+
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1306+
; CHECK-LIBCALL-NEXT: retq
1307+
;
1308+
; BWON-F16C-LABEL: shuffle:
1309+
; BWON-F16C: # %bb.0:
1310+
; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
1311+
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1312+
; BWON-F16C-NEXT: retq
1313+
;
1314+
; CHECK-I686-LABEL: shuffle:
1315+
; CHECK-I686: # %bb.0:
1316+
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
1317+
; CHECK-I686-NEXT: movdqu (%eax), %xmm0
1318+
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1319+
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1320+
; CHECK-I686-NEXT: retl
1321+
%1 = load <8 x half>, ptr %p, align 8
1322+
%2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1323+
ret <8 x half> %2
1324+
}
1325+
13001326
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)