Skip to content

Commit 56b2be4

Browse files
committed
[X86] Fold scalar_to_vector(funnel(x,y,imm)) -> funnel(scalar_to_vector(x),scalar_to_vector(y),imm)
Limit this to cases where both x and y are known to be extracted from a vector. Addresses poor x86 codegen on #107289.
1 parent 13013bd commit 56b2be4

File tree

2 files changed

+23
-37
lines changed

2 files changed

+23
-37
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57500,6 +57500,24 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG,
5750057500
}
5750157501
}
5750257502
break;
57503+
case ISD::FSHL:
57504+
case ISD::FSHR:
57505+
if (auto *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(2))) {
57506+
if (supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL) &&
57507+
Src.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
57508+
Src.getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
57509+
Src.hasOneUse()) {
57510+
uint64_t AmtVal =
57511+
Amt->getAPIntValue().urem(Src.getScalarValueSizeInBits());
57512+
SDValue SrcVec0 =
57513+
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Src.getOperand(0));
57514+
SDValue SrcVec1 =
57515+
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Src.getOperand(1));
57516+
return DAG.getNode(Src.getOpcode(), DL, VT, SrcVec0, SrcVec1,
57517+
DAG.getConstant(AmtVal, DL, VT));
57518+
}
57519+
}
57520+
break;
5750357521
}
5750457522

5750557523
return SDValue();

llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3534,46 +3534,14 @@ define <4 x i32> @PR63700(i128 %0) {
35343534
}
35353535

35363536
define <16 x i8> @PR107289(<16 x i8> %0) {
3537-
; SSE2-LABEL: PR107289:
3538-
; SSE2: # %bb.0:
3539-
; SSE2-NEXT: movq %xmm0, %rax
3540-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3541-
; SSE2-NEXT: movq %xmm1, %rcx
3542-
; SSE2-NEXT: shldq $8, %rax, %rcx
3543-
; SSE2-NEXT: movq %rcx, %xmm1
3544-
; SSE2-NEXT: psllq $8, %xmm0
3545-
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3546-
; SSE2-NEXT: retq
3547-
;
3548-
; SSSE3-LABEL: PR107289:
3549-
; SSSE3: # %bb.0:
3550-
; SSSE3-NEXT: movq %xmm0, %rax
3551-
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3552-
; SSSE3-NEXT: movq %xmm1, %rcx
3553-
; SSSE3-NEXT: shldq $8, %rax, %rcx
3554-
; SSSE3-NEXT: movq %rcx, %xmm1
3555-
; SSSE3-NEXT: psllq $8, %xmm0
3556-
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3557-
; SSSE3-NEXT: retq
3558-
;
3559-
; SSE41-LABEL: PR107289:
3560-
; SSE41: # %bb.0:
3561-
; SSE41-NEXT: movq %xmm0, %rax
3562-
; SSE41-NEXT: pextrq $1, %xmm0, %rcx
3563-
; SSE41-NEXT: shldq $8, %rax, %rcx
3564-
; SSE41-NEXT: movq %rcx, %xmm1
3565-
; SSE41-NEXT: psllq $8, %xmm0
3566-
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3567-
; SSE41-NEXT: retq
3537+
; SSE-LABEL: PR107289:
3538+
; SSE: # %bb.0:
3539+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
3540+
; SSE-NEXT: retq
35683541
;
35693542
; AVX-LABEL: PR107289:
35703543
; AVX: # %bb.0:
3571-
; AVX-NEXT: vmovq %xmm0, %rax
3572-
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
3573-
; AVX-NEXT: shldq $8, %rax, %rcx
3574-
; AVX-NEXT: vmovq %rcx, %xmm1
3575-
; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
3576-
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3544+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
35773545
; AVX-NEXT: retq
35783546
%src = bitcast <16 x i8> %0 to i128
35793547
%shl = shl i128 %src, 8

0 commit comments

Comments
 (0)