Skip to content

Commit 793f3ea

Browse files
zhaoqi5 authored and mahesh-attarde committed
[LoongArch] Avoid expanding build_vector containing insertion of undef elements (llvm#150377)
1 parent 5db58cd commit 793f3ea

File tree

6 files changed

+146
-182
lines changed

6 files changed

+146
-182
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 12 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2385,19 +2385,9 @@ SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
23852385
return Res;
23862386
}
23872387

2388-
static bool isConstantOrUndef(const SDValue Op) {
2389-
if (Op->isUndef())
2390-
return true;
2391-
if (isa<ConstantSDNode>(Op))
2392-
return true;
2393-
if (isa<ConstantFPSDNode>(Op))
2394-
return true;
2395-
return false;
2396-
}
2397-
2398-
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2388+
static bool isConstantBUILD_VECTOR(const BuildVectorSDNode *Op) {
23992389
for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2400-
if (isConstantOrUndef(Op->getOperand(i)))
2390+
if (isIntOrFPConstant(Op->getOperand(i)))
24012391
return true;
24022392
return false;
24032393
}
@@ -2505,20 +2495,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
25052495
if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
25062496
return Op;
25072497

2508-
if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2498+
if (!isConstantBUILD_VECTOR(Node)) {
25092499
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
25102500
// The resulting code is the same length as the expansion, but it doesn't
25112501
// use memory operations.
2512-
EVT ResTy = Node->getValueType(0);
2513-
25142502
assert(ResTy.isVector());
25152503

25162504
unsigned NumElts = ResTy.getVectorNumElements();
2517-
SDValue Vector =
2518-
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Node->getOperand(0));
2505+
SDValue Op0 = Node->getOperand(0);
2506+
SDValue Vector = DAG.getUNDEF(ResTy);
2507+
2508+
if (!Op0.isUndef())
2509+
Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
25192510
for (unsigned i = 1; i < NumElts; ++i) {
2520-
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2521-
Node->getOperand(i),
2511+
SDValue Opi = Node->getOperand(i);
2512+
if (Opi.isUndef())
2513+
continue;
2514+
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
25222515
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
25232516
}
25242517
return Vector;

llvm/test/CodeGen/LoongArch/lasx/build-vector.ll

Lines changed: 65 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -355,40 +355,46 @@ entry:
355355
define void @buildvector_v32i8_partial(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a7, i8 %a8, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind {
356356
; CHECK-LABEL: buildvector_v32i8_partial:
357357
; CHECK: # %bb.0: # %entry
358-
; CHECK-NEXT: addi.d $sp, $sp, -96
359-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
360-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
361-
; CHECK-NEXT: addi.d $fp, $sp, 96
362-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
363-
; CHECK-NEXT: ld.b $t0, $fp, 0
364-
; CHECK-NEXT: ld.b $t1, $fp, 8
365-
; CHECK-NEXT: ld.b $t2, $fp, 16
366-
; CHECK-NEXT: ld.b $t3, $fp, 24
367-
; CHECK-NEXT: ld.b $t4, $fp, 56
368-
; CHECK-NEXT: ld.b $t5, $fp, 48
369-
; CHECK-NEXT: ld.b $t6, $fp, 40
370-
; CHECK-NEXT: ld.b $t7, $fp, 32
371-
; CHECK-NEXT: st.b $t4, $sp, 63
372-
; CHECK-NEXT: st.b $t5, $sp, 60
373-
; CHECK-NEXT: st.b $t6, $sp, 59
374-
; CHECK-NEXT: st.b $t7, $sp, 55
375-
; CHECK-NEXT: st.b $t3, $sp, 54
376-
; CHECK-NEXT: st.b $t2, $sp, 52
377-
; CHECK-NEXT: st.b $t1, $sp, 50
378-
; CHECK-NEXT: st.b $t0, $sp, 49
379-
; CHECK-NEXT: st.b $a7, $sp, 47
380-
; CHECK-NEXT: st.b $a6, $sp, 40
381-
; CHECK-NEXT: st.b $a5, $sp, 39
382-
; CHECK-NEXT: st.b $a4, $sp, 37
383-
; CHECK-NEXT: st.b $a3, $sp, 34
384-
; CHECK-NEXT: st.b $a2, $sp, 33
385-
; CHECK-NEXT: st.b $a1, $sp, 32
386-
; CHECK-NEXT: xvld $xr0, $sp, 32
358+
; CHECK-NEXT: ld.b $t0, $sp, 56
359+
; CHECK-NEXT: ld.b $t1, $sp, 48
360+
; CHECK-NEXT: ld.b $t2, $sp, 40
361+
; CHECK-NEXT: ld.b $t3, $sp, 32
362+
; CHECK-NEXT: ld.b $t4, $sp, 24
363+
; CHECK-NEXT: ld.b $t5, $sp, 16
364+
; CHECK-NEXT: ld.b $t6, $sp, 8
365+
; CHECK-NEXT: ld.b $t7, $sp, 0
366+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
367+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
368+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
369+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 5
370+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 7
371+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 8
372+
; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15
373+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
374+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t7, 1
375+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
376+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
377+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t6, 2
378+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
379+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
380+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t5, 4
381+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
382+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
383+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t4, 6
384+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
385+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
386+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t3, 7
387+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
388+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
389+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t2, 11
390+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
391+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
392+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t1, 12
393+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
394+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
395+
; CHECK-NEXT: vinsgr2vr.b $vr1, $t0, 15
396+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
387397
; CHECK-NEXT: xvst $xr0, $a0, 0
388-
; CHECK-NEXT: addi.d $sp, $fp, -96
389-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
390-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
391-
; CHECK-NEXT: addi.d $sp, $sp, 96
392398
; CHECK-NEXT: ret
393399
entry:
394400
%ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0
@@ -581,24 +587,18 @@ entry:
581587
define void @buildvector_v16i16_partial(ptr %dst, i16 %a0, i16 %a2, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind {
582588
; CHECK-LABEL: buildvector_v16i16_partial:
583589
; CHECK: # %bb.0: # %entry
584-
; CHECK-NEXT: addi.d $sp, $sp, -96
585-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
586-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
587-
; CHECK-NEXT: addi.d $fp, $sp, 96
588-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
589-
; CHECK-NEXT: st.h $a7, $sp, 58
590-
; CHECK-NEXT: st.h $a6, $sp, 56
591-
; CHECK-NEXT: st.h $a5, $sp, 46
592-
; CHECK-NEXT: st.h $a4, $sp, 44
593-
; CHECK-NEXT: st.h $a3, $sp, 42
594-
; CHECK-NEXT: st.h $a2, $sp, 36
595-
; CHECK-NEXT: st.h $a1, $sp, 32
596-
; CHECK-NEXT: xvld $xr0, $sp, 32
590+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
591+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 2
592+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5
593+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6
594+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 7
595+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
596+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 4
597+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
598+
; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
599+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 5
600+
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
597601
; CHECK-NEXT: xvst $xr0, $a0, 0
598-
; CHECK-NEXT: addi.d $sp, $fp, -96
599-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
600-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
601-
; CHECK-NEXT: addi.d $sp, $sp, 96
602602
; CHECK-NEXT: ret
603603
entry:
604604
%ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0
@@ -702,21 +702,11 @@ entry:
702702
define void @buildvector_v8i32_partial(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind {
703703
; CHECK-LABEL: buildvector_v8i32_partial:
704704
; CHECK: # %bb.0: # %entry
705-
; CHECK-NEXT: addi.d $sp, $sp, -96
706-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
707-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
708-
; CHECK-NEXT: addi.d $fp, $sp, 96
709-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
710-
; CHECK-NEXT: st.w $a4, $sp, 56
711-
; CHECK-NEXT: st.w $a3, $sp, 52
712-
; CHECK-NEXT: st.w $a2, $sp, 48
713-
; CHECK-NEXT: st.w $a1, $sp, 40
714-
; CHECK-NEXT: xvld $xr0, $sp, 32
705+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2
706+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 4
707+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 5
708+
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 6
715709
; CHECK-NEXT: xvst $xr0, $a0, 0
716-
; CHECK-NEXT: addi.d $sp, $fp, -96
717-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
718-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
719-
; CHECK-NEXT: addi.d $sp, $sp, 96
720710
; CHECK-NEXT: ret
721711
entry:
722712
%ins0 = insertelement <8 x i32> undef, i32 undef, i32 0
@@ -787,11 +777,8 @@ entry:
787777
define void @buildvector_v4i64_partial(ptr %dst, i64 %a1, i64 %a2) nounwind {
788778
; CHECK-LABEL: buildvector_v4i64_partial:
789779
; CHECK: # %bb.0: # %entry
790-
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 0
791-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
792-
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a1, 0
793-
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
794-
; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0
780+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
781+
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2
795782
; CHECK-NEXT: xvst $xr0, $a0, 0
796783
; CHECK-NEXT: ret
797784
entry:
@@ -867,21 +854,15 @@ entry:
867854
define void @buildvector_v8f32_partial(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind {
868855
; CHECK-LABEL: buildvector_v8f32_partial:
869856
; CHECK: # %bb.0: # %entry
870-
; CHECK-NEXT: addi.d $sp, $sp, -96
871-
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
872-
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
873-
; CHECK-NEXT: addi.d $fp, $sp, 96
874-
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
875-
; CHECK-NEXT: fst.s $fa3, $sp, 60
876-
; CHECK-NEXT: fst.s $fa2, $sp, 52
877-
; CHECK-NEXT: fst.s $fa1, $sp, 40
878-
; CHECK-NEXT: fst.s $fa0, $sp, 36
879-
; CHECK-NEXT: xvld $xr0, $sp, 32
857+
; CHECK-NEXT: # kill: def $f3 killed $f3 def $xr3
858+
; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
859+
; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
860+
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
861+
; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 1
862+
; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 2
863+
; CHECK-NEXT: xvinsve0.w $xr0, $xr2, 5
864+
; CHECK-NEXT: xvinsve0.w $xr0, $xr3, 7
880865
; CHECK-NEXT: xvst $xr0, $a0, 0
881-
; CHECK-NEXT: addi.d $sp, $fp, -96
882-
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
883-
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
884-
; CHECK-NEXT: addi.d $sp, $sp, 96
885866
; CHECK-NEXT: ret
886867
entry:
887868
%ins0 = insertelement <8 x float> undef, float undef, i32 0
@@ -960,9 +941,7 @@ define void @buildvector_v4f64_partial(ptr %dst, double %a0, double %a3) nounwin
960941
; CHECK: # %bb.0: # %entry
961942
; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1
962943
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
963-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
964-
; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
965-
; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0
944+
; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 3
966945
; CHECK-NEXT: xvst $xr0, $a0, 0
967946
; CHECK-NEXT: ret
968947
entry:

llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -524,9 +524,8 @@ define i8 @xvmsk_eq_v2i64_concat_poison(<2 x i64> %vec) {
524524
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
525525
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
526526
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
527-
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
528-
; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
529-
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
527+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
528+
; CHECK-NEXT: vslli.h $vr0, $vr1, 15
530529
; CHECK-NEXT: vmskltz.h $vr0, $vr0
531530
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
532531
; CHECK-NEXT: ret
@@ -539,24 +538,20 @@ define i8 @xvmsk_eq_v2i64_concat_poison(<2 x i64> %vec) {
539538
define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
540539
; CHECK-LABEL: xvmsk_ne_v4i32_concat_poison:
541540
; CHECK: # %bb.0:
542-
; CHECK-NEXT: addi.d $sp, $sp, -16
543-
; CHECK-NEXT: .cfi_def_cfa_offset 16
544541
; CHECK-NEXT: vseqi.w $vr0, $vr0, 0
545542
; CHECK-NEXT: vrepli.b $vr1, -1
546543
; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
547-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
548-
; CHECK-NEXT: st.h $a0, $sp, 6
549-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
550-
; CHECK-NEXT: st.h $a0, $sp, 4
551-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
552-
; CHECK-NEXT: st.h $a0, $sp, 2
553544
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
554-
; CHECK-NEXT: st.h $a0, $sp, 0
555-
; CHECK-NEXT: vld $vr0, $sp, 0
556-
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
545+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
546+
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
547+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
548+
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
549+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
550+
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
551+
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
552+
; CHECK-NEXT: vslli.h $vr0, $vr1, 15
557553
; CHECK-NEXT: vmskltz.h $vr0, $vr0
558554
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
559-
; CHECK-NEXT: addi.d $sp, $sp, 16
560555
; CHECK-NEXT: ret
561556
%tobool = icmp ne <4 x i32> %vec, zeroinitializer
562557
%insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -567,23 +562,19 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
567562
define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) {
568563
; CHECK-LABEL: xvmsk_ogt_v4f64_concat_poison:
569564
; CHECK: # %bb.0:
570-
; CHECK-NEXT: addi.d $sp, $sp, -16
571-
; CHECK-NEXT: .cfi_def_cfa_offset 16
572565
; CHECK-NEXT: xvrepli.b $xr1, 0
573566
; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
574-
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
575-
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 1
576-
; CHECK-NEXT: xvpickve2gr.d $a2, $xr0, 2
577-
; CHECK-NEXT: xvpickve2gr.d $a3, $xr0, 3
578-
; CHECK-NEXT: st.h $a3, $sp, 6
579-
; CHECK-NEXT: st.h $a2, $sp, 4
580-
; CHECK-NEXT: st.h $a1, $sp, 2
581-
; CHECK-NEXT: st.h $a0, $sp, 0
582-
; CHECK-NEXT: vld $vr0, $sp, 0
567+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
568+
; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2
569+
; CHECK-NEXT: xvpickve2gr.d $a2, $xr0, 1
570+
; CHECK-NEXT: xvpickve2gr.d $a3, $xr0, 0
571+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 0
572+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
573+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
574+
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
583575
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
584576
; CHECK-NEXT: vmskltz.h $vr0, $vr0
585577
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
586-
; CHECK-NEXT: addi.d $sp, $sp, 16
587578
; CHECK-NEXT: ret
588579
%tobool = fcmp ogt <4 x double> %vec, zeroinitializer
589580
%insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>

llvm/test/CodeGen/LoongArch/llvm.exp10.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -137,20 +137,20 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) #0 {
137137
; LA64-NEXT: addi.d $sp, $sp, -48
138138
; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
139139
; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
140-
; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
140+
; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
141141
; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
142142
; LA64-NEXT: pcaddu18i $ra, %call36(exp10f)
143143
; LA64-NEXT: jirl $ra, $ra, 0
144144
; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
145145
; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
146146
; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
147-
; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
147+
; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
148148
; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
149149
; LA64-NEXT: pcaddu18i $ra, %call36(exp10f)
150150
; LA64-NEXT: jirl $ra, $ra, 0
151151
; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
152152
; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
153-
; LA64-NEXT: vpackev.w $vr0, $vr0, $vr1
153+
; LA64-NEXT: vextrins.w $vr0, $vr1, 16
154154
; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
155155
; LA64-NEXT: addi.d $sp, $sp, 48
156156
; LA64-NEXT: ret

0 commit comments

Comments
 (0)