@@ -223,18 +223,31 @@ entry:
223223}
224224
225225define arm_aapcs_vfpcc <8 x i16 > @shuffle3_i16 (<8 x i16 > %src ) {
226- ; CHECK-LABEL: shuffle3_i16:
227- ; CHECK: @ %bb.0: @ %entry
228- ; CHECK-NEXT: vmovx.f16 s5, s3
229- ; CHECK-NEXT: vmovx.f16 s6, s1
230- ; CHECK-NEXT: vmovx.f16 s4, s0
231- ; CHECK-NEXT: vins.f16 s1, s0
232- ; CHECK-NEXT: vins.f16 s6, s4
233- ; CHECK-NEXT: vins.f16 s5, s3
234- ; CHECK-NEXT: vmov.f32 s4, s2
235- ; CHECK-NEXT: vmov.f32 s7, s1
236- ; CHECK-NEXT: vmov q0, q1
237- ; CHECK-NEXT: bx lr
226+ ; CHECK-LV-LABEL: shuffle3_i16:
227+ ; CHECK-LV: @ %bb.0: @ %entry
228+ ; CHECK-LV-NEXT: vmovx.f16 s5, s3
229+ ; CHECK-LV-NEXT: vmovx.f16 s6, s1
230+ ; CHECK-LV-NEXT: vmovx.f16 s4, s0
231+ ; CHECK-LV-NEXT: vins.f16 s1, s0
232+ ; CHECK-LV-NEXT: vins.f16 s6, s4
233+ ; CHECK-LV-NEXT: vins.f16 s5, s3
234+ ; CHECK-LV-NEXT: vmov.f32 s4, s2
235+ ; CHECK-LV-NEXT: vmov.f32 s7, s1
236+ ; CHECK-LV-NEXT: vmov q0, q1
237+ ; CHECK-LV-NEXT: bx lr
238+
239+ ; CHECK-LIS-LABEL: shuffle3_i16:
240+ ; CHECK-LIS: @ %bb.0: @ %entry
241+ ; CHECK-LIS-NEXT: vmov q1, q0
242+ ; CHECK-LIS-NEXT: vmovx.f16 s2, s5
243+ ; CHECK-LIS-NEXT: vmovx.f16 s0, s4
244+ ; CHECK-LIS-NEXT: vins.f16 s5, s4
245+ ; CHECK-LIS-NEXT: vins.f16 s2, s0
246+ ; CHECK-LIS-NEXT: vmov.f32 s0, s6
247+ ; CHECK-LIS-NEXT: vmovx.f16 s1, s7
248+ ; CHECK-LIS-NEXT: vmov.f32 s3, s5
249+ ; CHECK-LIS-NEXT: vins.f16 s1, s7
250+ ; CHECK-LIS-NEXT: bx lr
238251entry:
239252 %out = shufflevector <8 x i16 > %src , <8 x i16 > undef , <8 x i32 > <i32 4 , i32 5 , i32 7 , i32 6 , i32 3 , i32 1 , i32 2 , i32 0 >
240253 ret <8 x i16 > %out
@@ -1145,18 +1158,31 @@ entry:
11451158}
11461159
11471160define arm_aapcs_vfpcc <8 x half > @shuffle3_f16 (<8 x half > %src ) {
1148- ; CHECK-LABEL: shuffle3_f16:
1149- ; CHECK: @ %bb.0: @ %entry
1150- ; CHECK-NEXT: vmovx.f16 s5, s3
1151- ; CHECK-NEXT: vmovx.f16 s6, s1
1152- ; CHECK-NEXT: vmovx.f16 s4, s0
1153- ; CHECK-NEXT: vins.f16 s1, s0
1154- ; CHECK-NEXT: vins.f16 s6, s4
1155- ; CHECK-NEXT: vins.f16 s5, s3
1156- ; CHECK-NEXT: vmov.f32 s4, s2
1157- ; CHECK-NEXT: vmov.f32 s7, s1
1158- ; CHECK-NEXT: vmov q0, q1
1159- ; CHECK-NEXT: bx lr
1161+ ; CHECK-LV-LABEL: shuffle3_f16:
1162+ ; CHECK-LV: @ %bb.0: @ %entry
1163+ ; CHECK-LV-NEXT: vmovx.f16 s5, s3
1164+ ; CHECK-LV-NEXT: vmovx.f16 s6, s1
1165+ ; CHECK-LV-NEXT: vmovx.f16 s4, s0
1166+ ; CHECK-LV-NEXT: vins.f16 s1, s0
1167+ ; CHECK-LV-NEXT: vins.f16 s6, s4
1168+ ; CHECK-LV-NEXT: vins.f16 s5, s3
1169+ ; CHECK-LV-NEXT: vmov.f32 s4, s2
1170+ ; CHECK-LV-NEXT: vmov.f32 s7, s1
1171+ ; CHECK-LV-NEXT: vmov q0, q1
1172+ ; CHECK-LV-NEXT: bx lr
1173+
1174+ ; CHECK-LIS-LABEL: shuffle3_f16:
1175+ ; CHECK-LIS: @ %bb.0: @ %entry
1176+ ; CHECK-LIS-NEXT: vmov q1, q0
1177+ ; CHECK-LIS-NEXT: vmovx.f16 s2, s5
1178+ ; CHECK-LIS-NEXT: vmovx.f16 s0, s4
1179+ ; CHECK-LIS-NEXT: vins.f16 s5, s4
1180+ ; CHECK-LIS-NEXT: vins.f16 s2, s0
1181+ ; CHECK-LIS-NEXT: vmov.f32 s0, s6
1182+ ; CHECK-LIS-NEXT: vmovx.f16 s1, s7
1183+ ; CHECK-LIS-NEXT: vmov.f32 s3, s5
1184+ ; CHECK-LIS-NEXT: vins.f16 s1, s7
1185+ ; CHECK-LIS-NEXT: bx lr
11601186entry:
11611187 %out = shufflevector <8 x half > %src , <8 x half > undef , <8 x i32 > <i32 4 , i32 5 , i32 7 , i32 6 , i32 3 , i32 1 , i32 2 , i32 0 >
11621188 ret <8 x half > %out
@@ -1467,27 +1493,47 @@ entry:
14671493 ret <2 x double > %out
14681494}
14691495define arm_aapcs_vfpcc <8 x double > @shuffle9_f64 (<4 x double > %src1 , <4 x double > %src2 ) {
1470- ; CHECK-LABEL: shuffle9_f64:
1471- ; CHECK: @ %bb.0: @ %entry
1472- ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1473- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1474- ; CHECK-NEXT: vmov q5, q2
1475- ; CHECK-NEXT: vmov.f32 s16, s0
1476- ; CHECK-NEXT: vmov.f32 s18, s20
1477- ; CHECK-NEXT: vmov.f32 s20, s2
1478- ; CHECK-NEXT: vmov.f32 s10, s12
1479- ; CHECK-NEXT: vmov.f32 s19, s21
1480- ; CHECK-NEXT: vmov.f32 s8, s4
1481- ; CHECK-NEXT: vmov.f32 s17, s1
1482- ; CHECK-NEXT: vmov.f32 s21, s3
1483- ; CHECK-NEXT: vmov q0, q4
1484- ; CHECK-NEXT: vmov.f32 s12, s6
1485- ; CHECK-NEXT: vmov.f32 s11, s13
1486- ; CHECK-NEXT: vmov.f32 s9, s5
1487- ; CHECK-NEXT: vmov.f32 s13, s7
1488- ; CHECK-NEXT: vmov q1, q5
1489- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1490- ; CHECK-NEXT: bx lr
1496+ ; CHECK-LV-LABEL: shuffle9_f64:
1497+ ; CHECK-LV: @ %bb.0: @ %entry
1498+ ; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1499+ ; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1500+ ; CHECK-LV-NEXT: vmov q5, q2
1501+ ; CHECK-LV-NEXT: vmov.f32 s16, s0
1502+ ; CHECK-LV-NEXT: vmov.f32 s18, s20
1503+ ; CHECK-LV-NEXT: vmov.f32 s20, s2
1504+ ; CHECK-LV-NEXT: vmov.f32 s10, s12
1505+ ; CHECK-LV-NEXT: vmov.f32 s19, s21
1506+ ; CHECK-LV-NEXT: vmov.f32 s8, s4
1507+ ; CHECK-LV-NEXT: vmov.f32 s17, s1
1508+ ; CHECK-LV-NEXT: vmov.f32 s21, s3
1509+ ; CHECK-LV-NEXT: vmov q0, q4
1510+ ; CHECK-LV-NEXT: vmov.f32 s12, s6
1511+ ; CHECK-LV-NEXT: vmov.f32 s11, s13
1512+ ; CHECK-LV-NEXT: vmov.f32 s9, s5
1513+ ; CHECK-LV-NEXT: vmov.f32 s13, s7
1514+ ; CHECK-LV-NEXT: vmov q1, q5
1515+ ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1516+ ; CHECK-LV-NEXT: bx lr
1517+
1518+ ; CHECK-LIS-LABEL: shuffle9_f64:
1519+ ; CHECK-LIS: @ %bb.0: @ %entry
1520+ ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1521+ ; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1522+ ; CHECK-LIS-NEXT: vmov q5, q2
1523+ ; CHECK-LIS-NEXT: vmov q4, q0
1524+ ; CHECK-LIS-NEXT: vmov.f32 s2, s20
1525+ ; CHECK-LIS-NEXT: vmov.f32 s20, s18
1526+ ; CHECK-LIS-NEXT: vmov.f32 s10, s12
1527+ ; CHECK-LIS-NEXT: vmov.f32 s3, s21
1528+ ; CHECK-LIS-NEXT: vmov.f32 s8, s4
1529+ ; CHECK-LIS-NEXT: vmov.f32 s21, s19
1530+ ; CHECK-LIS-NEXT: vmov.f32 s12, s6
1531+ ; CHECK-LIS-NEXT: vmov.f32 s11, s13
1532+ ; CHECK-LIS-NEXT: vmov.f32 s9, s5
1533+ ; CHECK-LIS-NEXT: vmov.f32 s13, s7
1534+ ; CHECK-LIS-NEXT: vmov q1, q5
1535+ ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1536+ ; CHECK-LIS-NEXT: bx lr
14911537entry:
14921538 %out = shufflevector <4 x double > %src1 , <4 x double > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
14931539 ret <8 x double > %out
@@ -1560,27 +1606,47 @@ entry:
15601606 ret <2 x i64 > %out
15611607}
15621608define arm_aapcs_vfpcc <8 x i64 > @shuffle9_i64 (<4 x i64 > %src1 , <4 x i64 > %src2 ) {
1563- ; CHECK-LABEL: shuffle9_i64:
1564- ; CHECK: @ %bb.0: @ %entry
1565- ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1566- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1567- ; CHECK-NEXT: vmov q5, q2
1568- ; CHECK-NEXT: vmov.f32 s16, s0
1569- ; CHECK-NEXT: vmov.f32 s18, s20
1570- ; CHECK-NEXT: vmov.f32 s20, s2
1571- ; CHECK-NEXT: vmov.f32 s10, s12
1572- ; CHECK-NEXT: vmov.f32 s19, s21
1573- ; CHECK-NEXT: vmov.f32 s8, s4
1574- ; CHECK-NEXT: vmov.f32 s17, s1
1575- ; CHECK-NEXT: vmov.f32 s21, s3
1576- ; CHECK-NEXT: vmov q0, q4
1577- ; CHECK-NEXT: vmov.f32 s12, s6
1578- ; CHECK-NEXT: vmov.f32 s11, s13
1579- ; CHECK-NEXT: vmov.f32 s9, s5
1580- ; CHECK-NEXT: vmov.f32 s13, s7
1581- ; CHECK-NEXT: vmov q1, q5
1582- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1583- ; CHECK-NEXT: bx lr
1609+ ; CHECK-LV-LABEL: shuffle9_i64:
1610+ ; CHECK-LV: @ %bb.0: @ %entry
1611+ ; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1612+ ; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1613+ ; CHECK-LV-NEXT: vmov q5, q2
1614+ ; CHECK-LV-NEXT: vmov.f32 s16, s0
1615+ ; CHECK-LV-NEXT: vmov.f32 s18, s20
1616+ ; CHECK-LV-NEXT: vmov.f32 s20, s2
1617+ ; CHECK-LV-NEXT: vmov.f32 s10, s12
1618+ ; CHECK-LV-NEXT: vmov.f32 s19, s21
1619+ ; CHECK-LV-NEXT: vmov.f32 s8, s4
1620+ ; CHECK-LV-NEXT: vmov.f32 s17, s1
1621+ ; CHECK-LV-NEXT: vmov.f32 s21, s3
1622+ ; CHECK-LV-NEXT: vmov q0, q4
1623+ ; CHECK-LV-NEXT: vmov.f32 s12, s6
1624+ ; CHECK-LV-NEXT: vmov.f32 s11, s13
1625+ ; CHECK-LV-NEXT: vmov.f32 s9, s5
1626+ ; CHECK-LV-NEXT: vmov.f32 s13, s7
1627+ ; CHECK-LV-NEXT: vmov q1, q5
1628+ ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1629+ ; CHECK-LV-NEXT: bx lr
1630+
1631+ ; CHECK-LIS-LABEL: shuffle9_i64:
1632+ ; CHECK-LIS: @ %bb.0: @ %entry
1633+ ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1634+ ; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1635+ ; CHECK-LIS-NEXT: vmov q5, q2
1636+ ; CHECK-LIS-NEXT: vmov q4, q0
1637+ ; CHECK-LIS-NEXT: vmov.f32 s2, s20
1638+ ; CHECK-LIS-NEXT: vmov.f32 s20, s18
1639+ ; CHECK-LIS-NEXT: vmov.f32 s10, s12
1640+ ; CHECK-LIS-NEXT: vmov.f32 s3, s21
1641+ ; CHECK-LIS-NEXT: vmov.f32 s8, s4
1642+ ; CHECK-LIS-NEXT: vmov.f32 s21, s19
1643+ ; CHECK-LIS-NEXT: vmov.f32 s12, s6
1644+ ; CHECK-LIS-NEXT: vmov.f32 s11, s13
1645+ ; CHECK-LIS-NEXT: vmov.f32 s9, s5
1646+ ; CHECK-LIS-NEXT: vmov.f32 s13, s7
1647+ ; CHECK-LIS-NEXT: vmov q1, q5
1648+ ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1649+ ; CHECK-LIS-NEXT: bx lr
15841650entry:
15851651 %out = shufflevector <4 x i64 > %src1 , <4 x i64 > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
15861652 ret <8 x i64 > %out
0 commit comments