Commit a410570

[llvm][LoongArch] Introduce LASX and LSX conversion intrinsics (#157818)
This patch introduces the LASX and LSX conversion intrinsics:
- <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)
- <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)
- <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
- <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)
- <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)
- <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)
- <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)
- <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)
- <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)
- <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)
- <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)
- <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)
- <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
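As a rough illustration of how these intrinsics compose, here is a minimal IR sketch. It is not part of the patch: the function name and the combination of calls are made up for illustration; the intrinsic signatures come from the list above, and the note about the undefined high half is inferred from the IMPLICIT_DEF used in the patterns further down.

declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)
declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)
declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)

; Illustrative only: widen a 128-bit LSX value to 256 bits, fill the high
; 128-bit half, then read that half back.
define <2 x i64> @widen_set_hi_read_hi(<2 x i64> %v, <2 x i64> %hi) {
  ; Low 128 bits become %v; the high half is left undefined (IMPLICIT_DEF).
  %wide = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %v)
  ; Overwrite the high 128 bits with %hi; the low half is preserved.
  %full = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %wide, <2 x i64> %hi)
  ; Extract the high 128 bits; expected to be %hi.
  %out = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %full)
  ret <2 x i64> %out
}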
1 parent c782ed3 commit a410570

4 files changed: +377 -0 lines changed

llvm/include/llvm/IR/IntrinsicsLoongArch.td

Lines changed: 38 additions & 0 deletions
@@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w
 def int_loongarch_lasx_xvstelm_d
     : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
              [IntrWriteMem, IntrArgMemOnly, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>;
+
+// LASX and LSX conversion
+def int_loongarch_lasx_cast_128_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_cast_128
+    : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_concat_128
+    : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_s
+    : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo_d
+    : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_lo
+    : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_s
+    : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi_d
+    : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_extract_128_hi
+    : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_lo
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_s
+    : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi_d
+    : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_insert_128_hi
+    : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 } // TargetPrefix = "loongarch"

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 5 additions & 0 deletions
@@ -6630,6 +6630,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
                        N->getOperand(1));
     break;
+  case Intrinsic::loongarch_lasx_concat_128_s:
+  case Intrinsic::loongarch_lasx_concat_128_d:
+  case Intrinsic::loongarch_lasx_concat_128:
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
   }
   return SDValue();
 }

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 31 additions & 0 deletions
@@ -2113,6 +2113,37 @@ defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
 defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
 
+// LASX and LSX conversion
+def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)),
+          (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG LASX256:$src, sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)),
+          (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
+def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)),
+          (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>;
 } // Predicates = [HasExtLASX]
 
 /// Intrinsic pattern
Lines changed: 303 additions & 0 deletions
@@ -0,0 +1,303 @@ (new test file)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s

declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>)

define void @lasx_cast_128_s(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_cast_128_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x float>, ptr %va
  %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a)
  store <8 x float> %b, ptr %vd
  ret void
}

declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>)

define void @lasx_cast_128_d(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_cast_128_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <2 x double>, ptr %va
  %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a)
  store <4 x double> %b, ptr %vd
  ret void
}

declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>)

define void @lasx_cast_128(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_cast_128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <2 x i64>, ptr %va
  %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a)
  store <4 x i64> %b, ptr %vd
  ret void
}

declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>)

define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_concat_128_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x float>, ptr %va
  %b = load <4 x float>, ptr %vb
  %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b)
  store <8 x float> %c, ptr %vd
  ret void
}

declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>)

define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_concat_128_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <2 x double>, ptr %va
  %b = load <2 x double>, ptr %vb
  %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b)
  store <4 x double> %c, ptr %vd
  ret void
}

declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>)

define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_concat_128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vld $vr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <2 x i64>, ptr %va
  %b = load <2 x i64>, ptr %vb
  %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b)
  store <4 x i64> %c, ptr %vd
  ret void
}

declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>)

define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_lo_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <8 x float>, ptr %va
  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a)
  store <4 x float> %c, ptr %vd
  ret void
}

declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>)

define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_lo_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x double>, ptr %va
  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a)
  store <2 x double> %c, ptr %vd
  ret void
}

declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>)

define void @lasx_extract_128_lo(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_lo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x i64>, ptr %va
  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a)
  store <2 x i64> %c, ptr %vd
  ret void
}

declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>)

define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_hi_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <8 x float>, ptr %va
  %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a)
  store <4 x float> %c, ptr %vd
  ret void
}

declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>)

define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_hi_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x double>, ptr %va
  %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a)
  store <2 x double> %c, ptr %vd
  ret void
}

declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>)

define void @lasx_extract_128_hi(ptr %vd, ptr %va) {
; CHECK-LABEL: lasx_extract_128_hi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 1
; CHECK-NEXT:    vst $vr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x i64>, ptr %va
  %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a)
  store <2 x i64> %c, ptr %vd
  ret void
}

declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>)

define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_lo_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <8 x float>, ptr %va
  %b = load <4 x float>, ptr %vb
  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b)
  store <8 x float> %c, ptr %vd
  ret void
}

declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>)

define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_lo_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x double>, ptr %va
  %b = load <2 x double>, ptr %vb
  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b)
  store <4 x double> %c, ptr %vd
  ret void
}

declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>)

define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_lo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x i64>, ptr %va
  %b = load <2 x i64>, ptr %vb
  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b)
  store <4 x i64> %c, ptr %vd
  ret void
}

declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>)

define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_hi_s:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <8 x float>, ptr %va
  %b = load <4 x float>, ptr %vb
  %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b)
  store <8 x float> %c, ptr %vd
  ret void
}

declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>)

define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_hi_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x double>, ptr %va
  %b = load <2 x double>, ptr %vb
  %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b)
  store <4 x double> %c, ptr %vd
  ret void
}

declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>)

define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) {
; CHECK-LABEL: lasx_insert_128_hi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvld $xr0, $a1, 0
; CHECK-NEXT:    vld $vr1, $a2, 0
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
entry:
  %a = load <4 x i64>, ptr %va
  %b = load <2 x i64>, ptr %vb
  %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b)
  store <4 x i64> %c, ptr %vd
  ret void
}
