4 | 4 |
5 | 5 | declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>) |
6 | 6 |
7 | | -define <8 x float> @lasx_cast_128_s(<4 x float> %va) { |
| 7 | +define void @lasx_cast_128_s(ptr %vd, ptr %va) { |
8 | 8 | ; CHECK-LABEL: lasx_cast_128_s: |
9 | 9 | ; CHECK: # %bb.0: # %entry |
10 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 10 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 11 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
11 | 12 | ; CHECK-NEXT: ret |
12 | 13 | entry: |
13 | | - %res = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %va) |
14 | | - ret <8 x float> %res |
| 14 | + %a = load <4 x float>, ptr %va |
| 15 | + %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a) |
| 16 | + store <8 x float> %b, ptr %vd |
| 17 | + ret void |
15 | 18 | } |
16 | 19 |
17 | 20 | declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>) |
18 | 21 |
19 | | -define <4 x double> @lasx_cast_128_d(<2 x double> %va) { |
| 22 | +define void @lasx_cast_128_d(ptr %vd, ptr %va) { |
20 | 23 | ; CHECK-LABEL: lasx_cast_128_d: |
21 | 24 | ; CHECK: # %bb.0: # %entry |
22 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 25 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 26 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
23 | 27 | ; CHECK-NEXT: ret |
24 | 28 | entry: |
25 | | - %res = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %va) |
26 | | - ret <4 x double> %res |
| 29 | + %a = load <2 x double>, ptr %va |
| 30 | + %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a) |
| 31 | + store <4 x double> %b, ptr %vd |
| 32 | + ret void |
27 | 33 | } |
28 | 34 |
29 | 35 | declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>) |
30 | 36 |
31 | | -define <4 x i64> @lasx_cast_128(<2 x i64> %va) { |
| 37 | +define void @lasx_cast_128(ptr %vd, ptr %va) { |
32 | 38 | ; CHECK-LABEL: lasx_cast_128: |
33 | 39 | ; CHECK: # %bb.0: # %entry |
34 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 40 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 41 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
35 | 42 | ; CHECK-NEXT: ret |
36 | 43 | entry: |
37 | | - %res = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %va) |
38 | | - ret <4 x i64> %res |
| 44 | + %a = load <2 x i64>, ptr %va |
| 45 | + %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a) |
| 46 | + store <4 x i64> %b, ptr %vd |
| 47 | + ret void |
39 | 48 | } |
40 | 49 |
41 | 50 | declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>) |
42 | 51 |
43 | | -define <8 x float> @lasx_concat_128_s(<4 x float> %va, <4 x float> %vb) { |
| 52 | +define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) { |
44 | 53 | ; CHECK-LABEL: lasx_concat_128_s: |
45 | 54 | ; CHECK: # %bb.0: # %entry |
46 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
47 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 55 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 56 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
48 | 57 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 58 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
49 | 59 | ; CHECK-NEXT: ret |
50 | 60 | entry: |
51 | | - %res = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %va, <4 x float> %vb) |
52 | | - ret <8 x float> %res |
| 61 | + %a = load <4 x float>, ptr %va |
| 62 | + %b = load <4 x float>, ptr %vb |
| 63 | + %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b) |
| 64 | + store <8 x float> %c, ptr %vd |
| 65 | + ret void |
53 | 66 | } |
54 | 67 |
55 | 68 | declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>) |
56 | 69 |
57 | | -define <4 x double> @lasx_concat_128_d(<2 x double> %va, <2 x double> %vb) { |
| 70 | +define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) { |
58 | 71 | ; CHECK-LABEL: lasx_concat_128_d: |
59 | 72 | ; CHECK: # %bb.0: # %entry |
60 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
61 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 73 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 74 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
62 | 75 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 76 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
63 | 77 | ; CHECK-NEXT: ret |
64 | 78 | entry: |
65 | | - %res = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %va, <2 x double> %vb) |
66 | | - ret <4 x double> %res |
| 79 | + %a = load <2 x double>, ptr %va |
| 80 | + %b = load <2 x double>, ptr %vb |
| 81 | + %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b) |
| 82 | + store <4 x double> %c, ptr %vd |
| 83 | + ret void |
67 | 84 | } |
68 | 85 |
69 | 86 | declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>) |
70 | 87 |
71 | | -define <4 x i64> @lasx_concat_128(<2 x i64> %va, <2 x i64> %vb) { |
| 88 | +define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) { |
72 | 89 | ; CHECK-LABEL: lasx_concat_128: |
73 | 90 | ; CHECK: # %bb.0: # %entry |
74 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
75 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0 |
| 91 | +; CHECK-NEXT: vld $vr0, $a1, 0 |
| 92 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
76 | 93 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 94 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
77 | 95 | ; CHECK-NEXT: ret |
78 | 96 | entry: |
79 | | - %res = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %va, <2 x i64> %vb) |
80 | | - ret <4 x i64> %res |
| 97 | + %a = load <2 x i64>, ptr %va |
| 98 | + %b = load <2 x i64>, ptr %vb |
| 99 | + %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b) |
| 100 | + store <4 x i64> %c, ptr %vd |
| 101 | + ret void |
81 | 102 | } |
82 | 103 |
83 | 104 | declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>) |
84 | 105 |
85 | | -define <4 x float> @lasx_extract_128_lo_s(<8 x float> %va) { |
| 106 | +define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) { |
86 | 107 | ; CHECK-LABEL: lasx_extract_128_lo_s: |
87 | 108 | ; CHECK: # %bb.0: # %entry |
88 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 109 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 110 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
89 | 111 | ; CHECK-NEXT: ret |
90 | 112 | entry: |
91 | | - %res = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %va) |
92 | | - ret <4 x float> %res |
| 113 | + %a = load <8 x float>, ptr %va |
| 114 | + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a) |
| 115 | + store <4 x float> %c, ptr %vd |
| 116 | + ret void |
93 | 117 | } |
94 | 118 |
95 | 119 | declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>) |
96 | 120 |
97 | | -define <2 x double> @lasx_extract_128_lo_d(<4 x double> %va) { |
| 121 | +define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) { |
98 | 122 | ; CHECK-LABEL: lasx_extract_128_lo_d: |
99 | 123 | ; CHECK: # %bb.0: # %entry |
100 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 124 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 125 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
101 | 126 | ; CHECK-NEXT: ret |
102 | 127 | entry: |
103 | | - %res = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %va) |
104 | | - ret <2 x double> %res |
| 128 | + %a = load <4 x double>, ptr %va |
| 129 | + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a) |
| 130 | + store <2 x double> %c, ptr %vd |
| 131 | + ret void |
105 | 132 | } |
106 | 133 |
107 | 134 | declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>) |
108 | 135 |
109 | | -define <2 x i64> @lasx_extract_128_lo(<4 x i64> %va) { |
| 136 | +define void @lasx_extract_128_lo(ptr %vd, ptr %va) { |
110 | 137 | ; CHECK-LABEL: lasx_extract_128_lo: |
111 | 138 | ; CHECK: # %bb.0: # %entry |
112 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 139 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 140 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
113 | 141 | ; CHECK-NEXT: ret |
114 | 142 | entry: |
115 | | - %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %va) |
116 | | - ret <2 x i64> %res |
| 143 | + %a = load <4 x i64>, ptr %va |
| 144 | + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a) |
| 145 | + store <2 x i64> %c, ptr %vd |
| 146 | + ret void |
117 | 147 | } |
118 | 148 |
119 | 149 | declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>) |
120 | 150 |
121 | | -define <4 x float> @lasx_extract_128_hi_s(<8 x float> %va) { |
| 151 | +define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) { |
122 | 152 | ; CHECK-LABEL: lasx_extract_128_hi_s: |
123 | 153 | ; CHECK: # %bb.0: # %entry |
| 154 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
124 | 155 | ; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 |
125 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 156 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
126 | 157 | ; CHECK-NEXT: ret |
127 | 158 | entry: |
128 | | - %res = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %va) |
129 | | - ret <4 x float> %res |
| 159 | + %a = load <8 x float>, ptr %va |
| 160 | + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a) |
| 161 | + store <4 x float> %c, ptr %vd |
| 162 | + ret void |
130 | 163 | } |
131 | 164 |
132 | 165 | declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>) |
133 | 166 |
134 | | -define <2 x double> @lasx_extract_128_hi_d(<4 x double> %va) { |
| 167 | +define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) { |
135 | 168 | ; CHECK-LABEL: lasx_extract_128_hi_d: |
136 | 169 | ; CHECK: # %bb.0: # %entry |
| 170 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
137 | 171 | ; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 |
138 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 172 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
139 | 173 | ; CHECK-NEXT: ret |
140 | 174 | entry: |
141 | | - %res = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %va) |
142 | | - ret <2 x double> %res |
| 175 | + %a = load <4 x double>, ptr %va |
| 176 | + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a) |
| 177 | + store <2 x double> %c, ptr %vd |
| 178 | + ret void |
143 | 179 | } |
144 | 180 |
145 | 181 | declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>) |
146 | 182 |
147 | | -define <2 x i64> @lasx_extract_128_hi(<4 x i64> %va) { |
| 183 | +define void @lasx_extract_128_hi(ptr %vd, ptr %va) { |
148 | 184 | ; CHECK-LABEL: lasx_extract_128_hi: |
149 | 185 | ; CHECK: # %bb.0: # %entry |
| 186 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
150 | 187 | ; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 |
151 | | -; CHECK-NEXT: # kill: def $vr0 killed $vr0 killed $xr0 |
| 188 | +; CHECK-NEXT: vst $vr0, $a0, 0 |
152 | 189 | ; CHECK-NEXT: ret |
153 | 190 | entry: |
154 | | - %res = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %va) |
155 | | - ret <2 x i64> %res |
| 191 | + %a = load <4 x i64>, ptr %va |
| 192 | + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a) |
| 193 | + store <2 x i64> %c, ptr %vd |
| 194 | + ret void |
156 | 195 | } |
157 | 196 |
158 | 197 | declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>) |
159 | 198 |
160 | | -define <8 x float> @lasx_insert_128_lo_s(<8 x float> %va, <4 x float> %vb) { |
| 199 | +define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) { |
161 | 200 | ; CHECK-LABEL: lasx_insert_128_lo_s: |
162 | 201 | ; CHECK: # %bb.0: # %entry |
163 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 202 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 203 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
164 | 204 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 |
| 205 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
165 | 206 | ; CHECK-NEXT: ret |
166 | 207 | entry: |
167 | | - %res = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %va, <4 x float> %vb) |
168 | | - ret <8 x float> %res |
| 208 | + %a = load <8 x float>, ptr %va |
| 209 | + %b = load <4 x float>, ptr %vb |
| 210 | + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b) |
| 211 | + store <8 x float> %c, ptr %vd |
| 212 | + ret void |
169 | 213 | } |
170 | 214 |
171 | 215 | declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>) |
172 | 216 |
173 | | -define <4 x double> @lasx_insert_128_lo_d(<4 x double> %va, <2 x double> %vb) { |
| 217 | +define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) { |
174 | 218 | ; CHECK-LABEL: lasx_insert_128_lo_d: |
175 | 219 | ; CHECK: # %bb.0: # %entry |
176 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 220 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 221 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
177 | 222 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 |
| 223 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
178 | 224 | ; CHECK-NEXT: ret |
179 | 225 | entry: |
180 | | - %res = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %va, <2 x double> %vb) |
181 | | - ret <4 x double> %res |
| 226 | + %a = load <4 x double>, ptr %va |
| 227 | + %b = load <2 x double>, ptr %vb |
| 228 | + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b) |
| 229 | + store <4 x double> %c, ptr %vd |
| 230 | + ret void |
182 | 231 | } |
183 | 232 |
184 | 233 | declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>) |
185 | 234 |
186 | | -define <4 x i64> @lasx_insert_128_lo(<4 x i64> %va, <2 x i64> %vb) { |
| 235 | +define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) { |
187 | 236 | ; CHECK-LABEL: lasx_insert_128_lo: |
188 | 237 | ; CHECK: # %bb.0: # %entry |
189 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 238 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 239 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
190 | 240 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 |
| 241 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
191 | 242 | ; CHECK-NEXT: ret |
192 | 243 | entry: |
193 | | - %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %va, <2 x i64> %vb) |
194 | | - ret <4 x i64> %res |
| 244 | + %a = load <4 x i64>, ptr %va |
| 245 | + %b = load <2 x i64>, ptr %vb |
| 246 | + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b) |
| 247 | + store <4 x i64> %c, ptr %vd |
| 248 | + ret void |
195 | 249 | } |
196 | 250 |
197 | 251 | declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>) |
198 | 252 |
199 | | -define <8 x float> @lasx_insert_128_hi_s(<8 x float> %va, <4 x float> %vb) { |
| 253 | +define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) { |
200 | 254 | ; CHECK-LABEL: lasx_insert_128_hi_s: |
201 | 255 | ; CHECK: # %bb.0: # %entry |
202 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 256 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 257 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
203 | 258 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 259 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
204 | 260 | ; CHECK-NEXT: ret |
205 | 261 | entry: |
206 | | - %res = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %va, <4 x float> %vb) |
207 | | - ret <8 x float> %res |
| 262 | + %a = load <8 x float>, ptr %va |
| 263 | + %b = load <4 x float>, ptr %vb |
| 264 | + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b) |
| 265 | + store <8 x float> %c, ptr %vd |
| 266 | + ret void |
208 | 267 | } |
209 | 268 |
210 | 269 | declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>) |
211 | 270 |
212 | | -define <4 x double> @lasx_insert_128_hi_d(<4 x double> %va, <2 x double> %vb) { |
| 271 | +define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) { |
213 | 272 | ; CHECK-LABEL: lasx_insert_128_hi_d: |
214 | 273 | ; CHECK: # %bb.0: # %entry |
215 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 274 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 275 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
216 | 276 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 277 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
217 | 278 | ; CHECK-NEXT: ret |
218 | 279 | entry: |
219 | | - %res = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %va, <2 x double> %vb) |
220 | | - ret <4 x double> %res |
| 280 | + %a = load <4 x double>, ptr %va |
| 281 | + %b = load <2 x double>, ptr %vb |
| 282 | + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b) |
| 283 | + store <4 x double> %c, ptr %vd |
| 284 | + ret void |
221 | 285 | } |
222 | 286 |
223 | 287 | declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>) |
224 | 288 |
225 | | -define <4 x i64> @lasx_insert_128_hi(<4 x i64> %va, <2 x i64> %vb) { |
| 289 | +define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) { |
226 | 290 | ; CHECK-LABEL: lasx_insert_128_hi: |
227 | 291 | ; CHECK: # %bb.0: # %entry |
228 | | -; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 |
| 292 | +; CHECK-NEXT: xvld $xr0, $a1, 0 |
| 293 | +; CHECK-NEXT: vld $vr1, $a2, 0 |
229 | 294 | ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 |
| 295 | +; CHECK-NEXT: xvst $xr0, $a0, 0 |
230 | 296 | ; CHECK-NEXT: ret |
231 | 297 | entry: |
232 | | - %res = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %va, <2 x i64> %vb) |
233 | | - ret <4 x i64> %res |
| 298 | + %a = load <4 x i64>, ptr %va |
| 299 | + %b = load <2 x i64>, ptr %vb |
| 300 | + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b) |
| 301 | + store <4 x i64> %c, ptr %vd |
| 302 | + ret void |
234 | 303 | } |