@@ -174,62 +174,58 @@ CIRGenFunction::convertBuiltinToIntrinsicName(llvm::StringRef builtinName) {
174174 {" movntdqa" , " llvm.x86.sse41.movntdqa" },
175175 {" movntdq" , " llvm.x86.sse2.movnt.dq" },
176176
177- // Arithmetic operations
178- {" addps" , " llvm.x86.sse.add.ps" },
179- {" subps" , " llvm.x86.sse.sub.ps" },
180- {" mulps" , " llvm.x86.sse.mul.ps" },
181- {" divps" , " llvm.x86.sse.div.ps" },
182-
183- // Cast operations (these might not need intrinsics)
184- {" castps_si128" , " llvm.x86.sse.cast.ps.si128" },
185- {" castsi128_ps" , " llvm.x86.sse.cast.si128.ps" },
186-
187- // Set/Zero operations
188- {" setzero_ps" , " llvm.x86.sse.setzero.ps" },
189- {" setzero_si128" , " llvm.x86.sse2.setzero.si128" },
190-
191- // Unpack operations
192- {" unpacklo_epi8" , " llvm.x86.sse2.punpcklbw.128" },
193- {" unpackhi_epi8" , " llvm.x86.sse2.punpckhbw.128" },
194- {" unpacklo_epi16" , " llvm.x86.sse2.punpcklwd.128" },
195- {" unpackhi_epi16" , " llvm.x86.sse2.punpckhwd.128" },
196-
197- // K-mask shift operations (AVX-512)
198- {" kshiftliqi" , " llvm.x86.avx512.kshiftl.b" },
199- {" kshiftlihi" , " llvm.x86.avx512.kshiftl.w" },
200- {" kshiftlisi" , " llvm.x86.avx512.kshiftl.d" },
201- {" kshiftlidi" , " llvm.x86.avx512.kshiftl.q" },
202- {" kshiftriqi" , " llvm.x86.avx512.kshiftr.b" },
203- {" kshiftrihi" , " llvm.x86.avx512.kshiftr.w" },
204- {" kshiftrisi" , " llvm.x86.avx512.kshiftr.d" },
205- {" kshiftridi" , " llvm.x86.avx512.kshiftr.q" },
206-
207- // Pack operations
208- {" packsswb128" , " llvm.x86.sse2.packsswb.128" },
209- {" packssdw128" , " llvm.x86.sse2.packssdw.128" },
210- {" packuswb128" , " llvm.x86.sse2.packuswb.128" },
211-
212- // Conversion operations
213- {" cvtps2dq" , " llvm.x86.sse2.cvtps2dq" },
214- {" cvtdq2ps" , " llvm.x86.sse2.cvtdq2ps" },
215- {" cvtpd2dq" , " llvm.x86.sse2.cvtpd2dq" },
216-
217- // Shuffle operations
218- {" shufps" , " llvm.x86.sse.shuf.ps" },
219- {" pshuflw" , " llvm.x86.sse2.pshufl.w" },
220- {" pshufhw" , " llvm.x86.sse2.pshufh.w" },
221- {" palignr128" , " llvm.x86.ssse3.palign.r.128" },
222- {" palignr256" , " llvm.x86.avx2.palign.r" },
223- {" permdi256" , " llvm.x86.avx2.permd" },
224-
225- // AES operations
226- {" aesdec128" , " llvm.x86.aesni.aesdec" },
227- {" aesenc128" , " llvm.x86.aesni.aesenc" },
228-
229- // Shift operations
230- {" pslldqi128_byteshift" , " llvm.x86.sse2.psll.dq" },
231- {" pslldqi256_byteshift" , " llvm.x86.avx2.psll.dq" },
232- {" pslldqi512_byteshift" , " llvm.x86.avx512.psll.dq.512" },
177+ // Arithmetic operations
178+ {" addps" , " llvm.x86.sse.add.ps" },
179+ {" subps" , " llvm.x86.sse.sub.ps" },
180+ {" mulps" , " llvm.x86.sse.mul.ps" },
181+ {" divps" , " llvm.x86.sse.div.ps" },
182+
183+ // Cast operations (these might not need intrinsics)
184+ {" castps_si128" , " llvm.x86.sse.cast.ps.si128" },
185+ {" castsi128_ps" , " llvm.x86.sse.cast.si128.ps" },
186+
187+ // Set/Zero operations
188+ {" setzero_ps" , " llvm.x86.sse.setzero.ps" },
189+ {" setzero_si128" , " llvm.x86.sse2.setzero.si128" },
190+
191+ // Unpack operations
192+ {" unpacklo_epi8" , " llvm.x86.sse2.punpcklbw.128" },
193+ {" unpackhi_epi8" , " llvm.x86.sse2.punpckhbw.128" },
194+ {" unpacklo_epi16" , " llvm.x86.sse2.punpcklwd.128" },
195+ {" unpackhi_epi16" , " llvm.x86.sse2.punpckhwd.128" },
196+
197+ // K-mask shift operations (AVX-512)
198+ {" kshiftliqi" , " llvm.x86.avx512.kshiftl.b" },
199+ {" kshiftlihi" , " llvm.x86.avx512.kshiftl.w" },
200+ {" kshiftlisi" , " llvm.x86.avx512.kshiftl.d" },
201+ {" kshiftlidi" , " llvm.x86.avx512.kshiftl.q" },
202+ {" kshiftriqi" , " llvm.x86.avx512.kshiftr.b" },
203+ {" kshiftrihi" , " llvm.x86.avx512.kshiftr.w" },
204+ {" kshiftrisi" , " llvm.x86.avx512.kshiftr.d" },
205+ {" kshiftridi" , " llvm.x86.avx512.kshiftr.q" },
206+
207+ // Pack operations
208+ {" packsswb128" , " llvm.x86.sse2.packsswb.128" },
209+ {" packssdw128" , " llvm.x86.sse2.packssdw.128" },
210+ {" packuswb128" , " llvm.x86.sse2.packuswb.128" },
211+
212+ // Conversion operations
213+ {" cvtps2dq" , " llvm.x86.sse2.cvtps2dq" },
214+ {" cvtdq2ps" , " llvm.x86.sse2.cvtdq2ps" },
215+ {" cvtpd2dq" , " llvm.x86.sse2.cvtpd2dq" },
216+
217+ // Comparison operations
218+ {" pcmpeqd128" , " llvm.x86.sse2.pcmpeq.d" },
219+ {" pcmpgtb128" , " llvm.x86.sse2.pcmpgt.b" },
220+
221+ // Shuffle operations
222+ {" shufps" , " llvm.x86.sse.shuf.ps" },
223+ {" pshuflw" , " llvm.x86.sse2.pshufl.w" },
224+ {" pshufhw" , " llvm.x86.sse2.pshufh.w" },
225+
226+ // AES operations
227+ {" aesdec128" , " llvm.x86.aesni.aesdec" },
228+ {" aesenc128" , " llvm.x86.aesni.aesenc" },
233229
234230 // Advanced math operations (using correct LLVM intrinsic names)
235231 {" sqrtps512" , " llvm.x86.avx512.sqrt.ps.512" },
@@ -253,40 +249,63 @@ CIRGenFunction::convertBuiltinToIntrinsicName(llvm::StringRef builtinName) {
253249 {" cmpunordpd" , " llvm.x86.sse2.cmp.pd" },
254250 {" cmpltss" , " llvm.x86.sse.cmp.ss" },
255251
256- // Bit manipulation
257- {" pand128" , " llvm.x86.sse2.pand" },
258- {" por128" , " llvm.x86.sse2.por" },
259- {" pxor128" , " llvm.x86.sse2.pxor" },
260- {" pandn128" , " llvm.x86.sse2.pandn" },
261-
262- // Mask operations (AVX-512)
263- {" kandqi" , " llvm.x86.avx512.kand.b" },
264- {" korqi" , " llvm.x86.avx512.kor.b" },
265- {" kxorqi" , " llvm.x86.avx512.kxor.b" },
266- {" knotqi" , " llvm.x86.avx512.knot.b" },
267-
268- // Conversion operations
269- {" cvtdq2ps256" , " llvm.x86.avx.cvtdq2.ps.256" },
270- {" cvtpd2ps" , " llvm.x86.sse2.cvtpd2ps" },
271- {" cvtps2dq256" , " llvm.x86.avx.cvtps2dq.256" },
272-
273- // Specialized operations
274- {" pternlogd128" , " llvm.x86.avx512.pternlog.d.128" },
275- {" vpopcntd_128" , " llvm.x86.avx512.vpopcnt.d.128" },
276- {" vplzcntd_128" , " llvm.x86.avx512.vplzcnt.d.128" },
277-
278- // Gather/Scatter operations
279- {" gathersiv4sf" , " llvm.x86.avx2.gather.d.ps" },
280- {" scattersiv4sf" , " llvm.x86.avx512.scatter.dps.512" },
281-
282- // Vector size operations
283- {" extract128i256" , " llvm.x86.avx2.vextracti128" },
284- {" insert128i256" , " llvm.x86.avx2.vinserti128" },
285- {" pbroadcastd256" , " llvm.x86.avx2.pbroadcastd.256" },
286-
287- // String processing
288- {" pcmpistri128" , " llvm.x86.sse42.pcmpistri128" },
289- {" pcmpistrm128" , " llvm.x86.sse42.pcmpistrm128" },
252+ // Shuffle operations
253+ {" pshuflw" , " llvm.x86.sse2.pshufl.w" },
254+ {" pshufhw" , " llvm.x86.sse2.pshufh.w" },
255+ {" palignr128" , " llvm.x86.ssse3.palign.r.128" },
256+ {" palignr256" , " llvm.x86.avx2.palign.r" },
257+ {" permdi256" , " llvm.x86.avx2.permd" },
258+
259+ // Comparison operations
260+ {" pcmpeqb128" , " llvm.x86.sse2.pcmpeq.b" },
261+ {" pcmpeqw128" , " llvm.x86.sse2.pcmpeq.w" },
262+ {" pcmpeqd128" , " llvm.x86.sse2.pcmpeq.d" },
263+ {" cmpeqps" , " llvm.x86.sse.cmp.ps" },
264+ {" cmpltps" , " llvm.x86.sse.cmp.ps" },
265+ {" cmpleps" , " llvm.x86.sse.cmp.ps" },
266+ {" cmpunordps" , " llvm.x86.sse.cmp.ps" },
267+ {" cmpunordpd" , " llvm.x86.sse2.cmp.pd" },
268+ {" cmpltss" , " llvm.x86.sse.cmp.ss" },
269+ // Bit manipulation
270+ {" pand128" , " llvm.x86.sse2.pand" },
271+ {" por128" , " llvm.x86.sse2.por" },
272+ {" pxor128" , " llvm.x86.sse2.pxor" },
273+ {" pandn128" , " llvm.x86.sse2.pandn" },
274+
275+ // Load/Store operations
276+ {" loaddqu" , " llvm.x86.sse2.loadu.dq" },
277+ {" storedqu" , " llvm.x86.sse2.storeu.dq" },
278+ {" movntdqa" , " llvm.x86.sse41.movntdqa" },
279+ {" movntdq" , " llvm.x86.sse2.movnt.dq" },
280+
281+ // Mask operations (AVX-512)
282+ {" kandqi" , " llvm.x86.avx512.kand.b" },
283+ {" korqi" , " llvm.x86.avx512.kor.b" },
284+ {" kxorqi" , " llvm.x86.avx512.kxor.b" },
285+ {" knotqi" , " llvm.x86.avx512.knot.b" },
286+
287+ // Conversion operations
288+ {" cvtdq2ps256" , " llvm.x86.avx.cvtdq2.ps.256" },
289+ {" cvtpd2ps" , " llvm.x86.sse2.cvtpd2ps" },
290+ {" cvtps2dq256" , " llvm.x86.avx.cvtps2dq.256" },
291+
292+ // Specialized operations
293+ {" pternlogd128" , " llvm.x86.avx512.pternlog.d.128" },
294+ {" vpopcntd_128" , " llvm.x86.avx512.vpopcnt.d.128" },
295+ {" vplzcntd_128" , " llvm.x86.avx512.vplzcnt.d.128" },
296+
297+ // Gather/Scatter operations
298+ {" gathersiv4sf" , " llvm.x86.avx2.gather.d.ps" },
299+ {" scattersiv4sf" , " llvm.x86.avx512.scatter.dps.512" },
300+
301+ // Vector size operations
302+ {" extract128i256" , " llvm.x86.avx2.vextracti128" },
303+ {" insert128i256" , " llvm.x86.avx2.vinserti128" },
304+ {" pbroadcastd256" , " llvm.x86.avx2.pbroadcastd.256" },
305+
306+ // String processing
307+ {" pcmpistri128" , " llvm.x86.sse42.pcmpistri128" },
308+ {" pcmpistrm128" , " llvm.x86.sse42.pcmpistrm128" },
290309 };
291310
292311 // Check if we have a direct mapping
0 commit comments