@@ -16,25 +16,31 @@ fn call_asm<'tcx>(
1616 code : & [ u8 ] ,
1717) {
1818 let name = format ! ( "__rust_cranelift_{name}" ) ;
19- let is_defined = fx. module . declarations ( ) . get_name ( & name) . is_none ( ) ;
2019
2120 let sig = Signature {
2221 params : args
2322 . iter ( )
24- . map ( |arg| AbiParam :: new ( fx. clif_type ( arg. layout ( ) . ty ) . unwrap ( ) ) )
23+ . map ( |_| AbiParam :: new ( fx. pointer_type ) )
24+ . chain ( Some ( AbiParam :: new ( fx. pointer_type ) ) )
2525 . collect ( ) ,
26- returns : vec ! [ AbiParam :: new ( fx . clif_type ( ret . layout ( ) . ty ) . unwrap ( ) ) ] ,
26+ returns : vec ! [ ] ,
2727 call_conv : CallConv :: SystemV ,
2828 } ;
2929
3030 let func = fx. module . declare_function ( & name, Linkage :: Local , & sig) . unwrap ( ) ;
31- if !is_defined {
32- fx. module . define_function_bytes ( func, & Function :: new ( ) , 4 , & code, & [ ] ) . unwrap ( ) ;
31+ match fx. module . define_function_bytes ( func, & Function :: new ( ) , 4 , & code, & [ ] ) {
32+ Ok ( _) | Err ( cranelift_module:: ModuleError :: DuplicateDefinition ( _) ) => { }
33+ err => err. unwrap ( ) ,
3334 }
3435
3536 let func_ref = fx. module . declare_func_in_func ( func, & mut fx. bcx . func ) ;
36- let res = fx. bcx . ins ( ) . call ( func_ref, & args. into_iter ( ) . map ( |_| todo ! ( ) ) . collect :: < Vec < _ > > ( ) ) ;
37- todo ! ( "write result" )
37+ let mut args =
38+ args. into_iter ( ) . map ( |arg| arg. force_stack ( fx) . 0 . get_addr ( fx) ) . collect :: < Vec < _ > > ( ) ;
39+ let res = CPlace :: new_stack_slot ( fx, ret. layout ( ) ) ;
40+ args. push ( res. to_ptr ( ) . get_addr ( fx) ) ;
41+ fx. bcx . ins ( ) . call ( func_ref, & args) ;
42+ let res = res. to_cvalue ( fx) ;
43+ ret. write_cvalue ( fx, res) ;
3844}
3945
4046pub ( crate ) fn codegen_aarch64_llvm_intrinsic_call < ' tcx > (
@@ -218,7 +224,7 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
218224 } ) ;
219225 }
220226
221- _ if intrinsic. starts_with ( "llvm.aarch64.neon.smax.v" ) => {
227+ /* _ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => {
222228 intrinsic_args!(fx, args => (x, y); intrinsic);
223229
224230 simd_pair_for_each_lane(
@@ -231,6 +237,186 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
231237 fx.bcx.ins().select(gt, x_lane, y_lane)
232238 },
233239 );
240+ }*/
241+ "llvm.aarch64.neon.smax.v16i8" => {
242+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
243+ call_asm (
244+ fx,
245+ "llvm__aarch64__neon__smax__v16i8" ,
246+ & [ a, b] ,
247+ ret,
248+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 100 , 33 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
249+ ) ;
250+ }
251+ "llvm.aarch64.neon.smax.v2i32" => {
252+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
253+ call_asm (
254+ fx,
255+ "llvm__aarch64__neon__smax__v2i32" ,
256+ & [ a, b] ,
257+ ret,
258+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 100 , 161 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
259+ ) ;
260+ }
261+ "llvm.aarch64.neon.smax.v4i16" => {
262+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
263+ call_asm (
264+ fx,
265+ "llvm__aarch64__neon__smax__v4i16" ,
266+ & [ a, b] ,
267+ ret,
268+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 100 , 97 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
269+ ) ;
270+ }
271+ "llvm.aarch64.neon.smax.v4i32" => {
272+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
273+ call_asm (
274+ fx,
275+ "llvm__aarch64__neon__smax__v4i32" ,
276+ & [ a, b] ,
277+ ret,
278+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 100 , 161 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
279+ ) ;
280+ }
281+ "llvm.aarch64.neon.smax.v8i16" => {
282+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
283+ call_asm (
284+ fx,
285+ "llvm__aarch64__neon__smax__v8i16" ,
286+ & [ a, b] ,
287+ ret,
288+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 100 , 97 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
289+ ) ;
290+ }
291+ "llvm.aarch64.neon.smax.v8i8" => {
292+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
293+ call_asm (
294+ fx,
295+ "llvm__aarch64__neon__smax__v8i8" ,
296+ & [ a, b] ,
297+ ret,
298+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 100 , 33 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
299+ ) ;
300+ }
301+ "llvm.aarch64.neon.smaxp.v16i8" => {
302+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
303+ call_asm (
304+ fx,
305+ "llvm__aarch64__neon__smaxp__v16i8" ,
306+ & [ a, b] ,
307+ ret,
308+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 164 , 33 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
309+ ) ;
310+ }
311+ "llvm.aarch64.neon.smaxp.v2i32" => {
312+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
313+ call_asm (
314+ fx,
315+ "llvm__aarch64__neon__smaxp__v2i32" ,
316+ & [ a, b] ,
317+ ret,
318+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 164 , 161 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
319+ ) ;
320+ }
321+ "llvm.aarch64.neon.smaxp.v4i16" => {
322+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
323+ call_asm (
324+ fx,
325+ "llvm__aarch64__neon__smaxp__v4i16" ,
326+ & [ a, b] ,
327+ ret,
328+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 164 , 97 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
329+ ) ;
330+ }
331+ "llvm.aarch64.neon.smaxp.v4i32" => {
332+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
333+ call_asm (
334+ fx,
335+ "llvm__aarch64__neon__smaxp__v4i32" ,
336+ & [ a, b] ,
337+ ret,
338+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 164 , 161 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
339+ ) ;
340+ }
341+ "llvm.aarch64.neon.smaxp.v8i16" => {
342+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
343+ call_asm (
344+ fx,
345+ "llvm__aarch64__neon__smaxp__v8i16" ,
346+ & [ a, b] ,
347+ ret,
348+ & [ 0 , 0 , 192 , 61 , 33 , 0 , 192 , 61 , 0 , 164 , 97 , 78 , 64 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
349+ ) ;
350+ }
351+ "llvm.aarch64.neon.smaxp.v8i8" => {
352+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
353+ call_asm (
354+ fx,
355+ "llvm__aarch64__neon__smaxp__v8i8" ,
356+ & [ a, b] ,
357+ ret,
358+ & [ 0 , 0 , 64 , 253 , 33 , 0 , 64 , 253 , 0 , 164 , 33 , 14 , 64 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
359+ ) ;
360+ }
361+ "llvm.aarch64.neon.smaxv.i16.v4i16" => {
362+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
363+ call_asm (
364+ fx,
365+ "llvm__aarch64__neon__smaxv__i16__v4i16" ,
366+ & [ a] ,
367+ ret,
368+ & [ 0 , 0 , 64 , 253 , 0 , 168 , 112 , 14 , 32 , 0 , 0 , 125 , 192 , 3 , 95 , 214 ] ,
369+ ) ;
370+ }
371+ "llvm.aarch64.neon.smaxv.i16.v8i16" => {
372+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
373+ call_asm (
374+ fx,
375+ "llvm__aarch64__neon__smaxv__i16__v8i16" ,
376+ & [ a] ,
377+ ret,
378+ & [ 0 , 0 , 192 , 61 , 0 , 168 , 112 , 78 , 32 , 0 , 0 , 125 , 192 , 3 , 95 , 214 ] ,
379+ ) ;
380+ }
381+ "llvm.aarch64.neon.smaxv.i32.v2i32" => {
382+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
383+ call_asm (
384+ fx,
385+ "llvm__aarch64__neon__smaxv__i32__v2i32" ,
386+ & [ a] ,
387+ ret,
388+ & [ 0 , 0 , 64 , 253 , 0 , 164 , 160 , 14 , 32 , 0 , 0 , 189 , 192 , 3 , 95 , 214 ] ,
389+ ) ;
390+ }
391+ "llvm.aarch64.neon.smaxv.i32.v4i32" => {
392+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
393+ call_asm (
394+ fx,
395+ "llvm__aarch64__neon__smaxv__i32__v4i32" ,
396+ & [ a] ,
397+ ret,
398+ & [ 0 , 0 , 192 , 61 , 0 , 168 , 176 , 78 , 32 , 0 , 0 , 189 , 192 , 3 , 95 , 214 ] ,
399+ ) ;
400+ }
401+ "llvm.aarch64.neon.smaxv.i8.v16i8" => {
402+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
403+ call_asm (
404+ fx,
405+ "llvm__aarch64__neon__smaxv__i8__v16i8" ,
406+ & [ a] ,
407+ ret,
408+ & [ 0 , 0 , 192 , 61 , 0 , 168 , 48 , 78 , 32 , 0 , 0 , 13 , 192 , 3 , 95 , 214 ] ,
409+ ) ;
410+ }
411+ "llvm.aarch64.neon.smaxv.i8.v8i8" => {
412+ intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
413+ call_asm (
414+ fx,
415+ "llvm__aarch64__neon__smaxv__i8__v8i8" ,
416+ & [ a] ,
417+ ret,
418+ & [ 0 , 0 , 64 , 253 , 0 , 168 , 48 , 14 , 32 , 0 , 0 , 13 , 192 , 3 , 95 , 214 ] ,
419+ ) ;
234420 }
235421
236422 _ if intrinsic. starts_with ( "llvm.aarch64.neon.umax.v" ) => {
@@ -485,19 +671,43 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
485671 // ==== begin autogenerated section ====
486672 "llvm.trunc.v1f64" => {
487673 intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
488- call_asm ( fx, "llvm__trunc__v1f64" , & [ a] , ret, & [ 0 , 192 , 101 , 30 , 192 , 3 , 95 , 214 ] ) ;
674+ call_asm (
675+ fx,
676+ "llvm__trunc__v1f64" ,
677+ & [ a] ,
678+ ret,
679+ & [ 0 , 0 , 64 , 253 , 0 , 192 , 101 , 30 , 32 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
680+ ) ;
489681 }
490682 "llvm.trunc.v2f32" => {
491683 intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
492- call_asm ( fx, "llvm__trunc__v2f32" , & [ a] , ret, & [ 0 , 152 , 161 , 14 , 192 , 3 , 95 , 214 ] ) ;
684+ call_asm (
685+ fx,
686+ "llvm__trunc__v2f32" ,
687+ & [ a] ,
688+ ret,
689+ & [ 0 , 0 , 64 , 253 , 0 , 152 , 161 , 14 , 32 , 0 , 0 , 253 , 192 , 3 , 95 , 214 ] ,
690+ ) ;
493691 }
494692 "llvm.trunc.v2f64" => {
495693 intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
496- call_asm ( fx, "llvm__trunc__v2f64" , & [ a] , ret, & [ 0 , 152 , 225 , 78 , 192 , 3 , 95 , 214 ] ) ;
694+ call_asm (
695+ fx,
696+ "llvm__trunc__v2f64" ,
697+ & [ a] ,
698+ ret,
699+ & [ 0 , 0 , 192 , 61 , 0 , 152 , 225 , 78 , 32 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
700+ ) ;
497701 }
498702 "llvm.trunc.v4f32" => {
499703 intrinsic_args ! ( fx, args => ( a) ; intrinsic) ;
500- call_asm ( fx, "llvm__trunc__v4f32" , & [ a] , ret, & [ 0 , 152 , 161 , 78 , 192 , 3 , 95 , 214 ] ) ;
704+ call_asm (
705+ fx,
706+ "llvm__trunc__v4f32" ,
707+ & [ a] ,
708+ ret,
709+ & [ 0 , 0 , 192 , 61 , 0 , 152 , 161 , 78 , 32 , 0 , 128 , 61 , 192 , 3 , 95 , 214 ] ,
710+ ) ;
501711 }
502712 // ==== end autogenerated section
503713