Skip to content

Commit 722a47d

Browse files
committed
Get something working
1 parent 376fb2f commit 722a47d

File tree

4 files changed

+263
-37
lines changed

4 files changed

+263
-37
lines changed

gen_intrinsics/src/def_visitor.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,13 @@ impl<'ast> Visit<'ast> for DefVisitor {
6060

6161
let mut ty = i.clone();
6262
ty.attrs = ty.attrs.into_iter().filter(|attr| attr.path().is_ident("repr")).collect();
63+
ty.attrs.push(syn::parse_quote! { #[derive(Copy, Clone)] });
6364

6465
self.structs.push(ty);
6566
}
6667

6768
fn visit_item_type(&mut self, i: &'ast syn::ItemType) {
68-
let mut alias = i.clone();
69-
alias.attrs = alias.attrs.into_iter().filter(|attr| attr.path().is_ident("repr")).collect();
70-
71-
self.aliases.push(alias);
69+
self.aliases.push(i.clone());
7270
}
7371

7472
fn visit_item_foreign_mod(&mut self, i: &'ast syn::ItemForeignMod) {

gen_intrinsics/src/main.rs

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,25 @@ use std::io::Write;
44
use std::process::Stdio;
55

66
use object::{Object, ObjectSection, ObjectSymbol};
7+
use quote::quote;
78
use syn::Ident;
89

910
use crate::def_visitor::{DefVisitor, LlvmIntrinsicDef};
1011

1112
fn compile_object(visitor: &DefVisitor) {
1213
let mut ts = proc_macro2::TokenStream::new();
13-
ts.extend(quote::quote! {
14+
ts.extend(quote! {
1415
#![feature(abi_unadjusted, f16, f128, link_llvm_intrinsics, repr_simd, simd_ffi)]
1516
#![allow(dead_code, improper_ctypes, improper_ctypes_definitions, internal_features, non_camel_case_types)]
1617
});
1718

1819
let structs = &visitor.structs;
19-
ts.extend(quote::quote! {
20+
ts.extend(quote! {
2021
#(#structs)*
2122
});
2223

2324
let aliases = &visitor.aliases;
24-
ts.extend(quote::quote! {
25+
ts.extend(quote! {
2526
#(#aliases)*
2627
});
2728

@@ -31,32 +32,41 @@ fn compile_object(visitor: &DefVisitor) {
3132
let mangled_name = Ident::new(&link_name.replace('.', "__"), sig.ident.span());
3233
sig.ident = mangled_name.clone();
3334

34-
ts.extend(quote::quote! {
35+
ts.extend(quote! {
3536
extern #abi {
3637
#[link_name = #link_name]
3738
#sig;
3839
}
3940
});
4041

4142
sig.ident = Ident::new(&format!("__rust_cranelift_{mangled_name}"), sig.ident.span());
42-
let args = sig
43-
.inputs
44-
.iter()
45-
.map(|arg| match arg {
46-
syn::FnArg::Typed(syn::PatType { pat, .. }) => match &**pat {
47-
syn::Pat::Ident(ident) => ident.ident.clone(),
43+
let mut args = vec![];
44+
for arg in &mut sig.inputs {
45+
match arg {
46+
syn::FnArg::Typed(syn::PatType { pat, ty, .. }) => match &**pat {
47+
syn::Pat::Ident(ident) => {
48+
let ident = ident.ident.clone();
49+
args.push(quote! { *#ident });
50+
*ty = syn::parse_quote! { &#ty };
51+
}
4852
syn::Pat::Wild(_) => unreachable!("{sig:?}"),
4953
_ => unreachable!("{pat:?}"),
5054
},
5155
_ => unreachable!(),
52-
})
53-
.collect::<Vec<_>>();
54-
55-
ts.extend(quote::quote! {
56+
}
57+
}
58+
let ret_ty = match &sig.output {
59+
syn::ReturnType::Default => quote! { () },
60+
syn::ReturnType::Type(_, ty) => quote! { #ty },
61+
};
62+
sig.inputs.push(syn::parse_quote! { ret: &mut #ret_ty });
63+
sig.output = syn::ReturnType::Default;
64+
65+
ts.extend(quote! {
5666
#[no_mangle]
5767
#[target_feature(enable = "neon,aes,sha2,sha3,sm4,crc,frintts,tme,i8mm,fcma,dotprod,rdm")] // FIXME infer from context
5868
unsafe extern "C" #sig {
59-
#mangled_name(#(#args,)*)
69+
*ret = #mangled_name(#(#args,)*)
6070
}
6171
});
6272
}
@@ -108,7 +118,7 @@ fn main() {
108118
// Sanity checks
109119
assert!(section.relocations().next().is_none(), "function {link_name} has relocations");
110120
assert!(
111-
section.size() <= 0x14,
121+
section.size() <= 0x32,
112122
"function {link_name} is too big. it is {} bytes",
113123
section.size(),
114124
);

src/intrinsics/llvm_aarch64.rs

Lines changed: 222 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,31 @@ fn call_asm<'tcx>(
1616
code: &[u8],
1717
) {
1818
let name = format!("__rust_cranelift_{name}");
19-
let is_defined = fx.module.declarations().get_name(&name).is_none();
2019

2120
let sig = Signature {
2221
params: args
2322
.iter()
24-
.map(|arg| AbiParam::new(fx.clif_type(arg.layout().ty).unwrap()))
23+
.map(|_| AbiParam::new(fx.pointer_type))
24+
.chain(Some(AbiParam::new(fx.pointer_type)))
2525
.collect(),
26-
returns: vec![AbiParam::new(fx.clif_type(ret.layout().ty).unwrap())],
26+
returns: vec![],
2727
call_conv: CallConv::SystemV,
2828
};
2929

3030
let func = fx.module.declare_function(&name, Linkage::Local, &sig).unwrap();
31-
if !is_defined {
32-
fx.module.define_function_bytes(func, &Function::new(), 4, &code, &[]).unwrap();
31+
match fx.module.define_function_bytes(func, &Function::new(), 4, &code, &[]) {
32+
Ok(_) | Err(cranelift_module::ModuleError::DuplicateDefinition(_)) => {}
33+
err => err.unwrap(),
3334
}
3435

3536
let func_ref = fx.module.declare_func_in_func(func, &mut fx.bcx.func);
36-
let res = fx.bcx.ins().call(func_ref, &args.into_iter().map(|_| todo!()).collect::<Vec<_>>());
37-
todo!("write result")
37+
let mut args =
38+
args.into_iter().map(|arg| arg.force_stack(fx).0.get_addr(fx)).collect::<Vec<_>>();
39+
let res = CPlace::new_stack_slot(fx, ret.layout());
40+
args.push(res.to_ptr().get_addr(fx));
41+
fx.bcx.ins().call(func_ref, &args);
42+
let res = res.to_cvalue(fx);
43+
ret.write_cvalue(fx, res);
3844
}
3945

4046
pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
@@ -218,7 +224,7 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
218224
});
219225
}
220226

221-
_ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => {
227+
/*_ if intrinsic.starts_with("llvm.aarch64.neon.smax.v") => {
222228
intrinsic_args!(fx, args => (x, y); intrinsic);
223229
224230
simd_pair_for_each_lane(
@@ -231,6 +237,186 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
231237
fx.bcx.ins().select(gt, x_lane, y_lane)
232238
},
233239
);
240+
}*/
241+
"llvm.aarch64.neon.smax.v16i8" => {
242+
intrinsic_args!(fx, args => (a, b); intrinsic);
243+
call_asm(
244+
fx,
245+
"llvm__aarch64__neon__smax__v16i8",
246+
&[a, b],
247+
ret,
248+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 100, 33, 78, 64, 0, 128, 61, 192, 3, 95, 214],
249+
);
250+
}
251+
"llvm.aarch64.neon.smax.v2i32" => {
252+
intrinsic_args!(fx, args => (a, b); intrinsic);
253+
call_asm(
254+
fx,
255+
"llvm__aarch64__neon__smax__v2i32",
256+
&[a, b],
257+
ret,
258+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 100, 161, 14, 64, 0, 0, 253, 192, 3, 95, 214],
259+
);
260+
}
261+
"llvm.aarch64.neon.smax.v4i16" => {
262+
intrinsic_args!(fx, args => (a, b); intrinsic);
263+
call_asm(
264+
fx,
265+
"llvm__aarch64__neon__smax__v4i16",
266+
&[a, b],
267+
ret,
268+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 100, 97, 14, 64, 0, 0, 253, 192, 3, 95, 214],
269+
);
270+
}
271+
"llvm.aarch64.neon.smax.v4i32" => {
272+
intrinsic_args!(fx, args => (a, b); intrinsic);
273+
call_asm(
274+
fx,
275+
"llvm__aarch64__neon__smax__v4i32",
276+
&[a, b],
277+
ret,
278+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 100, 161, 78, 64, 0, 128, 61, 192, 3, 95, 214],
279+
);
280+
}
281+
"llvm.aarch64.neon.smax.v8i16" => {
282+
intrinsic_args!(fx, args => (a, b); intrinsic);
283+
call_asm(
284+
fx,
285+
"llvm__aarch64__neon__smax__v8i16",
286+
&[a, b],
287+
ret,
288+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 100, 97, 78, 64, 0, 128, 61, 192, 3, 95, 214],
289+
);
290+
}
291+
"llvm.aarch64.neon.smax.v8i8" => {
292+
intrinsic_args!(fx, args => (a, b); intrinsic);
293+
call_asm(
294+
fx,
295+
"llvm__aarch64__neon__smax__v8i8",
296+
&[a, b],
297+
ret,
298+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 100, 33, 14, 64, 0, 0, 253, 192, 3, 95, 214],
299+
);
300+
}
301+
"llvm.aarch64.neon.smaxp.v16i8" => {
302+
intrinsic_args!(fx, args => (a, b); intrinsic);
303+
call_asm(
304+
fx,
305+
"llvm__aarch64__neon__smaxp__v16i8",
306+
&[a, b],
307+
ret,
308+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 164, 33, 78, 64, 0, 128, 61, 192, 3, 95, 214],
309+
);
310+
}
311+
"llvm.aarch64.neon.smaxp.v2i32" => {
312+
intrinsic_args!(fx, args => (a, b); intrinsic);
313+
call_asm(
314+
fx,
315+
"llvm__aarch64__neon__smaxp__v2i32",
316+
&[a, b],
317+
ret,
318+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 164, 161, 14, 64, 0, 0, 253, 192, 3, 95, 214],
319+
);
320+
}
321+
"llvm.aarch64.neon.smaxp.v4i16" => {
322+
intrinsic_args!(fx, args => (a, b); intrinsic);
323+
call_asm(
324+
fx,
325+
"llvm__aarch64__neon__smaxp__v4i16",
326+
&[a, b],
327+
ret,
328+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 164, 97, 14, 64, 0, 0, 253, 192, 3, 95, 214],
329+
);
330+
}
331+
"llvm.aarch64.neon.smaxp.v4i32" => {
332+
intrinsic_args!(fx, args => (a, b); intrinsic);
333+
call_asm(
334+
fx,
335+
"llvm__aarch64__neon__smaxp__v4i32",
336+
&[a, b],
337+
ret,
338+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 164, 161, 78, 64, 0, 128, 61, 192, 3, 95, 214],
339+
);
340+
}
341+
"llvm.aarch64.neon.smaxp.v8i16" => {
342+
intrinsic_args!(fx, args => (a, b); intrinsic);
343+
call_asm(
344+
fx,
345+
"llvm__aarch64__neon__smaxp__v8i16",
346+
&[a, b],
347+
ret,
348+
&[0, 0, 192, 61, 33, 0, 192, 61, 0, 164, 97, 78, 64, 0, 128, 61, 192, 3, 95, 214],
349+
);
350+
}
351+
"llvm.aarch64.neon.smaxp.v8i8" => {
352+
intrinsic_args!(fx, args => (a, b); intrinsic);
353+
call_asm(
354+
fx,
355+
"llvm__aarch64__neon__smaxp__v8i8",
356+
&[a, b],
357+
ret,
358+
&[0, 0, 64, 253, 33, 0, 64, 253, 0, 164, 33, 14, 64, 0, 0, 253, 192, 3, 95, 214],
359+
);
360+
}
361+
"llvm.aarch64.neon.smaxv.i16.v4i16" => {
362+
intrinsic_args!(fx, args => (a); intrinsic);
363+
call_asm(
364+
fx,
365+
"llvm__aarch64__neon__smaxv__i16__v4i16",
366+
&[a],
367+
ret,
368+
&[0, 0, 64, 253, 0, 168, 112, 14, 32, 0, 0, 125, 192, 3, 95, 214],
369+
);
370+
}
371+
"llvm.aarch64.neon.smaxv.i16.v8i16" => {
372+
intrinsic_args!(fx, args => (a); intrinsic);
373+
call_asm(
374+
fx,
375+
"llvm__aarch64__neon__smaxv__i16__v8i16",
376+
&[a],
377+
ret,
378+
&[0, 0, 192, 61, 0, 168, 112, 78, 32, 0, 0, 125, 192, 3, 95, 214],
379+
);
380+
}
381+
"llvm.aarch64.neon.smaxv.i32.v2i32" => {
382+
intrinsic_args!(fx, args => (a); intrinsic);
383+
call_asm(
384+
fx,
385+
"llvm__aarch64__neon__smaxv__i32__v2i32",
386+
&[a],
387+
ret,
388+
&[0, 0, 64, 253, 0, 164, 160, 14, 32, 0, 0, 189, 192, 3, 95, 214],
389+
);
390+
}
391+
"llvm.aarch64.neon.smaxv.i32.v4i32" => {
392+
intrinsic_args!(fx, args => (a); intrinsic);
393+
call_asm(
394+
fx,
395+
"llvm__aarch64__neon__smaxv__i32__v4i32",
396+
&[a],
397+
ret,
398+
&[0, 0, 192, 61, 0, 168, 176, 78, 32, 0, 0, 189, 192, 3, 95, 214],
399+
);
400+
}
401+
"llvm.aarch64.neon.smaxv.i8.v16i8" => {
402+
intrinsic_args!(fx, args => (a); intrinsic);
403+
call_asm(
404+
fx,
405+
"llvm__aarch64__neon__smaxv__i8__v16i8",
406+
&[a],
407+
ret,
408+
&[0, 0, 192, 61, 0, 168, 48, 78, 32, 0, 0, 13, 192, 3, 95, 214],
409+
);
410+
}
411+
"llvm.aarch64.neon.smaxv.i8.v8i8" => {
412+
intrinsic_args!(fx, args => (a); intrinsic);
413+
call_asm(
414+
fx,
415+
"llvm__aarch64__neon__smaxv__i8__v8i8",
416+
&[a],
417+
ret,
418+
&[0, 0, 64, 253, 0, 168, 48, 14, 32, 0, 0, 13, 192, 3, 95, 214],
419+
);
234420
}
235421

236422
_ if intrinsic.starts_with("llvm.aarch64.neon.umax.v") => {
@@ -485,19 +671,43 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
485671
// ==== begin autogenerated section ====
486672
"llvm.trunc.v1f64" => {
487673
intrinsic_args!(fx, args => (a); intrinsic);
488-
call_asm(fx, "llvm__trunc__v1f64", &[a], ret, &[0, 192, 101, 30, 192, 3, 95, 214]);
674+
call_asm(
675+
fx,
676+
"llvm__trunc__v1f64",
677+
&[a],
678+
ret,
679+
&[0, 0, 64, 253, 0, 192, 101, 30, 32, 0, 0, 253, 192, 3, 95, 214],
680+
);
489681
}
490682
"llvm.trunc.v2f32" => {
491683
intrinsic_args!(fx, args => (a); intrinsic);
492-
call_asm(fx, "llvm__trunc__v2f32", &[a], ret, &[0, 152, 161, 14, 192, 3, 95, 214]);
684+
call_asm(
685+
fx,
686+
"llvm__trunc__v2f32",
687+
&[a],
688+
ret,
689+
&[0, 0, 64, 253, 0, 152, 161, 14, 32, 0, 0, 253, 192, 3, 95, 214],
690+
);
493691
}
494692
"llvm.trunc.v2f64" => {
495693
intrinsic_args!(fx, args => (a); intrinsic);
496-
call_asm(fx, "llvm__trunc__v2f64", &[a], ret, &[0, 152, 225, 78, 192, 3, 95, 214]);
694+
call_asm(
695+
fx,
696+
"llvm__trunc__v2f64",
697+
&[a],
698+
ret,
699+
&[0, 0, 192, 61, 0, 152, 225, 78, 32, 0, 128, 61, 192, 3, 95, 214],
700+
);
497701
}
498702
"llvm.trunc.v4f32" => {
499703
intrinsic_args!(fx, args => (a); intrinsic);
500-
call_asm(fx, "llvm__trunc__v4f32", &[a], ret, &[0, 152, 161, 78, 192, 3, 95, 214]);
704+
call_asm(
705+
fx,
706+
"llvm__trunc__v4f32",
707+
&[a],
708+
ret,
709+
&[0, 0, 192, 61, 0, 152, 161, 78, 32, 0, 128, 61, 192, 3, 95, 214],
710+
);
501711
}
502712
// ==== end autogenerated section
503713

0 commit comments

Comments
 (0)