9
9
//@ assembly-output: emit-asm
10
10
//@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort
11
11
12
- #![ feature( no_core, lang_items, repr_simd, intrinsics) ]
12
+ #![ feature( no_core, lang_items, repr_simd, intrinsics, adt_const_params ) ]
13
13
#![ no_core]
14
14
#![ allow( non_camel_case_types) ]
15
15
@@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]);
35
35
// Mask vector with four 64-bit lanes; used as the `mask` argument of the
// `f64x4` masked-load test below (`load_f64x4`).
pub struct m64x4 ( [ i64 ; 4 ] ) ;
36
36
37
37
#[ rustc_intrinsic]
38
- unsafe fn simd_masked_load < M , P , T > ( mask : M , pointer : P , values : T ) -> T ;
38
+ unsafe fn simd_masked_load < M , P , T , const ALIGN : SimdAlign > ( mask : M , pointer : P , values : T ) -> T ;
39
39
40
40
// CHECK-LABEL: load_i8x16
41
41
#[ no_mangle]
@@ -56,7 +56,11 @@ pub unsafe extern "C" fn load_i8x16(mask: m8x16, pointer: *const i8) -> i8x16 {
56
56
// x86-avx512-NOT: vpsllw
57
57
// x86-avx512: vpmovb2m k1, xmm0
58
58
// x86-avx512-NEXT: vmovdqu8 xmm0 {k1} {z}, xmmword ptr [rdi]
59
- simd_masked_load ( mask, pointer, i8x16 ( [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ) )
59
+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
60
+ mask,
61
+ pointer,
62
+ i8x16 ( [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ) ,
63
+ )
60
64
}
61
65
62
66
// CHECK-LABEL: load_f32x8
@@ -68,7 +72,29 @@ pub unsafe extern "C" fn load_f32x8(mask: m32x8, pointer: *const f32) -> f32x8 {
68
72
// x86-avx512-NOT: vpslld
69
73
// x86-avx512: vpmovd2m k1, ymm0
70
74
// x86-avx512-NEXT: vmovups ymm0 {k1} {z}, ymmword ptr [rdi]
71
- simd_masked_load ( mask, pointer, f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) )
75
+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
76
+ mask,
77
+ pointer,
78
+ f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) ,
79
+ )
80
+ }
81
+
82
+ // CHECK-LABEL: load_f32x8_aligned
83
+ #[ no_mangle]
84
+ pub unsafe extern "C" fn load_f32x8_aligned ( mask : m32x8 , pointer : * const f32 ) -> f32x8 {
85
+ // x86-avx2-NOT: vpslld
86
+ // x86-avx2: vmaskmovps ymm0, ymm0, ymmword ptr [rdi]
87
+ //
88
+ // x86-avx512-NOT: vpslld
89
+ // x86-avx512: vpmovd2m k1, ymm0
90
+ // x86-avx512-NEXT: vmovaps ymm0 {k1} {z}, ymmword ptr [rdi]
91
+ //
92
+ // this aligned version should generate `movaps` instead of `movups`
93
+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Vector } > (
94
+ mask,
95
+ pointer,
96
+ f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) ,
97
+ )
72
98
}
73
99
74
100
// CHECK-LABEL: load_f64x4
@@ -79,5 +105,9 @@ pub unsafe extern "C" fn load_f64x4(mask: m64x4, pointer: *const f64) -> f64x4 {
79
105
//
80
106
// x86-avx512-NOT: vpsllq
81
107
// x86-avx512: vpmovq2m k1, ymm0
82
- simd_masked_load ( mask, pointer, f64x4 ( [ 0_f64 , 0_f64 , 0_f64 , 0_f64 ] ) )
108
+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
109
+ mask,
110
+ pointer,
111
+ f64x4 ( [ 0_f64 , 0_f64 , 0_f64 , 0_f64 ] ) ,
112
+ )
83
113
}
0 commit comments