@@ -4,6 +4,7 @@ const target = @import("builtin").target;
 const arch = target.cpu.arch;
 
 const simd = @import("simd_core.zig");
+const simdg = @import("simd_generic.zig");
 
 const VEC_BITS_LEN = simd.VEC_BITS_LEN;
 const VecLen = simd.VecLen;
@@ -17,13 +18,88 @@ const c = @cImport({
     @cInclude("x86_64_intrins.h");
 });
 
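+/// True when the compile target is x86_64 and the target CPU features include AVX2.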
+fn hasAvx2() bool {
+    if (arch == .x86_64) {
+        const hasFeature = std.Target.x86.featureSetHas;
+        return hasFeature(target.cpu.features, .avx2);
+    }
+
+    return false;
+}
+
 pub const SimdSamples = struct {
     pub fn binOpI16x8(vec1: simd.I16x8, vec2: simd.I16x8) simd.I16x8 {
         const acc = c._mm_mullo_epi16(@bitCast(vec1), @bitCast(vec2));
         return @bitCast(acc);
     }
 };
 
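+/// Masked load via AVX2 VPMASKMOVD/VPMASKMOVQ: lanes selected by `mask` are read
+/// from `buf`, all other lanes are zeroed. Only 32- and 64-bit element types.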
+inline fn mm_maskload_vec(comptime T: type, mask: @Vector(VecLen(T), bool), buf: []T) @Vector(VecLen(T), T) {
+    // Widen the bool mask to full lanes: VPMASKMOV only inspects the sign bit
+    // of each element, and -1 (all ones) has it set.
+    const MaskInt = std.meta.Int(.signed, @bitSizeOf(T));
+    const all_zeros: @Vector(VecLen(T), MaskInt) = @splat(0);
+    const all_ones: @Vector(VecLen(T), MaskInt) = @splat(-1);
+    const t_mask = @select(MaskInt, mask, all_ones, all_zeros);
+    const mm_mask: @Vector(VecLen(i64), i64) = @bitCast(t_mask);
+    const instr = comptime switch (@bitSizeOf(T)) {
+        32 => "vpmaskmovd",
+        64 => "vpmaskmovq",
+        else => @compileError("Unsupported type " ++ @typeName(T)),
+    };
+    // AT&T operand order: source memory, mask register, destination register.
+    return asm volatile (instr ++ " (%[addr]), %[mask], %[result]"
+        : [result] "=x" (-> @Vector(VecLen(T), T)),
+        : [mask] "x" (mm_mask),
+          [addr] "r" (buf.ptr),
+    );
+}
+
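+/// Masked store via AVX2 VPMASKMOVD/VPMASKMOVQ: only lanes selected by `mask` are
+/// written to `buf`; memory behind unselected lanes is left untouched.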
+pub fn mm_maskstore_vec(comptime T: type, mask: @Vector(VecLen(T), bool), buf: []T, vec: @Vector(VecLen(T), T)) void {
+    const mm_vec: @Vector(VecLen(i64), i64) = @bitCast(vec);
+    // Widen the bool mask to full lanes; only the sign bit of each lane matters.
+    const MaskInt = std.meta.Int(.signed, @bitSizeOf(T));
+    const all_zeros: @Vector(VecLen(T), MaskInt) = @splat(0);
+    const all_ones: @Vector(VecLen(T), MaskInt) = @splat(-1);
+    const t_mask = @select(MaskInt, mask, all_ones, all_zeros);
+    const mm_mask: @Vector(VecLen(i64), i64) = @bitCast(t_mask);
+    const instr = comptime switch (@bitSizeOf(T)) {
+        32 => "vpmaskmovd",
+        64 => "vpmaskmovq",
+        else => @compileError("Unsupported type " ++ @typeName(T)),
+    };
+    // AT&T operand order: source register, mask register, destination memory.
+    asm volatile (instr ++ " %[vec], %[mask], (%[addr])"
+        :
+        : [addr] "r" (buf.ptr),
+          [mask] "x" (mm_mask),
+          [vec] "x" (mm_vec),
+        : "memory"
+    );
+}
+
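+/// Loads lanes selected by `mask` from `buf`; unselected lanes take their value
+/// from `val_vec`. Falls back to the generic implementation without AVX2.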
+pub fn maskedLoadVecOr(comptime T: type, val_vec: @Vector(VecLen(T), T), mask: @Vector(VecLen(T), bool), buf: []T) @Vector(VecLen(T), T) {
+    if (comptime hasAvx2() and (@bitSizeOf(T) == 32 or @bitSizeOf(T) == 64)) {
+        const vec = mm_maskload_vec(T, mask, buf);
+        return @select(T, mask, vec, val_vec);
+    } else {
+        return simdg.maskedLoadVecOr(T, val_vec, mask, buf);
+    }
+}
+
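+/// Loads lanes selected by `mask` from `buf`; unselected lanes are zero.
+/// Falls back to the generic implementation without AVX2.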
+pub fn maskedLoadVec(comptime T: type, mask: @Vector(VecLen(T), bool), buf: []T) @Vector(VecLen(T), T) {
+    if (comptime hasAvx2() and (@bitSizeOf(T) == 32 or @bitSizeOf(T) == 64)) {
+        return mm_maskload_vec(T, mask, buf);
+    } else {
+        return simdg.maskedLoadVec(T, mask, buf);
+    }
+}
+
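+/// Stores only the lanes of `vec` selected by `mask` into `buf`.
+/// Falls back to the generic implementation without AVX2.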
+pub fn maskedStoreVec(comptime T: type, mask: @Vector(VecLen(T), bool), buf: []T, vec: @Vector(VecLen(T), T)) void {
+    if (comptime hasAvx2() and (@bitSizeOf(T) == 32 or @bitSizeOf(T) == 64)) {
+        return mm_maskstore_vec(T, mask, buf, vec);
+    } else {
+        return simdg.maskedStoreVec(T, mask, buf, vec);
+    }
+}
+
 inline fn mm_shuffle_u8(vec: @Vector(VecLen(u8), u8), idx: @Vector(VecLen(i8), i8)) @TypeOf(vec) {
     const mm_vec: @Vector(VecLen(i64), i64) = @bitCast(vec);
     const mm_idx: @Vector(VecLen(i64), i64) = @bitCast(idx);