Skip to content

Commit e78052f

Browse files
committed
f32x4::dot
1 parent 72fdc77 commit e78052f

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

cidre/src/simd.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,14 @@ impl f32x4 {
370370
Self(unsafe { std::arch::aarch64::vdupq_n_f32(val) })
371371
}
372372

373+
#[inline]
374+
pub fn dot(&self, other: &Self) -> f32 {
375+
unsafe {
376+
let mul = std::arch::aarch64::vmulq_f32(self.0, other.0);
377+
std::arch::aarch64::vaddvq_f32(mul)
378+
}
379+
}
380+
373381
pub fn to_bits(&self) -> u128 {
374382
unsafe { std::mem::transmute(*self) }
375383
}
@@ -1318,6 +1326,13 @@ mod tests {
13181326
assert_eq!(o1, o2);
13191327
}
13201328

1329+
#[test]
fn f32x4_dot() {
    // 1*5 + 2*6 + 3*7 + 4*8 = 5 + 12 + 21 + 32 = 70
    let lhs = f32x4::with_xyzw(1.0, 2.0, 3.0, 4.0);
    let rhs = f32x4::with_xyzw(5.0, 6.0, 7.0, 8.0);
    let product = lhs.dot(&rhs);
    assert_eq!(product, 70.0);
}
1335+
13211336
#[cfg(feature = "half")]
13221337
#[test]
13231338
fn f16quat() {

cidre/src/simd/vector_types.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,14 @@ impl Simd<f32, 4, 4> {
254254
pub const fn with_rgba_f32(r: f32, g: f32, b: f32, a: f32) -> Self {
255255
Self([r, g, b, a])
256256
}
257+
258+
#[inline]
259+
pub fn dot(&self, other: &Self) -> f32 {
260+
self.0[0] * other.0[0]
261+
+ self.0[1] * other.0[1]
262+
+ self.0[2] * other.0[2]
263+
+ self.0[3] * other.0[3]
264+
}
257265
}
258266

259267
#[cfg(feature = "half")]

0 commit comments

Comments
 (0)