1818 */
1919
2020#pragma once
21- #include < cstdint>
2221#include " fury/util/platform.h"
22+ #include < cstdint>
2323
2424namespace fury {
2525#if defined(FURY_HAS_NEON)
26- inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
26+ inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
2727 if (length == 0 ) {
28- return 0 ; // Return 0 for empty arrays
28+ return 0 ; // Return 0 for empty arrays
2929 }
30- uint16x8_t max_val = vdupq_n_u16 (0 ); // Initialize max vector to zero
30+ uint16x8_t max_val = vdupq_n_u16 (0 ); // Initialize max vector to zero
3131
3232 size_t i = 0 ;
3333 for (; i + 8 <= length; i += 8 ) {
3434 uint16x8_t current_val = vld1q_u16 (&arr[i]);
35- max_val = vmaxq_u16 (max_val, current_val); // Max operation
35+ max_val = vmaxq_u16 (max_val, current_val); // Max operation
3636 }
3737
3838 // Find the max value in the resulting vector
@@ -54,7 +54,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) {
5454 return max_neon;
5555}
5656
57- inline void copyArray (const uint16_t * from, uint8_t * to, size_t length) {
57+ inline void copyArray (const uint16_t * from, uint8_t * to, size_t length) {
5858 size_t i = 0 ;
5959 for (; i + 7 < length; i += 8 ) {
6060 uint16x8_t src = vld1q_u16 (&from[i]);
@@ -68,22 +68,22 @@ inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) {
6868 }
6969}
7070#elif defined(FURY_HAS_SSE2)
71- inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
71+ inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
7272 if (length == 0 ) {
73- return 0 ; // Return 0 for empty arrays
73+ return 0 ; // Return 0 for empty arrays
7474 }
7575
76- __m128i max_val = _mm_setzero_si128 (); // Initialize max vector with zeros
76+ __m128i max_val = _mm_setzero_si128 (); // Initialize max vector with zeros
7777
7878 size_t i = 0 ;
7979 for (; i + 8 <= length; i += 8 ) {
80- __m128i current_val = _mm_loadu_si128 ((__m128i*)&arr[i]);
81- max_val = _mm_max_epu16 (max_val, current_val); // Max operation
80+ __m128i current_val = _mm_loadu_si128 ((__m128i *)&arr[i]);
81+ max_val = _mm_max_epu16 (max_val, current_val); // Max operation
8282 }
8383
8484 // Find the max value in the resulting vector
8585 uint16_t temp[8 ];
86- _mm_storeu_si128 ((__m128i*)temp, max_val);
86+ _mm_storeu_si128 ((__m128i *)temp, max_val);
8787 uint16_t max_sse = temp[0 ];
8888 for (int j = 1 ; j < 8 ; j++) {
8989 if (temp[j] > max_sse) {
@@ -100,13 +100,13 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) {
100100 return max_sse;
101101}
102102
103- inline void copyArray (const uint16_t * from, uint8_t * to, size_t length) {
103+ inline void copyArray (const uint16_t * from, uint8_t * to, size_t length) {
104104 size_t i = 0 ;
105- __m128i mask = _mm_set1_epi16 (0xFF ); // Mask to zero out the high byte
105+ __m128i mask = _mm_set1_epi16 (0xFF ); // Mask to zero out the high byte
106106 for (; i + 7 < length; i += 8 ) {
107- __m128i src = _mm_loadu_si128 (reinterpret_cast <const __m128i*>(&from[i]));
107+ __m128i src = _mm_loadu_si128 (reinterpret_cast <const __m128i *>(&from[i]));
108108 __m128i result = _mm_and_si128 (src, mask);
109- _mm_storel_epi64 (reinterpret_cast <__m128i*>(&to[i]),
109+ _mm_storel_epi64 (reinterpret_cast <__m128i *>(&to[i]),
110110 _mm_packus_epi16 (result, result));
111111 }
112112
@@ -116,9 +116,9 @@ inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) {
116116 }
117117}
118118#else
119- inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
119+ inline uint16_t getMaxValue (const uint16_t * arr, size_t length) {
120120 if (length == 0 ) {
121- return 0 ; // Return 0 for empty arrays
121+ return 0 ; // Return 0 for empty arrays
122122 }
123123 uint16_t max_val = arr[0 ];
124124 for (size_t i = 1 ; i < length; i++) {
@@ -129,11 +129,11 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) {
129129 return max_val;
130130}
131131
/// Narrows `length` 16-bit values from `from` into 8-bit values in `to`.
///
/// Scalar fallback for systems without SSE2/NEON: this definition sits in the
/// `#else` branch taken when neither FURY_HAS_NEON nor FURY_HAS_SSE2 is
/// defined. Each element is converted with static_cast<uint8_t>, so only the
/// low 8 bits of every source value are kept.
///
/// @param from   source array; elements [0, length) are read
/// @param to     destination array; elements [0, length) are written
/// @param length number of elements to convert; 0 makes the call a no-op
inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    // Unsigned narrowing: values above 0xFF wrap modulo 256.
    to[i] = static_cast<uint8_t>(from[i]);
  }
}
138138#endif
139- } // namespace fury
139+ } // namespace fury
0 commit comments