77#include <assert.h>
88#include <stdlib.h>
99#include <string.h>
10+ #if defined(_MSC_VER ) && !defined(__clang__ )
11+ #include <intrin.h>
12+ #endif
1013
1114#ifndef __has_builtin
1215# define __has_builtin (builtin ) 0
2023# define DEBUG_BREAK asm("int3")
2124#endif
2225
26+ #if (defined(__GNUC__ ) || defined(__clang__ ))
27+ # define EXTENDED_ASM 1
28+ #else
29+ # define EXTENDED_ASM 0
30+ #endif
31+
2332#if defined(__GCC_ASM_FLAG_OUTPUTS__ ) && \
2433 (defined(__i386 ) || defined(__x86_64__ ) || \
25- defined(_M_IX86 ) || defined(_M_IX64 ))
34+ defined(_M_IX86 ) || defined(_M_X64 ))
2635# define FLAGS_FROM_EXTENDED_X86_ASM 1
2736#else
2837# define FLAGS_FROM_EXTENDED_X86_ASM 0
3443# define FLAGS_FROM_OVERFLOW_BUILTINS 0
3544#endif
3645
46+ #if _MSC_VER >= 1937 && !defined(__clang__ ) && (defined(_M_IX86 ) || defined(_M_X64 ))
47+ # define FLAGS_FROM_MSVC_INTRINSICS 1
48+ #else
49+ # define FLAGS_FROM_MSVC_INTRINSICS 0
50+ #endif
51+
3752#if __has_builtin (__builtin_constant_p ) || __GNUC__ >= 3 // Not sure, so conservative
3853# define HAVE_BUILTIN_CONSTANT_P 1
3954#else
4358static uint8_t bitcount9 (uint32_t x ) {
4459#if __has_builtin (__builtin_popcount ) || __GNUC__ >= 4
4560 return __builtin_popcount (x & 0777 );
61+ #elif UINT32_MAX >= UINTPTR_MAX
62+ uint32_t res = (x &= 0777 ), mask = UINT32_C (0x11111111 );
63+ res *= UINT32_C (0001001001001 );
64+ res >>= 3 ;
65+ # if EXTENDED_ASM && (defined(__i386__ ) || defined(_M_IX86 ))
66+ __asm__("andl\t%1, %0\nimull\t%1, %0" : "+r" (res ) : "r" (mask ) : "cc" );
67+ # else
68+ res &= mask ;
69+ res *= mask ;
70+ #endif
71+ return (res >> 28 ) + (x >> 8 );
4672#else
4773 uint64_t res = x & 0777 , mask = UINT64_C (0x1111111111111111 );
4874 res *= UINT64_C (0001001001001 );
49- # if defined(__x86_64__ ) || defined(_M_IX64 )
75+ # if EXTENDED_ASM && ( defined(__x86_64__ ) || defined(_M_X64 ) )
5076 __asm__("andq\t%1, %0\nimulq\t%1, %0" : "+r" (res ) : "r" (mask ) : "cc" );
5177# else
5278 res &= mask ;
@@ -60,17 +86,24 @@ static uint8_t lowestsetbit32(uint32_t x) {
6086 assert (x && "invalid argument" );
6187#if __has_builtin (__builtin_ctz ) || __GNUC__ >= 4
6288 return __builtin_ctz (x );
89+ #elif defined(_MSC_VER ) && !defined(__clang__ )
90+ unsigned long index ;
91+ _BitScanForward (& index , x );
92+ return index ;
6393#else
94+ # if EXTENDED_ASM && \
95+ (defined(__i386 ) || defined(__x86_64__ ) || \
96+ defined(_M_IX86 ) || defined(_M_X64 ))
6497 uint32_t res ;
65- # if defined(__i386 ) || defined(__x86_64__ ) || \
66- defined(_M_IX86 ) || defined(_M_IX64 )
6798 __asm__("bsfl\t%1, %0" : "+r" (res ) : "r" (x ) : "cc" );
6899# else
69- res = 0 ;
70- while (!(x & 1 )) {
71- x >>= 1 ;
72- ++ res ;
73- }
100+ uint8_t res = 0 ;
101+ x &= - x ;
102+ if (x & UINT32_C (0xAAAAAAAA )) res += 1 ;
103+ if (x & UINT32_C (0xCCCCCCCC )) res += 2 ;
104+ if (x & UINT32_C (0xF0F0F0F0 )) res += 4 ;
105+ if (x & UINT32_C (0xFF00FF00 )) res += 8 ;
106+ if (x & UINT32_C (0xFFFF0000 )) res += 16 ;
74107# endif
75108 return res ;
76109#endif
@@ -209,13 +242,17 @@ static uint32_t arm_negs(arm_cpu_t *cpu, uint32_t x) {
209242#elif FLAGS_FROM_OVERFLOW_BUILTINS
210243 int32_t res ;
211244 cpu -> v = __builtin_sub_overflow (0 , (int32_t )x , & res );
212- cpu -> c = x ;
213- return arm_movs (cpu , - x );
245+ cpu -> c = !res ;
246+ return arm_movs (cpu , res );
247+ #elif FLAGS_FROM_MSVC_INTRINSICS
248+ int32_t res ;
249+ cpu -> v = _sub_overflow_i32 (0 , 0 , x , & res );
250+ cpu -> c = !res ;
251+ return arm_movs (cpu , res );
214252#else
215- int64_t res = UINT64_C (0 ) - (int32_t )x ;
216- cpu -> v = res != (int32_t )res ;
217- cpu -> c = (uint32_t )res <= 0 ;
218- //cpu->c = 0 >= x;
253+ uint32_t res = - x ;
254+ cpu -> v = (x & res ) >> 31 ;
255+ cpu -> c = !res ;
219256 return arm_movs (cpu , res );
220257#endif
221258}
@@ -229,10 +266,18 @@ static uint32_t arm_adds(arm_cpu_t *cpu, uint32_t x, uint32_t y) {
229266 cpu -> v = __builtin_add_overflow ((int32_t )x , (int32_t )y , & res );
230267 cpu -> c = __builtin_add_overflow (x , y , & x );
231268 return arm_movs (cpu , x );
269+ #elif FLAGS_FROM_MSVC_INTRINSICS
270+ int32_t res ;
271+ cpu -> v = _add_overflow_i32 (0 , x , y , & res );
272+ cpu -> c = _addcarry_u32 (0 , x , y , & x );
273+ return arm_movs (cpu , x );
232274#else
233- int64_t res = (int64_t )(int32_t )x + (int32_t )y ;
234- cpu -> v = res != (int32_t )res ;
235- cpu -> c = (uint32_t )res < x ;
275+ uint32_t res = x + y ;
276+ flags -> v = ((res ^ x ) & (res ^ y )) >> 31 ;
277+ flags -> c = res < x ;
278+ //int64_t res = (int64_t)(int32_t)x + (int32_t)y;
279+ //cpu->v = res != (int32_t)res;
280+ //cpu->c = (uint32_t)res < x;
236281 //cpu->c = x > ~y;
237282 return arm_movs (cpu , res );
238283#endif
@@ -247,10 +292,18 @@ static uint32_t arm_subs(arm_cpu_t *cpu, uint32_t x, uint32_t y) {
247292 cpu -> v = __builtin_sub_overflow ((int32_t )x , (int32_t )y , & res );
248293 cpu -> c = !__builtin_sub_overflow (x , y , & x );
249294 return arm_movs (cpu , x );
295+ #elif FLAGS_FROM_MSVC_INTRINSICS
296+ int32_t res ;
297+ cpu -> v = _sub_overflow_i32 (0 , x , y , & res );
298+ cpu -> c = !_subborrow_u32 (0 , x , y , & x );
299+ return arm_movs (cpu , x );
250300#else
251- int64_t res = (int64_t )(int32_t )x - (int32_t )y ;
252- cpu -> v = res != (int32_t )res ;
253- cpu -> c = (uint32_t )res <= x ;
301+ uint32_t res = x - y ;
302+ cpu -> v = ((x ^ y ) & (res ^ x )) >> 31 ;
303+ cpu -> c = res <= x ;
304+ //int64_t res = (int64_t)(int32_t)x - (int32_t)y;
305+ //cpu->v = res != (int32_t)res;
306+ //cpu->c = (uint32_t)res <= x;
254307 //cpu->c = x >= y;
255308 return arm_movs (cpu , res );
256309#endif
@@ -268,10 +321,20 @@ static uint32_t arm_adcs(arm_cpu_t *cpu, uint32_t x, uint32_t y) {
268321 cpu -> c = __builtin_add_overflow (x , y , & x );
269322 cpu -> c |= __builtin_add_overflow (x , carry , & x );
270323 return arm_movs (cpu , x );
324+ #elif FLAGS_FROM_MSVC_INTRINSICS
325+ bool carry = cpu -> c ;
326+ int32_t res ;
327+ cpu -> v = _add_overflow_i32 (carry , x , y , & res );
328+ cpu -> c = _addcarry_u32 (carry , x , y , & x );
329+ return arm_movs (cpu , x );
271330#else
272- int64_t res = (uint64_t )(int32_t )x + (int32_t )y + cpu -> c ;
273- cpu -> v = res != (int32_t )res ;
274- cpu -> c = ((uint64_t )x + y + cpu -> c ) >> 32 ;
331+ uint32_t res = x + y + cpu -> c ;
332+ uint32_t carries = (x | y ) ^ ((x ^ y ) & res );
333+ cpu -> c = carries >> 31 ;
334+ cpu -> v = cpu -> c ^ (carries >> 30 & 1 );
335+ //int64_t res = (uint64_t)(int32_t)x + (int32_t)y + cpu->c;
336+ //cpu->v = res != (int32_t)res;
337+ //cpu->c = ((uint64_t)x + y + cpu->c) >> 32;
275338 return arm_movs (cpu , res );
276339#endif
277340}
@@ -285,14 +348,21 @@ static uint32_t arm_sbcs(arm_cpu_t *cpu, uint32_t x, uint32_t y) {
285348 int32_t res ;
286349 cpu -> v = __builtin_sub_overflow (x , y , & res );
287350 cpu -> v |= __builtin_sub_overflow (res , borrow , & res );
288- cpu -> c = __builtin_sub_overflow (x , y , & x );
289- cpu -> c |= __builtin_sub_overflow (x , borrow , & x );
351+ cpu -> c = !__builtin_sub_overflow (x , y , & x );
352+ cpu -> c &= !__builtin_sub_overflow (x , borrow , & x );
353+ return arm_movs (cpu , x );
354+ #elif FLAGS_FROM_MSVC_INTRINSICS
355+ bool borrow = !cpu -> c ;
356+ int32_t res ;
357+ cpu -> v = _sub_overflow_i32 (borrow , x , y , & res );
358+ cpu -> c = !_subborrow_u32 (borrow , x , y , & x );
290359 return arm_movs (cpu , x );
291360#else
292- int64_t res = (uint64_t )(int32_t )x - (int32_t )y - !cpu -> c ;
293- cpu -> v = res != (int32_t )res ;
294- cpu -> c = ((uint64_t )x - y - !cpu -> c ) >> 32 ;
295- return arm_movs (cpu , res );
361+ return arm_adcs (cpu , x , ~y );
362+ //int64_t res = (uint64_t)(int32_t)x - (int32_t)y - !cpu->c;
363+ //cpu->v = res != (int32_t)res;
364+ //cpu->c = !(((uint64_t)x - y - !cpu->c) >> 32);
365+ //return arm_movs(cpu, res);
296366#endif
297367}
298368
0 commit comments