|
1 | 1 | discard """ |
2 | 2 | disabled: "arm64" |
3 | | -matrix: "-d:useMalloc --passC:\"-fsanitize=undefined -fsanitize-undefined-trap-on-error\"" |
| 3 | +matrix: "-d:test1;-d:test2;-d:test3;-d:test4" |
4 | 4 | output: "y" |
5 | 5 | """ |
6 | 6 |
|
7 | | -{.passC: "-march=native".} |
# One branch per entry of the test matrix (-d:test1 .. -d:test4); each branch
# hands one more C compiler flag to the backend through repeated `passC`
# entries inside a single pragma.
when defined(test1):
  {.passC: "-mavx".}
elif defined(test2):
  {.passC: "-mavx", passC: "-mbmi".}
elif defined(test3):
  {.passC: "-mavx", passC: "-mbmi", passC: "-mbmi2".}
elif defined(test4):
  # Was a second `defined(test3)` branch (unreachable) with a misspelled
  # `pass:` pragma, so the -d:test4 configuration passed no flags at all.
  {.passC: "-mavx", passC: "-mbmi", passC: "-mbmi2", passC: "-mlzcnt".}
| 15 | + |
| 16 | +# ported from Microsoft documentation of __cpuid and __cpuidex |
| 17 | + |
type Rep = object
  ## Snapshot of the CPUID information collected by `read`.
  nIds, nExIds: int
    ## Values `read` takes from element 0 of the results for leaf 0 and
    ## leaf 0x80000000 respectively.
  isIntel, isAMD: bool
    ## Set when the vendor string is "GenuineIntel" / "AuthenticAMD".
  f_1_ECX, f_1_EDX: set[0..31]
    ## Feature bits filled from leaf 1.
  f_7_EBX, f_7_ECX: set[0..31]
    ## Feature bits filled from leaf 7.
  f_81_ECX, f_81_EDX: set[0..31]
    ## Feature bits filled from the second extended leaf.
| 28 | + |
# Binding for the `__cpuid` intrinsic declared in `intrin.h`.
# `x` is the four-element buffer the intrinsic fills for leaf `a`; it is
# written to by the callee, so it is declared `var` rather than as an
# immutable by-value array.
proc cpuid(x: var array[4, cint], a: cint) {.
  header: "intrin.h", importc: "__cpuid".}
# Binding for the `__cpuidex` intrinsic declared in `intrin.h`.
# `x` is the four-element buffer the intrinsic fills for leaf `a`, sub-leaf
# `b`; it is written to by the callee, so it is declared `var` rather than as
# an immutable by-value array.
proc cpuidex(x: var array[4, cint], a: cint, b: cint) {.
  header: "intrin.h", importc: "__cpuidex".}
| 31 | + |
proc read(): Rep =
  ## Queries the CPU through `cpuid`/`cpuidex` and returns the vendor flags
  ## together with the feature bit sets of leaf 1, leaf 7 and the second
  ## extended leaf (0x80000001).
  ##
  ## Bit sets belonging to leaves the CPU does not report keep their
  ## zero-initialised (empty) value.
  var cpui: array[4, cint]

  # Leaf 0: element 0 is the highest basic leaf number that can be queried.
  cpuid(cpui, 0)
  result.nIds = int cpui[0]

  # Leaf numbers run from 0 up to and including nIds, so the range must be
  # inclusive; otherwise data[nIds] (e.g. data[7] when nIds == 7) is missing.
  var data: seq[array[4, cint]]
  for leaf in 0..result.nIds:
    cpuidex(cpui, cint(leaf), 0)
    data.add(cpui)

  # The 12 vendor characters are spread over elements 1, 3 and 2 of leaf 0.
  # The buffer is zero-initialised, so it is always NUL-terminated.
  var vendor: array[32, char]
  if data.len > 0:
    copyMem(addr vendor[0], addr data[0][1], 4)
    copyMem(addr vendor[4], addr data[0][3], 4)
    copyMem(addr vendor[8], addr data[0][2], 4)
  let vendorName = $cast[cstring](addr vendor[0])
  if vendorName == "GenuineIntel":
    result.isIntel = true
  elif vendorName == "AuthenticAMD":
    result.isAMD = true

  if result.nIds >= 1:
    result.f_1_ECX = cast[set[0..31]](data[1][2])
    result.f_1_EDX = cast[set[0..31]](data[1][3])

  if result.nIds >= 7:
    result.f_7_EBX = cast[set[0..31]](data[7][1])
    result.f_7_ECX = cast[set[0..31]](data[7][2])

  # Extended leaves start at 0x80000000. The reported maximum has its top bit
  # set, so it must be treated as unsigned: read as a plain `cint` it is
  # negative, which made every comparison against 0x8000000x fail.
  const extBase = 0x80000000'u32
  cpuid(cpui, cast[cint](extBase))
  let maxExt = cast[uint32](cpui[0])
  # Zero-extend first; on targets where `int` is 32 bits wide this keeps the
  # same bit pattern instead of raising a range error.
  result.nExIds = cast[int](uint(maxExt))

  var extdata: seq[array[4, cint]]
  if maxExt >= extBase:
    # Iterate over offsets from extBase (inclusive of the last leaf) and
    # bit-cast the leaf number, since it does not fit a signed 32-bit value.
    for offset in 0..int(maxExt - extBase):
      cpuidex(cpui, cast[cint](extBase + uint32(offset)), 0)
      extdata.add(cpui)

  # extdata[1] holds leaf 0x80000001.
  if extdata.len > 1:
    result.f_81_ECX = cast[set[0..31]](extdata[1][2])
    result.f_81_EDX = cast[set[0..31]](extdata[1][3])
| 70 | + |
# CPUID snapshot taken once at start-up; it is only read afterwards, so it is
# bound with `let` instead of `var`.
let x = read()
| 72 | + |
template show(name: untyped, val, bit: untyped) =
  ## Echoes `name` (rendered as source text), then " = ", then whether `bit`
  ## is a member of `val`.
  echo astToStr(name), " = ", contains(val, bit)
| 75 | + |
template show(name, val: untyped) =
  ## Echoes `name` (rendered as source text), then " = ", then `val`.
  echo astToStr(name) & " = ", val
| 78 | + |
# Feature bits stored in x.f_1_ECX.
show SSE3, x.f_1_ECX, 0
show PCLMULQDQ, x.f_1_ECX, 1
show MONITOR, x.f_1_ECX, 3
show SSSE3, x.f_1_ECX, 9
show FMA, x.f_1_ECX, 12
show CMPXCHG16B, x.f_1_ECX, 13
show SSE41, x.f_1_ECX, 19
show SSE42, x.f_1_ECX, 20
show MOVBE, x.f_1_ECX, 22
show POPCNT, x.f_1_ECX, 23
show AES, x.f_1_ECX, 25
show XSAVE, x.f_1_ECX, 26
show OSXSAVE, x.f_1_ECX, 27
show AVX, x.f_1_ECX, 28
show F16C, x.f_1_ECX, 29
show RDRAND, x.f_1_ECX, 30

# Feature bits stored in x.f_1_EDX.
show MSR, x.f_1_EDX, 5
show CX8, x.f_1_EDX, 8
show SEP, x.f_1_EDX, 11
show CMOV, x.f_1_EDX, 15
show CLFSH, x.f_1_EDX, 19
show MMX, x.f_1_EDX, 23
show FXSR, x.f_1_EDX, 24
show SSE, x.f_1_EDX, 25
show SSE2, x.f_1_EDX, 26

# Feature bits stored in x.f_7_EBX; HLE and RTM are reported for Intel only.
show FSGSBASE, x.f_7_EBX, 0
show BMI1, x.f_7_EBX, 3
show HLE, x.isIntel and 4 in x.f_7_EBX
show AVX2, x.f_7_EBX, 5
show BMI2, x.f_7_EBX, 8
show ERMS, x.f_7_EBX, 9
show INVPCID, x.f_7_EBX, 10
show RTM, x.isIntel and 11 in x.f_7_EBX
show AVX512F, x.f_7_EBX, 16
show RDSEED, x.f_7_EBX, 18
show ADX, x.f_7_EBX, 19
show AVX512PF, x.f_7_EBX, 26
show AVX512ER, x.f_7_EBX, 27
show AVX512CD, x.f_7_EBX, 28
show SHA, x.f_7_EBX, 29

# Feature bits stored in x.f_7_ECX.
show PREFETCHWT1, x.f_7_ECX, 0

# Feature bits stored in x.f_81_ECX; several are vendor specific.
show LAHF, x.f_81_ECX, 0
show LZCNT, x.isIntel and 5 in x.f_81_ECX
show ABM, x.isAMD and 5 in x.f_81_ECX
show SSE4a, x.isAMD and 6 in x.f_81_ECX
show XOP, x.isAMD and 11 in x.f_81_ECX
# TBM is bit 21; it previously tested bit 11, which is XOP's bit.
show TBM, x.isAMD and 21 in x.f_81_ECX

# Feature bits stored in x.f_81_EDX; all are vendor specific.
show SYSCALL, x.isIntel and 11 in x.f_81_EDX
show MMXEXT, x.isAMD and 22 in x.f_81_EDX
show RDTSCP, x.isIntel and 27 in x.f_81_EDX
show "3DNOWEXT", x.isAMD and 30 in x.f_81_EDX
show "3DNOW", x.isAMD and 31 in x.f_81_EDX
8 | 136 |
|
proc isAlignedCheck(p: pointer, alignment: int) =
  ## Asserts that the address held by `p` has none of the bits of
  ## `alignment - 1` set; the `doAssert` fails otherwise.
  # NOTE(review): the mask test is equivalent to "address is a multiple of
  # `alignment`" only when `alignment` is a power of two — confirm callers
  # always pass such values.
  doAssert (cast[uint](p) and uint(alignment - 1)) == 0
|
0 commit comments