@@ -96,9 +96,10 @@ enum class CPU : uint32_t {
9696 amd_znver2,
9797 amd_znver3,
9898 amd_znver4,
99+ amd_znver5,
99100};
100101
101- static constexpr size_t feature_sz = 11 ;
102+ static constexpr size_t feature_sz = 12 ;
102103static constexpr FeatureName feature_names[] = {
103104#define JL_FEATURE_DEF (name, bit, llvmver ) {#name, bit, llvmver},
104105#define JL_FEATURE_DEF_NAME (name, bit, llvmver, str ) {str, bit, llvmver},
@@ -141,6 +142,10 @@ static constexpr FeatureDep deps[] = {
141142 {vpclmulqdq, avx},
142143 {vpclmulqdq, pclmul},
143144 {avxvnni, avx2},
145+ {avxvnniint8, avx2},
146+ {avxvnniint16, avx2},
147+ {avxifma, avx2},
148+ {avxneconvert, avx2},
144149 {avx512f, avx2},
145150 {avx512dq, avx512f},
146151 {avx512ifma, avx512f},
@@ -159,13 +164,18 @@ static constexpr FeatureDep deps[] = {
159164 {avx512fp16, avx512vl},
160165 {amx_int8, amx_tile},
161166 {amx_bf16, amx_tile},
167+ {amx_fp16, amx_tile},
168+ {amx_complex, amx_tile},
162169 {sse4a, sse3},
163170 {xop, fma4},
164171 {fma4, avx},
165172 {fma4, sse4a},
166173 {xsaveopt, xsave},
167174 {xsavec, xsave},
168175 {xsaves, xsave},
176+ {sha512, avx2},
177+ {sm3, avx},
178+ {sm4, avx2},
169179};
170180
171181// We require cx16 on 64-bit by default. This can be overridden with `-cx16`
@@ -236,6 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
236246constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
237247constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
238248 avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
249+ constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi); // znver4 baseline plus the extra feature bits named here, each listed once
239250
240251}
241252
@@ -298,6 +309,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
298309 {" znver2" , CPU::amd_znver2, CPU::generic, 0 , Feature::znver2},
299310 {" znver3" , CPU::amd_znver3, CPU::amd_znver2, 120000 , Feature::znver3},
300311 {" znver4" , CPU::amd_znver4, CPU::amd_znver3, 160000 , Feature::znver4},
312+ {" znver5" , CPU::amd_znver5, CPU::amd_znver4, 190000 , Feature::znver5},
301313};
302314static constexpr size_t ncpu_names = sizeof (cpus) / sizeof (cpus[0 ]);
303315
@@ -575,6 +587,9 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
575587 return CPU::amd_znver4;
576588 }
577589 return CPU::amd_znver3; // fallback
590+ case 26 :
591+ // if (model <= 0x77)
592+ return CPU::amd_znver5;
578593 }
579594}
580595
@@ -660,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
660675 int32_t info7[4 ];
661676 jl_cpuidex (info7, 7 , 1 );
662677 features[9 ] = info7[0 ];
678+ features[10 ] = info7[1 ];
663679 }
664680 if (maxleaf >= 0x14 ) {
665681 int32_t info14[4 ];
666682 jl_cpuidex (info14, 0x14 , 0 );
667- features[10 ] = info14[1 ];
683+ features[11 ] = info14[1 ];
668684 }
669685
670686 // Fix up AVX bits to account for OS support and match LLVM model
@@ -705,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
705721 else {
706722 cpu = uint32_t (CPU::generic);
707723 }
708-
724+ /* Map from features[] index to the CPUID register stored there
725+ features[0] = ecx
726+ features[1] = edx
727+ features[2] = leaf 7 ebx
728+ features[3] = leaf 7 ecx
729+ features[4] = leaf 7 edx
730+ features[5] = leaf 0x80000001 ecx
731+ features[6] = leaf 0x80000001 edx
732+ features[7] = leaf 0xd subleaf 1 eax
733+ features[8] = leaf 0x80000008 ebx
734+ features[9] = leaf 7 subleaf 1 eax
735+ features[10] = leaf 7 subleaf 1 ebx
736+ features[11] = leaf 0x14 subleaf 0 ebx
737+ */
709738 return std::make_pair (cpu, features);
710739}
711740
0 commit comments