Skip to content

Commit d5f9af9

Browse files
committed
Begin to wire asm stubs into detection (extend asm_type); Add more CPU detection so asm_type can be matched up
1 parent 7917a48 commit d5f9af9

File tree

4 files changed

+91
-21
lines changed

4 files changed

+91
-21
lines changed

xmrstak/backend/cpu/cpuType.cpp

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ namespace cpu
2727
#endif
2828
}
2929

30-
int32_t get_masked(int32_t val, int32_t h, int32_t l)
30+
/**
 * Extract the bit field [h-1:l] from a raw CPUID register value.
 *
 * The upper bound `h` is exclusive and the lower bound `l` is inclusive, so
 * the manual's EBX[30:22] is written as get_masked(cpu_info[1], 31, 22).
 *
 * The extraction is done on an unsigned copy of `val`: shifting a signed
 * value that has bit 31 set would sign-extend on the way down, and building
 * the mask in a signed int overflows as soon as the field touches bit 31.
 *
 * @param val raw register value as returned by cpuid
 * @param h   one past the highest bit of the field (1..32)
 * @param l   lowest bit of the field (0..31)
 * @return the field shifted down to bit 0; 0 if the range is empty or invalid
 */
uint32_t get_masked(int32_t val, int32_t h, int32_t l)
{
	// An empty or inverted range, or a start bit outside the register,
	// would turn into an undefined shift count below.
	if(l < 0 || l > 31 || h <= l)
		return 0u;

	const uint32_t shifted = static_cast<uint32_t>(val) >> l;
	const int32_t width = h - l;

	// A shift by 32 is undefined, so a field that spans the whole
	// remaining register is returned without masking.
	if(width >= 32)
		return shifted;

	return shifted & ((1u << width) - 1u);
}
3535

3636
bool has_feature(int32_t val, int32_t bit)
@@ -42,34 +42,86 @@ namespace cpu
4242

4343
Model getModel()
4444
{
45+
Model result;
46+
47+
int32_t cpu_HFP = 0; // Highest Function Parameter
48+
int32_t cpu_HEFP = 0; // Highest Extended Function Parameter
4549
int32_t cpu_info[4];
4650
char cpustr[13] = {0};
51+
char brandstr[13] = {0};
4752

4853
cpuid(0, 0, cpu_info);
54+
cpu_HFP = cpu_info[0];
4955
std::memcpy(cpustr, &cpu_info[1], 4);
5056
std::memcpy(cpustr+4, &cpu_info[3], 4);
5157
std::memcpy(cpustr+8, &cpu_info[2], 4);
5258

53-
Model result;
54-
5559
cpuid(1, 0, cpu_info);
56-
60+
result.model = get_masked(cpu_info[0], 8, 4);
5761
result.family = get_masked(cpu_info[0], 12, 8);
58-
result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4;
59-
result.type_name = cpustr;
62+
if(result.family == 0x6 || result.family == 0xF)
63+
{
64+
result.model += get_masked(cpu_info[0], 20, 16) << 4;
65+
}
66+
if(result.family != 0xF)
67+
{
68+
result.family += get_masked(cpu_info[0], 28, 20);
69+
}
6070

61-
// feature bits https://en.wikipedia.org/wiki/CPUID
62-
// sse2
71+
// feature bits https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
72+
// sse2/sse3/ssse3
6373
result.sse2 = has_feature(cpu_info[3], 26);
74+
result.sse3 = has_feature(cpu_info[2], 0);
75+
result.ssse3 = has_feature(cpu_info[2], 9);
6476
// aes-ni
6577
result.aes = has_feature(cpu_info[2], 25);
6678
// avx - 27 is the check if the OS overwrote cpu features
67-
result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27) ;
79+
result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27);
6880

81+
// extended feature bits https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
82+
if(cpu_HFP >= 7)
83+
{
84+
cpuid(7, 0, cpu_info);
85+
result.avx2 = has_feature(cpu_info[1], 5);
86+
}
87+
// extended function support https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented
88+
cpuid(0x80000000, 0, cpu_info);
89+
cpu_HEFP = cpu_info[0];
90+
91+
// processor brand string https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
92+
if(cpu_HEFP >= 0x80000004)
93+
{
94+
for(uint32_t efp=0x80000002; efp<0x80000004; efp++){
95+
cpuid(0x80000002, 0, cpu_info);
96+
std::memcpy(brandstr+(16*(efp-0x80000002)), &cpu_info, 16);
97+
}
98+
result.brand_name = brandstr;
99+
}
100+
101+
if(strcmp(cpustr, "GenuineIntel") == 0)
102+
{
103+
if(result.family == 0x6){
104+
result.isIntelXBridge = (
105+
result.model == 0x2A //Sandy Bridge
106+
|| result.model == 0x3A //Ivy Bridge
107+
);
108+
result.isIntelXWell = (
109+
result.model == 0x3C || result.model == 0x45 || result.model == 0x46 //Haswell
110+
|| result.model == 0x47 || result.model == 0x3D //Broadwell
111+
);
112+
result.isIntelXLake = (
113+
result.model == 0x4E || result.model == 0x5E //Skylake
114+
|| result.model == 0x8E //Kaby/Coffee/Whiskey/Amber Lake
115+
|| result.model == 0x9E //Kaby/Coffee Lake
116+
|| result.model == 0x66 //Cannon Lake
117+
);
118+
}
119+
}
69120
if(strcmp(cpustr, "AuthenticAMD") == 0)
70121
{
71-
if(result.family == 0xF)
72-
result.family += get_masked(cpu_info[0], 28, 20);
122+
result.isAMDHammer = (result.family != 0x15 && result.family >= 0xF && result.family <= 0x16);
123+
result.isAMDBulldozer = (result.family == 0x15);
124+
result.isAMDZen = (result.family == 0x17);
73125
}
74126

75127
return result;

xmrstak/backend/cpu/cpuType.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,20 @@ namespace cpu
1212
{
1313
uint32_t family = 0u;
1414
uint32_t model = 0u;
15+
bool isIntelXBridge = false;
16+
bool isIntelXWell = false;
17+
bool isIntelXLake = false;
18+
bool isAMDHammer = false;
19+
bool isAMDBulldozer = false;
20+
bool isAMDZen = false;
1521
bool aes = false;
1622
bool sse2 = false;
23+
bool sse3 = false;
24+
bool ssse3 = false;
1725
bool avx = false;
26+
bool avx2 = false;
1827
std::string type_name = "unknown";
28+
std::string brand_name = "unknown";
1929
};
2030

2131
Model getModel();
@@ -25,7 +35,7 @@ namespace cpu
2535
* This enables us to put in values exactly like in the manual
2636
* For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
2737
*/
28-
int32_t get_masked(int32_t val, int32_t h, int32_t l);
38+
uint32_t get_masked(int32_t val, int32_t h, int32_t l);
2939

3040

3141
} // namespace cpu

xmrstak/backend/cpu/crypto/cryptonight_aesni.h

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,9 +1179,12 @@ struct Cryptonight_hash<5>
11791179
}
11801180
};
11811181

1182-
extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
1183-
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
1182+
extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
11841183
extern "C" void cryptonight_v8_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
1184+
extern "C" void cryptonight_v8_mainloop_bulldozer_asm(cryptonight_ctx* ctx0);
1185+
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
1186+
extern "C" void cryptonight_v8_rwz_mainloop_asm(cryptonight_ctx* ctx0);
1187+
extern "C" void cryptonight_v8_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
11851188

11861189

11871190
template< size_t N, size_t asm_version>
@@ -1298,7 +1301,7 @@ void* allocateExecutableMemory(size_t size)
12981301
{
12991302

13001303
#ifdef _WIN64
1301-
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
1304+
return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
13021305
#else
13031306
# if defined(__APPLE__)
13041307
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -1356,7 +1359,7 @@ void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmr
13561359

13571360
cn_mainloop_fun src_code = nullptr;
13581361

1359-
if(selected_asm == "intel_avx")
1362+
if(selected_asm == "intel_avx" || selected_asm == "ivybridge" || selected_asm == "sandybridge")
13601363
{
13611364
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
13621365
if(N == 2)
@@ -1365,9 +1368,14 @@ void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmr
13651368
src_code = cryptonight_v8_mainloop_ivybridge_asm;;
13661369
}
13671370
// supports only 1 thread per hash
1368-
if(selected_asm == "amd_avx")
1371+
if(selected_asm == "bulldozer")
13691372
{
1370-
// AMD Ryzen (1xxx and 2xxx series)
1373+
// AMD 15h "Bulldozer" - Orochi/Vishera etc; Bulldozer/Piledriver/Steamroller/Excavator
1374+
src_code = cryptonight_v8_mainloop_bulldozer_asm;
1375+
}
1376+
if(selected_asm == "amd_avx" || selected_asm == "zen")
1377+
{
1378+
// AMD 17h "Zen" - Ryzen (1xxx and 2xxx series)
13711379
src_code = cryptonight_v8_mainloop_ryzen_asm;
13721380
}
13731381

xmrstak/backend/cpu/minethd.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -563,9 +563,9 @@ static std::string getAsmName(const uint32_t num_hashes)
563563

564564
if(cpu_model.avx && cpu_model.aes)
565565
{
566-
if(cpu_model.type_name.find("Intel") != std::string::npos)
566+
if(cpu_model.isIntelXBridge || cpu_model.isIntelXWell || cpu_model.isIntelXLake)
567567
asm_type = "intel_avx";
568-
else if(cpu_model.type_name.find("AMD") != std::string::npos)
568+
else if(cpu_model.isAMDBulldozer || cpu_model.isAMDZen)
569569
asm_type = "amd_avx";
570570
}
571571
}

0 commit comments

Comments
 (0)