Skip to content

Commit fcbfa6d

Browse files
committed
Begin to wire asm stubs into detection (extend asm_type); Add more CPU detection so asm_type can be matched up
1 parent c503d9b commit fcbfa6d

File tree

4 files changed

+91
-20
lines changed

4 files changed

+91
-20
lines changed

xmrstak/backend/cpu/cpuType.cpp

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ void cpuid(uint32_t eax, int32_t ecx, int32_t val[4])
2727
#endif
2828
}
2929

30-
int32_t get_masked(int32_t val, int32_t h, int32_t l)
30+
/**
 * Extract a bit field from a raw register value and return it right-aligned.
 *
 * The range is written like in the CPU manuals but with an exclusive upper
 * bound: get_masked(v, 8, 4) yields bits 7..4 of v.
 *
 * The extraction is done on an unsigned copy of val. Doing it on the signed
 * value overflows the mask and sign-extends the result as soon as the field
 * contains bit 31, and a full 32 bit wide field would need a negative shift.
 *
 * @param val raw register value (e.g. one of the four cpuid result words)
 * @param h   one past the highest bit of the field
 * @param l   lowest bit of the field
 * @return the field shifted down to bit 0; 0 if the range is empty or invalid
 */
uint32_t get_masked(int32_t val, int32_t h, int32_t l)
{
	// An empty or inverted range, or a start bit outside the word, has no bits.
	if(l < 0 || l > 31 || h <= l)
		return 0u;

	const uint32_t low = static_cast<uint32_t>(l);

	// Never let the field run past bit 31.
	uint32_t width = static_cast<uint32_t>(h - l);
	if(width > 32u - low)
		width = 32u - low;

	// Shifting a 32 bit value by 32 is undefined, so handle the full word explicitly.
	const uint32_t mask = (width >= 32u) ? 0xFFFFFFFFu : ((1u << width) - 1u);

	return (static_cast<uint32_t>(val) >> low) & mask;
}
3535

3636
bool has_feature(int32_t val, int32_t bit)
@@ -41,34 +41,86 @@ bool has_feature(int32_t val, int32_t bit)
4141

4242
Model getModel()
4343
{
44+
Model result;
45+
46+
int32_t cpu_HFP = 0; // Highest Function Parameter
47+
int32_t cpu_HEFP = 0; // Highest Extended Function Parameter
4448
int32_t cpu_info[4];
4549
char cpustr[13] = {0};
50+
char brandstr[13] = {0};
4651

4752
cpuid(0, 0, cpu_info);
53+
cpu_HFP = cpu_info[0];
4854
std::memcpy(cpustr, &cpu_info[1], 4);
4955
std::memcpy(cpustr + 4, &cpu_info[3], 4);
5056
std::memcpy(cpustr + 8, &cpu_info[2], 4);
5157

52-
Model result;
53-
5458
cpuid(1, 0, cpu_info);
55-
59+
result.model = get_masked(cpu_info[0], 8, 4);
5660
result.family = get_masked(cpu_info[0], 12, 8);
57-
result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4;
58-
result.type_name = cpustr;
61+
if(result.family == 0x6 || result.family == 0xF)
62+
{
63+
result.model += get_masked(cpu_info[0], 20, 16) << 4;
64+
}
65+
if(result.family != 0xF)
66+
{
67+
result.family += get_masked(cpu_info[0], 28, 20);
68+
}
5969

60-
// feature bits https://en.wikipedia.org/wiki/CPUID
61-
// sse2
70+
// feature bits https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
71+
// sse2/sse3/ssse3
6272
result.sse2 = has_feature(cpu_info[3], 26);
73+
result.sse3 = has_feature(cpu_info[2], 0);
74+
result.ssse3 = has_feature(cpu_info[2], 9);
6375
// aes-ni
6476
result.aes = has_feature(cpu_info[2], 25);
6577
// avx - bit 28 is AVX itself; bit 27 (OSXSAVE) confirms the OS has enabled XSAVE, without which AVX state is not preserved
6678
result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27);
6779

80+
// extended feature bits https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
81+
if(cpu_HFP >= 7)
82+
{
83+
cpuid(7, 0, cpu_info);
84+
result.avx2 = has_feature(cpu_info[1], 5);
85+
}
86+
// extended function support https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented
87+
cpuid(0x80000000, 0, cpu_info);
88+
cpu_HEFP = cpu_info[0];
89+
90+
// processor brand string https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
91+
if(cpu_HEFP >= 0x80000004)
92+
{
93+
for(uint32_t efp=0x80000002; efp<0x80000004; efp++){
94+
cpuid(0x80000002, 0, cpu_info);
95+
std::memcpy(brandstr+(16*(efp-0x80000002)), &cpu_info, 16);
96+
}
97+
result.brand_name = brandstr;
98+
}
99+
100+
if(strcmp(cpustr, "GenuineIntel") == 0)
101+
{
102+
if(result.family == 0x6){
103+
result.isIntelXBridge = (
104+
result.model == 0x2A //Sandy Bridge
105+
|| result.model == 0x3A //Ivy Bridge
106+
);
107+
result.isIntelXWell = (
108+
result.model == 0x3C || result.model == 0x45 || result.model == 0x46 //Haswell
109+
|| result.model == 0x47 || result.model == 0x3D //Broadwell
110+
);
111+
result.isIntelXLake = (
112+
result.model == 0x4E || result.model == 0x5E //Skylake
113+
|| result.model == 0x8E //Kaby/Coffee/Whiskey/Amber Lake
114+
|| result.model == 0x9E //Kaby/Coffee Lake
115+
|| result.model == 0x66 //Cannon Lake
116+
);
117+
}
118+
}
68119
if(strcmp(cpustr, "AuthenticAMD") == 0)
69120
{
70-
if(result.family == 0xF)
71-
result.family += get_masked(cpu_info[0], 28, 20);
121+
result.isAMDHammer = (result.family != 0x15 && result.family >= 0xF && result.family <= 0x16);
122+
result.isAMDBulldozer = (result.family == 0x15);
123+
result.isAMDZen = (result.family == 0x17);
72124
}
73125

74126
return result;

xmrstak/backend/cpu/cpuType.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,20 @@ struct Model
1111
{
1212
uint32_t family = 0u;
1313
uint32_t model = 0u;
14+
bool isIntelXBridge = false;
15+
bool isIntelXWell = false;
16+
bool isIntelXLake = false;
17+
bool isAMDHammer = false;
18+
bool isAMDBulldozer = false;
19+
bool isAMDZen = false;
1420
bool aes = false;
1521
bool sse2 = false;
22+
bool sse3 = false;
23+
bool ssse3 = false;
1624
bool avx = false;
25+
bool avx2 = false;
1726
std::string type_name = "unknown";
27+
std::string brand_name = "unknown";
1828
};
1929

2030
Model getModel();
@@ -24,7 +34,8 @@ Model getModel();
2434
* This enables us to put in values exactly like in the manual
2535
* For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
2636
*/
27-
int32_t get_masked(int32_t val, int32_t h, int32_t l);
37+
uint32_t get_masked(int32_t val, int32_t h, int32_t l);
38+
2839

2940
} // namespace cpu
3041
} // namespace xmrstak

xmrstak/backend/cpu/crypto/cryptonight_aesni.h

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,8 +1198,12 @@ struct Cryptonight_hash<5>
11981198
};
11991199

12001200
extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
1201-
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
12021201
extern "C" void cryptonight_v8_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
1202+
extern "C" void cryptonight_v8_mainloop_bulldozer_asm(cryptonight_ctx* ctx0);
1203+
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
1204+
extern "C" void cryptonight_v8_rwz_mainloop_asm(cryptonight_ctx* ctx0);
1205+
extern "C" void cryptonight_v8_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
1206+
12031207

12041208
template <size_t N, size_t asm_version>
12051209
struct Cryptonight_hash_asm
@@ -1318,7 +1322,7 @@ void* allocateExecutableMemory(size_t size)
13181322
{
13191323

13201324
#ifdef _WIN64
1321-
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
1325+
return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
13221326
#else
13231327
#if defined(__APPLE__)
13241328
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -1374,19 +1378,23 @@ void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmr
13741378

13751379
cn_mainloop_fun src_code = nullptr;
13761380

1377-
if(selected_asm == "intel_avx")
1381+
if(selected_asm == "intel_avx" || selected_asm == "ivybridge" || selected_asm == "sandybridge")
13781382
{
13791383
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
13801384
if(N == 2)
13811385
src_code = reinterpret_cast<cn_mainloop_fun>(cryptonight_v8_double_mainloop_sandybridge_asm);
13821386
else
13831387
src_code = cryptonight_v8_mainloop_ivybridge_asm;
1384-
;
13851388
}
13861389
// supports only 1 thread per hash
1387-
if(selected_asm == "amd_avx")
1390+
if(selected_asm == "bulldozer")
1391+
{
1392+
// AMD 15h "Bulldozer" - Orochi/Vishera etc; Bulldozer/Piledriver/Steamroller/Excavator
1393+
src_code = cryptonight_v8_mainloop_bulldozer_asm;
1394+
}
1395+
if(selected_asm == "amd_avx" || selected_asm == "zen")
13881396
{
1389-
// AMD Ryzen (1xxx and 2xxx series)
1397+
// AMD 17h "Zen" - Ryzen (1xxx and 2xxx series)
13901398
src_code = cryptonight_v8_mainloop_ryzen_asm;
13911399
}
13921400

xmrstak/backend/cpu/minethd.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -567,9 +567,9 @@ static std::string getAsmName(const uint32_t num_hashes)
567567

568568
if(cpu_model.avx && cpu_model.aes)
569569
{
570-
if(cpu_model.type_name.find("Intel") != std::string::npos)
570+
if(cpu_model.isIntelXBridge || cpu_model.isIntelXWell || cpu_model.isIntelXLake)
571571
asm_type = "intel_avx";
572-
else if(cpu_model.type_name.find("AMD") != std::string::npos)
572+
else if(cpu_model.isAMDBulldozer || cpu_model.isAMDZen)
573573
asm_type = "amd_avx";
574574
}
575575
}

0 commit comments

Comments
 (0)