Skip to content

Commit 28bb65e

Browse files
committed
Made correction of wrong memory reporting on Intel Arc more robust
1 parent b470d16 commit 28bb65e

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/opencl.hpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,12 +66,15 @@ struct Device_Info {
6666
cores = to_uint((float)compute_units*(nvidia+amd+intel+apple+arm)); // for CPUs, compute_units is the number of threads (twice the number of cores with hyperthreading)
6767
tflops = 1E-6f*(float)cores*(float)ipc*(float)clock_frequency; // estimated device floating point performance in TeraFLOPs/s
6868
if(intel==8.0f) { // fix wrong global memory reporting for Intel Arc GPUs
69-
if((contains(name, "A770")&&memory>=12602u&&memory<13416u)||(contains_any(name, {"A770", "A750", "A580"})&&memory>=6286u&&memory<6693u)||(contains(name, "A380")&&memory>=4705u&&memory<5010u)) { // 77.5%-82.5% reporting -> /0.8
69+
if((contains_any(name, {"A770", "0x56a0"})&&memory>=11739u&&memory<14168u)||(contains_any(name, {"A770", "A750", "A580", "0x56a0", "0x56a1", "0x56a2"})&&memory>=5869u&&memory<7084u)||(contains_any(name, {"A380", "0x56a5"})&&memory>=4402u&&memory<5313u)) { // 72.5%-87.5% reporting -> /0.8
7070
memory = (uint)((cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()*5ull/4ull)/1048576ull); // fix wrong (80% on Windows) memory reporting on Intel Arc
7171
}
72-
if((contains_any(name, {"A770", "0x56a0"})&&memory>=15041u&&memory<15855u)||(contains_any(name, {"A770", "A750", "A580", "0x56a0", "0x56a1", "0x56a2"})&&memory>=7503u&&memory<7910u)||(contains_any(name, {"A380", "0x56a5"})&&memory>=5616u&&memory<5921u)) { // 92.5%-97.5% reporting -> /0.95
72+
if((contains_any(name, {"A770", "0x56a0"})&&memory>=14168u&&memory<15625u)||(contains_any(name, {"A770", "A750", "A580", "0x56a0", "0x56a1", "0x56a2"})&&memory>=7084u&&memory<7812u)||(contains_any(name, {"A380", "0x56a5"})&&memory>=5313u&&memory<5859u)) { // 87.5%-96.5% reporting -> /0.95
7373
memory = (uint)((cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()*20ull/19ull)/1048576ull); // fix wrong (95% on Linux) memory reporting on Intel Arc
7474
}
75+
if((contains_any(name, {"A770", "0x56a0"})&&memory>=15625u&&memory<16030u)||(contains_any(name, {"A770", "A750", "A580", "0x56a0", "0x56a1", "0x56a2"})&&memory>=7812u&&memory<8015u)||(contains_any(name, {"A380", "0x56a5"})&&memory>=5859u&&memory<6011u)) { // 96.5%-99.0% reporting -> /0.98
76+
memory = (uint)((cl_device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()*50ull/49ull)/1048576ull); // fix wrong (98% on Windows) memory reporting on Intel Arc
77+
}
7578
}
7679
intel_gpu_above_4gb_patch = (intel==8.0f)&&(memory>4096); // enable memory allocations greater than 4GB for Intel GPUs with >4GB VRAM
7780
}

0 commit comments

Comments
 (0)