Skip to content

Commit cea9ac2

Browse files
committed
Extended profiling events to SVM/USM
- Enabled profiling for the Intel pointer-arithmetic extension. - Squeezed event info (size and kind) into 8 bytes. - Established a larger default queue size (history). - Introduced ACC_OPENCL_PROFILE_QSIZE. Other: - Implemented CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL. - Consistently allow overriding with explicit XHINTS. - Fixed the condition regarding RecoverablePageFaults (Intel).
1 parent 4ca2ec2 commit cea9ac2

File tree

5 files changed

+65
-52
lines changed

5 files changed

+65
-52
lines changed

src/acc/opencl/acc_opencl.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ void c_dbcsr_acc_opencl_configure(void) {
170170
# endif
171171
# if defined(ACC_OPENCL_XHINTS)
172172
const char* const env_xhints = (ACC_OPENCL_XHINTS);
173-
const int xhints_default = 1 + 2 + 4 + 8;
173+
const int xhints_default = 1 + 2 + 4 + 8 + 16;
174174
# else
175175
const char* const env_xhints = NULL;
176176
const int xhints_default = 0;
@@ -295,7 +295,7 @@ void c_dbcsr_acc_opencl_configure(void) {
295295
if ((1 & c_dbcsr_acc_opencl_config.wa) && NULL == getenv("ZE_FLAT_DEVICE_HIERARCHY")) {
296296
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(apply[0]));
297297
}
298-
# if (0 == ACC_OPENCL_USM)
298+
# if (1 >= ACC_OPENCL_USM)
299299
if ((2 & c_dbcsr_acc_opencl_config.wa) && NULL == getenv("EnableRecoverablePageFaults")) {
300300
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(apply[1]));
301301
}
@@ -605,11 +605,14 @@ int c_dbcsr_acc_init(void) {
605605
# endif
606606
0 > c_dbcsr_acc_opencl_config.profile)
607607
{
608+
const char* const env_qsize = getenv("ACC_OPENCL_PROFILE_QSIZE");
609+
const int psize = (NULL == env_qsize ? 0 : atoi(env_qsize));
610+
const int qsize = (0 >= psize ? 1024 : LIBXSMM_MIN(psize, 65536));
608611
const int profile = LIBXSMM_MAX(LIBXSMM_ABS(c_dbcsr_acc_opencl_config.profile), 2);
609612
const c_dbcsr_acc_opencl_hist_update_fn update[] = {c_dbcsr_acc_opencl_hist_avg, c_dbcsr_acc_opencl_hist_add};
610-
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_h2d, profile + 1, profile * 4, 2, update);
611-
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_d2h, profile + 1, profile * 4, 2, update);
612-
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_d2d, profile + 1, profile * 4, 2, update);
613+
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_h2d, profile + 1, qsize, 2, update);
614+
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_d2h, profile + 1, qsize, 2, update);
615+
c_dbcsr_acc_opencl_hist_create(&c_dbcsr_acc_opencl_config.hist_d2d, profile + 1, qsize, 2, update);
613616
}
614617
else {
615618
assert(NULL == c_dbcsr_acc_opencl_config.hist_h2d);
@@ -679,7 +682,8 @@ LIBXSMM_ATTRIBUTE_CTOR void c_dbcsr_acc_opencl_init(void) {
679682
LIBXSMM_ATTRIBUTE_DTOR void c_dbcsr_acc_opencl_finalize(void) {
680683
assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS);
681684
if (0 != c_dbcsr_acc_opencl_config.ndevices) {
682-
int precision[] = {0, 1}, i;
685+
const int precision[] = {0, 1};
686+
int i;
683687
LIBXSMM_STDIO_ACQUIRE();
684688
c_dbcsr_acc_opencl_hist_print(stderr, c_dbcsr_acc_opencl_config.hist_h2d, "\nPROF ACC/OpenCL: H2D", precision, NULL /*adjust*/);
685689
c_dbcsr_acc_opencl_hist_print(stderr, c_dbcsr_acc_opencl_config.hist_d2h, "\nPROF ACC/OpenCL: D2H", precision, NULL /*adjust*/);
@@ -1151,7 +1155,7 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
11511155
cl_platform_id platform = NULL;
11521156
cl_bitfield bitfield = 0;
11531157
if (0 != (1 & c_dbcsr_acc_opencl_config.xhints) && 2 <= *devinfo->std_level && 0 != devinfo->intel &&
1154-
0 == c_dbcsr_acc_opencl_config.profile && 0 == devinfo->unified &&
1158+
/*0 == c_dbcsr_acc_opencl_config.profile &&*/ (0 == devinfo->unified || NULL != (ACC_OPENCL_XHINTS)) &&
11551159
EXIT_SUCCESS == clGetDeviceInfo(active_id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL) &&
11561160
EXIT_SUCCESS == c_dbcsr_acc_opencl_device_vendor(active_id, "intel", 2 /*platform vendor*/) &&
11571161
EXIT_SUCCESS == clGetDeviceInfo(active_id, 0x4191 /*CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL*/, sizeof(cl_bitfield),

src/acc/opencl/acc_opencl.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,11 +296,17 @@ typedef struct c_dbcsr_acc_opencl_device_t {
296296
cl_int (*clMemFreeINTEL)(cl_context, void*);
297297
} c_dbcsr_acc_opencl_device_t;
298298

299+
typedef enum c_dbcsr_acc_event_kind_t {
300+
c_dbcsr_acc_event_kind_none,
301+
c_dbcsr_acc_event_kind_h2d,
302+
c_dbcsr_acc_event_kind_d2h,
303+
c_dbcsr_acc_event_kind_d2d
304+
} c_dbcsr_acc_event_kind_t;
305+
299306
/** Information about host/device-memory pointer. */
300307
typedef struct c_dbcsr_acc_opencl_info_memptr_t {
301308
cl_mem memory; /* first item! */
302309
void* memptr;
303-
/*void *data;*/
304310
} c_dbcsr_acc_opencl_info_memptr_t;
305311

306312
/** Enumeration of FP-atomic kinds. */

src/acc/opencl/acc_opencl_event.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ int c_dbcsr_acc_event_synchronize(void* event) { /* waits on the host-side */
182182
if (NULL != clevent) {
183183
if (0 == (32 & c_dbcsr_acc_opencl_config.wa)) {
184184
cl_int status = CL_COMPLETE + 1;
185-
if (32 & c_dbcsr_acc_opencl_config.xhints) {
185+
if (64 & c_dbcsr_acc_opencl_config.xhints) {
186186
result = clGetEventInfo(clevent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
187187
assert(EXIT_SUCCESS == result || CL_COMPLETE != status);
188188
}

src/acc/opencl/acc_opencl_mem.c

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ int c_dbcsr_acc_host_mem_allocate(void** host_mem, size_t nbytes, void* stream)
305305
int memflags = CL_MEM_ALLOC_HOST_PTR;
306306
nbytes += alignment + size_meminfo - 1;
307307
# if defined(ACC_OPENCL_XHINTS)
308-
if (0 != (4 & c_dbcsr_acc_opencl_config.xhints) && (0 != devinfo->nv || NULL != (ACC_OPENCL_XHINTS))) {
308+
if (0 != (8 & c_dbcsr_acc_opencl_config.xhints) && (0 != devinfo->nv || NULL != (ACC_OPENCL_XHINTS))) {
309309
host_ptr = ACC_OPENCL_MEM_ALLOC(nbytes, alignment);
310310
if (NULL != host_ptr) memflags = CL_MEM_USE_HOST_PTR;
311311
}
@@ -316,7 +316,7 @@ int c_dbcsr_acc_host_mem_allocate(void** host_mem, size_t nbytes, void* stream)
316316
if (NULL == host_ptr) {
317317
mapped = clEnqueueMapBuffer(str->queue, memory, CL_TRUE /*always block*/,
318318
# if defined(ACC_OPENCL_XHINTS) && (defined(CL_VERSION_1_2) || defined(CL_MAP_WRITE_INVALIDATE_REGION))
319-
(16 & c_dbcsr_acc_opencl_config.xhints) ? CL_MAP_WRITE_INVALIDATE_REGION :
319+
(32 & c_dbcsr_acc_opencl_config.xhints) ? CL_MAP_WRITE_INVALIDATE_REGION :
320320
# endif
321321
(CL_MAP_READ | CL_MAP_WRITE),
322322
0 /*offset*/, nbytes, 0, NULL, NULL, &result);
@@ -396,33 +396,35 @@ void CL_CALLBACK c_dbcsr_acc_memcpy_notify(cl_event /*event*/, cl_int /*event_st
396396
void CL_CALLBACK c_dbcsr_acc_memcpy_notify(cl_event event, cl_int event_status, void* data) {
397397
int result = EXIT_SUCCESS;
398398
const double durdev = c_dbcsr_acc_opencl_duration(event, &result);
399-
c_dbcsr_acc_opencl_info_memptr_t info;
400-
cl_command_type type;
401-
size_t size = 0, offset = 0;
399+
cl_command_type type = CL_COMMAND_SVM_MEMCPY;
402400
LIBXSMM_UNUSED(event_status);
403-
assert(CL_COMPLETE == event_status && NULL != data);
404-
if (EXIT_SUCCESS == result && EXIT_SUCCESS == clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(type), &type, NULL) &&
405-
EXIT_SUCCESS == c_dbcsr_acc_opencl_info_devptr_lock(&info, NULL, data, 1 /*elsize*/, NULL /*amount*/, &offset) &&
406-
EXIT_SUCCESS == clGetMemObjectInfo(info.memory, CL_MEM_SIZE, sizeof(size_t), &size, NULL) && offset <= size)
407-
{
408-
/*const double durhst = libxsmm_timer_duration((libxsmm_timer_tickint)info.data, libxsmm_timer_tick());
409-
const double durtot = durdev - LIBXSMM_MIN(durdev, durhst);*/
410-
const size_t amount = size - offset;
411-
const double vals[] = {(double)amount, durdev};
412-
const int mb = (int)((amount + (1 << 19)) >> 20);
401+
assert(CL_COMPLETE == event_status && NULL != data && 8 == sizeof(data));
402+
if (EXIT_SUCCESS == result && EXIT_SUCCESS == clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(type), &type, NULL)) {
403+
const size_t size = 0x3FFFFFFFFFFFFFFF & (size_t)data;
404+
const int kind = (int)(((size_t)data) >> 62);
405+
const double vals[] = {(double)size, durdev};
406+
const int mb = (int)((size + (1 << 19)) >> 20);
407+
if (CL_COMMAND_WRITE_BUFFER != type && CL_COMMAND_READ_BUFFER != type && CL_COMMAND_COPY_BUFFER != type) {
408+
switch (kind) {
409+
case c_dbcsr_acc_event_kind_h2d: type = CL_COMMAND_WRITE_BUFFER; break;
410+
case c_dbcsr_acc_event_kind_d2h: type = CL_COMMAND_READ_BUFFER; break;
411+
case c_dbcsr_acc_event_kind_d2d: type = CL_COMMAND_COPY_BUFFER; break;
412+
default: assert(c_dbcsr_acc_event_kind_none == kind); /* should not happen */
413+
}
414+
}
413415
switch (type) {
414416
case CL_COMMAND_WRITE_BUFFER: {
415-
assert(NULL != c_dbcsr_acc_opencl_config.hist_h2d);
417+
assert(NULL != c_dbcsr_acc_opencl_config.hist_h2d && c_dbcsr_acc_event_kind_h2d == kind);
416418
c_dbcsr_acc_opencl_hist_set(c_dbcsr_acc_opencl_config.lock_memory, c_dbcsr_acc_opencl_config.hist_h2d, vals);
417419
if (0 > c_dbcsr_acc_opencl_config.profile) fprintf(stderr, "PROF ACC/OpenCL: H2D mb=%i us=%.0f\n", mb, durdev * 1E6);
418420
} break;
419421
case CL_COMMAND_READ_BUFFER: {
420-
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h);
422+
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h && c_dbcsr_acc_event_kind_d2h == kind);
421423
c_dbcsr_acc_opencl_hist_set(c_dbcsr_acc_opencl_config.lock_memory, c_dbcsr_acc_opencl_config.hist_d2h, vals);
422424
if (0 > c_dbcsr_acc_opencl_config.profile) fprintf(stderr, "PROF ACC/OpenCL: D2H mb=%i us=%.0f\n", mb, durdev * 1E6);
423425
} break;
424426
case CL_COMMAND_COPY_BUFFER: {
425-
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2d);
427+
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2d && c_dbcsr_acc_event_kind_d2d == kind);
426428
c_dbcsr_acc_opencl_hist_set(c_dbcsr_acc_opencl_config.lock_memory, c_dbcsr_acc_opencl_config.hist_d2d, vals);
427429
if (0 > c_dbcsr_acc_opencl_config.profile) fprintf(stderr, "PROF ACC/OpenCL: D2D mb=%i us=%.0f\n", mb, durdev * 1E6);
428430
} break;
@@ -489,7 +491,7 @@ int c_dbcsr_acc_dev_mem_allocate(void** dev_mem, size_t nbytes) {
489491
{
490492
# if defined(ACC_OPENCL_XHINTS)
491493
const int devuid = devinfo->uid, devuids = (0x4905 == devuid || 0x020a == devuid || (0x0bd0 <= devuid && 0x0bdb >= devuid));
492-
const int try_flag = ((0 != (8 & c_dbcsr_acc_opencl_config.xhints) && 0 != devinfo->intel && 0 == devinfo->unified &&
494+
const int try_flag = ((0 != (16 & c_dbcsr_acc_opencl_config.xhints) && 0 != devinfo->intel && 0 == devinfo->unified &&
493495
(devuids || NULL != (ACC_OPENCL_XHINTS)))
494496
? (1u << 22)
495497
: 0);
@@ -667,7 +669,8 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
667669
assert(NULL != str);
668670
# if (1 >= ACC_OPENCL_USM)
669671
if (NULL != devinfo->clEnqueueMemcpyINTEL) {
670-
result = devinfo->clEnqueueMemcpyINTEL(str->queue, finish, dev_mem, host_mem, nbytes, 0, NULL, NULL);
672+
result = devinfo->clEnqueueMemcpyINTEL(
673+
str->queue, finish, dev_mem, host_mem, nbytes, 0, NULL, NULL == c_dbcsr_acc_opencl_config.hist_h2d ? NULL : &event);
671674
}
672675
else
673676
# endif
@@ -690,18 +693,18 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
690693
if (NULL != info) {
691694
result = clEnqueueWriteBuffer(str->queue, info->memory, finish, offset, nbytes, host_mem, 0, NULL,
692695
NULL == c_dbcsr_acc_opencl_config.hist_h2d ? NULL : &event);
693-
/*if (NULL != event && EXIT_SUCCESS == result) info->data = (void*)libxsmm_timer_tick();*/
694696
}
695697
else result = EXIT_FAILURE;
696698
}
697699
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
698700
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
699701
if (EXIT_SUCCESS == result) {
702+
void* const data = (void*)(nbytes | ((size_t)c_dbcsr_acc_event_kind_h2d) << 62);
700703
assert(NULL != c_dbcsr_acc_opencl_config.hist_h2d);
701704
if (!finish) { /* asynchronous */
702-
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, dev_mem);
705+
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, data);
703706
}
704-
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, dev_mem); /* synchronous */
707+
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, data); /* synchronous */
705708
}
706709
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
707710
}
@@ -818,16 +821,16 @@ int c_dbcsr_acc_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, v
818821
else {
819822
result = c_dbcsr_acc_opencl_memcpy_d2h(
820823
info->memory, host_mem, offset, nbytes, str->queue, finish, NULL == c_dbcsr_acc_opencl_config.hist_d2h ? NULL : &event);
821-
/*if (NULL != event && EXIT_SUCCESS == result) info->data = (void*)libxsmm_timer_tick();*/
822824
}
823825
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
824826
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
825827
if (EXIT_SUCCESS == result) {
826-
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h /*&& NULL == c_dbcsr_acc_opencl_config.device.clEnqueueMemcpyINTEL*/);
828+
void* const data = (void*)(nbytes | ((size_t)c_dbcsr_acc_event_kind_d2h) << 62);
829+
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2h);
827830
if (!finish) { /* asynchronous */
828-
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, nconst.ptr);
831+
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, data);
829832
}
830-
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, nconst.ptr); /* synchronous */
833+
else c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, data); /* synchronous */
831834
}
832835
else ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseEvent(event));
833836
}
@@ -867,7 +870,8 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
867870
assert(NULL != str && NULL != devinfo->context);
868871
# if (1 >= ACC_OPENCL_USM)
869872
if (NULL != devinfo->clEnqueueMemcpyINTEL) {
870-
result = devinfo->clEnqueueMemcpyINTEL(str->queue, CL_FALSE /*blocking*/, devmem_dst, devmem_src, nbytes, 0, NULL, pevent);
873+
result = devinfo->clEnqueueMemcpyINTEL(str->queue, CL_FALSE /*blocking*/, devmem_dst, devmem_src, nbytes, 0, NULL,
874+
NULL == c_dbcsr_acc_opencl_config.hist_d2d ? pevent : &event);
871875
}
872876
else
873877
# endif
@@ -892,26 +896,22 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
892896
if (NULL != info_src && NULL != info_dst) {
893897
result = clEnqueueCopyBuffer(str->queue, info_src->memory, info_dst->memory, offset_src, offset_dst, nbytes, 0, NULL,
894898
NULL == c_dbcsr_acc_opencl_config.hist_d2d ? pevent : &event);
895-
/*if (NULL != event && EXIT_SUCCESS == result && NULL != c_dbcsr_acc_opencl_config.hist_d2d) {
896-
info_src->data = (void*)libxsmm_timer_tick();
897-
}*/
898899
}
899900
else result = EXIT_FAILURE;
900901
}
901902
ACC_OPENCL_RELEASE(c_dbcsr_acc_opencl_config.lock_memory);
902903
if (NULL != event) { /* c_dbcsr_acc_memcpy_notify must be outside of locked region */
903904
if (EXIT_SUCCESS == result) {
905+
void* const data = (void*)(nbytes | ((size_t)c_dbcsr_acc_event_kind_d2d) << 62);
904906
if (NULL == pevent) { /* asynchronous */
905-
assert(NULL == devinfo->clEnqueueMemcpyINTEL);
906907
assert(NULL != c_dbcsr_acc_opencl_config.hist_d2d);
907-
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, nconst.ptr);
908+
result = clSetEventCallback(event, CL_COMPLETE, c_dbcsr_acc_memcpy_notify, data);
908909
}
909910
else { /* synchronous */
910911
result = clWaitForEvents(1, &event);
911912
if (EXIT_SUCCESS == result) {
912913
if (NULL != c_dbcsr_acc_opencl_config.hist_d2d) {
913-
assert(NULL == devinfo->clEnqueueMemcpyINTEL);
914-
c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, nconst.ptr);
914+
c_dbcsr_acc_memcpy_notify(event, CL_COMPLETE, data);
915915
}
916916
else result = clReleaseEvent(event);
917917
}

src/acc/opencl/acc_opencl_stream.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,21 +122,29 @@ int c_dbcsr_acc_stream_create(void** stream_p, const char* name, int priority) {
122122
# endif
123123
{
124124
const cl_device_id device_id = c_dbcsr_acc_opencl_config.devices[c_dbcsr_acc_opencl_config.device_id];
125+
if (NULL != c_dbcsr_acc_opencl_config.hist_h2d || NULL != c_dbcsr_acc_opencl_config.hist_d2h ||
126+
NULL != c_dbcsr_acc_opencl_config.hist_d2d)
127+
{
128+
properties[1] |= CL_QUEUE_PROFILING_ENABLE;
129+
}
125130
# if defined(ACC_OPENCL_XHINTS)
126-
if ((2 & c_dbcsr_acc_opencl_config.xhints) && 0 != devinfo->intel) { /* enable queue families */
131+
if ((2 & c_dbcsr_acc_opencl_config.xhints) && 0 != devinfo->intel) {
132+
properties[1] |= (((ACC_OPENCL_STREAM_PROPERTIES_TYPE)1) << 31); /* CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL */
133+
}
134+
if ((4 & c_dbcsr_acc_opencl_config.xhints) && 0 != devinfo->intel) {
127135
struct {
128136
cl_command_queue_properties properties;
129137
cl_bitfield capabilities;
130138
cl_uint count;
131139
char name[64 /*CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL*/];
132140
} intel_qfprops[16];
141+
const int j = (0 /*terminator*/ == properties[2] ? 2 : 4);
133142
size_t nbytes = 0, i;
134143
if (EXIT_SUCCESS == clGetDeviceInfo(device_id, 0x418B /*CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL*/, sizeof(intel_qfprops),
135144
intel_qfprops, &nbytes))
136-
{
145+
{ /* enable queue families */
137146
for (i = 0; (i * sizeof(*intel_qfprops)) < nbytes; ++i) {
138147
if (0 /*CL_QUEUE_DEFAULT_CAPABILITIES_INTEL*/ == intel_qfprops[i].capabilities && 1 < intel_qfprops[i].count) {
139-
const int j = (0 /*terminator*/ == properties[2] ? 2 : 4);
140148
properties[j + 0] = 0x418C; /* CL_QUEUE_FAMILY_INTEL */
141149
properties[j + 1] = (int)i;
142150
properties[j + 2] = 0x418D; /* CL_QUEUE_INDEX_INTEL */
@@ -148,11 +156,6 @@ int c_dbcsr_acc_stream_create(void** stream_p, const char* name, int priority) {
148156
}
149157
}
150158
# endif
151-
if (NULL != c_dbcsr_acc_opencl_config.hist_h2d || NULL != c_dbcsr_acc_opencl_config.hist_d2h ||
152-
NULL != c_dbcsr_acc_opencl_config.hist_d2d)
153-
{
154-
properties[1] = CL_QUEUE_PROFILING_ENABLE;
155-
}
156159
queue = ACC_OPENCL_CREATE_COMMAND_QUEUE(devinfo->context, device_id, properties, &result);
157160
}
158161
if (EXIT_SUCCESS == result) { /* register stream */

0 commit comments

Comments
 (0)