Skip to content

Commit 6bd68a2

Browse files
authored
track queues and call finish to emulate a blocking free (#187)
1 parent d75c309 commit 6bd68a2

File tree

4 files changed

+82
-9
lines changed

4 files changed

+82
-9
lines changed

intercept/src/dispatch.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -778,7 +778,7 @@ CL_API_ENTRY cl_command_queue CL_API_CALL CLIRN(clCreateCommandQueue)(
778778
ITT_REGISTER_COMMAND_QUEUE( retVal, false );
779779
ADD_OBJECT_ALLOCATION( retVal );
780780
CALL_LOGGING_EXIT( errcode_ret[0], "returned %p", retVal );
781-
ADD_QUEUE( retVal );
781+
ADD_QUEUE( context, retVal );
782782

783783
return retVal;
784784
}
@@ -6335,7 +6335,7 @@ CL_API_ENTRY cl_command_queue CL_API_CALL CLIRN(clCreateCommandQueueWithProperti
63356335
CHECK_ERROR( errcode_ret[0] );
63366336
ADD_OBJECT_ALLOCATION( retVal );
63376337
CALL_LOGGING_EXIT( errcode_ret[0], "returned %p", retVal );
6338-
ADD_QUEUE( retVal );
6338+
ADD_QUEUE( context, retVal );
63396339

63406340
return retVal;
63416341
}
@@ -6427,7 +6427,7 @@ CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(
64276427
CHECK_ERROR( errcode_ret[0] );
64286428
ADD_OBJECT_ALLOCATION( retVal );
64296429
CALL_LOGGING_EXIT( errcode_ret[0], "returned %p", retVal );
6430-
ADD_QUEUE( retVal );
6430+
ADD_QUEUE( context, retVal );
64316431

64326432
return retVal;
64336433
}

intercept/src/emulate.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@ cl_int CL_API_CALL clMemBlockingFreeINTEL_EMU(
133133

134134
if( pIntercept && pIntercept->config().Emulate_cl_intel_unified_shared_memory )
135135
{
136-
// TODO: Track queues and block all.
136+
pIntercept->finishAll( context );
137+
137138
return pIntercept->emulatedMemFree(
138139
context,
139140
ptr );

intercept/src/intercept.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5660,6 +5660,7 @@ bool CLIntercept::checkGetSamplerString(
56605660
///////////////////////////////////////////////////////////////////////////////
56615661
//
56625662
void CLIntercept::addQueue(
5663+
cl_context context,
56635664
cl_command_queue queue )
56645665
{
56655666
if( queue )
@@ -5668,6 +5669,8 @@ void CLIntercept::addQueue(
56685669

56695670
m_QueueNumberMap[ queue ] = m_QueueNumber + 1; // should be nonzero
56705671
m_QueueNumber++;
5672+
5673+
m_ContextQueuesMap[context].push_back(queue);
56715674
}
56725675
}
56735676

@@ -5690,6 +5693,25 @@ void CLIntercept::checkRemoveQueue(
56905693
if( errorCode == CL_SUCCESS && refCount == 1 )
56915694
{
56925695
m_QueueNumberMap.erase( queue );
5696+
5697+
cl_context context = NULL;
5698+
5699+
errorCode = dispatch().clGetCommandQueueInfo(
5700+
queue,
5701+
CL_QUEUE_CONTEXT,
5702+
sizeof(context),
5703+
&context,
5704+
NULL );
5705+
if( errorCode == CL_SUCCESS && context )
5706+
{
5707+
CQueueList& queues = m_ContextQueuesMap[context];
5708+
5709+
queues.erase(
5710+
std::find(
5711+
queues.begin(),
5712+
queues.end(),
5713+
queue ) );
5714+
}
56935715
}
56945716
}
56955717

@@ -12133,6 +12155,8 @@ void* CLIntercept::emulatedHostMemAlloc(
1213312155
cl_uint alignment,
1213412156
cl_int* errcode_ret)
1213512157
{
12158+
std::lock_guard<std::mutex> lock(m_Mutex);
12159+
1213612160
if( !validateUSMMemProperties(properties) )
1213712161
{
1213812162
if( errcode_ret )
@@ -12198,6 +12222,8 @@ void* CLIntercept::emulatedDeviceMemAlloc(
1219812222
cl_uint alignment,
1219912223
cl_int* errcode_ret)
1220012224
{
12225+
std::lock_guard<std::mutex> lock(m_Mutex);
12226+
1220112227
if( !validateUSMMemProperties(properties) )
1220212228
{
1220312229
if( errcode_ret )
@@ -12252,6 +12278,8 @@ void* CLIntercept::emulatedSharedMemAlloc(
1225212278
cl_uint alignment,
1225312279
cl_int* errcode_ret)
1225412280
{
12281+
std::lock_guard<std::mutex> lock(m_Mutex);
12282+
1225512283
if( !validateUSMMemProperties(properties) )
1225612284
{
1225712285
if( errcode_ret )
@@ -12314,6 +12342,8 @@ cl_int CLIntercept::emulatedMemFree(
1231412342
cl_context context,
1231512343
const void* ptr )
1231612344
{
12345+
std::lock_guard<std::mutex> lock(m_Mutex);
12346+
1231712347
SUSMContextInfo& usmContextInfo = m_USMContextInfoMap[context];
1231812348

1231912349
CUSMAllocMap::iterator iter = usmContextInfo.AllocMap.find( ptr );
@@ -12710,3 +12740,30 @@ cl_int CLIntercept::setUSMKernelExecInfo(
1271012740

1271112741
return errorCode;
1271212742
}
12743+
12744+
///////////////////////////////////////////////////////////////////////////////
12745+
//
12746+
// Calls clFinish() on every command queue currently tracked for the given
// context in m_ContextQueuesMap.
//
// context: the context whose tracked queues should be finished.
//
// Returns CL_SUCCESS when every clFinish() call succeeds (or when no queues
// are tracked for the context); otherwise returns the error code from the
// last clFinish() call that failed.  A failure on one queue does not stop the
// remaining queues from being finished.
//
// m_Mutex is held for the whole call, including the clFinish() calls.
cl_int CLIntercept::finishAll(
    cl_context context )
{
    std::lock_guard<std::mutex> lock(m_Mutex);

    cl_int errorCode = CL_SUCCESS;

    // Look the context up with find() rather than operator[] so that asking
    // about a context with no tracked queues does not insert an empty entry.
    const auto iter = m_ContextQueuesMap.find( context );
    if( iter == m_ContextQueuesMap.end() )
    {
        return errorCode;
    }

    for( auto queue : iter->second )
    {
        const cl_int tempErrorCode = dispatch().clFinish( queue );
        if( tempErrorCode != CL_SUCCESS )
        {
            // Report the code returned by this clFinish() call, not the
            // previously accumulated error code.
            logf("clFinish on queue %p returned %s (%d)!\n",
                queue,
                enumName().name(tempErrorCode).c_str(),
                tempErrorCode );
            errorCode = tempErrorCode;
        }
    }

    return errorCode;
}

intercept/src/intercept.h

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ class CLIntercept
415415
std::string& str ) const;
416416

417417
void addQueue(
418+
cl_context context,
418419
cl_command_queue queue );
419420
void checkRemoveQueue(
420421
cl_command_queue queue );
@@ -757,6 +758,9 @@ class CLIntercept
757758
cl_command_queue queue,
758759
cl_kernel kernel );
759760

761+
// Calls clFinish() on each command queue tracked for the context.  Returns
// CL_SUCCESS if all calls succeed, otherwise the error code from the last
// clFinish() call that failed.
cl_int finishAll(
    cl_context context );
763+
760764
private:
761765
static const char* sc_URL;
762766
static const char* sc_DumpDirectoryName;
@@ -976,6 +980,10 @@ class CLIntercept
976980
typedef std::map< cl_command_queue, unsigned int > CQueueNumberMap;
977981
CQueueNumberMap m_QueueNumberMap;
978982

983+
typedef std::list< cl_command_queue > CQueueList;
984+
typedef std::map< cl_context, CQueueList > CContextQueuesMap;
985+
CContextQueuesMap m_ContextQueuesMap;
986+
979987
unsigned int m_MemAllocNumber;
980988

981989
typedef std::map< const void*, unsigned int > CMemAllocNumberMap;
@@ -1581,17 +1589,24 @@ inline bool CLIntercept::checkDumpImageEnqueueLimits() const
15811589
( m_EnqueueCounter <= m_Config.DumpImagesMaxEnqueue );
15821590
}
15831591

1584-
// Tracks a command queue with the intercept layer.
//
// When the queue is non-null and either ChromePerformanceTiming or
// Emulate_cl_intel_unified_shared_memory is enabled, calls
// pIntercept->addQueue( context, queue ).  The queue is additionally passed
// to pIntercept->chromeRegisterCommandQueue() only when
// ChromePerformanceTiming is enabled.
//
// Requires a pIntercept pointer to be in scope at the expansion site.
// The macro arguments are parenthesized so that expression arguments (for
// example a conditional expression) are evaluated as a unit.
//
// NOTE(review): this expands to a bare if-statement, so it should only be
// used as a standalone statement and not as the un-braced body of an if that
// has an else.
#define ADD_QUEUE( context, queue ) \
    if( (queue) && \
        ( pIntercept->config().ChromePerformanceTiming || \
          pIntercept->config().Emulate_cl_intel_unified_shared_memory ) ) \
    { \
        pIntercept->addQueue( \
            (context), \
            (queue) ); \
        if( pIntercept->config().ChromePerformanceTiming ) \
        { \
            pIntercept->chromeRegisterCommandQueue( (queue) ); \
        } \
    }
15911605

15921606
// Notifies the intercept layer that a command queue may be going away.
//
// When the queue is non-null and either ChromePerformanceTiming or
// Emulate_cl_intel_unified_shared_memory is enabled, calls
// pIntercept->checkRemoveQueue( queue ).
//
// Requires a pIntercept pointer to be in scope at the expansion site.
// The macro argument is parenthesized so that an expression argument is
// evaluated as a unit.
//
// NOTE(review): this expands to a bare if-statement, so it should only be
// used as a standalone statement and not as the un-braced body of an if that
// has an else.
#define REMOVE_QUEUE( queue ) \
    if( (queue) && \
        ( pIntercept->config().ChromePerformanceTiming || \
          pIntercept->config().Emulate_cl_intel_unified_shared_memory ) ) \
    { \
        pIntercept->checkRemoveQueue( (queue) ); \
    }

0 commit comments

Comments
 (0)