Skip to content

Commit 7f74dd3

Browse files
committed
wow thanks multi-threading!
1 parent 20e4c5b commit 7f74dd3

File tree

5 files changed

+109
-38
lines changed

5 files changed

+109
-38
lines changed

gameui/OptionsSubVideo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ class COptionsSubVideoAdvancedDlg : public vgui::Frame
842842

843843
if (mat_reducefillrate.GetBool())
844844
{
845-
if (mat_phong.GetBool() && r_force_fastpath.GetBool() && r_skybox_lowend.GetBool())
845+
if (!mat_phong.GetBool() && r_force_fastpath.GetBool() && r_skybox_lowend.GetBool())
846846
{
847847
m_pShaderDetail->ActivateItem( 0 );
848848
}

materialsystem/cmaterialsystem.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3702,9 +3702,9 @@ void CMaterialSystem::EndFrame( void )
37023702

37033703
if ( m_pActiveAsyncJob )
37043704
{
3705-
if ( !m_pActiveAsyncJob->IsFinished() )
3705+
while ( !m_pActiveAsyncJob->IsFinished() )
37063706
{
3707-
m_pActiveAsyncJob->WaitForFinish();
3707+
m_pActiveAsyncJob->WaitForFinish(0);
37083708
}
37093709
// Sync with GPU if we had a job for it, even if it finished early on CPU!
37103710
if (!IsPC() && g_config.ForceHWSync())
@@ -3786,7 +3786,10 @@ void CMaterialSystem::EndFrame( void )
37863786
{
37873787
if ( m_pActiveAsyncJob )
37883788
{
3789-
m_pActiveAsyncJob->WaitForFinish();
3789+
while (!m_pActiveAsyncJob->IsFinished())
3790+
{
3791+
m_pActiveAsyncJob->WaitForFinish(0);
3792+
}
37903793
SafeRelease( m_pActiveAsyncJob );
37913794
}
37923795
// probably have a queued context set here, need hardware to flush the queue if the job isn't active
@@ -4871,7 +4874,10 @@ void CMaterialSystem::ThreadRelease( )
48714874
CJob *pActiveAsyncJob = new CThreadRelease();
48724875
IThreadPool *pThreadPool = CreateMatQueueThreadPool();
48734876
pThreadPool->AddJob( pActiveAsyncJob );
4874-
pActiveAsyncJob->WaitForFinish();
4877+
while (!pActiveAsyncJob->IsFinished())
4878+
{
4879+
pActiveAsyncJob->WaitForFinish(0);
4880+
}
48754881

48764882
SafeRelease( pActiveAsyncJob );
48774883

@@ -4958,7 +4964,10 @@ MaterialLock_t CMaterialSystem::Lock()
49584964
#if 1 // Rick's optimization: not sure this is needed anymore
49594965
if ( pCurContext != &m_HardwareRenderContext && m_pActiveAsyncJob )
49604966
{
4961-
m_pActiveAsyncJob->WaitForFinish();
4967+
while (!m_pActiveAsyncJob->IsFinished())
4968+
{
4969+
m_pActiveAsyncJob->WaitForFinish(0);
4970+
}
49624971
// threadsafety note: not releasing or nulling pointer.
49634972
}
49644973

public/vstdlib/jobthread.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ enum JobFlags_t
8686
JF_BOOST_THREAD = ( 1 << 1 ), // Up the thread priority to max allowed while processing task
8787
JF_SERIAL = ( 1 << 2 ), // Job cannot be executed out of order relative to other "strict" jobs
8888
JF_QUEUE = ( 1 << 3 ), // Queue it, even if not an IO job
89+
JF_NO_NOTIFY = ( 1 << 4 ), // Do not notify the queue, because we are gonna queue up more calls!
90+
JF_NO_ADD = ( 1 << 5 ), // Do not add to the queue, because we are gonna modify the job!
8991
};
9092

9193
enum JobPriority_t
@@ -346,6 +348,17 @@ abstract_class IThreadPool : public IRefCounted
346348
return pJob; \
347349
}
348350

351+
#define DEFINE_MEMBER_QUEUE_CALL_WITH_FLAGS(N) \
352+
template <typename OBJECT_TYPE, typename FUNCTION_CLASS, typename FUNCTION_RETTYPE FUNC_TEMPLATE_FUNC_PARAMS_##N FUNC_TEMPLATE_ARG_PARAMS_##N> \
353+
CJob *QueueCall(OBJECT_TYPE *pObject, int flags, FUNCTION_RETTYPE ( FUNCTION_CLASS::*pfnProxied )( FUNC_BASE_TEMPLATE_FUNC_PARAMS_##N ) FUNC_ARG_FORMAL_PARAMS_##N ) \
354+
{ \
355+
CJob *pJob; \
356+
int jobFlags = JF_QUEUE; \
357+
jobFlags |= flags; \
358+
AddFunctorInternal( CreateFunctor( pObject, pfnProxied FUNC_FUNCTOR_CALL_ARGS_##N ), &pJob, NULL, jobFlags ); \
359+
return pJob; \
360+
}
361+
349362
//-------------------------------------
350363

351364
#define DEFINE_CONST_MEMBER_QUEUE_CALL(N) \
@@ -387,6 +400,7 @@ abstract_class IThreadPool : public IRefCounted
387400
FUNC_GENERATE_ALL( DEFINE_REF_COUNTING_CONST_MEMBER_ADD_CALL );
388401
FUNC_GENERATE_ALL( DEFINE_NONMEMBER_QUEUE_CALL );
389402
FUNC_GENERATE_ALL( DEFINE_MEMBER_QUEUE_CALL );
403+
FUNC_GENERATE_ALL( DEFINE_MEMBER_QUEUE_CALL_WITH_FLAGS );
390404
FUNC_GENERATE_ALL( DEFINE_CONST_MEMBER_QUEUE_CALL );
391405
FUNC_GENERATE_ALL( DEFINE_REF_COUNTING_MEMBER_QUEUE_CALL );
392406
FUNC_GENERATE_ALL( DEFINE_REF_COUNTING_CONST_MEMBER_QUEUE_CALL );
@@ -398,6 +412,7 @@ abstract_class IThreadPool : public IRefCounted
398412
#undef DEFINE_REF_COUNTING_CONST_MEMBER_ADD_CALL
399413
#undef DEFINE_NONMEMBER_QUEUE_CALL
400414
#undef DEFINE_MEMBER_QUEUE_CALL
415+
#undef DEFINE_MEMBER_QUEUE_CALL_WITH_FLAGS
401416
#undef DEFINE_CONST_MEMBER_QUEUE_CALL
402417
#undef DEFINE_REF_COUNTING_MEMBER_QUEUE_CALL
403418
#undef DEFINE_REF_COUNTING_CONST_MEMBER_QUEUE_CALL
@@ -896,12 +911,18 @@ class CParallelProcessor
896911
CJob **jobs = (CJob **)stackalloc( nJobs * sizeof(CJob **) );
897912
int i = nJobs;
898913

899-
while( i-- )
914+
// For all but the last job, don't notify the thread pool yet, so that every thread gets a fair chance to pick one up.
915+
while( --i )
900916
{
901-
jobs[i] = pThreadPool->QueueCall( this, &CParallelProcessor<ITEM_TYPE, ITEM_PROCESSOR_TYPE>::DoExecute );
917+
jobs[i] = pThreadPool->QueueCall( this, JF_NO_NOTIFY, &CParallelProcessor<ITEM_TYPE, ITEM_PROCESSOR_TYPE>::DoExecute );
902918
jobs[i]->SetDescription( m_szDescription );
903919
}
904920

921+
// Last job notifies for all of them!
922+
jobs[0] = pThreadPool->QueueCall( this, &CParallelProcessor<ITEM_TYPE, ITEM_PROCESSOR_TYPE>::DoExecute );
923+
jobs[0]->SetDescription( m_szDescription );
924+
925+
// Do jobs alongside the threads
905926
DoExecute();
906927

907928
for ( i = 0; i < nJobs; i++ )

tier0/threadtools.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,7 +1621,9 @@ int ThreadWaitForEvents(int nEvents, CThreadEvent* const* pEvents, bool bWaitAll
16211621
return false;
16221622
};
16231623

1624+
// UNDONE(mastercoms): TOO OPTIMISTIC: what if it's signaled on this thread, but actually not signaled?
16241625
// Most optimistic case: we have signal state synced already.
1626+
#if 0
16251627
for (int i = 0; i < 20; i++)
16261628
{
16271629
if (lPredSignaledAny())
@@ -1635,6 +1637,7 @@ int ThreadWaitForEvents(int nEvents, CThreadEvent* const* pEvents, bool bWaitAll
16351637
}
16361638
ThreadPause();
16371639
}
1640+
#endif
16381641
if (!bRet)
16391642
{
16401643
// Second optimistic case: we can do an initial check to minimize contention

vstdlib/jobthread.cpp

Lines changed: 68 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class ALIGN16 CJobQueue
9292
return NULL;
9393
}
9494

95-
int Push( CJob *pJob, int iThread = -1 )
95+
int Push( CJob *pJob, bool bNotify = true )
9696
{
9797
pJob->AddRef();
9898

@@ -106,12 +106,15 @@ class ALIGN16 CJobQueue
106106

107107
m_pQueues[pJob->GetPriority()]->PushItem( pJob );
108108

109-
m_mutex.Lock();
110-
if ( ++m_nItems == 1 )
109+
if (bNotify)
111110
{
112-
m_JobAvailableEvent.Set();
111+
m_mutex.Lock();
112+
if ( ++m_nItems == 1 )
113+
{
114+
m_JobAvailableEvent.Set();
115+
}
116+
m_mutex.Unlock();
113117
}
114-
m_mutex.Unlock();
115118

116119
return nOverflow;
117120
}
@@ -598,35 +601,64 @@ int CThreadPool::YieldWait( CThreadEvent **pEvents, int nEvents, bool bWaitAll,
598601
{
599602
tmZone( TELEMETRY_LEVEL0, TMZF_IDLE, "%s(%d) SPINNING %t", __FUNCTION__, timeout, tmSendCallStack( TELEMETRY_LEVEL0, 0 ) );
600603

601-
Assert( timeout == TT_INFINITE ); // unimplemented
602-
603604
int result;
604605
CJob *pJob;
605-
// Always wait for zero milliseconds initially, to let us process jobs on this thread.
606-
timeout = 0;
607-
while ( ( result = ThreadWaitForEvents( nEvents, pEvents, bWaitAll, timeout ) ) == WAIT_TIMEOUT )
606+
607+
if (timeout != TT_INFINITE)
608608
{
609-
if ( !m_bExecOnThreadPoolThreadsOnly && m_SharedQueue.Pop( &pJob ) )
609+
DevWarning("Yielding for non-infinite timeout %d!\n", timeout);
610+
}
611+
612+
// If we aren't exiting this wait immediately, take the opportunity to process jobs on this thread as much as possible.
613+
if (timeout != 0)
614+
{
615+
// If we're still waiting on this thread pool, then process its jobs as much as possible.
616+
while ( (result = ThreadWaitForEvents( nEvents, pEvents, bWaitAll, 0 )) == WAIT_TIMEOUT )
610617
{
611-
ServiceJobAndRelease( pJob );
612-
m_nJobs--;
618+
if ( !m_bExecOnThreadPoolThreadsOnly && m_SharedQueue.Pop( &pJob ) )
619+
{
620+
ServiceJobAndRelease( pJob );
621+
m_nJobs--;
622+
}
623+
#if 1
624+
else
625+
{
626+
// If we can't process jobs or there are no jobs to process, stop trying to respond to them.
627+
break;
628+
}
629+
#else
630+
else
631+
{
632+
// UNDONE(mastercoms): we can just use TT_INFINITE as needed?
633+
634+
// Since there are no jobs for the main thread set the timeout to infinite.
635+
// The only disadvantage to this is that if a job thread creates a new job
636+
// then the main thread will not be available to pick it up, but if that
637+
// is a problem you can just create more worker threads. Debugging test runs
638+
// of TF2 suggests that jobs are only ever added from the main thread which
639+
// means that there is no disadvantage.
640+
// Waiting on the events instead of busy spinning has multiple advantages.
641+
// It avoids wasting CPU time/electricity, it makes it more obvious in profiles
642+
// when the main thread is idle versus busy, and it allows ready thread analysis
643+
// in xperf to find out what woke up a waiting thread.
644+
// It also avoids unnecessary CPU starvation -- seen on customer traces of TF2.
645+
timeout = TT_INFINITE;
646+
}
647+
#endif
613648
}
614-
else
649+
650+
// Now that we have responded to jobs with near-zero latency and there are no more jobs to process, enter our extended wait with the timeout if we still need to wait.
651+
if (result == WAIT_TIMEOUT)
615652
{
616-
// Since there are no jobs for the main thread set the timeout to infinite.
617-
// The only disadvantage to this is that if a job thread creates a new job
618-
// then the main thread will not be available to pick it up, but if that
619-
// is a problem you can just create more worker threads. Debugging test runs
620-
// of TF2 suggests that jobs are only ever added from the main thread which
621-
// means that there is no disadvantage.
622-
// Waiting on the events instead of busy spinning has multiple advantages.
623-
// It avoids wasting CPU time/electricity, it makes it more obvious in profiles
624-
// when the main thread is idle versus busy, and it allows ready thread analysis
625-
// in xperf to find out what woke up a waiting thread.
626-
// It also avoids unnecessary CPU starvation -- seen on customer traces of TF2.
627-
timeout = TT_INFINITE;
653+
result = ThreadWaitForEvents( nEvents, pEvents, bWaitAll, timeout );
628654
}
629655
}
656+
else
657+
{
658+
// We explicitly asked for it, so trust that we have a good reason.
659+
result = ThreadWaitForEvents( nEvents, pEvents, bWaitAll, 0 );
660+
}
661+
630662
return result;
631663
}
632664

@@ -679,6 +711,12 @@ void CThreadPool::AddJob( CJob *pJob )
679711
return;
680712
}
681713

714+
int flags = pJob->GetFlags();
715+
if (flags & JF_NO_ADD)
716+
{
717+
return;
718+
}
719+
682720
if ( m_Threads.Count() == 0 )
683721
{
684722
// So only threadpool jobs are supposed to execute the jobs, but there are no threadpool threads?
@@ -688,8 +726,6 @@ void CThreadPool::AddJob( CJob *pJob )
688726
return;
689727
}
690728

691-
int flags = pJob->GetFlags();
692-
693729
if ( !m_bExecOnThreadPoolThreadsOnly && ( ( flags & ( JF_IO | JF_QUEUE ) ) == 0 ) /* @TBD && !m_queue.Count() */ )
694730
{
695731
if ( !NumIdleThreads() )
@@ -722,7 +758,9 @@ void CThreadPool::InsertJobInQueue( CJob *pJob )
722758
{
723759
CJobQueue *pQueue;
724760

725-
if ( !( pJob->GetFlags() & JF_SERIAL ) )
761+
int flags = pJob->GetFlags();
762+
763+
if ( !( flags & JF_SERIAL ) )
726764
{
727765
int iThread = pJob->GetServiceThread();
728766
if ( iThread == -1 || !m_Threads.IsValidIndex( iThread ) )
@@ -739,7 +777,7 @@ void CThreadPool::InsertJobInQueue( CJob *pJob )
739777
pQueue = &(m_Threads[0]->AccessDirectQueue());
740778
}
741779

742-
m_nJobs -= pQueue->Push( pJob );
780+
m_nJobs -= pQueue->Push( pJob, (flags & JF_NO_NOTIFY) == 0 );
743781
}
744782

745783
//---------------------------------------------------------

0 commit comments

Comments
 (0)