Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit db93fec

Browse files
kouvel (Koundinya Veluri)
authored and committed
Apply tiering's call counting delay more broadly (#18610)
Port of #18610 to 2.2.

Issues:
- When some time passes between process startup and the first significant use of the app, startup perf with tiering can be slower because the call counting delay is no longer in effect.
- This is especially true when the process is affinitized to one CPU.

Fixes:
- Initiate and prolong the call counting delay upon tier 0 activity (jitting or R2R code lookup for a new method).
- Stop call counting for a called method when the delay is in effect.
- Stop (and don't start) tier 1 jitting when the delay is in effect.
- After the delay, resume call counting and tier 1 jitting.
- If the process is affinitized to one CPU at process startup, multiply the delay by 10.

No change in benchmarks.
1 parent 9ea3d2a commit db93fec

File tree

10 files changed

+415
-212
lines changed

10 files changed

+415
-212
lines changed

src/inc/CrstTypes.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,3 +787,7 @@ End
787787
Crst ReadyToRunEntryPointToMethodDescMap
788788
AcquiredBefore ExecuteManRangeLock
789789
End
790+
791+
Crst TieredCompilation
792+
AcquiredBefore ThreadpoolTimerQueue
793+
End

src/inc/clrconfigvalues.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
653653
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 0, "Enables tiered compilation")
654654
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LEGACY_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Deprecated - Use COMPLUS_TieredCompilation")
655655
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
656-
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
656+
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "A perpetual delay in milliseconds that is applied to tier 1 call counting and jitting, while there is tier 0 activity.")
657+
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1DelaySingleProcMultiplier, W("TieredCompilation_Tier1DelaySingleProcMultiplier"), 10, "Multiplier for TieredCompilation_Tier1CallCountingDelayMs that is applied on a single-processor machine or when the process is affinitized to a single processor.")
657658

658659
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Test_CallCounting, W("TieredCompilation_Test_CallCounting"), 1, "Enabled by default (only activates when TieredCompilation is also enabled). If disabled immediately backpatches prestub, and likely prevents any tier1 promotion")
659660
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Test_OptimizeTier0, W("TieredCompilation_Test_OptimizeTier0"), 0, "Use optimized codegen (normally used by tier1) in tier0")

src/inc/crsttypes.h

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -176,18 +176,19 @@ enum CrstType
176176
CrstThreadpoolWorker = 157,
177177
CrstThreadStaticDataHashTable = 158,
178178
CrstThreadStore = 159,
179-
CrstTPMethodTable = 160,
180-
CrstTypeEquivalenceMap = 161,
181-
CrstTypeIDMap = 162,
182-
CrstUMEntryThunkCache = 163,
183-
CrstUMThunkHash = 164,
184-
CrstUniqueStack = 165,
185-
CrstUnresolvedClassLock = 166,
186-
CrstUnwindInfoTableLock = 167,
187-
CrstVSDIndirectionCellLock = 168,
188-
CrstWinRTFactoryCache = 169,
189-
CrstWrapperTemplate = 170,
190-
kNumberOfCrstTypes = 171
179+
CrstTieredCompilation = 160,
180+
CrstTPMethodTable = 161,
181+
CrstTypeEquivalenceMap = 162,
182+
CrstTypeIDMap = 163,
183+
CrstUMEntryThunkCache = 164,
184+
CrstUMThunkHash = 165,
185+
CrstUniqueStack = 166,
186+
CrstUnresolvedClassLock = 167,
187+
CrstUnwindInfoTableLock = 168,
188+
CrstVSDIndirectionCellLock = 169,
189+
CrstWinRTFactoryCache = 170,
190+
CrstWrapperTemplate = 171,
191+
kNumberOfCrstTypes = 172
191192
};
192193

193194
#endif // __CRST_TYPES_INCLUDED
@@ -358,6 +359,7 @@ int g_rgCrstLevelMap[] =
358359
11, // CrstThreadpoolWorker
359360
4, // CrstThreadStaticDataHashTable
360361
10, // CrstThreadStore
362+
9, // CrstTieredCompilation
361363
9, // CrstTPMethodTable
362364
3, // CrstTypeEquivalenceMap
363365
7, // CrstTypeIDMap
@@ -534,6 +536,7 @@ LPCSTR g_rgCrstNameMap[] =
534536
"CrstThreadpoolWorker",
535537
"CrstThreadStaticDataHashTable",
536538
"CrstThreadStore",
539+
"CrstTieredCompilation",
537540
"CrstTPMethodTable",
538541
"CrstTypeEquivalenceMap",
539542
"CrstTypeIDMap",

src/inc/utilcode.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,7 @@ class CPUGroupInfo
14311431
static BOOL m_threadUseAllCpuGroups;
14321432
static WORD m_initialGroup;
14331433
static CPU_Group_Info *m_CPUGroupInfoArray;
1434+
static bool s_hadSingleProcessorAtStartup;
14341435

14351436
static BOOL InitCPUGroupInfoAPI();
14361437
static BOOL InitCPUGroupInfoArray();
@@ -1485,6 +1486,13 @@ class CPUGroupInfo
14851486
static void ChooseCPUGroupAffinity(GROUP_AFFINITY *gf);
14861487
static void ClearCPUGroupAffinity(GROUP_AFFINITY *gf);
14871488
#endif
1489+
1490+
public:
1491+
static bool HadSingleProcessorAtStartup()
1492+
{
1493+
LIMITED_METHOD_CONTRACT;
1494+
return s_hadSingleProcessorAtStartup;
1495+
}
14881496
};
14891497

14901498
int GetCurrentProcessCpuCount();

src/utilcode/util.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -852,13 +852,14 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
852852
}
853853
#endif
854854

855-
/*static*/ BOOL CPUGroupInfo::m_enableGCCPUGroups = FALSE;
856-
/*static*/ BOOL CPUGroupInfo::m_threadUseAllCpuGroups = FALSE;
857-
/*static*/ WORD CPUGroupInfo::m_nGroups = 0;
858-
/*static*/ WORD CPUGroupInfo::m_nProcessors = 0;
859-
/*static*/ WORD CPUGroupInfo::m_initialGroup = 0;
855+
/*static*/ BOOL CPUGroupInfo::m_enableGCCPUGroups = FALSE;
856+
/*static*/ BOOL CPUGroupInfo::m_threadUseAllCpuGroups = FALSE;
857+
/*static*/ WORD CPUGroupInfo::m_nGroups = 0;
858+
/*static*/ WORD CPUGroupInfo::m_nProcessors = 0;
859+
/*static*/ WORD CPUGroupInfo::m_initialGroup = 0;
860860
/*static*/ CPU_Group_Info *CPUGroupInfo::m_CPUGroupInfoArray = NULL;
861-
/*static*/ LONG CPUGroupInfo::m_initialization = 0;
861+
/*static*/ LONG CPUGroupInfo::m_initialization = 0;
862+
/*static*/ bool CPUGroupInfo::s_hadSingleProcessorAtStartup = false;
862863

863864
// Check and setup function pointers for >64 LP Support
864865
/*static*/ BOOL CPUGroupInfo::InitCPUGroupInfoAPI()
@@ -1066,6 +1067,18 @@ DWORD LCM(DWORD u, DWORD v)
10661067
m_enableGCCPUGroups = enableGCCPUGroups && hasMultipleGroups;
10671068
m_threadUseAllCpuGroups = threadUseAllCpuGroups && hasMultipleGroups;
10681069
#endif // _TARGET_AMD64_ || _TARGET_ARM64_
1070+
1071+
// Determine if the process is affinitized to a single processor (or if the system has a single processor)
1072+
DWORD_PTR processAffinityMask, systemAffinityMask;
1073+
if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask))
1074+
{
1075+
processAffinityMask &= systemAffinityMask;
1076+
if (processAffinityMask != 0 && // only one CPU group is involved
1077+
(processAffinityMask & (processAffinityMask - 1)) == 0) // only one bit is set
1078+
{
1079+
s_hadSingleProcessorAtStartup = true;
1080+
}
1081+
}
10691082
}
10701083

10711084
/*static*/ BOOL CPUGroupInfo::IsInitialized()

src/vm/ceemain.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,13 +1097,6 @@ void EEStartupHelper(COINITIEE fFlags)
10971097

10981098
#ifndef CROSSGEN_COMPILE
10991099

1100-
#ifdef FEATURE_TIERED_COMPILATION
1101-
if (g_pConfig->TieredCompilation())
1102-
{
1103-
SystemDomain::System()->DefaultDomain()->GetTieredCompilationManager()->InitiateTier1CountingDelay();
1104-
}
1105-
#endif
1106-
11071100
#ifdef _DEBUG
11081101

11091102
//if g_fEEStarted was false when we loaded the System Module, we did not run ExpandAll on it. In

src/vm/eeconfig.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,8 +1253,22 @@ HRESULT EEConfig::sync()
12531253
{
12541254
tieredCompilation_tier1CallCountThreshold = 1;
12551255
}
1256+
12561257
tieredCompilation_tier1CallCountingDelayMs =
12571258
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs);
1259+
if (CPUGroupInfo::HadSingleProcessorAtStartup())
1260+
{
1261+
DWORD delayMultiplier =
1262+
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1DelaySingleProcMultiplier);
1263+
if (delayMultiplier > 1)
1264+
{
1265+
DWORD newDelay = tieredCompilation_tier1CallCountingDelayMs * delayMultiplier;
1266+
if (newDelay / delayMultiplier == tieredCompilation_tier1CallCountingDelayMs)
1267+
{
1268+
tieredCompilation_tier1CallCountingDelayMs = newDelay;
1269+
}
1270+
}
1271+
}
12581272
#endif
12591273

12601274
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)

src/vm/prestub.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -733,15 +733,6 @@ PCODE MethodDesc::JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, J
733733

734734
}
735735

736-
#ifdef FEATURE_TIERED_COMPILATION
737-
if (g_pConfig->TieredCompilation() && flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0))
738-
{
739-
// The flag above is only set (in TieredCompilationManager::GetJitFlags()) when this method was eligible for tiered
740-
// compilation at the time when it was checked, and a tier 0 JIT was requested for this method
741-
GetAppDomain()->GetTieredCompilationManager()->OnTier0JitInvoked();
742-
}
743-
#endif // FEATURE_TIERED_COMPILATION
744-
745736
#ifdef FEATURE_STACK_SAMPLING
746737
StackSampler::RecordJittingInfo(this, flags);
747738
#endif // FEATURE_STACK_SAMPLING

0 commit comments

Comments
 (0)