Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 40970bc

Browse files
authored
Enable tiered jitting for R2R methods (#15967) (#16040)
Enable tiered jitting for R2R methods - Included R2R methods and generics over value types in CoreLib for tiered jitting. Tier 0 for R2R methods is the precompiled code if available, and tier 1 is selectively scheduled based on call counting. - Added a delay before starting to count calls for tier 1 promotion. The delay is a short duration after frequent tier 0 jitting stops (current heuristic for identifying startup). - Startup time and steady-state performance have improved on JitBench. There is a regression shortly following startup due to call counting and tier 1 jitting, for a short duration before steady-state performance stabilizes. - Added two new config values, one for configuring the call count threshold for promoting to tier 1, and another for specifying the delay from the last tier 0 JIT invocation before starting to count calls
1 parent 2f402dc commit 40970bc

17 files changed

+395
-24
lines changed

src/inc/clrconfigvalues.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
730730
///
731731
#ifdef FEATURE_TIERED_COMPILATION
732732
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Enables tiered compilation")
733+
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
734+
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
733735
#endif
734736

735737

src/vm/arm/cgencpu.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ class BaseDomain;
2929
class ZapNode;
3030
struct ArgLocDesc;
3131

32+
extern PCODE GetPreStubEntryPoint();
33+
3234
#define USE_REDIRECT_FOR_GCSTRESS
3335

3436
// CPU-dependent functions
@@ -1113,6 +1115,19 @@ struct StubPrecode {
11131115
return m_pTarget;
11141116
}
11151117

1118+
// Resets this precode's target: atomically overwrites m_pTarget with the
// address returned by GetPreStubEntryPoint(). No expected previous value is
// taken or compared, so the store is unconditional.
// Contract: declared THROWS and GC_TRIGGERS.
void ResetTargetInterlocked()
1119+
{
1120+
CONTRACTL
1121+
{
1122+
THROWS;
1123+
GC_TRIGGERS;
1124+
}
1125+
CONTRACTL_END;
1126+
1127+
// Invoked before the store; presumably makes the page holding m_pTarget writable - TODO confirm.
EnsureWritableExecutablePages(&m_pTarget);
1128+
// Unconditional interlocked store; both operands are cast to LONG (assumes a 32-bit m_pTarget on ARM - confirm).
InterlockedExchange((LONG*)&m_pTarget, (LONG)GetPreStubEntryPoint());
1129+
}
1130+
11161131
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
11171132
{
11181133
CONTRACTL
@@ -1206,6 +1221,19 @@ struct FixupPrecode {
12061221
return m_pTarget;
12071222
}
12081223

1224+
// Resets this precode's target: atomically overwrites m_pTarget with the
// entry point of PrecodeFixupThunk (obtained via GetEEFuncEntryPoint). No
// expected previous value is taken or compared, so the store is unconditional.
// Contract: declared THROWS and GC_TRIGGERS.
void ResetTargetInterlocked()
1225+
{
1226+
CONTRACTL
1227+
{
1228+
THROWS;
1229+
GC_TRIGGERS;
1230+
}
1231+
CONTRACTL_END;
1232+
1233+
// Invoked before the store; presumably makes the page holding m_pTarget writable - TODO confirm.
EnsureWritableExecutablePages(&m_pTarget);
1234+
// Unconditional interlocked store; both operands are cast to LONG (assumes a 32-bit m_pTarget on ARM - confirm).
InterlockedExchange((LONG*)&m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk));
1235+
}
1236+
12091237
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
12101238
{
12111239
CONTRACTL

src/vm/arm64/cgencpu.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
2424

2525
class ComCallMethodDesc;
2626

27+
extern PCODE GetPreStubEntryPoint();
2728

2829
#define COMMETHOD_PREPAD 24 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
2930
#ifdef FEATURE_COMINTEROP
@@ -572,6 +573,19 @@ struct StubPrecode {
572573
return m_pTarget;
573574
}
574575

576+
// Resets this precode's target: atomically overwrites m_pTarget with the
// address returned by GetPreStubEntryPoint(). No expected previous value is
// taken or compared, so the store is unconditional.
// Contract: declared THROWS and GC_TRIGGERS.
void ResetTargetInterlocked()
577+
{
578+
CONTRACTL
579+
{
580+
THROWS;
581+
GC_TRIGGERS;
582+
}
583+
CONTRACTL_END;
584+
585+
// Invoked before the store; presumably makes the page holding m_pTarget writable - TODO confirm.
EnsureWritableExecutablePages(&m_pTarget);
586+
// Unconditional 64-bit interlocked store. NOTE(review): the destination is cast to LONGLONG* while
// the value is cast to TADDR, relying on an implicit conversion - confirm this is intended.
InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetPreStubEntryPoint());
587+
}
588+
575589
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
576590
{
577591
CONTRACTL
@@ -685,6 +699,19 @@ struct FixupPrecode {
685699
return m_pTarget;
686700
}
687701

702+
// Resets this precode's target: atomically overwrites m_pTarget with the
// entry point of PrecodeFixupThunk (obtained via GetEEFuncEntryPoint). No
// expected previous value is taken or compared, so the store is unconditional.
// Contract: declared THROWS and GC_TRIGGERS.
void ResetTargetInterlocked()
703+
{
704+
CONTRACTL
705+
{
706+
THROWS;
707+
GC_TRIGGERS;
708+
}
709+
CONTRACTL_END;
710+
711+
// Invoked before the store; presumably makes the page holding m_pTarget writable - TODO confirm.
EnsureWritableExecutablePages(&m_pTarget);
712+
// Unconditional 64-bit interlocked store. NOTE(review): the destination is cast to LONGLONG* while
// the value is cast to TADDR, relying on an implicit conversion - confirm this is intended.
InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk));
713+
}
714+
688715
BOOL SetTargetInterlocked(TADDR target, TADDR expected)
689716
{
690717
CONTRACTL

src/vm/callcounter.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,18 @@ CallCounter::CallCounter()
3232
// Sets *shouldStopCountingCallsRef to TRUE if no future invocations are needed (we reached the count we cared about)
3333
// and FALSE otherwise. It is permissible to keep calling even when TRUE was previously
3434
// reported and multi-threaded race conditions will surely cause this to occur.
35-
BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
35+
void CallCounter::OnMethodCalled(
36+
MethodDesc* pMethodDesc,
37+
TieredCompilationManager *pTieredCompilationManager,
38+
BOOL* shouldStopCountingCallsRef,
39+
BOOL* wasPromotedToTier1Ref)
3640
{
3741
STANDARD_VM_CONTRACT;
3842

3943
_ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
44+
_ASSERTE(pTieredCompilationManager != nullptr);
45+
_ASSERTE(shouldStopCountingCallsRef != nullptr);
46+
_ASSERTE(wasPromotedToTier1Ref != nullptr);
4047

4148
// PERF: This is a simple to implement, but not so performant, call counter
4249
// Currently this is only called until we reach a fixed call count and then
@@ -75,7 +82,7 @@ BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
7582
}
7683
}
7784

78-
return GetAppDomain()->GetTieredCompilationManager()->OnMethodCalled(pMethodDesc, callCount);
85+
pTieredCompilationManager->OnMethodCalled(pMethodDesc, callCount, shouldStopCountingCallsRef, wasPromotedToTier1Ref);
7986
}
8087

8188
#endif // FEATURE_TIERED_COMPILATION

src/vm/callcounter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class CallCounter
7070
CallCounter();
7171
#endif
7272

73-
BOOL OnMethodCalled(MethodDesc* pMethodDesc);
73+
void OnMethodCalled(MethodDesc* pMethodDesc, TieredCompilationManager *pTieredCompilationManager, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
7474

7575
private:
7676

src/vm/ceemain.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,7 +1102,16 @@ void EEStartupHelper(COINITIEE fFlags)
11021102
hr = S_OK;
11031103
STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "===================EEStartup Completed===================");
11041104

1105-
#if defined(_DEBUG) && !defined(CROSSGEN_COMPILE)
1105+
#ifndef CROSSGEN_COMPILE
1106+
1107+
#ifdef FEATURE_TIERED_COMPILATION
1108+
if (g_pConfig->TieredCompilation())
1109+
{
1110+
SystemDomain::System()->DefaultDomain()->GetTieredCompilationManager()->InitiateTier1CountingDelay();
1111+
}
1112+
#endif
1113+
1114+
#ifdef _DEBUG
11061115

11071116
//if g_fEEStarted was false when we loaded the System Module, we did not run ExpandAll on it. In
11081117
//this case, make sure we run ExpandAll here. The rationale is that if we Jit before g_fEEStarted
@@ -1120,7 +1129,9 @@ void EEStartupHelper(COINITIEE fFlags)
11201129
// Perform mscorlib consistency check if requested
11211130
g_Mscorlib.CheckExtended();
11221131

1123-
#endif // _DEBUG && !CROSSGEN_COMPILE
1132+
#endif // _DEBUG
1133+
1134+
#endif // !CROSSGEN_COMPILE
11241135

11251136
ErrExit: ;
11261137
}

src/vm/codeversion.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2177,12 +2177,14 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
21772177
// attempt to publish the active version still under the lock
21782178
if (FAILED(hr = PublishNativeCodeVersion(pMethodDesc, activeVersion, fEESuspend)))
21792179
{
2180-
// if we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
2181-
// and when we leave the lock the active version might change again. However now we know that suspend
2180+
// If we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
2181+
// and when we leave the lock the active version might change again. However now we know that suspend is
2182+
// necessary.
21822183
if (hr == CORPROF_E_RUNTIME_SUSPEND_REQUIRED)
21832184
{
21842185
_ASSERTE(!fEESuspend);
21852186
fEESuspend = true;
2187+
continue; // skip RestartEE() below since SuspendEE() has not been called yet
21862188
}
21872189
else
21882190
{
@@ -2215,6 +2217,8 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
22152217

22162218
HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, NativeCodeVersion nativeCodeVersion, BOOL fEESuspended)
22172219
{
2220+
// TODO: This function needs to make sure it does not change the precode's target if call counting is in progress. Track
2221+
// whether call counting is currently being done for the method, and use a lock to ensure the expected precode target.
22182222
LIMITED_METHOD_CONTRACT;
22192223
_ASSERTE(LockOwnedByCurrentThread());
22202224
_ASSERTE(pMethod->IsVersionable());
@@ -2236,7 +2240,12 @@ HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, Native
22362240
{
22372241
EX_TRY
22382242
{
2239-
hr = pPrecode->SetTargetInterlocked(pCode, FALSE) ? S_OK : E_FAIL;
2243+
pPrecode->SetTargetInterlocked(pCode, FALSE);
2244+
2245+
// SetTargetInterlocked() would return false if it lost the race with another thread. That is fine, this thread
2246+
// can continue assuming it was successful, similarly to it successfully updating the target and another thread
2247+
// updating the target again shortly afterwards.
2248+
hr = S_OK;
22402249
}
22412250
EX_CATCH_HRESULT(hr);
22422251
return hr;

src/vm/eeconfig.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ HRESULT EEConfig::Init()
378378

379379
#if defined(FEATURE_TIERED_COMPILATION)
380380
fTieredCompilation = false;
381+
tieredCompilation_tier1CallCountThreshold = 1;
382+
tieredCompilation_tier1CallCountingDelayMs = 0;
381383
#endif
382384

383385
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1250,6 +1252,14 @@ HRESULT EEConfig::sync()
12501252

12511253
#if defined(FEATURE_TIERED_COMPILATION)
12521254
fTieredCompilation = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation) != 0;
1255+
tieredCompilation_tier1CallCountThreshold =
1256+
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold);
1257+
if (tieredCompilation_tier1CallCountThreshold < 1)
1258+
{
1259+
tieredCompilation_tier1CallCountThreshold = 1;
1260+
}
1261+
tieredCompilation_tier1CallCountingDelayMs =
1262+
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs);
12531263
#endif
12541264

12551265
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)

src/vm/eeconfig.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ class EEConfig
285285
// Tiered Compilation config
286286
#if defined(FEATURE_TIERED_COMPILATION)
287287
bool TieredCompilation(void) const {LIMITED_METHOD_CONTRACT; return fTieredCompilation; }
288+
DWORD TieredCompilation_Tier1CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountThreshold; }
289+
DWORD TieredCompilation_Tier1CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountingDelayMs; }
288290
#endif
289291

290292
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1109,6 +1111,8 @@ class EEConfig
11091111

11101112
#if defined(FEATURE_TIERED_COMPILATION)
11111113
bool fTieredCompilation;
1114+
DWORD tieredCompilation_tier1CallCountThreshold;
1115+
DWORD tieredCompilation_tier1CallCountingDelayMs;
11121116
#endif
11131117

11141118
#if defined(FEATURE_GDBJIT) && defined(_DEBUG)

src/vm/i386/stublinkerx86.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6416,6 +6416,21 @@ void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
64166416

64176417
#ifndef DACCESS_COMPILE
64186418

6419+
// Unconditionally rewrites the 32-bit relative displacement stored at pRel32
// so that it refers to 'target', using an interlocked exchange.
//
// pRel32 - address of the rel32 field to patch; asserted to be INT32-aligned.
// target - destination the displacement should reach.
// pMD    - forwarded to rel32UsingJumpStub together with pRel32 and target.
//
// Unlike the overload that also takes 'expected', this one takes no expected
// previous value and returns nothing.
// Contract: declared THROWS and GC_TRIGGERS.
void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD)
6420+
{
6421+
CONTRACTL
6422+
{
6423+
THROWS; // Creating a JumpStub could throw OutOfMemory
6424+
GC_TRIGGERS;
6425+
}
6426+
CONTRACTL_END;
6427+
6428+
// Compute the new displacement (per the contract note above, this may create a jump stub).
INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
6429+
6430+
// The field must be naturally aligned for the 32-bit interlocked store below.
_ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
6431+
FastInterlockExchange((LONG*)pRel32, (LONG)targetRel32);
6432+
}
6433+
64196434
BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
64206435
{
64216436
CONTRACTL
@@ -6535,6 +6550,33 @@ void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int
65356550
}
65366551
}
65376552

6553+
// Resets this FixupPrecode so that it calls PrecodeFixupThunk again.
// A modified copy of the precode is built locally (opcode set to
// X86_INSTR_CALL_REL32, type set to TypePrestub, rel32 retargeted at
// PrecodeFixupThunk) and then written over *this with a single 64-bit
// interlocked exchange.
// Contract: declared THROWS and GC_NOTRIGGER.
void FixupPrecode::ResetTargetInterlocked()
6554+
{
6555+
CONTRACTL
6556+
{
6557+
THROWS; // Creating a JumpStub could throw OutOfMemory
6558+
GC_NOTRIGGER;
6559+
}
6560+
CONTRACTL_END;
6561+
6562+
// Work on a local copy so the live precode changes only in the final exchange.
FixupPrecode newValue = *this;
6563+
newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6564+
newValue.m_type = FixupPrecode::TypePrestub;
6565+
6566+
PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
6567+
MethodDesc* pMD = (MethodDesc*)GetMethodDesc();
6568+
// The displacement is computed relative to the live field (&m_rel32), not the local copy.
// When FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS is defined, LCG methods use
// the preallocated jump stub; every other case goes through rel32UsingJumpStub.
newValue.m_rel32 =
6569+
#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6570+
pMD->IsLCGMethod() ?
6571+
rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub()) :
6572+
#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6573+
rel32UsingJumpStub(&m_rel32, target, pMD);
6574+
6575+
// The first sizeof(INT64) bytes of the precode are replaced in one exchange, so 'this'
// must be 8-byte aligned.
_ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
6576+
// Invoked before the store; presumably makes those bytes writable - TODO confirm.
EnsureWritableExecutablePages(this, sizeof(INT64));
6577+
FastInterlockExchangeLong((INT64*)this, *(INT64*)&newValue);
6578+
}
6579+
65386580
BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
65396581
{
65406582
CONTRACTL

0 commit comments

Comments
 (0)