Skip to content

Commit 32e6834

Browse files
authored
Merge pull request #163 from intel-innersource/rdementi/changes-2022-02-09
Rdementi/changes 2022 02 09
2 parents 781d335 + 24ca657 commit 32e6834

File tree

5 files changed

+161
-53
lines changed

5 files changed

+161
-53
lines changed

src/cpucounters.cpp

Lines changed: 111 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,7 @@ bool PCM::detectModel()
657657
std::cerr << "IBRS and IBPB supported : " << ((cpuinfo.reg.edx & (1 << 26)) ? "yes" : "no") << "\n";
658658
std::cerr << "STIBP supported : " << ((cpuinfo.reg.edx & (1 << 27)) ? "yes" : "no") << "\n";
659659
std::cerr << "Spec arch caps supported : " << ((cpuinfo.reg.edx & (1 << 29)) ? "yes" : "no") << "\n";
660+
std::cerr << "Max CPUID level : " << max_cpuid << "\n";
660661

661662
return true;
662663
}
@@ -1597,9 +1598,17 @@ bool PCM::detectNominalFrequency()
15971598
{
15981599
if (MSR.size())
15991600
{
1600-
uint64 freq = 0;
1601-
MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq);
1602-
const uint64 bus_freq = (
1601+
if (max_cpuid >= 0x16)
1602+
{
1603+
PCM_CPUID_INFO cpuinfo;
1604+
pcm_cpuid(0x16, cpuinfo);
1605+
nominal_frequency = uint64(extract_bits_ui(cpuinfo.reg.eax, 0, 15)) * 1000000ULL;;
1606+
}
1607+
if (!nominal_frequency)
1608+
{
1609+
uint64 freq = 0;
1610+
MSR[socketRefCore[0]]->read(PLATFORM_INFO_ADDR, &freq);
1611+
const uint64 bus_freq = (
16031612
cpu_model == SANDY_BRIDGE
16041613
|| cpu_model == JAKETOWN
16051614
|| cpu_model == IVYTOWN
@@ -1620,11 +1629,17 @@ bool PCM::detectNominalFrequency()
16201629
|| cpu_model == ICX
16211630
) ? (100000000ULL) : (133333333ULL);
16221631

1623-
nominal_frequency = ((freq >> 8) & 255) * bus_freq;
1632+
nominal_frequency = ((freq >> 8) & 255) * bus_freq;
1633+
}
16241634

16251635
if(!nominal_frequency)
16261636
nominal_frequency = get_frequency_from_cpuid();
16271637

1638+
if(!nominal_frequency)
1639+
{
1640+
computeNominalFrequency();
1641+
}
1642+
16281643
if(!nominal_frequency)
16291644
{
16301645
std::cerr << "Error: Can not detect core frequency.\n";
@@ -2163,7 +2178,7 @@ PCM::PCM() :
21632178
cpu_model(-1),
21642179
cpu_stepping(-1),
21652180
cpu_microcode_level(-1),
2166-
max_cpuid(-1),
2181+
max_cpuid(0),
21672182
threads_per_core(0),
21682183
num_cores(0),
21692184
num_sockets(0),
@@ -2481,12 +2496,12 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
24812496
canUsePerf = false;
24822497
if (!silent) std::cerr << "Can not use Linux perf because your Linux kernel does not support PERF_COUNT_HW_REF_CPU_CYCLES event. Falling-back to direct PMU programming.\n";
24832498
}
2484-
else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg && pExtDesc->fixedCfg->value != 0x333)
2499+
else if(EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg && (pExtDesc->fixedCfg->value & 0x444))
24852500
{
24862501
canUsePerf = false;
24872502
if (!silent)
24882503
{
2489-
std::cerr << "Can not use Linux perf because non-standard fixed counter configuration requested (0x" << std::hex << pExtDesc->fixedCfg->value
2504+
std::cerr << "Can not use Linux perf because \"any_thread\" fixed counter configuration requested (0x" << std::hex << pExtDesc->fixedCfg->value
24902505
<< std::dec << ") =\n" << *(pExtDesc->fixedCfg) << "\nFalling-back to direct PMU programming.\n\n";
24912506
}
24922507
}
@@ -2504,6 +2519,12 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
25042519
canUsePerf = false;
25052520
if (!silent) std::cerr << "Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n";
25062521
}
2522+
2523+
if (canUsePerf == false && noMSRMode())
2524+
{
2525+
std::cerr << "ERROR: can not use perf driver and no-MSR mode is enabled\n" ;
2526+
return PCM::UnknownError;
2527+
}
25072528
#endif
25082529

25092530
if(allow_multiple_instances)
@@ -2948,6 +2969,8 @@ void PCM::checkError(const PCM::ErrorCode code)
29482969
}
29492970
}
29502971

2972+
std::mutex printErrorMutex;
2973+
29512974
PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
29522975
const PCM::ProgramMode mode_,
29532976
const ExtendedCustomCoreEventDescription * pExtDesc,
@@ -2957,6 +2980,31 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
29572980

29582981
result.clear();
29592982
FixedEventControlRegister ctrl_reg;
2983+
auto initFixedCtrl = [&](const bool & enableCtr3)
2984+
{
2985+
if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
2986+
{
2987+
ctrl_reg = *(pExtDesc->fixedCfg);
2988+
}
2989+
else
2990+
{
2991+
ctrl_reg.value = 0;
2992+
ctrl_reg.fields.os0 = 1;
2993+
ctrl_reg.fields.usr0 = 1;
2994+
2995+
ctrl_reg.fields.os1 = 1;
2996+
ctrl_reg.fields.usr1 = 1;
2997+
2998+
ctrl_reg.fields.os2 = 1;
2999+
ctrl_reg.fields.usr2 = 1;
3000+
3001+
if (enableCtr3 && isFixedCounterSupported(3))
3002+
{
3003+
ctrl_reg.fields.os3 = 1;
3004+
ctrl_reg.fields.usr3 = 1;
3005+
}
3006+
}
3007+
};
29603008
#ifdef PCM_USE_PERF
29613009
int leader_counter = -1;
29623010
auto programPerfEvent = [this, &leader_counter, &i](perf_event_attr & e, const int eventPos, const std::string & eventName) -> bool
@@ -2965,6 +3013,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
29653013
if ((perfEventHandle[i][eventPos] = syscall(SYS_perf_event_open, &e, -1,
29663014
i /* core id */, leader_counter /* group leader */, 0)) <= 0)
29673015
{
3016+
std::lock_guard<std::mutex> _(printErrorMutex);
29683017
std::cerr << "Linux Perf: Error when programming " << eventName << ", error: " << strerror(errno) <<
29693018
" with config 0x" << std::hex << e.config <<
29703019
" config1 0x" << e.config1 << std::dec << "\n";
@@ -2983,20 +3032,30 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
29833032
};
29843033
if (canUsePerf)
29853034
{
3035+
initFixedCtrl(false);
29863036
perf_event_attr e = PCM_init_perf_event_attr();
29873037
e.type = PERF_TYPE_HARDWARE;
29883038
e.config = PERF_COUNT_HW_INSTRUCTIONS;
3039+
e.exclude_kernel = 1 - ctrl_reg.fields.os0;
3040+
e.exclude_hv = e.exclude_kernel;
3041+
e.exclude_user = 1 - ctrl_reg.fields.usr0;
29893042
if (programPerfEvent(e, PERF_INST_RETIRED_POS, "INST_RETIRED") == false)
29903043
{
29913044
return PCM::UnknownError;
29923045
}
29933046
leader_counter = perfEventHandle[i][PERF_INST_RETIRED_POS];
29943047
e.config = PERF_COUNT_HW_CPU_CYCLES;
3048+
e.exclude_kernel = 1 - ctrl_reg.fields.os1;
3049+
e.exclude_hv = e.exclude_kernel;
3050+
e.exclude_user = 1 - ctrl_reg.fields.usr1;
29953051
if (programPerfEvent(e, PERF_CPU_CLK_UNHALTED_THREAD_POS, "CPU_CLK_UNHALTED_THREAD") == false)
29963052
{
29973053
return PCM::UnknownError;
29983054
}
29993055
e.config = PCM_PERF_COUNT_HW_REF_CPU_CYCLES;
3056+
e.exclude_kernel = 1 - ctrl_reg.fields.os2;
3057+
e.exclude_hv = e.exclude_kernel;
3058+
e.exclude_user = 1 - ctrl_reg.fields.usr2;
30003059
if (programPerfEvent(e, PERF_CPU_CLK_UNHALTED_REF_POS, "CPU_CLK_UNHALTED_REF") == false)
30013060
{
30023061
return PCM::UnknownError;
@@ -3009,30 +3068,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
30093068
MSR[i]->write(IA32_CR_PERF_GLOBAL_CTRL, 0);
30103069
MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
30113070

3012-
3013-
if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg)
3014-
{
3015-
ctrl_reg = *(pExtDesc->fixedCfg);
3016-
}
3017-
else
3018-
{
3019-
ctrl_reg.value = 0;
3020-
3021-
ctrl_reg.fields.os0 = 1;
3022-
ctrl_reg.fields.usr0 = 1;
3023-
3024-
ctrl_reg.fields.os1 = 1;
3025-
ctrl_reg.fields.usr1 = 1;
3026-
3027-
ctrl_reg.fields.os2 = 1;
3028-
ctrl_reg.fields.usr2 = 1;
3029-
3030-
if (isFixedCounterSupported(3))
3031-
{
3032-
ctrl_reg.fields.os3 = 1;
3033-
ctrl_reg.fields.usr3 = 1;
3034-
}
3035-
}
3071+
initFixedCtrl(true);
30363072

30373073
MSR[i]->write(INST_RETIRED_ADDR, 0);
30383074
MSR[i]->write(CPU_CLK_UNHALTED_THREAD_ADDR, 0);
@@ -3206,6 +3242,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
32063242
}
32073243
else
32083244
{
3245+
std::lock_guard<std::mutex> _(printErrorMutex);
32093246
std::cerr << "ERROR: unknown token " << token << " in event description \"" << eventDesc << "\" from " << event.first << "\n";
32103247
decrementInstanceSemaphore();
32113248
return PCM::UnknownError;
@@ -3441,11 +3478,11 @@ uint64 RDTSC();
34413478
void PCM::computeNominalFrequency()
34423479
{
34433480
const int ref_core = 0;
3444-
uint64 before = 0, after = 0;
3445-
MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &before);
3446-
MySleepMs(1000);
3447-
MSR[ref_core]->read(IA32_TIME_STAMP_COUNTER, &after);
3448-
nominal_frequency = after-before;
3481+
const uint64 before = getInvariantTSC_Fast(ref_core);
3482+
MySleepMs(100);
3483+
const uint64 after = getInvariantTSC_Fast(ref_core);
3484+
nominal_frequency = 10ULL*(after-before);
3485+
std::cerr << "WARNING: Core nominal frequency has to be estimated\n";
34493486
}
34503487
std::string PCM::getCPUBrandString()
34513488
{
@@ -3691,9 +3728,9 @@ bool PCM::PMUinUse()
36913728

36923729
for (uint32 j = 0; j < core_gen_counter_num_max; ++j)
36933730
{
3694-
MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value);
3731+
const auto count = MSR[i]->read(IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value);
36953732

3696-
if (event_select_reg.fields.event_select != 0 || event_select_reg.fields.apic_int != 0)
3733+
if (count && (event_select_reg.fields.event_select != 0 || event_select_reg.fields.apic_int != 0))
36973734
{
36983735
std::cerr << "WARNING: Core " << i <<" IA32_PERFEVTSEL" << j << "_ADDR is not zeroed " << event_select_reg.value << "\n";
36993736

@@ -3709,12 +3746,12 @@ bool PCM::PMUinUse()
37093746
FixedEventControlRegister ctrl_reg;
37103747
ctrl_reg.value = 0xffffffffffffffff;
37113748

3712-
MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
3749+
const auto count = MSR[i]->read(IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value);
37133750

37143751
// Check if someone has installed pmi handler on counter overflow.
37153752
// If so, that agent might potentially need to change counter value
37163753
// for the "sample after"-mode messing up PCM measurements
3717-
if(ctrl_reg.fields.enable_pmi0 || ctrl_reg.fields.enable_pmi1 || ctrl_reg.fields.enable_pmi2)
3754+
if (count && (ctrl_reg.fields.enable_pmi0 || ctrl_reg.fields.enable_pmi1 || ctrl_reg.fields.enable_pmi2))
37183755
{
37193756
std::cerr << "WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << "\n";
37203757
if (needToRestoreNMIWatchdog == false) // if NMI watchdog did not clear the fields, ignore it
@@ -4088,6 +4125,18 @@ bool PCM::supportsRTM() const
40884125
return (info.reg.ebx & (0x1 << 11)) ? true : false;
40894126
}
40904127

4128+
bool PCM::supportsRDTSCP() const
4129+
{
4130+
static int supports = -1;
4131+
if (supports < 0)
4132+
{
4133+
PCM_CPUID_INFO info;
4134+
pcm_cpuid(0x80000001, info);
4135+
supports = (info.reg.edx & (0x1 << 27)) ? 1 : 0;
4136+
}
4137+
return 1 == supports;
4138+
}
4139+
40914140
#ifdef __APPLE__
40924141

40934142
uint32 PCM::getNumInstances()
@@ -4208,12 +4257,24 @@ bool PCM::decrementInstanceSemaphore()
42084257

42094258
//! \brief Returns the time stamp counter of a core converted to time units
//! \param multiplier use 1 for seconds, 1000 for ms, 1000000 for mks, etc
//! \param core core whose TSC is read via getInvariantTSC_Fast()
//! \return TSC value scaled by multiplier and divided by the nominal frequency;
//!         0 if the nominal frequency is not known (zero)
uint64 PCM::getTickCount(uint64 multiplier, uint32 core)
{
    const uint64 freq = getNominalFrequency();
    if (freq == 0)
    {
        // Avoid an integer division by zero when the frequency was not detected.
        return 0;
    }
    const uint64 tsc = getInvariantTSC_Fast(core);
    // Mathematically equal to (multiplier * tsc) / freq, but split into whole
    // seconds and remainder so the intermediate product does not overflow
    // 64 bits once the TSC has grown large (long uptime or big multiplier).
    const uint64 wholeSeconds = tsc / freq;
    const uint64 remainderTicks = tsc % freq;
    return wholeSeconds * multiplier + (remainderTicks * multiplier) / freq;
}
42134262

4214-
uint64 PCM::getTickCountRDTSCP(uint64 multiplier)
4263+
//! \brief Reads the time stamp counter of the given core
//!
//! Uses RDTSCP under a temporary thread affinity to the core when
//! supportsRDTSCP() reports the instruction as available; otherwise reads
//! IA32_TIME_STAMP_COUNTER through the core's MSR handle, if one exists.
//! \param core core to read the counter from
//! \return counter value, or 0 (after printing an error) if no non-zero value
//!         could be read through the MSR path
uint64 PCM::getInvariantTSC_Fast(uint32 core)
{
    if (supportsRDTSCP())
    {
        // The affinity object stays alive until the return value is computed.
        TemporalThreadAffinity pinToCore(core);
        return RDTSCP();
    }

    uint64 tsc = 0;
    if (core < MSR.size())
    {
        MSR[core]->read(IA32_TIME_STAMP_COUNTER, &tsc);
    }
    if (tsc == 0)
    {
        std::cerr << "ERROR: cannot read time stamp counter\n";
    }
    return tsc;
}
42184279

42194280
SystemCounterState getSystemCounterState()
@@ -4285,7 +4346,7 @@ void BasicCounterState::readAndAggregateTSC(std::shared_ptr<SafeMsrHandle> msr)
42854346
const auto cpu_model = m->getCPUModel();
42864347
if (m->isAtom() == false || cpu_model == PCM::AVOTON)
42874348
{
4288-
msr->read(IA32_TIME_STAMP_COUNTER, &cInvariantTSC);
4349+
cInvariantTSC = m->getInvariantTSC_Fast(msr->getCoreId());
42894350
MSRValues[IA32_TIME_STAMP_COUNTER] = cInvariantTSC;
42904351
}
42914352
else
@@ -4685,7 +4746,11 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
46854746
auto corePMUConfig = curPMUConfigs["core"];
46864747
if (corePMUConfig.programmable.size() > (size_t)getMaxCustomCoreEvents())
46874748
{
4688-
std::cerr << "ERROR: trying to program " << corePMUConfig.programmable.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").";
4749+
std::cerr << "ERROR: trying to program " << corePMUConfig.programmable.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").\n";
4750+
for (const auto & e : corePMUConfig.programmable)
4751+
{
4752+
std::cerr << " Event: " << e.second << "\n";
4753+
}
46894754
return PCM::UnknownError;
46904755
}
46914756
size_t c = 0;
@@ -5537,7 +5602,7 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
55375602
//std::cout << "Energy status: " << val << "\n";
55385603
MSR[refCore]->read(MSR_PACKAGE_THERM_STATUS,&val);
55395604
result.PackageThermalHeadroom = extractThermalHeadroom(val);
5540-
MSR[refCore]->read(IA32_TIME_STAMP_COUNTER, &result.InvariantTSC);
5605+
result.InvariantTSC = getInvariantTSC_Fast(refCore);
55415606
readAndAggregatePackageCStateResidencies(MSR[refCore], result);
55425607
}
55435608
// std::cout << std::flush;

src/cpucounters.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ class PCM_API PCM
568568
bool hybrid = false;
569569
int32 cpu_stepping;
570570
int64 cpu_microcode_level;
571-
int32 max_cpuid;
571+
uint32 max_cpuid;
572572
int32 threads_per_core;
573573
int32 num_cores;
574574
int32 num_sockets;
@@ -1695,11 +1695,7 @@ class PCM_API PCM
16951695
//! \return time counter value
16961696
uint64 getTickCount(uint64 multiplier = 1000 /* ms */, uint32 core = 0);
16971697

1698-
//! \brief Return TSC timer value in time units using rdtscp instruction from current core
1699-
//! \param multiplier use 1 for seconds, 1000 for ms, 1000000 for mks, etc (default is 1000: ms)
1700-
//! \warning Processor support is required bit 27 of cpuid EDX must be set, for Windows, Visual Studio 2010 is required
1701-
//! \return time counter value
1702-
uint64 getTickCountRDTSCP(uint64 multiplier = 1000 /* ms */);
1698+
uint64 getInvariantTSC_Fast(uint32 core = 0);
17031699

17041700
//! \brief Returns uncore clock ticks on specified socket
17051701
uint64 getUncoreClocks(const uint32 socket_);
@@ -2101,6 +2097,7 @@ class PCM_API PCM
21012097

21022098
bool supportsHLE() const;
21032099
bool supportsRTM() const;
2100+
bool supportsRDTSCP() const;
21042101

21052102
bool useSkylakeEvents() const
21062103
{

0 commit comments

Comments
 (0)