@@ -657,6 +657,7 @@ bool PCM::detectModel()
657
657
std::cerr << " IBRS and IBPB supported : " << ((cpuinfo.reg .edx & (1 << 26 )) ? " yes" : " no" ) << " \n " ;
658
658
std::cerr << " STIBP supported : " << ((cpuinfo.reg .edx & (1 << 27 )) ? " yes" : " no" ) << " \n " ;
659
659
std::cerr << " Spec arch caps supported : " << ((cpuinfo.reg .edx & (1 << 29 )) ? " yes" : " no" ) << " \n " ;
660
+ std::cerr << " Max CPUID level : " << max_cpuid << " \n " ;
660
661
661
662
return true ;
662
663
}
@@ -1597,9 +1598,17 @@ bool PCM::detectNominalFrequency()
1597
1598
{
1598
1599
if (MSR.size ())
1599
1600
{
1600
- uint64 freq = 0 ;
1601
- MSR[socketRefCore[0 ]]->read (PLATFORM_INFO_ADDR, &freq);
1602
- const uint64 bus_freq = (
1601
+ if (max_cpuid >= 0x16 )
1602
+ {
1603
+ PCM_CPUID_INFO cpuinfo;
1604
+ pcm_cpuid (0x16 , cpuinfo);
1605
+ nominal_frequency = uint64 (extract_bits_ui (cpuinfo.reg .eax , 0 , 15 )) * 1000000ULL ;;
1606
+ }
1607
+ if (!nominal_frequency)
1608
+ {
1609
+ uint64 freq = 0 ;
1610
+ MSR[socketRefCore[0 ]]->read (PLATFORM_INFO_ADDR, &freq);
1611
+ const uint64 bus_freq = (
1603
1612
cpu_model == SANDY_BRIDGE
1604
1613
|| cpu_model == JAKETOWN
1605
1614
|| cpu_model == IVYTOWN
@@ -1620,11 +1629,17 @@ bool PCM::detectNominalFrequency()
1620
1629
|| cpu_model == ICX
1621
1630
) ? (100000000ULL ) : (133333333ULL );
1622
1631
1623
- nominal_frequency = ((freq >> 8 ) & 255 ) * bus_freq;
1632
+ nominal_frequency = ((freq >> 8 ) & 255 ) * bus_freq;
1633
+ }
1624
1634
1625
1635
if (!nominal_frequency)
1626
1636
nominal_frequency = get_frequency_from_cpuid ();
1627
1637
1638
+ if (!nominal_frequency)
1639
+ {
1640
+ computeNominalFrequency ();
1641
+ }
1642
+
1628
1643
if (!nominal_frequency)
1629
1644
{
1630
1645
std::cerr << " Error: Can not detect core frequency.\n " ;
@@ -2163,7 +2178,7 @@ PCM::PCM() :
2163
2178
cpu_model (-1 ),
2164
2179
cpu_stepping (-1 ),
2165
2180
cpu_microcode_level (-1 ),
2166
- max_cpuid (- 1 ),
2181
+ max_cpuid (0 ),
2167
2182
threads_per_core (0 ),
2168
2183
num_cores (0 ),
2169
2184
num_sockets (0 ),
@@ -2481,12 +2496,12 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2481
2496
canUsePerf = false ;
2482
2497
if (!silent) std::cerr << " Can not use Linux perf because your Linux kernel does not support PERF_COUNT_HW_REF_CPU_CYCLES event. Falling-back to direct PMU programming.\n " ;
2483
2498
}
2484
- else if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg && pExtDesc->fixedCfg ->value != 0x333 )
2499
+ else if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg && ( pExtDesc->fixedCfg ->value & 0x444 ) )
2485
2500
{
2486
2501
canUsePerf = false ;
2487
2502
if (!silent)
2488
2503
{
2489
- std::cerr << " Can not use Linux perf because non-standard fixed counter configuration requested (0x" << std::hex << pExtDesc->fixedCfg ->value
2504
+ std::cerr << " Can not use Linux perf because \" any_thread \" fixed counter configuration requested (0x" << std::hex << pExtDesc->fixedCfg ->value
2490
2505
<< std::dec << " ) =\n " << *(pExtDesc->fixedCfg ) << " \n Falling-back to direct PMU programming.\n\n " ;
2491
2506
}
2492
2507
}
@@ -2504,6 +2519,12 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2504
2519
canUsePerf = false ;
2505
2520
if (!silent) std::cerr << " Installed Linux kernel perf does not support hardware top-down level-1 counters. Using direct PMU programming instead.\n " ;
2506
2521
}
2522
+
2523
+ if (canUsePerf == false && noMSRMode ())
2524
+ {
2525
+ std::cerr << " ERROR: can not use perf driver and no-MSR mode is enabled\n " ;
2526
+ return PCM::UnknownError;
2527
+ }
2507
2528
#endif
2508
2529
2509
2530
if (allow_multiple_instances)
@@ -2948,6 +2969,8 @@ void PCM::checkError(const PCM::ErrorCode code)
2948
2969
}
2949
2970
}
2950
2971
2972
// Mutex taken (via std::lock_guard) in PCM::programCoreCounters right before
// error messages are written to std::cerr.
// NOTE(review): presumably programCoreCounters runs concurrently for several
// cores and this keeps their messages from interleaving - confirm with callers.
+ std::mutex printErrorMutex;
2973
+
2951
2974
PCM::ErrorCode PCM::programCoreCounters (const int i /* core */ ,
2952
2975
const PCM::ProgramMode mode_,
2953
2976
const ExtendedCustomCoreEventDescription * pExtDesc,
@@ -2957,6 +2980,31 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2957
2980
2958
2981
result.clear ();
2959
2982
FixedEventControlRegister ctrl_reg;
2983
+ auto initFixedCtrl = [&](const bool & enableCtr3)
2984
+ {
2985
+ if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg )
2986
+ {
2987
+ ctrl_reg = *(pExtDesc->fixedCfg );
2988
+ }
2989
+ else
2990
+ {
2991
+ ctrl_reg.value = 0 ;
2992
+ ctrl_reg.fields .os0 = 1 ;
2993
+ ctrl_reg.fields .usr0 = 1 ;
2994
+
2995
+ ctrl_reg.fields .os1 = 1 ;
2996
+ ctrl_reg.fields .usr1 = 1 ;
2997
+
2998
+ ctrl_reg.fields .os2 = 1 ;
2999
+ ctrl_reg.fields .usr2 = 1 ;
3000
+
3001
+ if (enableCtr3 && isFixedCounterSupported (3 ))
3002
+ {
3003
+ ctrl_reg.fields .os3 = 1 ;
3004
+ ctrl_reg.fields .usr3 = 1 ;
3005
+ }
3006
+ }
3007
+ };
2960
3008
#ifdef PCM_USE_PERF
2961
3009
int leader_counter = -1 ;
2962
3010
auto programPerfEvent = [this , &leader_counter, &i](perf_event_attr & e, const int eventPos, const std::string & eventName) -> bool
@@ -2965,6 +3013,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2965
3013
if ((perfEventHandle[i][eventPos] = syscall (SYS_perf_event_open, &e, -1 ,
2966
3014
i /* core id */ , leader_counter /* group leader */ , 0 )) <= 0 )
2967
3015
{
3016
+ std::lock_guard<std::mutex> _ (printErrorMutex);
2968
3017
std::cerr << " Linux Perf: Error when programming " << eventName << " , error: " << strerror (errno) <<
2969
3018
" with config 0x" << std::hex << e.config <<
2970
3019
" config1 0x" << e.config1 << std::dec << " \n " ;
@@ -2983,20 +3032,30 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
2983
3032
};
2984
3033
if (canUsePerf)
2985
3034
{
3035
+ initFixedCtrl (false );
2986
3036
perf_event_attr e = PCM_init_perf_event_attr ();
2987
3037
e.type = PERF_TYPE_HARDWARE;
2988
3038
e.config = PERF_COUNT_HW_INSTRUCTIONS;
3039
+ e.exclude_kernel = 1 - ctrl_reg.fields .os0 ;
3040
+ e.exclude_hv = e.exclude_kernel ;
3041
+ e.exclude_user = 1 - ctrl_reg.fields .usr0 ;
2989
3042
if (programPerfEvent (e, PERF_INST_RETIRED_POS, " INST_RETIRED" ) == false )
2990
3043
{
2991
3044
return PCM::UnknownError;
2992
3045
}
2993
3046
leader_counter = perfEventHandle[i][PERF_INST_RETIRED_POS];
2994
3047
e.config = PERF_COUNT_HW_CPU_CYCLES;
3048
+ e.exclude_kernel = 1 - ctrl_reg.fields .os1 ;
3049
+ e.exclude_hv = e.exclude_kernel ;
3050
+ e.exclude_user = 1 - ctrl_reg.fields .usr1 ;
2995
3051
if (programPerfEvent (e, PERF_CPU_CLK_UNHALTED_THREAD_POS, " CPU_CLK_UNHALTED_THREAD" ) == false )
2996
3052
{
2997
3053
return PCM::UnknownError;
2998
3054
}
2999
3055
e.config = PCM_PERF_COUNT_HW_REF_CPU_CYCLES;
3056
+ e.exclude_kernel = 1 - ctrl_reg.fields .os2 ;
3057
+ e.exclude_hv = e.exclude_kernel ;
3058
+ e.exclude_user = 1 - ctrl_reg.fields .usr2 ;
3000
3059
if (programPerfEvent (e, PERF_CPU_CLK_UNHALTED_REF_POS, " CPU_CLK_UNHALTED_REF" ) == false )
3001
3060
{
3002
3061
return PCM::UnknownError;
@@ -3009,30 +3068,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
3009
3068
MSR[i]->write (IA32_CR_PERF_GLOBAL_CTRL, 0 );
3010
3069
MSR[i]->read (IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value );
3011
3070
3012
-
3013
- if (EXT_CUSTOM_CORE_EVENTS == mode_ && pExtDesc && pExtDesc->fixedCfg )
3014
- {
3015
- ctrl_reg = *(pExtDesc->fixedCfg );
3016
- }
3017
- else
3018
- {
3019
- ctrl_reg.value = 0 ;
3020
-
3021
- ctrl_reg.fields .os0 = 1 ;
3022
- ctrl_reg.fields .usr0 = 1 ;
3023
-
3024
- ctrl_reg.fields .os1 = 1 ;
3025
- ctrl_reg.fields .usr1 = 1 ;
3026
-
3027
- ctrl_reg.fields .os2 = 1 ;
3028
- ctrl_reg.fields .usr2 = 1 ;
3029
-
3030
- if (isFixedCounterSupported (3 ))
3031
- {
3032
- ctrl_reg.fields .os3 = 1 ;
3033
- ctrl_reg.fields .usr3 = 1 ;
3034
- }
3035
- }
3071
+ initFixedCtrl (true );
3036
3072
3037
3073
MSR[i]->write (INST_RETIRED_ADDR, 0 );
3038
3074
MSR[i]->write (CPU_CLK_UNHALTED_THREAD_ADDR, 0 );
@@ -3206,6 +3242,7 @@ PCM::ErrorCode PCM::programCoreCounters(const int i /* core */,
3206
3242
}
3207
3243
else
3208
3244
{
3245
+ std::lock_guard<std::mutex> _ (printErrorMutex);
3209
3246
std::cerr << " ERROR: unknown token " << token << " in event description \" " << eventDesc << " \" from " << event.first << " \n " ;
3210
3247
decrementInstanceSemaphore ();
3211
3248
return PCM::UnknownError;
@@ -3441,11 +3478,11 @@ uint64 RDTSC();
3441
3478
void PCM::computeNominalFrequency ()
3442
3479
{
3443
3480
const int ref_core = 0 ;
3444
- uint64 before = 0 , after = 0 ;
3445
- MSR[ref_core]-> read (IA32_TIME_STAMP_COUNTER, &before );
3446
- MySleepMs ( 1000 );
3447
- MSR[ref_core]-> read (IA32_TIME_STAMP_COUNTER, & after);
3448
- nominal_frequency = after-before ;
3481
+ const uint64 before = getInvariantTSC_Fast (ref_core) ;
3482
+ MySleepMs ( 100 );
3483
+ const uint64 after = getInvariantTSC_Fast (ref_core );
3484
+ nominal_frequency = 10ULL *( after-before );
3485
+ std::cerr << " WARNING: Core nominal frequency has to be estimated \n " ;
3449
3486
}
3450
3487
std::string PCM::getCPUBrandString ()
3451
3488
{
@@ -3691,9 +3728,9 @@ bool PCM::PMUinUse()
3691
3728
3692
3729
for (uint32 j = 0 ; j < core_gen_counter_num_max; ++j)
3693
3730
{
3694
- MSR[i]->read (IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value );
3731
+ const auto count = MSR[i]->read (IA32_PERFEVTSEL0_ADDR + j, &event_select_reg.value );
3695
3732
3696
- if (event_select_reg.fields .event_select != 0 || event_select_reg.fields .apic_int != 0 )
3733
+ if (count && ( event_select_reg.fields .event_select != 0 || event_select_reg.fields .apic_int != 0 ) )
3697
3734
{
3698
3735
std::cerr << " WARNING: Core " << i <<" IA32_PERFEVTSEL" << j << " _ADDR is not zeroed " << event_select_reg.value << " \n " ;
3699
3736
@@ -3709,12 +3746,12 @@ bool PCM::PMUinUse()
3709
3746
FixedEventControlRegister ctrl_reg;
3710
3747
ctrl_reg.value = 0xffffffffffffffff ;
3711
3748
3712
- MSR[i]->read (IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value );
3749
+ const auto count = MSR[i]->read (IA32_CR_FIXED_CTR_CTRL, &ctrl_reg.value );
3713
3750
3714
3751
// Check if someone has installed pmi handler on counter overflow.
3715
3752
// If so, that agent might potentially need to change counter value
3716
3753
// for the "sample after"-mode messing up PCM measurements
3717
- if ( ctrl_reg.fields .enable_pmi0 || ctrl_reg.fields .enable_pmi1 || ctrl_reg.fields .enable_pmi2 )
3754
+ if (count && ( ctrl_reg.fields .enable_pmi0 || ctrl_reg.fields .enable_pmi1 || ctrl_reg.fields .enable_pmi2 ) )
3718
3755
{
3719
3756
std::cerr << " WARNING: Core " << i << " fixed ctrl:" << ctrl_reg.value << " \n " ;
3720
3757
if (needToRestoreNMIWatchdog == false ) // if NMI watchdog did not clear the fields, ignore it
@@ -4088,6 +4125,18 @@ bool PCM::supportsRTM() const
4088
4125
return (info.reg .ebx & (0x1 << 11 )) ? true : false ;
4089
4126
}
4090
4127
4128
+ bool PCM::supportsRDTSCP () const
4129
+ {
4130
+ static int supports = -1 ;
4131
+ if (supports < 0 )
4132
+ {
4133
+ PCM_CPUID_INFO info;
4134
+ pcm_cpuid (0x80000001 , info);
4135
+ supports = (info.reg .edx & (0x1 << 27 )) ? 1 : 0 ;
4136
+ }
4137
+ return 1 == supports;
4138
+ }
4139
+
4091
4140
#ifdef __APPLE__
4092
4141
4093
4142
uint32 PCM::getNumInstances ()
@@ -4208,12 +4257,24 @@ bool PCM::decrementInstanceSemaphore()
4208
4257
4209
4258
// Returns floor(multiplier * TSC(core) / nominal frequency), where the TSC is
// obtained from getInvariantTSC_Fast(core).
//
// The quotient is computed as (tsc / freq) * multiplier plus the scaled
// remainder, which is mathematically identical to the direct formula but does
// not form the product multiplier * tsc, which overflows 64 bits once the
// counter has grown large. The remainder term can still overflow only if
// multiplier * freq itself exceeds 64 bits.
//
// Returns 0 when the nominal frequency is 0 (not detected) instead of
// dividing by zero.
uint64 PCM::getTickCount(uint64 multiplier, uint32 core)
{
    const uint64 freq = getNominalFrequency();
    if (freq == 0)
    {
        return 0;
    }
    const uint64 tsc = getInvariantTSC_Fast(core);
    return (tsc / freq) * multiplier + ((tsc % freq) * multiplier) / freq;
}
4213
4262
4214
- uint64 PCM::getTickCountRDTSCP (uint64 multiplier )
4263
+ uint64 PCM::getInvariantTSC_Fast (uint32 core )
4215
4264
{
4216
- return (multiplier*RDTSCP ())/getNominalFrequency ();
4265
+ if (supportsRDTSCP ())
4266
+ {
4267
+ TemporalThreadAffinity aff (core);
4268
+ return RDTSCP ();
4269
+ }
4270
+ else if (core < MSR.size ())
4271
+ {
4272
+ uint64 cInvariantTSC = 0 ;
4273
+ MSR[core]->read (IA32_TIME_STAMP_COUNTER, &cInvariantTSC);
4274
+ if (cInvariantTSC) return cInvariantTSC;
4275
+ }
4276
+ std::cerr << " ERROR: cannot read time stamp counter\n " ;
4277
+ return 0ULL ;
4217
4278
}
4218
4279
4219
4280
SystemCounterState getSystemCounterState ()
@@ -4285,7 +4346,7 @@ void BasicCounterState::readAndAggregateTSC(std::shared_ptr<SafeMsrHandle> msr)
4285
4346
const auto cpu_model = m->getCPUModel ();
4286
4347
if (m->isAtom () == false || cpu_model == PCM::AVOTON)
4287
4348
{
4288
- msr->read (IA32_TIME_STAMP_COUNTER, &cInvariantTSC );
4349
+ cInvariantTSC = m-> getInvariantTSC_Fast ( msr->getCoreId () );
4289
4350
MSRValues[IA32_TIME_STAMP_COUNTER] = cInvariantTSC;
4290
4351
}
4291
4352
else
@@ -4685,7 +4746,11 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& curPMUConfigs_, const bool sile
4685
4746
auto corePMUConfig = curPMUConfigs[" core" ];
4686
4747
if (corePMUConfig.programmable .size () > (size_t )getMaxCustomCoreEvents ())
4687
4748
{
4688
- std::cerr << " ERROR: trying to program " << corePMUConfig.programmable .size () << " core PMU counters, which exceeds the max num possible (" << getMaxCustomCoreEvents () << " )." ;
4749
+ std::cerr << " ERROR: trying to program " << corePMUConfig.programmable .size () << " core PMU counters, which exceeds the max num possible (" << getMaxCustomCoreEvents () << " ).\n " ;
4750
+ for (const auto & e : corePMUConfig.programmable )
4751
+ {
4752
+ std::cerr << " Event: " << e.second << " \n " ;
4753
+ }
4689
4754
return PCM::UnknownError;
4690
4755
}
4691
4756
size_t c = 0 ;
@@ -5537,7 +5602,7 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
5537
5602
// std::cout << "Energy status: " << val << "\n";
5538
5603
MSR[refCore]->read (MSR_PACKAGE_THERM_STATUS,&val);
5539
5604
result.PackageThermalHeadroom = extractThermalHeadroom (val);
5540
- MSR[refCore]-> read (IA32_TIME_STAMP_COUNTER, & result.InvariantTSC );
5605
+ result.InvariantTSC = getInvariantTSC_Fast (refCore );
5541
5606
readAndAggregatePackageCStateResidencies (MSR[refCore], result);
5542
5607
}
5543
5608
// std::cout << std::flush;
0 commit comments