@@ -352,6 +352,11 @@ void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & inf
352
352
#endif
353
353
}
354
354
355
// Forward declarations of the Linux-only NMI watchdog helpers; their definitions appear further down in this file.
+ #ifdef __linux__
356
+ bool isNMIWatchdogEnabled (const bool silent); // called by PCM::readCoreCounterConfig() and PCM::program()
357
+ bool keepNMIWatchdogEnabled (); // true when the PCM_KEEP_NMI_WATCHDOG environment variable equals 1
358
+ #endif
359
+
355
360
void PCM::readCoreCounterConfig (const bool complainAboutMSR)
356
361
{
357
362
if (max_cpuid >= 0xa )
@@ -412,6 +417,11 @@ void PCM::readCoreCounterConfig(const bool complainAboutMSR)
412
417
std::cerr << " INFO: Reducing the number of programmable counters to 3 to workaround the fixed cycle counter virtualization issue on AWS.\n " ;
413
418
std::cerr << " You can disable the workaround by setting PCM_NO_AWS_WORKAROUND=1 environment variable\n " ;
414
419
}
420
+ if (isNMIWatchdogEnabled (true ) && keepNMIWatchdogEnabled ())
421
+ {
422
+ --core_gen_counter_num_max;
423
+ std::cerr << " INFO: Reducing the number of programmable counters to " << core_gen_counter_num_max << " because NMI watchdog is enabled.\n " ;
424
+ }
415
425
#endif
416
426
}
417
427
}
@@ -1980,6 +1990,18 @@ void PCM::initUncorePMUsPerf()
1980
1990
1981
1991
#ifdef __linux__
1982
1992
1993
+ const char * keepNMIWatchdogEnabledEnvStr = " PCM_KEEP_NMI_WATCHDOG" ;
1994
+
1995
+ bool keepNMIWatchdogEnabled ()
1996
+ {
1997
+ static int keep = -1 ;
1998
+ if (keep < 0 )
1999
+ {
2000
+ keep = (safe_getenv (keepNMIWatchdogEnabledEnvStr) == std::string (" 1" )) ? 1 : 0 ;
2001
+ }
2002
+ return keep == 1 ;
2003
+ }
2004
+
1983
2005
#define PCM_NMI_WATCHDOG_PATH " /proc/sys/kernel/nmi_watchdog"
1984
2006
1985
2007
bool isNMIWatchdogEnabled (const bool silent)
@@ -1995,7 +2017,11 @@ bool isNMIWatchdogEnabled(const bool silent)
1995
2017
1996
2018
void disableNMIWatchdog (const bool silent)
1997
2019
{
1998
- if (!silent) std::cerr << " Disabling NMI watchdog since it consumes one hw-PMU counter.\n " ;
2020
+ if (!silent)
2021
+ {
2022
+ std::cerr << " Disabling NMI watchdog since it consumes one hw-PMU counter. To keep NMU watchdog set environment variable "
2023
+ << keepNMIWatchdogEnabledEnvStr << " =1 (this reduces the core metrics set)\n " ;
2024
+ }
1999
2025
writeSysFS (PCM_NMI_WATCHDOG_PATH, " 0" );
2000
2026
}
2001
2027
@@ -2423,7 +2449,7 @@ perf_event_attr PCM_init_perf_event_attr(bool group = true)
2423
2449
PCM::ErrorCode PCM::program (const PCM::ProgramMode mode_, const void * parameter_, const bool silent, const int pid)
2424
2450
{
2425
2451
#ifdef __linux__
2426
- if (isNMIWatchdogEnabled (silent))
2452
+ if (isNMIWatchdogEnabled (silent) && ( keepNMIWatchdogEnabled () == false ) )
2427
2453
{
2428
2454
disableNMIWatchdog (silent);
2429
2455
needToRestoreNMIWatchdog = true ;
@@ -2725,6 +2751,13 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter
2725
2751
std::cerr << " PCM ERROR: pid monitoring is only supported with Linux perf_event driver\n " ;
2726
2752
return PCM::UnknownError;
2727
2753
}
2754
+ #ifdef __linux__
2755
+ if (isNMIWatchdogEnabled (silent) && (canUsePerf == false ))
2756
+ {
2757
+ std::cerr << " PCM ERROR: Unsupported mode. NMI watchdog is enabled and Linux perf_event driver is not used\n " ;
2758
+ return PCM::UnknownError;
2759
+ }
2760
+ #endif
2728
2761
2729
2762
std::vector<int > tids{};
2730
2763
#ifdef PCM_USE_PERF
0 commit comments