@@ -1317,26 +1317,21 @@ static void bic_disable_msr_access(void)
1317
1317
bic_enabled &= ~bic_msrs ;
1318
1318
}
1319
1319
1320
- static void bic_disable_perf_access (void )
1321
- {
1322
- const unsigned long bic_perf = BIC_IPC ;
1323
-
1324
- bic_enabled &= ~bic_perf ;
1325
- }
1326
-
1327
1320
static long perf_event_open (struct perf_event_attr * hw_event , pid_t pid , int cpu , int group_fd , unsigned long flags )
1328
1321
{
1329
1322
assert (!no_perf );
1330
1323
1331
1324
return syscall (__NR_perf_event_open , hw_event , pid , cpu , group_fd , flags );
1332
1325
}
1333
1326
1334
- static long open_perf_counter_or_fail (int cpu , unsigned int type , unsigned int config , int group_fd , __u64 read_format )
1327
+ static long open_perf_counter (int cpu , unsigned int type , unsigned int config , int group_fd , __u64 read_format )
1335
1328
{
1336
1329
struct perf_event_attr attr ;
1337
1330
const pid_t pid = -1 ;
1338
1331
const unsigned long flags = 0 ;
1339
1332
1333
+ assert (!no_perf );
1334
+
1340
1335
memset (& attr , 0 , sizeof (struct perf_event_attr ));
1341
1336
1342
1337
attr .type = type ;
@@ -1347,15 +1342,6 @@ static long open_perf_counter_or_fail(int cpu, unsigned int type, unsigned int c
1347
1342
attr .read_format = read_format ;
1348
1343
1349
1344
const int fd = perf_event_open (& attr , pid , cpu , group_fd , flags );
1350
- if (fd == -1 ) {
1351
- if (errno == EACCES ) {
1352
- errx (1 , "capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\""
1353
- " or use --no-perf or run as root" , progname );
1354
- } else {
1355
- perror ("perf_event_open" );
1356
- errx (1 , "use --no-perf or run as root" );
1357
- }
1358
- }
1359
1345
1360
1346
return fd ;
1361
1347
}
@@ -1365,8 +1351,7 @@ int get_instr_count_fd(int cpu)
1365
1351
if (fd_instr_count_percpu [cpu ])
1366
1352
return fd_instr_count_percpu [cpu ];
1367
1353
1368
- fd_instr_count_percpu [cpu ] =
1369
- open_perf_counter_or_fail (cpu , PERF_TYPE_HARDWARE , PERF_COUNT_HW_INSTRUCTIONS , -1 , 0 );
1354
+ fd_instr_count_percpu [cpu ] = open_perf_counter (cpu , PERF_TYPE_HARDWARE , PERF_COUNT_HW_INSTRUCTIONS , -1 , 0 );
1370
1355
1371
1356
return fd_instr_count_percpu [cpu ];
1372
1357
}
@@ -2833,8 +2818,8 @@ static struct amperf_group_fd open_amperf_fd(int cpu)
2833
2818
const unsigned int mperf_config = read_mperf_config ();
2834
2819
struct amperf_group_fd fds = {.aperf = -1 ,.mperf = -1 };
2835
2820
2836
- fds .aperf = open_perf_counter_or_fail (cpu , msr_type , aperf_config , -1 , PERF_FORMAT_GROUP );
2837
- fds .mperf = open_perf_counter_or_fail (cpu , msr_type , mperf_config , fds .aperf , PERF_FORMAT_GROUP );
2821
+ fds .aperf = open_perf_counter (cpu , msr_type , aperf_config , -1 , PERF_FORMAT_GROUP );
2822
+ fds .mperf = open_perf_counter (cpu , msr_type , mperf_config , fds .aperf , PERF_FORMAT_GROUP );
2838
2823
2839
2824
return fds ;
2840
2825
}
@@ -4509,7 +4494,8 @@ void msr_sum_record(void)
4509
4494
4510
4495
/*
4511
4496
* set_my_sched_priority(pri)
4512
- * return previous
4497
+ * return previous priority on success
4498
+ * return value < -20 on failure
4513
4499
*/
4514
4500
int set_my_sched_priority (int priority )
4515
4501
{
@@ -4519,16 +4505,16 @@ int set_my_sched_priority(int priority)
4519
4505
errno = 0 ;
4520
4506
original_priority = getpriority (PRIO_PROCESS , 0 );
4521
4507
if (errno && (original_priority == -1 ))
4522
- err ( errno , "getpriority" ) ;
4508
+ return -21 ;
4523
4509
4524
4510
retval = setpriority (PRIO_PROCESS , 0 , priority );
4525
4511
if (retval )
4526
- errx ( retval , "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"" , progname ) ;
4512
+ return -21 ;
4527
4513
4528
4514
errno = 0 ;
4529
4515
retval = getpriority (PRIO_PROCESS , 0 );
4530
4516
if (retval != priority )
4531
- err ( retval , "getpriority(%d) != setpriority(%d)" , retval , priority ) ;
4517
+ return -21 ;
4532
4518
4533
4519
return original_priority ;
4534
4520
}
@@ -4543,6 +4529,9 @@ void turbostat_loop()
4543
4529
4544
4530
/*
4545
4531
* elevate own priority for interval mode
4532
+ *
4533
+ * ignore on error - we probably don't have permission to set it, but
4534
+ * it's not a big deal
4546
4535
*/
4547
4536
set_my_sched_priority (-20 );
4548
4537
@@ -4628,10 +4617,13 @@ void check_dev_msr()
4628
4617
struct stat sb ;
4629
4618
char pathname [32 ];
4630
4619
4620
+ if (no_msr )
4621
+ return ;
4622
+
4631
4623
sprintf (pathname , "/dev/cpu/%d/msr" , base_cpu );
4632
4624
if (stat (pathname , & sb ))
4633
4625
if (system ("/sbin/modprobe msr > /dev/null 2>&1" ))
4634
- err ( -5 , "no /dev/cpu/0/msr, Try \"# modprobe msr\" " ) ;
4626
+ no_msr = 1 ;
4635
4627
}
4636
4628
4637
4629
/*
@@ -4643,47 +4635,51 @@ int check_for_cap_sys_rawio(void)
4643
4635
{
4644
4636
cap_t caps ;
4645
4637
cap_flag_value_t cap_flag_value ;
4638
+ int ret = 0 ;
4646
4639
4647
4640
caps = cap_get_proc ();
4648
4641
if (caps == NULL )
4649
- err ( -6 , "cap_get_proc\n" ) ;
4642
+ return 1 ;
4650
4643
4651
- if (cap_get_flag (caps , CAP_SYS_RAWIO , CAP_EFFECTIVE , & cap_flag_value ))
4652
- err (-6 , "cap_get\n" );
4644
+ if (cap_get_flag (caps , CAP_SYS_RAWIO , CAP_EFFECTIVE , & cap_flag_value )) {
4645
+ ret = 1 ;
4646
+ goto free_and_exit ;
4647
+ }
4653
4648
4654
4649
if (cap_flag_value != CAP_SET ) {
4655
- warnx ( "capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"" , progname ) ;
4656
- return 1 ;
4650
+ ret = 1 ;
4651
+ goto free_and_exit ;
4657
4652
}
4658
4653
4654
+ free_and_exit :
4659
4655
if (cap_free (caps ) == -1 )
4660
4656
err (-6 , "cap_free\n" );
4661
4657
4662
- return 0 ;
4658
+ return ret ;
4663
4659
}
4664
4660
4665
- void check_permissions (void )
4661
+ void check_msr_permission (void )
4666
4662
{
4667
- int do_exit = 0 ;
4663
+ int failed = 0 ;
4668
4664
char pathname [32 ];
4669
4665
4666
+ if (no_msr )
4667
+ return ;
4668
+
4670
4669
/* check for CAP_SYS_RAWIO */
4671
- do_exit += check_for_cap_sys_rawio ();
4670
+ failed += check_for_cap_sys_rawio ();
4672
4671
4673
4672
/* test file permissions */
4674
4673
sprintf (pathname , "/dev/cpu/%d/msr" , base_cpu );
4675
4674
if (euidaccess (pathname , R_OK )) {
4676
- do_exit ++ ;
4677
- warn ("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr, or run with --no-msr" );
4675
+ failed ++ ;
4678
4676
}
4679
4677
4680
- /* if all else fails, thell them to be root */
4681
- if (do_exit )
4682
- if (getuid () != 0 )
4683
- warnx ("... or simply run as root" );
4684
-
4685
- if (do_exit )
4686
- exit (-6 );
4678
+ if (failed ) {
4679
+ warnx ("Failed to access %s. Some of the counters may not be available\n"
4680
+ "\tRun as root to enable them or use %s to disable the access explicitly" , pathname , "--no-msr" );
4681
+ no_msr = 1 ;
4682
+ }
4687
4683
}
4688
4684
4689
4685
void probe_bclk (void )
@@ -5800,6 +5796,28 @@ void print_dev_latency(void)
5800
5796
close (fd );
5801
5797
}
5802
5798
5799
+ static int has_instr_count_access (void )
5800
+ {
5801
+ int fd ;
5802
+ int has_access ;
5803
+
5804
+ if (no_perf )
5805
+ return 0 ;
5806
+
5807
+ fd = open_perf_counter (base_cpu , PERF_TYPE_HARDWARE , PERF_COUNT_HW_INSTRUCTIONS , -1 , 0 );
5808
+ has_access = fd != -1 ;
5809
+
5810
+ if (fd != -1 )
5811
+ close (fd );
5812
+
5813
+ if (!has_access )
5814
+ warnx ("Failed to access %s. Some of the counters may not be available\n"
5815
+ "\tRun as root to enable them or use %s to disable the access explicitly" ,
5816
+ "instructions retired perf counter" , "--no-perf" );
5817
+
5818
+ return has_access ;
5819
+ }
5820
+
5803
5821
/*
5804
5822
* Linux-perf manages the HW instructions-retired counter
5805
5823
* by enabling when requested, and hiding rollover
@@ -5826,30 +5844,60 @@ void linux_perf_init(void)
5826
5844
5827
5845
static int has_amperf_access_via_msr (void )
5828
5846
{
5829
- const int cpu = sched_getcpu ();
5830
5847
unsigned long long dummy ;
5831
5848
5832
- if (get_msr (cpu , MSR_IA32_APERF , & dummy ))
5849
+ if (no_msr )
5850
+ return 0 ;
5851
+
5852
+ if (get_msr (base_cpu , MSR_IA32_APERF , & dummy ))
5833
5853
return 0 ;
5834
5854
5835
- if (get_msr (cpu , MSR_IA32_MPERF , & dummy ))
5855
+ if (get_msr (base_cpu , MSR_IA32_MPERF , & dummy ))
5836
5856
return 0 ;
5837
5857
5838
5858
return 1 ;
5839
5859
}
5840
5860
5841
5861
static int has_amperf_access_via_perf (void )
5842
5862
{
5843
- if (access ("/sys/bus/event_source/devices/msr/type" , F_OK ))
5844
- return 0 ;
5863
+ struct amperf_group_fd fds ;
5845
5864
5846
- if (access ("/sys/bus/event_source/devices/msr/events/aperf" , F_OK ))
5847
- return 0 ;
5865
+ /*
5866
+ * Cache the last result, so we don't warn the user multiple times
5867
+ *
5868
+ * Negative means cached, no access
5869
+ * Zero means not cached
5870
+ * Positive means cached, has access
5871
+ */
5872
+ static int has_access_cached ;
5848
5873
5849
- if (access ( "/sys/bus/event_source/devices/msr/events/mperf" , F_OK ) )
5874
+ if (no_perf )
5850
5875
return 0 ;
5851
5876
5852
- return 1 ;
5877
+ if (has_access_cached != 0 )
5878
+ return has_access_cached > 0 ;
5879
+
5880
+ fds = open_amperf_fd (base_cpu );
5881
+ has_access_cached = (fds .aperf != -1 ) && (fds .mperf != -1 );
5882
+
5883
+ if (fds .aperf == -1 )
5884
+ warnx ("Failed to access %s. Some of the counters may not be available\n"
5885
+ "\tRun as root to enable them or use %s to disable the access explicitly" ,
5886
+ "APERF perf counter" , "--no-perf" );
5887
+ else
5888
+ close (fds .aperf );
5889
+
5890
+ if (fds .mperf == -1 )
5891
+ warnx ("Failed to access %s. Some of the counters may not be available\n"
5892
+ "\tRun as root to enable them or use %s to disable the access explicitly" ,
5893
+ "MPERF perf counter" , "--no-perf" );
5894
+ else
5895
+ close (fds .mperf );
5896
+
5897
+ if (has_access_cached == 0 )
5898
+ has_access_cached = -1 ;
5899
+
5900
+ return has_access_cached > 0 ;
5853
5901
}
5854
5902
5855
5903
/* Check if we can access APERF and MPERF */
@@ -6542,14 +6590,34 @@ static void set_amperf_source(void)
6542
6590
fprintf (outf , "aperf/mperf source preference: %s\n" , amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf" );
6543
6591
}
6544
6592
6593
+ void check_msr_access (void )
6594
+ {
6595
+ check_dev_msr ();
6596
+ check_msr_permission ();
6597
+
6598
+ if (no_msr )
6599
+ bic_disable_msr_access ();
6600
+ }
6601
+
6602
+ void check_perf_access (void )
6603
+ {
6604
+ if (no_perf || !has_instr_count_access ())
6605
+ bic_enabled &= ~BIC_IPC ;
6606
+
6607
+ if (!has_amperf_access ()) {
6608
+ bic_enabled &= ~BIC_Avg_MHz ;
6609
+ bic_enabled &= ~BIC_Busy ;
6610
+ bic_enabled &= ~BIC_Bzy_MHz ;
6611
+ bic_enabled &= ~BIC_IPC ;
6612
+ }
6613
+ }
6614
+
6545
6615
void turbostat_init ()
6546
6616
{
6547
6617
setup_all_buffers (true);
6548
6618
set_base_cpu ();
6549
- if (!no_msr ) {
6550
- check_dev_msr ();
6551
- check_permissions ();
6552
- }
6619
+ check_msr_access ();
6620
+ check_perf_access ();
6553
6621
process_cpuid ();
6554
6622
probe_pm_features ();
6555
6623
set_amperf_source ();
@@ -7150,12 +7218,6 @@ int main(int argc, char **argv)
7150
7218
outf = stderr ;
7151
7219
cmdline (argc , argv );
7152
7220
7153
- if (no_msr )
7154
- bic_disable_msr_access ();
7155
-
7156
- if (no_perf )
7157
- bic_disable_perf_access ();
7158
-
7159
7221
if (!quiet ) {
7160
7222
print_version ();
7161
7223
print_bootcmd ();
0 commit comments