Skip to content

Commit 5088741

Browse files
Patryk Wlazlyn authored and lenb committed
tools/power turbostat: detect and disable unavailable BICs at runtime
To allow an unprivileged user to run turbostat seamlessly.

Signed-off-by: Patryk Wlazlyn <[email protected]>
Signed-off-by: Len Brown <[email protected]>
1 parent e48934c commit 5088741

File tree

1 file changed

+125
-63
lines changed

1 file changed

+125
-63
lines changed

tools/power/x86/turbostat/turbostat.c

Lines changed: 125 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1317,26 +1317,21 @@ static void bic_disable_msr_access(void)
13171317
bic_enabled &= ~bic_msrs;
13181318
}
13191319

1320-
static void bic_disable_perf_access(void)
1321-
{
1322-
const unsigned long bic_perf = BIC_IPC;
1323-
1324-
bic_enabled &= ~bic_perf;
1325-
}
1326-
13271320
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
13281321
{
13291322
assert(!no_perf);
13301323

13311324
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
13321325
}
13331326

1334-
static long open_perf_counter_or_fail(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
1327+
static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
13351328
{
13361329
struct perf_event_attr attr;
13371330
const pid_t pid = -1;
13381331
const unsigned long flags = 0;
13391332

1333+
assert(!no_perf);
1334+
13401335
memset(&attr, 0, sizeof(struct perf_event_attr));
13411336

13421337
attr.type = type;
@@ -1347,15 +1342,6 @@ static long open_perf_counter_or_fail(int cpu, unsigned int type, unsigned int c
13471342
attr.read_format = read_format;
13481343

13491344
const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
1350-
if (fd == -1) {
1351-
if (errno == EACCES) {
1352-
errx(1, "capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\""
1353-
" or use --no-perf or run as root", progname);
1354-
} else {
1355-
perror("perf_event_open");
1356-
errx(1, "use --no-perf or run as root");
1357-
}
1358-
}
13591345

13601346
return fd;
13611347
}
@@ -1365,8 +1351,7 @@ int get_instr_count_fd(int cpu)
13651351
if (fd_instr_count_percpu[cpu])
13661352
return fd_instr_count_percpu[cpu];
13671353

1368-
fd_instr_count_percpu[cpu] =
1369-
open_perf_counter_or_fail(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
1354+
fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
13701355

13711356
return fd_instr_count_percpu[cpu];
13721357
}
@@ -2833,8 +2818,8 @@ static struct amperf_group_fd open_amperf_fd(int cpu)
28332818
const unsigned int mperf_config = read_mperf_config();
28342819
struct amperf_group_fd fds = {.aperf = -1,.mperf = -1 };
28352820

2836-
fds.aperf = open_perf_counter_or_fail(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
2837-
fds.mperf = open_perf_counter_or_fail(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
2821+
fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
2822+
fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
28382823

28392824
return fds;
28402825
}
@@ -4509,7 +4494,8 @@ void msr_sum_record(void)
45094494

45104495
/*
45114496
* set_my_sched_priority(pri)
4512-
* return previous
4497+
* return previous priority on success
4498+
* return value < -20 on failure
45134499
*/
45144500
int set_my_sched_priority(int priority)
45154501
{
@@ -4519,16 +4505,16 @@ int set_my_sched_priority(int priority)
45194505
errno = 0;
45204506
original_priority = getpriority(PRIO_PROCESS, 0);
45214507
if (errno && (original_priority == -1))
4522-
err(errno, "getpriority");
4508+
return -21;
45234509

45244510
retval = setpriority(PRIO_PROCESS, 0, priority);
45254511
if (retval)
4526-
errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
4512+
return -21;
45274513

45284514
errno = 0;
45294515
retval = getpriority(PRIO_PROCESS, 0);
45304516
if (retval != priority)
4531-
err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
4517+
return -21;
45324518

45334519
return original_priority;
45344520
}
@@ -4543,6 +4529,9 @@ void turbostat_loop()
45434529

45444530
/*
45454531
* elevate own priority for interval mode
4532+
*
4533+
* ignore on error - we probably don't have permission to set it, but
4534+
* it's not a big deal
45464535
*/
45474536
set_my_sched_priority(-20);
45484537

@@ -4628,10 +4617,13 @@ void check_dev_msr()
46284617
struct stat sb;
46294618
char pathname[32];
46304619

4620+
if (no_msr)
4621+
return;
4622+
46314623
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
46324624
if (stat(pathname, &sb))
46334625
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
4634-
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
4626+
no_msr = 1;
46354627
}
46364628

46374629
/*
@@ -4643,47 +4635,51 @@ int check_for_cap_sys_rawio(void)
46434635
{
46444636
cap_t caps;
46454637
cap_flag_value_t cap_flag_value;
4638+
int ret = 0;
46464639

46474640
caps = cap_get_proc();
46484641
if (caps == NULL)
4649-
err(-6, "cap_get_proc\n");
4642+
return 1;
46504643

4651-
if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
4652-
err(-6, "cap_get\n");
4644+
if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
4645+
ret = 1;
4646+
goto free_and_exit;
4647+
}
46534648

46544649
if (cap_flag_value != CAP_SET) {
4655-
warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
4656-
return 1;
4650+
ret = 1;
4651+
goto free_and_exit;
46574652
}
46584653

4654+
free_and_exit:
46594655
if (cap_free(caps) == -1)
46604656
err(-6, "cap_free\n");
46614657

4662-
return 0;
4658+
return ret;
46634659
}
46644660

4665-
void check_permissions(void)
4661+
void check_msr_permission(void)
46664662
{
4667-
int do_exit = 0;
4663+
int failed = 0;
46684664
char pathname[32];
46694665

4666+
if (no_msr)
4667+
return;
4668+
46704669
/* check for CAP_SYS_RAWIO */
4671-
do_exit += check_for_cap_sys_rawio();
4670+
failed += check_for_cap_sys_rawio();
46724671

46734672
/* test file permissions */
46744673
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
46754674
if (euidaccess(pathname, R_OK)) {
4676-
do_exit++;
4677-
warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr, or run with --no-msr");
4675+
failed++;
46784676
}
46794677

4680-
/* if all else fails, thell them to be root */
4681-
if (do_exit)
4682-
if (getuid() != 0)
4683-
warnx("... or simply run as root");
4684-
4685-
if (do_exit)
4686-
exit(-6);
4678+
if (failed) {
4679+
warnx("Failed to access %s. Some of the counters may not be available\n"
4680+
"\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
4681+
no_msr = 1;
4682+
}
46874683
}
46884684

46894685
void probe_bclk(void)
@@ -5800,6 +5796,28 @@ void print_dev_latency(void)
58005796
close(fd);
58015797
}
58025798

5799+
static int has_instr_count_access(void)
5800+
{
5801+
int fd;
5802+
int has_access;
5803+
5804+
if (no_perf)
5805+
return 0;
5806+
5807+
fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
5808+
has_access = fd != -1;
5809+
5810+
if (fd != -1)
5811+
close(fd);
5812+
5813+
if (!has_access)
5814+
warnx("Failed to access %s. Some of the counters may not be available\n"
5815+
"\tRun as root to enable them or use %s to disable the access explicitly",
5816+
"instructions retired perf counter", "--no-perf");
5817+
5818+
return has_access;
5819+
}
5820+
58035821
/*
58045822
* Linux-perf manages the HW instructions-retired counter
58055823
* by enabling when requested, and hiding rollover
@@ -5826,30 +5844,60 @@ void linux_perf_init(void)
58265844

58275845
static int has_amperf_access_via_msr(void)
58285846
{
5829-
const int cpu = sched_getcpu();
58305847
unsigned long long dummy;
58315848

5832-
if (get_msr(cpu, MSR_IA32_APERF, &dummy))
5849+
if (no_msr)
5850+
return 0;
5851+
5852+
if (get_msr(base_cpu, MSR_IA32_APERF, &dummy))
58335853
return 0;
58345854

5835-
if (get_msr(cpu, MSR_IA32_MPERF, &dummy))
5855+
if (get_msr(base_cpu, MSR_IA32_MPERF, &dummy))
58365856
return 0;
58375857

58385858
return 1;
58395859
}
58405860

58415861
static int has_amperf_access_via_perf(void)
58425862
{
5843-
if (access("/sys/bus/event_source/devices/msr/type", F_OK))
5844-
return 0;
5863+
struct amperf_group_fd fds;
58455864

5846-
if (access("/sys/bus/event_source/devices/msr/events/aperf", F_OK))
5847-
return 0;
5865+
/*
5866+
* Cache the last result, so we don't warn the user multiple times
5867+
*
5868+
* Negative means cached, no access
5869+
* Zero means not cached
5870+
* Positive means cached, has access
5871+
*/
5872+
static int has_access_cached;
58485873

5849-
if (access("/sys/bus/event_source/devices/msr/events/mperf", F_OK))
5874+
if (no_perf)
58505875
return 0;
58515876

5852-
return 1;
5877+
if (has_access_cached != 0)
5878+
return has_access_cached > 0;
5879+
5880+
fds = open_amperf_fd(base_cpu);
5881+
has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
5882+
5883+
if (fds.aperf == -1)
5884+
warnx("Failed to access %s. Some of the counters may not be available\n"
5885+
"\tRun as root to enable them or use %s to disable the access explicitly",
5886+
"APERF perf counter", "--no-perf");
5887+
else
5888+
close(fds.aperf);
5889+
5890+
if (fds.mperf == -1)
5891+
warnx("Failed to access %s. Some of the counters may not be available\n"
5892+
"\tRun as root to enable them or use %s to disable the access explicitly",
5893+
"MPERF perf counter", "--no-perf");
5894+
else
5895+
close(fds.mperf);
5896+
5897+
if (has_access_cached == 0)
5898+
has_access_cached = -1;
5899+
5900+
return has_access_cached > 0;
58535901
}
58545902

58555903
/* Check if we can access APERF and MPERF */
@@ -6542,14 +6590,34 @@ static void set_amperf_source(void)
65426590
fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
65436591
}
65446592

6593+
void check_msr_access(void)
6594+
{
6595+
check_dev_msr();
6596+
check_msr_permission();
6597+
6598+
if (no_msr)
6599+
bic_disable_msr_access();
6600+
}
6601+
6602+
void check_perf_access(void)
6603+
{
6604+
if (no_perf || !has_instr_count_access())
6605+
bic_enabled &= ~BIC_IPC;
6606+
6607+
if (!has_amperf_access()) {
6608+
bic_enabled &= ~BIC_Avg_MHz;
6609+
bic_enabled &= ~BIC_Busy;
6610+
bic_enabled &= ~BIC_Bzy_MHz;
6611+
bic_enabled &= ~BIC_IPC;
6612+
}
6613+
}
6614+
65456615
void turbostat_init()
65466616
{
65476617
setup_all_buffers(true);
65486618
set_base_cpu();
6549-
if (!no_msr) {
6550-
check_dev_msr();
6551-
check_permissions();
6552-
}
6619+
check_msr_access();
6620+
check_perf_access();
65536621
process_cpuid();
65546622
probe_pm_features();
65556623
set_amperf_source();
@@ -7150,12 +7218,6 @@ int main(int argc, char **argv)
71507218
outf = stderr;
71517219
cmdline(argc, argv);
71527220

7153-
if (no_msr)
7154-
bic_disable_msr_access();
7155-
7156-
if (no_perf)
7157-
bic_disable_perf_access();
7158-
71597221
if (!quiet) {
71607222
print_version();
71617223
print_bootcmd();

0 commit comments

Comments
 (0)