Skip to content

Commit 7990be2

Browse files
Continue counting other perf event groups, even if there is an error in one (#2705)
Signed-off-by: Wisniewski, Krzysztof2 <[email protected]>
1 parent 73fc5bc commit 7990be2

File tree

4 files changed

+153
-68
lines changed

4 files changed

+153
-68
lines changed

perf/collector_libpfm.go

Lines changed: 63 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ type collector struct {
4747
onlineCPUs []int
4848
eventToCustomEvent map[Event]*CustomEvent
4949
uncore stats.Collector
50+
51+
// Handle for mocking purposes.
52+
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
53+
ioctlSetInt func(fd int, req uint, value int) error
5054
}
5155

5256
type group struct {
@@ -76,7 +80,7 @@ func init() {
7680
}
7781

7882
func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector {
79-
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)}
83+
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt}
8084
mapEventsToCustomEvents(collector)
8185
return collector
8286
}
@@ -185,44 +189,30 @@ func (c *collector) setup() error {
185189
c.cpuFilesLock.Lock()
186190
defer c.cpuFilesLock.Unlock()
187191
cgroupFd := int(cgroup.Fd())
188-
for i, group := range c.events.Core.Events {
192+
groupIndex := 0
193+
for _, group := range c.events.Core.Events {
189194
// CPUs file descriptors of group leader needed for perf_event_open.
190195
leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
191196
for _, cpu := range c.onlineCPUs {
192197
leaderFileDescriptors[cpu] = groupLeaderFileDescriptor
193198
}
194199

195-
for j, event := range group.events {
196-
// First element is group leader.
197-
isGroupLeader := j == 0
198-
customEvent, ok := c.eventToCustomEvent[event]
199-
if ok {
200-
config := c.createConfigFromRawEvent(customEvent)
201-
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
202-
if err != nil {
203-
return err
204-
}
205-
} else {
206-
config, err := c.createConfigFromEvent(event)
207-
if err != nil {
208-
return err
209-
}
210-
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
211-
if err != nil {
212-
return err
213-
}
214-
// Clean memory allocated by C code.
215-
C.free(unsafe.Pointer(config))
216-
}
200+
leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors)
201+
if err != nil {
202+
klog.Errorf("Cannot count perf event group %v: %v", group.events, err)
203+
c.deleteGroup(groupIndex)
204+
continue
205+
} else {
206+
groupIndex++
217207
}
218208

219209
// Group is prepared so we should reset and enable counting.
220210
for _, fd := range leaderFileDescriptors {
221-
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
211+
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
222212
if err != nil {
223213
return err
224214
}
225-
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
215+
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
226216
if err != nil {
227217
return err
228218
}
@@ -232,6 +222,35 @@ func (c *collector) setup() error {
232222
return nil
233223
}
234224

225+
func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) {
226+
for j, event := range events {
227+
// First element is group leader.
228+
isGroupLeader := j == 0
229+
customEvent, ok := c.eventToCustomEvent[event]
230+
var err error
231+
if ok {
232+
config := c.createConfigFromRawEvent(customEvent)
233+
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
234+
if err != nil {
235+
return nil, fmt.Errorf("cannot register perf event: %v", err)
236+
}
237+
} else {
238+
config, err := c.createConfigFromEvent(event)
239+
if err != nil {
240+
return nil, fmt.Errorf("cannot create config from perf event: %v", err)
241+
242+
}
243+
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
244+
if err != nil {
245+
return nil, fmt.Errorf("cannot register perf event: %v", err)
246+
}
247+
// Clean memory allocated by C code.
248+
C.free(unsafe.Pointer(config))
249+
}
250+
}
251+
return leaderFileDescriptors, nil
252+
}
253+
235254
func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) {
236255
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
237256
// Fill memory with 0 values.
@@ -279,13 +298,13 @@ func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int
279298
setAttributes(event.config, event.isGroupLeader)
280299

281300
for _, cpu := range c.onlineCPUs {
282-
fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
301+
fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
283302
if err != nil {
284-
return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
303+
return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
285304
}
286305
perfFile := os.NewFile(uintptr(fd), event.name)
287306
if perfFile == nil {
288-
return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
307+
return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
289308
}
290309

291310
c.addEventFile(event.groupIndex, event.name, cpu, perfFile)
@@ -333,6 +352,19 @@ func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.F
333352
}
334353
}
335354

355+
func (c *collector) deleteGroup(index int) {
356+
for name, files := range c.cpuFiles[index].cpuFiles {
357+
for cpu, file := range files {
358+
klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
359+
err := file.Close()
360+
if err != nil {
361+
klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
362+
}
363+
}
364+
}
365+
delete(c.cpuFiles, index)
366+
}
367+
336368
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
337369
length := len(event.Config)
338370

@@ -369,17 +401,8 @@ func (c *collector) Destroy() {
369401
c.cpuFilesLock.Lock()
370402
defer c.cpuFilesLock.Unlock()
371403

372-
for _, group := range c.cpuFiles {
373-
for name, files := range group.cpuFiles {
374-
for cpu, file := range files {
375-
klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
376-
err := file.Close()
377-
if err != nil {
378-
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
379-
}
380-
}
381-
delete(group.cpuFiles, name)
382-
}
404+
for i := range c.cpuFiles {
405+
c.deleteGroup(i)
383406
}
384407
}
385408

perf/collector_libpfm_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ package perf
2020
import (
2121
"bytes"
2222
"encoding/binary"
23+
"io/ioutil"
24+
"os"
2325
"testing"
2426
"unsafe"
2527

@@ -200,6 +202,34 @@ func TestNewCollector(t *testing.T) {
200202
assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")])
201203
}
202204

205+
func TestCollectorSetup(t *testing.T) {
206+
path, err := ioutil.TempDir("", "cgroup")
207+
assert.Nil(t, err)
208+
defer func() {
209+
err := os.RemoveAll(path)
210+
assert.Nil(t, err)
211+
}()
212+
events := PerfEvents{
213+
Core: Events{
214+
Events: []Group{
215+
{[]Event{"cache-misses"}, false},
216+
{[]Event{"non-existing-event"}, false},
217+
},
218+
},
219+
}
220+
c := newCollector(path, events, []int{0}, map[int]int{0: 0})
221+
c.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
222+
return int(attr.Config), nil
223+
}
224+
c.ioctlSetInt = func(fd int, req uint, value int) error {
225+
return nil
226+
}
227+
err = c.setup()
228+
assert.Nil(t, err)
229+
assert.Equal(t, 1, len(c.cpuFiles))
230+
assert.Equal(t, []string{"cache-misses"}, c.cpuFiles[0].names)
231+
}
232+
203233
var readGroupPerfStatCases = []struct {
204234
test string
205235
file GroupReadFormat

perf/uncore_libpfm.go

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,28 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[in
158158
return collector
159159
}
160160

161+
func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex int, groupPMUs map[Event]uncorePMUs,
162+
leaderFileDescriptors map[string]map[uint32]int) (map[string]map[uint32]int, error) {
163+
var err error
164+
for _, event := range events {
165+
eventName, _ := parseEventName(string(event))
166+
customEvent, ok := c.eventToCustomEvent[event]
167+
if ok {
168+
err = c.setupRawEvent(customEvent, groupPMUs[event], groupIndex, leaderFileDescriptors)
169+
} else {
170+
err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors)
171+
}
172+
if err != nil {
173+
break
174+
}
175+
}
176+
if err != nil {
177+
c.deleteGroup(groupIndex)
178+
return nil, fmt.Errorf("cannot create config from perf event: %v", err)
179+
}
180+
return leaderFileDescriptors, nil
181+
}
182+
161183
func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
162184
readUncorePMUs, err := getUncorePMUs(devicesPath)
163185
if err != nil {
@@ -190,21 +212,11 @@ func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
190212
leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor
191213
}
192214
}
193-
194-
for _, event := range group.events {
195-
eventName, _ := parseEventName(string(event))
196-
customEvent, ok := c.eventToCustomEvent[event]
197-
if ok {
198-
err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors)
199-
} else {
200-
err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors)
201-
}
202-
203-
if err != nil {
204-
return err
205-
}
215+
leaderFileDescriptors, err = c.createLeaderFileDescriptors(group.events, i, groupPMUs, leaderFileDescriptors)
216+
if err != nil {
217+
klog.Error(err)
218+
continue
206219
}
207-
208220
// Group is prepared so we should reset and enable counting.
209221
for _, pmuCPUs := range leaderFileDescriptors {
210222
for _, fd := range pmuCPUs {
@@ -320,20 +332,8 @@ func (c *uncoreCollector) Destroy() {
320332
c.cpuFilesLock.Lock()
321333
defer c.cpuFilesLock.Unlock()
322334

323-
for groupIndex, groupPMUs := range c.cpuFiles {
324-
for pmu, group := range groupPMUs {
325-
for name, cpus := range group.cpuFiles {
326-
for cpu, file := range cpus {
327-
klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
328-
err := file.Close()
329-
if err != nil {
330-
klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
331-
}
332-
}
333-
delete(group.cpuFiles, name)
334-
}
335-
delete(groupPMUs, pmu)
336-
}
335+
for groupIndex := range c.cpuFiles {
336+
c.deleteGroup(groupIndex)
337337
delete(c.cpuFiles, groupIndex)
338338
}
339339
}
@@ -475,6 +475,24 @@ func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, gro
475475
return nil
476476
}
477477

478+
func (c *uncoreCollector) deleteGroup(groupIndex int) {
479+
groupPMUs := c.cpuFiles[groupIndex]
480+
for pmu, group := range groupPMUs {
481+
for name, cpus := range group.cpuFiles {
482+
for cpu, file := range cpus {
483+
klog.V(5).Infof("Closing uncore perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
484+
err := file.Close()
485+
if err != nil {
486+
klog.Warningf("Unable to close perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
487+
}
488+
}
489+
delete(group.cpuFiles, name)
490+
}
491+
delete(groupPMUs, pmu)
492+
}
493+
delete(c.cpuFiles, groupIndex)
494+
}
495+
478496
func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) {
479497
values, err := getPerfValues(file, group)
480498
if err != nil {

perf/uncore_libpfm_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ func TestUncoreCollectorSetup(t *testing.T) {
114114
Uncore: Events{
115115
Events: []Group{
116116
{[]Event{"uncore_imc_1/cas_count_read"}, false},
117+
{[]Event{"uncore_imc_1/non_existing_event"}, false},
117118
{[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true},
118119
},
119120
CustomEvents: []CustomEvent{
@@ -133,6 +134,11 @@ func TestUncoreCollectorSetup(t *testing.T) {
133134
}
134135

135136
err = collector.setup(events, path)
137+
assert.Equal(t, []string{"uncore_imc_1/cas_count_read"},
138+
getMapKeys(collector.cpuFiles[0]["uncore_imc_1"].cpuFiles))
139+
assert.ElementsMatch(t, []string{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"},
140+
getMapKeys(collector.cpuFiles[2]["uncore_imc_0"].cpuFiles))
141+
136142
// There are no errors.
137143
assert.Nil(t, err)
138144
}
@@ -295,3 +301,11 @@ func TestReadPerfUncoreStat(t *testing.T) {
295301
assert.NoError(t, err)
296302
assert.Equal(t, expectedStat, stat)
297303
}
304+
305+
func getMapKeys(someMap map[string]map[int]readerCloser) []string {
306+
var keys []string
307+
for key := range someMap {
308+
keys = append(keys, key)
309+
}
310+
return keys
311+
}

0 commit comments

Comments
 (0)