Skip to content

Commit 82e4c62

Browse files
tanquetavbeniwohlibasepi
authored
Add cgroup support (#831) (#846)
* Add cgroup support (#831)
* Fix search for the cgroup directory by looking up where it is mounted
* Fix None
* Change metrics names
* Round fix
* Adapt code style to PEP 8, add some comments and a couple more tests
* Use tighter exception handling and remove a couple of unnecessary `return None`

Co-authored-by: Benjamin Wohlwend <[email protected]>
Co-authored-by: Colton Myers <[email protected]>
1 parent f09eeaa commit 82e4c62

File tree

2 files changed

+347
-1
lines changed

2 files changed

+347
-1
lines changed

elasticapm/metrics/sets/cpu_linux.py

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2929
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030

31+
import logging
3132
import os
3233
import re
3334
import resource
@@ -38,19 +39,42 @@
3839
# Default file locations; these are the default values of the CPUMetricSet
# constructor arguments.
SYS_STATS = "/proc/stat"
MEM_STATS = "/proc/meminfo"
PROC_STATS = "/proc/self/stat"
# File names looked up inside a cgroup v1 memory directory.
CGROUP1_MEMORY_LIMIT = "memory.limit_in_bytes"
CGROUP1_MEMORY_USAGE = "memory.usage_in_bytes"
CGROUP1_MEMORY_STAT = "memory.stat"
# File names looked up inside a cgroup v2 directory.
CGROUP2_MEMORY_LIMIT = "memory.max"
CGROUP2_MEMORY_USAGE = "memory.current"
CGROUP2_MEMORY_STAT = "memory.stat"
# A cgroup v1 limit at or above this value is treated as "no limit configured"
# (see _get_cgroup_v1_file_paths).
UNLIMITED = 0x7FFFFFFFFFFFF000
PROC_SELF_CGROUP = "/proc/self/cgroup"
PROC_SELF_MOUNTINFO = "/proc/self/mountinfo"
# Fallback cgroup root used when no mount point can be discovered from mountinfo.
SYS_FS_CGROUP = "/sys/fs/cgroup"

# NOTE(review): presumably the column names of the "cpu" line in /proc/stat and the
# keys of interest in /proc/meminfo -- confirm against read_system_stats.
CPU_FIELDS = ("user", "nice", "system", "idle", "iowait", "irq", "softirq", "steal", "guest", "guest_nice")
MEM_FIELDS = ("MemTotal", "MemAvailable", "MemFree", "Buffers", "Cached")

whitespace_re = re.compile(r"\s+")

# Matches a "self" cgroup line of the form "<digits>:memory:<anything>".
MEMORY_CGROUP = re.compile(r"^\d+:memory:.*")
# Both patterns capture the fifth space-separated field of a mountinfo line; callers
# use that field as the mount point. The v1 pattern additionally requires "cgroup"
# followed by "memory" later in the line, the v2 pattern "cgroup2" followed by "cgroup".
CGROUP_V1_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup.*memory.*")
CGROUP_V2_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup2.*cgroup.*")

# Refuse to import this module when /proc/stat does not exist.
if not os.path.exists(SYS_STATS):
    raise ImportError("This metric set is only available on Linux")

logger = logging.getLogger("elasticapm.metrics.cpu_linux")
66+
5167

5268
class CPUMetricSet(MetricsSet):
53-
def __init__(self, registry, sys_stats_file=SYS_STATS, process_stats_file=PROC_STATS, memory_stats_file=MEM_STATS):
69+
def __init__(
70+
self,
71+
registry,
72+
sys_stats_file=SYS_STATS,
73+
process_stats_file=PROC_STATS,
74+
memory_stats_file=MEM_STATS,
75+
proc_self_cgroup=PROC_SELF_CGROUP,
76+
mount_info=PROC_SELF_MOUNTINFO,
77+
):
5478
self.page_size = resource.getpagesize()
5579
self.previous = {}
5680
self._read_data_lock = threading.Lock()
@@ -59,10 +83,91 @@ def __init__(self, registry, sys_stats_file=SYS_STATS, process_stats_file=PROC_S
5983
self.memory_stats_file = memory_stats_file
6084
self._sys_clock_ticks = os.sysconf("SC_CLK_TCK")
6185
with self._read_data_lock:
86+
try:
87+
self.cgroup_files = self.get_cgroup_file_paths(proc_self_cgroup, mount_info)
88+
except Exception:
89+
logger.debug("Reading/Parsing of cgroup memory files failed, skipping cgroup metrics", exc_info=True)
6290
self.previous.update(self.read_process_stats())
6391
self.previous.update(self.read_system_stats())
6492
super(CPUMetricSet, self).__init__(registry)
6593

94+
def get_cgroup_file_paths(self, proc_self_cgroup, mount_info):
    """
    Locate the cgroup memory files (limit, usage, stat) that apply to this process.

    The cgroup entry of the process is read from ``proc_self_cgroup``. Every line of
    ``mount_info`` is then probed, first as a cgroup v2 mount and then as a cgroup v1
    memory mount. If none of the discovered mount points yields usable files, the
    default location /sys/fs/cgroup is tried.

    :param proc_self_cgroup: path to "self" cgroup file, usually /proc/self/cgroup
    :param mount_info: path to "mountinfo" file, usually /proc/self/mountinfo
    :return: a 3-tuple of memory info file paths (limit, usage, stat), or None
    """
    cgroup_entry = None
    try:
        with open(proc_self_cgroup, "r") as cgroup_listing:
            for entry in cgroup_listing:
                if MEMORY_CGROUP.match(entry):
                    # an explicit memory controller entry always wins
                    cgroup_entry = entry
                    break
                if cgroup_entry is None and entry.startswith("0:"):
                    # keep the first "0:" entry in case no memory entry follows
                    cgroup_entry = entry
    except IOError:
        logger.debug("Cannot read %s, skipping cgroup metrics", proc_self_cgroup, exc_info=True)
        return
    if cgroup_entry is None:
        return

    try:
        with open(mount_info, "r") as mounts:
            for mount_line in mounts:
                # cgroup v2
                v2_match = CGROUP_V2_MOUNT_POINT.match(mount_line)
                if v2_match is not None:
                    paths = self._get_cgroup_v2_file_paths(cgroup_entry, v2_match.group(1))
                    if paths:
                        return paths
                # cgroup v1
                v1_match = CGROUP_V1_MOUNT_POINT.match(mount_line)
                if v1_match is not None:
                    paths = self._get_cgroup_v1_file_paths(v1_match.group(1))
                    if paths:
                        return paths
    except IOError:
        logger.debug("Cannot read %s, skipping cgroup metrics", mount_info, exc_info=True)
        return

    # no mount point from mountinfo worked: probe the default location, v2 before v1
    paths = self._get_cgroup_v2_file_paths(cgroup_entry, SYS_FS_CGROUP) or self._get_cgroup_v1_file_paths(
        os.path.join(SYS_FS_CGROUP, "memory")
    )
    if paths:
        return paths
    logger.debug("Location of cgroup files failed, skipping cgroup metrics")
142+
143+
def _get_cgroup_v2_file_paths(self, line_cgroup, mount_discovered):
    """
    Build the cgroup v2 memory file paths for the cgroup named in ``line_cgroup``.

    :param line_cgroup: a line of the "self" cgroup file; the text after its last ":"
        is used as the cgroup path
    :param mount_discovered: directory below which the cgroup path is looked up
    :return: a 3-tuple of file paths (limit, usage, stat), or None if the limit file
        cannot be read or its first line is "max"
    """
    # Drop the first character of the cgroup path (expected to be the leading "/")
    # so that os.path.join() keeps the mount point as prefix.
    relative_path = line_cgroup.strip().rsplit(":", 1)[-1][1:]
    limit_path, usage_path, stat_path = (
        os.path.join(mount_discovered, relative_path, file_name)
        for file_name in (CGROUP2_MEMORY_LIMIT, CGROUP2_MEMORY_USAGE, CGROUP2_MEMORY_STAT)
    )
    try:
        with open(limit_path, "r") as limit_file:
            limit = limit_file.readline().strip()
    except IOError:
        return
    if limit == "max":
        return
    return limit_path, usage_path, stat_path
157+
158+
def _get_cgroup_v1_file_paths(self, mount_discovered):
    """
    Build the cgroup v1 memory file paths below ``mount_discovered``.

    :param mount_discovered: directory in which the cgroup v1 memory files are looked up
    :return: a 3-tuple of file paths (limit, usage, stat), or None if the limit file
        cannot be read, does not start with an integer, or holds a value of at least
        UNLIMITED (treated as "no limit configured")
    """
    limit_path = os.path.join(mount_discovered, CGROUP1_MEMORY_LIMIT)
    try:
        with open(limit_path, "r") as limit_file:
            mem_max = int(limit_file.readline().strip())
    except (IOError, ValueError):
        # ValueError: the limit file is empty or not numeric. Treat it like an
        # unreadable file so the caller can continue with its next candidate
        # instead of aborting the whole discovery.
        return
    if mem_max >= UNLIMITED:
        return
    return (
        limit_path,
        os.path.join(mount_discovered, CGROUP1_MEMORY_USAGE),
        os.path.join(mount_discovered, CGROUP1_MEMORY_STAT),
    )
170+
66171
def before_collect(self):
67172
new = self.read_process_stats()
68173
new.update(self.read_system_stats())
@@ -83,6 +188,13 @@ def before_collect(self):
83188
self.gauge("system.memory.actual.free").val = mem_free
84189
self.gauge("system.memory.total").val = new["MemTotal"]
85190

191+
if "cgroup_mem_total" in new:
192+
self.gauge("system.process.cgroup.memory.mem.limit.bytes").val = new["cgroup_mem_total"]
193+
if "cgroup_mem_used" in new:
194+
self.gauge("system.process.cgroup.memory.mem.usage.bytes").val = new["cgroup_mem_used"]
195+
if "cgroup_mem_inactive" in new:
196+
self.gauge("system.process.cgroup.memory.stats.inactive_file.bytes").val = new["cgroup_mem_inactive"]
197+
86198
try:
87199
cpu_process_percent = delta["proc_total_time"] / delta["cpu_total"]
88200
except ZeroDivisionError:
@@ -115,6 +227,20 @@ def read_system_stats(self):
115227
)
116228
stats["cpu_usage"] = stats["cpu_total"] - (f["idle"] + f["iowait"])
117229
break
230+
if self.cgroup_files:
231+
with open(self.cgroup_files[0], "r") as memfile:
232+
stats["cgroup_mem_total"] = int(memfile.readline())
233+
with open(self.cgroup_files[1], "r") as memfile:
234+
usage = int(memfile.readline())
235+
stats["cgroup_mem_used"] = usage
236+
with open(self.cgroup_files[2], "r") as memfile:
237+
sum = 0
238+
for line in memfile:
239+
(metric_name, value) = line.split(" ")
240+
if metric_name == "inactive_file":
241+
sum = sum + int(value)
242+
stats["cgroup_mem_used"] = stats["cgroup_mem_used"] - sum
243+
stats["cgroup_mem_inactive"] = sum
118244
with open(self.memory_stats_file, "r") as memfile:
119245
for line in memfile:
120246
metric_name = line.split(":")[0]

0 commit comments

Comments
 (0)