2828# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2929# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3030
31+ import logging
3132import os
3233import re
3334import resource
# procfs files the metric set reads by default.
SYS_STATS = "/proc/stat"
MEM_STATS = "/proc/meminfo"
PROC_STATS = "/proc/self/stat"
PROC_SELF_CGROUP = "/proc/self/cgroup"
PROC_SELF_MOUNTINFO = "/proc/self/mountinfo"

# Default root that is tried when the cgroup mount point cannot be discovered.
SYS_FS_CGROUP = "/sys/fs/cgroup"

# File names of the memory limit / usage / stat files for cgroup v1 ...
CGROUP1_MEMORY_LIMIT = "memory.limit_in_bytes"
CGROUP1_MEMORY_USAGE = "memory.usage_in_bytes"
CGROUP1_MEMORY_STAT = "memory.stat"
# ... and for cgroup v2.
CGROUP2_MEMORY_LIMIT = "memory.max"
CGROUP2_MEMORY_USAGE = "memory.current"
CGROUP2_MEMORY_STAT = "memory.stat"

# A cgroup v1 limit that is not below this value is treated as "no limit".
UNLIMITED = 0x7FFFFFFFFFFFF000

CPU_FIELDS = (
    "user",
    "nice",
    "system",
    "idle",
    "iowait",
    "irq",
    "softirq",
    "steal",
    "guest",
    "guest_nice",
)
MEM_FIELDS = (
    "MemTotal",
    "MemAvailable",
    "MemFree",
    "Buffers",
    "Cached",
)

whitespace_re = re.compile(r"\s+")

# Matches the line of the "memory" controller in a cgroup file such as /proc/self/cgroup.
MEMORY_CGROUP = re.compile(r"^\d+:memory:.*")
# Both patterns capture the fifth space-separated field of a mountinfo line,
# which the callers use as the cgroup mount point.
CGROUP_V1_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup.*memory.*")
CGROUP_V2_MOUNT_POINT = re.compile(r"^\d+? \d+? .+? .+? (.*?) .*cgroup2.*cgroup.*")

# Refuse to import on systems that do not provide /proc/stat.
if not os.path.exists(SYS_STATS):
    raise ImportError("This metric set is only available on Linux")

logger = logging.getLogger("elasticapm.metrics.cpu_linux")
66+
5167
5268class CPUMetricSet (MetricsSet ):
53- def __init__ (self , registry , sys_stats_file = SYS_STATS , process_stats_file = PROC_STATS , memory_stats_file = MEM_STATS ):
69+ def __init__ (
70+ self ,
71+ registry ,
72+ sys_stats_file = SYS_STATS ,
73+ process_stats_file = PROC_STATS ,
74+ memory_stats_file = MEM_STATS ,
75+ proc_self_cgroup = PROC_SELF_CGROUP ,
76+ mount_info = PROC_SELF_MOUNTINFO ,
77+ ):
5478 self .page_size = resource .getpagesize ()
5579 self .previous = {}
5680 self ._read_data_lock = threading .Lock ()
@@ -59,10 +83,91 @@ def __init__(self, registry, sys_stats_file=SYS_STATS, process_stats_file=PROC_S
5983 self .memory_stats_file = memory_stats_file
6084 self ._sys_clock_ticks = os .sysconf ("SC_CLK_TCK" )
6185 with self ._read_data_lock :
86+ try :
87+ self .cgroup_files = self .get_cgroup_file_paths (proc_self_cgroup , mount_info )
88+ except Exception :
89+ logger .debug ("Reading/Parsing of cgroup memory files failed, skipping cgroup metrics" , exc_info = True )
6290 self .previous .update (self .read_process_stats ())
6391 self .previous .update (self .read_system_stats ())
6492 super (CPUMetricSet , self ).__init__ (registry )
6593
def get_cgroup_file_paths(self, proc_self_cgroup, mount_info):
    """
    Locate the cgroup memory files (limit, usage, stat) that apply to this process.

    The cgroup entry of the process is read from ``proc_self_cgroup``. The cgroup mount
    point is then searched for in ``mount_info``, trying cgroup v2 before cgroup v1 on
    every line. If no line yields usable files, the default location /sys/fs/cgroup is tried.

    :param proc_self_cgroup: path to "self" cgroup file, usually /proc/self/cgroup
    :param mount_info: path to "mountinfo" file, usually /proc/self/mountinfo
    :return: a 3-tuple of memory info files, or None
    """
    cgroup_entry = None
    try:
        with open(proc_self_cgroup, "r") as cgroup_fh:
            for entry in cgroup_fh:
                # an explicit "memory" controller line always wins and ends the scan
                if MEMORY_CGROUP.match(entry):
                    cgroup_entry = entry
                    break
                # otherwise remember the first "0:" line as the candidate
                if cgroup_entry is None and entry.startswith("0:"):
                    cgroup_entry = entry
    except IOError:
        logger.debug("Cannot read %s, skipping cgroup metrics", proc_self_cgroup, exc_info=True)
        return None

    if cgroup_entry is None:
        return None

    try:
        with open(mount_info, "r") as mount_fh:
            for entry in mount_fh:
                # cgroup v2
                v2_match = CGROUP_V2_MOUNT_POINT.match(entry)
                if v2_match is not None:
                    found = self._get_cgroup_v2_file_paths(cgroup_entry, v2_match.group(1))
                    if found:
                        return found
                # cgroup v1
                v1_match = CGROUP_V1_MOUNT_POINT.match(entry)
                if v1_match is not None:
                    found = self._get_cgroup_v1_file_paths(v1_match.group(1))
                    if found:
                        return found
    except IOError:
        logger.debug("Cannot read %s, skipping cgroup metrics", mount_info, exc_info=True)
        return None

    # discovery of cgroup path failed, try with default path (v2 first, then v1)
    found = self._get_cgroup_v2_file_paths(cgroup_entry, SYS_FS_CGROUP) or self._get_cgroup_v1_file_paths(
        os.path.join(SYS_FS_CGROUP, "memory")
    )
    if found:
        return found
    logger.debug("Location of cgroup files failed, skipping cgroup metrics")
    return None
142+
def _get_cgroup_v2_file_paths(self, line_cgroup, mount_discovered):
    """
    Build the cgroup v2 memory file paths for the given cgroup entry, if a limit is set.

    :param line_cgroup: a cgroup entry line in the form "hierarchy-ID:controller-list:cgroup-path"
    :param mount_discovered: directory below which the cgroup path is looked up
    :return: a 3-tuple (limit file, usage file, stat file), or None if the limit file
             cannot be read or its first line is "max"
    """
    # Split at most twice: the cgroup path is the third field and may itself contain
    # ":" characters, which an unbounded split would cut off.
    cgroup_path = line_cgroup.strip().split(":", 2)[-1]
    # Drop the first character (the leading "/" of the cgroup path); os.path.join()
    # would otherwise discard the mount point in front of an absolute component.
    slice_path = cgroup_path[1:]
    cgroup_dir = os.path.join(mount_discovered, slice_path)
    limit_file = os.path.join(cgroup_dir, CGROUP2_MEMORY_LIMIT)
    try:
        with open(limit_file, "r") as memfile:
            line_mem = memfile.readline().strip()
    except IOError:
        # Best effort: an unreadable limit file means this candidate location is not usable.
        return None
    if line_mem == "max":
        # "max" means no memory limit is configured, so no files are reported.
        return None
    return (
        limit_file,
        os.path.join(cgroup_dir, CGROUP2_MEMORY_USAGE),
        os.path.join(cgroup_dir, CGROUP2_MEMORY_STAT),
    )
157+
def _get_cgroup_v1_file_paths(self, mount_discovered):
    """
    Build the cgroup v1 memory file paths below the given mount point, if a limit is set.

    :param mount_discovered: directory that is expected to contain the cgroup v1 memory files
    :return: a 3-tuple (limit file, usage file, stat file), or None if the limit file
             cannot be read or parsed, or if the limit is not below UNLIMITED
    """
    limit_file = os.path.join(mount_discovered, CGROUP1_MEMORY_LIMIT)
    try:
        with open(limit_file, "r") as memfile:
            mem_max = int(memfile.readline().strip())
    except (IOError, ValueError):
        # Best effort: an unreadable or non-numeric limit file only disqualifies this
        # candidate location. Letting ValueError escape would abort the whole discovery
        # in the caller instead of moving on to the next candidate.
        return None
    if mem_max >= UNLIMITED:
        # values at or above UNLIMITED are treated as "no limit configured"
        return None
    return (
        limit_file,
        os.path.join(mount_discovered, CGROUP1_MEMORY_USAGE),
        os.path.join(mount_discovered, CGROUP1_MEMORY_STAT),
    )
170+
66171 def before_collect (self ):
67172 new = self .read_process_stats ()
68173 new .update (self .read_system_stats ())
@@ -83,6 +188,13 @@ def before_collect(self):
83188 self .gauge ("system.memory.actual.free" ).val = mem_free
84189 self .gauge ("system.memory.total" ).val = new ["MemTotal" ]
85190
191+ if "cgroup_mem_total" in new :
192+ self .gauge ("system.process.cgroup.memory.mem.limit.bytes" ).val = new ["cgroup_mem_total" ]
193+ if "cgroup_mem_used" in new :
194+ self .gauge ("system.process.cgroup.memory.mem.usage.bytes" ).val = new ["cgroup_mem_used" ]
195+ if "cgroup_mem_inactive" in new :
196+ self .gauge ("system.process.cgroup.memory.stats.inactive_file.bytes" ).val = new ["cgroup_mem_inactive" ]
197+
86198 try :
87199 cpu_process_percent = delta ["proc_total_time" ] / delta ["cpu_total" ]
88200 except ZeroDivisionError :
@@ -115,6 +227,20 @@ def read_system_stats(self):
115227 )
116228 stats ["cpu_usage" ] = stats ["cpu_total" ] - (f ["idle" ] + f ["iowait" ])
117229 break
230+ if self .cgroup_files :
231+ with open (self .cgroup_files [0 ], "r" ) as memfile :
232+ stats ["cgroup_mem_total" ] = int (memfile .readline ())
233+ with open (self .cgroup_files [1 ], "r" ) as memfile :
234+ usage = int (memfile .readline ())
235+ stats ["cgroup_mem_used" ] = usage
236+ with open (self .cgroup_files [2 ], "r" ) as memfile :
237+ sum = 0
238+ for line in memfile :
239+ (metric_name , value ) = line .split (" " )
240+ if metric_name == "inactive_file" :
241+ sum = sum + int (value )
242+ stats ["cgroup_mem_used" ] = stats ["cgroup_mem_used" ] - sum
243+ stats ["cgroup_mem_inactive" ] = sum
118244 with open (self .memory_stats_file , "r" ) as memfile :
119245 for line in memfile :
120246 metric_name = line .split (":" )[0 ]
0 commit comments