Skip to content

Commit 4941b6f

Browse files
committed
confd: add support for container resource limits
Add resource-limit and resource-usage containers to YANG model. Podman, and later conmon, enforce CPU and memory limits in a delegated cgroupsv2 hierarchy managed by Finit. Resource usage is queried from 'podman inspect', which has more nodes than what is currently possible to limit. Requires Finit 4.15, or later. Signed-off-by: Joachim Wiberg <[email protected]>
1 parent 9d915f4 commit 4941b6f

File tree

7 files changed

+306
-3
lines changed

7 files changed

+306
-3
lines changed

board/common/rootfs/etc/finit.d/available/[email protected]

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@
55
# 'podman load', must not have a timeout.
66
sysv log:prio:local1,tag:%i kill:30 pid:!/run/container:%i.pid \
77
pre:0,/usr/sbin/container cleanup:0,/usr/sbin/container \
8+
cgroup.system,delegate \
89
[2345] <!> :%i container -n %i -- container %i

board/common/rootfs/usr/sbin/container

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,10 +411,20 @@ create()
411411
logging="--log-driver syslog"
412412
fi
413413

414+
# Build resource limit arguments
415+
resource=""
416+
if [ -n "$memory" ]; then
417+
resource="$resource --memory=$memory"
418+
fi
419+
if [ -n "$cpu_limit" ]; then
420+
resource="$resource --cpu-quota=$cpu_limit"
421+
fi
422+
414423
# When we get here we've already fetched, or pulled, the image
415-
args="$args --read-only --replace --quiet --cgroup-parent=containers $caps"
424+
args="$args --read-only --replace --quiet $caps"
425+
args="$args --cgroups=enabled --cgroupns=host --cgroup-parent=system/container@$name"
416426
args="$args --restart=$restart --systemd=false --tz=local $privileged"
417-
args="$args $vol $mount $hostname $entrypoint $env $port $logging"
427+
args="$args $vol $mount $hostname $entrypoint $env $port $logging $resource"
418428
pidfile=/run/container:${name}.pid
419429

420430
[ -n "$quiet" ] || log "---------------------------------------"
@@ -716,6 +726,8 @@ options:
716726
--log-path PATH Path for k8s-file log pipe
717727
-m, --mount HOST:DEST Bind mount a read-only file inside a container
718728
--manual Do not start container automatically after creation
729+
--memory BYTES Memory limit in bytes (supports K/M/G suffix)
730+
--cpu-limit LIMIT CPU limit in millicores (1000m = 100% of 1 core)
719731
-n, --name NAME Alternative way of supplying name to start/stop/restart
720732
--privileged Give container extended privileges
721733
-p, --publish PORT Publish ports when creating container
@@ -836,6 +848,14 @@ while [ "$1" != "" ]; do
836848
--manual)
837849
manual=true
838850
;;
851+
--memory)
852+
shift
853+
memory="$1"
854+
;;
855+
--cpu-limit)
856+
shift
857+
cpu_limit="$1"
858+
;;
839859
-n | --name)
840860
shift
841861
name="$1"

src/confd/src/containers.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,25 @@ static int add(const char *name, struct lyd_node *cif)
294294
fprintf(fp, " --checksum sha512:%s", string);
295295
}
296296

297+
/* Add resource limits for Podman to enforce via cgroups */
298+
node = lydx_get_descendant(lyd_child(cif), "resource-limit", NULL);
299+
if (node) {
300+
struct lyd_node *mem_node, *cpu_node;
301+
302+
/* Memory limit in KiB, Podman accepts with 'k' suffix */
303+
mem_node = lydx_get_descendant(lyd_child(node), "memory", NULL);
304+
if (mem_node)
305+
fprintf(fp, " --memory %sk", lyd_get_value(mem_node));
306+
307+
/* CPU limit in millicores, convert to quota (microseconds per 100ms) */
308+
cpu_node = lydx_get_descendant(lyd_child(node), "cpu", NULL);
309+
if (cpu_node) {
310+
uint32_t millicores = strtoul(lyd_get_value(cpu_node), NULL, 10);
311+
uint32_t quota = millicores * 100; /* 1000m → 100000µs, 2000m → 200000µs */
312+
fprintf(fp, " --cpu-limit %u", quota);
313+
}
314+
}
315+
297316
fprintf(fp, " create %s %s", name, image);
298317

299318
if ((string = lydx_get_cattr(cif, "command")))

src/confd/yang/confd/infix-containers.yang

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ module infix-containers {
2222
prefix infix-sys;
2323
}
2424

25+
revision 2025-12-09 {
26+
description "Add resource management:
27+
- Add resource-limit container with memory and cpu configuration.
28+
- Add resource-usage operational data for live resource usage statistics.";
29+
reference "internal";
30+
}
31+
2532
revision 2025-10-12 {
2633
description "Two major changes:
2734
- Add dedicated 'ident' type for container and volume names.
@@ -341,6 +348,86 @@ module infix-containers {
341348
}
342349
}
343350

351+
container resource-limit {
352+
description "Resource limits for the container.";
353+
354+
leaf memory {
355+
description "Maximum memory limit in kibibytes, default: unlimited.";
356+
type uint64;
357+
units "KiB";
358+
}
359+
360+
leaf cpu {
361+
description "CPU limit in millicores, default: unlimited.
362+
363+
Millicores represent thousandths of a CPU core:
364+
500 = 0.5 cores (50% of one core)
365+
1000 = 1.0 cores (one full core)
366+
2000 = 2.0 cores (two full cores)
367+
3500 = 3.5 cores
368+
369+
This is converted to cgroup cpu.quota internally.";
370+
type uint32;
371+
units "millicores";
372+
}
373+
}
374+
375+
container resource-usage {
376+
description "Runtime container resource usage statistics.";
377+
config false;
378+
379+
leaf memory {
380+
description "Used memory in kibibytes.";
381+
type uint64;
382+
units "KiB";
383+
}
384+
385+
leaf cpu {
386+
description "CPU usage percentage.";
387+
type decimal64 {
388+
fraction-digits 2;
389+
}
390+
units "percent";
391+
}
392+
393+
container block-io {
394+
description "Block I/O statistics";
395+
396+
leaf read {
397+
description "Block I/O read in kibibytes.";
398+
type uint64;
399+
units "KiB";
400+
}
401+
402+
leaf write {
403+
description "Block I/O write in kibibytes.";
404+
type uint64;
405+
units "KiB";
406+
}
407+
}
408+
409+
container net-io {
410+
description "Network I/O statistics";
411+
412+
leaf received {
413+
description "Network I/O received in kibibytes.";
414+
type uint64;
415+
units "KiB";
416+
}
417+
418+
leaf sent {
419+
description "Network I/O sent in kibibytes.";
420+
type uint64;
421+
units "KiB";
422+
}
423+
}
424+
425+
leaf pids {
426+
description "Number of processes/threads.";
427+
type uint32;
428+
}
429+
}
430+
344431
list mount {
345432
description "Files, content, and directories to mount inside container.";
346433
key name;

src/confd/yang/containers.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- sh -*-
22
MODULES=(
33
"infix-interfaces -e containers"
4-
"infix-containers@2025-10-12.yang"
4+
"infix-containers@2025-12-09.yang"
55
)

src/statd/python/yanger/infix_containers.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
"""Operational data provider for infix-containers YANG model.
2+
3+
Collects container status, network info, resource limits from cgroups,
4+
and runtime statistics via podman commands.
5+
"""
6+
import os
7+
import re
8+
19
from .common import LOG
210
from .host import HOST
311

@@ -46,6 +54,165 @@ def network(ps, inspect):
4654
return net
4755

4856

57+
# Leading number plus optional unit, matched against upper-cased input.
# The unit group requires a K/M/G/T prefix before an 'I', so a bare 'IB'
# is not mistaken for a valid unit.
_SIZE_RE = re.compile(r'([0-9.]+)\s*((?:[KMGT]I?)?B)?')

# Multiplier from each (upper-cased) unit to KiB.  Decimal units (kB, MB,
# ...) are powers of 1000 bytes, binary units (KiB, MiB, ...) powers of 1024.
_KIB_PER_UNIT = {
    'B': 1/1024,
    'KB': 1000/1024, 'KIB': 1,
    'MB': (1000**2)/1024, 'MIB': 1024,
    'GB': (1000**3)/1024, 'GIB': 1024**2,
    'TB': (1000**4)/1024, 'TIB': 1024**3,
}


def parse_size_kib(size_str):
    """Parse size string like '1.5MB' or '512kB' to KiB (kibibytes).

    The unit is matched case-insensitively; a number without a recognized
    unit is taken to be in bytes.  The result is truncated to an integer.

    Returns 0 for empty input, for input that does not start with a number,
    and for numbers that cannot be converted (e.g. '1.2.3' or '.').
    """
    if not size_str:
        return 0

    match = _SIZE_RE.match(size_str.strip().upper())
    if not match:
        return 0

    unit = match.group(2) or 'B'
    try:
        return int(float(match.group(1)) * _KIB_PER_UNIT[unit])
    except (ValueError, OverflowError):
        # '[0-9.]+' also accepts text float() rejects, and an absurdly long
        # digit string becomes inf, which int() cannot convert.
        return 0
82+
83+
84+
def parse_cgroup_memory(mem_str):
    """Convert a cgroup memory.max value, given in bytes, to whole KiB.

    An empty value, the literal "max" and non-numeric text all yield 0.
    """
    if mem_str == "max" or not mem_str:
        return 0

    try:
        return int(mem_str) // 1024
    except ValueError:
        return 0
93+
94+
95+
def parse_cgroup_cpu(cpu_str):
    """Parse cgroup cpu.max value to millicores.

    The value is expected as two whitespace-separated fields, "QUOTA PERIOD",
    where QUOTA may be the literal "max".  The result is
    (quota / period) * 1000, truncated to an integer.

    Returns 0 when the value is empty, malformed, has quota "max", or has a
    non-positive quota or period.
    """
    if not cpu_str:
        return 0

    parts = cpu_str.split()
    if len(parts) != 2 or parts[0] == "max":
        return 0

    try:
        quota, period = int(parts[0]), int(parts[1])
    except ValueError:
        return 0

    # A zero period would divide by zero, and negative values would yield
    # a negative result; neither describes a usable limit.
    if quota <= 0 or period <= 0:
        return 0

    return (quota * 1000) // period
109+
110+
111+
def _read_cgroup_value(path):
    """Return the stripped content of a cgroup file, or "" if it is absent."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read().strip()
    except (FileNotFoundError, NotADirectoryError):
        # Opening directly, instead of checking existence first, avoids a
        # race where the file disappears between the check and the read.
        return ""


def read_cgroup_limits(inspect):
    """Read resource limits from cgroup files for a container.

    Looks up State.CgroupPath in the inspect dict and reads memory.max and
    cpu.max from below /sys/fs/cgroup.

    Returns a dict with "memory" (KiB, as a string) and/or "cpu"
    (millicores, as an int) for each limit that is set, or None when the
    input has no cgroup path, no limit is set, or reading fails.
    """
    if not isinstance(inspect, dict):
        return None

    # "State" may be present but null, so do not chain .get() on it blindly.
    state = inspect.get("State")
    cgroup_path = state.get("CgroupPath") if isinstance(state, dict) else None
    if not cgroup_path or not isinstance(cgroup_path, str):
        return None

    # Strip leading slashes: os.path.join() would otherwise discard the
    # /sys/fs/cgroup prefix when given an absolute second component.
    cgroup_base = os.path.join("/sys/fs/cgroup", cgroup_path.lstrip("/"))

    try:
        mem_str = _read_cgroup_value(os.path.join(cgroup_base, "memory.max"))
        mem_val = parse_cgroup_memory(mem_str)

        cpu_str = _read_cgroup_value(os.path.join(cgroup_base, "cpu.max"))
        cpu_val = parse_cgroup_cpu(cpu_str)
    except Exception as e:
        # Kept broad to preserve the original best-effort contract: any
        # failure is logged and reported as "no limits".
        LOG.error(f"failed reading cgroup limits: {e}")
        return None

    result = {}
    if mem_val > 0:
        # String rather than int, matching how the other uint64 values in
        # this module are emitted.
        result["memory"] = f"{mem_val}"
    if cpu_val > 0:
        result["cpu"] = cpu_val

    return result or None
151+
152+
153+
def _io_pair_kib(pair_str):
    """Convert a podman 'A / B' size pair to a (kib_a, kib_b) tuple.

    Returns None when the value is not a string containing a '/'.
    """
    if not isinstance(pair_str, str) or "/" not in pair_str:
        return None

    # partition() splits on the first '/' only, so an unexpected extra
    # separator cannot break the two-way unpacking.
    first, _, second = pair_str.partition("/")
    return parse_size_kib(first.strip()), parse_size_kib(second.strip())


def resource_stats(name):
    """Get resource usage stats for a running container using podman stats.

    Returns a dict that may hold "memory" (KiB, string), "cpu" (percent,
    string with two decimals), "block-io" {"read", "write"}, "net-io"
    {"received", "sent"} (KiB, strings, only when non-zero) and "pids" (int).
    Returns None when podman reports nothing or on any failure, which is
    logged.
    """
    cmd = ['podman', 'stats', '--no-stream', '--format', 'json', '--no-reset', name]
    try:
        stats = HOST.run_json(cmd, default=[])
        if not stats:
            return None

        stat = stats[0]
        rusage = {}

        # Memory is reported as 'used / limit'; only the used part is kept.
        # Encode as string for uint64 compatibility
        mem_usage = stat.get("mem_usage") or ""
        if "/" in mem_usage:
            mem_used_kib = parse_size_kib(mem_usage.partition("/")[0].strip())
            rusage["memory"] = f"{mem_used_kib}"

        # CPU percentage - format as decimal64 with 2 fractional digits
        cpu_perc = str(stat.get("cpu_percent") or "0%").rstrip("%")
        try:
            rusage["cpu"] = "{:.2f}".format(float(cpu_perc))
        except (ValueError, TypeError):
            pass

        block = _io_pair_kib(stat.get("block_io", "0B / 0B"))
        if block is not None:
            block_read_kib, block_write_kib = block
            rusage["block-io"] = {}
            if block_read_kib > 0:
                rusage["block-io"]["read"] = f"{block_read_kib}"
            if block_write_kib > 0:
                rusage["block-io"]["write"] = f"{block_write_kib}"

        net = _io_pair_kib(stat.get("net_io", "0B / 0B"))
        if net is not None:
            net_rx_kib, net_tx_kib = net
            rusage["net-io"] = {}
            if net_rx_kib > 0:
                rusage["net-io"]["received"] = f"{net_rx_kib}"
            if net_tx_kib > 0:
                rusage["net-io"]["sent"] = f"{net_tx_kib}"

        pids = stat.get("pids", "0")
        try:
            rusage["pids"] = int(pids)
        except (ValueError, TypeError):
            pass

        return rusage if rusage else None

    except Exception as e:
        # Best effort: a failing or misbehaving podman call is logged and
        # reported as "no usage data" instead of propagating.
        LOG.error(f"failed getting stats for {name}: {e}")
        return None
214+
215+
49216
def container(ps):
50217
out = {
51218
"name": ps["Names"][0],
@@ -70,6 +237,15 @@ def container(ps):
70237
if net:
71238
out["network"] = net
72239

240+
limits = read_cgroup_limits(inspect)
241+
if limits:
242+
out["resource-limit"] = limits
243+
244+
if out["running"]:
245+
rusage = resource_stats(out["name"])
246+
if rusage:
247+
out["resource-usage"] = rusage
248+
73249
return out
74250

75251

0 commit comments

Comments
 (0)