Skip to content

Commit 928e8ae

Browse files
rds.py: Added RDMA restrack resource usage in rds.py script
Enhance rds.py to report RDMA restrack resource usage (PD, CQ, QP, CM_ID, MR, CTX, SRQ) per RDS interface. This is based on an xa_for_each traversal of the res[] array in struct ib_device. It is useful for visibility into actual RDMA object usage and for correlating with the output of the rdma resource show script. Orabug: 38221449 Signed-off-by: Palak Tripathi <[email protected]> Reviewed-by: Anand Khoje <anand.a.khoje.oracle.com> Reviewed-by: Stephen Brennan <[email protected]>
1 parent e3080da commit 928e8ae

File tree

1 file changed

+49
-3
lines changed

1 file changed

+49
-3
lines changed

drgn_tools/rds.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from drgn.helpers.linux.list import list_for_each
3232
from drgn.helpers.linux.list import list_for_each_entry
3333
from drgn.helpers.linux.pid import find_task
34-
34+
from drgn.helpers.linux import xa_for_each
3535
from drgn_tools.corelens import CorelensModule
3636
from drgn_tools.module import ensure_debuginfo
3737
from drgn_tools.table import print_table
@@ -85,7 +85,6 @@
8585

8686
# Helpers #
8787

88-
8988
def be64_to_host(prog: drgn.Program, value: int) -> int:
9089
"""
9190
Convert 64 byte value from big endian to host order
@@ -570,6 +569,53 @@ def rds_dev_info(
570569
else:
571570
return None
572571

572+
def rdma_resource_usage(
    prog: Program, outfile: Optional[str] = None, report: bool = False
) -> None:
    """
    Print RDMA restrack resource usage counts for every RDMA device whose
    name starts with ``mlx``, similar to 'rdma res show'

    One table row is emitted per matching device, with one column for each
    of the PD, CQ, QP, CM_ID, MR, CTX and SRQ restrack types. A count is
    shown as ``NA`` when the type is absent from ``enum rdma_restrack_type``
    on this kernel, or when its ``res[]`` xarray cannot be walked.

    :param prog: drgn.Program
    :param outfile: A file to write the output to.
    :param report: Whether to open file in append mode for report.
    """
    prefix = "RDMA_RESTRACK_"
    columns = ("pd", "cq", "qp", "cm_id", "mr", "ctx", "srq")

    # Resolve the res[] index of only the types that are reported. Other
    # enumerators (presumably including the RDMA_RESTRACK_MAX sentinel, which
    # would index past the end of res[] -- confirm against restrack.h) are
    # deliberately never dereferenced.
    res_index = {}
    restrack_enum = prog.type("enum rdma_restrack_type")
    for enum_name, enum_value in restrack_enum.enumerators:
        if not enum_name.startswith(prefix):
            continue
        key = enum_name[len(prefix):].lower()
        if key in columns:
            res_index[key] = enum_value

    def count_of(ib_dev, res_type):
        # Return the number of tracked entries as a string, or "NA" when the
        # type is unknown to this kernel or the xarray walk fails.
        if res_type is None:
            return "NA"
        try:
            xa = ib_dev.res[res_type].xa
            return str(sum(1 for _ in xa_for_each(xa.address_of_())))
        except Exception:
            return "NA"

    data = [["Index", "Device", *(col.upper() for col in columns)]]
    index = 0
    dev_list = prog["devices_kset"].list.address_of_()
    for dev in list_for_each_entry("struct device", dev_list, "kobj.entry"):
        # devices_kset is walked without filtering by device class, so for
        # most entries this container_of() result is not a real ib_device.
        # Reading the name may then fault or fail to decode; such entries
        # are skipped on a best-effort basis.
        # NOTE(review): filtering on the device class before container_of()
        # would be more robust than relying on the name prefix -- confirm.
        try:
            ib_dev = container_of(dev, "struct ib_device", "dev")
            dev_name = ib_dev.name.string_().decode()
        except Exception:
            continue
        if not dev_name.startswith("mlx"):
            continue

        row = [index, dev_name]
        row.extend(count_of(ib_dev, res_index.get(col)) for col in columns)
        data.append(row)
        index += 1

    print_table(data, outfile, report)
573619

574620
def rds_stats(
575621
prog: drgn.Program,
@@ -1402,7 +1448,6 @@ def rds_print_msg_queue(
14021448
prog, laddr, raddr, tos, lport, rport, ret, outfile, report
14031449
)
14041450

1405-
14061451
def print_mr_list_head_info(
14071452
prog: drgn.Program, list_head: Object, pool_name: str, list_name: str
14081453
) -> None:
@@ -1529,6 +1574,7 @@ def report(prog: drgn.Program, outfile: Optional[str] = None) -> None:
15291574
return None
15301575

15311576
rds_dev_info(prog, outfile=outfile, report=False)
1577+
rdma_resource_usage(prog, outfile=outfile, report=False)
15321578
rds_sock_info(prog, outfile=outfile, report=True)
15331579
rds_conn_info(prog, outfile=outfile, report=True)
15341580
rds_info_verbose(prog, outfile=outfile, report=True)

0 commit comments

Comments
 (0)