44Helper to view slabinfo data
55"""
66import argparse
7+ from typing import List
78from typing import NamedTuple
89from typing import Set
910from typing import Tuple
1011
1112from drgn import cast
13+ from drgn import FaultError
1214from drgn import Object
1315from drgn import Program
16+ from drgn import ProgramFlags
1417from drgn import Type
1518from drgn .helpers .linux .cpumask import for_each_present_cpu
1619from drgn .helpers .linux .list import list_for_each_entry
@@ -40,6 +43,8 @@ class SlabCacheInfo(NamedTuple):
4043 """Slab size"""
4144 name : str
4245 """Name of the slab cache"""
46+ freelist_corrupt_cpus : List [int ]
47+ """A list of CPUs for which the freelist was found to be corrupt"""
4348
4449
4550def _slab_type (prog : Program ) -> Type :
@@ -204,19 +209,41 @@ def slub_per_cpu_partial_free(cpu_partial: Object) -> int:
204209 return partial_free
205210
206211
207- def kmem_cache_slub_info (cache : Object ) -> Tuple [int , int ]:
212+ class _CpuSlubWrapper :
213+ def __init__ (self , obj ):
214+ self ._obj = obj
215+
216+ def __getattr__ (self , key ):
217+ if key == "cpu_slab" :
218+ raise AttributeError ("CpuSlubWrapper!" )
219+ return self ._obj .__getattribute__ (key )
220+
221+
222+ def kmem_cache_slub_info (cache : Object ) -> Tuple [int , int , List [int ]]:
208223 """
209224 For given kmem_cache object, parse through each cpu
210225 and get number of total slabs and free objects
211226
227+ If the CPU freelist was corrupt, then we do our best effort to count free
228+ objects, but we may undercount them. We set the corruption flag when this
229+ happens.
230+
212231 :param: ``struct kmem_cache`` drgn object
213- :returns: total slabs, free objects
232+ :returns: total slabs, free objects, corruption instances
214233 """
215234 prog = cache .prog_
216235 use_slab = _has_struct_slab (prog )
217236
218237 total_slabs = objects = free_objects = 0
219- slub_helper = _get_slab_cache_helper (cache )
238+
239+ # The "cpu_slab" variable is used by the slab helper to preload the percpu
240+ # freelists. Not only does this duplicate work we're about to do, but also
241+ # corrupt slab caches will crash this function before we can detect which
242+ # CPU is corrupt. Pretend we have no "cpu_slab" variable when getting the
243+ # helper. This depends on implementation details: we will improve the helper
244+ # upstream to avoid this for the future.
245+ slub_helper = _get_slab_cache_helper (_CpuSlubWrapper (cache ))
246+ corrupt = []
220247
221248 for cpuid in for_each_present_cpu (prog ):
222249 per_cpu_slab = per_cpu_ptr (cache .cpu_slab , cpuid )
@@ -237,15 +264,25 @@ def kmem_cache_slub_info(cache: Object) -> Tuple[int, int]:
237264 objects = 0
238265
239266 free_objects += objects - page_inuse
240- cpu_free_objects = slub_get_cpu_freelist_cnt (cpu_freelist , slub_helper )
241- free_objects += cpu_free_objects
267+
268+ # Easily the most common form of corruption in the slab allocator comes
269+ # from use after free, which overwrites the freelist pointer and causes
270+ # a fault error. Catch this and report it for later.
271+ try :
272+ cpu_free_objects = slub_get_cpu_freelist_cnt (
273+ cpu_freelist , slub_helper
274+ )
275+ except FaultError :
276+ corrupt .append (cpuid )
277+ else :
278+ free_objects += cpu_free_objects
242279
243280 partial_frees = slub_per_cpu_partial_free (cpu_partial )
244281 free_objects += partial_frees
245282
246283 total_slabs += 1
247284
248- return total_slabs , free_objects
285+ return total_slabs , free_objects , corrupt
249286
250287
251288def get_kmem_cache_slub_info (cache : Object ) -> SlabCacheInfo :
@@ -255,7 +292,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
255292 :param cache: ``struct kmem_cache`` drgn object
256293 :returns: a :class:`SlabCacheInfo` with statistics about the cache
257294 """
258- total_slabs , free_objects = kmem_cache_slub_info (cache )
295+ total_slabs , free_objects , corrupt = kmem_cache_slub_info (cache )
259296 (
260297 nr_slabs ,
261298 nr_total_objs ,
@@ -280,6 +317,7 @@ def get_kmem_cache_slub_info(cache: Object) -> SlabCacheInfo:
280317 total_slabs ,
281318 ssize ,
282319 cache .name .string_ ().decode ("utf-8" ),
320+ corrupt ,
283321 )
284322
285323
@@ -296,19 +334,42 @@ def print_slab_info(prog: Program) -> None:
296334 "NAME" ,
297335 ]
298336 )
337+ corruption = []
299338 for cache in for_each_slab_cache (prog ):
300339 slabinfo = get_kmem_cache_slub_info (cache )
340+ maybe_asterisk = ""
341+ if slabinfo .freelist_corrupt_cpus :
342+ maybe_asterisk = "*"
343+ corruption .append (slabinfo )
301344 table .row (
302345 slabinfo .cache .value_ (),
303346 slabinfo .objsize ,
304- slabinfo .allocated ,
347+ f" { slabinfo .allocated } { maybe_asterisk } " ,
305348 slabinfo .total ,
306349 slabinfo .nr_slabs ,
307350 f"{ int (slabinfo .ssize / 1024 )} k" ,
308351 slabinfo .name ,
309352 )
310353 table .write ()
311354
355+ if corruption :
356+ if prog .flags & ProgramFlags .IS_LIVE :
357+ print (
358+ "NOTE: freelist corruption was detected. This is not "
359+ "necessarily an error, as live systems may encounter race "
360+ "conditions."
361+ )
362+ else :
363+ print (
364+ "WARNING: freelist corruption was detected. It is likely that "
365+ "a use-after-free bug occurred."
366+ )
367+ table = FixedTable (["CACHE:<24s" , "CORRUPT CPUS" ])
368+ for slabinfo in corruption :
369+ cpus = ", " .join (map (str , slabinfo .freelist_corrupt_cpus ))
370+ table .row (slabinfo .name , cpus )
371+ table .write ()
372+
312373
313374class SlabInfo (CorelensModule ):
314375 """Print info about each slab cache"""
0 commit comments