Gallopsled · dbgbgtf1 · Oct 25, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -75,6 +75,7 @@ The table below shows which release corresponds to each branch, and what date th
 
 ## 5.0.0 (`dev`)
 
+- [#2643][2643] Refactor getdents.py, add support for SYS_getdents64
 - [#2638][2638] feat: add disable_corefiles context option
 - [#2598][2598] aarch64: Fix ABI definition
 - [#2419][2419] riscv: avoid compressed instructions (if you need compressed, use .option rvc)
@@ -105,6 +106,7 @@ The table below shows which release corresponds to each branch, and what date th
 - [#2639][2639] ROP: Remove stdout and argv workaround in ROPgadget invocation
 - [#2630][2630] support `preexec_fn` in `debug()`
 
+[2643]: https://github.com/Gallopsled/pwntools/pull/2643
 [2638]: https://github.com/Gallopsled/pwntools/pull/2638
 [2598]: https://github.com/Gallopsled/pwntools/pull/2598
 [2419]: https://github.com/Gallopsled/pwntools/pull/2419

diff --git a/pwnlib/util/getdents.py b/pwnlib/util/getdents.py
@@ -1,62 +1,117 @@
-from __future__ import absolute_import
-from __future__ import division
-
 from pwnlib.context import context
-from pwnlib.util.fiddling import hexdump
 from pwnlib.util.packing import unpack
+from pwnlib.util.fiddling import unhex
+from pwnlib.log import getLogger
+from enum import IntEnum
+
+log = getLogger(__name__)
+
+class Dtype(IntEnum):
+    DT_UNK = 0
+    DT_FIFO = 1
+    DT_CHR = 2
+    DT_DIR = 4
+    DT_BLK = 6
+    DT_REG = 8
+    DT_LNK = 10
+    DT_SOCK = 12
+
+class linux_dirent:
+    """
+    Represent struct linux_dirent
+
+    struct linux_dirent
+    {
+      unsigned long d_ino;
+      unsigned long d_off;
+      unsigned short d_reclen;
+      char d_name[];  /* followed by padding; char d_type is the last byte, at offset d_reclen - 1 */
+    };
+
+    struct linux_dirent64 {
+        u64		d_ino;
+        s64		d_off;
+        unsigned short	d_reclen;
+        unsigned char	d_type;
+        char		d_name[];
+    };
+
+    enum
+    {
+        DT_UNKNOWN = 0,
+        DT_FIFO = 1,
+        DT_CHR = 2,
+        DT_DIR = 4,
+        DT_BLK = 6,
+        DT_REG = 8,
+        DT_LNK = 10,
+        DT_SOCK = 12,
+        DT_WHT = 14
+    };
+    """
 
+    d_ino: int
+    d_off: int
+    d_reclen: int
+    d_type: Dtype
+    d_name: str
 
-class linux_dirent(object):
-    def __init__(self, buf):
-        n = context.bytes
+    def __init__(self, buf: bytes, is_dirent64: bool):
+        size_t = 8 if is_dirent64 else int(context.bits / 8)
 
-        # Long
-        self.d_ino    = unpack(buf[:n])
-        buf=buf[n:]
+        self.d_ino = unpack(buf[0:size_t])
+        self.d_off = unpack(buf[size_t : 2 * size_t])
+        self.d_reclen = unpack(buf[2 * size_t : 2 * size_t + 2], 16)
 
-        # Long
-        self.d_off    = unpack(buf[:n])
-        buf=buf[n:]
+        if is_dirent64:
+            d_type = unpack(buf[2 * size_t + 2 : 2 * size_t + 3], 8)
+            self.d_name = buf[2 * size_t + 3 : self.d_reclen - 1].split(b'\x00', 1)[0].decode('utf-8')
 
-        # Short
-        self.d_reclen = unpack(buf[:2], 16)
-        buf=buf[2:]
+        else:
+            d_type = unpack(buf[self.d_reclen - 1 : self.d_reclen], 8)
+            self.d_name = buf[2 * size_t + 2 : self.d_reclen - 1].split(b'\x00', 1)[0].decode('utf-8')
+        self.d_type = Dtype(d_type)
 
-        # Name
-        self.d_name = buf[:buf.index(b'\x00')].decode('utf-8')
+    def __str__(self):
+        return self.d_name
 
-    def __len__(self):
-        return self.d_reclen # 2 * context.bytes + 2 + len(self.d_name) + 1
+    def __repr__(self):
+        return f'{self.d_type.name:<8}{self.d_name}'
 
-    def __str__(self):
-        return "inode=%i %r" % (self.d_ino, self.d_name)
 
-def dirents(buf):
-    """unpack_dents(buf) -> list
+def dirents(buf: bytes, is_dirent64: bool = False) -> list[linux_dirent]:
+    """dirents(buf: bytes, is_dirent64: bool=False) -> list[linux_dirent]:
 
-    Extracts data from a buffer emitted by getdents()
+    Extracts data from a buffer emitted by getdents or getdents64
 
     Arguments:
-        buf(str): Byte array
+        buf(bytes): getdents result
+        is_dirent64(bool): Whether buf was generated by getdents64
 
     Returns:
-        A list of filenames.
+        A list of linux_dirent objects, in the order they appear in buf
 
     Example:
-
-        >>> data = '5ade6d010100000010002e0000000004010000000200000010002e2e006e3d04092b6d010300000010007461736b00045bde6d010400000010006664003b3504'
-        >>> data = unhex(data)
-        >>> print(dirents(data))
-        ['.', '..', 'fd', 'task']
+        >>> with context.local(bits = 64):
+        ...     buf = unhex('223a2c0000000000786a631cc120fc1a2000746573742e6300e57464040000080d002c00000000004802ee451f347e3018002e000000000402002c0000000000ffffffffffffff7f18002e2e00000004')
+        ...     print(dirents(buf, False))
+        ...     buf = unhex('223a2c0000000000786a631cc120fc1a200008746573742e63007464040000000d002c00000000004802ee451f347e301800042e0000000002002c0000000000ffffffffffffff7f1800042e2e000000')
+        ...     print(dirents(buf, True))
+        ...     
+        [DT_REG  test.c, DT_DIR  ., DT_DIR  ..]
+        [DT_REG  test.c, DT_DIR  ., DT_DIR  ..]
     """
-    d = []
 
-    while buf:
+    bpos = 0
+    buf_len = len(buf)
+    entries = []
+
+    while bpos < buf_len:
         try:
-            ent = linux_dirent(buf)
-        except ValueError:
+            dirent = linux_dirent(buf[bpos:], is_dirent64)
+            bpos += dirent.d_reclen
+            entries.append(dirent)
+        except (ValueError, UnicodeDecodeError):
+            log.warning("Failed to parse struct linux_dirent at position %d", bpos)
             break
-        d.append(ent.d_name)
-        buf = buf[len(ent):]
-
-    return sorted(d)
+    return entries