diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba3b1dd34..dd836e102 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -75,6 +75,7 @@ The table below shows which release corresponds to each branch, and what date th
 
 ## 5.0.0 (`dev`)
 
+- [#2643][2643] Refactor getdents.py, add support for SYS_getdents64
 - [#2638][2638] feat: add disable_corefiles context option
 - [#2598][2598] aarch64: Fix ABI definition
 - [#2419][2419] riscv: avoid compressed instructions (if you need compressed, use .option rvc)
@@ -105,6 +106,7 @@ The table below shows which release corresponds to each branch, and what date th
 - [#2639][2639] ROP: Remove stdout and argv workaround in ROPgadget invocation
 - [#2630][2630] support `preexec_fn` in `debug()`
 
+[2643]: https://github.com/Gallopsled/pwntools/pull/2643
 [2638]: https://github.com/Gallopsled/pwntools/pull/2638
 [2598]: https://github.com/Gallopsled/pwntools/pull/2598
 [2419]: https://github.com/Gallopsled/pwntools/pull/2419
diff --git a/pwnlib/util/getdents.py b/pwnlib/util/getdents.py
index 729e9f7a0..0c0434bef 100644
--- a/pwnlib/util/getdents.py
+++ b/pwnlib/util/getdents.py
@@ -1,62 +1,131 @@
-from __future__ import absolute_import
-from __future__ import division
-
 from pwnlib.context import context
-from pwnlib.util.fiddling import hexdump
 from pwnlib.util.packing import unpack
+from pwnlib.util.fiddling import unhex
+from pwnlib.log import getLogger
+from enum import IntEnum
+
+log = getLogger(__name__)
+
+class Dtype(IntEnum):
+    """File type stored in the d_type byte of a directory entry."""
+    DT_UNK = 0
+    DT_FIFO = 1
+    DT_CHR = 2
+    DT_DIR = 4
+    DT_BLK = 6
+    DT_REG = 8
+    DT_LNK = 10
+    DT_SOCK = 12
+    DT_WHT = 14
+
+class linux_dirent:
+    """
+    Represent struct linux_dirent
+
+    struct linux_dirent
+    {
+        unsigned long d_ino;
+        unsigned long d_off;
+        unsigned short d_reclen;
+        char d_name[];
+    };
+
+    struct linux_dirent64 {
+        u64 d_ino;
+        s64 d_off;
+        unsigned short d_reclen;
+        unsigned char d_type;
+        char d_name[];
+    };
+
+    enum
+    {
+        DT_UNKNOWN = 0,
+        DT_FIFO = 1,
+        DT_CHR = 2,
+        DT_DIR = 4,
+        DT_BLK = 6,
+        DT_REG = 8,
+        DT_LNK = 10,
+        DT_SOCK = 12,
+        DT_WHT = 14
+    };
+    """
+    d_ino: int
+    d_off: int
+    d_reclen: int
+    d_type: Dtype
+    d_name: str
 
 
-class linux_dirent(object):
-    def __init__(self, buf):
-        n = context.bytes
+    def __init__(self, buf: bytes, is_dirent64: bool):
+        # linux_dirent64 always has 64-bit d_ino/d_off; linux_dirent uses the
+        # native long size of the target.
+        size_t = 8 if is_dirent64 else context.bytes
+        word_size = 8 * size_t
 
-        # Long
-        self.d_ino = unpack(buf[:n])
-        buf=buf[n:]
+        # Pass the width explicitly: unpack() defaults to context.bits, which
+        # rejects the 8-byte linux_dirent64 fields on a 32-bit target.
+        self.d_ino = unpack(buf[0:size_t], word_size)
+        self.d_off = unpack(buf[size_t : 2 * size_t], word_size)
+        self.d_reclen = unpack(buf[2 * size_t : 2 * size_t + 2], 16)
 
-        # Long
-        self.d_off = unpack(buf[:n])
-        buf=buf[n:]
+        # A record always extends past its fixed header. Rejecting shorter
+        # lengths also stops dirents() from looping forever on d_reclen == 0
+        # (e.g. zero padding after the last record).
+        if self.d_reclen <= 2 * size_t + 3:
+            raise ValueError("invalid d_reclen: %d" % self.d_reclen)
 
-        # Short
-        self.d_reclen = unpack(buf[:2], 16)
-        buf=buf[2:]
+        if is_dirent64:
+            # d_type directly follows d_reclen, ahead of the name.
+            d_type = unpack(buf[2 * size_t + 2 : 2 * size_t + 3], 8)
+            self.d_name = buf[2 * size_t + 3 : self.d_reclen - 1].split(b'\x00', 1)[0].decode('utf-8')
+        else:
+            # d_type is stored in the last byte of the record.
+            d_type = unpack(buf[self.d_reclen - 1 : self.d_reclen], 8)
+            self.d_name = buf[2 * size_t + 2 : self.d_reclen - 1].split(b'\x00', 1)[0].decode('utf-8')
+        self.d_type = Dtype(d_type)
 
-        # Name
-        self.d_name = buf[:buf.index(b'\x00')].decode('utf-8')
+    def __str__(self):
+        return self.d_name
 
-    def __len__(self):
-        return self.d_reclen # 2 * context.bytes + 2 + len(self.d_name) + 1
+    def __repr__(self):
+        return f'{self.d_type.name:<8}{self.d_name}'
 
-    def __str__(self):
-        return "inode=%i %r" % (self.d_ino, self.d_name)
 
-def dirents(buf):
-    """unpack_dents(buf) -> list
+def dirents(buf: bytes, is_dirent64: bool = False) -> list[linux_dirent]:
+    """dirents(buf: bytes, is_dirent64: bool=False) -> list[linux_dirent]:
 
-    Extracts data from a buffer emitted by getdents()
+    Extracts data from a buffer emitted by getdents or getdents64
 
     Arguments:
-        buf(str): Byte array
+        buf(bytes): getdents result
+        is_dirent64(bool): Is buf generated by getdents64
 
     Returns:
-        A list of filenames.
+        A list of linux_dirent objects, in the order they appear in buf
 
     Example:
-
-        >>> data = '5ade6d010100000010002e0000000004010000000200000010002e2e006e3d04092b6d010300000010007461736b00045bde6d010400000010006664003b3504'
-        >>> data = unhex(data)
-        >>> print(dirents(data))
-        ['.', '..', 'fd', 'task']
+        >>> with context.local(bits = 64):
+        ...     buf = unhex('223a2c0000000000786a631cc120fc1a2000746573742e6300e57464040000080d002c00000000004802ee451f347e3018002e000000000402002c0000000000ffffffffffffff7f18002e2e00000004')
+        ...     print(dirents(buf, False))
+        ...     buf = unhex('223a2c0000000000786a631cc120fc1a200008746573742e63007464040000000d002c00000000004802ee451f347e301800042e0000000002002c0000000000ffffffffffffff7f1800042e2e000000')
+        ...     print(dirents(buf, True))
+        ...
+        [DT_REG  test.c, DT_DIR  ., DT_DIR  ..]
+        [DT_REG  test.c, DT_DIR  ., DT_DIR  ..]
     """
-    d = []
-    while buf:
+    bpos = 0
+    buf_len = len(buf)
+    entries = []
+
+    while bpos < buf_len:
         try:
-            ent = linux_dirent(buf)
-        except ValueError:
+            dirent = linux_dirent(buf[bpos:], is_dirent64)
+            bpos += dirent.d_reclen
+            entries.append(dirent)
+        except (ValueError, UnicodeDecodeError):
+            log.warning("Failed to parse struct linux_dirent at position %d", bpos)
             break
 
-        d.append(ent.d_name)
-        buf = buf[len(ent):]
-
-    return sorted(d)
+    return entries