Skip to content

Commit d404e06

Browse files
e3krisztianqkaiser
authored andcommitted
feat(extractors): introduce FileSystem helper class to remove boilerplate in Extractors
For every handler every file system operation needs to be checked for potential path traversal, attempt to create devices, and the ones that could be problematic should be prevented and reported. This was managed so far individually in ever handler, which resulted in reporting the same problems somewhat differently and allowed for potential bugs in implementation. The FileSystem helper class introduced here should take care of these repetitive tasks and hopefully could make for more succinct, yet safer Handler/Extractor implementations.
1 parent 388c535 commit d404e06

File tree

4 files changed

+446
-1
lines changed

4 files changed

+446
-1
lines changed

docs/development.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,15 @@ Two methods are exposed by this class:
526526
- `extract()`: you must override this function. This is where you'll perform the
527527
extraction of `inpath` content into `outdir` extraction directory
528528

529+
!!! Recommendation
530+
531+
Although it is possible to implement `extract()` with path manipulations,
532+
checks for path traversals, and performing io by using Python libraries
533+
(`os`, `pathlib.Path`), but it turns out somewhat tedious.
534+
Instead we recommend to remove boilerplate and use a helper class `FileSystem` from
535+
[unblob/file_utils.py](https://github.com/onekey-sec/unblob/blob/main/unblob/file_utils.py)
536+
which ensures that all file objects are created under its root.
537+
529538
### DirectoryExtractor class
530539

531540
The `DirectoryExtractor` interface is defined in
@@ -552,6 +561,11 @@ Two methods are exposed by this class:
552561
- `extract()`: you must override this function. This is where you'll perform the
553562
extraction of `paths` files into `outdir` extraction directory
554563

564+
!!! Recommendation
565+
566+
Similarly to `Extractor`, it is recommended to use the `FileSystem` helper class to
567+
implement `extract`.
568+
555569
### Example Extractor
556570

557571
Extractors are quite complex beasts, so rather than trying to come up with a

tests/test_file_utils.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import io
2+
import os
23
from pathlib import Path
34
from typing import List
45

@@ -7,8 +8,10 @@
78
from unblob.file_utils import (
89
Endian,
910
File,
11+
FileSystem,
1012
InvalidInputFormat,
1113
StructParser,
14+
chop_root,
1215
convert_int8,
1316
convert_int16,
1417
convert_int32,
@@ -360,3 +363,198 @@ def test_get_endian_resets_the_file_pointer(self):
360363
with pytest.raises(InvalidInputFormat):
361364
get_endian(file, 0xFFFF_0000)
362365
assert file.tell() == pos
366+
367+
368+
@pytest.mark.parametrize(
369+
"input_path, expected",
370+
[
371+
pytest.param("/", ".", id="absolute-root"),
372+
pytest.param("/path/to/file", "path/to/file", id="absolute-path"),
373+
pytest.param(".", ".", id="current-directory"),
374+
pytest.param("path/to/file", "path/to/file", id="relative-path"),
375+
],
376+
)
377+
def test_chop_root(input_path: str, expected: str):
378+
assert chop_root(Path(input_path)) == Path(expected)
379+
380+
381+
class TestFileSystem:
382+
@pytest.mark.parametrize(
383+
"path",
384+
[
385+
"/etc/passwd",
386+
"file",
387+
"some/dir/file",
388+
"some/dir/../file",
389+
"some/dir/../../file",
390+
],
391+
)
392+
def test_get_checked_path_success(self, path):
393+
fs = FileSystem(Path("/unblob/sandbox"))
394+
checked_path = fs.get_checked_path(Path(path), "test")
395+
assert checked_path
396+
assert fs.problems == []
397+
assert checked_path.relative_to(fs.root)
398+
399+
@pytest.mark.parametrize(
400+
"path",
401+
[
402+
"../file",
403+
"some/dir/../../../file",
404+
"some/dir/../../../",
405+
"some/dir/../../..",
406+
],
407+
)
408+
def test_get_checked_path_path_traversal_is_reported(self, path):
409+
fs = FileSystem(Path("/unblob/sandbox"))
410+
assert not fs.get_checked_path(Path(path), "test")
411+
assert fs.problems
412+
413+
def test_get_checked_path_path_traversal_reports(self):
414+
fs = FileSystem(Path("/unblob/sandbox"))
415+
op1 = f"test1-{object()}"
416+
op2 = f"test2-{object()}"
417+
assert op1 != op2
418+
assert not fs.get_checked_path(Path("../file"), op1)
419+
assert not fs.get_checked_path(Path("../etc/passwd"), op2)
420+
421+
report1, report2 = fs.problems
422+
423+
assert "path traversal" in report1.problem
424+
assert op1 in report1.problem
425+
assert report1.path == "../file"
426+
427+
assert "path traversal" in report2.problem
428+
assert op2 in report2.problem
429+
assert report2.path == "../etc/passwd"
430+
431+
@pytest.fixture
432+
def sandbox_parent(self, tmp_path: Path):
433+
return tmp_path
434+
435+
@pytest.fixture
436+
def sandbox_root(self, sandbox_parent: Path):
437+
return sandbox_parent / "sandbox"
438+
439+
@pytest.fixture
440+
def sandbox(self, sandbox_root: Path):
441+
sandbox_root.mkdir(parents=True, exist_ok=True)
442+
return FileSystem(sandbox_root)
443+
444+
def test_carve(self, sandbox: FileSystem):
445+
file = File.from_bytes(b"0123456789")
446+
sandbox.carve(Path("carved"), file, 1, 2)
447+
448+
assert (sandbox.root / "carved").read_bytes() == b"12"
449+
assert sandbox.problems == []
450+
451+
def test_carve_outside_sandbox(self, sandbox: FileSystem):
452+
file = File.from_bytes(b"0123456789")
453+
sandbox.carve(Path("../carved"), file, 1, 2)
454+
455+
assert not (sandbox.root / "../carved").exists()
456+
assert sandbox.problems
457+
458+
def test_mkdir(self, sandbox: FileSystem):
459+
sandbox.mkdir(Path("directory"))
460+
461+
assert (sandbox.root / "directory").is_dir()
462+
assert sandbox.problems == []
463+
464+
def test_mkdir_outside_sandbox(self, sandbox: FileSystem):
465+
sandbox.mkdir(Path("../directory"))
466+
467+
assert not (sandbox.root / "../directory").exists()
468+
assert sandbox.problems
469+
470+
def test_mkfifo(self, sandbox: FileSystem):
471+
sandbox.mkfifo(Path("named_pipe"))
472+
473+
assert (sandbox.root / "named_pipe").is_fifo()
474+
assert sandbox.problems == []
475+
476+
def test_mkfifo_outside_sandbox(self, sandbox: FileSystem):
477+
sandbox.mkfifo(Path("../named_pipe"))
478+
479+
assert not (sandbox.root / "../named_pipe").exists()
480+
assert sandbox.problems
481+
482+
def test_create_symlink(self, sandbox: FileSystem):
483+
sandbox.create_symlink(Path("target file"), Path("symlink"))
484+
485+
output_path = sandbox.root / "symlink"
486+
assert not output_path.exists()
487+
assert os.readlink(output_path) == "target file"
488+
assert sandbox.problems == []
489+
490+
def test_create_symlink_absolute_paths(self, sandbox: FileSystem):
491+
sandbox.write_bytes(Path("target file"), b"test content")
492+
sandbox.create_symlink(Path("/target file"), Path("/symlink"))
493+
494+
output_path = sandbox.root / "symlink"
495+
assert output_path.exists()
496+
assert os.readlink(output_path) == "target file"
497+
assert sandbox.problems == []
498+
499+
def test_create_symlink_absolute_paths_self_referenced(self, sandbox: FileSystem):
500+
sandbox.mkdir(Path("/etc"))
501+
sandbox.create_symlink(Path("/etc/passwd"), Path("/etc/passwd"))
502+
503+
output_path = sandbox.root / "etc/passwd"
504+
assert not output_path.exists()
505+
assert os.readlink(output_path) == "../etc/passwd"
506+
assert sandbox.problems == []
507+
508+
def test_create_symlink_outside_sandbox(self, sandbox: FileSystem):
509+
sandbox.create_symlink(Path("target file"), Path("../symlink"))
510+
511+
output_path = sandbox.root / "../symlink"
512+
assert not os.path.lexists(output_path)
513+
assert sandbox.problems
514+
515+
def test_create_symlink_path_traversal(
516+
self, sandbox: FileSystem, sandbox_parent: Path
517+
):
518+
"""Document a remaining path traversal scenario through a symlink chain.
519+
520+
unblob.extractor.fix_symlinks() exists to cover up cases like this.
521+
"""
522+
(sandbox_parent / "outer-secret").write_text("private key")
523+
524+
# The path traversal is possible because at the creation of "secret" "future" does not exist
525+
# so it is not yet possible to determine if it will be a symlink to be allowed or not.
526+
# When the order of the below 2 lines are changed, the path traversal is recognized and prevented.
527+
sandbox.create_symlink(Path("future/../outer-secret"), Path("secret"))
528+
sandbox.create_symlink(Path("."), Path("future"))
529+
530+
assert sandbox.problems == []
531+
assert (sandbox.root / "secret").read_text() == "private key"
532+
533+
def test_create_hardlink(self, sandbox: FileSystem):
534+
output_path = sandbox.root / "hardlink"
535+
linked_file = sandbox.root / "file"
536+
linked_file.write_bytes(b"")
537+
sandbox.create_hardlink(Path("file"), Path("hardlink"))
538+
539+
assert output_path.stat().st_nlink == 2
540+
assert output_path.stat().st_ino == linked_file.stat().st_ino
541+
assert sandbox.problems == []
542+
543+
def test_create_hardlink_absolute_paths(self, sandbox: FileSystem):
544+
output_path = sandbox.root / "hardlink"
545+
linked_file = sandbox.root / "file"
546+
linked_file.write_bytes(b"")
547+
sandbox.create_hardlink(Path("/file"), Path("/hardlink"))
548+
549+
assert output_path.stat().st_nlink == 2
550+
assert output_path.stat().st_ino == linked_file.stat().st_ino
551+
assert sandbox.problems == []
552+
553+
def test_create_hardlink_outside_sandbox(self, sandbox: FileSystem):
554+
output_path = sandbox.root / "../hardlink"
555+
linked_file = sandbox.root / "file"
556+
linked_file.write_bytes(b"")
557+
sandbox.create_hardlink(Path("file"), Path("../hardlink"))
558+
559+
assert not os.path.lexists(output_path)
560+
assert sandbox.problems

0 commit comments

Comments
 (0)