92 changes: 73 additions & 19 deletions src/clusterfuzz/_internal/build_management/build_archive.py
@@ -14,16 +14,14 @@
"""Build Archive manager."""

import abc
import json
import os
from typing import BinaryIO
from typing import Callable
from typing import List
from typing import Optional
from typing import Union
from typing import BinaryIO, Callable, List, Optional, Union

from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.system import archive


# Extensions to exclude when unarchiving a fuzz target. Note that a fuzz target's
# own files like seed corpus, options, etc. are covered by its own regex.
FUZZ_TARGET_EXCLUDED_EXTENSIONS = [
@@ -219,29 +217,81 @@ def unpack(self,

class ChromeBuildArchive(DefaultBuildArchive):
"""Handles chrome build archives. This special cases the default behaviour by
looking at the content of the `.runtime_deps` file, in order to unpack all the
fuzzer dependencies correctly.
In case something goes wrong, this defaults to using the default unpacker.
looking at the content of the `.runtime_deps` file for each fuzzer target in
order to unpack all of its dependencies correctly.

Expects a manifest file named `clusterfuzz_manifest.json` in the root of the
archive to decide which schema version to use when interpreting its contents.
The legacy schema is applied to archives with no manifest.

Defaults to using the default unpacker in case something goes wrong.
"""

def __init__(self,
reader: archive.ArchiveReader,
archive_schema_version: int = 0):
super().__init__(reader)
# The manifest may not exist for earlier versions of archives. In this
# case, default to schema version 0.
manifest_path = 'clusterfuzz_manifest.json'
if self.file_exists(manifest_path):
with self.open(manifest_path) as f:
manifest = json.load(f)
self._archive_schema_version = manifest.get('archive_schema_version', 0)
if self._archive_schema_version == 0:
logs.warning(
'clusterfuzz_manifest.json was incorrectly formatted or missing an archive_schema_version field'
)
else:
self._archive_schema_version = archive_schema_version

def root_dir(self) -> str:
if not hasattr(self, '_root_dir'):
self._root_dir = super().root_dir() # pylint: disable=attribute-defined-outside-init
return self._root_dir

def to_archive_path(self, path: str) -> str:
"""Deps are relative to the Chrome root directory. However, there might be
a common root directory in the archive, which means we need to make sure
the file path is correct.
def get_dependency_path(self, path: str, deps_file_path: str) -> str:
"""Deps are given as paths relative to the deps file where they are listed,
so we need to translate them to the corresponding paths relative to the
archive root.

Args:
path: the dependency path relative to Chrome's root directory.
path: the dependency path relative to the deps file.
deps_file_path: the path to the deps file, relative to the archive root.

Returns:
the path relative to the archive.
the dependency path relative to the archive root.
"""
path = os.path.normpath(path)

# Archive schema version 0 represents legacy behavior. For newer archive
# versions, runtime_deps that were formerly stored under
# {self.root_dir()}/src_root/ are now stored in the root directory, while
# the build artifacts formerly stored in the root directory are now stored
# in the build directory.

if self._archive_schema_version > 0:
# Assumes the dependency path is relative to the deps file and
# transforms it into a full path relative to the archive root. For
# example:
#
# deps_file_path: "/A/B/fuzz_target.runtime_deps"
# os.path.dirname(deps_file_path) => "/A/B/" (call this DEPS_DIR)
# path1: "./my_dep"
# path2: "../../C/my_dep2"
# path3: "D/my_dep3"
#
# os.path.join(DEPS_DIR, path1) => "/A/B/./my_dep"
# os.path.join(DEPS_DIR, path2) => "/A/B/../../C/my_dep2"
# os.path.join(DEPS_DIR, path3) => "/A/B/D/my_dep3"
#
# os.path.normpath(os.path.join(DEPS_DIR, path1)) => "/A/B/my_dep"
# os.path.normpath(os.path.join(DEPS_DIR, path2)) => "/C/my_dep2"
# os.path.normpath(os.path.join(DEPS_DIR, path3)) => "/A/B/D/my_dep3"
return os.path.normpath(
os.path.join(os.path.dirname(deps_file_path), path))

# Legacy behavior. Remap `../../` to `src_root/`.
path = os.path.normpath(path)
if path.startswith('../../'):
path = path.replace('../../', 'src_root/')
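For context, here is a minimal, self-contained sketch of the two path-mapping regimes described in this hunk. The `clusterfuzz_manifest.json` name and the `archive_schema_version` field are taken from the diff above; the sample paths and the standalone `resolve_dep` helper are hypothetical and are not part of this change.

import json
import os


def resolve_dep(path, deps_file_path, schema_version):
  """Maps a runtime_deps entry to a path relative to the archive root."""
  if schema_version > 0:
    # Schema version >= 1: entries are relative to the deps file itself.
    return os.path.normpath(
        os.path.join(os.path.dirname(deps_file_path), path))
  # Legacy schema version 0: entries are relative to the archive root, with a
  # leading '../../' remapped to 'src_root/'.
  path = os.path.normpath(path)
  if path.startswith('../../'):
    path = path.replace('../../', 'src_root/')
  return path


# Hypothetical manifest contents; a missing manifest falls back to version 0.
manifest = json.loads('{"archive_schema_version": 1}')
version = manifest.get('archive_schema_version', 0)
print(resolve_dep('../some_dep', 'build/my_fuzzer.runtime_deps', version))
# -> 'some_dep'
print(resolve_dep('../../gen/data.pak', 'build/my_fuzzer.runtime_deps', 0))
# -> 'src_root/gen/data.pak'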

@@ -253,8 +303,9 @@ def _get_prefix_matcher(self, prefix: str) -> Callable[[str], bool]:
def _get_filename_matcher(self, file: str) -> Callable[[str], bool]:
return lambda f: os.path.basename(f) == file

def _match_files(self, matchers: List[Callable[[str], bool]]
) -> List[archive.ArchiveMemberInfo]:
def _match_files(
self, matchers: List[Callable[[str],
bool]]) -> List[archive.ArchiveMemberInfo]:
res = []
for member in self.list_members():
if any(matcher(member.name) for matcher in matchers):
@@ -271,7 +322,7 @@ def _get_common_files(self) -> List[str]:

def get_target_dependencies(
self, fuzz_target: str) -> List[archive.ArchiveMemberInfo]:
target_path = self.to_archive_path(fuzz_target)
target_path = self.get_path_for_target(fuzz_target)
deps_file = f'{target_path}.runtime_deps'
if not self.file_exists(deps_file):
logs.warning(f'runtime_deps file not found for {target_path}')
@@ -280,7 +331,10 @@ def get_target_dependencies(
res = []
matchers = []
with self.open(deps_file) as f:
deps = [self.to_archive_path(l.decode()) for l in f.read().splitlines()]
deps = [
self.get_dependency_path(l.decode(), deps_file)
for l in f.read().splitlines()
]
for dep in deps:
# We need to match the file prefixes here, because some of the deps are
# globbing the whole directory. Same for files, on mac platform, we
@@ -13,6 +13,7 @@
# limitations under the License.
"""Build archive tests."""
import io
import json
import os
import tempfile
import unittest
@@ -24,6 +25,7 @@
from clusterfuzz._internal.system import shell
from clusterfuzz._internal.tests.test_libs import helpers as test_helpers


TESTDATA_PATH = os.path.join(os.path.dirname(__file__), 'build_archive_data')


@@ -152,8 +154,18 @@ def _add_files_to_archive(self, files):
name=file, is_dir=False, size_bytes=0, mode=0))
self.mock.open.return_value.list_members.return_value = res

def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target):
"""Generates all possible dependencies for the given target."""
def _generate_possible_fuzzer_dependencies_legacy(self, dir_prefix,
fuzz_target):
"""Generates all possible dependencies for the given target.

This implementation represents the legacy archive schema prior to version 1
and should not be used for new tests; we keep it around for backwards
compatibility.

New tests should use a combination of
`_generate_possible_fuzzer_dependencies()` and
`_generate_normalized_dependency_filenames()`.
"""
needed_files = [
f'{fuzz_target}',
f'{fuzz_target}.exe',
Expand All @@ -175,6 +187,41 @@ def _generate_possible_fuzzer_dependencies(self, dir_prefix, fuzz_target):
]
return [os.path.join(dir_prefix, file) for file in needed_files]

def _generate_possible_fuzzer_dependencies(self, fuzz_target):
"""Returns a list of dependencies as file paths relative to
{fuzz_target}.runtime_deps, as they appear in runtime_deps files in real
archives.
"""
return [
f'{fuzz_target}',
f'{fuzz_target}.exe',
f'{fuzz_target}.exe.pdb',
f'{fuzz_target}.dict',
f'{fuzz_target}.options',
f'{fuzz_target}.runtime_deps',
f'{fuzz_target}.par',
f'{fuzz_target}.dSYM/Contents/Resources/DWARF/some_dependency',
'shared.dll',
'shared.dll.pdb',
'./llvm-symbolizer',
'icudtl.dat',
'swiftshader/libGLESv2.so',
'instrumented_libraries/msan/lib/libgcrypt.so.11.8.2',
'afl-fuzz',
'../some_dependency',
'./chrome_crashpad_handler',
]

def _generate_normalized_dependency_filenames(self, dir_prefix, fuzz_target):
"""Returns a list of dependencies as normalized file paths, i.e. with
relative path components like './' and '../' resolved to their true
directory names.
"""
return [
os.path.normpath(os.path.join(dir_prefix, file))
for file in self._generate_possible_fuzzer_dependencies(fuzz_target)
]
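As a quick illustration of what this helper produces, using 'build/' (one of the parameterized prefixes in the tests below) and a few raw runtime_deps entries from the list above; this snippet is illustrative only and not part of the change:

import os

raw_entries = [
    './chrome_crashpad_handler', '../some_dependency',
    'swiftshader/libGLESv2.so'
]
print([os.path.normpath(os.path.join('build/', e)) for e in raw_entries])
# -> ['build/chrome_crashpad_handler', 'some_dependency',
#     'build/swiftshader/libGLESv2.so']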

def _generate_runtime_deps(self, deps):

def _mock_open(_):
@@ -189,12 +236,18 @@ def _mock_open(_):
def _declare_fuzzers(self, fuzzers):
self._declared_fuzzers = fuzzers

def _set_archive_schema_version(self, version):
self.build._archive_schema_version = version

@parameterized.parameterized.expand(['/b/build/', 'build/', ''])
def test_possible_dependencies(self, dir_prefix):
def test_possible_dependencies_legacy(self, dir_prefix):
"""Tests that all the necessary dependencies are correctly extracted from
the runtime_deps file."""
deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies(
the runtime_deps file, using the legacy archive schema where dependency
paths are interpreted as relative to the archive root and `../../` is
remapped to `src_root/`."""
deps_files = self._generate_possible_fuzzer_dependencies_legacy(
'', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies_legacy(
dir_prefix, 'my_fuzzer')
self._add_files_to_archive(needed_files)
self._generate_runtime_deps(deps_files)
@@ -204,49 +257,72 @@ def test_possible_dependencies(self, dir_prefix):
self.assertCountEqual(to_extract, needed_files)

@parameterized.parameterized.expand(['/b/build/', 'build/', ''])
def test_possible_dependencies_archive_without_normalized_path(
def test_possible_dependencies_deps_without_normalized_path_legacy(
self, dir_prefix):
"""Tests that the chrome build handler correctly handles a mix of
normalized and non-normalized paths."""
deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies(
deps_files = self._generate_possible_fuzzer_dependencies_legacy(
'', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies_legacy(
dir_prefix, 'my_fuzzer')
self._add_files_to_archive(needed_files)

# we want our runtime_deps to have normalized path so that they do not
# exactly match the archive paths.
self._add_files_to_archive([os.path.normpath(f) for f in needed_files])
self._generate_runtime_deps(deps_files)
self._declare_fuzzers(['my_fuzzer'])
to_extract = self.build.get_target_dependencies('my_fuzzer')
to_extract = [f.name for f in to_extract]
self.assertCountEqual(to_extract, needed_files)
self.assertCountEqual(to_extract,
[os.path.normpath(f) for f in needed_files])

@parameterized.parameterized.expand(['/b/build/', 'build/', ''])
def test_possible_dependencies_deps_without_normalized_path(self, dir_prefix):
"""Tests that the chrome build handler correctly handles a mix of
normalized and non-normalized paths."""
deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies(
def test_other_fuzzer_not_extracted_legacy(self, dir_prefix):
"""Tests that the chrome build handler only unpacks dependencies for the
requested fuzzer, even if other fuzzers exist in the build."""
deps_files = self._generate_possible_fuzzer_dependencies_legacy(
'', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies_legacy(
dir_prefix, 'my_fuzzer')
self._add_files_to_archive([os.path.normpath(f) for f in needed_files])
other_fuzzer = self._generate_possible_fuzzer_dependencies_legacy(
dir_prefix, 'other_fuzzer')
self._add_files_to_archive(list(set(needed_files + other_fuzzer)))
self._generate_runtime_deps(deps_files)
self._declare_fuzzers(['my_fuzzer', 'other_fuzzer'])
to_extract = self.build.get_target_dependencies('my_fuzzer')
to_extract = [f.name for f in to_extract]
self.assertCountEqual(to_extract, needed_files)

@parameterized.parameterized.expand(['/b/build/', 'build/', ''])
def test_possible_dependencies(self, dir_prefix):
"""Tests that all the necessary dependencies are correctly extracted from
the runtime_deps file.

Under the current archive schema, dependency paths in `runtime_deps` files
are interpreted as being relative to the file itself, meaning that they must
be normalized to the equivalent path relative to the archive root before
they can be extracted.
"""
self._set_archive_schema_version(1)
deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer')
deps_files = self._generate_normalized_dependency_filenames(
dir_prefix, 'my_fuzzer')
self._add_files_to_archive(deps_files)
self._generate_runtime_deps(deps_entries)
self._declare_fuzzers(['my_fuzzer'])
to_extract = self.build.get_target_dependencies('my_fuzzer')
to_extract = [f.name for f in to_extract]
self.assertCountEqual(to_extract,
[os.path.normpath(f) for f in needed_files])
self.assertCountEqual(to_extract, deps_files)
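For contrast with the legacy test above, the same kind of runtime_deps entry resolves differently under the two schemas. A tiny hypothetical example (the entry names and the 'build/my_fuzzer.runtime_deps' location are illustrative only, not taken from the test data):

import os

# Current schema (version >= 1): entries are relative to the deps file, here
# assumed to live at 'build/my_fuzzer.runtime_deps'.
print(os.path.normpath(os.path.join('build', 'gen/data.pak')))
# -> 'build/gen/data.pak'

# Legacy schema (version 0): entries are relative to the archive root, with a
# leading '../../' remapped to 'src_root/'.
print(os.path.normpath('../../testdata/input.bin').replace('../../', 'src_root/'))
# -> 'src_root/testdata/input.bin'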

@parameterized.parameterized.expand(['/b/build/', 'build/', ''])
def test_other_fuzzer_not_extracted(self, dir_prefix):
"""Tests that the chrome build handler only unpacks dependencies for the
requested fuzzer, even if other fuzzers exist in the build."""
deps_files = self._generate_possible_fuzzer_dependencies('', 'my_fuzzer')
needed_files = self._generate_possible_fuzzer_dependencies(
self._set_archive_schema_version(1)
deps_entries = self._generate_possible_fuzzer_dependencies('my_fuzzer')
needed_files = self._generate_normalized_dependency_filenames(
dir_prefix, 'my_fuzzer')
other_fuzzer = self._generate_possible_fuzzer_dependencies(
other_fuzzer = self._generate_normalized_dependency_filenames(
dir_prefix, 'other_fuzzer')
self._add_files_to_archive(list(set(needed_files + other_fuzzer)))
self._generate_runtime_deps(deps_files)
self._generate_runtime_deps(deps_entries)
self._declare_fuzzers(['my_fuzzer', 'other_fuzzer'])
to_extract = self.build.get_target_dependencies('my_fuzzer')
to_extract = [f.name for f in to_extract]
@@ -256,7 +332,8 @@ def test_other_fuzzer_not_extracted(self, dir_prefix):
def test_dsyms_are_correctly_unpacked(self, dir_prefix):
"""Tests that even if not listed in the runtime deps, dSYMs are correctly unpacked.
"""
needed_files = self._generate_possible_fuzzer_dependencies(
self._set_archive_schema_version(1)
needed_files = self._generate_normalized_dependency_filenames(
dir_prefix, 'my_fuzzer')
self._add_files_to_archive(needed_files)
self._generate_runtime_deps(['my_fuzzer'])