Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion scanpipe/pipelines/deploy_to_develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def steps(cls):
cls.map_javascript,
cls.map_javascript_symbols,
cls.map_javascript_strings,
cls.get_symbols_from_binaries,
cls.map_elf,
cls.map_macho,
cls.map_winpe,
Expand Down Expand Up @@ -197,6 +198,14 @@ def map_javascript_strings(self):
"""Map deployed JavaScript, TypeScript to its sources using string literals."""
d2d.map_javascript_strings(project=self.project, logger=self.log)

def get_symbols_from_binaries(self):
    """Extract symbols from ELF, Mach-O and Windows binaries for mapping."""
    extract = d2d.extract_binary_symbols
    # The selected pipeline groups decide which binary kinds get processed.
    extraction_kwargs = {
        "project": self.project,
        "options": self.selected_groups,
        "logger": self.log,
    }
    extract(**extraction_kwargs)

@optional_step("Elf")
def map_elf(self):
"""Map ELF binaries to their sources using dwarf paths and symbols."""
Expand All @@ -215,8 +224,9 @@ def map_winpe(self):

@optional_step("Go")
def map_go(self):
    """Map Go binaries to their sources using paths and symbols."""
    # Path-based mapping runs first; the symbol-based pass then only looks at
    # to/ binaries that still have no relation.
    d2d.map_go_paths(project=self.project, logger=self.log)
    d2d.map_go_binaries_with_symbols(project=self.project, logger=self.log)

@optional_step("Rust")
def map_rust(self):
Expand Down
134 changes: 107 additions & 27 deletions scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,6 +1930,53 @@ def map_go_paths(project, logger=None):
)


# Ecosystem option names (the pipeline's selected groups) that trigger each
# kind of binary symbol extraction in ``extract_binary_symbols``. An option may
# appear in several lists, in which case several extractors are run.
# Rust symbols are collected from executable binaries.
RUST_BINARY_OPTIONS = ["Rust"]
# ELF symbols are collected from ELF binaries.
ELF_BINARY_OPTIONS = ["Python", "Go", "Elf"]
# Mach-O symbols are collected from Mach-O binaries.
MACHO_BINARY_OPTIONS = ["Rust", "Go", "MacOS"]
# WinPE symbols are collected from Windows executables.
WINPE_BINARY_OPTIONS = ["Windows"]


def extract_binary_symbols(project, options, logger=None):
    """
    Extract binary symbols for all ELF, Mach-O and WinPE binaries found in the
    ``project`` to/ resources that have no relation yet, based on the selected
    ecosystem ``options``, so that these symbols can be mapped to extracted
    source symbols.

    ``options`` is an iterable of ecosystem option names. ``None`` or an empty
    iterable selects nothing, and the function returns without doing any work.
    The extracted symbols are stored on each resource by
    ``extract_binary_symbols_from_resources``.
    """
    # Materialize once: ``options`` may be None (no group selected) or a
    # one-shot iterable, and it is checked against several option lists below.
    selected_options = list(options or [])
    if not selected_options:
        return

    def is_selected(ecosystem_options):
        """Return True if any selected option belongs to ``ecosystem_options``."""
        return any(option in ecosystem_options for option in selected_options)

    to_resources = project.codebaseresources.files().to_codebase().has_no_relation()

    if is_selected(ELF_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.elfs(),
            binary_symbols_func=collect_and_parse_elf_symbols,
            logger=logger,
        )

    if is_selected(RUST_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.executable_binaries(),
            binary_symbols_func=collect_and_parse_rust_symbols,
            logger=logger,
        )

    if is_selected(MACHO_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.macho_binaries(),
            binary_symbols_func=collect_and_parse_macho_symbols,
            logger=logger,
        )

    if is_selected(WINPE_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.win_exes(),
            binary_symbols_func=collect_and_parse_winpe_symbols,
            logger=logger,
        )


def map_rust_binaries_with_symbols(project, logger=None):
"""Map Rust binaries to their source using symbols in ``project``."""
from_resources = project.codebaseresources.files().from_codebase()
Expand All @@ -1950,8 +1997,32 @@ def map_rust_binaries_with_symbols(project, logger=None):
project=project,
from_resources=rust_from_resources,
to_resources=to_binaries,
binary_symbols_func=collect_and_parse_rust_symbols,
map_type="rust_symbols",
map_types=["rust_symbols", "elf_symbols", "macho_symbols"],
logger=logger,
)


def map_go_binaries_with_symbols(project, logger=None):
    """
    Map Go binaries to their source using symbols in ``project``.

    The to/ side is limited to executable binaries without any relation; the
    from/ side is limited to files whose extension is listed as a source symbol
    extension in the Go ecosystem configuration.
    """
    source_files = project.codebaseresources.files().from_codebase()

    unmapped_files = project.codebaseresources.files().to_codebase().has_no_relation()
    unmapped_binaries = unmapped_files.executable_binaries()

    # Keep only the Go source files that can provide source symbols
    go_extensions = d2d_config.get_ecosystem_config(
        ecosystem="Go"
    ).source_symbol_extensions
    go_source_files = source_files.filter(extension__in=go_extensions)

    # Go binaries may carry either ELF or Mach-O symbols
    go_map_types = ["elf_symbols", "macho_symbols"]
    map_binaries_with_symbols(
        project=project,
        from_resources=go_source_files,
        to_resources=unmapped_binaries,
        map_types=go_map_types,
        logger=logger,
    )

Expand All @@ -1973,8 +2044,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
project=project,
from_resources=elf_from_resources,
to_resources=elf_binaries,
binary_symbols_func=collect_and_parse_elf_symbols,
map_type="elf_symbols",
map_types=["elf_symbols"],
logger=logger,
)

Expand All @@ -1999,8 +2069,7 @@ def map_macho_binaries_with_symbols(project, logger=None):
project=project,
from_resources=mac_from_resources,
to_resources=macho_binaries,
binary_symbols_func=collect_and_parse_macho_symbols,
map_type="macho_symbols",
map_types=["macho_symbols"],
logger=logger,
)

Expand All @@ -2022,18 +2091,29 @@ def map_winpe_binaries_with_symbols(project, logger=None):
project=project,
from_resources=windows_from_resources,
to_resources=winexe_binaries,
binary_symbols_func=collect_and_parse_winpe_symbols,
map_type="winpe_symbols",
map_types=["winpe_symbols"],
logger=logger,
)


def get_binary_symbols(resource, map_types):
    """
    Return a ``(map_type, symbols)`` tuple for the first entry of ``map_types``
    that has non-empty symbols stored in the ``resource`` extra data.

    Return ``(None, [])`` when none of the ``map_types`` has any symbols.
    """
    # Lazy lookups: stop at the first map type that yields symbols.
    lookups = ((kind, resource.extra_data.get(kind)) for kind in map_types)
    with_symbols = ((kind, found) for kind, found in lookups if found)
    return next(with_symbols, (None, []))


def map_binaries_with_symbols(
project,
from_resources,
to_resources,
binary_symbols_func,
map_type,
map_types,
logger=None,
):
"""Map Binaries to their source using symbols in ``project``."""
Expand All @@ -2043,14 +2123,6 @@ def map_binaries_with_symbols(
project_files=from_resources,
)

# Collect binary symbols from rust binaries
for resource in to_resources:
try:
binary_symbols = binary_symbols_func(resource.location)
resource.update_extra_data(binary_symbols)
except Exception as e:
logger(f"Error parsing binary symbols at: {resource.location_path!r} {e!r}")

if logger:
logger(
f"Mapping {to_resources.count():,d} to/ resources using symbols "
Expand All @@ -2060,7 +2132,10 @@ def map_binaries_with_symbols(
resource_iterator = to_resources.iterator(chunk_size=2000)
progress = LoopProgress(to_resources.count(), logger)
for to_resource in progress.iter(resource_iterator):
binary_symbols = to_resource.extra_data.get(map_type)
map_type, binary_symbols = get_binary_symbols(
resource=to_resource,
map_types=map_types,
)
if not binary_symbols:
continue

Expand All @@ -2077,6 +2152,19 @@ def map_binaries_with_symbols(
)


def extract_binary_symbols_from_resources(resources, binary_symbols_func, logger=None):
    """
    Extract binary symbols from ``resources`` using the ecosystem specific
    symbol extractor function ``binary_symbols_func``.

    ``binary_symbols_func`` is called with each resource ``location`` and its
    return value is stored on the resource with ``update_extra_data``.

    Extraction is best-effort: an error on one resource is reported through
    ``logger`` when one is provided, and the remaining resources are still
    processed.
    """
    for resource in resources:
        try:
            binary_symbols = binary_symbols_func(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # ``logger`` is optional for callers: without this guard a parsing
            # error would surface as a TypeError and abort the whole loop.
            if logger:
                logger(
                    "Error parsing binary symbols at: "
                    f"{resource.location_path!r} {e!r}"
                )


def map_javascript_symbols(project, logger=None):
"""Map deployed JavaScript, TypeScript to its sources using symbols."""
project_files = project.codebaseresources.files()
Expand Down Expand Up @@ -2270,14 +2358,6 @@ def map_python_pyx_to_binaries(project, logger=None):
project.codebaseresources.files().to_codebase().has_no_relation().elfs()
)

# Collect binary symbols from binaries
for resource in to_resources:
try:
binary_symbols = collect_and_parse_elf_symbols(resource.location)
resource.update_extra_data(binary_symbols)
except Exception as e:
logger(f"Error parsing binary symbols at: {resource.location_path!r} {e!r}")

for resource in from_resources:
# Open Cython source file, create AST, parse it for function definitions
# and save them in a list
Expand Down
1 change: 1 addition & 0 deletions scanpipe/pipes/d2d_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ class EcosystemConfig:
"Go": EcosystemConfig(
ecosystem_option="Go",
matchable_resource_extensions=[".go"],
source_symbol_extensions=[".go"],
),
"Rust": EcosystemConfig(
ecosystem_option="Rust",
Expand Down
15 changes: 15 additions & 0 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,9 @@ def test_scanpipe_pipes_d2d_map_rust_symbols(self):
)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()
d2d.extract_binary_symbols(
project=self.project1, options=["Rust"], logger=buffer.write
)
d2d.map_rust_binaries_with_symbols(project=self.project1, logger=buffer.write)
self.assertEqual(
2,
Expand Down Expand Up @@ -1640,6 +1643,9 @@ def test_scanpipe_pipes_d2d_map_elf_symbols(self):
)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()
d2d.extract_binary_symbols(
project=self.project1, options=["Elf"], logger=buffer.write
)
d2d.map_elfs_binaries_with_symbols(project=self.project1, logger=buffer.write)
self.assertEqual(
7,
Expand Down Expand Up @@ -1671,6 +1677,9 @@ def test_scanpipe_pipes_d2d_map_macho_symbols(self):
)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()
d2d.extract_binary_symbols(
project=self.project1, options=["MacOS"], logger=buffer.write
)
d2d.map_macho_binaries_with_symbols(project=self.project1, logger=buffer.write)
self.assertEqual(
9,
Expand Down Expand Up @@ -1699,6 +1708,9 @@ def test_scanpipe_pipes_d2d_map_python_pyx(self):
scancode.extract_archives(self.project1.codebase_path, recurse=True)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()
d2d.extract_binary_symbols(
project=self.project1, options=["Python"], logger=buffer.write
)
d2d.map_python_pyx_to_binaries(project=self.project1, logger=buffer.write)
pyx_match_relations = CodebaseRelation.objects.filter(
project=self.project1, map_type="python_pyx_match"
Expand Down Expand Up @@ -1728,6 +1740,9 @@ def test_scanpipe_pipes_d2d_map_winpe_symbols(self):
)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()
d2d.extract_binary_symbols(
project=self.project1, options=["Windows"], logger=buffer.write
)
d2d.map_winpe_binaries_with_symbols(project=self.project1, logger=buffer.write)
self.assertEqual(
4,
Expand Down