Skip to content

Commit cd8a6c5

Browse files
committed
Add step to map Cython files to binaries in d2d pipeline
Signed-off-by: Jono Yang <[email protected]>
1 parent 857ab86 commit cd8a6c5

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ def steps(cls):
7777
cls.map_winpe,
7878
cls.map_go,
7979
cls.map_rust,
80+
cls.map_python,
8081
cls.match_directories_to_purldb,
8182
cls.match_resources_to_purldb,
8283
cls.map_javascript_post_purldb_match,
@@ -230,6 +231,11 @@ def map_rust(self):
230231
"""Map Rust binaries to their sources using symbols."""
231232
d2d.map_rust_binaries_with_symbols(project=self.project, logger=self.log)
232233

234+
@optional_step("Python")
def map_python(self):
    """Map Cython ``.pyx`` sources to the project ELF binaries using symbols."""
    # Delegate to the d2d pipe, forwarding this pipeline's project and logger.
    d2d.map_python_pyx_to_binaries(
        project=self.project,
        logger=self.log,
    )
238+
233239
def match_directories_to_purldb(self):
234240
"""Match selected directories in PurlDB."""
235241
if not purldb.is_available():

scanpipe/pipes/d2d.py

Lines changed: 38 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848
from packagedcode.npm import NpmPackageJsonHandler
4949
from rust_inspector.binary import collect_and_parse_rust_symbols
5050
from summarycode.classify import LEGAL_STARTS_ENDS
51+
from source_inspector.symbols_tree_sitter import get_tree
5152

5253
from aboutcode.pipeline import LoopProgress
5354
from scanpipe import pipes
@@ -1945,7 +1946,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
19451946
)
19461947

19471948
# Collect source symbols from elf related source files
1948-
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h"])
1949+
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h", ".pyx", ".pxd"])
19491950

19501951
map_binaries_with_symbols(
19511952
project=project,
@@ -2146,3 +2147,39 @@ def _map_javascript_symbols(to_resource, javascript_from_resources, logger):
21462147
to_resource.update(status=flag.MAPPED)
21472148
return 1
21482149
return 0
2150+
2151+
2152+
def map_python_pyx_to_binaries(project, logger=None):
    """
    Map Cython ``.pyx`` source files to ELF binaries in ``project`` using symbols.

    The symbols of every candidate binary, selected with
    ``to_codebase().has_no_relation().elfs()``, are collected and stored in its
    ``extra_data``. Then, for each from/ ``.pyx`` file, the names of its
    top-level function definitions are searched in that ``extra_data`` with an
    ``icontains`` lookup, and a ``python_pyx_match`` relation is created for
    each binary matching at least one of these names.

    ``logger`` is an optional callable receiving error messages; when it is
    ``None``, symbol collection errors are skipped silently.
    """
    from_resources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__endswith=".pyx")
    )
    to_resources = (
        project.codebaseresources.files().to_codebase().has_no_relation().elfs()
    )

    # Collect the symbols of each binary and store them in its ``extra_data``.
    for resource in to_resources:
        try:
            binary_symbols = collect_and_parse_elf_symbols(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # Best effort: one unparsable binary must not abort the whole step.
            # ``logger`` is optional, so only report when one was provided.
            if logger:
                logger(
                    "Error parsing binary symbols at: "
                    f"{resource.location_path!r} {e!r}"
                )

    for resource in from_resources:
        tree, _ = get_tree(resource.location)

        # Names of the top-level function definitions of this source file.
        # ``dict.fromkeys`` drops duplicated names while keeping source order.
        identifiers = list(
            dict.fromkeys(
                child.text.decode()
                for node in tree.root_node.children
                if node.type == "function_definition"
                for child in node.children
                if child.type == "identifier"
            )
        )

        # Without any identifier the lookup below would be an empty ``Q()``,
        # which does not restrict the queryset: every candidate binary would
        # wrongly be related to this source file.
        if not identifiers:
            continue

        identifiers_qs = Q()
        for identifier in identifiers:
            identifiers_qs |= Q(extra_data__icontains=identifier)

        for matching_elf in to_resources.filter(identifiers_qs):
            pipes.make_relation(
                from_resource=resource,
                to_resource=matching_elf,
                map_type="python_pyx_match",
            )

0 commit comments

Comments
 (0)