Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ v35.1.0 (unreleased)
license rules used during the scan.
https://github.com/aboutcode-org/scancode.io/issues/1657

- Add a new step to the ``DeployToDevelop`` pipeline, ``map_python``, to match
Cython source files (.pyx) to their compiled binaries.
https://github.com/aboutcode-org/scancode.io/pull/1703

- Update scancode-toolkit to v32.4.0. See CHANGELOG for updates:
https://github.com/aboutcode-org/scancode-toolkit/releases/tag/v32.4.0
Adds a new ``git_sha1`` attribute to the ``CodebaseResource`` model as this
Expand Down
9 changes: 9 additions & 0 deletions scanpipe/pipelines/deploy_to_develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def steps(cls):
cls.map_winpe,
cls.map_go,
cls.map_rust,
cls.map_python,
cls.match_directories_to_purldb,
cls.match_resources_to_purldb,
cls.map_javascript_post_purldb_match,
Expand Down Expand Up @@ -221,6 +222,14 @@ def map_rust(self):
"""Map Rust binaries to their sources using symbols."""
d2d.map_rust_binaries_with_symbols(project=self.project, logger=self.log)

@optional_step("Python")
def map_python(self):
    """
    Map Cython source files to their compiled binaries.

    Optional step, registered under the "Python" option. Delegates to
    ``d2d.map_python_pyx_to_binaries`` with this pipeline's project and its
    ``log`` method as the logger.
    """
    d2d.map_python_pyx_to_binaries(project=self.project, logger=self.log)

def match_directories_to_purldb(self):
"""Match selected directories in PurlDB."""
if not purldb.is_available():
Expand Down
52 changes: 52 additions & 0 deletions scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -2254,3 +2254,55 @@ def _map_javascript_strings(to_resource, javascript_from_resources, logger):
to_resource.update(status=flag.MAPPED)
return 1
return 0


def map_python_pyx_to_binaries(project, logger=None):
    """
    Map Cython source to their compiled binaries in ``project``.

    The symbols of every not-yet-related ELF file of the to/ codebase are
    collected and stored in the resource ``extra_data``. Then, for each from/
    resource whose extension is listed in the "Python" ecosystem
    ``source_symbol_extensions``, the names of its top-level function
    definitions are extracted and a ``python_pyx_match`` relation is created
    with every candidate ELF whose ``extra_data`` contains one of these names.

    ``logger`` is an optional callable accepting a single message string; when
    it is not provided, symbol collection errors are silently skipped.
    """
    from source_inspector.symbols_tree_sitter import get_tree_and_language_info

    python_config = d2d_config.get_ecosystem_config(ecosystem="Python")
    from_resources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=python_config.source_symbol_extensions)
    )
    to_resources = (
        project.codebaseresources.files().to_codebase().has_no_relation().elfs()
    )

    # Collect binary symbols from binaries.
    # Best effort: a binary that cannot be parsed is reported and skipped so
    # that it does not abort the mapping of the other binaries.
    for resource in to_resources:
        try:
            binary_symbols = collect_and_parse_elf_symbols(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # ``logger`` defaults to None and must not be called in that case.
            if logger:
                logger(
                    f"Error parsing binary symbols at: {resource.location_path!r} {e!r}"
                )

    for resource in from_resources:
        # Open Cython source file, create AST, parse it for the names of the
        # top-level function definitions and save them in a list.
        tree, _ = get_tree_and_language_info(resource.location)
        identifiers = []
        for node in tree.root_node.children:
            if node.type != "function_definition":
                continue
            for child in node.children:
                # Skip empty names: an empty string is contained in any
                # ``extra_data`` and would match every binary.
                if child.type == "identifier" and child.text:
                    identifiers.append(child.text.decode())

        # Without any identifier the combined filter below would be an empty
        # ``Q()``, which does not restrict the queryset and would relate this
        # source to every candidate binary.
        if not identifiers:
            continue

        # Find matching to/ resource by checking to see which to/ resource's
        # extra_data field contains function definitions found from Cython
        # source files.
        identifiers_qs = Q()
        for identifier in identifiers:
            identifiers_qs |= Q(extra_data__icontains=identifier)

        for matching_elf in to_resources.filter(identifiers_qs):
            pipes.make_relation(
                from_resource=resource,
                to_resource=matching_elf,
                map_type="python_pyx_match",
            )
4 changes: 4 additions & 0 deletions scanpipe/pipes/d2d_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ class EcosystemConfig:
ecosystem_option="Windows",
source_symbol_extensions=[".c", ".cpp", ".h", ".cs"],
),
"Python": EcosystemConfig(
ecosystem_option="Python",
source_symbol_extensions=[".pyx", ".pxd"],
),
}


Expand Down
Binary file not shown.
Binary file added scanpipe/tests/data/d2d-python/to-intbitset.whl
Binary file not shown.
26 changes: 26 additions & 0 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1677,6 +1677,32 @@ def test_scanpipe_pipes_d2d_map_macho_symbols(self):
).count(),
)

@skipIf(sys.platform == "darwin", "Test is failing on macOS")
def test_scanpipe_pipes_d2d_map_python_pyx(self):
    """
    Extract the intbitset from/ and to/ fixtures, run the Cython mapping and
    check that exactly one ``python_pyx_match`` relation is created.
    """
    project = self.project1
    fixtures = [
        self.data / "d2d-python/to-intbitset.whl",
        self.data / "d2d-python/from-intbitset.tar.gz",
    ]
    copy_inputs(fixtures, project.input_path)

    # Extract each input archive under its from/ or to/ codebase directory.
    self.from_files, self.to_files = d2d.get_inputs(project)
    from_destination = project.codebase_path / d2d.FROM
    to_destination = project.codebase_path / d2d.TO
    for archive in self.from_files:
        scancode.extract_archive(archive, from_destination)
    for archive in self.to_files:
        scancode.extract_archive(archive, to_destination)

    # Extract nested archives too, then index the resulting files.
    scancode.extract_archives(project.codebase_path, recurse=True)
    pipes.collect_and_create_codebase_resources(project)

    log_buffer = io.StringIO()
    d2d.map_python_pyx_to_binaries(project=project, logger=log_buffer.write)

    relation_count = CodebaseRelation.objects.filter(
        project=project, map_type="python_pyx_match"
    ).count()
    self.assertEqual(1, relation_count)

@skipIf(sys.platform == "darwin", "Test is failing on macOS")
def test_scanpipe_pipes_d2d_map_winpe_symbols(self):
input_dir = self.project1.input_path
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ install_requires =
rust-inspector==0.1.0
binary-inspector==0.1.2
python-inspector==0.14.0
source-inspector==0.6.1; sys_platform != "darwin" and platform_machine != "arm64"
source-inspector==0.7.0; sys_platform != "darwin" and platform_machine != "arm64"
aboutcode-toolkit==11.1.1
# Utilities
XlsxWriter==3.2.5
Expand Down