Skip to content

Commit 2da4d37

Browse files
authored
Update Java D2D Pipeline to Include Checksum Mapped Sources for Accurate Java Mapping (#1870)
* Update Java D2D Pipeline to Include Checksum Mapped Sources for Accurate Java Mapping #1854
  Signed-off-by: Chin Yeung Li <[email protected]>
* Add test for checksum and java mapping in the same run #1854
  Signed-off-by: Chin Yeung Li <[email protected]>
* Update test to use 'map_jvm_to_class' #1854
  Signed-off-by: Chin Yeung Li <[email protected]>
---------
Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 2c3e120 commit 2da4d37

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed

scanpipe/pipes/d2d.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,15 @@ def map_jvm_to_class(project, jvm_lang: jvm.JvmLanguage, logger=None):
197197
Map compiled JVM binary files in to/ to their sources in from/, using the
198198
JVM language's fully qualified paths and an index of from/ source files.
199199
"""
200-
project_files = project.codebaseresources.files().no_status()
200+
project_files = project.codebaseresources.files()
201+
# Collect all files from "from_codebase", even if they already have a
202+
# status or are mapped. This is necessary because the deploy codebase
203+
# may contain sources that match "from_codebase" via checksum. If those
204+
# checksum-matched files are excluded from mapping, it can result in
205+
# .class files failing to resolve. See
206+
# https://github.com/aboutcode-org/scancode.io/issues/1854#issuecomment-3273472895
201207
from_resources = project_files.from_codebase()
202-
to_resources = project_files.to_codebase().has_no_relation()
208+
to_resources = project_files.to_codebase().no_status().has_no_relation()
203209

204210
has_source_pkg_attr_name = {
205211
f"extra_data__{jvm_lang.source_package_attribute_name}__isnull": False

scanpipe/tests/pipes/test_d2d.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,52 @@ def test_scanpipe_pipes_d2d_map_java_to_class(self):
393393
to3.refresh_from_db()
394394
self.assertEqual("", to3.status)
395395

396+
def test_scanpipe_pipes_d2d_map_java_to_class_with_java_in_deploy(self):
397+
sha1 = "abcde"
398+
from1 = make_resource_file(
399+
self.project1,
400+
path="from/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/"
401+
"AbstractConfigurationProvider.java",
402+
extra_data={"java_package": "org.apache.flume.node"},
403+
sha1=sha1,
404+
)
405+
to1 = make_resource_file(
406+
self.project1,
407+
path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/"
408+
"AbstractConfigurationProvider.java",
409+
sha1=sha1,
410+
)
411+
to2 = make_resource_file(
412+
self.project1,
413+
path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/"
414+
"AbstractConfigurationProvider.class",
415+
)
416+
417+
buffer = io.StringIO()
418+
419+
# The pipeline will run map_checksum first
420+
d2d.map_checksum(self.project1, "sha1", logger=buffer.write)
421+
expected = "Mapping 1 to/ resources using sha1 against from/ codebase"
422+
self.assertEqual(expected, buffer.getvalue())
423+
self.assertEqual(1, to1.related_from.count())
424+
relation1 = to1.related_from.get()
425+
self.assertEqual("sha1", relation1.map_type)
426+
self.assertEqual(from1, relation1.from_resource)
427+
428+
# Now run map_jvm_to_class
429+
d2d.map_jvm_to_class(
430+
self.project1, logger=buffer.write, jvm_lang=jvm.JavaLanguage
431+
)
432+
expected = "Mapping 1 .class resources to 1 ('.java',)"
433+
self.assertIn(expected, buffer.getvalue())
434+
self.assertEqual(2, self.project1.codebaserelations.count())
435+
relation2 = self.project1.codebaserelations.get(
436+
to_resource=to2, from_resource=from1
437+
)
438+
self.assertEqual("java_to_class", relation2.map_type)
439+
expected = {"from_source_root": "from/flume-ng-node-1.9.0-sources.jar-extract/"}
440+
self.assertEqual(expected, relation2.extra_data)
441+
396442
def test_scanpipe_pipes_d2d_map_java_to_class_no_java(self):
397443
make_resource_file(self.project1, path="to/Abstract.class")
398444
buffer = io.StringIO()

0 commit comments

Comments
 (0)