Skip to content

Commit 2ad9b40

Browse files
authored
Refine d2d pipeline for scala and kotlin (#1898)
* Refine D2D pipeline for Scala and Kotlin
  Signed-off-by: Tushar Goel <[email protected]>
* Add tests for scala and kotlin
  Signed-off-by: Tushar Goel <[email protected]>
* Fix formatting
  Signed-off-by: Tushar Goel <[email protected]>
* Handle path normalization for kotlin
  Signed-off-by: Tushar Goel <[email protected]>
* Add tests for Kotlin and Scala
  Signed-off-by: Tushar Goel <[email protected]>
* Address review comments
  Signed-off-by: Tushar Goel <[email protected]>
---------
Signed-off-by: Tushar Goel <[email protected]>
1 parent 26a8982 commit 2ad9b40

File tree

6 files changed

+556
-203
lines changed

6 files changed

+556
-203
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from scanpipe.pipes import d2d_config
2828
from scanpipe.pipes import flag
2929
from scanpipe.pipes import input
30+
from scanpipe.pipes import jvm
3031
from scanpipe.pipes import matchcode
3132
from scanpipe.pipes import purldb
3233
from scanpipe.pipes import scancode
@@ -72,7 +73,13 @@ def steps(cls):
7273
cls.match_archives_to_purldb,
7374
cls.find_java_packages,
7475
cls.map_java_to_class,
75-
cls.map_jar_to_source,
76+
cls.map_jar_to_java_source,
77+
cls.find_scala_packages,
78+
cls.map_scala_to_class,
79+
cls.map_jar_to_scala_source,
80+
cls.find_kotlin_packages,
81+
cls.map_kotlin_to_class,
82+
cls.map_jar_to_kotlin_source,
7683
cls.map_javascript,
7784
cls.map_javascript_symbols,
7885
cls.map_javascript_strings,
@@ -168,17 +175,65 @@ def match_archives_to_purldb(self):
168175
@optional_step("Java")
169176
def find_java_packages(self):
170177
"""Find the java package of the .java source files."""
171-
d2d.find_java_packages(self.project, logger=self.log)
178+
d2d.find_jvm_packages(
179+
project=self.project, jvm_lang=jvm.JavaLanguage, logger=self.log
180+
)
172181

173182
@optional_step("Java")
174183
def map_java_to_class(self):
175184
"""Map a .class compiled file to its .java source."""
176-
d2d.map_java_to_class(project=self.project, logger=self.log)
185+
d2d.map_jvm_to_class(
186+
project=self.project, jvm_lang=jvm.JavaLanguage, logger=self.log
187+
)
177188

178189
@optional_step("Java")
179-
def map_jar_to_source(self):
190+
def map_jar_to_java_source(self):
191+
"""Map .jar files to their related source directory."""
192+
d2d.map_jar_to_jvm_source(
193+
project=self.project, jvm_lang=jvm.JavaLanguage, logger=self.log
194+
)
195+
196+
@optional_step("Scala")
197+
def find_scala_packages(self):
198+
"""Find the Scala package of the .scala source files."""
199+
d2d.find_jvm_packages(
200+
project=self.project, jvm_lang=jvm.ScalaLanguage, logger=self.log
201+
)
202+
203+
@optional_step("Scala")
204+
def map_scala_to_class(self):
205+
"""Map a .class compiled file to its .scala source."""
206+
d2d.map_jvm_to_class(
207+
project=self.project, jvm_lang=jvm.ScalaLanguage, logger=self.log
208+
)
209+
210+
@optional_step("Scala")
211+
def map_jar_to_scala_source(self):
180212
"""Map .jar files to their related source directory."""
181-
d2d.map_jar_to_source(project=self.project, logger=self.log)
213+
d2d.map_jar_to_jvm_source(
214+
project=self.project, jvm_lang=jvm.ScalaLanguage, logger=self.log
215+
)
216+
217+
@optional_step("Kotlin")
218+
def find_kotlin_packages(self):
219+
"""Find the Kotlin package of the Kotlin source files."""
220+
d2d.find_jvm_packages(
221+
project=self.project, jvm_lang=jvm.KotlinLanguage, logger=self.log
222+
)
223+
224+
@optional_step("Kotlin")
225+
def map_kotlin_to_class(self):
226+
"""Map a .class compiled file to its Kotlin source."""
227+
d2d.map_jvm_to_class(
228+
project=self.project, jvm_lang=jvm.KotlinLanguage, logger=self.log
229+
)
230+
231+
@optional_step("Kotlin")
232+
def map_jar_to_kotlin_source(self):
233+
"""Map .jar files to their related source directory."""
234+
d2d.map_jar_to_jvm_source(
235+
project=self.project, jvm_lang=jvm.KotlinLanguage, logger=self.log
236+
)
182237

183238
@optional_step("JavaScript")
184239
def map_javascript(self):

scanpipe/pipes/d2d.py

Lines changed: 79 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -161,149 +161,124 @@ def map_checksum(project, checksum_field, logger=None):
161161
_map_checksum_resource(to_resource, from_resources, checksum_field)
162162

163163

164-
def _map_java_to_class_resource(to_resource, from_resources, from_classes_index):
164+
def _map_jvm_to_class_resource(
165+
to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
166+
):
165167
"""
166168
Map the ``to_resource`` .class file Resource with a Resource in
167-
``from_resources`` .java files, using the ``from_classes_index`` index of
168-
from/ fully qualified Java class names.
169+
``from_resources`` source files, using the ``from_classes_index`` index of
170+
from/ fully qualified source file paths.
169171
"""
170-
normalized_java_path = jvm.get_normalized_java_path(to_resource.path)
171-
match = pathmap.find_paths(path=normalized_java_path, index=from_classes_index)
172-
if not match:
173-
return
174-
175-
for resource_id in match.resource_ids:
176-
from_resource = from_resources.get(id=resource_id)
177-
# compute the root of the packages on the source side
178-
from_source_root_parts = from_resource.path.strip("/").split("/")
179-
from_source_root = "/".join(
180-
from_source_root_parts[: -match.matched_path_length]
181-
)
182-
pipes.make_relation(
183-
from_resource=from_resource,
184-
to_resource=to_resource,
185-
map_type="java_to_class",
186-
extra_data={"from_source_root": f"{from_source_root}/"},
172+
for extension in jvm_lang.source_extensions:
173+
normalized_path = jvm_lang.get_normalized_path(
174+
path=to_resource.path, extension=extension
187175
)
176+
match = pathmap.find_paths(path=normalized_path, index=from_classes_index)
177+
if not match:
178+
return
179+
180+
for resource_id in match.resource_ids:
181+
from_resource = from_resources.get(id=resource_id)
182+
# compute the root of the packages on the source side
183+
from_source_root_parts = from_resource.path.strip("/").split("/")
184+
from_source_root = "/".join(
185+
from_source_root_parts[: -match.matched_path_length]
186+
)
187+
pipes.make_relation(
188+
from_resource=from_resource,
189+
to_resource=to_resource,
190+
map_type=jvm_lang.binary_map_type,
191+
extra_data={"from_source_root": f"{from_source_root}/"},
192+
)
188193

189194

190-
def map_java_to_class(project, logger=None):
195+
def map_jvm_to_class(project, jvm_lang: jvm.JvmLanguage, logger=None):
191196
"""
192-
Map to/ compiled Java .class(es) to from/ .java source using Java fully
193-
qualified paths and indexing from/ .java files.
197+
Map to/ compiled JVM binary files to their from/ source files using the
198+
JVM language's fully qualified paths and indexing the from/ source files.
194199
"""
195200
project_files = project.codebaseresources.files().no_status()
196201
from_resources = project_files.from_codebase()
197202
to_resources = project_files.to_codebase().has_no_relation()
198203

199-
to_resources_dot_class = to_resources.filter(extension=".class")
200-
from_resources_dot_java = (
201-
from_resources.filter(extension=".java")
202-
# The "java_package" extra_data value is set during the `find_java_packages`,
204+
has_source_pkg_attr_name = {
205+
f"extra_data__{jvm_lang.source_package_attribute_name}__isnull": False
206+
}
207+
208+
to_resources_binary_extension = to_resources.filter(
209+
extension__in=jvm_lang.binary_extensions
210+
)
211+
from_resources_source_extension = (
212+
from_resources.filter(extension__in=jvm_lang.source_extensions)
213+
# The source_package_attribute_name extra_data value
214+
# is set during the `find_jvm_packages` step,
203215
# it is required to build the index.
204-
.filter(extra_data__java_package__isnull=False)
216+
.filter(**has_source_pkg_attr_name)
205217
)
206-
to_resource_count = to_resources_dot_class.count()
207-
from_resource_count = from_resources_dot_java.count()
218+
to_resource_count = to_resources_binary_extension.count()
219+
from_resource_count = from_resources_source_extension.count()
208220

209221
if not from_resource_count:
210-
logger("No .java resources to map.")
222+
logger(f"No {jvm_lang.source_extensions} resources to map.")
211223
return
212224

213225
if logger:
214226
logger(
215227
f"Mapping {to_resource_count:,d} .class resources to "
216-
f"{from_resource_count:,d} .java"
228+
f"{from_resource_count:,d} {jvm_lang.source_extensions}"
217229
)
218230

219-
# build an index using from-side Java fully qualified class file names
220-
# built from the "java_package" and file name
221-
indexables = get_indexable_qualified_java_paths(from_resources_dot_java)
231+
# build an index using from-side fully qualified class file names
232+
# built from the source_package_attribute_name and file name
233+
indexables = jvm_lang.get_indexable_qualified_paths(from_resources_source_extension)
222234

223235
# we do not index subpath since we want to match only fully qualified names
224236
from_classes_index = pathmap.build_index(indexables, with_subpaths=False)
225237

226-
resource_iterator = to_resources_dot_class.iterator(chunk_size=2000)
238+
resource_iterator = to_resources_binary_extension.iterator(chunk_size=2000)
227239
progress = LoopProgress(to_resource_count, logger)
228240

229241
for to_resource in progress.iter(resource_iterator):
230-
_map_java_to_class_resource(to_resource, from_resources, from_classes_index)
231-
232-
233-
def get_indexable_qualified_java_paths_from_values(resource_values):
234-
"""
235-
Yield tuples of (resource id, fully-qualified Java path) for indexable
236-
classes from a list of ``resource_data`` tuples of "from/" side of the
237-
project codebase.
238-
239-
These ``resource_data`` input tuples are in the form:
240-
(resource.id, resource.name, resource.extra_data)
241-
242-
And the output tuples look like this example::
243-
(123, "org/apache/commons/LoggerImpl.java")
244-
"""
245-
for resource_id, resource_name, resource_extra_data in resource_values:
246-
fully_qualified = jvm.get_fully_qualified_java_path(
247-
java_package=resource_extra_data.get("java_package"),
248-
filename=resource_name,
242+
_map_jvm_to_class_resource(
243+
to_resource=to_resource,
244+
from_resources=from_resources,
245+
from_classes_index=from_classes_index,
246+
jvm_lang=jvm_lang,
249247
)
250-
yield resource_id, fully_qualified
251248

252249

253-
def get_indexable_qualified_java_paths(from_resources_dot_java):
254-
"""
255-
Yield tuples of (resource id, fully-qualified Java class name) for indexable
256-
classes from the "from/" side of the project codebase using the
257-
"java_package" Resource.extra_data.
250+
def find_jvm_packages(project, jvm_lang: jvm.JvmLanguage, logger=None):
258251
"""
259-
resource_values = from_resources_dot_java.values_list("id", "name", "extra_data")
260-
return get_indexable_qualified_java_paths_from_values(resource_values)
261-
262-
263-
def find_java_packages(project, logger=None):
264-
"""
265-
Collect the Java packages of Java source files for a ``project``.
252+
Collect the JVM packages of source files for a ``project``.
266253
267254
Multiprocessing is enabled by default on this pipe, the number of processes
268255
can be controlled through the SCANCODEIO_PROCESSES setting.
269256
270257
Note: we use the same API as the ScanCode scans by design
271258
"""
272-
from_java_resources = (
273-
project.codebaseresources.files()
274-
.no_status()
275-
.from_codebase()
276-
.has_no_relation()
277-
.filter(extension=".java")
259+
resources = (
260+
project.codebaseresources.files().no_status().from_codebase().has_no_relation()
278261
)
279262

263+
from_jvm_resources = resources.filter(extension__in=jvm_lang.source_extensions)
264+
280265
if logger:
281266
logger(
282-
f"Finding Java package for {from_java_resources.count():,d} "
283-
".java resources."
267+
f"Finding {jvm_lang.name} packages for {from_jvm_resources.count():,d} "
268+
f"{jvm_lang.source_extensions} resources."
284269
)
285270

286271
scancode.scan_resources(
287-
resource_qs=from_java_resources,
288-
scan_func=scan_for_java_package,
289-
save_func=save_java_package_scan_results,
272+
resource_qs=from_jvm_resources,
273+
scan_func=jvm_lang.scan_for_source_package,
274+
save_func=save_jvm_package_scan_results,
290275
progress_logger=logger,
291276
)
292277

293278

294-
def scan_for_java_package(location, with_threading=True):
295-
"""
296-
Run a Java package scan on provided ``location``.
297-
298-
Return a dict of scan ``results`` and a list of ``errors``.
299-
"""
300-
scanners = [scancode.Scanner("java_package", jvm.get_java_package)]
301-
return scancode._scan_resource(location, scanners, with_threading=with_threading)
302-
303-
304-
def save_java_package_scan_results(codebase_resource, scan_results, scan_errors):
279+
def save_jvm_package_scan_results(codebase_resource, scan_results, scan_errors):
305280
"""
306-
Save the resource Java package scan results in the database as Resource.extra_data.
281+
Save the resource JVM package scan results in the database as Resource.extra_data.
307282
Create project errors if any occurred during the scan.
308283
"""
309284
# The status is only updated in case of errors.
@@ -314,11 +289,14 @@ def save_java_package_scan_results(codebase_resource, scan_results, scan_errors)
314289
codebase_resource.update_extra_data(scan_results)
315290

316291

317-
def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
292+
def _map_jar_to_jvm_source_resource(
293+
jar_resource, to_resources, from_resources, jvm_lang: jvm.JvmLanguage
294+
):
318295
jar_extracted_path = get_extracted_path(jar_resource)
319296
jar_extracted_dot_class_files = list(
320297
to_resources.filter(
321-
extension=".class", path__startswith=jar_extracted_path
298+
extension__in=jvm_lang.binary_extensions,
299+
path__startswith=jar_extracted_path,
322300
).values("id", "status")
323301
)
324302

@@ -337,16 +315,16 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
337315
dot_class_file_ids = [
338316
dot_class_file.get("id") for dot_class_file in jar_extracted_dot_class_files
339317
]
340-
java_to_class_extra_data_list = CodebaseRelation.objects.filter(
341-
to_resource__in=dot_class_file_ids, map_type="java_to_class"
318+
jvm_binary_map_type_extra_data_list = CodebaseRelation.objects.filter(
319+
to_resource__in=dot_class_file_ids, map_type=jvm_lang.binary_map_type
342320
).values_list("extra_data", flat=True)
343321

344322
from_source_roots = [
345323
extra_data.get("from_source_root", "")
346-
for extra_data in java_to_class_extra_data_list
324+
for extra_data in jvm_binary_map_type_extra_data_list
347325
]
348326
if len(set(from_source_roots)) != 1:
349-
# Could not determine a common root directory for the java_to_class files
327+
# Could not determine a common root directory for the binary_map_type files
350328
return
351329

352330
common_source_root = from_source_roots[0].rstrip("/")
@@ -358,7 +336,7 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
358336
)
359337

360338

361-
def map_jar_to_source(project, logger=None):
339+
def map_jar_to_jvm_source(project, jvm_lang: jvm.JvmLanguage, logger=None):
362340
"""Map .jar files to their related source directory."""
363341
project_files = project.codebaseresources.files()
364342
# Include the directories to map on the common source
@@ -377,7 +355,9 @@ def map_jar_to_source(project, logger=None):
377355
progress = LoopProgress(to_jars_count, logger)
378356

379357
for jar_resource in progress.iter(resource_iterator):
380-
_map_jar_to_source_resource(jar_resource, to_resources, from_resources)
358+
_map_jar_to_jvm_source_resource(
359+
jar_resource, to_resources, from_resources, jvm_lang=jvm_lang
360+
)
381361

382362

383363
def _map_path_resource(

scanpipe/pipes/d2d_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ class EcosystemConfig:
8686
matchable_package_extensions=[".jar", ".war"],
8787
matchable_resource_extensions=[".class"],
8888
),
89+
"Scala": EcosystemConfig(
90+
ecosystem_option="Scala",
91+
matchable_package_extensions=[".jar", ".war"],
92+
matchable_resource_extensions=[".class"],
93+
),
94+
"Kotlin": EcosystemConfig(
95+
ecosystem_option="Kotlin",
96+
matchable_package_extensions=[".jar", ".war"],
97+
matchable_resource_extensions=[".class"],
98+
),
8999
"JavaScript": EcosystemConfig(
90100
ecosystem_option="JavaScript",
91101
matchable_resource_extensions=[

0 commit comments

Comments
 (0)