Skip to content

Commit fb86667

Browse files
committed
Refine D2D pipeline for Scala and Kotlin
Signed-off-by: Tushar Goel <[email protected]>
1 parent 5e4aff9 commit fb86667

File tree

6 files changed

+304
-182
lines changed

6 files changed

+304
-182
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from scanpipe.pipes import d2d_config
2828
from scanpipe.pipes import flag
2929
from scanpipe.pipes import input
30+
from scanpipe.pipes import jvm
3031
from scanpipe.pipes import matchcode
3132
from scanpipe.pipes import purldb
3233
from scanpipe.pipes import scancode
@@ -73,6 +74,10 @@ def steps(cls):
7374
cls.find_java_packages,
7475
cls.map_java_to_class,
7576
cls.map_jar_to_source,
77+
cls.find_scala_packages,
78+
cls.map_scala_to_class,
79+
cls.find_kotlin_packages,
80+
cls.map_kotlin_to_class,
7681
cls.map_javascript,
7782
cls.map_javascript_symbols,
7883
cls.map_javascript_strings,
@@ -168,17 +173,58 @@ def match_archives_to_purldb(self):
168173
@optional_step("Java")
169174
def find_java_packages(self):
170175
"""Find the java package of the .java source files."""
171-
d2d.find_java_packages(self.project, logger=self.log)
176+
d2d.find_jvm_packages(
177+
project=self.project, jvm_lang=jvm.JavaLanguage, logger=self.log
178+
)
172179

173180
@optional_step("Java")
174181
def map_java_to_class(self):
175182
"""Map a .class compiled file to its .java source."""
176-
d2d.map_java_to_class(project=self.project, logger=self.log)
183+
d2d.map_jvm_to_class(
184+
project=self.project, logger=self.log, jvm_lang=jvm.JavaLanguage
185+
)
177186

178187
@optional_step("Java")
179188
def map_jar_to_source(self):
180189
"""Map .jar files to their related source directory."""
181-
d2d.map_jar_to_source(project=self.project, logger=self.log)
190+
d2d.map_jar_to_jvm_source(
191+
project=self.project, logger=self.log, jvm_lang=jvm.JavaLanguage
192+
)
193+
194+
@optional_step("Scala")
195+
def find_scala_packages(self):
196+
"""Find the package of the .scala source files."""
197+
d2d.find_jvm_packages(
198+
project=self.project, jvm_lang=jvm.ScalaLanguage, logger=self.log
199+
)
200+
201+
@optional_step("Scala")
202+
def map_scala_to_class(self):
203+
"""Map a .class compiled file to its .scala source."""
204+
d2d.map_jvm_to_class(
205+
project=self.project, logger=self.log, jvm_lang=jvm.ScalaLanguage
206+
)
207+
208+
@optional_step("Scala")
209+
def map_jar_to_scala_source(self):
210+
"""Map .jar files to their related source directory."""
211+
d2d.map_jar_to_jvm_source(
212+
project=self.project, logger=self.log, jvm_lang=jvm.ScalaLanguage
213+
)
214+
215+
@optional_step("Kotlin")
216+
def find_kotlin_packages(self):
217+
"""Find the package of the Kotlin source files."""
218+
d2d.find_jvm_packages(
219+
project=self.project, jvm_lang=jvm.KotlinLanguage, logger=self.log
220+
)
221+
222+
@optional_step("Kotlin")
223+
def map_kotlin_to_class(self):
224+
"""Map a .class compiled file to its Kotlin source."""
225+
d2d.map_jvm_to_class(
226+
project=self.project, logger=self.log, jvm_lang=jvm.KotlinLanguage
227+
)
182228

183229
@optional_step("JavaScript")
184230
def map_javascript(self):

scanpipe/pipes/d2d.py

Lines changed: 62 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -161,33 +161,38 @@ def map_checksum(project, checksum_field, logger=None):
161161
_map_checksum_resource(to_resource, from_resources, checksum_field)
162162

163163

164-
def _map_java_to_class_resource(to_resource, from_resources, from_classes_index):
164+
def _map_jvm_to_class_resource(
165+
to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
166+
):
165167
"""
166168
Map the ``to_resource`` .class file Resource with a Resource in
167169
``from_resources`` source files of ``jvm_lang``, using the
168170
``from_classes_index`` index of from/ fully qualified class names.
169171
"""
170-
normalized_java_path = jvm.get_normalized_java_path(to_resource.path)
171-
match = pathmap.find_paths(path=normalized_java_path, index=from_classes_index)
172-
if not match:
173-
return
174-
175-
for resource_id in match.resource_ids:
176-
from_resource = from_resources.get(id=resource_id)
177-
# compute the root of the packages on the source side
178-
from_source_root_parts = from_resource.path.strip("/").split("/")
179-
from_source_root = "/".join(
180-
from_source_root_parts[: -match.matched_path_length]
181-
)
182-
pipes.make_relation(
183-
from_resource=from_resource,
184-
to_resource=to_resource,
185-
map_type="java_to_class",
186-
extra_data={"from_source_root": f"{from_source_root}/"},
172+
for extension in jvm_lang.source_extensions:
173+
normalized_path = jvm_lang.get_normalized_path(
174+
path=to_resource.path, extension=extension
187175
)
176+
match = pathmap.find_paths(path=normalized_path, index=from_classes_index)
177+
if not match:
178+
return
179+
180+
for resource_id in match.resource_ids:
181+
from_resource = from_resources.get(id=resource_id)
182+
# compute the root of the packages on the source side
183+
from_source_root_parts = from_resource.path.strip("/").split("/")
184+
from_source_root = "/".join(
185+
from_source_root_parts[: -match.matched_path_length]
186+
)
187+
pipes.make_relation(
188+
from_resource=from_resource,
189+
to_resource=to_resource,
190+
map_type=jvm_lang.binary_map_type,
191+
extra_data={"from_source_root": f"{from_source_root}/"},
192+
)
188193

189194

190-
def map_java_to_class(project, logger=None):
195+
def map_jvm_to_class(project, jvm_lang: jvm.JvmLanguage, logger=None):
191196
"""
192197
Map to/ compiled .class(es) to from/ ``jvm_lang`` source files using fully
193198
qualified paths and indexing the from/ source files.
@@ -196,112 +201,76 @@ def map_java_to_class(project, logger=None):
196201
from_resources = project_files.from_codebase()
197202
to_resources = project_files.to_codebase().has_no_relation()
198203

199-
to_resources_dot_class = to_resources.filter(extension=".class")
200-
from_resources_dot_java = (
201-
from_resources.filter(extension=".java")
204+
filter = {f"extra_data__{jvm_lang.source_package_attribute_name}__isnull": False}
205+
206+
to_resources_binary_extension = to_resources.filter(
207+
extension__in=jvm_lang.binary_extensions
208+
)
209+
from_resources_source_extension = (
210+
from_resources.filter(extension__in=jvm_lang.source_extensions)
202211
# The package extra_data value is set during the `find_jvm_packages`,
203212
# it is required to build the index.
204-
.filter(extra_data__java_package__isnull=False)
213+
.filter(**filter)
205214
)
206-
to_resource_count = to_resources_dot_class.count()
207-
from_resource_count = from_resources_dot_java.count()
215+
to_resource_count = to_resources_binary_extension.count()
216+
from_resource_count = from_resources_source_extension.count()
208217

209218
if not from_resource_count:
210-
logger("No .java resources to map.")
219+
logger(f"No {jvm_lang.source_extensions} resources to map.")
211220
return
212221

213222
if logger:
214223
logger(
215224
f"Mapping {to_resource_count:,d} .class resources to "
216-
f"{from_resource_count:,d} .java"
225+
f"{from_resource_count:,d} {jvm_lang.source_extensions}"
217226
)
218227

219228
# build an index using from-side fully qualified class file names
220229
# built from the source package and file name
221-
indexables = get_indexable_qualified_java_paths(from_resources_dot_java)
230+
indexables = jvm_lang.get_indexable_qualified_paths(from_resources_source_extension)
222231

223232
# we do not index subpath since we want to match only fully qualified names
224233
from_classes_index = pathmap.build_index(indexables, with_subpaths=False)
225234

226-
resource_iterator = to_resources_dot_class.iterator(chunk_size=2000)
235+
resource_iterator = to_resources_binary_extension.iterator(chunk_size=2000)
227236
progress = LoopProgress(to_resource_count, logger)
228237

229238
for to_resource in progress.iter(resource_iterator):
230-
_map_java_to_class_resource(to_resource, from_resources, from_classes_index)
231-
232-
233-
def get_indexable_qualified_java_paths_from_values(resource_values):
234-
"""
235-
Yield tuples of (resource id, fully-qualified Java path) for indexable
236-
classes from a list of ``resource_data`` tuples of "from/" side of the
237-
project codebase.
238-
239-
These ``resource_data`` input tuples are in the form:
240-
(resource.id, resource.name, resource.extra_data)
241-
242-
And the output tuples look like this example::
243-
(123, "org/apache/commons/LoggerImpl.java")
244-
"""
245-
for resource_id, resource_name, resource_extra_data in resource_values:
246-
fully_qualified = jvm.get_fully_qualified_java_path(
247-
java_package=resource_extra_data.get("java_package"),
248-
filename=resource_name,
239+
_map_jvm_to_class_resource(
240+
to_resource, from_resources, from_classes_index, jvm_lang
249241
)
250-
yield resource_id, fully_qualified
251-
252-
253-
def get_indexable_qualified_java_paths(from_resources_dot_java):
254-
"""
255-
Yield tuples of (resource id, fully-qualified Java class name) for indexable
256-
classes from the "from/" side of the project codebase using the
257-
"java_package" Resource.extra_data.
258-
"""
259-
resource_values = from_resources_dot_java.values_list("id", "name", "extra_data")
260-
return get_indexable_qualified_java_paths_from_values(resource_values)
261242

262243

263-
def find_java_packages(project, logger=None):
244+
def find_jvm_packages(project, jvm_lang: jvm.JvmLanguage, logger=None):
264245
"""
265-
Collect the Java packages of Java source files for a ``project``.
246+
Collect the packages of the ``jvm_lang`` source files for a ``project``.
266247
267248
Multiprocessing is enabled by default on this pipe, the number of processes
268249
can be controlled through the SCANCODEIO_PROCESSES setting.
269250
270251
Note: we use the same API as the ScanCode scans by design
271252
"""
272-
from_java_resources = (
273-
project.codebaseresources.files()
274-
.no_status()
275-
.from_codebase()
276-
.has_no_relation()
277-
.filter(extension=".java")
253+
resources = (
254+
project.codebaseresources.files().no_status().from_codebase().has_no_relation()
278255
)
279256

257+
from_jvm_resources = resources.filter(extension__in=jvm_lang.source_extensions)
258+
280259
if logger:
281260
logger(
282-
f"Finding Java package for {from_java_resources.count():,d} "
283-
".java resources."
261+
f"Finding {jvm_lang.name} packages for {from_jvm_resources.count():,d} "
262+
f"{jvm_lang.source_extensions} resources."
284263
)
285264

286265
scancode.scan_resources(
287-
resource_qs=from_java_resources,
288-
scan_func=scan_for_java_package,
289-
save_func=save_java_package_scan_results,
266+
resource_qs=from_jvm_resources,
267+
scan_func=jvm_lang.scan_for_source_package,
268+
save_func=save_jvm_package_scan_results,
290269
progress_logger=logger,
291270
)
292271

293272

294-
def scan_for_java_package(location, with_threading=True):
295-
"""
296-
Run a Java package scan on provided ``location``.
297-
298-
Return a dict of scan ``results`` and a list of ``errors``.
299-
"""
300-
scanners = [scancode.Scanner("java_package", jvm.get_java_package)]
301-
return scancode._scan_resource(location, scanners, with_threading=with_threading)
302-
303-
304-
def save_java_package_scan_results(codebase_resource, scan_results, scan_errors):
273+
def save_jvm_package_scan_results(codebase_resource, scan_results, scan_errors):
305274
"""
306275
Save the resource JVM package scan results in the database as Resource.extra_data.
307276
Create project errors if any occurred during the scan.
@@ -314,11 +283,14 @@ def save_java_package_scan_results(codebase_resource, scan_results, scan_errors)
314283
codebase_resource.update_extra_data(scan_results)
315284

316285

317-
def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
286+
def _map_jar_to_jvm_source_resource(
287+
jar_resource, to_resources, from_resources, jvm_lang: jvm.JvmLanguage
288+
):
318289
jar_extracted_path = get_extracted_path(jar_resource)
319290
jar_extracted_dot_class_files = list(
320291
to_resources.filter(
321-
extension=".class", path__startswith=jar_extracted_path
292+
extension__in=jvm_lang.binary_extensions,
293+
path__startswith=jar_extracted_path,
322294
).values("id", "status")
323295
)
324296

@@ -338,7 +310,7 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
338310
dot_class_file.get("id") for dot_class_file in jar_extracted_dot_class_files
339311
]
340312
java_to_class_extra_data_list = CodebaseRelation.objects.filter(
341-
to_resource__in=dot_class_file_ids, map_type="java_to_class"
313+
to_resource__in=dot_class_file_ids, map_type=jvm_lang.binary_map_type
342314
).values_list("extra_data", flat=True)
343315

344316
from_source_roots = [
@@ -358,7 +330,7 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
358330
)
359331

360332

361-
def map_jar_to_source(project, logger=None):
333+
def map_jar_to_jvm_source(project, jvm_lang: jvm.JvmLanguage, logger=None):
362334
"""Map .jar files to their related source directory."""
363335
project_files = project.codebaseresources.files()
364336
# Include the directories to map on the common source
@@ -377,7 +349,9 @@ def map_jar_to_source(project, logger=None):
377349
progress = LoopProgress(to_jars_count, logger)
378350

379351
for jar_resource in progress.iter(resource_iterator):
380-
_map_jar_to_source_resource(jar_resource, to_resources, from_resources)
352+
_map_jar_to_jvm_source_resource(
353+
jar_resource, to_resources, from_resources, jvm_lang=jvm_lang
354+
)
381355

382356

383357
def _map_path_resource(

scanpipe/pipes/d2d_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ class EcosystemConfig:
8686
matchable_package_extensions=[".jar", ".war"],
8787
matchable_resource_extensions=[".class"],
8888
),
89+
"Scala": EcosystemConfig(
90+
ecosystem_option="Scala",
91+
matchable_package_extensions=[".jar", ".war"],
92+
matchable_resource_extensions=[".class"],
93+
),
94+
"Kotlin": EcosystemConfig(
95+
ecosystem_option="Kotlin",
96+
matchable_package_extensions=[".jar", ".war"],
97+
matchable_resource_extensions=[".class"],
98+
),
8999
"JavaScript": EcosystemConfig(
90100
ecosystem_option="JavaScript",
91101
matchable_resource_extensions=[

0 commit comments

Comments
 (0)