@@ -161,33 +161,38 @@ def map_checksum(project, checksum_field, logger=None):
161161 _map_checksum_resource (to_resource , from_resources , checksum_field )
162162
163163
def _map_jvm_to_class_resource(
    to_resource, from_resources, from_classes_index, jvm_lang: jvm.JvmLanguage
):
    """
    Map the ``to_resource`` compiled binary Resource (such as a .class file)
    with the matching source Resource(s) in ``from_resources``, using the
    ``from_classes_index`` index of from/ fully qualified source paths.

    Every extension listed in ``jvm_lang.source_extensions`` is tried in turn:
    the ``to_resource`` path is normalized for that extension with
    ``jvm_lang.get_normalized_path`` and looked up in the index. One relation
    of type ``jvm_lang.binary_map_type`` is created for each matched from/
    resource, and the computed source root is stored in the relation
    ``extra_data`` under the "from_source_root" key.

    An extension without a match is skipped so that the remaining extensions
    are still tried. Nothing is returned.
    """
    for extension in jvm_lang.source_extensions:
        normalized_path = jvm_lang.get_normalized_path(
            path=to_resource.path, extension=extension
        )
        match = pathmap.find_paths(path=normalized_path, index=from_classes_index)
        if not match:
            # No source with this extension: move on to the next extension
            # instead of abandoning the mapping of this resource altogether.
            continue

        for resource_id in match.resource_ids:
            from_resource = from_resources.get(id=resource_id)
            # compute the root of the packages on the source side: drop the
            # trailing path segments that were matched as the qualified path
            from_source_root_parts = from_resource.path.strip("/").split("/")
            from_source_root = "/".join(
                from_source_root_parts[: -match.matched_path_length]
            )
            pipes.make_relation(
                from_resource=from_resource,
                to_resource=to_resource,
                map_type=jvm_lang.binary_map_type,
                extra_data={"from_source_root": f"{from_source_root}/"},
            )
188193
189194
190- def map_java_to_class (project , logger = None ):
195+ def map_jvm_to_class (project , jvm_lang : jvm . JvmLanguage , logger = None ):
191196 """
192197 Map to/ compiled Java .class(es) to from/ .java source using Java fully
193198 qualified paths and indexing from/ .java files.
@@ -196,112 +201,76 @@ def map_java_to_class(project, logger=None):
196201 from_resources = project_files .from_codebase ()
197202 to_resources = project_files .to_codebase ().has_no_relation ()
198203
199- to_resources_dot_class = to_resources .filter (extension = ".class" )
200- from_resources_dot_java = (
201- from_resources .filter (extension = ".java" )
204+ filter = {f"extra_data__{ jvm_lang .source_package_attribute_name } __isnull" : False }
205+
206+ to_resources_binary_extension = to_resources .filter (
207+ extension__in = jvm_lang .binary_extensions
208+ )
209+ from_resources_source_extension = (
210+ from_resources .filter (extension__in = jvm_lang .source_extensions )
202211 # The "java_package" extra_data value is set during the `find_java_packages`,
203212 # it is required to build the index.
204- .filter (extra_data__java_package__isnull = False )
213+ .filter (** filter )
205214 )
206- to_resource_count = to_resources_dot_class .count ()
207- from_resource_count = from_resources_dot_java .count ()
215+ to_resource_count = to_resources_binary_extension .count ()
216+ from_resource_count = from_resources_source_extension .count ()
208217
209218 if not from_resource_count :
210- logger ("No .java resources to map." )
219+ logger (f "No { jvm_lang . source_extensions } resources to map." )
211220 return
212221
213222 if logger :
214223 logger (
215224 f"Mapping { to_resource_count :,d} .class resources to "
216- f"{ from_resource_count :,d} .java "
225+ f"{ from_resource_count :,d} { jvm_lang . source_extensions } "
217226 )
218227
219228 # build an index using from-side Java fully qualified class file names
220229 # built from the "java_package" and file name
221- indexables = get_indexable_qualified_java_paths ( from_resources_dot_java )
230+ indexables = jvm_lang . get_indexable_qualified_paths ( from_resources_source_extension )
222231
223232 # we do not index subpath since we want to match only fully qualified names
224233 from_classes_index = pathmap .build_index (indexables , with_subpaths = False )
225234
226- resource_iterator = to_resources_dot_class .iterator (chunk_size = 2000 )
235+ resource_iterator = to_resources_binary_extension .iterator (chunk_size = 2000 )
227236 progress = LoopProgress (to_resource_count , logger )
228237
229238 for to_resource in progress .iter (resource_iterator ):
230- _map_java_to_class_resource (to_resource , from_resources , from_classes_index )
231-
232-
233- def get_indexable_qualified_java_paths_from_values (resource_values ):
234- """
235- Yield tuples of (resource id, fully-qualified Java path) for indexable
236- classes from a list of ``resource_data`` tuples of "from/" side of the
237- project codebase.
238-
239- These ``resource_data`` input tuples are in the form:
240- (resource.id, resource.name, resource.extra_data)
241-
242- And the output tuples look like this example::
243- (123, "org/apache/commons/LoggerImpl.java")
244- """
245- for resource_id , resource_name , resource_extra_data in resource_values :
246- fully_qualified = jvm .get_fully_qualified_java_path (
247- java_package = resource_extra_data .get ("java_package" ),
248- filename = resource_name ,
239+ _map_jvm_to_class_resource (
240+ to_resource , from_resources , from_classes_index , jvm_lang
249241 )
250- yield resource_id , fully_qualified
251-
252-
253- def get_indexable_qualified_java_paths (from_resources_dot_java ):
254- """
255- Yield tuples of (resource id, fully-qualified Java class name) for indexable
256- classes from the "from/" side of the project codebase using the
257- "java_package" Resource.extra_data.
258- """
259- resource_values = from_resources_dot_java .values_list ("id" , "name" , "extra_data" )
260- return get_indexable_qualified_java_paths_from_values (resource_values )
261242
262243
263- def find_java_packages (project , logger = None ):
244+ def find_jvm_packages (project , jvm_lang : jvm . JvmLanguage , logger = None ):
264245 """
265- Collect the Java packages of Java source files for a ``project``.
246+ Collect the JVM packages of Java source files for a ``project``.
266247
267248 Multiprocessing is enabled by default on this pipe, the number of processes
268249 can be controlled through the SCANCODEIO_PROCESSES setting.
269250
270251 Note: we use the same API as the ScanCode scans by design
271252 """
272- from_java_resources = (
273- project .codebaseresources .files ()
274- .no_status ()
275- .from_codebase ()
276- .has_no_relation ()
277- .filter (extension = ".java" )
253+ resources = (
254+ project .codebaseresources .files ().no_status ().from_codebase ().has_no_relation ()
278255 )
279256
257+ from_jvm_resources = resources .filter (extension__in = jvm_lang .source_extensions )
258+
280259 if logger :
281260 logger (
282- f"Finding Java package for { from_java_resources .count ():,d} "
283- ".java resources."
261+ f"Finding { jvm_lang . name } packages for { from_jvm_resources .count ():,d} "
262+ f" { jvm_lang . source_extensions } resources."
284263 )
285264
286265 scancode .scan_resources (
287- resource_qs = from_java_resources ,
288- scan_func = scan_for_java_package ,
289- save_func = save_java_package_scan_results ,
266+ resource_qs = from_jvm_resources ,
267+ scan_func = jvm_lang . scan_for_source_package ,
268+ save_func = save_jvm_package_scan_results ,
290269 progress_logger = logger ,
291270 )
292271
293272
294- def scan_for_java_package (location , with_threading = True ):
295- """
296- Run a Java package scan on provided ``location``.
297-
298- Return a dict of scan ``results`` and a list of ``errors``.
299- """
300- scanners = [scancode .Scanner ("java_package" , jvm .get_java_package )]
301- return scancode ._scan_resource (location , scanners , with_threading = with_threading )
302-
303-
304- def save_java_package_scan_results (codebase_resource , scan_results , scan_errors ):
273+ def save_jvm_package_scan_results (codebase_resource , scan_results , scan_errors ):
305274 """
306275 Save the resource Java package scan results in the database as Resource.extra_data.
307276 Create project errors if any occurred during the scan.
@@ -314,11 +283,14 @@ def save_java_package_scan_results(codebase_resource, scan_results, scan_errors)
314283 codebase_resource .update_extra_data (scan_results )
315284
316285
317- def _map_jar_to_source_resource (jar_resource , to_resources , from_resources ):
286+ def _map_jar_to_jvm_source_resource (
287+ jar_resource , to_resources , from_resources , jvm_lang : jvm .JvmLanguage
288+ ):
318289 jar_extracted_path = get_extracted_path (jar_resource )
319290 jar_extracted_dot_class_files = list (
320291 to_resources .filter (
321- extension = ".class" , path__startswith = jar_extracted_path
292+ extension__in = jvm_lang .binary_extensions ,
293+ path__startswith = jar_extracted_path ,
322294 ).values ("id" , "status" )
323295 )
324296
@@ -338,7 +310,7 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
338310 dot_class_file .get ("id" ) for dot_class_file in jar_extracted_dot_class_files
339311 ]
340312 java_to_class_extra_data_list = CodebaseRelation .objects .filter (
341- to_resource__in = dot_class_file_ids , map_type = "java_to_class"
313+ to_resource__in = dot_class_file_ids , map_type = jvm_lang . binary_map_type
342314 ).values_list ("extra_data" , flat = True )
343315
344316 from_source_roots = [
@@ -358,7 +330,7 @@ def _map_jar_to_source_resource(jar_resource, to_resources, from_resources):
358330 )
359331
360332
361- def map_jar_to_source (project , logger = None ):
333+ def map_jar_to_jvm_source (project , jvm_lang : jvm . JvmLanguage , logger = None ):
362334 """Map .jar files to their related source directory."""
363335 project_files = project .codebaseresources .files ()
364336 # Include the directories to map on the common source
@@ -377,7 +349,9 @@ def map_jar_to_source(project, logger=None):
377349 progress = LoopProgress (to_jars_count , logger )
378350
379351 for jar_resource in progress .iter (resource_iterator ):
380- _map_jar_to_source_resource (jar_resource , to_resources , from_resources )
352+ _map_jar_to_jvm_source_resource (
353+ jar_resource , to_resources , from_resources , jvm_lang = jvm_lang
354+ )
381355
382356
383357def _map_path_resource (
0 commit comments