diff --git a/scanpipe/pipes/jvm.py b/scanpipe/pipes/jvm.py index b071d9dcd4..675eace17a 100644 --- a/scanpipe/pipes/jvm.py +++ b/scanpipe/pipes/jvm.py @@ -41,6 +41,8 @@ class JvmLanguage: binary_extensions: tuple = (".class",) # Like java_package, kotlin_package, scala_package, used as an attribute in resource source_package_attribute_name: str = None + # Like java_classes, stores the class names defined in the source file + source_classes_attribute_name: str = None # A regex pattern to extract a package from a source file package_regex: Pattern = None # Type of relation for a binary file to its source file @@ -115,14 +117,38 @@ def get_indexable_qualified_paths_from_values(cls, resource_values): And the output tuples look like this example:: (123, "org/apache/commons/LoggerImpl.java") + + If the source file contains class names that differ from the filename + (e.g., a file named "Foo.java" containing "class Bar"), additional + entries are yielded for each class name. """ for resource_id, resource_name, resource_extra_data in resource_values: + jvm_package = resource_extra_data.get(cls.source_package_attribute_name) + # Yield the original filename-based path fully_qualified = get_fully_qualified_path( - jvm_package=resource_extra_data.get(cls.source_package_attribute_name), + jvm_package=jvm_package, filename=resource_name, ) yield resource_id, fully_qualified + # Also yield paths for any class names that differ from the filename + if cls.source_classes_attribute_name: + class_names = resource_extra_data.get( + cls.source_classes_attribute_name, [] + ) + # Get the base name without extension to compare + base_name = Path(resource_name).stem + extension = Path(resource_name).suffix + for class_name in class_names: + # Only yield if class name differs from filename + if class_name != base_name: + class_filename = f"{class_name}{extension}" + class_path = get_fully_qualified_path( + jvm_package=jvm_package, + filename=class_filename, + ) + yield resource_id, 
def find_all_expressions(lines, regex, max_lines=500):
    """
    Return the unique values matched by ``regex`` in the first ``max_lines``
    entries of ``lines``, in order of first appearance.

    ``lines`` is any iterable of strings and ``regex`` is a compiled pattern
    that is applied to every scanned line with ``findall``, so several values
    can be collected from a single line. Empty matches are ignored.
    An empty list is returned when nothing matches or when ``max_lines`` is
    zero or negative.
    """
    # A dict keeps insertion order while de-duplicating in constant time,
    # instead of re-scanning a growing list for every single match.
    unique_values = {}
    for line_number, line in enumerate(lines):
        # ``>=`` (not ``>``) so that exactly ``max_lines`` lines are scanned:
        # indexes 0 to ``max_lines - 1``.
        if line_number >= max_lines:
            break
        for value in regex.findall(line):
            if value:
                unique_values.setdefault(value, None)
    return list(unique_values)
b/scanpipe/tests/pipes/test_d2d.py @@ -498,6 +498,34 @@ def test_scanpipe_pipes_d2d_map_java_to_class_no_java(self): expected = "No ('.java',) resources to map." self.assertIn(expected, buffer.getvalue()) + def test_scanpipe_pipes_d2d_map_java_to_class_different_class_name(self): + """Test D2D mapping when class name differs from source filename (#1993).""" + # Source file named DelombokTask.java but contains class Tasks + from1 = make_resource_file( + self.project1, + path="from/lombok/delombok/ant/DelombokTask.java", + extra_data={ + "java_package": "lombok.delombok.ant", + "java_classes": ["Tasks", "Delombok"], + }, + ) + # The .class file is named after the class, not the source file + to1 = make_resource_file( + self.project1, + path="to/lombok/delombok/ant/Tasks.class", + ) + + buffer = io.StringIO() + d2d.map_jvm_to_class( + self.project1, logger=buffer.write, jvm_lang=jvm.JavaLanguage + ) + + # Should find the mapping via class name + relation = self.project1.codebaserelations.get() + self.assertEqual(from1, relation.from_resource) + self.assertEqual(to1, relation.to_resource) + self.assertEqual("java_to_class", relation.map_type) + def test_scanpipe_pipes_d2d_java_ignore_pattern(self): make_resource_file(self.project1, path="to/module-info.class") make_resource_file(self.project1, path="to/META-INF/MANIFEST.MF") diff --git a/scanpipe/tests/pipes/test_jvm.py b/scanpipe/tests/pipes/test_jvm.py index a832fe3a1d..1c6df2b8f7 100644 --- a/scanpipe/tests/pipes/test_jvm.py +++ b/scanpipe/tests/pipes/test_jvm.py @@ -50,6 +50,48 @@ java_package_too_far_down = ("\n" * 501) + "package org.apache.logging.log4j.core;" +# Java code where class name differs from filename (like lombok's DelombokTask.java) +java_code_different_class_name = """ +package lombok.delombok.ant; + +import org.apache.tools.ant.Task; + +/** + * Ant tasks for delombok. + * This file is named DelombokTask.java but contains class Tasks. 
+ */ +class Tasks { + public static class Delombok extends Task { + public void execute() {} + } + + public static class Format extends Task { + public void execute() {} + } +} +""" + +# Java code with multiple classes including interface and enum +java_code_multiple_types = """ +package com.example; + +public class MainClass { + // Main implementation +} + +interface SomeInterface { + void doSomething(); +} + +enum Status { + ACTIVE, INACTIVE +} + +abstract class AbstractBase { + public abstract void process(); +} +""" + class ScanPipeJvmTest(TestCase): data = Path(__file__).parent.parent / "data" @@ -114,6 +156,53 @@ def test_scanpipe_pipes_jvm_get_fully_qualified_java_path(self): fqjp = jvm.get_fully_qualified_path("org.common", "Bar.java") self.assertEqual("org/common/Bar.java", fqjp) + def test_scanpipe_pipes_jvm_find_java_package_with_different_class_name(self): + """Test that find_source_package extracts class names differing from filename.""" + result = jvm.JavaLanguage.find_source_package( + java_code_different_class_name.splitlines() + ) + self.assertEqual("lombok.delombok.ant", result["java_package"]) + # The class name "Tasks" differs from what would be the filename + self.assertIn("Tasks", result["java_classes"]) + # Also check for inner classes + self.assertIn("Delombok", result["java_classes"]) + self.assertIn("Format", result["java_classes"]) + + def test_scanpipe_pipes_jvm_find_java_package_with_multiple_types(self): + """Test that find_source_package extracts all class/interface/enum names.""" + result = jvm.JavaLanguage.find_source_package( + java_code_multiple_types.splitlines() + ) + self.assertEqual("com.example", result["java_package"]) + classes = result["java_classes"] + self.assertIn("MainClass", classes) + self.assertIn("SomeInterface", classes) + self.assertIn("Status", classes) + self.assertIn("AbstractBase", classes) + + def test_scanpipe_pipes_jvm_get_indexable_qualified_paths_with_class_names(self): + """Test 
get_indexable_qualified_paths_from_values yields class name paths.""" + resource_values = [ + ( + 1, + "DelombokTask.java", + { + "java_package": "lombok.delombok.ant", + "java_classes": ["Tasks", "Delombok"], + }, + ), + ] + paths = list( + jvm.JavaLanguage.get_indexable_qualified_paths_from_values(resource_values) + ) + # Should yield: filename path, and paths for each class name that differs + self.assertEqual(3, len(paths)) + # First is the original filename-based path + self.assertEqual((1, "lombok/delombok/ant/DelombokTask.java"), paths[0]) + # Then paths for class names that differ from filename + self.assertEqual((1, "lombok/delombok/ant/Tasks.java"), paths[1]) + self.assertEqual((1, "lombok/delombok/ant/Delombok.java"), paths[2]) + class ScanPipeJvmScalaTest(TestCase): data = Path(__file__).parent.parent / "data"