Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 68 additions & 1 deletion scanpipe/pipes/jvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class JvmLanguage:
binary_extensions: tuple = (".class",)
# Like java_package, kotlin_package, scala_package, used as an attribute in resource
source_package_attribute_name: str = None
# Like java_classes, stores the class names defined in the source file
source_classes_attribute_name: str = None
# A regex pattern to extract a package from a source file
package_regex: Pattern = None
# Type of relation for a binary file to its source file
Expand Down Expand Up @@ -115,14 +117,38 @@ def get_indexable_qualified_paths_from_values(cls, resource_values):

And the output tuples look like this example::
(123, "org/apache/commons/LoggerImpl.java")

If the source file contains class names that differ from the filename
(e.g., a file named "Foo.java" containing "class Bar"), additional
entries are yielded for each class name.
"""
for resource_id, resource_name, resource_extra_data in resource_values:
jvm_package = resource_extra_data.get(cls.source_package_attribute_name)
# Yield the original filename-based path
fully_qualified = get_fully_qualified_path(
jvm_package=resource_extra_data.get(cls.source_package_attribute_name),
jvm_package=jvm_package,
filename=resource_name,
)
yield resource_id, fully_qualified

# Also yield paths for any class names that differ from the filename
if cls.source_classes_attribute_name:
class_names = resource_extra_data.get(
cls.source_classes_attribute_name, []
)
# Get the base name without extension to compare
base_name = Path(resource_name).stem
extension = Path(resource_name).suffix
for class_name in class_names:
# Only yield if class name differs from filename
if class_name != base_name:
class_filename = f"{class_name}{extension}"
class_path = get_fully_qualified_path(
jvm_package=jvm_package,
filename=class_filename,
)
yield resource_id, class_path

@classmethod
def get_normalized_path(cls, path, extension):
"""
Expand Down Expand Up @@ -180,14 +206,55 @@ def find_expression(lines, regex):
return value


def find_all_expressions(lines, regex, max_lines=500):
    """
    Return the unique values found using ``regex`` in the first ``max_lines``
    entries of ``lines``, in order of first appearance.

    ``lines`` is any iterable of strings and ``regex`` is a pre-compiled
    pattern. Each line is searched with ``regex.findall``; empty matches are
    ignored. An empty list is returned when nothing matches or when
    ``max_lines`` is zero or negative.
    """
    from itertools import islice

    # A dict preserves insertion order and gives constant-time duplicate
    # detection, instead of re-scanning a list for every match.
    found = {}
    # ``islice`` consumes exactly ``max_lines`` lines: no extra line is scanned
    # past the limit and no extra line is pulled from the underlying iterator.
    for line in islice(lines, max(max_lines, 0)):
        for value in regex.findall(line):
            if value:
                found.setdefault(value, None)
    return list(found)


class JavaLanguage(JvmLanguage):
    """Java settings: ``.java`` sources mapped to ``.class`` binaries."""

    name = "java"
    source_extensions = (".java",)
    binary_extensions = (".class",)
    source_package_attribute_name = "java_package"
    source_classes_attribute_name = "java_classes"
    package_regex = re.compile(r"^\s*package\s+([\w\.]+)\s*;")
    # Regex to match class/interface/enum declarations in Java.
    # Matches patterns like: "class Foo", "public class Foo", "interface Bar", etc.
    # Nested declarations are matched too since the pattern ignores brace depth.
    # The trailing lookahead only accepts what may follow a declared type name:
    # type parameters, the body, a comment, an extends/implements/permits
    # clause, or the end of the line. This keeps prose such as
    # "This class is ..." in comments from being captured as a class named "is".
    class_name_regex = re.compile(
        r"(?:^|[;\s{}])\s*"  # Start of line or after ; { } or whitespace
        r"(?:public\s+|private\s+|protected\s+|abstract\s+|final\s+|static\s+)*"
        r"(?:class|interface|enum)\s+"
        r"(\w+)"  # Capture the class/interface/enum name
        r"(?=\s*(?:[<{/]|extends\b|implements\b|permits\b|$))"
    )
    binary_map_type = "java_to_class"

    @classmethod
    def find_source_package(cls, lines):
        """
        Find the package and the declared type names in Java source ``lines``.

        Return a dict holding the package under the ``java_package`` key and
        the list of class/interface/enum names under the ``java_classes`` key.
        A key is omitted when nothing was found for it, and ``None`` is
        returned when neither a package nor a type name was found.
        """
        from itertools import tee

        # Both scans must start from the first line. ``tee`` buffers only the
        # lines the scans actually consume, so a large source file is not read
        # into memory as a whole.
        package_lines, class_lines = tee(lines)
        result = {}

        package = find_expression(lines=package_lines, regex=cls.package_regex)
        if package:
            result[cls.source_package_attribute_name] = package

        class_names = find_all_expressions(
            lines=class_lines, regex=cls.class_name_regex
        )
        if class_names:
            result[cls.source_classes_attribute_name] = class_names

        return result or None


class ScalaLanguage(JvmLanguage):
name = "scala"
Expand Down
28 changes: 28 additions & 0 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,34 @@ def test_scanpipe_pipes_d2d_map_java_to_class_no_java(self):
expected = "No ('.java',) resources to map."
self.assertIn(expected, buffer.getvalue())

def test_scanpipe_pipes_d2d_map_java_to_class_different_class_name(self):
    """Map a .class file to a source file declaring another class name (#1993)."""
    project = self.project1
    # DelombokTask.java is recorded as declaring the classes Tasks and Delombok.
    java_source = make_resource_file(
        project,
        path="from/lombok/delombok/ant/DelombokTask.java",
        extra_data={
            "java_package": "lombok.delombok.ant",
            "java_classes": ["Tasks", "Delombok"],
        },
    )
    # The compiled file carries the class name rather than the source filename.
    compiled_class = make_resource_file(
        project,
        path="to/lombok/delombok/ant/Tasks.class",
    )

    log_output = io.StringIO()
    d2d.map_jvm_to_class(
        project, logger=log_output.write, jvm_lang=jvm.JavaLanguage
    )

    # Exactly one relation is expected, found through the class name.
    relation = project.codebaserelations.get()
    self.assertEqual(java_source, relation.from_resource)
    self.assertEqual(compiled_class, relation.to_resource)
    self.assertEqual("java_to_class", relation.map_type)

def test_scanpipe_pipes_d2d_java_ignore_pattern(self):
make_resource_file(self.project1, path="to/module-info.class")
make_resource_file(self.project1, path="to/META-INF/MANIFEST.MF")
Expand Down
89 changes: 89 additions & 0 deletions scanpipe/tests/pipes/test_jvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,48 @@

java_package_too_far_down = ("\n" * 501) + "package org.apache.logging.log4j.core;"

# Java code where class name differs from filename (like lombok's DelombokTask.java)
# The fixture declares a top-level class "Tasks" with two nested static classes,
# "Delombok" and "Format". Its Javadoc also mentions "class Tasks" in prose.
java_code_different_class_name = """
package lombok.delombok.ant;

import org.apache.tools.ant.Task;

/**
* Ant tasks for delombok.
* This file is named DelombokTask.java but contains class Tasks.
*/
class Tasks {
public static class Delombok extends Task {
public void execute() {}
}

public static class Format extends Task {
public void execute() {}
}
}
"""

# Java code with multiple classes including interface and enum
# The fixture declares four top-level types in one file: a public class, an
# interface, an enum and an abstract class.
java_code_multiple_types = """
package com.example;

public class MainClass {
// Main implementation
}

interface SomeInterface {
void doSomething();
}

enum Status {
ACTIVE, INACTIVE
}

abstract class AbstractBase {
public abstract void process();
}
"""


class ScanPipeJvmTest(TestCase):
data = Path(__file__).parent.parent / "data"
Expand Down Expand Up @@ -114,6 +156,53 @@ def test_scanpipe_pipes_jvm_get_fully_qualified_java_path(self):
fqjp = jvm.get_fully_qualified_path("org.common", "Bar.java")
self.assertEqual("org/common/Bar.java", fqjp)

def test_scanpipe_pipes_jvm_find_java_package_with_different_class_name(self):
    """Check find_source_package reports class names unrelated to the filename."""
    source_lines = java_code_different_class_name.splitlines()
    found = jvm.JavaLanguage.find_source_package(source_lines)
    self.assertEqual("lombok.delombok.ant", found["java_package"])
    # "Tasks" is the top-level class; "Delombok" and "Format" are nested in it.
    for expected_name in ("Tasks", "Delombok", "Format"):
        self.assertIn(expected_name, found["java_classes"])

def test_scanpipe_pipes_jvm_find_java_package_with_multiple_types(self):
    """Check find_source_package reports every class, interface and enum name."""
    source_lines = java_code_multiple_types.splitlines()
    found = jvm.JavaLanguage.find_source_package(source_lines)
    self.assertEqual("com.example", found["java_package"])
    declared_types = found["java_classes"]
    expected_names = ("MainClass", "SomeInterface", "Status", "AbstractBase")
    for expected_name in expected_names:
        self.assertIn(expected_name, declared_types)

def test_scanpipe_pipes_jvm_get_indexable_qualified_paths_with_class_names(self):
    """Check that one qualified path is yielded per differing class name."""
    extra_data = {
        "java_package": "lombok.delombok.ant",
        "java_classes": ["Tasks", "Delombok"],
    }
    resource_values = [(1, "DelombokTask.java", extra_data)]
    java = jvm.JavaLanguage
    paths = list(java.get_indexable_qualified_paths_from_values(resource_values))

    # One path for the filename itself, then one per class name unlike it.
    self.assertEqual(3, len(paths))
    filename_path, tasks_path, delombok_path = paths
    self.assertEqual((1, "lombok/delombok/ant/DelombokTask.java"), filename_path)
    self.assertEqual((1, "lombok/delombok/ant/Tasks.java"), tasks_path)
    self.assertEqual((1, "lombok/delombok/ant/Delombok.java"), delombok_path)


class ScanPipeJvmScalaTest(TestCase):
data = Path(__file__).parent.parent / "data"
Expand Down