Skip to content

Commit 9812129

Browse files
committed
Removed duplicated code that is already present in ScanSinglePackage #1763
Signed-off-by: Chin Yeung Li <[email protected]>
1 parent d63a1e5 commit 9812129

File tree

1 file changed

+19
-109
lines changed

1 file changed

+19
-109
lines changed

scanpipe/pipelines/scan_maven_package.py

Lines changed: 19 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -22,21 +22,13 @@
2222

2323
import json
2424

25-
from django.core.serializers.json import DjangoJSONEncoder
26-
27-
from commoncode.hash import multi_checksums
28-
29-
from scanpipe.pipelines import Pipeline
30-
from scanpipe.pipes import input
31-
from scanpipe.pipes import scancode
32-
from scanpipe.pipes.input import copy_input
33-
from scanpipe.pipes.input import is_archive
25+
from scanpipe.pipelines.scan_single_package import ScanSinglePackage
3426

3527
from scanpipe.pipes.resolve import get_pom_url_list
3628
from scanpipe.pipes.resolve import download_and_scan_pom_file
3729

3830

39-
class ScanMavenPackage(Pipeline):
31+
class ScanMavenPackage(ScanSinglePackage):
4032
"""
4133
Scan a single package archive (or package manifest file).
4234
@@ -59,104 +51,22 @@ def steps(cls):
5951
cls.make_summary_from_scan_results,
6052
)
6153

62-
scancode_run_scan_args = {
63-
"copyright": True,
64-
"email": True,
65-
"info": True,
66-
"license": True,
67-
"license_text": True,
68-
"license_diagnostics": True,
69-
"license_text_diagnostics": True,
70-
"license_references": True,
71-
"package": True,
72-
"url": True,
73-
"classify": True,
74-
"summary": True,
75-
"todo": True,
76-
}
77-
78-
def get_package_input(self):
79-
"""Locate the package input in the project's input/ directory."""
80-
# Using the input_sources model property as it includes input sources instances
81-
# as well as any files manually copied into the input/ directory.
82-
input_sources = self.project.input_sources
83-
inputs = list(self.project.inputs("*"))
84-
85-
if len(inputs) != 1 or len(input_sources) != 1:
86-
raise Exception("Only 1 input file supported")
87-
88-
self.input_path = inputs[0]
89-
90-
def collect_input_information(self):
91-
"""Collect and store information about the project input."""
92-
self.project.update_extra_data(
93-
{
94-
"filename": self.input_path.name,
95-
"size": self.input_path.stat().st_size,
96-
**multi_checksums(self.input_path),
97-
}
98-
)
99-
100-
def extract_input_to_codebase_directory(self):
101-
"""Copy or extract input to project codebase/ directory."""
102-
if not is_archive(self.input_path):
103-
copy_input(self.input_path, self.project.codebase_path)
104-
return
105-
106-
self.extract_archive(self.input_path, self.project.codebase_path)
107-
108-
# Reload the project env post-extraction as the scancode-config.yml file
109-
# may be located in one of the extracted archives.
110-
self.env = self.project.get_env()
111-
112-
def run_scan(self):
113-
"""Scan extracted codebase/ content."""
114-
scan_output_path = self.project.get_output_file_path("scancode", "json")
115-
self.scan_output_location = str(scan_output_path.absolute())
116-
117-
scanning_errors = scancode.run_scan(
118-
location=str(self.project.codebase_path),
119-
output_file=self.scan_output_location,
120-
run_scan_args=self.scancode_run_scan_args.copy(),
121-
)
122-
123-
for resource_path, errors in scanning_errors.items():
124-
self.project.add_error(
125-
description="\n".join(errors),
126-
model=self.pipeline_name,
127-
details={"resource_path": resource_path.removeprefix("codebase/")},
128-
)
129-
130-
if not scan_output_path.exists():
131-
raise FileNotFoundError("ScanCode output not available.")
132-
13354
def fetch_and_scan_remote_pom(self):
13455
"""Fetch the pom.xml file from maven.org if not present in codebase."""
135-
# TODO Verify if the following filter actually work
136-
if not self.project.codebaseresources.files().filter(name="pom.xml").exists():
137-
with open(self.scan_output_location, 'r') as file:
138-
data = json.load(file)
139-
packages = data.get("packages", [])
140-
141-
pom_url_list = get_pom_url_list(self.project.input_sources[0], packages)
142-
scanned_pom_packages, scanned_dependencies = download_and_scan_pom_file(pom_url_list)
143-
144-
updated_pacakges = packages + scanned_pom_packages
145-
# Replace/Update the package and dependencies section
146-
data['packages'] = updated_pacakges
147-
# Need to update the dependencies
148-
# data['dependencies'] = scanned_dependencies
149-
with open(self.scan_output_location, 'w') as file:
150-
json.dump(data, file, indent=2)
151-
152-
def load_inventory_from_toolkit_scan(self):
153-
"""Process a JSON Scan results to populate codebase resources and packages."""
154-
input.load_inventory_from_toolkit_scan(self.project, self.scan_output_location)
155-
156-
def make_summary_from_scan_results(self):
157-
"""Build a summary in JSON format from the generated scan results."""
158-
summary = scancode.make_results_summary(self.project, self.scan_output_location)
159-
output_file = self.project.get_output_file_path("summary", "json")
160-
161-
with output_file.open("w") as summary_file:
162-
summary_file.write(json.dumps(summary, indent=2, cls=DjangoJSONEncoder))
56+
with open(self.scan_output_location, 'r') as file:
57+
data = json.load(file)
58+
# Return and do nothing if data has pom.xml
59+
for file in data['files']:
60+
if 'pom.xml' in file['path']:
61+
return
62+
packages = data.get("packages", [])
63+
64+
pom_url_list = get_pom_url_list(self.project.input_sources[0], packages)
65+
scanned_pom_packages, scanned_dependencies = download_and_scan_pom_file(pom_url_list)
66+
67+
updated_pacakges = packages + scanned_pom_packages
68+
# Replace/Update the package and dependencies section
69+
data['packages'] = updated_pacakges
70+
data['dependencies'] = scanned_dependencies
71+
with open(self.scan_output_location, 'w') as file:
72+
json.dump(data, file, indent=2)

0 commit comments

Comments
 (0)