2222
2323import json
2424
25- from django .core .serializers .json import DjangoJSONEncoder
26-
27- from commoncode .hash import multi_checksums
28-
29- from scanpipe .pipelines import Pipeline
30- from scanpipe .pipes import input
31- from scanpipe .pipes import scancode
32- from scanpipe .pipes .input import copy_input
33- from scanpipe .pipes .input import is_archive
25+ from scanpipe .pipelines .scan_single_package import ScanSinglePackage
3426
3527from scanpipe .pipes .resolve import get_pom_url_list
3628from scanpipe .pipes .resolve import download_and_scan_pom_file
3729
3830
39- class ScanMavenPackage (Pipeline ):
31+ class ScanMavenPackage (ScanSinglePackage ):
4032 """
4133 Scan a single package archive (or package manifest file).
4234
@@ -59,104 +51,22 @@ def steps(cls):
5951 cls .make_summary_from_scan_results ,
6052 )
6153
62- scancode_run_scan_args = {
63- "copyright" : True ,
64- "email" : True ,
65- "info" : True ,
66- "license" : True ,
67- "license_text" : True ,
68- "license_diagnostics" : True ,
69- "license_text_diagnostics" : True ,
70- "license_references" : True ,
71- "package" : True ,
72- "url" : True ,
73- "classify" : True ,
74- "summary" : True ,
75- "todo" : True ,
76- }
77-
78- def get_package_input (self ):
79- """Locate the package input in the project's input/ directory."""
80- # Using the input_sources model property as it includes input sources instances
81- # as well as any files manually copied into the input/ directory.
82- input_sources = self .project .input_sources
83- inputs = list (self .project .inputs ("*" ))
84-
85- if len (inputs ) != 1 or len (input_sources ) != 1 :
86- raise Exception ("Only 1 input file supported" )
87-
88- self .input_path = inputs [0 ]
89-
90- def collect_input_information (self ):
91- """Collect and store information about the project input."""
92- self .project .update_extra_data (
93- {
94- "filename" : self .input_path .name ,
95- "size" : self .input_path .stat ().st_size ,
96- ** multi_checksums (self .input_path ),
97- }
98- )
99-
100- def extract_input_to_codebase_directory (self ):
101- """Copy or extract input to project codebase/ directory."""
102- if not is_archive (self .input_path ):
103- copy_input (self .input_path , self .project .codebase_path )
104- return
105-
106- self .extract_archive (self .input_path , self .project .codebase_path )
107-
108- # Reload the project env post-extraction as the scancode-config.yml file
109- # may be located in one of the extracted archives.
110- self .env = self .project .get_env ()
111-
112- def run_scan (self ):
113- """Scan extracted codebase/ content."""
114- scan_output_path = self .project .get_output_file_path ("scancode" , "json" )
115- self .scan_output_location = str (scan_output_path .absolute ())
116-
117- scanning_errors = scancode .run_scan (
118- location = str (self .project .codebase_path ),
119- output_file = self .scan_output_location ,
120- run_scan_args = self .scancode_run_scan_args .copy (),
121- )
122-
123- for resource_path , errors in scanning_errors .items ():
124- self .project .add_error (
125- description = "\n " .join (errors ),
126- model = self .pipeline_name ,
127- details = {"resource_path" : resource_path .removeprefix ("codebase/" )},
128- )
129-
130- if not scan_output_path .exists ():
131- raise FileNotFoundError ("ScanCode output not available." )
132-
13354 def fetch_and_scan_remote_pom (self ):
13455 """Fetch the pom.xml file from maven.org if not present in codebase."""
135- # TODO Verify if the following filter actually work
136- if not self .project .codebaseresources .files ().filter (name = "pom.xml" ).exists ():
137- with open (self .scan_output_location , 'r' ) as file :
138- data = json .load (file )
139- packages = data .get ("packages" , [])
140-
141- pom_url_list = get_pom_url_list (self .project .input_sources [0 ], packages )
142- scanned_pom_packages , scanned_dependencies = download_and_scan_pom_file (pom_url_list )
143-
144- updated_pacakges = packages + scanned_pom_packages
145- # Replace/Update the package and dependencies section
146- data ['packages' ] = updated_pacakges
147- # Need to update the dependencies
148- # data['dependencies'] = scanned_dependencies
149- with open (self .scan_output_location , 'w' ) as file :
150- json .dump (data , file , indent = 2 )
151-
152- def load_inventory_from_toolkit_scan (self ):
153- """Process a JSON Scan results to populate codebase resources and packages."""
154- input .load_inventory_from_toolkit_scan (self .project , self .scan_output_location )
155-
156- def make_summary_from_scan_results (self ):
157- """Build a summary in JSON format from the generated scan results."""
158- summary = scancode .make_results_summary (self .project , self .scan_output_location )
159- output_file = self .project .get_output_file_path ("summary" , "json" )
160-
161- with output_file .open ("w" ) as summary_file :
162- summary_file .write (json .dumps (summary , indent = 2 , cls = DjangoJSONEncoder ))
56+ with open (self .scan_output_location , 'r' ) as file :
57+ data = json .load (file )
58+ # Nothing to fetch when a scanned file path already contains pom.xml
59+ for file in data ['files' ]:
60+ if 'pom.xml' in file ['path' ]:
61+ return
62+ packages = data .get ("packages" , [])
63+
64+ pom_url_list = get_pom_url_list (self .project .input_sources [0 ], packages )
65+ scanned_pom_packages , scanned_dependencies = download_and_scan_pom_file (pom_url_list )
66+
67+ updated_pacakges = packages + scanned_pom_packages
68+ # Replace/Update the package and dependencies section
69+ data ['packages' ] = updated_pacakges
70+ data ['dependencies' ] = scanned_dependencies
71+ with open (self .scan_output_location , 'w' ) as file :
72+ json .dump (data , file , indent = 2 )
0 commit comments