1313
1414import javaproperties
1515import lxml
16+ from fnmatch import fnmatchcase
1617from packageurl import PackageURL
1718from pymaven import artifact
1819from pymaven import pom
1920from pymaven .pom import strip_namespace
2021
2122from commoncode import fileutils
2223from packagedcode import models
24+ from packagedcode .jar_manifest import parse_scm_connection
25+ from packagedcode .jar_manifest import parse_manifest
26+ from packagedcode .jar_manifest import get_normalized_java_manifest_data
2327from packagedcode .utils import normalize_vcs_url
24- from packagedcode .utils import VCS_URLS
2528from textcode import analysis
2629from typecode import contenttype
2730
5255there is no pom.properties check if there are side-by-side artifacts
5356"""
5457
55- # TODO: combine with jar_manifest.py
class MavenBasePackageHandler(models.DatafileHandler):
    """
    Shared assembly logic for the datafiles found under a JAR ``META-INF``
    directory: a nested Maven ``pom.xml`` and a ``MANIFEST.MF``.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_package):
        """
        Yield the items produced by assembling ``package_data`` found in the
        ``resource`` datafile of ``codebase``.

        When the ``META-INF`` directory of ``resource`` contains exactly one
        nested ``pom.xml`` together with a ``MANIFEST.MF``, both datafiles are
        assembled together, with the ``pom.xml`` processed first. In every
        other case the assembly is delegated to the pom.xml mixin, the JAR
        manifest mixin or the default ``DatafileHandler``.

        ``package_adder`` is forwarded to every delegated assembly so that a
        caller-supplied adder is honored on all code paths.
        """
        if codebase.has_single_resource:
            yield from models.DatafileHandler.assemble(
                package_data, resource, codebase, package_adder
            )
            return

        datafile_path = resource.path

        # This order is important as we want pom.xml to be used for package
        # creation and then to update from MANIFEST later
        manifest_path_pattern = '*/META-INF/MANIFEST.MF'
        nested_pom_xml_path_pattern = '*/META-INF/maven/**/pom.xml'
        datafile_name_patterns = (nested_pom_xml_path_pattern, manifest_path_pattern)

        if fnmatchcase(datafile_path, manifest_path_pattern):
            meta_inf_resource = resource.parent(codebase)
        elif fnmatchcase(datafile_path, nested_pom_xml_path_pattern):
            # NOTE(review): walking four levels up assumes the usual
            # META-INF/maven/<group>/<artifact>/pom.xml layout -- confirm for
            # deeper or shallower nesting.
            upward_segments = 4
            meta_inf_resource = resource
            for _ in range(upward_segments):
                meta_inf_resource = meta_inf_resource.parent(codebase)
                if not meta_inf_resource:
                    # ran out of ancestors: stop instead of calling a method on None
                    break
        else:
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder
            )
            return

        if not meta_inf_resource:
            # No directory to inspect: fall back to a plain pom.xml assembly.
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder
            )
            return

        pom_xmls = []
        manifests = []
        for descendant in meta_inf_resource.walk(codebase):
            if fnmatchcase(descendant.path, manifest_path_pattern):
                manifests.append(descendant)
            if fnmatchcase(descendant.path, nested_pom_xml_path_pattern):
                pom_xmls.append(descendant)

        if len(pom_xmls) > 1:
            # Several nested poms: do not try to pair any of them with the manifest.
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder
            )
            return

        if manifests and pom_xmls:
            # Use the directory that contains META-INF as parent when there is
            # one, and META-INF itself otherwise.
            parent_resource = meta_inf_resource.parent(codebase)
            if not parent_resource:
                parent_resource = meta_inf_resource
            yield from cls.assemble_from_many_datafiles_in_directory(
                datafile_name_patterns=datafile_name_patterns,
                directory=meta_inf_resource,
                codebase=codebase,
                package_adder=package_adder,
                ignore_name_check=True,
                parent_resource=parent_resource,
            )
        elif manifests:
            yield from JavaJarManifestHandlerMixin.assemble(
                package_data, resource, codebase, package_adder
            )
        elif pom_xmls:
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder
            )
        else:
            yield from models.DatafileHandler.assemble(
                package_data, resource, codebase, package_adder
            )
56121
57122
58- class MavenPomXmlHandler (models .DatafileHandler ):
class JavaJarManifestHandler(MavenBasePackageHandler):
    """
    Handle a Java JAR ``META-INF/MANIFEST.MF`` datafile.
    """
    datasource_id = 'java_jar_manifest'
    path_patterns = ('*/META-INF/MANIFEST.MF',)
    default_package_type = 'jar'
    default_primary_language = 'Java'
    description = 'Java JAR MANIFEST.MF'
    documentation_url = 'https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html'

    @classmethod
    def parse(cls, location):
        """
        Yield a single PackageData built from the first section of the
        manifest file at ``location``. Yield nothing when the manifest has no
        section or when its first section yields no normalized data.
        """
        manifest_sections = parse_manifest(location)
        if not manifest_sections:
            return

        # Only the first section is used to build the package data.
        normalized = get_normalized_java_manifest_data(manifest_sections[0])
        if not normalized:
            return

        yield models.PackageData(**normalized)
139+
140+
class JavaJarManifestHandlerMixin(models.DatafileHandler):
    """
    Resource assignment for packages created from a JAR ``MANIFEST.MF``.
    """

    @classmethod
    def assign_package_to_resources(cls, package, resource, codebase, package_adder):
        """
        Assign ``package`` to the resource tree rooted two levels above the
        ``resource`` manifest, using ``package_adder``.

        When the manifest directory has no parent, that directory is used as
        the root instead. Do nothing when ``resource`` has no parent at all.
        """
        # first level up: the directory holding the manifest
        manifest_dir = resource.parent(codebase)
        if not manifest_dir:
            return

        # second level up: the root of the jar. This must start from the
        # manifest directory, not from ``resource`` again, otherwise we only
        # ever climb one level.
        jar_root = manifest_dir.parent(codebase) or manifest_dir

        models.DatafileHandler.assign_package_to_resources(
            package,
            resource=jar_root,
            codebase=codebase,
            package_adder=package_adder,
        )
156+
157+
class JavaOSGiManifestHandler(JavaJarManifestHandler):
    """
    Handle a Java OSGi ``MANIFEST.MF``: essentially the same as
    JavaJarManifestHandler, reported with its own datasource id and the
    ``osgi`` package type.
    """
    datasource_id = 'java_osgi_manifest'
    description = 'Java OSGi MANIFEST.MF'
    default_package_type = 'osgi'
    default_primary_language = 'Java'
    # Deliberately empty: JavaJarManifestHandler already matches MANIFEST.MF
    # files, and matching them here too would return two packages.
    path_patterns = tuple()
166+
167+
168+ class MavenPomXmlHandler (MavenBasePackageHandler ):
59169 datasource_id = 'maven_pom'
60170 # NOTE: Maven 1.x used project.xml
61171 path_patterns = ('*.pom' , '*pom.xml' ,)
@@ -64,8 +174,6 @@ class MavenPomXmlHandler(models.DatafileHandler):
64174 description = 'Apache Maven pom'
65175 documentation_url = 'https://maven.apache.org/pom.html'
66176
67- # TODO: implement more sophisticated assembly with META-INF/MANIFEST.MF and META-INF/LICENSE
68-
69177 @classmethod
70178 def is_datafile (cls , location , filetypes = tuple ()):
71179 """
@@ -106,6 +214,23 @@ def parse(cls, location, base_url='https://repo1.maven.org/maven2'):
106214 if package_data :
107215 yield package_data
108216
@classmethod
def get_top_level_resources(cls, manifest_resource, codebase):
    """
    Yield the ``META-INF`` directory Resource located from the path of
    ``manifest_resource``, followed by every Resource found when walking
    that directory in ``codebase``.

    Yield nothing when the path does not contain ``META-INF`` or when the
    directory cannot be found in ``codebase``.
    """
    manifest_path = manifest_resource.path
    if 'META-INF' not in manifest_path:
        return

    # Everything before the first "META-INF" is the prefix of the directory path.
    jar_prefix = manifest_path.split('META-INF')[0].strip()
    meta_inf_dir = codebase.get_resource(f'{jar_prefix}/META-INF')
    if not meta_inf_dir:
        return

    yield meta_inf_dir
    yield from meta_inf_dir.walk(codebase)
230+
231+
232+ class MavenPomXmlHandlerMixin (models .DatafileHandler ):
233+
109234 @classmethod
110235 def assign_package_to_resources (cls , package , resource , codebase , package_adder ):
111236 """
@@ -164,20 +289,6 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
164289 package_adder = package_adder
165290 )
166291
@classmethod
def get_top_level_resources(cls, manifest_resource, codebase):
    """
    Yield the ``META-INF`` directory Resource located from the path of
    ``manifest_resource``, followed by every Resource found when walking
    that directory in ``codebase``.

    Yield nothing when the path does not contain ``META-INF`` or when the
    directory cannot be found in ``codebase``.
    """
    manifest_path = manifest_resource.path
    if 'META-INF' not in manifest_path:
        return

    # Everything before the first "META-INF" is the prefix of the directory path.
    jar_prefix = manifest_path.split('META-INF')[0].strip()
    meta_inf_dir = codebase.get_resource(f'{jar_prefix}/META-INF')
    if not meta_inf_dir:
        return

    yield meta_inf_dir
    yield from meta_inf_dir.walk(codebase)
180-
181292
182293# TODO: assemble with its pom!!
183294class MavenPomPropertiesHandler (models .NonAssemblableDatafileHandler ):
@@ -201,8 +312,8 @@ def parse(cls, location):
201312 if properties :
202313 yield models .PackageData (
203314 datasource_id = cls .datasource_id ,
204- type = cls .package_type ,
205- primary_language = cls .primary_language ,
315+ type = cls .default_package_type ,
316+ primary_language = cls .default_primary_language ,
206317 extra_data = dict (pom_properties = properties )
207318 )
208319
@@ -1267,40 +1378,6 @@ def build_vcs_and_code_view_urls(scm):
12671378 return dict (vcs_url = vcs_url , code_view_url = code_view_url ,)
12681379
12691380
def parse_scm_connection(scm_connection):
    """
    Return an SPDX vcs_url given a Maven `scm_connection` string or the string
    as-is if it cannot be parsed. An empty or None `scm_connection` is
    returned unchanged.

    See https://maven.apache.org/scm/scm-url-format.html
        scm:<scm_provider><delimiter><provider_specific_part>

    scm:git:git://server_name[:port]/path_to_repository
    scm:git:http://server_name[:port]/path_to_repository
    scm:git:https://server_name[:port]/path_to_repository
    scm:git:ssh://server_name[:port]/path_to_repository
    scm:git:file://[hostname]/path_to_repository
    """
    if not scm_connection:
        # nothing to parse; also avoids a TypeError on a None value
        return scm_connection

    # the delimiter is a pipe when one is present, and a colon otherwise
    delimiter = '|' if '|' in scm_connection else ':'
    segments = scm_connection.split(delimiter, 2)
    if len(segments) != 3:
        # we cannot parse this so we return it as is
        return scm_connection

    _scm, scm_tool, vcs_url = segments
    # TODO: vcs_tool is not yet supported
    # keep the raw URL when normalization returns nothing
    vcs_url = normalize_vcs_url(vcs_url, vcs_tool=scm_tool) or vcs_url

    # prefix with the tool name unless the URL already starts with a known
    # VCS URL prefix or with the tool name itself
    if not vcs_url.startswith(VCS_URLS) and not vcs_url.startswith(scm_tool):
        vcs_url = f'{scm_tool}+{vcs_url}'

    return vcs_url
1302-
1303-
13041381def get_extension (packaging ):
13051382 """
13061383 We only care for certain artifacts extension/packaging/classifier.
0 commit comments