Skip to content

Commit f117a76

Browse files
Fix maven package assembly
Adds a fix for maven package assembly, combining MANIFEST.MF and pom.xml files into a single package instead of multiple top-level packages. Also fixes pom.properties file parsing. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent ac04984 commit f117a76

File tree

18 files changed

+1585
-268
lines changed

18 files changed

+1585
-268
lines changed

src/packagedcode/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,10 @@
101101

102102
haxe.HaxelibJsonHandler,
103103

104-
jar_manifest.JavaJarManifestHandler,
105-
jar_manifest.JavaOSGiManifestHandler,
106-
107104
maven.MavenPomXmlHandler,
108105
maven.MavenPomPropertiesHandler,
106+
maven.JavaJarManifestHandler,
107+
maven.JavaOSGiManifestHandler,
109108

110109
misc.AndroidAppArchiveHandler,
111110
misc.AndroidLibraryHandler,

src/packagedcode/jar_manifest.py

Lines changed: 48 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99

1010
import re
1111

12-
from packagedcode import models
13-
from packagedcode.maven import parse_scm_connection
12+
from packagedcode.utils import VCS_URLS
1413
from packagedcode.utils import normalize_vcs_url
1514

1615
"""
@@ -23,48 +22,6 @@
2322
"""
2423

2524

26-
class JavaJarManifestHandler(models.DatafileHandler):
27-
datasource_id = 'java_jar_manifest'
28-
path_patterns = ('*/META-INF/MANIFEST.MF',)
29-
default_package_type = 'jar'
30-
default_primary_language = 'Java'
31-
description = 'Java JAR MANIFEST.MF'
32-
documentation_url = 'https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html'
33-
34-
@classmethod
35-
def parse(cls, location):
36-
sections = parse_manifest(location)
37-
if sections:
38-
main_section = sections[0]
39-
manifest = get_normalized_java_manifest_data(main_section)
40-
if manifest:
41-
yield models.PackageData(**manifest,)
42-
43-
@classmethod
44-
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
45-
# we want to root of the jar, two levels up
46-
parent = resource.parent(codebase)
47-
if parent:
48-
parent = resource.parent(codebase)
49-
50-
if parent:
51-
models.DatafileHandler.assign_package_to_resources(
52-
package,
53-
resource=parent,
54-
codebase=codebase,
55-
package_adder=package_adder,
56-
)
57-
58-
59-
class JavaOSGiManifestHandler(JavaJarManifestHandler):
60-
datasource_id = 'java_osgi_manifest'
61-
# This is an empty tuple to avoid getting back two packages
62-
# essentially this is the same as JavaJarManifestHandler
63-
path_patterns = ()
64-
default_primary_language = 'Java'
65-
description = 'Java OSGi MANIFEST.MF'
66-
default_package_type = 'osgi'
67-
6825

6926
def parse_manifest(location):
7027
"""
@@ -258,13 +215,8 @@ def dget(s):
258215
else:
259216
name = i_title or am_nm or ext_nm or nm
260217
descriptions = [s_title, i_title, nm]
261-
262-
if package_type == 'maven':
263-
datasource_id = JavaJarManifestHandler.datasource_id
264-
elif package_type == 'jar':
265-
datasource_id = JavaJarManifestHandler.datasource_id
266-
elif package_type == 'osgi':
267-
datasource_id = JavaOSGiManifestHandler.datasource_id
218+
219+
datasource_id =get_datasource_id(package_type=package_type)
268220

269221
descriptions = unique(descriptions)
270222
descriptions = [d for d in descriptions if d and d.strip() and d != name]
@@ -391,6 +343,51 @@ def dget(s):
391343
return package
392344

393345

346+
def parse_scm_connection(scm_connection):
    """
    Return an SPDX vcs_url given a Maven `scm_connection` string or the string
    as-is if it cannot be parsed.

    An empty or None `scm_connection` is returned unchanged.

    See https://maven.apache.org/scm/scm-url-format.html
        scm:<scm_provider><delimiter><provider_specific_part>

    For example:
        scm:git:git://server_name[:port]/path_to_repository
        scm:git:http://server_name[:port]/path_to_repository
        scm:git:https://server_name[:port]/path_to_repository
        scm:git:ssh://server_name[:port]/path_to_repository
        scm:git:file://[hostname]/path_to_repository
    """
    if not scm_connection:
        # nothing to parse: avoid failing on a missing SCM connection
        return scm_connection

    # a pipe, when present, is used as the delimiter instead of the colon
    delimiter = '|' if '|' in scm_connection else ':'
    segments = scm_connection.split(delimiter, 2)
    if len(segments) != 3:
        # we cannot parse this so we return it as is
        return scm_connection

    _scm, scm_tool, vcs_url = segments
    # TODO: vcs_tool is not yet supported
    normalized = normalize_vcs_url(vcs_url, vcs_tool=scm_tool)
    if normalized:
        vcs_url = normalized

    # prefix with the SCM tool unless the URL already starts with a known
    # VCS prefix or with the tool name itself
    if not vcs_url.startswith(VCS_URLS) and not vcs_url.startswith(scm_tool):
        vcs_url = f'{scm_tool}+{vcs_url}'

    return vcs_url
378+
379+
380+
def get_datasource_id(package_type):
    """
    Return the datasource_id string of the manifest handler matching the
    `package_type` string, or None when `package_type` is not one of
    'maven', 'jar' or 'osgi'.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import with
    # packagedcode.maven -- confirm.
    from packagedcode.maven import JavaJarManifestHandler
    from packagedcode.maven import JavaOSGiManifestHandler

    # 'maven' and 'jar' manifests share the plain JAR manifest handler
    if package_type in ('maven', 'jar'):
        return JavaJarManifestHandler.datasource_id

    if package_type == 'osgi':
        return JavaOSGiManifestHandler.datasource_id

    return None
390+
394391
def is_id(s):
395392
"""
396393
Return True if `s` is some kind of id.

src/packagedcode/maven.py

Lines changed: 132 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,18 @@
1313

1414
import javaproperties
1515
import lxml
16+
from fnmatch import fnmatchcase
1617
from packageurl import PackageURL
1718
from pymaven import artifact
1819
from pymaven import pom
1920
from pymaven.pom import strip_namespace
2021

2122
from commoncode import fileutils
2223
from packagedcode import models
24+
from packagedcode.jar_manifest import parse_scm_connection
25+
from packagedcode.jar_manifest import parse_manifest
26+
from packagedcode.jar_manifest import get_normalized_java_manifest_data
2327
from packagedcode.utils import normalize_vcs_url
24-
from packagedcode.utils import VCS_URLS
2528
from textcode import analysis
2629
from typecode import contenttype
2730

@@ -52,10 +55,117 @@
5255
there is no pom.properties check if there are side-by-side artifacts
5356
"""
5457

55-
# TODO: combine with jar_manifets.py
58+
class MavenBasePackageHandler(models.DatafileHandler):
    """
    Base handler providing a shared ``assemble()`` for the JAR manifest and
    Maven POM handlers, so that a META-INF/MANIFEST.MF and a nested
    META-INF/maven/**/pom.xml found in the same META-INF directory are
    assembled together instead of separately.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder=models.add_to_package):
        """
        Yield the items produced by assembling the ``resource`` datafile.

        The strategy depends on which datafiles sit under the META-INF
        directory related to ``resource``:

        - exactly one nested pom.xml and at least one MANIFEST.MF: assemble
          them together with ``assemble_from_many_datafiles_in_directory``;
        - only MANIFEST.MF file(s): delegate to ``JavaJarManifestHandlerMixin``;
        - only a nested pom.xml, several nested pom.xml files, or a
          ``resource`` that is neither a JAR manifest nor a nested pom.xml:
          delegate to ``MavenPomXmlHandlerMixin``;
        - a codebase with a single resource, or nothing found: delegate to
          ``models.DatafileHandler``.

        ``package_adder`` is forwarded to whichever assembly is delegated to.
        """
        if codebase.has_single_resource:
            yield from models.DatafileHandler.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
            return

        datafile_path = resource.path

        # This order is important as we want pom.xml to be used for package
        # creation and then to update from MANIFEST later
        manifest_path_pattern = '*/META-INF/MANIFEST.MF'
        nested_pom_xml_path_pattern = '*/META-INF/maven/**/pom.xml'
        datafile_name_patterns = (nested_pom_xml_path_pattern, manifest_path_pattern)

        if fnmatchcase(datafile_path, manifest_path_pattern):
            # MANIFEST.MF sits directly in META-INF
            meta_inf_resource = resource.parent(codebase)
        elif fnmatchcase(datafile_path, nested_pom_xml_path_pattern):
            # Walk four levels up from the pom.xml.
            # NOTE(review): this assumes the conventional layout
            # META-INF/maven/<groupId>/<artifactId>/pom.xml -- confirm.
            upward_segments = 4
            meta_inf_resource = resource
            for _ in range(upward_segments):
                meta_inf_resource = meta_inf_resource.parent(codebase)
        else:
            # neither a JAR manifest nor a pom.xml nested under META-INF
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
            return

        # collect the manifests and nested pom.xml found under META-INF
        pom_xmls = []
        manifests = []
        for r in meta_inf_resource.walk(codebase):
            if fnmatchcase(r.path, manifest_path_pattern):
                manifests.append(r)
            if fnmatchcase(r.path, nested_pom_xml_path_pattern):
                pom_xmls.append(r)

        if len(pom_xmls) > 1:
            # several nested pom.xml files: assemble this datafile on its own
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
            return

        if manifests and pom_xmls:
            # use the parent of META-INF when there is one, else META-INF itself
            parent_resource = meta_inf_resource.parent(codebase)
            if not parent_resource:
                parent_resource = meta_inf_resource

            yield from cls.assemble_from_many_datafiles_in_directory(
                datafile_name_patterns=datafile_name_patterns,
                directory=meta_inf_resource,
                codebase=codebase,
                package_adder=package_adder,
                ignore_name_check=True,
                parent_resource=parent_resource,
            )
        elif manifests:
            yield from JavaJarManifestHandlerMixin.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
        elif pom_xmls:
            yield from MavenPomXmlHandlerMixin.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
        else:
            yield from models.DatafileHandler.assemble(
                package_data, resource, codebase, package_adder=package_adder,
            )
56121

57122

58-
class MavenPomXmlHandler(models.DatafileHandler):
123+
class JavaJarManifestHandler(MavenBasePackageHandler):
    """
    Handler for a Java JAR META-INF/MANIFEST.MF datafile.
    """
    datasource_id = 'java_jar_manifest'
    path_patterns = ('*/META-INF/MANIFEST.MF',)
    default_package_type = 'jar'
    default_primary_language = 'Java'
    description = 'Java JAR MANIFEST.MF'
    documentation_url = 'https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html'

    @classmethod
    def parse(cls, location):
        """
        Yield one PackageData built from the first section of the manifest
        at ``location``. Yield nothing when no section is parsed or when the
        normalized manifest data is empty.
        """
        sections = parse_manifest(location)
        if not sections:
            return

        # only the first section is used to build the package data
        manifest = get_normalized_java_manifest_data(sections[0])
        if not manifest:
            return

        yield models.PackageData(**manifest)
140+
141+
class JavaJarManifestHandlerMixin(models.DatafileHandler):
    """
    Mixin that assigns a package built from a MANIFEST.MF to the resource
    two levels above that manifest.
    """

    @classmethod
    def assign_package_to_resources(cls, package, resource, codebase, package_adder):
        """
        Assign ``package`` starting from the resource two levels above
        ``resource``, using ``models.DatafileHandler`` to do the assignment.

        When ``resource`` has a parent but no grandparent, the parent is used
        instead. Nothing is assigned when ``resource`` has no parent.
        """
        # first level up: the directory holding the manifest
        manifest_dir = resource.parent(codebase)
        if not manifest_dir:
            return

        # we want the root of the jar, two levels up: step up from the
        # manifest directory, not from the manifest resource again
        jar_root = manifest_dir.parent(codebase) or manifest_dir

        models.DatafileHandler.assign_package_to_resources(
            package,
            resource=jar_root,
            codebase=codebase,
            package_adder=package_adder,
        )
156+
157+
158+
class JavaOSGiManifestHandler(JavaJarManifestHandler):
    """
    Handler for a Java OSGi MANIFEST.MF: a JavaJarManifestHandler with its
    own datasource id, description and default package type.
    """
    datasource_id = 'java_osgi_manifest'
    description = 'Java OSGi MANIFEST.MF'
    default_package_type = 'osgi'
    default_primary_language = 'Java'

    # This is an empty tuple to avoid getting back two packages
    # essentially this is the same as JavaJarManifestHandler
    path_patterns = ()
166+
167+
168+
class MavenPomXmlHandler(MavenBasePackageHandler):
59169
datasource_id = 'maven_pom'
60170
# NOTE: Maven 1.x used project.xml
61171
path_patterns = ('*.pom', '*pom.xml',)
@@ -64,8 +174,6 @@ class MavenPomXmlHandler(models.DatafileHandler):
64174
description = 'Apache Maven pom'
65175
documentation_url = 'https://maven.apache.org/pom.html'
66176

67-
# TODO: implment more sophistcaed assembly with META-INF/MANIFEST.MF and META-INF/LICENSE
68-
69177
@classmethod
70178
def is_datafile(cls, location, filetypes=tuple()):
71179
"""
@@ -106,6 +214,23 @@ def parse(cls, location, base_url='https://repo1.maven.org/maven2'):
106214
if package_data:
107215
yield package_data
108216

217+
@classmethod
def get_top_level_resources(cls, manifest_resource, codebase):
    """
    Yield the META-INF directory Resource related to ``manifest_resource``,
    followed by every Resource found when walking that directory.

    Yield nothing when the path of ``manifest_resource`` does not contain
    'META-INF' or when the directory is not found in ``codebase``.
    """
    resource_path = manifest_resource.path
    if 'META-INF' not in resource_path:
        return

    # keep only what comes before the first 'META-INF' in the path
    leading_segment = resource_path.partition('META-INF')[0].strip()
    meta_inf_resource = codebase.get_resource(f'{leading_segment}/META-INF')
    if not meta_inf_resource:
        return

    yield meta_inf_resource
    yield from meta_inf_resource.walk(codebase)
230+
231+
232+
class MavenPomXmlHandlerMixin(models.DatafileHandler):
233+
109234
@classmethod
110235
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
111236
"""
@@ -164,20 +289,6 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
164289
package_adder=package_adder
165290
)
166291

167-
@classmethod
168-
def get_top_level_resources(cls, manifest_resource, codebase):
169-
"""
170-
Yield Resources that are top-level based on a JAR's directory structure
171-
"""
172-
if 'META-INF' in manifest_resource.path:
173-
path_segments = manifest_resource.path.split('META-INF')
174-
leading_segment = path_segments[0].strip()
175-
meta_inf_dir_path = f'{leading_segment}/META-INF'
176-
meta_inf_resource = codebase.get_resource(meta_inf_dir_path)
177-
if meta_inf_resource:
178-
yield meta_inf_resource
179-
yield from meta_inf_resource.walk(codebase)
180-
181292

182293
# TODO: assemble with its pom!!
183294
class MavenPomPropertiesHandler(models.NonAssemblableDatafileHandler):
@@ -201,8 +312,8 @@ def parse(cls, location):
201312
if properties:
202313
yield models.PackageData(
203314
datasource_id=cls.datasource_id,
204-
type=cls.package_type,
205-
primary_language=cls.primary_language,
315+
type=cls.default_package_type,
316+
primary_language=cls.default_primary_language,
206317
extra_data=dict(pom_properties=properties)
207318
)
208319

@@ -1267,40 +1378,6 @@ def build_vcs_and_code_view_urls(scm):
12671378
return dict(vcs_url=vcs_url, code_view_url=code_view_url,)
12681379

12691380

1270-
def parse_scm_connection(scm_connection):
1271-
"""
1272-
Return an SPDX vcs_url given a Maven `scm_connection` string or the string
1273-
as-is if it cannot be parsed.
1274-
1275-
See https://maven.apache.org/scm/scm-url-format.html
1276-
scm:<scm_provider><delimiter><provider_specific_part>
1277-
1278-
scm:git:git://server_name[:port]/path_to_repository
1279-
scm:git:http://server_name[:port]/path_to_repository
1280-
scm:git:https://server_name[:port]/path_to_repository
1281-
scm:git:ssh://server_name[:port]/path_to_repository
1282-
scm:git:file://[hostname]/path_to_repository
1283-
"""
1284-
1285-
delimiter = '|' if '|' in scm_connection else ':'
1286-
segments = scm_connection.split(delimiter, 2)
1287-
if not len(segments) == 3:
1288-
# we cannot parse this so we return it as is
1289-
return scm_connection
1290-
1291-
_scm, scm_tool, vcs_url = segments
1292-
# TODO: vcs_tool is not yet supported
1293-
normalized = normalize_vcs_url(vcs_url, vcs_tool=scm_tool)
1294-
if normalized:
1295-
vcs_url = normalized
1296-
1297-
if not vcs_url.startswith(VCS_URLS):
1298-
if not vcs_url.startswith(scm_tool):
1299-
vcs_url = '{scm_tool}+{vcs_url}'.format(**locals())
1300-
1301-
return vcs_url
1302-
1303-
13041381
def get_extension(packaging):
13051382
"""
13061383
We only care for certain artifacts extension/packaging/classifier.

0 commit comments

Comments
 (0)