Skip to content

Commit 24e9026

Browse files
committed
Split packages from package_manifest #1027
The packages scan now returns a single package_manifest key (not a list), and a post_scan plugin (responding to the same --package option) performs a roll-up of the manifest information at the proper level for a package type as the "packages" attribute (which is still a list). For instance, a package.json "package_manifest" will end up having a "packages" entry in its parent directory. Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent c7fd59a commit 24e9026

31 files changed

+450
-74
lines changed
Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
1-
Resource,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,files_count,dirs_count,size_count,scan_errors,license__key,license__score,license__short_name,license__category,license__owner,license__homepage_url,license__text_url,license__reference_url,license__spdx_license_key,license__spdx_url,start_line,end_line,matched_rule__identifier,matched_rule__license_choice,matched_rule__licenses,copyright,copyright_holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath,package__primary_language,package__code_type,package__description,package__size,package__release_date,package__parties,package__homepage_url,package__download_url,package__bug_tracking_url,package__vcs_repository,package__copyright,package__declared_licensing,package__notice_text
2-
/json2csv.rb,file,json2csv.rb,json2csv,.rb,1599,2018-04-11,6cfb0bd0fb0b784f57164d15bdfca2b734ad87a6,f18e519b77bc7f3e4213215033db3857,text/x-python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,apache-2.0,98.45,Apache 2.0,Permissive,Apache Software Foundation,http://www.apache.org/licenses/,http://www.apache.org/licenses/LICENSE-2.0,https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0,Apache-2.0,https://spdx.org/licenses/Apache-2.0,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,,,,
4-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,scancode-acknowledgment,98.45,ScanCode acknowledgment,Permissive,nexB,https://github.com/nexB/scancode-toolkit/,,https://enterprise.dejacode.com/urn/urn:dje:license:scancode-acknowledgment,,,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,,,,,,,,,,,,,,,,,,,,
5-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,Copyright (c) 2017 nexB Inc. and others.,,,,,,,,,,,,,,,,,,,,,,
6-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,nexB Inc. and others.,,,,,,,,,,,,,,,,,,,,,
7-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,
8-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,
9-
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,10,,,,,,,http://apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,
10-
/license,file,license,license,,679,2018-04-11,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,text/plain,ASCII text,,False,True,False,False,False,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11-
/license,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,100.00,GPL 2.0 or later,Copyleft,Free Software Foundation (FSF),http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,https://enterprise.dejacode.com/urn/urn:dje:license:gpl-2.0-plus,GPL-2.0+,https://spdx.org/licenses/GPL-2.0,1,12,gpl-2.0-plus.LICENSE,False,[u'gpl-2.0-plus'],,,,,,,,,,,,,,,,,,,,,,,
12-
/package.json,file,package.json,package,.json,2200,2018-04-11,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,text/plain,"ASCII text, with very long lines",JSON,False,True,False,False,True,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13-
/package.json,,,,,,,,,,,,,,,,,,,,,,mit,15.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit_27.RULE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,,,,
14-
/package.json,,,,,,,,,,,,,,,,,,,,,,mit,100.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit.LICENSE,False,[u'mit'],,,,,,,,,,,,,,,,,,,,,,,
15-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,,,,,,,,,,,,,,,,,,,,
16-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,,LearnBoost,,,,,,,,,,,,,,,,,,,,,
17-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,
18-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,
19-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,
20-
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,TJ Holowaychuk,,,,,,,
1+
Resource,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,package_manifest,files_count,dirs_count,size_count,scan_errors,license__key,license__score,license__short_name,license__category,license__owner,license__homepage_url,license__text_url,license__reference_url,license__spdx_license_key,license__spdx_url,start_line,end_line,matched_rule__identifier,matched_rule__license_choice,matched_rule__licenses,copyright,copyright_holder,email,url
2+
/json2csv.rb,file,json2csv.rb,json2csv,.rb,1599,2018-04-11,6cfb0bd0fb0b784f57164d15bdfca2b734ad87a6,f18e519b77bc7f3e4213215033db3857,text/x-python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,,0,0,0,,,,,,,,,,,,,,,,,,,,
3+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,apache-2.0,98.45,Apache 2.0,Permissive,Apache Software Foundation,http://www.apache.org/licenses/,http://www.apache.org/licenses/LICENSE-2.0,https://enterprise.dejacode.com/urn/urn:dje:license:apache-2.0,Apache-2.0,https://spdx.org/licenses/Apache-2.0,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,
4+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,scancode-acknowledgment,98.45,ScanCode acknowledgment,Permissive,nexB,https://github.com/nexB/scancode-toolkit/,,https://enterprise.dejacode.com/urn/urn:dje:license:scancode-acknowledgment,,,5,24,apache-2.0_scancode.RULE,False,"[u'apache-2.0', u'scancode-acknowledgment']",,,,
5+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,Copyright (c) 2017 nexB Inc. and others.,,,
6+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,nexB Inc. and others.,,
7+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,http://nexb.com/
8+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,https://github.com/nexB/scancode-toolkit/
9+
/json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,10,,,,,,,http://apache.org/licenses/LICENSE-2.0
10+
/license,file,license,license,,679,2018-04-11,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,text/plain,ASCII text,,False,True,False,False,False,False,,0,0,0,,,,,,,,,,,,,,,,,,,,
11+
/license,,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,100.00,GPL 2.0 or later,Copyleft,Free Software Foundation (FSF),http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html,https://enterprise.dejacode.com/urn/urn:dje:license:gpl-2.0-plus,GPL-2.0+,https://spdx.org/licenses/GPL-2.0,1,12,gpl-2.0-plus.LICENSE,False,[u'gpl-2.0-plus'],,,,
12+
/package.json,file,package.json,package,.json,2200,2018-04-11,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,text/plain,"ASCII text, with very long lines",JSON,False,True,False,False,True,False,"OrderedDict([(u'type', u'npm'), (u'namespace', None), (u'name', u'cookie-signature'), (u'version', u'1.0.3'), (u'qualifiers', None), (u'subpath', None), (u'primary_language', u'JavaScript'), (u'code_type', None), (u'description', u'Sign and unsign cookies'), (u'size', None), (u'release_date', None), (u'parties', [OrderedDict([(u'type', u'person'), (u'role', u'author'), (u'name', u'TJ Holowaychuk'), (u'email', u'[email protected]'), (u'url', None)])]), (u'keywords', [u'cookie', u'sign', u'unsign']), (u'homepage_url', None), (u'download_url', u'https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz'), (u'download_checksums', []), (u'bug_tracking_url', u'https://github.com/visionmedia/node-cookie-signature/issues'), (u'code_view_url', None), (u'vcs_tool', u'git'), (u'vcs_repository', u'https://github.com/visionmedia/node-cookie-signature.git'), (u'vcs_revision', None), (u'copyright', None), (u'license_expression', None), (u'declared_licensing', None), (u'notice_text', None), (u'dependencies', [OrderedDict([(u'purl', u'pkg:npm/mocha'), (u'requirement', u'*'), (u'scope', u'devDependencies'), (u'is_runtime', False), (u'is_optional', True), (u'is_resolved', False)]), OrderedDict([(u'purl', u'pkg:npm/should'), (u'requirement', u'*'), (u'scope', u'devDependencies'), (u'is_runtime', False), (u'is_optional', True), (u'is_resolved', False)])]), (u'related_packages', [])])",0,0,0,,,,,,,,,,,,,,,,,,,,
13+
/package.json,,,,,,,,,,,,,,,,,,,,,,,mit,15.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit_27.RULE,False,[u'mit'],,,,
14+
/package.json,,,,,,,,,,,,,,,,,,,,,,,mit,100.00,MIT License,Permissive,MIT,http://opensource.org/licenses/mit-license.php,http://opensource.org/licenses/mit-license.php,https://enterprise.dejacode.com/urn/urn:dje:license:mit,MIT,https://spdx.org/licenses/MIT,24,24,mit.LICENSE,False,[u'mit'],,,,
15+
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,
16+
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23,26,,,,,LearnBoost,,
17+
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,[email protected],
18+
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,https://github.com/visionmedia/node-cookie-signature.git
19+
/package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues

setup.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@
1616
from setuptools import find_packages
1717
from setuptools import setup
1818

19-
2019
version = '2.9.1'
2120

22-
2321
#### Small hack to force using a plain version number if the option
2422
#### --plain-version is passed to setup.py
2523

@@ -31,6 +29,7 @@
3129
pass
3230
####
3331

32+
3433
def get_version(default=version, template='{tag}.{distance}.{commit}{dirty}',
3534
use_default=USE_DEFAULT_VERSION):
3635
"""
@@ -230,7 +229,7 @@ def read(*names, **kwargs):
230229
'info = scancode.plugin_info:InfoScanner',
231230
'licenses = scancode.plugin_license:LicenseScanner',
232231
'copyrights = scancode.plugin_copyright:CopyrightScanner',
233-
'packages = scancode.plugin_package:PackageScanner',
232+
'packages = scancode.plugin_package:PackageManifestScanner',
234233
'emails = scancode.plugin_email:EmailScanner',
235234
'urls = scancode.plugin_url:UrlScanner',
236235
],
@@ -247,7 +246,9 @@ def read(*names, **kwargs):
247246
'mark-source = scancode.plugin_mark_source:MarkSource',
248247
'copyrights-summary = scancode.plugin_copyrights_summary:CopyrightSummary',
249248
'license-policy = scancode.plugin_license_policy:LicensePolicy',
249+
'package-root = scancode.plugin_package:PackageRootSummarizer',
250250
],
251+
251252

252253
# scancode_output_filter is the entry point for filter plugins executed
253254
# after the post-scan plugins and used by the output plugins to

src/packagedcode/__init__.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,31 @@
7373
models.PlainZipPackage,
7474
]
7575

76+
PACKAGES_BY_TYPE = {cls.type.default: cls for cls in PACKAGE_TYPES}
77+
78+
79+
def get_package_class(scan_data):
80+
"""
81+
Return the Package subclass that corresponds to the package type in a
82+
mapping of package `scan_data`.
83+
84+
For example:
85+
>>> data = {'type': 'cpan'}
86+
>>> assert models.CpanModule == get_package_class(data)
87+
>>> data = {'type': 'some stuff'}
88+
>>> assert models.Package == get_package_class(data)
89+
>>> data = {'type': None}
90+
>>> assert models.Package == get_package_class(data)
91+
>>> data = {}
92+
>>> assert models.Package == get_package_class(data)
93+
>>> data = []
94+
>>> assert models.Package == get_package_class(data)
95+
>>> data = None
96+
>>> assert models.Package == get_package_class(data)
97+
"""
98+
ptype = scan_data and scan_data.get('type') or None
99+
if not ptype:
100+
# basic type for unknown package types
101+
return models.Package
102+
ptype_class = PACKAGES_BY_TYPE.get(ptype)
103+
return ptype_class or models.Package

src/packagedcode/maven.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ class MavenPomPackage(models.Package):
7474
def recognize(cls, location):
7575
return parse(location)
7676

77+
@classmethod
78+
def get_package_root(cls, manifest_resource, codebase):
79+
if manifest_resource.name.endswith('pom.xml'):
80+
return manifest_resource.parent(codebase)
81+
# FIXME: this is NOT correct
82+
return manifest_resource
83+
7784

7885
class ParentPom(artifact.Artifact):
7986
"""

src/packagedcode/models.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,26 @@ def recognize(cls, location):
613613
"""
614614
return cls(location)
615615

616+
@classmethod
617+
def get_package_root(cls, manifest_resource, codebase):
618+
"""
619+
Return the Resource for the package root given a `manifest_resource`
620+
Resource object that represents a manifest in the `codebase` Codebase.
621+
622+
Each package type and instance have different conventions on how a
623+
package manifest relates to the root of a package.
624+
625+
For instance, given a "package.json" file, the root of an npm package is the
626+
parent directory. The same applies with a Maven "pom.xml". In the case
627+
of a "xyz.pom" file found inside a JAR META-INF/ directory, the root is
628+
the JAR itself which may not be the direct parent
629+
630+
Each package type should subclass as needed. This defaults to returning the
631+
same path.
632+
"""
633+
return manifest_resource
616634
#
635+
617636
# Package types
618637
# NOTE: this is somewhat redundant with extractcode archive handlers
619638
# yet the purpose and semantics are rather different here
@@ -691,12 +710,20 @@ class BowerPackage(Package):
691710
type = StringType(default='bower')
692711
primary_language = StringType(default='JavaScript')
693712

713+
@classmethod
714+
def get_package_root(cls, manifest_resource, codebase):
715+
return manifest_resource.parent(codebase)
716+
694717

695718
class MeteorPackage(Package):
696719
metafiles = ('package.js',)
697720
type = StringType(default='meteor')
698721
primary_language = StringType(default='JavaScript')
699722

723+
@classmethod
724+
def get_package_root(cls, manifest_resource, codebase):
725+
return manifest_resource.parent(codebase)
726+
700727

701728
class CpanModule(Package):
702729
metafiles = ('*.pod', '*.pm', 'MANIFEST', 'Makefile.PL', 'META.yml', 'META.json', '*.meta', 'dist.ini')
@@ -707,11 +734,16 @@ class CpanModule(Package):
707734

708735

709736
# TODO: refine me: Go packages are a mess but something is emerging
737+
# TODO: move to and use godeps.py
710738
class Godep(Package):
711739
metafiles = ('Godeps',)
712740
type = StringType(default='go')
713741
primary_language = StringType(default='Go')
714742

743+
@classmethod
744+
def get_package_root(cls, manifest_resource, codebase):
745+
return manifest_resource.parent(codebase)
746+
715747

716748
class RubyGem(Package):
717749
metafiles = ('*.control', '*.gemspec', 'Gemfile', 'Gemfile.lock',)
@@ -721,6 +753,10 @@ class RubyGem(Package):
721753
type = StringType(default='gem')
722754
primary_language = StringType(default='gem')
723755

756+
@classmethod
757+
def get_package_root(cls, manifest_resource, codebase):
758+
return manifest_resource.parent(codebase)
759+
724760

725761
class AndroidApp(Package):
726762
filetypes = ('zip archive',)

src/packagedcode/npm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ class NpmPackage(models.Package):
7474
def recognize(cls, location):
7575
return parse(location)
7676

77+
@classmethod
78+
def get_package_root(cls, manifest_resource, codebase):
79+
return manifest_resource.parent(codebase)
80+
7781
def repository_homepage_url(self, baseurl=default_web_baseurl):
7882
return npm_homepage_url(self.namespace, self.name, registry=baseurl)
7983

src/packagedcode/nuget.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ class NugetPackage(models.Package):
5151
def recognize(cls, location):
5252
return parse(location)
5353

54+
@classmethod
55+
def get_package_root(cls, manifest_resource, codebase):
56+
if manifest_resource.name.endswith('.nupkg'):
57+
return manifest_resource
58+
if manifest_resource.name.endswith(cls.metafiles):
59+
return manifest_resource.parent(codebase)
60+
return manifest_resource
61+
5462

5563
nuspec_tags = [
5664
'id',

src/packagedcode/phpcomposer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ class PHPComposerPackage(models.Package):
7575
def recognize(cls, location):
7676
return parse(location)
7777

78+
@classmethod
79+
def get_package_root(cls, manifest_resource, codebase):
80+
return manifest_resource.parent(codebase)
81+
7882

7983
def is_phpcomposer_json(location):
8084
return (filetype.is_file(location)

src/scancode/api.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,18 +187,43 @@ def get_licenses(location, min_score=0, include_text=False, diag=False,
187187

188188
def get_package_info(location, **kwargs):
189189
"""
190-
mappings for package information detected in the file at `location`.
190+
Return a mapping of package manifest information detected in the
191+
file at `location`.
192+
193+
Note that this function does not catch exceptions itself: any error raised while
194+
parsing a package manifest propagates to the caller.
195+
"""
196+
from packagedcode.recognize import recognize_package
197+
manifest = recognize_package(location)
198+
if manifest:
199+
return dict(package_manifest=manifest.to_dict())
200+
return dict(package_manifest=None)
201+
202+
203+
def get_package_info2(location, **kwargs):
204+
"""
205+
Return a mapping of package manifest information detected in the
206+
file at `location`.
207+
208+
Note that all exceptions are caught if there are any errors while parsing a
209+
package manifest.
191210
"""
192211
from packagedcode.recognize import recognize_package
193-
package = recognize_package(location)
194-
if package:
195-
return dict(packages=[package.to_dict()])
196-
return dict(packages=[])
212+
try:
213+
manifest = recognize_package(location)
214+
if manifest:
215+
return dict(package_manifest=manifest.to_dict())
216+
except Exception:
217+
# FIXME: this should be logged somehow, but for now we avoid useless
218+
# errors per #983
219+
pass
220+
return dict(package_manifest=None)
221+
197222

198223

199224
def get_file_info(location, **kwargs):
200225
"""
201-
Return a mappings of file information collected for the file at `location`.
226+
Return a mapping of file information collected for the file at `location`.
202227
"""
203228
result = OrderedDict()
204229

0 commit comments

Comments
 (0)