11#
2- # Copyright (c) 2015 nexB Inc. and others. All rights reserved.
2+ # Copyright (c) 2017 nexB Inc. and others. All rights reserved.
33# http://nexb.com and https://github.com/nexB/scancode-toolkit/
44# The ScanCode software is licensed under the Apache License version 2.0.
55# Data generated with ScanCode require an acknowledgment.
@@ -86,6 +86,20 @@ def parse(location):
8686 if not is_package_json (location ):
8787 return
8888
89+ with codecs .open (location , encoding = 'utf-8' ) as loc :
90+ package_data = json .load (loc , object_pairs_hook = OrderedDict )
91+
92+ # a package.json is at the root of an NPM package
93+ base_dir = fileutils .parent_directory (location )
94+ metafile_name = fileutils .file_base_name (location )
95+ return build_package (package_data , base_dir , metafile_name )
96+
97+
98+ def build_package (package_data , base_dir = None , metafile_name = 'package.json' ):
99+ """
100+ Return a Package object from a package_data mapping (from a
101+ package.json or similar) or None.
102+ """
89103 # mapping of top level package.json items to the Package object field name
90104 plain_fields = OrderedDict ([
91105 ('name' , 'name' ),
@@ -101,33 +115,35 @@ def parse(location):
101115 ('bugs' , bugs_mapper ),
102116 ('contributors' , contributors_mapper ),
103117 ('maintainers' , maintainers_mapper ),
118+ # current form
104119 ('license' , licensing_mapper ),
120+ # old, deprecated form
105121 ('licenses' , licensing_mapper ),
106122 ('dependencies' , dependencies_mapper ),
107123 ('devDependencies' , dev_dependencies_mapper ),
108124 ('peerDependencies' , peer_dependencies_mapper ),
109125 ('optionalDependencies' , optional_dependencies_mapper ),
110- ('url' , url_mapper ),
126+ # legacy, ignored
127+ # ('url', url_mapper),
111128 ('dist' , dist_mapper ),
112129 ('repository' , repository_mapper ),
113130 ])
114131
115- with codecs .open (location , encoding = 'utf-8' ) as loc :
116- data = json .load (loc , object_pairs_hook = OrderedDict )
117132
118- if not data .get ('name' ) or not data .get ('version' ):
133+ if not package_data .get ('name' ) or not package_data .get ('version' ):
119134 # a package.json without name and version is not a usable NPM package
120135 return
121136
122137 package = NpmPackage ()
123138 # a package.json is at the root of an NPM package
124- base_dir = fileutils .parent_directory (location )
125139 package .location = base_dir
126140 # for now we only recognize a package.json, not a node_modules directory yet
127- package .metafile_locations = [location ]
128- package .version = data .get ('version' )
141+ if metafile_name :
142+ package .metafile_locations = [metafile_name ]
143+
144+ package .version = package_data .get ('version' ) or None
129145 for source , target in plain_fields .items ():
130- value = data .get (source )
146+ value = package_data .get (source ) or None
131147 if value :
132148 if isinstance (value , basestring ):
133149 value = value .strip ()
@@ -136,14 +152,21 @@ def parse(location):
136152
137153 for source , func in field_mappers .items ():
138154 logger .debug ('parse: %(source)r, %(func)r' % locals ())
139- value = data .get (source )
155+ value = package_data .get (source ) or None
140156 if value :
141157 if isinstance (value , basestring ):
142158 value = value .strip ()
143159 if value :
144160 func (value , package )
145- # this should be a mapper function but requires two args
146- package .download_urls .append (public_download_url (package .name , package .version ))
161+
162+ # this should be a mapper function but requires two args.
163+ # Note: we only add a synthetic download URL if there is none from
164+ # the dist mapping.
165+ if not package .download_urls :
166+ tarball = public_download_url (package .name , package .version )
167+ if tarball :
168+ package .download_urls .append (tarball )
169+
147170 return package
148171
149172
@@ -152,7 +175,7 @@ def licensing_mapper(licenses, package):
152175 Update package licensing and return package.
153176 Licensing data structure has evolved over time and is a tad messy.
154177 https://docs.npmjs.com/files/package.json#license
155- licenses is either:
178+ license(s) is either:
156179 - a string with:
157180 - an SPDX id or expression { "license" : "(ISC OR GPL-3.0)" }
158181 - some license name or id
@@ -163,9 +186,13 @@ def licensing_mapper(licenses, package):
163186 return package
164187
165188 if isinstance (licenses , basestring ):
189+ # current form
190+ # TODO: handle "SEE LICENSE IN <filename>"
191+ # TODO: parse expression with license_expression library
166192 package .asserted_licenses .append (models .AssertedLicense (license = licenses ))
167193
168194 elif isinstance (licenses , dict ):
195+ # old, deprecated form
169196 """
170197 "license": {
171198 "type": "MIT",
@@ -176,6 +203,7 @@ def licensing_mapper(licenses, package):
176203 url = licenses .get ('url' )))
177204
178205 elif isinstance (licenses , list ):
206+ # old, deprecated form
179207 """
180208 "licenses": [{"type": "Apache License, Version 2.0",
181209 "url": "http://www.apache.org/licenses/LICENSE-2.0" } ]
@@ -295,19 +323,28 @@ def repository_mapper(repo, package):
295323 if isinstance (repo , basestring ):
296324 package .vcs_repository = parse_repo_url (repo )
297325 elif isinstance (repo , dict ):
298- package .vcs_tool = repo .get ('type' ) or 'git'
299- package .vcs_repository = parse_repo_url (repo .get ('url' ))
326+ repurl = parse_repo_url (repo .get ('url' ))
327+ if repurl :
328+ package .vcs_tool = repo .get ('type' ) or 'git'
329+ package .vcs_repository = repurl
300330 return package
301331
302332
303333def url_mapper (url , package ):
304334 """
305- In a package.json, the "url" field is a redirection to a package download
306- URL published somewhere else than on the public npm registry.
307- We map it to a download url.
335+ In a package.json, the "url" field is a legacy field that contained
336+ various URLs either as a string or as a mapping of type->url
308337 """
309- if url :
310- package .download_urls .append (url )
338+ if not url :
339+ return package
340+
341+ if isinstance (url , basestring ):
342+ # TODO: map to a miscellaneous urls dict
343+ pass
344+ elif isinstance (url , dict ):
345+ # typical key is "web"
346+ # TODO: map to a miscellaneous urls dict
347+ pass
311348 return package
312349
313350
@@ -395,6 +432,11 @@ def parse_person(person):
395432 Both forms are equivalent.
396433 """
397434 # TODO: detect if this is a person name or a company name
435+
436+ name = None
437+ email = None
438+ url = None
439+
398440 if isinstance (person , basestring ):
399441 parsed = person_parser (person )
400442 if not parsed :
@@ -409,10 +451,28 @@ def parse_person(person):
409451 name = person .get ('name' )
410452 email = person .get ('email' )
411453 url = person .get ('url' )
454+
412455 else :
413456 raise Exception ('Incorrect NPM package.json person: %(person)r' % locals ())
414457
415- return name and name .strip (), email and email .strip ('<> ' ), url and url .strip ('() ' )
458+ if name :
459+ name = name .strip ()
460+ if name .lower () == 'none' :
461+ name = None
462+ name = name or None
463+
464+ if email :
465+ email = email .strip ('<> ' )
466+ if email .lower () == 'none' :
467+ email = None
468+ email = email or None
469+
470+ if url :
471+ url = url .strip ('() ' )
472+ if url .lower () == 'none' :
473+ url = None
474+ url = url or None
475+ return name , email , url
416476
417477
418478def public_download_url (name , version , registry = 'https://registry.npmjs.org' ):
0 commit comments