Skip to content

Commit 013e846

Browse files
Merge pull request #3302 from keshav-space/3290_package_holder
Add copyright `holder` field to PackageData model
2 parents a3946df + 655874a commit 013e846

File tree

1,080 files changed

+2515
-48
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,080 files changed

+2515
-48
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ Package detection:
6363
manifest data parsing code outside of the scancode-toolkit context in other
6464
libraries.
6565

66+
- The PackageData model now includes a ``holder`` field, which is populated with
67+
holder data extracted from the copyright field if copyright data is present,
68+
otherwise it remains empty.
69+
70+
https://github.com/nexB/scancode-toolkit/issues/3290
71+
6672

6773
License detection:
6874
~~~~~~~~~~~~~~~~~~~

src/packagedcode/models.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,11 @@ class PackageData(IdentifiablePackageData):
609609
label='Copyright',
610610
help='Copyright statements for this package. Typically one per line.')
611611

612+
holder = String(
613+
label='Holder',
614+
help='Holders for this package. Typically one per line.'
615+
)
616+
612617
declared_license_expression = String(
613618
label='license expression',
614619
help='The license expression for this package typically derived '
@@ -711,8 +716,45 @@ class PackageData(IdentifiablePackageData):
711716
repr=True,
712717
)
713718

719+
714720
def __attrs_post_init__(self, *args, **kwargs):
715721
self.populate_license_fields()
722+
self.populate_holder_field()
723+
724+
def populate_holder_field(self):
    """
    Set the ``holder`` attribute from the ``copyright`` attribute.

    Do nothing (leaving ``holder`` as it is) when ``copyright`` is empty.
    Otherwise the copyright text is split on "\n" and handed to a
    ``CopyrightDetector`` as (line number, text) pairs, asking for holders
    only. If that pass yields nothing, detection is run a second time with
    every line prefixed by "Copyright ". The detected holders are stored
    joined with "\n"; if both passes yield nothing, ``holder`` is set to the
    ``copyright`` value itself.
    """
    copyright_text = self.copyright
    if not copyright_text:
        return

    # Imported inside the method rather than at module level
    # (NOTE(review): presumably to avoid an import cycle or import cost
    # at module load time -- confirm).
    from cluecode.copyrights import CopyrightDetector

    raw_lines = list(enumerate(copyright_text.split("\n"), start=1))
    detector = CopyrightDetector()

    def detect_holders(lines):
        # Single place for the holders-only detection options so that both
        # passes are guaranteed to use the same settings.
        return list(
            detector.detect(
                lines,
                include_copyrights=False,
                include_holders=True,
                include_authors=False,
            )
        )

    detections = detect_holders(raw_lines)
    if not detections:
        # Nothing found on the raw text: retry with each statement prefixed
        # with `Copyright`.
        prefixed_lines = [
            (number, f"Copyright {text}") for number, text in raw_lines
        ]
        detections = detect_holders(prefixed_lines)

    joined_holders = "\n".join(detection.holder for detection in detections)
    # Still nothing usable: fall back to the copyright text itself.
    self.holder = joined_holders if joined_holders else copyright_text
716758

717759
def populate_license_fields(self):
718760
"""
Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1-
path,type,name,base_name,extension,size,date,sha1,md5,sha256,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,detected_license_expression,detected_license_expression_spdx,percentage_of_license_text,files_count,dirs_count,size_count,scan_errors,license_expression,detection_log,license_match__score,start_line,end_line,license_match__matched_length,license_match__match_coverage,license_match__matcher,license_match__license_expression,license_match__rule_identifier,license_match__rule_relevance,license_match__rule_url,copyright,holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath,package__primary_language,package__description,package__release_date,package__homepage_url,package__download_url,package__size,package__sha1,package__md5,package__sha256,package__sha512,package__bug_tracking_url,package__code_view_url,package__vcs_url,package__copyright,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__notice_text,package__file_references,package__extra_data,package__repository_homepage_url,package__repository_download_url,package__api_data_url,package__datasource_id,package__purl
2-
json2csv.rb,file,json2csv.rb,json2csv,.rb,912,2022-04-20,1236469a06a2bacbdd8e172ad718482af5b0a936,1307c281e0b153202e291b217eab85d5,12ba215313981dbe810d9ed696b7cc753d97adfcc26eba1e13f941dc7506aa4e,text/x-script.python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,apache-2.0,Apache-2.0,62.04,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,apache-2.0,,100.00,5,13,85,100.00,2-aho,apache-2.0,apache-2.0_7.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_7.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,Copyright (c) 2017 nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,,nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8-
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,,,,,,,,,,,http://www.apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9-
license,file,license,license,,679,2022-04-20,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,a34098a43e5677495f59dff825a3f9bc0f2b0261d75feb2356919f4c3ce049ab,text/plain,ASCII text,,False,True,False,False,False,False,gpl-2.0-plus,GPL-2.0-or-later,100.0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10-
license,,,,,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,,100.00,1,12,113,100.00,1-hash,gpl-2.0-plus,gpl-2.0-plus_420.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0-plus_420.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11-
package.json,file,package.json,package,.json,2200,2022-04-20,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,29f6068a1b6c7d06f115a5edc4ed8558edde42c6bbf0145ed77cf1108a0dd529,application/json,JSON data,,False,True,False,False,False,False,mit,MIT,45.72,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,100.00,24,24,3,100.00,2-aho,mit,mit_27.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,84.68,24,24,136,85.53,3-seq,mit,mit_823.RULE,99.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,,LearnBoost,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
18-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19-
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,npm,,cookie-signature,v 1.0.3,,,JavaScript,Sign and unsign cookies,,,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,git+https://github.com/visionmedia/node-cookie-signature.git,,mit,MIT,"[{'license_expression': 'mit', 'matches': [{'score': 100.0, 'start_line': 24, 'end_line': 24, 'matched_length': 3, 'match_coverage': 100.0, 'matcher': '2-aho', 'license_expression': 'mit', 'rule_identifier': 'mit_27.RULE', 'rule_relevance': 100, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE'}, {'score': 84.68, 'start_line': 24, 'end_line': 24, 'matched_length': 136, 'match_coverage': 85.53, 'matcher': '3-seq', 'license_expression': 'mit', 'rule_identifier': 'mit_823.RULE', 'rule_relevance': 99, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE'}], 'identifier': 'mit-13195f55-8383-ff05-7a20-04ec94bbf4b1'}]",,,,,,,,https://www.npmjs.com/package/cookie-signature,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,https://registry.npmjs.org/cookie-signature/1.0.3,npm_package_json,pkg:npm/[email protected]
1+
path,type,name,base_name,extension,size,date,sha1,md5,sha256,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,detected_license_expression,detected_license_expression_spdx,percentage_of_license_text,files_count,dirs_count,size_count,scan_errors,license_expression,detection_log,license_match__score,start_line,end_line,license_match__matched_length,license_match__match_coverage,license_match__matcher,license_match__license_expression,license_match__rule_identifier,license_match__rule_relevance,license_match__rule_url,copyright,holder,email,url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath,package__primary_language,package__description,package__release_date,package__homepage_url,package__download_url,package__size,package__sha1,package__md5,package__sha256,package__sha512,package__bug_tracking_url,package__code_view_url,package__vcs_url,package__copyright,package__holder,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__notice_text,package__file_references,package__extra_data,package__repository_homepage_url,package__repository_download_url,package__api_data_url,package__datasource_id,package__purl
2+
json2csv.rb,file,json2csv.rb,json2csv,.rb,912,2023-03-30,1236469a06a2bacbdd8e172ad718482af5b0a936,1307c281e0b153202e291b217eab85d5,12ba215313981dbe810d9ed696b7cc753d97adfcc26eba1e13f941dc7506aa4e,text/x-script.python,"Python script, ASCII text executable",Ruby,False,True,False,False,True,True,apache-2.0,Apache-2.0,62.04,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,apache-2.0,,100.00,5,13,85,100.00,2-aho,apache-2.0,apache-2.0_7.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_7.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,Copyright (c) 2017 nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,3,,,,,,,,,nexB Inc. and others,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,http://nexb.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,4,,,,,,,,,,,https://github.com/nexB/scancode-toolkit/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8+
json2csv.rb,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,,,,,,,,,,,http://www.apache.org/licenses/LICENSE-2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9+
license,file,license,license,,679,2023-03-30,75c5490a718ddd45e40e0cc7ce0c756abc373123,b965a762efb9421cf1bf4405f336e278,a34098a43e5677495f59dff825a3f9bc0f2b0261d75feb2356919f4c3ce049ab,text/plain,ASCII text,,False,True,False,False,False,False,gpl-2.0-plus,GPL-2.0-or-later,100.0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10+
license,,,,,,,,,,,,,,,,,,,,,,,,,,gpl-2.0-plus,,100.00,1,12,113,100.00,1-hash,gpl-2.0-plus,gpl-2.0-plus_420.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0-plus_420.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11+
package.json,file,package.json,package,.json,2200,2023-03-30,918376afce796ef90eeda1d6695f2289c90491ac,1f66239a9b850c5e60a9382dbe2162d2,29f6068a1b6c7d06f115a5edc4ed8558edde42c6bbf0145ed77cf1108a0dd529,application/json,JSON data,,False,True,False,False,False,False,mit,MIT,45.72,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,100.00,24,24,3,100.00,2-aho,mit,mit_27.RULE,100.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,mit,,84.68,24,24,136,85.53,3-seq,mit,mit_823.RULE,99.00,https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,Copyright (c) 2012 LearnBoost <[email protected]>,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
15+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24,24,,,,,,,,,LearnBoost,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,12,,,,,,,,,,[email protected],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16,16,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature.git,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
18+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27,27,,,,,,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19+
package.json,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,npm,,cookie-signature,v 1.0.3,,,JavaScript,Sign and unsign cookies,,,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,,,,,,https://github.com/visionmedia/node-cookie-signature/issues,,git+https://github.com/visionmedia/node-cookie-signature.git,,,mit,MIT,"[{'license_expression': 'mit', 'matches': [{'score': 100.0, 'start_line': 24, 'end_line': 24, 'matched_length': 3, 'match_coverage': 100.0, 'matcher': '2-aho', 'license_expression': 'mit', 'rule_identifier': 'mit_27.RULE', 'rule_relevance': 100, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_27.RULE'}, {'score': 84.68, 'start_line': 24, 'end_line': 24, 'matched_length': 136, 'match_coverage': 85.53, 'matcher': '3-seq', 'license_expression': 'mit', 'rule_identifier': 'mit_823.RULE', 'rule_relevance': 99, 'rule_url': 'https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_823.RULE'}], 'identifier': 'mit-13195f55-8383-ff05-7a20-04ec94bbf4b1'}]",,,,,,,,https://www.npmjs.com/package/cookie-signature,https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.3.tgz,https://registry.npmjs.org/cookie-signature/1.0.3,npm_package_json,pkg:npm/[email protected]

tests/formattedcode/data/csv/non-standard/identified.csv

100644100755
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
path,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,files_count,dirs_count,size_count,scan_errors,package__download_url,package__sha1,package__md5,package__size,package__release_date,package__primary_language,package__description,package__copyright,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__reference_notes,package__homepage_url,package__notice_text,package__components__name,package__components__version,package__components__owner_name,package__components__copyright,package__components__reference_notes,package__components__release_date,package__components__description,package__components__homepage_url,package__components__vcs_url,package__components__code_view_url,package__components__bug_tracking_url,package__components__primary_language,package__components__notice_text,package__components__notice_filename,package__components__notice_url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath
2-
apache-log4j-extras-1.1.jar,file,apache-log4j-extras-1.1.jar,apache-log4j-extras-1.1,.jar,346729,2010-12-02,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,application/java-archive,Java archive data (JAR),,True,False,True,False,False,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3-
apache-log4j-extras-1.1.jar,,,,,,,,,,,,,,,,,,,,,,http://central.maven.org/maven2/log4j/apache-log4j-extras/1.1/apache-log4j-extras-1.1.jar,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,346729,,,,,,,,,,,,,,,Apache Log4j Extras,1.1,Apache Software Foundation,Copyright 2007 The Apache Software Foundation,,,Apache Extras for Apache log4j is a jar file full of additional functionality for log4j 1.2.x.,http://logging.apache.org/log4j/extras/,,,,Java,"Apache Extras Companion for log4j 1.2.
1+
path,type,name,base_name,extension,size,date,sha1,md5,mime_type,file_type,programming_language,is_binary,is_text,is_archive,is_media,is_source,is_script,files_count,dirs_count,size_count,scan_errors,package__download_url,package__sha1,package__md5,package__size,package__release_date,package__primary_language,package__description,package__copyright,package__holder,package__declared_license_expression,package__declared_license_expression_spdx,package__license_detections,package__other_license_expression,package__other_license_expression_spdx,package__other_license_detections,package__extracted_license_statement,package__reference_notes,package__homepage_url,package__notice_text,package__components__name,package__components__version,package__components__owner_name,package__components__copyright,package__components__reference_notes,package__components__release_date,package__components__description,package__components__homepage_url,package__components__vcs_url,package__components__code_view_url,package__components__bug_tracking_url,package__components__primary_language,package__components__notice_text,package__components__notice_filename,package__components__notice_url,package__type,package__namespace,package__name,package__version,package__qualifiers,package__subpath
2+
apache-log4j-extras-1.1.jar,file,apache-log4j-extras-1.1.jar,apache-log4j-extras-1.1,.jar,346729,2010-12-02,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,application/java-archive,Java archive data (JAR),,True,False,True,False,False,False,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3+
apache-log4j-extras-1.1.jar,,,,,,,,,,,,,,,,,,,,,,http://central.maven.org/maven2/log4j/apache-log4j-extras/1.1/apache-log4j-extras-1.1.jar,1e4b290f5c9ce5ea3a1a7352496c9c9d2a894800,acd91d528e26aa771198d930cf08e953,346729,,,,,,,,,,,,,,,,Apache Log4j Extras,1.1,Apache Software Foundation,Copyright 2007 The Apache Software Foundation,,,Apache Extras for Apache log4j is a jar file full of additional functionality for log4j 1.2.x.,http://logging.apache.org/log4j/extras/,,,,Java,"Apache Extras Companion for log4j 1.2.
44
Copyright 2007 The Apache Software Foundation
55

66
This product includes software developed at

0 commit comments

Comments
 (0)