Skip to content

Commit 6ee677d

Browse files
committed
Do not always dedupe Debian licenses #2058
Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 7a9eb3f commit 6ee677d

File tree

1 file changed

+36
-19
lines changed

1 file changed

+36
-19
lines changed

src/packagedcode/debian_copyright.py

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,14 @@ def parse_copyright_file(copyright_file, skip_debian_packaging=True, simplify_li
9292
declared_license, detected_license, copyrights = parse_structured_copyright_file(
9393
copyright_file=copyright_file,
9494
skip_debian_packaging=skip_debian_packaging,
95-
simplify_licenses=simplify_licenses)
95+
simplify_licenses=simplify_licenses,
96+
)
9697

9798
if not detected_license or detected_license == 'unknown':
9899
text = textcode.analysis.unicode_text(copyright_file)
99100
detected_license = get_normalized_expression(text, try_as_expression=False)
100101
if not copyrights:
101102
copyrights = '\n'.join(copyright_detector(copyright_file))
102-
103103
return declared_license, detected_license, copyrights
104104

105105

@@ -119,12 +119,25 @@ def copyright_detector(location):
119119
return copyrights
120120

121121

122-
def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True, simplify_licenses=True):
122+
def parse_structured_copyright_file(
123+
copyright_file,
124+
skip_debian_packaging=True,
125+
simplify_licenses=True,
126+
unique=True,
127+
):
123128
"""
124-
Return a tuple of (declared license, detected license_expression, copyrights) strings computed
125-
from the `copyright_file` location. For each copyright file paragraph we
126-
treat the "name" as a license declaration. The text is used for detection
127-
and cross-reference with the declaration.
129+
Return a tuple of (declared license, detected license_expression,
130+
copyrights) strings computed from the `copyright_file` location. For each
131+
copyright file paragraph we treat the "name" as a license declaration. The
132+
text is used for detection and cross-reference with the declaration.
133+
134+
If `skip_debian_packaging` is True, the Debian packaging license --if
135+
detected-- is skipped.
136+
137+
If `simplify_licenses` is True the license expressions are simplified.
138+
139+
If `unique` is True, repeated copyrights, detected or declared licenses are
140+
ignored, and only unique detections are returned.
128141
"""
129142
if not copyright_file:
130143
return None, None, None
@@ -135,13 +148,6 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
135148
detected_licenses = []
136149
copyrights = []
137150

138-
# debug on Python3
139-
# try:
140-
# deco = fix_copyright(deco)
141-
# except Exception as e:
142-
# # debug issues
143-
# raise Exception(copyright_file) from e
144-
145151
deco = fix_copyright(deco)
146152

147153
licensing = Licensing()
@@ -154,10 +160,13 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
154160

155161
if isinstance(paragraph, (CopyrightHeaderParagraph, CopyrightFilesParagraph)):
156162
pcs = paragraph.copyright.statements or []
157-
# avoid repeats
158163
for p in pcs:
159164
p = p.dumps()
160-
if p not in copyrights:
165+
# avoid repeats
166+
if unique:
167+
if p not in copyrights:
168+
copyrights.append(p)
169+
else:
161170
copyrights.append(p)
162171

163172
if isinstance(paragraph, CatchAllParagraph):
@@ -174,9 +183,13 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
174183

175184
declared, detected = detect_declared_license(plicense.name)
176185
# avoid repeats
177-
if declared and declared not in declared_licenses:
186+
if unique:
187+
if declared and declared not in declared_licenses:
188+
declared_licenses.append(declared)
189+
if detected and detected not in detected_licenses:
190+
detected_licenses.append(detected)
191+
else:
178192
declared_licenses.append(declared)
179-
if detected and detected not in detected_licenses:
180193
detected_licenses.append(detected)
181194

182195
# also detect in text
@@ -185,7 +198,11 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
185198
detected = get_normalized_expression(text, try_as_expression=False)
186199
if not detected:
187200
detected = 'unknown'
188-
if detected not in detected_licenses:
201+
# avoid repeats
202+
if unique:
203+
if detected not in detected_licenses:
204+
detected_licenses.append(detected)
205+
else:
189206
detected_licenses.append(detected)
190207

191208
declared_license = '\n'.join(declared_licenses)

0 commit comments

Comments
 (0)