@@ -92,14 +92,14 @@ def parse_copyright_file(copyright_file, skip_debian_packaging=True, simplify_li
9292 declared_license , detected_license , copyrights = parse_structured_copyright_file (
9393 copyright_file = copyright_file ,
9494 skip_debian_packaging = skip_debian_packaging ,
95- simplify_licenses = simplify_licenses )
95+ simplify_licenses = simplify_licenses ,
96+ )
9697
9798 if not detected_license or detected_license == 'unknown' :
9899 text = textcode .analysis .unicode_text (copyright_file )
99100 detected_license = get_normalized_expression (text , try_as_expression = False )
100101 if not copyrights :
101102 copyrights = '\n ' .join (copyright_detector (copyright_file ))
102-
103103 return declared_license , detected_license , copyrights
104104
105105
@@ -119,12 +119,25 @@ def copyright_detector(location):
119119 return copyrights
120120
121121
122- def parse_structured_copyright_file (copyright_file , skip_debian_packaging = True , simplify_licenses = True ):
122+ def parse_structured_copyright_file (
123+ copyright_file ,
124+ skip_debian_packaging = True ,
125+ simplify_licenses = True ,
126+ unique = True ,
127+ ):
123128 """
124- Return a tuple of (declared license, detected license_expression, copyrights) strings computed
125- from the `copyright_file` location. For each copyright file paragraph we
126- treat the "name" as a license declaration. The text is used for detection
127- and cross-reference with the declaration.
129+ Return a tuple of (declared license, detected license_expression,
130+ copyrights) strings computed from the `copyright_file` location. For each
131+ copyright file paragraph we treat the "name" as a license declaration. The
132+ text is used for detection and cross-reference with the declaration.
133+
134+ If `skip_debian_packaging` is True, the Debian packaging license --if
135+ detected-- is skipped.
136+
137+ If `simplify_licenses` is True the license expressions are simplified.
138+
139+ If `unique` is True, repeated copyrights, detected or declared licenses are
140+ ignored, and only unique detections are returned.
128141 """
129142 if not copyright_file :
130143 return None , None , None
@@ -135,13 +148,6 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
135148 detected_licenses = []
136149 copyrights = []
137150
138- # debug on Python3
139- # try:
140- # deco = fix_copyright(deco)
141- # except Exception as e:
142- # # debug issues
143- # raise Exception(copyright_file) from e
144-
145151 deco = fix_copyright (deco )
146152
147153 licensing = Licensing ()
@@ -154,10 +160,13 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
154160
155161 if isinstance (paragraph , (CopyrightHeaderParagraph , CopyrightFilesParagraph )):
156162 pcs = paragraph .copyright .statements or []
157- # avoid repeats
158163 for p in pcs :
159164 p = p .dumps ()
160- if p not in copyrights :
165+ # avoid repeats
166+ if unique :
167+ if p not in copyrights :
168+ copyrights .append (p )
169+ else :
161170 copyrights .append (p )
162171
163172 if isinstance (paragraph , CatchAllParagraph ):
@@ -174,9 +183,13 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
174183
175184 declared , detected = detect_declared_license (plicense .name )
176185 # avoid repeats
177- if declared and declared not in declared_licenses :
186+ if unique :
187+ if declared and declared not in declared_licenses :
188+ declared_licenses .append (declared )
189+ if detected and detected not in detected_licenses :
190+ detected_licenses .append (detected )
191+ else :
178192 declared_licenses .append (declared )
179- if detected and detected not in detected_licenses :
180193 detected_licenses .append (detected )
181194
182195 # also detect in text
@@ -185,7 +198,11 @@ def parse_structured_copyright_file(copyright_file, skip_debian_packaging=True,
185198 detected = get_normalized_expression (text , try_as_expression = False )
186199 if not detected :
187200 detected = 'unknown'
188- if detected not in detected_licenses :
201+ # avoid repeats
202+ if unique :
203+ if detected not in detected_licenses :
204+ detected_licenses .append (detected )
205+ else :
189206 detected_licenses .append (detected )
190207
191208 declared_license = '\n ' .join (declared_licenses )
0 commit comments