Skip to content

Commit 51cb4f0

Browse files
Remove old debian_copyright parsing functions
Removes old debian copyright parsing functions and also removes debian_licenses.txt. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 1ef4c89 commit 51cb4f0

File tree

2 files changed

+0
-9061
lines changed

2 files changed

+0
-9061
lines changed

src/packagedcode/debian_copyright.py

Lines changed: 0 additions & 219 deletions
Original file line number | Diff line number | Diff line change
@@ -1458,222 +1458,3 @@ def is_paragraph_primary_license(paragraph):
14581458
paragraph, CopyrightFilesParagraph
14591459
) and paragraph.files.values == ['*']
14601460

1461-
1462-
# ========================== DEPRECATED FUNCTIONS ==================================
1463-
1464-
def parse_structured_copyright_file(
    location,
    skip_debian_packaging=False,
):
    """
    Return a tuple of (declared license, detected license expression)
    collected from the Debian copyright file at `location`.

    The declared license is a string made of the declared license names of
    each paragraph joined with newlines (an empty string if there is none).
    The detected license is the string of the combined detected license
    expressions, or None if nothing was detected.
    Return (None, None) if `location` is empty.

    If `skip_debian_packaging` is False, the Debian packaging license is
    skipped if detected.
    NOTE(review): this flag reads inverted relative to its name; the
    behavior is kept unchanged because callers may depend on it.

    Note: This was the older structured file parsing method which is now
    discontinued.
    """
    if not location:
        return None, None

    deco = DebianCopyright.from_file(location)
    deco = refine_debian_copyright(deco)

    declared_licenses = []
    detected_licenses = []

    for paragraph in deco.paragraphs:

        if is_paragraph_debian_packaging(paragraph) and not skip_debian_packaging:
            # Skipping packaging license and copyrights since they are not
            # relevant to the effective package license
            continue

        # rare case where we have not a structured file
        if isinstance(paragraph, CatchAllParagraph):
            text = paragraph.dumps()
            if text:
                detected = get_normalized_expression(
                    text,
                    try_as_expression=False,
                    approximate=False,
                )
                # text that yields no detection is reported as "unknown"
                detected_licenses.append(detected or 'unknown')
            continue

        plicense = paragraph.license
        if not plicense:
            continue

        declared, detected = detect_declared_license(plicense.name)
        if declared:
            declared_licenses.append(declared)
        if detected:
            detected_licenses.append(detected)

        # also detect in text
        text = paragraph.license.text
        if text:
            detected = get_normalized_expression(
                text,
                try_as_expression=False,
                approximate=True,
            )
            detected_licenses.append(detected or 'unknown')

    declared_license = '\n'.join(declared_licenses)

    # Always bind the detected license: without this default, a file with no
    # detection at all raised an UnboundLocalError on return.
    detected_license = None
    if detected_licenses:
        detected_license = str(combine_expressions(detected_licenses))

    return declared_license, detected_license
1536-
1537-
1538-
def detect_declared_license(declared):
    """
    Return a tuple of (declared license, detected license expression) from a
    `declared` license string. Both can be None.

    The `declared` string is first cleaned up and normalized. Detection is
    then attempted with the declared-name mapping and, if that yields
    nothing, with `get_normalized_expression`. If that last detection raises,
    the detected license expression is 'unknown'.
    """
    declared = normalize_and_cleanup_declared_license(declared)

    if TRACE:
        logger_debug(f'detect_declared_license: {declared}')

    if not declared:
        return None, None

    # apply multiple license detection in sequence
    detected = detect_using_name_mapping(declared)
    if detected:
        return declared, detected

    # NOTE(review): function-level import kept as in the original,
    # presumably to avoid an import cycle -- confirm
    from packagedcode import licensing

    try:
        detected = licensing.get_normalized_expression(
            declared,
            try_as_expression=False,
            approximate=False,
        )
    except Exception:
        # we never fail just for this
        if TRACE:
            logger_debug(f'detect_declared_license: detection failed for: {declared}')
        # Callers unpack two values: return a tuple here too and not a bare
        # 'unknown' string as was done before.
        return declared, 'unknown'

    return declared, detected
1570-
1571-
1572-
def normalize_and_cleanup_declared_license(declared):
    """
    Return a cleaned and normalized `declared` license string.

    Leading and trailing colons, spaces, tabs and hashes are removed and every
    run of whitespace is collapsed to a single space. An empty or None
    `declared` yields an empty string.
    """
    # there are a few odd cases of license fields starting with a colon or #
    trimmed = (declared or '').strip(': \t#')
    # normalize spaces
    words = trimmed.split()
    return ' '.join(words)
1582-
1583-
1584-
def detect_using_name_mapping(declared):
    """
    Return a license expression string detected from a `declared` license
    name, or None if this name is not in the declared-to-detected mapping.

    The lookup uses the lowercased `declared` name.
    """
    expression = get_declared_to_detected().get(declared.lower())
    if not expression:
        return None
    return str(Licensing().parse(expression, simple=True))
1593-
1594-
1595-
def refine_debian_copyright(debian_copyright):
    """
    Fix in place and return the `debian_copyright` DebianCopyright object,
    correcting issues found in a large collection of Debian copyright files.

    Two kinds of bogus license names are handled:
    - names starting with "200" are copyright statements: in header and files
      paragraphs they are moved to the paragraph copyright statements.
    - names that are the beginning of a license notice sentence are moved to
      the start of the license text.
    In both cases the license name is reset to None.
    """
    # lowercased beginnings of sentences that are license text and not a name
    notice_prefixes = (
        'according to',
        'by obtaining',
        'distributed under the terms of the gnu',
        'gnu general public license version 2 as published by the free',
        'gnu lesser general public license 2.1 as published by the',
    )

    for para in debian_copyright.paragraphs:
        # paragraphs without a license attribute or value have nothing to fix
        plicense = getattr(para, 'license', None)
        if not plicense:
            continue

        name = plicense.name
        if not name:
            continue

        # these are copyrights and not actual licenses, such as:
        # - 2005 Sergio Costas
        # - 2006-2010 by The HDF Group.
        if name.startswith('200') and isinstance(
            para, (CopyrightHeaderParagraph, CopyrightFilesParagraph)
        ):
            statements = para.copyright.statements or []
            statements.append(name)
            para.copyright.statements = statements
            para.license.name = None

        if not name.lower().startswith(notice_prefixes):
            continue

        # the "name" is really the first line of the license text
        existing_text = plicense.text
        para.license.name = None
        para.license.text = '\n'.join([name, existing_text]) if existing_text else name

    return debian_copyright
1642-
1643-
1644-
_DECLARED_TO_DETECTED = None


def get_declared_to_detected(data_file=None):
    """
    Return a mapping of declared to detected license expression cached and
    loaded from a tab-separated text file, all lowercase.

    Each line has this form:
        some license name<tab>scancode license expression

    For instance:
        2-clause bsd<tab>bsd-simplified

    Empty lines, lines starting with a "#" and lines without an expression
    are skipped. Names and expressions are stripped of surrounding spaces.

    `data_file` is the path to the file to load and defaults to the
    `debian_licenses.txt` file located next to this module. It is only used
    on the first successful call: later calls return the cached mapping.

    This data file is about license keys used in copyright files and has been
    derived from a large collection of most copyright files from Debian (about
    320K files from circa 2019-11) and Ubuntu (about 200K files from circa
    2020-06)
    """
    global _DECLARED_TO_DETECTED
    # Test against None and not for truthiness, such that an empty mapping
    # is also cached and the file is not read again on each call.
    if _DECLARED_TO_DETECTED is not None:
        return _DECLARED_TO_DETECTED

    if not data_file:
        data_file = path.join(path.dirname(__file__), 'debian_licenses.txt')

    # Build in a local and publish only once fully loaded, such that a
    # failure while reading never leaves a partial mapping in the cache.
    mapping = {}
    with io.open(data_file, encoding='utf-8') as df:
        for line in df:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            decl, _, detect = line.partition('\t')
            detect = detect.strip()
            if detect:
                mapping[decl.strip()] = detect

    _DECLARED_TO_DETECTED = mapping
    return _DECLARED_TO_DETECTED

0 commit comments

Comments
 (0)