@@ -1458,222 +1458,3 @@ def is_paragraph_primary_license(paragraph):
14581458 paragraph , CopyrightFilesParagraph
14591459 ) and paragraph .files .values == ['*' ]
14601460
1461-
1462- # ========================== DEPRECATED FUNCTIONS ==================================
1463-
def parse_structured_copyright_file(
    location,
    skip_debian_packaging=False,
):
    """
    Return a tuple of (declared license, detected license expression)
    collected from the debian copyright file at `location`.

    The declared license is a single string made of the declared license
    names found in each paragraph joined together; it is an empty string when
    no license name is declared. The detected license is the string form of
    the combination of all the detected license expressions, or None when
    nothing was detected.

    Return (None, None) if `location` is empty.

    If `skip_debian_packaging` is False, the Debian packaging license is
    skipped if detected.

    Note: This was the older structured file parsing method which is now
    discontinued.
    """
    if not location:
        return None, None

    deco = DebianCopyright.from_file(location)
    deco = refine_debian_copyright(deco)

    declared_licenses = []
    detected_licenses = []

    for paragraph in deco.paragraphs:

        # NOTE(review): the flag reads as inverted with respect to its name
        # (packaging paragraphs are skipped when it is False). This is the
        # documented behavior of this function and is kept as-is for callers.
        if is_paragraph_debian_packaging(paragraph) and not skip_debian_packaging:
            # Skipping packaging license and copyrights since they are not
            # relevant to the effective package license
            continue

        # rare case where we do not have a structured file
        if isinstance(paragraph, CatchAllParagraph):
            text = paragraph.dumps()
            if text:
                detected = get_normalized_expression(
                    text,
                    try_as_expression=False,
                    approximate=False,
                )
                detected_licenses.append(detected or 'unknown')
            continue

        plicense = paragraph.license
        if not plicense:
            continue

        declared, detected = detect_declared_license(plicense.name)
        if declared:
            declared_licenses.append(declared)
        if detected:
            detected_licenses.append(detected)

        # also detect in text
        text = paragraph.license.text
        if text:
            detected = get_normalized_expression(
                text,
                try_as_expression=False,
                approximate=True,
            )
            detected_licenses.append(detected or 'unknown')

    declared_license = '\n '.join(declared_licenses)

    # Always bind the detected license: without this default a file with no
    # detection at all failed with an UnboundLocalError on return.
    detected_license = None
    if detected_licenses:
        detected_license = str(combine_expressions(detected_licenses))

    return declared_license, detected_license
1536-
1537-
def detect_declared_license(declared):
    """
    Return a tuple of (declared license, detected license expression) from a
    `declared` license string. Both can be None.

    The declared license is first cleaned up and normalized. Detection is then
    attempted in sequence: first with the declared-name-to-expression mapping,
    then with the generic license detection. If the generic detection fails
    with an exception, the detected license expression is 'unknown'.
    """
    declared = normalize_and_cleanup_declared_license(declared)

    if TRACE:
        logger_debug(f'detect_declared_license: {declared} ')

    if not declared:
        return None, None

    # apply multiple license detection in sequence
    detected = detect_using_name_mapping(declared)
    if detected:
        return declared, detected

    from packagedcode import licensing

    try:
        detected = licensing.get_normalized_expression(
            declared,
            try_as_expression=False,
            approximate=False,
        )
    except Exception:
        # We never fail just for this: this is a deliberate best effort.
        # Return a proper 2-tuple: returning a bare 'unknown' string here
        # broke every caller that unpacks the (declared, detected) result.
        if TRACE:
            logger_debug(
                f'detect_declared_license: detection failed for: {declared!r}'
            )
        return declared, 'unknown'

    return declared, detected
1570-
1571-
def normalize_and_cleanup_declared_license(declared):
    """
    Return a cleaned and normalized `declared` license string.

    Leading and trailing colons, spaces, tabs and hash signs are removed and
    each run of whitespace is collapsed to a single space. An empty or None
    `declared` yields an empty string.
    """
    if not declared:
        return ''
    # a few odd license fields start (or end) with a colon or a #
    trimmed = declared.strip(': \t #')
    # split() without argument splits on any whitespace run
    words = trimmed.split()
    return ' '.join(words)
1582-
1583-
def detect_using_name_mapping(declared):
    """
    Return a license expression string detected from a `declared` license
    name using the declared-to-detected mapping, or None if the lowercased
    name is not in this mapping.
    """
    lookup_key = declared.lower()
    expression = get_declared_to_detected().get(lookup_key)
    if not expression:
        return None
    # parse and render back the mapped expression as a string
    parsed = Licensing().parse(expression, simple=True)
    return str(parsed)
1593-
1594-
def refine_debian_copyright(debian_copyright):
    """
    Fix in place common problems of the paragraphs of the `debian_copyright`
    DebianCopyright object and return this object. These fixes are based on
    issues found in a large collection of Debian copyright files:

    - a license name starting with "200" is a copyright statement and not a
      license: it is moved to the copyright statements of header and files
      paragraphs.
    - a license name starting with some well known prose is the start of a
      license text and not a name: it is moved to the top of the license text.
    """
    # lowercased prefixes of license "names" that are really license text
    prose_prefixes = (
        'according to',
        'by obtaining',
        'distributed under the terms of the gnu',
        'gnu general public license version 2 as published by the free',
        'gnu lesser general public license 2.1 as published by the',
    )

    for paragraph in debian_copyright.paragraphs:
        plicense = getattr(paragraph, 'license', None)
        if not plicense:
            continue

        name = plicense.name
        if not name:
            continue

        # these are copyrights and not actual licenses, such as:
        # - 2005 Sergio Costas
        # - 2006-2010 by The HDF Group.
        is_copyright = name.startswith('200')
        if is_copyright and isinstance(
            paragraph, (CopyrightHeaderParagraph, CopyrightFilesParagraph)
        ):
            statements = paragraph.copyright.statements or []
            statements.append(name)
            paragraph.copyright.statements = statements
            paragraph.license.name = None

        if name.lower().startswith(prose_prefixes):
            existing_text = plicense.text
            merged = '\n '.join([name, existing_text]) if existing_text else name
            paragraph.license.name = None
            paragraph.license.text = merged

    return debian_copyright
1642-
1643-
# Module-level cache of the declared-to-detected license mapping. None until
# the data file has been loaded once by get_declared_to_detected().
_DECLARED_TO_DETECTED = None


def get_declared_to_detected(data_file=None):
    """
    Return a mapping of declared to detected license expression cached and
    loaded from a tab-separated text file, all lowercase.

    `data_file` is the path to this file and defaults to the
    'debian_licenses.txt' file located next to this module. The mapping is
    loaded only once: later calls return the cached mapping and ignore
    `data_file`.

    Each line has this form:
        some license name<tab>scancode license expression

    For instance:
        2-clause bsd    bsd-simplified

    Empty lines, lines starting with a # and lines without an expression are
    ignored.

    This data file is about license keys used in copyright files and has been
    derived from a large collection of most copyright files from Debian (about
    320K files from circa 2019-11) and Ubuntu (about 200K files from circa
    2020-06)
    """
    global _DECLARED_TO_DETECTED
    # Test against None and not truthiness so that an empty mapping is also
    # cached rather than reloaded from disk on every call.
    if _DECLARED_TO_DETECTED is not None:
        return _DECLARED_TO_DETECTED

    if not data_file:
        data_file = path.join(path.dirname(__file__), 'debian_licenses.txt')

    # Build in a local mapping and publish it only once fully loaded so that
    # a failed read never leaves a partial mapping in the cache.
    mapping = {}
    with io.open(data_file, encoding='utf-8') as df:
        for line in df:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # the documented separator is a single tab
            decl, _, detect = line.partition('\t')
            detect = detect.strip()
            if detect:
                mapping[decl.strip()] = detect

    _DECLARED_TO_DETECTED = mapping
    return _DECLARED_TO_DETECTED
0 commit comments