|
8 | 8 | # |
9 | 9 | import base64 |
10 | 10 | import codecs |
| 11 | +import dataclasses |
11 | 12 | import email |
12 | 13 | import posixpath |
13 | 14 | import re |
@@ -1362,32 +1363,76 @@ def source_handler(value, **kwargs): |
1362 | 1363 | } |
1363 | 1364 |
|
1364 | 1365 |
|
1365 | | -def detect_declared_license(declared): |
| 1366 | +@dataclasses.dataclass |
| 1367 | +class ApkLicenseDetection: |
| 1368 | + """ |
| 1369 | + Represent the results of an Alpine license detection, including intermediate steps. |
1366 | 1370 | """ |
1367 | | - Return a tuple of (cleaned declared license, detected license expression) |
1368 | | - strings from a ``declared`` license text. Both can be None. |
| 1371 | + declared_license: str |
| 1372 | + cleaned_license: str |
| 1373 | + mapped_license: str |
| 1374 | + license_detections: list |
| 1375 | + license_expression: str |
| 1376 | + |
| 1377 | + def to_dict(self): |
| 1378 | + return dict( |
| 1379 | + declared_license=self.declared_license , |
| 1380 | + cleaned_license=self.cleaned_license , |
| 1381 | + mapped_license=self.mapped_license , |
| 1382 | + license_detections=self.license_detections , |
| 1383 | + license_expression=self.license_expression , |
| 1384 | + ) |
| 1385 | + |
| 1386 | + |
| 1387 | +def get_alpine_license_detection(declared): |
| 1388 | + """ |
| 1389 | + Return an ApkLicenseDetection from a ``declared`` license text, or None if the cleaned license is empty. |
1369 | 1390 | """ |
1370 | 1391 | # cleaning first to fix syntax quirks and try to get something we can parse |
1371 | | - cleaned = normalize_and_cleanup_declared_license(declared) |
1372 | | - if not cleaned: |
1373 | | - return None, None |
| 1392 | + cleaned_license = normalize_and_cleanup_declared_license(declared) |
| 1393 | + if not cleaned_license: |
| 1394 | + return None |
1374 | 1395 |
|
1375 | | - # then we apply mappings for known non-standard symbols |
| 1396 | + # then we apply mappings for known non-standard symbols. |
1376 | 1397 | # the output should be a proper SPDX expression |
1377 | | - mapped = apply_expressions_mapping(cleaned) |
| 1398 | + mapped_license = apply_expressions_mapping(cleaned_license) |
1378 | 1399 |
|
1379 | 1400 | # Finally perform SPDX expressions detection: Alpine uses mostly SPDX, but |
1380 | 1401 | # with some quirks such as some non standard symbols (in addition to the |
1381 | 1402 | # non-standard syntax) |
1382 | 1403 | extra_licenses = {} |
1383 | 1404 | expression_symbols = get_license_symbols(extra_licenses=extra_licenses) |
1384 | 1405 |
|
1385 | | - license_detections, detected_license_expression = get_license_detections_and_expression( |
1386 | | - extracted_license_statement=mapped, |
| 1406 | + license_detections, license_expression = get_license_detections_and_expression( |
| 1407 | + extracted_license_statement=mapped_license, |
1387 | 1408 | expression_symbols=expression_symbols, |
1388 | 1409 | ) |
1389 | 1410 |
|
1390 | | - return cleaned, detected_license_expression, license_detections |
| 1411 | + return ApkLicenseDetection( |
| 1412 | + declared_license=declared, |
| 1413 | + cleaned_license=cleaned_license, |
| 1414 | + mapped_license=mapped_license, |
| 1415 | + license_expression=license_expression, |
| 1416 | + license_detections=license_detections, |
| 1417 | + ) |
| 1418 | + |
| 1419 | + |
| 1420 | +def detect_declared_license(declared): |
| 1421 | + """ |
| 1422 | + Return a three-tuple of detected license data from a ``declared`` license text, with this shape: |
| 1423 | + (cleaned declared license, detected license expression, license_detections) |
| 1424 | + - cleaned declared license and detected license expression are strings. |
| 1425 | + - license_detections is a list of LicenseDetection. |
| 1426 | + - Any of these can be None. |
| 1427 | + """ |
| 1428 | + if alpine_detection := get_alpine_license_detection(declared): |
| 1429 | + return ( |
| 1430 | + alpine_detection.cleaned_license, |
| 1431 | + alpine_detection.license_expression, |
| 1432 | + alpine_detection.license_detections, |
| 1433 | + ) |
| 1434 | + else: |
| 1435 | + return None, None, None |
1391 | 1436 |
|
1392 | 1437 |
|
1393 | 1438 | def get_license_symbols(extra_licenses): |
@@ -1416,25 +1461,19 @@ def get_license_symbols(extra_licenses): |
1416 | 1461 | def normalize_and_cleanup_declared_license(declared): |
1417 | 1462 | """ |
1418 | 1463 | Return a cleaned and normalized declared license. |
1419 | | -
|
1420 | | - The expression should be valida SPDX but are far from this in practice. |
1421 | | -
|
| 1464 | + The expressions should be valid SPDX license expressions but they are far from this in practice. |
1422 | 1465 | Several fixes are applied: |
1423 | | -
|
1424 | 1466 | - plain text replacements, aka. syntax fixes, are applied |
1425 | 1467 | to make the expression parsable |
1426 | | -
|
1427 | 1468 | - common fixes also include handling space-separated and comma-separated |
1428 | 1469 | lists of licenses |
1429 | 1470 | """ |
1430 | 1471 | declared = declared or '' |
1431 | 1472 |
|
1432 | | - # normalize spaces |
| 1473 | + # normalize spaces and case |
1433 | 1474 | declared = ' '.join(declared.split()) |
1434 | | - |
1435 | 1475 | declared = declared.lower() |
1436 | 1476 |
|
1437 | | - # performa replacements |
1438 | 1477 | declared = apply_syntax_fixes(declared) |
1439 | 1478 |
|
1440 | 1479 | # comma-separated as in gpl-2.0+, lgpl-2.1+, zlib |
@@ -1516,15 +1555,15 @@ def normalize_and_cleanup_declared_license(declared): |
1516 | 1555 |
|
1517 | 1556 | def apply_syntax_fixes(s): |
1518 | 1557 | """ |
1519 | | - Fix the expression string s by aplying replacement for various quirks. |
| 1558 | + Fix the expression string ``s`` by applying replacements for various quirks to get a clean license |
| 1559 | + expression syntax. |
1520 | 1560 | """ |
1521 | 1561 | for src, tgt in EXPRESSION_SYNTAX_FIXES.items(): |
1522 | 1562 | s = s.replace(src, tgt) |
1523 | 1563 | return s |
1524 | 1564 |
|
1525 | | -# These are parsed expression objects replacement that make the expression SPDX compliant |
1526 | | - |
1527 | 1565 |
|
| 1566 | +# These are replacements for parsed expression objects that make the expression SPDX compliant |
1528 | 1567 | # {alpine sub-expression: SPDX subexpression} |
1529 | 1568 | DECLARED_TO_SPDX = { |
1530 | 1569 | 'openssl-exception': 'licenseref-scancode-generic-exception', |
|
0 commit comments