|
30 | 30 | import posixpath |
31 | 31 | import traceback |
32 | 32 | from itertools import zip_longest |
| 33 | +import urllib |
33 | 34 | from urllib.parse import urljoin |
34 | 35 | from urllib.parse import urlparse |
35 | 36 | from urllib.request import urlopen |
@@ -1523,48 +1524,69 @@ def save_as_csv(location, about_dicts, field_names): |
def pre_process_and_fetch_license_dict(abouts, api_url, api_key):
    """
    Modify a list of About data dictionaries by adding license information
    fetched from the ScanCode LicenseDB or DejaCode API.

    Return a tuple of (key_text_dict, errors) where:
    - key_text_dict maps each license key found in the `license_expression`
      of the `abouts` objects to a [license name, license text, license URL]
      list;
    - errors is a list of Error objects collected along the way.

    `api_url` is the base URL to query; when it is empty the public ScanCode
    LicenseDB URL is used instead. When `api_key` is provided the license
    details are fetched with api.get_license_details_from_api(); otherwise
    they are read from `<base url><key>.json` and `<base url><key>.LICENSE`.

    If there is no network connection, or the base URL does not validate,
    return immediately with an empty mapping and the corresponding error.
    """
    key_text_dict = {}
    captured_license = set()
    errors = []
    # Stays empty when no api_url is given so that the api_key branch below
    # cannot fail on an unbound name.
    lic_urn = ''
    if api_url:
        dje_uri = urlparse(api_url)
        domain = '{uri.scheme}://{uri.netloc}/'.format(uri=dje_uri)
        lic_urn = urljoin(domain, 'urn/?urn=urn:dje:license:')
        url = api_url
    else:
        url = 'https://scancode-licensedb.aboutcode.org/'

    if not util.have_network_connection():
        msg = u'Network problem. Please check your Internet connection. License generation is skipped.'
        errors.append(Error(ERROR, msg))
    elif not valid_api_url(url):
        msg = u"URL not reachable. Invalid 'URL'. License generation is skipped."
        errors.append(Error(ERROR, msg))

    if errors:
        return key_text_dict, errors

    auth_error = Error(ERROR, u"Authorization denied. Invalid '--api_key'. License generation is skipped.")
    # API errors are stored with the about file path prepended, so the raw
    # auth error is never found in `errors`; track it with a flag instead.
    auth_denied = False

    for about in abouts:
        # No need to go through all the about objects if '--api_key' is invalid
        if auth_denied:
            break
        if not about.license_expression.present:
            continue

        special_char_in_expression, lic_list = parse_license_expression(about.license_expression.value)
        if special_char_in_expression:
            msg = (about.about_file_path + u": The following character(s) cannot be in the license_expression: " +
                   str(special_char_in_expression))
            errors.append(Error(ERROR, msg))
            continue

        for lic_key in lic_list:
            if lic_key in captured_license:
                continue

            # Reset per license so a failed fetch neither crashes on an
            # unbound name nor reuses the details of the previous license.
            license_name = ''
            license_text = ''
            lic_url = ''

            if api_key:
                license_name, _license_key, license_text, errs = api.get_license_details_from_api(url, api_key, lic_key)
                for severity, message in errs:
                    if Error(severity, message) == auth_error:
                        auth_denied = True
                    msg = (about.about_file_path + ": " + message)
                    errors.append(Error(severity, msg))
                if lic_urn:
                    lic_url = lic_urn + lic_key
            else:
                license_url = url + lic_key + '.json'
                license_text_url = url + lic_key + '.LICENSE'
                try:
                    # Close both HTTP responses deterministically.
                    with urlopen(license_url) as response:
                        data = json.loads(response.read())
                    with urlopen(license_text_url) as response:
                        fetched_text = response.read().decode('utf-8')
                    fetched_name = data['name']
                    fetched_url = url + data['key'] + '.LICENSE'
                except Exception:
                    # Best effort: any fetch/parse failure is reported as an
                    # invalid license; KeyboardInterrupt/SystemExit propagate.
                    msg = about.about_file_path + u" : Invalid 'license': " + lic_key
                    errors.append(Error(ERROR, msg))
                else:
                    # Only keep the details when everything was retrieved.
                    license_name = fetched_name
                    license_text = fetched_text
                    lic_url = fetched_url

            # Mark the key as processed even on failure so it is not fetched
            # (and reported) again for every other about file using it.
            captured_license.add(lic_key)
            key_text_dict[lic_key] = [license_name, license_text, lic_url]
    return key_text_dict, errors
1569 | 1591 |
|
1570 | 1592 |
|
@@ -1595,6 +1617,7 @@ def valid_api_url(api_url): |
1595 | 1617 | # This will always goes to exception as no key are provided. |
1596 | 1618 | # The purpose of this code is to validate the provided api_url is correct |
1597 | 1619 | urlopen(request) |
| 1620 | + return True |
1598 | 1621 | except HTTPError as http_e: |
1599 | 1622 | # The 403 error code is refer to "Authentication credentials were not provided.". |
1600 | 1623 | # This is correct as no key are provided. |
|
0 commit comments