Skip to content

Commit d6b39ed

Browse files
Support ubuntu packages and debian copyrights
* Supports purl2meta from ubuntu package URLs * Supports getting package information from debian copyrights Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent e4900f1 commit d6b39ed

File tree

1 file changed

+95
-11
lines changed

1 file changed

+95
-11
lines changed

minecode/visitors/debian.py

Lines changed: 95 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from debian_inspector.version import Version as DebVersion
2222
from packagedcode.models import PackageData
2323
from packagedcode.debian import DebianDscFileHandler
24+
from packagedcode.debian_copyright import StandaloneDebianCopyrightFileHandler
2425
from packageurl import PackageURL
2526

2627
from minecode import ls
@@ -30,7 +31,6 @@
3031
from minecode.visitors import HttpVisitor
3132
from minecode.visitors import NonPersistentHttpVisitor
3233
from minecode.visitors import URI
33-
from minecode.utils import get_temp_dir
3434
from minecode.utils import get_temp_file
3535
from minecode.utils import get_package_sha1
3636
from packagedb.models import make_relationship
@@ -52,6 +52,10 @@
5252

5353
DEBIAN_BASE_URL = "https://deb.debian.org/debian/pool/main/"
5454
DEBIAN_METADATA_URL = "https://metadata.ftp-master.debian.org/changelogs/main/"
55+
56+
UBUNTU_BASE_URL = "http://archive.ubuntu.com/ubuntu/pool/main/"
57+
UBUNTU_METADATA_URL = "http://changelogs.ubuntu.com/changelogs/pool/main/"
58+
5559
# Other URLs and sources to consider
5660
# 'http://ftp.debian.org/debian/'
5761
# rsync://archive.debian.org/debian-archive
@@ -383,7 +387,14 @@ def map_debian_package(debian_package, package_content):
383387
package, error_metadata = get_debian_package_metadata(debian_package)
384388
if error_metadata:
385389
error += error_metadata
390+
391+
package_copyright, error_copyright = get_debian_package_copyright(debian_package)
386392
package.update_purl_fields(package_data=purl_package, replace=True)
393+
update_license_copyright_fields(
394+
package_from=package_copyright,
395+
package_to=package,
396+
replace=True,
397+
)
387398

388399
# This will be used to download and scan the package
389400
package.download_url = download_url
@@ -431,16 +442,59 @@ def get_debian_package_metadata(debian_package):
431442

432443
packages = DebianDscFileHandler.parse(location=temp_metadata_file)
433444
package = list(packages).pop()
434-
# In the case of looking up a maven package with qualifiers of
435-
# `classifiers=sources`, the purl of the package created from the pom does
436-
# not have the qualifiers, so we need to set them. Additionally, the download
437-
# url is not properly generated since it would be missing the sources bit
438-
# from the filename.
445+
439446
package.qualifiers = debian_package.package_url.qualifiers
440447

441448
return package, error
442449

443450

451+
def get_debian_package_copyright(debian_package):
    """
    Return a two-tuple of (package, error) for ``debian_package``.

    ``package`` is the package data parsed from the copyright file found at
    ``debian_package.package_copyright_url``, with its qualifiers set from
    ``debian_package.package_url``. ``error`` is a string of newline-terminated
    error messages; it is empty when nothing went wrong.

    ``package`` is None when the copyright file cannot be downloaded or when
    no package data could be parsed from it.
    """
    error = ''

    copyright_url = debian_package.package_copyright_url
    response = requests.get(copyright_url)
    if not response.ok:
        msg = f'Package copyright does not exist on debian: {copyright_url}'
        error += msg + '\n'
        logger.error(msg)
        return None, error

    copyright_content = response.text
    filename = copyright_url.split("/")[-1]
    # NOTE(review): copyright file names end with "_copyright" and carry no
    # real extension, so this splits on the last dot of the version instead.
    # This only affects the name of the temp file; confirm get_temp_file()
    # is fine with that.
    file_name, _, extension = filename.rpartition(".")
    temp_copyright_file = get_temp_file(file_name=file_name, extension=extension)
    # Write rather than append so that a pre-existing file at this location
    # cannot leave stale content in front of the downloaded copyright text.
    with open(temp_copyright_file, 'w') as copyright_file:
        copyright_file.write(copyright_content)

    packages = list(
        StandaloneDebianCopyrightFileHandler.parse(location=temp_copyright_file)
    )
    if not packages:
        # Report a parse that yields nothing the same way as a failed
        # download instead of raising IndexError on pop().
        msg = f'No package data found in copyright file: {copyright_url}'
        error += msg + '\n'
        logger.error(msg)
        return None, error

    package = packages.pop()
    package.qualifiers = debian_package.package_url.qualifiers

    return package, error
477+
478+
479+
def update_license_copyright_fields(package_from, package_to, replace=True):
    """
    Copy the license and copyright fields of ``package_from`` to ``package_to``.

    Only non-empty values of ``package_from`` are copied. If ``replace`` is
    True they overwrite the values of ``package_to``; otherwise they only fill
    the fields that are empty on ``package_to``.

    Do nothing if ``package_from`` is None: this is what
    get_debian_package_copyright() returns when no copyright file is available.
    """
    if package_from is None:
        return

    fields_to_update = (
        'copyright',
        'holder',
        'declared_license_expression',
        'declared_license_expression_spdx',
        'license_detections',
        'other_license_expression',
        'other_license_expression_spdx',
        'other_license_detections',
        'extracted_license_statement',
    )

    for field in fields_to_update:
        value = getattr(package_from, field, None)
        if not value:
            # Never overwrite existing data with an empty value.
            continue
        if replace or not getattr(package_to, field, None):
            setattr(package_to, field, value)
496+
497+
444498
def map_debian_metadata_binary_and_source(package_url, source_package_url):
445499
"""
446500
Get metadata for the binary and source release of the Debian package
@@ -452,11 +506,15 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url):
452506

453507
if "repository_url" in package_url.qualifiers:
454508
base_url = package_url.qualifiers["repository_url"]
509+
elif package_url.namespace == 'ubuntu':
510+
base_url = UBUNTU_BASE_URL
455511
else:
456512
base_url = DEBIAN_BASE_URL
457513

458514
if "api_data_url" in package_url.qualifiers:
459515
metadata_base_url = package_url.qualifiers["api_data_url"]
516+
elif package_url.namespace == 'ubuntu':
517+
metadata_base_url = UBUNTU_METADATA_URL
460518
else:
461519
metadata_base_url = DEBIAN_METADATA_URL
462520

@@ -503,7 +561,7 @@ class DebianPackage:
503561
archive_directory_url = attr.ib(type=str, default=None)
504562

505563
def __attrs_post_init__(self, *args, **kwargs):
506-
self.set_debian_archive_directory()
564+
self.set_debian_directories()
507565

508566
@property
509567
def package_archive_version(self):
@@ -576,10 +634,31 @@ def package_metadata_url(self):
576634

577635
return metadata_dsc_package_url
578636

579-
def set_debian_archive_directory(self):
637+
@property
def package_copyright_url(self):
    """
    Return the URL of the copyright file for this package, built from the
    metadata directory URL, a package name and a version.

    The name and version of the source package are used when a source
    package URL is set (falling back to the archive version if it has no
    version); otherwise the name of the package URL and the archive version
    are used.

    The first candidate URL is requested: if the response is not OK, return
    instead a URL built with the version stripped of anything from its
    first "+" onwards.
    """
    version = self.package_archive_version
    source_purl = self.source_package_url
    if source_purl:
        name = source_purl.name
        version = source_purl.version or version
    else:
        name = self.package_url.name

    candidate_url = self.metadata_directory_url + f"{name}_{version}_copyright"
    if requests.get(candidate_url).ok:
        return candidate_url

    # Retry without the "+..." suffix of the version.
    base_version = version.split('+')[0]
    return self.metadata_directory_url + f"{name}_{base_version}_copyright"
655+
656+
def set_debian_directories(self):
580657
"""
581658
"""
582-
base_url = self.archive_base_url
659+
archive_base_url = self.archive_base_url
660+
metadata_base_url = self.metadata_base_url
661+
583662
index_folder = None
584663
if self.package_url.name.startswith('lib'):
585664
name_wout_lib = self.package_url.name.replace("lib", "")
@@ -589,7 +668,9 @@ def set_debian_archive_directory(self):
589668

590669
msg = "No directory exists for package at: "
591670

592-
package_directory = f"{base_url}{index_folder}/{self.package_url.name}/"
671+
package_directory = f"{archive_base_url}{index_folder}/{self.package_url.name}/"
672+
metadata_directory = f"{metadata_base_url}{index_folder}/{self.package_url.name}/"
673+
593674
response = requests.get(package_directory)
594675
if not response.ok:
595676
if not self.source_package_url:
@@ -599,12 +680,15 @@ def set_debian_archive_directory(self):
599680
index_folder = 'lib' + name_wout_lib[0]
600681
else:
601682
index_folder = self.source_package_url.name[0]
602-
package_directory = f"{base_url}{index_folder}/{self.source_package_url.name}/"
683+
package_directory = f"{archive_base_url}{index_folder}/{self.source_package_url.name}/"
684+
metadata_directory = f"{metadata_base_url}{index_folder}/{self.source_package_url.name}/"
685+
603686
response = requests.get(package_directory)
604687
if not response.ok:
605688
raise PackageDirectoryMissingException(msg + str(package_directory))
606689

607690
self.archive_directory_url = package_directory
691+
self.metadata_directory_url = metadata_directory
608692

609693

610694
class PackageDirectoryMissingException(Exception):

0 commit comments

Comments
 (0)