2121from debian_inspector .version import Version as DebVersion
2222from packagedcode .models import PackageData
2323from packagedcode .debian import DebianDscFileHandler
24+ from packagedcode .debian_copyright import StandaloneDebianCopyrightFileHandler
2425from packageurl import PackageURL
2526
2627from minecode import ls
3031from minecode .visitors import HttpVisitor
3132from minecode .visitors import NonPersistentHttpVisitor
3233from minecode .visitors import URI
33- from minecode .utils import get_temp_dir
3434from minecode .utils import get_temp_file
3535from minecode .utils import get_package_sha1
3636from packagedb .models import make_relationship
5252
5353DEBIAN_BASE_URL = "https://deb.debian.org/debian/pool/main/"
5454DEBIAN_METADATA_URL = "https://metadata.ftp-master.debian.org/changelogs/main/"
55+
56+ UBUNTU_BASE_URL = "http://archive.ubuntu.com/ubuntu/pool/main/"
57+ UBUNTU_METADATA_URL = "http://changelogs.ubuntu.com/changelogs/pool/main/"
58+
5559# Other URLs and sources to consider
5660# 'http://ftp.debian.org/debian/'
5761# rsync://archive.debian.org/debian-archive
@@ -383,7 +387,14 @@ def map_debian_package(debian_package, package_content):
383387 package , error_metadata = get_debian_package_metadata (debian_package )
384388 if error_metadata :
385389 error += error_metadata
390+
391+ package_copyright , error_copyright = get_debian_package_copyright (debian_package )
386392 package .update_purl_fields (package_data = purl_package , replace = True )
393+ update_license_copyright_fields (
394+ package_from = package_copyright ,
395+ package_to = package ,
396+ replace = True ,
397+ )
387398
388399 # This will be used to download and scan the package
389400 package .download_url = download_url
@@ -431,16 +442,59 @@ def get_debian_package_metadata(debian_package):
431442
432443 packages = DebianDscFileHandler .parse (location = temp_metadata_file )
433444 package = list (packages ).pop ()
434- # In the case of looking up a maven package with qualifiers of
435- # `classifiers=sources`, the purl of the package created from the pom does
436- # not have the qualifiers, so we need to set them. Additionally, the download
437- # url is not properly generated since it would be missing the sources bit
438- # from the filename.
445+
439446 package .qualifiers = debian_package .package_url .qualifiers
440447
441448 return package , error
442449
443450
def get_debian_package_copyright(debian_package):
    """
    Return a two-tuple of (package, error) built from the standalone
    copyright file of `debian_package`.

    The file is fetched from `debian_package.package_copyright_url`, saved to
    a temporary file and parsed with StandaloneDebianCopyrightFileHandler.
    `package` is None and `error` is a non-empty, newline-terminated message
    when the file cannot be fetched or when parsing yields no package data.
    Otherwise `error` is an empty string.
    """
    error = ''

    copyright_url = debian_package.package_copyright_url
    response = requests.get(copyright_url)
    if not response.ok:
        msg = f'Package copyright does not exist on debian: {copyright_url}'
        error += msg + '\n'
        logger.error(msg)
        return None, error

    # The temp file name is derived from the last URL segment, split on its
    # last "." into a name and an extension.
    filename = copyright_url.split("/")[-1]
    file_name, _, extension = filename.rpartition(".")
    temp_copyright_file = get_temp_file(file_name=file_name, extension=extension)
    # Write (not append) so that stale content from a previous run can never
    # be parsed together with the freshly downloaded text.
    with open(temp_copyright_file, 'w', encoding='utf-8') as copyright_file:
        copyright_file.write(response.text)

    packages = list(
        StandaloneDebianCopyrightFileHandler.parse(location=temp_copyright_file)
    )
    if not packages:
        # Report an unparseable file instead of crashing on an empty list.
        msg = f'No package data could be parsed from copyright file: {copyright_url}'
        error += msg + '\n'
        logger.error(msg)
        return None, error

    package = packages.pop()
    package.qualifiers = debian_package.package_url.qualifiers

    return package, error
477+
478+
def update_license_copyright_fields(package_from, package_to, replace=True):
    """
    Copy the license and copyright fields of `package_from` onto `package_to`.

    Only non-empty values of `package_from` are copied. When `replace` is
    True an existing value on `package_to` is overwritten; when False, only
    the fields that are empty on `package_to` are filled in.

    Do nothing if either package is None, for instance when the copyright
    file of a package could not be fetched or parsed.
    """
    if package_from is None or package_to is None:
        return

    fields_to_update = (
        'copyright',
        'holder',
        'declared_license_expression',
        'declared_license_expression_spdx',
        'license_detections',
        'other_license_expression',
        'other_license_expression_spdx',
        'other_license_detections',
        'extracted_license_statement',
    )

    for field in fields_to_update:
        value = getattr(package_from, field, None)
        if not value:
            # Never erase existing data with an empty value.
            continue
        if replace or not getattr(package_to, field, None):
            setattr(package_to, field, value)
496+
497+
444498def map_debian_metadata_binary_and_source (package_url , source_package_url ):
445499 """
446500 Get metadata for the binary and source release of the Debian package
@@ -452,11 +506,15 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url):
452506
453507 if "repository_url" in package_url .qualifiers :
454508 base_url = package_url .qualifiers ["repository_url" ]
509+ elif package_url .namespace == 'ubuntu' :
510+ base_url = UBUNTU_BASE_URL
455511 else :
456512 base_url = DEBIAN_BASE_URL
457513
458514 if "api_data_url" in package_url .qualifiers :
459515 metadata_base_url = package_url .qualifiers ["api_data_url" ]
516+ elif package_url .namespace == 'ubuntu' :
517+ metadata_base_url = UBUNTU_METADATA_URL
460518 else :
461519 metadata_base_url = DEBIAN_METADATA_URL
462520
@@ -503,7 +561,7 @@ class DebianPackage:
503561 archive_directory_url = attr .ib (type = str , default = None )
504562
505563 def __attrs_post_init__ (self , * args , ** kwargs ):
506- self .set_debian_archive_directory ()
564+ self .set_debian_directories ()
507565
508566 @property
509567 def package_archive_version (self ):
@@ -576,10 +634,31 @@ def package_metadata_url(self):
576634
577635 return metadata_dsc_package_url
578636
579- def set_debian_archive_directory (self ):
@property
def package_copyright_url(self):
    """
    Return the URL of the standalone copyright file for this package.

    The URL is `metadata_directory_url` followed by
    "<name>_<version>_copyright". The name and version come from
    `source_package_url` when it is set (the version only if it has one),
    otherwise from `package_url.name` and `package_archive_version`.

    The candidate URL is probed with an HTTP GET. If the response is not OK,
    return instead the URL built from the version truncated at its first
    "+". That fallback URL is returned without being probed.
    """
    metadata_version = self.package_archive_version
    if self.source_package_url:
        metadata_package_name = self.source_package_url.name
        if self.source_package_url.version:
            metadata_version = self.source_package_url.version
    else:
        metadata_package_name = self.package_url.name

    def build_copyright_url(version):
        # Single place where the copyright file name is assembled.
        return self.metadata_directory_url + f"{metadata_package_name}_{version}_copyright"

    copyright_package_url = build_copyright_url(metadata_version)
    response = requests.get(copyright_package_url)
    if not response.ok:
        # Retry with the part of the version before any "+" suffix.
        base_version_metadata = metadata_version.split('+')[0]
        copyright_package_url = build_copyright_url(base_version_metadata)

    return copyright_package_url
655+
656+ def set_debian_directories (self ):
580657 """
581658 """
582- base_url = self .archive_base_url
659+ archive_base_url = self .archive_base_url
660+ metadata_base_url = self .metadata_base_url
661+
583662 index_folder = None
584663 if self .package_url .name .startswith ('lib' ):
585664 name_wout_lib = self .package_url .name .replace ("lib" , "" )
@@ -589,7 +668,9 @@ def set_debian_archive_directory(self):
589668
590669 msg = "No directory exists for package at: "
591670
592- package_directory = f"{ base_url } { index_folder } /{ self .package_url .name } /"
671+ package_directory = f"{ archive_base_url } { index_folder } /{ self .package_url .name } /"
672+ metadata_directory = f"{ metadata_base_url } { index_folder } /{ self .package_url .name } /"
673+
593674 response = requests .get (package_directory )
594675 if not response .ok :
595676 if not self .source_package_url :
@@ -599,12 +680,15 @@ def set_debian_archive_directory(self):
599680 index_folder = 'lib' + name_wout_lib [0 ]
600681 else :
601682 index_folder = self .source_package_url .name [0 ]
602- package_directory = f"{ base_url } { index_folder } /{ self .source_package_url .name } /"
683+ package_directory = f"{ archive_base_url } { index_folder } /{ self .source_package_url .name } /"
684+ metadata_directory = f"{ metadata_base_url } { index_folder } /{ self .source_package_url .name } /"
685+
603686 response = requests .get (package_directory )
604687 if not response .ok :
605688 raise PackageDirectoryMissingException (msg + str (package_directory ))
606689
607690 self .archive_directory_url = package_directory
691+ self .metadata_directory_url = metadata_directory
608692
609693
610694class PackageDirectoryMissingException (Exception ):
0 commit comments