Skip to content

Commit 0d1bfce

Browse files
committed
Yield single package for versioned PURL
Signed-off-by: Keshav Priyadarshi <[email protected]>
1 parent d3d759e commit 0d1bfce

File tree

1 file changed

+136
-52
lines changed

1 file changed

+136
-52
lines changed

src/fetchcode/package.py

Lines changed: 136 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -370,22 +370,22 @@ class DirectoryListedSource:
370370
},
371371
)
372372

373-
# TODO: use ignored_files_and_dir to ignore non interesting archives.
374373
@classmethod
375-
def get_package_info(cls, purl):
376-
package_url = PackageURL.from_string(purl)
374+
def get_package_info(cls, package_url):
377375
if cls.is_nested:
378-
_, listing = htmllistparse.fetch_listing(cls.source_url)
379-
for dir in listing:
380-
if not dir.name.endswith("/"):
381-
continue
382-
url = urljoin(cls.source_url, dir.name)
383-
yield from extract_packages_from_listing(
384-
package_url, url, cls.source_archive_regex
385-
)
376+
yield from extract_package_from_nested_listing(
377+
package_url,
378+
cls.source_url,
379+
cls.source_archive_regex,
380+
cls.ignored_files_and_dir,
381+
)
382+
386383
else:
387384
yield from extract_packages_from_listing(
388-
package_url, cls.source_url, cls.source_archive_regex
385+
package_url,
386+
cls.source_url,
387+
cls.source_archive_regex,
388+
cls.ignored_files_and_dir,
389389
)
390390

391391

@@ -526,52 +526,82 @@ class DropbearDirectoryListedSource(DirectoryListedSource):
526526
source_url = "https://matt.ucc.asn.au/dropbear/releases/"
527527
source_archive_regex = r"^(dropbear-)[\w.]*(.tar.bz2|_i386.deb)$"
528528
is_nested = False
529-
ignored_files_and_dir = []
530-
531-
532-
HTML_DIR_LIST = {
533-
"pkg:generic/ipkg": IpkgDirectoryListedSource,
534-
"pkg:generic/util-linux": UtilLinuxDirectoryListedSource,
535-
"pkg:generic/busybox": BusyBoxDirectoryListedSource,
536-
"pkg:generic/uclibc": UclibcDirectoryListedSource,
537-
"pkg:generic/uclibc-ng": UclibcNGDirectoryListedSource,
538-
"pkg:generic/bzip2": Bzip2DirectoryListedSource,
539-
"pkg:generic/openssh": OpenSSHDirectoryListedSource,
540-
"pkg:generic/dnsmasq": DnsmasqDirectoryListedSource,
541-
"pkg:generic/ebtables": EbtablesDirectoryListedSource,
542-
"pkg:generic/hostapd": HostapdDirectoryListedSource,
543-
"pkg:generic/iproute2": Iproute2DirectoryListedSource,
544-
"pkg:generic/iptables": IptablesDirectoryListedSource,
545-
"pkg:generic/libnl": LibnlDirectoryListedSource,
546-
"pkg:generic/lighttpd": LighttpdDirectoryListedSource,
547-
"pkg:generic/nftables": NftablesDirectoryListedSource,
548-
"pkg:generic/wpa_supplicant": WpaSupplicantDirectoryListedSource,
549-
"pkg:generic/syslinux": SyslinuxDirectoryListedSource,
550-
# TODO: Need to implement extraction from deep nested directory.
551-
# "pkg:generic/rpm":
552-
# source_url ="https://ftp.osuosl.org/pub/rpm/releases/"
553-
# source_archive_regex = r"^(rpm-)[\w.]*(.tar.bz2)$"
554-
# is_nested = False
555-
"pkg:generic/toybox": ToyboxDirectoryListedSource,
556-
# TODO: Ignore test archives like dropbear-0.44test4.tar.gz
557-
"pkg:generic/dropbear": DropbearDirectoryListedSource,
529+
ignored_files_and_dir = [
530+
"dropbear-0.44test1.tar.bz2",
531+
"dropbear-0.44test1.tar.gz",
532+
"dropbear-0.44test2.tar.bz2",
533+
"dropbear-0.44test2.tar.gz",
534+
"dropbear-0.44test3.tar.bz2",
535+
"dropbear-0.44test3.tar.gz",
536+
"dropbear-0.44test4.tar.bz2",
537+
"dropbear-0.44test4.tar.gz",
538+
]
539+
540+
541+
DIR_SUPPORTED_PURLS = [
542+
"pkg:generic/ipkg.*",
543+
"pkg:generic/util-linux.*",
544+
"pkg:generic/busybox.*",
545+
"pkg:generic/uclibc.*",
546+
"pkg:generic/uclibc-ng.*",
547+
"pkg:generic/bzip2.*",
548+
"pkg:generic/openssh.*",
549+
"pkg:generic/dnsmasq.*",
550+
"pkg:generic/ebtables.*",
551+
"pkg:generic/hostapd.*",
552+
"pkg:generic/iproute2.*",
553+
"pkg:generic/iptables.*",
554+
"pkg:generic/libnl.*",
555+
"pkg:generic/lighttpd.*",
556+
"pkg:generic/nftables.*",
557+
"pkg:generic/wpa_supplicant.*",
558+
"pkg:generic/syslinux.*",
559+
"pkg:generic/toybox.*",
560+
"pkg:generic/dropbear.*",
561+
]
562+
563+
DIR_LISTED_SOURCE_BY_PACKAGE_NAME = {
564+
"ipkg": IpkgDirectoryListedSource,
565+
"util-linux": UtilLinuxDirectoryListedSource,
566+
"busybox": BusyBoxDirectoryListedSource,
567+
"uclibc": UclibcDirectoryListedSource,
568+
"uclibc-ng": UclibcNGDirectoryListedSource,
569+
"bzip2": Bzip2DirectoryListedSource,
570+
"openssh": OpenSSHDirectoryListedSource,
571+
"dnsmasq": DnsmasqDirectoryListedSource,
572+
"ebtables": EbtablesDirectoryListedSource,
573+
"hostapd": HostapdDirectoryListedSource,
574+
"iproute2": Iproute2DirectoryListedSource,
575+
"iptables": IptablesDirectoryListedSource,
576+
"libnl": LibnlDirectoryListedSource,
577+
"lighttpd": LighttpdDirectoryListedSource,
578+
"nftables": NftablesDirectoryListedSource,
579+
"wpa_supplicant": WpaSupplicantDirectoryListedSource,
580+
"syslinux": SyslinuxDirectoryListedSource,
581+
"toybox": ToyboxDirectoryListedSource,
582+
"dropbear": DropbearDirectoryListedSource,
558583
}
559584

560585

561-
@router.route(*HTML_DIR_LIST.keys())
586+
@router.route(*DIR_SUPPORTED_PURLS)
562587
def get_htmllisting_data_from_purl(purl):
563588
"""Generate `Package` object from the `purl` having directory listed source"""
564-
return HTML_DIR_LIST[purl].get_package_info(purl)
589+
package_url = PackageURL.from_string(purl)
590+
return DIR_LISTED_SOURCE_BY_PACKAGE_NAME[package_url.name].get_package_info(
591+
package_url
592+
)
565593

566594

567-
def extract_packages_from_listing(purl, source_archive_url, regex):
595+
def get_packages_from_listing(purl, source_archive_url, regex, ignored_files_and_dir):
568596
"""
569-
Yield package data from a directory listing for given source_archive_url.
597+
Return list of package data from a directory listing based on the specified regex.
570598
"""
571599
pattern = re.compile(regex)
572600
_, listing = htmllistparse.fetch_listing(source_archive_url)
601+
602+
packages = []
573603
for file in listing:
574-
if not pattern.match(file.name):
604+
if not pattern.match(file.name) or file.name in ignored_files_and_dir:
575605
continue
576606

577607
version = hint(file.name)
@@ -584,11 +614,65 @@ def extract_packages_from_listing(purl, source_archive_url, regex):
584614

585615
download_url = urljoin(source_archive_url, file.name)
586616
package_url = PackageURL(
587-
type=purl.type, namespace=purl.namespace, name=purl.name, version=version
617+
type=purl.type,
618+
namespace=purl.namespace,
619+
name=purl.name,
620+
version=version,
588621
)
589-
yield Package(
590-
homepage_url=source_archive_url,
591-
download_url=download_url,
592-
release_date=date.isoformat(),
593-
**package_url.to_dict(),
622+
packages.append(
623+
Package(
624+
homepage_url=source_archive_url,
625+
download_url=download_url,
626+
release_date=date.isoformat(),
627+
**package_url.to_dict(),
628+
)
594629
)
630+
631+
return packages
632+
633+
634+
def extract_packages_from_listing(
    purl, source_archive_url, regex, ignored_files_and_dir
):
    """
    Yield package data from the directory listing at source_archive_url.

    If ``purl`` has a version, yield only the first package whose version
    equals it and then stop. Otherwise yield every package returned by
    ``get_packages_from_listing``.
    """
    candidates = get_packages_from_listing(
        purl, source_archive_url, regex, ignored_files_and_dir
    )
    wanted_version = purl.version

    if wanted_version:
        # A specific version was requested: at most one package is yielded.
        for candidate in candidates:
            if candidate.version == wanted_version:
                yield candidate
                return
    else:
        # No version requested: every package in the listing is a result.
        for candidate in candidates:
            yield candidate
653+
654+
655+
def extract_package_from_nested_listing(purl, source_url, regex, ignored_files_and_dir):
    """
    Yield package data from the sub-directories listed at source_url.

    Only listing entries whose name ends with "/" are visited. If ``purl``
    has a version, the search stops at the first package whose version
    equals it; otherwise every package from every sub-directory is yielded.
    """
    _, entries = htmllistparse.fetch_listing(source_url)
    wanted_version = purl.version

    # Lazily keep only the entries that look like directories.
    subdirectory_names = (entry.name for entry in entries if entry.name.endswith("/"))

    for subdirectory_name in subdirectory_names:
        candidates = get_packages_from_listing(
            purl,
            urljoin(source_url, subdirectory_name),
            regex,
            ignored_files_and_dir,
        )

        if not wanted_version:
            # No version requested: everything in this directory is a result.
            for candidate in candidates:
                yield candidate
            continue

        for candidate in candidates:
            if candidate.version == wanted_version:
                # The requested version was found; no need to visit the
                # remaining directories.
                yield candidate
                return

0 commit comments

Comments
 (0)