|
9 | 9 |
|
10 | 10 | from collections import namedtuple |
11 | 11 | from itertools import chain |
| 12 | +from shutil import rmtree |
12 | 13 | import os |
13 | 14 | import gzip |
14 | 15 | import io |
@@ -577,58 +578,81 @@ class MavenNexusCollector: |
577 | 578 | WARNING: Processing is rather long: a full index is ~600MB. |
578 | 579 | """ |
579 | 580 |
|
580 | | - def __init__(self, index_location=None, index_properties_location=None): |
581 | | - self._set_index_properties(index_properties_location=index_properties_location) |
| 581 | + def __init__(self, index_location=None, index_properties_location=None, last_incremental=None): |
| 582 | + if index_location and last_incremental: |
| 583 | + raise Exception( |
| 584 | + "index_location and last_incremental cannot both be set at the same time. " |
| 585 | + "MavenNexusCollector() is only able to yield packages from a maven index or " |
| 586 | + "packages starting past a particular index increment." |
| 587 | + ) |
| 588 | + |
| 589 | + if index_properties_location: |
| 590 | + self.index_properties_download = None |
| 591 | + self.index_properties_location = index_properties_location |
| 592 | + else: |
| 593 | + self.index_properties_download = self._fetch_index_properties() |
| 594 | + self.index_properties_location = self.index_properties_download.path |
| 595 | + |
| 596 | + if self.index_properties_location: |
| 597 | + with open(self.index_properties_location) as config_file: |
| 598 | + self.index_properties = javaproperties.load(config_file) or {} |
| 599 | + else: |
| 600 | + self.index_properties = {} |
| 601 | + |
582 | 602 | if index_location: |
| 603 | + self.index_download = None |
583 | 604 | self.index_location = index_location |
584 | 605 | else: |
585 | | - self.index_location = self._fetch_index() |
586 | | - self.index_location_given = bool(index_location) |
587 | | - self.index_increment_locations = [] |
| 606 | + self.index_download = self._fetch_index() |
| 607 | + self.index_location = self.index_download.path |
| 608 | + |
| 609 | + if last_incremental: |
| 610 | + self.index_increment_downloads = self._fetch_index_increments(last_incremental=last_incremental) |
| 611 | + self.index_increment_locations = [download.path for download in self.index_increment_downloads] |
| 612 | + else: |
| 613 | + self.index_increment_downloads = [] |
| 614 | + self.index_increment_locations = [] |
588 | 615 |
|
589 | 616 | def __del__(self): |
590 | | - if self.index_location and not self.index_location_given: |
591 | | - os.remove(self.index_location) |
592 | | - if self.index_increment_locations: |
593 | | - for loc in self.index_increment_locations: |
594 | | - os.remove(loc) |
| 617 | + if self.index_properties_download: |
| 618 | + rmtree(path=self.index_properties_download.directory) |
| 619 | + if self.index_download: |
| 620 | + rmtree(path=self.index_download.directory) |
| 621 | + if self.index_increment_downloads: |
| 622 | + for download in self.index_increment_downloads: |
| 623 | + rmtree(path=download.directory) |
595 | 624 |
|
def _fetch_index(self, uri=MAVEN_INDEX_URL):
    """
    Download the maven index found at `uri` and return the resulting
    Download, which records where the maven index was saved.
    """
    return fetch_http(uri)
602 | 632 |
|
def _fetch_index_properties(self, uri=MAVEN_INDEX_PROPERTIES_URL):
    """
    Download the maven index properties file found at `uri` and return the
    resulting Download, which records where the properties file was saved.
    """
    return fetch_http(uri)
619 | 640 |
|
620 | 641 | def _fetch_index_increments(self, last_incremental): |
621 | 642 | """ |
622 | | - Yield maven index increments |
| 643 | + Fetch maven index increments, starting past `last_incremental`, and |
| 644 | + return a list of Downloads with information about where the maven index |
| 645 | + increments were saved. |
623 | 646 | """ |
| 647 | + index_increment_downloads = [] |
624 | 648 | for key, increment_index in self.index_properties.items(): |
625 | 649 | if increment_index <= last_incremental: |
626 | 650 | continue |
627 | 651 | if key.startswith("nexus.index.incremental"): |
628 | 652 | index_increment_url = MAVEN_INDEX_INCREMENT_BASE_URL.format(index=increment_index) |
629 | 653 | index_increment = fetch_http(index_increment_url) |
630 | | - self.index_increment_locations.append(index_increment.path) |
631 | | - yield index_increment.path |
| 654 | + index_increment_downloads.append(index_increment) |
| 655 | + return index_increment_downloads |
632 | 656 |
|
633 | 657 | def _get_packages(self, content=None): |
634 | 658 | artifacts = get_artifacts(content, worthyness=is_worthy_artifact) |
@@ -697,15 +721,15 @@ def _get_packages(self, content=None): |
697 | 721 | ) |
698 | 722 | yield current_purl, package |
699 | 723 |
|
700 | | - def _get_packages_from_index_increments(self, last_incremental): |
701 | | - for index_increment in self._fetch_index_increments(last_incremental=last_incremental): |
702 | | - return self._get_packages(content=index_increment) |
| 724 | + def _get_packages_from_index_increments(self): |
| 725 | + for index_increment in self.index_increment_locations: |
| 726 | + yield self._get_packages(content=index_increment) |
703 | 727 |
|
704 | | - def get_packages(self, last_incremental=None): |
705 | | - """Yield Package objects from maven index""" |
706 | | - if last_incremental: |
| 728 | + def get_packages(self): |
| 729 | + """Yield Package objects from maven index or index increments""" |
| 730 | + if self.index_increment_locations: |
707 | 731 | packages = chain( |
708 | | - self._get_packages_from_index_increments(last_incremental=last_incremental) |
| 732 | + self._get_packages_from_index_increments() |
709 | 733 | ) |
710 | 734 | else: |
711 | 735 | packages = self._get_packages(content=self.index_location) |
|
0 commit comments