|
7 | 7 | # See https://aboutcode.org for more information about nexB OSS projects. |
8 | 8 | # |
9 | 9 |
|
10 | | -import saneyaml |
| 10 | +from itertools import chain |
11 | 11 | import os |
12 | 12 | import gzip |
13 | 13 | import io |
14 | 14 | import logging |
15 | 15 | import arrow |
16 | | -from aboutcode import hashid |
| 16 | +import javaproperties |
17 | 17 | from packageurl import PackageURL |
18 | | -from urllib.parse import urlparse |
| 18 | + |
19 | 19 | from dateutil import tz |
20 | 20 | from minecode_pipeline.pipes import java_stream |
21 | 21 | from collections import namedtuple |
|
44 | 44 |
|
# Root of the Maven Central repository.
MAVEN_BASE_URL = "https://repo1.maven.org/maven2"
# Full nexus index of Maven Central.
MAVEN_INDEX_URL = "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.gz"
# Template for one incremental index chunk; fill in with `.format(index=...)`.
# This must stay a plain string (not an f-string): `index` is only known at
# call time, and an f-string would try to evaluate it when the module loads.
MAVEN_INDEX_INCREMENT_BASE_URL = (
    "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.{index}.gz"
)
# Properties file published next to the index.
MAVEN_INDEX_PROPERTIES_URL = (
    "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.properties"
)
47 | 49 |
|
48 | 50 |
|
49 | 51 | def is_worthy_artifact(artifact): |
@@ -575,24 +577,36 @@ class MavenNexusCollector: |
575 | 577 | WARNING: Processing is rather long: a full index is ~600MB. |
576 | 578 | """ |
577 | 579 |
|
def fetch_index(self, uri=MAVEN_INDEX_URL):
    """
    Fetch the maven index at `uri` and return the temporary location where
    it was saved.
    """
    return fetch_http(uri).path
587 | 586 |
|
588 | | - def get_packages(self, content=None): |
589 | | - """Yield Package objects from maven index""" |
590 | | - if content: |
591 | | - index_location = content |
592 | | - else: |
593 | | - index_location = self.fetch_index() |
def fetch_index_properties(self, uri=MAVEN_INDEX_PROPERTIES_URL):
    """
    Fetch the maven index properties file at `uri` and return the temporary
    location where it was saved.
    """
    return fetch_http(uri).path
594 | 593 |
|
595 | | - artifacts = get_artifacts(index_location, worthyness=is_worthy_artifact) |
def fetch_index_increments(self):
    """
    Yield the temporary location of each fetched maven index increment.

    The index properties file is fetched and parsed. For every property whose
    key starts with "nexus.index.incremental", its value is substituted into
    MAVEN_INDEX_INCREMENT_BASE_URL and the resulting URL is fetched.
    Increments are fetched lazily, one per iteration step.
    """
    properties_location = self.fetch_index_properties()
    # Open in binary mode so javaproperties decodes the file as Latin-1 (the
    # .properties encoding) rather than with the platform's locale encoding.
    with open(properties_location, "rb") as properties_file:
        properties = javaproperties.load(properties_file) or {}

    for key, increment_index in properties.items():
        if not key.startswith("nexus.index.incremental"):
            continue
        increment_url = MAVEN_INDEX_INCREMENT_BASE_URL.format(index=increment_index)
        yield fetch_http(increment_url).path
| 607 | + |
| 608 | + def _get_packages(self, content=None): |
| 609 | + artifacts = get_artifacts(content, worthyness=is_worthy_artifact) |
596 | 610 |
|
597 | 611 | for artifact in artifacts: |
598 | 612 | # we cannot do much without these |
@@ -658,6 +672,22 @@ def get_packages(self, content=None): |
658 | 672 | ) |
659 | 673 | yield current_purl, package |
660 | 674 |
|
def _get_packages_from_index_increments(self):
    """
    Yield the items produced by `_get_packages` for every maven index
    increment, one increment after the other.

    Yields nothing when there are no increments.
    """
    for index_increment in self.fetch_index_increments():
        # `yield from` (not `return`) so that every increment is processed,
        # not just the first one.
        yield from self._get_packages(content=index_increment)
| 678 | + |
def get_packages(self, content=None, increments=False):
    """
    Return an iterable of the items produced by `_get_packages` from a maven
    index.

    When `increments` is true, the incremental indexes are fetched and
    processed one after the other, and `content` is ignored. Otherwise the
    index at the `content` location is used, or the full index is fetched
    when `content` is empty.
    """
    if increments:
        # Lazily chain the packages of each increment; an empty set of
        # increments gives an empty iterable.
        return chain.from_iterable(
            self._get_packages(content=index_increment)
            for index_increment in self.fetch_index_increments()
        )

    index_location = content or self.fetch_index()
    return self._get_packages(content=index_location)
| 690 | + |
661 | 691 |
|
662 | 692 | def collect_packages_from_maven(commits_per_push=10, logger=None): |
663 | 693 | # download and iterate through maven nexus index |
|
0 commit comments