77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10+ from collections import namedtuple
1011from itertools import chain
1112import os
1213import gzip
1314import io
1415import logging
15- import arrow
16- import javaproperties
17- from packageurl import PackageURL
1816
1917from dateutil import tz
20- from minecode_pipelines .pipes import java_stream
21- from collections import namedtuple
22- from scanpipe .pipes .fetch import fetch_http
23- from scanpipe .pipes import federatedcode
24- from minecode_pipeline .pipes import write_purls_to_repo
2518from jawa .util .utf import decode_modified_utf8
26- from packagedcode .maven import get_urls
19+ import arrow
20+ import javaproperties
21+
2722from packagedcode .maven import build_filename
2823from packagedcode .maven import build_url
24+ from packagedcode .maven import get_urls
2925from packagedcode .models import PackageData
26+ from packageurl import PackageURL
27+ from scanpipe .pipes .fetch import fetch_http
28+ from scanpipe .pipes import federatedcode
29+
30+ from minecode_pipelines import miners
31+ from minecode_pipelines import pipes
32+ from minecode_pipelines .pipes import java_stream
3033
3134
3235logger = logging .getLogger (__name__ )
4447
4548MAVEN_BASE_URL = "https://repo1.maven.org/maven2"
4649MAVEN_INDEX_URL = "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.gz"
47- MAVEN_INDEX_INCREMENT_BASE_URL = f "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.{ index } .gz"
50+ MAVEN_INDEX_INCREMENT_BASE_URL = "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.{index}.gz"
4851MAVEN_INDEX_PROPERTIES_URL = "https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.properties"
52+ MAVEN_SETTINGS_PATH = "minecode_checkpoints/maven.json"
4953
5054
5155def is_worthy_artifact (artifact ):
@@ -577,6 +581,14 @@ class MavenNexusCollector:
577581 WARNING: Processing is rather long: a full index is ~600MB.
578582 """
579583
def __init__(self, index_properties_location=None):
    """
    Load the Maven index properties into ``self.index_properties``.

    ``index_properties_location`` is the path to an already available index
    ``.properties`` file. When it is not provided (or is empty), the
    location returned by ``self.fetch_index_properties()`` is used instead.

    ``self.index_properties`` is always a mapping: it falls back to an empty
    dict when the properties file yields nothing.
    """
    properties_location = index_properties_location or self.fetch_index_properties()
    # Java .properties files are ISO-8859-1 by specification: decode them
    # explicitly rather than relying on the platform default encoding.
    with open(properties_location, encoding="iso-8859-1") as config_file:
        self.index_properties = javaproperties.load(config_file) or {}
591+
580592 def fetch_index (self , uri = MAVEN_INDEX_URL ):
581593 """
582594 Return a temporary location where the maven index was saved.
@@ -595,11 +607,11 @@ def fetch_index_increments(self):
595607 """
596608 Yield maven index increments
597609 """
598- content = self . fetch_index_properties ()
599- with open ( content ) as config_file :
600- properties = javaproperties . load ( config_file ) or {}
601-
602- for key , increment_index in properties . items ():
610+ # in this context, last serial means last incremental
611+ last_incremental = pipes . fetch_last_serial_mined ( settings_path = MAVEN_SETTINGS_PATH )
612+ for key , increment_index in self . index_properties . items ():
613+ if increment_index <= last_incremental :
614+ continue
603615 if key .startswith ("nexus.index.incremental" ):
604616 index_increment_url = MAVEN_INDEX_INCREMENT_BASE_URL .format (index = increment_index )
605617 index_increment = fetch_http (index_increment_url )
@@ -676,9 +688,10 @@ def _get_packages_from_index_increments(self):
676688 for index_increment in self .fetch_index_increments ():
677689 return self ._get_packages (content = index_increment )
678690
679- def get_packages (self , content = None , increments = False ):
691+ def get_packages (self , content = None ):
680692 """Yield Package objects from maven index"""
681- if increments :
693+ last_incremental = pipes .fetch_last_serial_mined (settings_path = MAVEN_SETTINGS_PATH )
694+ if last_incremental :
682695 packages = chain (self ._get_packages_from_index_increments ())
683696 else :
684697 if content :
@@ -698,7 +711,6 @@ def collect_packages_from_maven(commits_per_push=10, logger=None):
698711 if not prev_purl :
699712 prev_purl = current_purl
700713 elif prev_purl != current_purl :
701- # check out repo
702714 repo_url , _ = federatedcode .get_package_repository (
703715 project_purl = prev_purl ,
704716 logger = logger
@@ -710,16 +722,18 @@ def collect_packages_from_maven(commits_per_push=10, logger=None):
710722
711723 push_commit = not bool (i % commits_per_push )
712724 # save purls to yaml
713- write_purls_to_repo (
725+ miners . write_purls_to_repo (
714726 repo = repo ,
715727 package = prev_purl ,
716728 packages = current_packages ,
717729 push_commit = push_commit
718730 )
719731
720- # delete local clone
721732 federatedcode .delete_local_clone (repo )
722733
723734 current_packages = []
724735 prev_purl = current_purl
725736 current_packages .append (package )
737+
738+ last_incremental = maven_nexus_collector .index_properties .get ("nexus.index.last-incremental" )
739+ pipes .update_last_serial_mined (last_serial = last_incremental , settings_path = MAVEN_SETTINGS_PATH )
0 commit comments