2323import json
2424import logging
2525import re
26- import requests
2726import sys
2827import uuid
2928from pathlib import Path
3231from django .core .exceptions import ObjectDoesNotExist
3332
3433import python_inspector .api as python_inspector
34+ import requests
3535import saneyaml
3636from attributecode .model import About
3737from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
4141from scanpipe .models import DiscoveredDependency
4242from scanpipe .models import DiscoveredPackage
4343from scanpipe .pipes import cyclonedx
44+ from scanpipe .pipes import fetch
4445from scanpipe .pipes import flag
46+ from scanpipe .pipes import scancode
4547from scanpipe .pipes import spdx
4648from scanpipe .pipes import update_or_create_dependency
4749from scanpipe .pipes import update_or_create_package
4850
49- from scanpipe .pipes import fetch
50- from scanpipe .pipes import scancode
51-
5251"""
5352Resolve packages from manifest, lockfile, and SBOM.
5453"""
@@ -531,26 +530,42 @@ def extract_headers(input_location, extract_fields):
def parse_maven_filename(filename):
    """
    Parse a Maven jar filename to extract its artifactId and version.

    The file extension (everything after the last ".") is dropped, then one
    trailing well-known classifier (such as "-sources" or "-javadoc") is
    removed if present. The remainder is split at the first "-" that is
    followed by a digit: the left part is the artifactId and the right part
    is the version.

    Return an ``(artifact_id, version)`` tuple, or ``(None, None)`` when the
    filename does not follow the ``<artifactId>-<version>`` layout.
    """
    # Remove the file extension (e.g. ".jar").
    base = filename.rsplit(".", 1)[0]

    # Common classifiers that may follow the version in the filename.
    common_classifiers = {
        "sources",
        "javadoc",
        "tests",
        "test",
        "test-sources",
        "src",
        "bin",
        "docs",
        "javadocs",
        "client",
        "server",
        "linux",
        "windows",
        "macos",
        "linux-x86_64",
        "windows-x86_64",
    }

    # Remove a known classifier if present. Longest classifiers are tested
    # first so that "-test-sources" is stripped as a whole instead of only its
    # "-sources" tail; iterating the bare set would make the outcome depend on
    # the (randomized) string hash order.
    for classifier in sorted(common_classifiers, key=len, reverse=True):
        suffix = f"-{classifier}"
        if base.endswith(suffix):
            base = base[: -len(suffix)]
            break

    # Match artifactId (non-greedy) and a version starting with a digit.
    match = re.match(r"^(.*?)-((\d[\w.\-]*))$", base)
    if not match:
        return None, None

    artifact_id = match.group(1)
    version = match.group(2)
    return artifact_id, version
@@ -564,15 +579,21 @@ def get_pom_url_list(input_source, packages):
564579 package_ns = package .get ("namespace" , "" )
565580 package_name = package .get ("name" , "" )
566581 package_version = package .get ("version" , "" )
567- pom_url = f"https://repo1.maven.org/maven2/{ package_ns .replace ('.' , '/' )} /{ package_name } /{ package_version } /{ package_name } -{ package_version } .pom" .lower ()
582+ pom_url = (
583+ f"https://repo1.maven.org/maven2/{ package_ns .replace ('.' , '/' )} /"
584+ f"{ package_name } /{ package_version } /"
585+ f"{ package_name } -{ package_version } .pom" .lower ()
586+ )
568587 pom_url_list .append (pom_url )
569588 else :
570589 # Check what's the input source
571590 input_source_url = input_source .get ("download_url" , "" )
572591
573592 if input_source_url and "maven.org/" in input_source_url :
574- base_url = input_source_url .rsplit ('/' , 1 )[0 ]
575- pom_url = base_url + "/" + "-" .join (base_url .rstrip ("/" ).split ("/" )[- 2 :]) + ".pom"
593+ base_url = input_source_url .rsplit ("/" , 1 )[0 ]
594+ pom_url = (
595+ base_url + "/" + "-" .join (base_url .rstrip ("/" ).split ("/" )[- 2 :]) + ".pom"
596+ )
576597 pom_url_list .append (pom_url )
577598 else :
578599 # Construct a pom_url from filename
@@ -596,12 +617,12 @@ def construct_pom_url_from_filename(artifact_id, version):
596617 pom_url_list = []
597618 group_ids = []
598619 try :
599- response = requests .get (url )
620+ response = requests .get (url , timeout = 5 )
600621 response .raise_for_status ()
601622 data = response .json ()
602623 # Extract all 'g' fields from the docs array that represent
603624 # groupIds
604- group_ids = [doc ['g' ] for doc in data [' response' ][ ' docs' ]]
625+ group_ids = [doc ["g" ] for doc in data [" response" ][ " docs" ]]
605626 except requests .RequestException as e :
606627 print (f"Error fetching data: { e } " )
607628 return []
@@ -610,7 +631,10 @@ def construct_pom_url_from_filename(artifact_id, version):
610631 return []
611632
612633 for group_id in group_ids :
613- pom_url = f"https://repo1.maven.org/maven2/{ group_id .replace ('.' , '/' )} /{ artifact_id } /{ version } /{ artifact_id } -{ version } .pom" .lower ()
634+ pom_url = (
635+ f"https://repo1.maven.org/maven2/{ group_id .replace ('.' , '/' )} /"
636+ f"{ artifact_id } /{ version } /{ artifact_id } -{ version } .pom" .lower ()
637+ )
614638 if is_maven_pom_url (pom_url ):
615639 pom_url_list .append (pom_url )
616640 if len (pom_url_list ) > 1 :
@@ -632,12 +656,12 @@ def is_maven_pom_url(url):
632656 if response .status_code != 200 :
633657 return False
634658 # Check content-type
635- content_type = response .headers .get (' content-type' , '' ).lower ()
636- is_xml = ' xml' in content_type or ' text/xml' in content_type
659+ content_type = response .headers .get (" content-type" , "" ).lower ()
660+ is_xml = " xml" in content_type or " text/xml" in content_type
637661
638662 # Check content
639663 content = response .text .strip ()
640- is_pom = content .startswith (' <?xml' ) and ' <project' in content
664+ is_pom = content .startswith (" <?xml" ) and " <project" in content
641665
642666 if is_xml and is_pom :
643667 return True
@@ -665,19 +689,19 @@ def download_and_scan_pom_file(pom_url_list):
665689 },
666690 )
667691
668- with open (scanned_pom_output_path , 'r' ) as scanned_pom_file :
692+ with open (scanned_pom_output_path ) as scanned_pom_file :
669693 scanned_pom_data = json .load (scanned_pom_file )
670694 scanned_packages = scanned_pom_data .get ("packages" , [])
671695 scanned_dependencies = scanned_pom_data .get ("dependencies" , [])
672696 if scanned_packages :
673697 for scanned_package in scanned_packages :
674698 # Replace the 'datafile_path' with the pom_url
675- scanned_package [' datafile_paths' ] = [pom_url ]
699+ scanned_package [" datafile_paths" ] = [pom_url ]
676700 scanned_pom_packages .append (scanned_package )
677701 if scanned_dependencies :
678702 for scanned_dep in scanned_dependencies :
679703 # Replace the 'datafile_path' with empty string
680704 # See https://github.com/aboutcode-org/scancode.io/issues/1763#issuecomment-3525165830
681- scanned_dep [' datafile_path' ] = ""
705+ scanned_dep [" datafile_path" ] = ""
682706 scanned_pom_deps .append (scanned_dep )
683707 return scanned_pom_packages , scanned_pom_deps
0 commit comments