3030import tempfile
3131from collections import namedtuple
3232from email .message import Message
33+ from io import BytesIO
3334from pathlib import Path
3435from urllib .parse import unquote
3536from urllib .parse import urlparse
3940
4041import git
4142import requests
42- import scanpipe
4343from commoncode import command
4444from commoncode .hash import multi_checksums
4545from commoncode .text import python_safe_name
4646from plugincode .location_provider import get_location
4747from requests import auth as request_auth
4848
49+ import scanpipe
4950from scanpipe .models import DownloadedPackage
5051from scanpipe .models import PackageArchive
51- from io import BytesIO
5252
5353logger = logging .getLogger ("scanpipe.pipes" )
5454
@@ -374,15 +374,17 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
374374 project: The ScanCode.io Project instance.
375375 url (str, optional): The URL from which the package was downloaded.
376376 file_path (str or Path, optional): Path to the package file.
377+ pipeline_name: The name of the pipeline storing the package.
377378
378379 Returns:
379380 DownloadedPackage: The created DownloadedPackage instance, or
380381 None if storage is disabled or an error occurs.
381382
382383 """
383384 logger .info (
384- f"store_package_archive called with project: { project } , url: { url } ,"
385- "file_path: {file_path}"
385+ f"store_package_archive called with project: { project } , "
386+ f"url: { url } , "
387+ f"file_path: { file_path } "
386388 )
387389
388390 if not getattr (settings , "ENABLE_PACKAGE_STORAGE" , False ):
@@ -392,42 +394,50 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
392394 if not file_path and not url :
393395 logger .error ("Either file_path or url must be provided" )
394396 return None
395-
396- if url :
397- existing = DownloadedPackage .objects .filter (project = project , url = url ).first ()
398- if existing and not should_rescan (existing , pipeline_name ):
399- logger .info (f"Using existing package: { existing .package_archive .package_file .name } " )
400- return existing
401397
398+ content , filename = get_package_content_and_filename (file_path , url )
399+ if not content :
400+ return None
401+
402+ archive = get_or_create_archive (content , file_path , filename )
403+ if not archive :
404+ return None
405+
406+ dp = get_or_create_downloaded_package (
407+ project , url , filename , archive , pipeline_name
408+ )
409+ return dp
410+
411+
def get_package_content_and_filename(file_path, url):
    """
    Return the ``(content, filename)`` of a package read from the local
    ``file_path`` or, when no ``file_path`` is provided, downloaded from ``url``.

    Args:
        file_path (str or Path, optional): Path to a local package file.
            Takes precedence over ``url`` when provided.
        url (str, optional): URL to download the package from.

    Returns:
        tuple: ``(content, filename)`` where ``content`` is the raw bytes of the
        package and ``filename`` is the base name of the file path, or of the
        URL with its query string removed. ``(None, None)`` is returned, and
        the error is logged, when the file is missing or unreadable or when
        the download fails.

    """
    if file_path:
        local_path = Path(file_path)
        # ``is_file()`` rather than ``exists()``: a directory exists but cannot
        # be opened for reading, which would raise instead of returning None.
        if not local_path.is_file():
            logger.error(f"File not found: {file_path}")
            return None, None

        try:
            content = local_path.read_bytes()
        except OSError as e:
            # Permission or I/O errors follow the same "log and return None"
            # contract as a failed download.
            logger.error(f"Failed to read {file_path}: {e}")
            return None, None

        return content, os.path.basename(str(file_path))

    try:
        # With ``stream=True`` the connection is only released once the body is
        # consumed or the response is closed. The context manager closes it even
        # when ``raise_for_status()`` raises before the body is read.
        with requests.get(url, stream=True, timeout=HTTP_REQUEST_TIMEOUT) as response:
            response.raise_for_status()
            content = response.content
    except requests.RequestException as e:
        logger.error(f"Failed to download {url}: {e}")
        return None, None

    # Drop the query string before extracting the last path segment.
    filename = os.path.basename(url.split("?")[0])
    return content, filename
431+
432+
433+ def get_or_create_archive (content , file_path , filename ):
420434 checksum = hashlib .sha256 (content ).hexdigest ()
421435 logger .info (f"Calculated SHA256: { checksum } " )
422436
423437 existing_archive = PackageArchive .objects .filter (checksum_sha256 = checksum ).first ()
424438 if existing_archive :
425- existing = DownloadedPackage .objects .filter (
426- project = project , package_archive = existing_archive
427- ).first ()
428- if existing and not should_rescan (existing , pipeline_name ):
429- logger .info (f"Using existing package: { existing_archive .package_file .name } " )
430- return existing
439+ logger .info (f"Using existing package: { existing_archive .package_file .name } " )
440+ return existing_archive
431441
432442 try :
433443 archive = PackageArchive (
@@ -438,10 +448,13 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
438448 archive .package_file .save (filename , File (f ), save = False )
439449 archive .save ()
440450 logger .info (f"Created PackageArchive: { archive .checksum_sha256 } " )
451+ return archive
441452 except Exception as e :
442453 logger .error (f"Error creating PackageArchive: { e } " )
443454 return None
444455
456+
457+ def get_or_create_downloaded_package (project , url , filename , archive , pipeline_name ):
445458 try :
446459 dp = DownloadedPackage .objects .create (
447460 project = project ,
@@ -457,12 +470,6 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
457470 logger .error (f"Error creating DownloadedPackage: { e } " )
458471 return None
459472
460- def should_rescan (package , pipeline_name ):
461- """Check if rescanning is needed based on ScanCode version or pipeline."""
462- current_version = scanpipe .__version__
463- return package .scancode_version != current_version or (
464- pipeline_name and package .pipeline_name != pipeline_name
465- )
466473
467474SCHEME_TO_FETCHER_MAPPING = {
468475 "http" : fetch_http ,
0 commit comments