Skip to content

Commit 33dfe79

Browse files
committed
fix errors
Signed-off-by: Varsha U N <[email protected]>
1 parent d81ebd0 commit 33dfe79

File tree

3 files changed

+42
-35
lines changed

3 files changed

+42
-35
lines changed

scanpipe/forms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,9 @@ def __init__(self, *args, **kwargs):
180180
self.fields["pipeline"].choices = pipeline_choices
181181

182182
self.fields["use_local_storage"].label = "Store packages locally"
183-
self.fields["use_local_storage"].help_text = (
184-
"If checked, packages will be stored on the local filesystem."
185-
)
183+
self.fields[
184+
"use_local_storage"
185+
].help_text = "If checked, packages will be stored on the local filesystem."
186186
self.fields["use_local_storage"].widget.attrs.update({"class": "checkbox"})
187187

188188
def clean_name(self):

scanpipe/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4427,7 +4427,7 @@ class PackageArchive(UUIDPKModel):
44274427
)
44284428

44294429
class Meta:
4430-
pass
4430+
pass
44314431

44324432
def __str__(self):
44334433
return f"Archive {self.checksum_sha256[:8]} at {
@@ -4502,7 +4502,7 @@ class Meta:
45024502
name="%(app_label)s_%(class)s_unique_project_archive",
45034503
),
45044504
]
4505-
4505+
45064506
def __str__(self):
45074507
return f"{self.filename} for project {self.project.name}"
45084508

scanpipe/pipes/fetch.py

Lines changed: 37 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import tempfile
3131
from collections import namedtuple
3232
from email.message import Message
33+
from io import BytesIO
3334
from pathlib import Path
3435
from urllib.parse import unquote
3536
from urllib.parse import urlparse
@@ -39,16 +40,15 @@
3940

4041
import git
4142
import requests
42-
import scanpipe
4343
from commoncode import command
4444
from commoncode.hash import multi_checksums
4545
from commoncode.text import python_safe_name
4646
from plugincode.location_provider import get_location
4747
from requests import auth as request_auth
4848

49+
import scanpipe
4950
from scanpipe.models import DownloadedPackage
5051
from scanpipe.models import PackageArchive
51-
from io import BytesIO
5252

5353
logger = logging.getLogger("scanpipe.pipes")
5454

@@ -374,15 +374,17 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
374374
project: The ScanCode.io Project instance.
375375
url (str, optional): The URL from which the package was downloaded.
376376
file_path (str or Path, optional): Path to the package file.
377+
pipeline_name: The name of the pipeline storing the package.
377378
378379
Returns:
379380
DownloadedPackage: The created DownloadedPackage instance, or
380381
None if storage is disabled or an error occurs.
381382
382383
"""
383384
logger.info(
384-
f"store_package_archive called with project: {project}, url: {url},"
385-
"file_path: {file_path}"
385+
f"store_package_archive called with project: {project}, "
386+
f"url: {url}, "
387+
f"file_path: {file_path}"
386388
)
387389

388390
if not getattr(settings, "ENABLE_PACKAGE_STORAGE", False):
@@ -392,42 +394,50 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
392394
if not file_path and not url:
393395
logger.error("Either file_path or url must be provided")
394396
return None
395-
396-
if url:
397-
existing = DownloadedPackage.objects.filter(project=project, url=url).first()
398-
if existing and not should_rescan(existing, pipeline_name):
399-
logger.info(f"Using existing package: {existing.package_archive.package_file.name}")
400-
return existing
401397

398+
content, filename = get_package_content_and_filename(file_path, url)
399+
if not content:
400+
return None
401+
402+
archive = get_or_create_archive(content, file_path, filename)
403+
if not archive:
404+
return None
405+
406+
dp = get_or_create_downloaded_package(
407+
project, url, filename, archive, pipeline_name
408+
)
409+
return dp
410+
411+
412+
def get_package_content_and_filename(file_path, url):
402413
if file_path:
403-
file_path = str(file_path)
404-
if not Path(file_path).exists():
414+
file_path = str(file_path)
415+
if not Path(file_path).exists():
405416
logger.error(f"File not found: {file_path}")
406-
return None
407-
with open(file_path, "rb") as f:
417+
return None, None
418+
with open(file_path, "rb") as f:
408419
content = f.read()
409-
filename = os.path.basename(file_path)
420+
filename = os.path.basename(file_path)
410421
else:
411422
try:
412-
response = requests.get(url, stream=True)
423+
response = requests.get(url, stream=True, timeout=HTTP_REQUEST_TIMEOUT)
413424
response.raise_for_status()
414425
content = response.content
415426
filename = os.path.basename(url.split("?")[0])
416427
except requests.RequestException as e:
417428
logger.error(f"Failed to download {url}: {e}")
418-
return None
419-
429+
return None, None
430+
return content, filename
431+
432+
433+
def get_or_create_archive(content, file_path, filename):
420434
checksum = hashlib.sha256(content).hexdigest()
421435
logger.info(f"Calculated SHA256: {checksum}")
422436

423437
existing_archive = PackageArchive.objects.filter(checksum_sha256=checksum).first()
424438
if existing_archive:
425-
existing = DownloadedPackage.objects.filter(
426-
project=project, package_archive=existing_archive
427-
).first()
428-
if existing and not should_rescan(existing, pipeline_name):
429-
logger.info(f"Using existing package: {existing_archive.package_file.name}")
430-
return existing
439+
logger.info(f"Using existing package: {existing_archive.package_file.name}")
440+
return existing_archive
431441

432442
try:
433443
archive = PackageArchive(
@@ -438,10 +448,13 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
438448
archive.package_file.save(filename, File(f), save=False)
439449
archive.save()
440450
logger.info(f"Created PackageArchive: {archive.checksum_sha256}")
451+
return archive
441452
except Exception as e:
442453
logger.error(f"Error creating PackageArchive: {e}")
443454
return None
444455

456+
457+
def get_or_create_downloaded_package(project, url, filename, archive, pipeline_name):
445458
try:
446459
dp = DownloadedPackage.objects.create(
447460
project=project,
@@ -457,12 +470,6 @@ def store_package_archive(project, url=None, file_path=None, pipeline_name=None)
457470
logger.error(f"Error creating DownloadedPackage: {e}")
458471
return None
459472

460-
def should_rescan(package, pipeline_name):
461-
"""Check if rescanning is needed based on ScanCode version or pipeline."""
462-
current_version = scanpipe.__version__
463-
return package.scancode_version != current_version or (
464-
pipeline_name and package.pipeline_name != pipeline_name
465-
)
466473

467474
SCHEME_TO_FETCHER_MAPPING = {
468475
"http": fetch_http,

0 commit comments

Comments
 (0)