4444from scanpipe .models import InputSource
4545from scanpipe .pipes import scancode
4646from scanpipe .pipes .output import mappings_key_by_fieldname
47- from scancodeio .settings import ENABLE_DOWNLOAD_ARCHIVING
4847from scancodeio .settings import download_store
4948
5049logger = logging .getLogger (__name__ )
@@ -262,61 +261,47 @@ def add_input_from_url(project, url, filename=None):
262261 logger .error (f"Failed to download { url } : { e } " )
263262 raise
264263
265- should_archive = (
266- ENABLE_DOWNLOAD_ARCHIVING == "always"
267- or (
268- ENABLE_DOWNLOAD_ARCHIVING == "per_project"
269- and getattr (project , "archive_downloads" , False )
270- )
271- or (
272- ENABLE_DOWNLOAD_ARCHIVING == "per_input"
273- and "archive" in getattr (project , "input_tags" , [])
274- )
275- )
264+ filename = filename or url .split ("/" )[- 1 ] or "downloaded_file"
265+ url_hash = hashlib .sha256 (url .encode ()).hexdigest ()
266+ archive_path = Path (project .settings .CENTRAL_ARCHIVE_PATH ) / url_hash / filename
276267
277- filename = filename or url .split ("/" )[- 1 ]
278- if should_archive and download_store :
279- sha256 = hashlib .sha256 (content ).hexdigest ()
280- existing_download = download_store .get (sha256 )
281- if not existing_download :
282- try :
283- download = download_store .put (
284- content = content ,
285- download_url = url ,
286- download_date = datetime .now ().isoformat (),
287- filename = filename ,
288- )
289- except Exception as e :
290- logger .error (f"Failed to archive download for { url } : { e } " )
291- raise
292- else :
293- download = existing_download
294-
295- InputSource .objects .create (
296- project = project ,
297- sha256 = download .sha256 ,
298- download_url = download .download_url ,
299- filename = download .filename ,
300- download_date = download .download_date ,
301- is_uploaded = False ,
302- )
268+ if download_store :
269+ try :
270+ download = download_store .put (
271+ content = content ,
272+ download_url = url ,
273+ download_date = datetime .now ().isoformat (),
274+ filename = filename ,
275+ )
276+ InputSource .objects .create (
277+ project = project ,
278+ sha256 = download .sha256 ,
279+ download_url = download .download_url ,
280+ filename = download .filename ,
281+ download_date = download .download_date ,
282+ file_path = str (download .path ),
283+ is_uploaded = False ,
284+ )
285+ except Exception as e :
286+ logger .error (f"Failed to archive download for { url } : { e } " )
287+ raise
303288 else :
304289 input_path = project .input_path / filename
305290 try :
291+ input_path .parent .mkdir (parents = True , exist_ok = True )
306292 with open (input_path , "wb" ) as f :
307293 f .write (content )
294+ InputSource .objects .create (
295+ project = project ,
296+ filename = filename ,
297+ download_url = url ,
298+ file_path = str (input_path ),
299+ is_uploaded = False ,
300+ )
308301 except Exception as e :
309302 logger .error (f"Failed to save { filename } to { input_path } : { e } " )
310303 raise
311304
312- InputSource .objects .create (
313- project = project ,
314- filename = filename ,
315- download_url = url ,
316- is_uploaded = False ,
317- )
318-
319-
320305def add_input_from_upload (project , uploaded_file ):
321306 """
322307 Add an uploaded file as an InputSource for the specified ``project``.
@@ -325,54 +310,38 @@ def add_input_from_upload(project, uploaded_file):
325310 content = uploaded_file .read ()
326311 filename = uploaded_file .name
327312
328- should_archive = (
329- ENABLE_DOWNLOAD_ARCHIVING == "always"
330- or (
331- ENABLE_DOWNLOAD_ARCHIVING == "per_project"
332- and getattr (project , "archive_downloads" , False )
333- )
334- or (
335- ENABLE_DOWNLOAD_ARCHIVING == "per_input"
336- and "archive" in getattr (project , "input_tags" , [])
337- )
338- )
339-
340- if should_archive and download_store :
341- sha256 = hashlib .sha256 (content ).hexdigest ()
342- existing_download = download_store .get (sha256 )
343- if not existing_download :
344- try :
345- download = download_store .put (
346- content = content ,
347- download_url = "" , # No URL for uploads
348- download_date = datetime .now ().isoformat (),
349- filename = filename ,
350- )
351- except Exception as e :
352- logger .error (f"Failed to archive upload { filename } : { e } " )
353- raise
354- else :
355- download = existing_download
356-
357- InputSource .objects .create (
358- project = project ,
359- sha256 = download .sha256 ,
360- download_url = download .download_url ,
361- filename = download .filename ,
362- download_date = download .download_date ,
363- is_uploaded = True ,
364- )
313+ if download_store :
314+ try :
315+ download = download_store .put (
316+ content = content ,
317+ download_url = "" ,
318+ download_date = datetime .now ().isoformat (),
319+ filename = filename ,
320+ )
321+ InputSource .objects .create (
322+ project = project ,
323+ sha256 = download .sha256 ,
324+ download_url = download .download_url ,
325+ filename = download .filename ,
326+ download_date = download .download_date ,
327+ file_path = str (download .path ),
328+ is_uploaded = True ,
329+ )
330+ except Exception as e :
331+ logger .error (f"Failed to archive upload { filename } : { e } " )
332+ raise
365333 else :
366334 input_path = project .input_path / filename
367335 try :
336+ input_path .parent .mkdir (parents = True , exist_ok = True )
368337 with open (input_path , "wb" ) as f :
369338 f .write (content )
339+ InputSource .objects .create (
340+ project = project ,
341+ filename = filename ,
342+ file_path = str (input_path ),
343+ is_uploaded = True ,
344+ )
370345 except Exception as e :
371346 logger .error (f"Failed to save { filename } to { input_path } : { e } " )
372- raise
373-
374- InputSource .objects .create (
375- project = project ,
376- filename = filename ,
377- is_uploaded = True ,
378- )
347+ raise
0 commit comments