@@ -19,31 +19,37 @@ def download_file_with_marker(
1919 url : str , output_path : Path , description : str = "Downloading"
2020) -> bool :
2121 """
22- Download a file from a URL with progress indication and create a .done marker.
22+ Download a file from a URL with progress indication.
23+
24+ Downloads to a temporary path with a .downloading suffix, then renames it to output_path on completion.
2325
2426 Args:
2527 url: URL to download from
2628 output_path: Path where to save the file
2729 description: Description for progress indicator
2830
2931 Returns:
30- True if downloaded successfully, False if skipped (already complete )
32+ True if downloaded successfully, False if skipped (already exists )
3133
3234 Raises:
3335 VotuDerepError: If download fails
3436 """
35- done_marker = Path (str (output_path ) + ".done" )
37+ # Path with .downloading suffix for in-progress downloads
38+ downloading_path = Path (str (output_path ) + ".downloading" )
3639
37- # Check if already downloaded
38- if done_marker . exists () and output_path .exists ():
40+ # Check if file already exists (complete download)
41+ if output_path .exists ():
3942 logger .info (f"Skipping download (already complete): { output_path .name } " )
40- console .print (f"[yellow]⊙[/yellow] { output_path .name } already downloaded" )
43+ console .print (
44+ f"[yellow]⊙[/yellow] Skipping download of { output_path .name } (already exists)"
45+ )
4146 return False
4247
43- # Clean up partial download if exists
44- if output_path .exists () and not done_marker .exists ():
45- logger .warning (f"Removing partial download: { output_path } " )
46- output_path .unlink ()
48+ # Clean up any failed download (.downloading file)
49+ if downloading_path .exists ():
50+ logger .warning (f"Removing failed download: { downloading_path } " )
51+ console .print (f"[yellow]![/yellow] Removing incomplete download: { downloading_path .name } " )
52+ downloading_path .unlink ()
4753
4854 try :
4955 with Progress (
@@ -59,27 +65,30 @@ def reporthook(block_num, block_size, total_size):
5965 percent = min (100 , (block_num * block_size * 100 ) / total_size )
6066 progress .update (task , completed = percent )
6167
62- urllib .request .urlretrieve (url , str (output_path ), reporthook )
68+ # Download to .downloading file first
69+ urllib .request .urlretrieve (url , str (downloading_path ), reporthook )
6370 progress .update (task , completed = 100 )
6471
65- # Create .done marker
66- done_marker . touch ( )
72+ # Move to final location (remove .downloading suffix)
73+ downloading_path . rename ( output_path )
6774 logger .info (f"Downloaded: { output_path } " )
6875 console .print (f"[green]✓[/green] Downloaded { output_path .name } " )
6976
7077 return True
7178
7279 except Exception as e :
7380 # Clean up failed download
74- if output_path .exists ():
75- output_path .unlink ()
81+ if downloading_path .exists ():
82+ downloading_path .unlink ()
7683 raise VotuDerepError (f"Failed to download { url } : { e } " )
7784
7885
7986def extract_tarball (tarball_path : Path , output_dir : Path , description : str = "Extracting" ) -> bool :
8087 """
8188 Extract a tarball to the specified directory.
8289
90+ Creates a "<tarball name>.extracted" marker file next to the tarball after successful extraction.
91+
8392 Args:
8493 tarball_path: Path to the tarball file
8594 output_dir: Directory where to extract
@@ -91,8 +100,10 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
91100 Raises:
92101 VotuDerepError: If extraction fails
93102 """
103+ # Create marker file path (archive_name.extracted in same directory)
104+ extraction_marker = tarball_path .parent / f"{ tarball_path .name } .extracted"
105+
94106 # Determine the expected extracted directory name
95- # Remove .tar.gz or .tgz extensions
96107 if tarball_path .name .endswith (".tar.gz" ):
97108 db_name = tarball_path .name [:- 7 ]
98109 elif tarball_path .name .endswith (".tgz" ):
@@ -101,14 +112,22 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
101112 db_name = tarball_path .stem
102113
103114 extracted_dir = output_dir / db_name
104- extraction_marker = extracted_dir / ".extracted"
105115
106- # Check if already extracted
107- if extraction_marker .exists () and extracted_dir . exists () :
116+ # Check if extraction marker exists (successful extraction)
117+ if extraction_marker .exists ():
108118 logger .info (f"Skipping extraction (already complete): { db_name } " )
109- console .print (f"[yellow]⊙[/yellow] { db_name } already extracted" )
119+ console .print (f"[yellow]⊙[/yellow] Skipping extraction of { db_name } ( already extracted) " )
110120 return False
111121
122+ # If no marker but directory exists, assume failed extraction and clean up
123+ if extracted_dir .exists () and not extraction_marker .exists ():
124+ logger .warning (f"Found incomplete extraction, removing: { extracted_dir } " )
125+ console .print (f"[yellow]![/yellow] Removing incomplete extraction: { extracted_dir } " )
126+ # Remove the incomplete extraction
127+ import shutil
128+
129+ shutil .rmtree (extracted_dir )
130+
112131 try :
113132 with Progress (
114133 SpinnerColumn (),
@@ -124,16 +143,19 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
124143
125144 progress .update (task , completed = 100 )
126145
127- # Create extraction marker
128- if extracted_dir .exists ():
129- extraction_marker .touch ()
130-
146+ # Create extraction marker after successful extraction
147+ extraction_marker .touch ()
131148 logger .info (f"Extracted: { tarball_path } to { output_dir } " )
132149 console .print (f"[green]✓[/green] Extracted { db_name } " )
133150
134151 return True
135152
136153 except Exception as e :
154+ # Clean up failed extraction
155+ if extracted_dir .exists ():
156+ import shutil
157+
158+ shutil .rmtree (extracted_dir )
137159 raise VotuDerepError (f"Failed to extract { tarball_path } : { e } " )
138160
139161
@@ -218,8 +240,9 @@ def getdbs(ctx, outdir: str, force: bool):
218240
219241 # Download
220242 try :
221- download_file_with_marker (db ["url" ], tarball_path , f"Downloading { db ['name' ]} " )
222- downloaded_tarballs .append (tarball_path )
243+ if download_file_with_marker (db ["url" ], tarball_path , f"Downloading { db ['name' ]} " ):
244+ # Only add to list if actually downloaded (not skipped)
245+ downloaded_tarballs .append (tarball_path )
223246 except VotuDerepError as e :
224247 console .print (f"[red]✗[/red] Failed to download { db ['name' ]} : { e } " )
225248 success = False
@@ -233,32 +256,33 @@ def getdbs(ctx, outdir: str, force: bool):
233256 success = False
234257 raise
235258
236- # Success! Clean up tarballs
237- if success :
238- console .print ("\n [bold blue]Cleaning up...[/bold blue]" )
259+ # Success! Clean up tarballs (only those downloaded in this run; skipped downloads are kept)
260+ if success and downloaded_tarballs :
261+ console .print ("\n [bold blue]Cleaning up newly downloaded tarballs ...[/bold blue]" )
239262 for tarball_path in downloaded_tarballs :
240263 if tarball_path .exists ():
241264 tarball_path .unlink ()
242265 logger .info (f"Removed tarball: { tarball_path } " )
243266 console .print (f"[green]✓[/green] Removed { tarball_path .name } " )
244267
245- # Remove .done marker
246- done_marker = Path (str (tarball_path ) + ".done" )
247- if done_marker .exists ():
248- done_marker .unlink ()
249-
250- console .print ("\n [bold green]✓ All databases downloaded successfully![/bold green]" )
251- console .print (f"[blue]Databases saved to:[/blue] { outdir_path } " )
252-
253- # Summary
254- if verbose :
255- console .print ("\n [bold]Database directories:[/bold]" )
256- for item in sorted (outdir_path .iterdir ()):
257- if item .is_dir ():
258- # Calculate directory size
259- total_size = sum (f .stat ().st_size for f in item .rglob ("*" ) if f .is_file ())
260- size_mb = total_size / (1024 * 1024 )
261- console .print (f" • { item .name } / ({ size_mb :.1f} MB)" )
268+ # Also remove the .extracted marker since we removed the archive
269+ extraction_marker = tarball_path .parent / f"{ tarball_path .name } .extracted"
270+ if extraction_marker .exists ():
271+ extraction_marker .unlink ()
272+ logger .info (f"Removed marker: { extraction_marker } " )
273+
274+ console .print ("\n [bold green]✓ All databases processed successfully![/bold green]" )
275+ console .print (f"[blue]Databases saved to:[/blue] { outdir_path } " )
276+
277+ # Summary
278+ if verbose :
279+ console .print ("\n [bold]Database directories:[/bold]" )
280+ for item in sorted (outdir_path .iterdir ()):
281+ if item .is_dir () and not item .name .startswith ("." ):
282+ # Calculate directory size
283+ total_size = sum (f .stat ().st_size for f in item .rglob ("*" ) if f .is_file ())
284+ size_mb = total_size / (1024 * 1024 )
285+ console .print (f" • { item .name } / ({ size_mb :.1f} MB)" )
262286
263287 except VotuDerepError as e :
264288 console .print (f"\n [bold red]Error:[/bold red] { e } " )
0 commit comments