Skip to content

Commit 88f3167

Browse files
committed
resilient
1 parent ee62a6f commit 88f3167

File tree

1 file changed

+70
-46
lines changed

1 file changed

+70
-46
lines changed

src/votuderep/commands/getdbs.py

Lines changed: 70 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -19,31 +19,37 @@ def download_file_with_marker(
1919
url: str, output_path: Path, description: str = "Downloading"
2020
) -> bool:
2121
"""
22-
Download a file from a URL with progress indication and create a .done marker.
22+
Download a file from a URL with progress indication.
23+
24+
Uses .downloading suffix during download, removes it on completion.
2325
2426
Args:
2527
url: URL to download from
2628
output_path: Path where to save the file
2729
description: Description for progress indicator
2830
2931
Returns:
30-
True if downloaded successfully, False if skipped (already complete)
32+
True if downloaded successfully, False if skipped (already exists)
3133
3234
Raises:
3335
VotuDerepError: If download fails
3436
"""
35-
done_marker = Path(str(output_path) + ".done")
37+
# Path with .downloading suffix for in-progress downloads
38+
downloading_path = Path(str(output_path) + ".downloading")
3639

37-
# Check if already downloaded
38-
if done_marker.exists() and output_path.exists():
40+
# Check if file already exists (complete download)
41+
if output_path.exists():
3942
logger.info(f"Skipping download (already complete): {output_path.name}")
40-
console.print(f"[yellow]⊙[/yellow] {output_path.name} already downloaded")
43+
console.print(
44+
f"[yellow]⊙[/yellow] Skipping download of {output_path.name} (already exists)"
45+
)
4146
return False
4247

43-
# Clean up partial download if exists
44-
if output_path.exists() and not done_marker.exists():
45-
logger.warning(f"Removing partial download: {output_path}")
46-
output_path.unlink()
48+
# Clean up any failed download (.downloading file)
49+
if downloading_path.exists():
50+
logger.warning(f"Removing failed download: {downloading_path}")
51+
console.print(f"[yellow]![/yellow] Removing incomplete download: {downloading_path.name}")
52+
downloading_path.unlink()
4753

4854
try:
4955
with Progress(
@@ -59,27 +65,30 @@ def reporthook(block_num, block_size, total_size):
5965
percent = min(100, (block_num * block_size * 100) / total_size)
6066
progress.update(task, completed=percent)
6167

62-
urllib.request.urlretrieve(url, str(output_path), reporthook)
68+
# Download to .downloading file first
69+
urllib.request.urlretrieve(url, str(downloading_path), reporthook)
6370
progress.update(task, completed=100)
6471

65-
# Create .done marker
66-
done_marker.touch()
72+
# Move to final location (remove .downloading suffix)
73+
downloading_path.rename(output_path)
6774
logger.info(f"Downloaded: {output_path}")
6875
console.print(f"[green]✓[/green] Downloaded {output_path.name}")
6976

7077
return True
7178

7279
except Exception as e:
7380
# Clean up failed download
74-
if output_path.exists():
75-
output_path.unlink()
81+
if downloading_path.exists():
82+
downloading_path.unlink()
7683
raise VotuDerepError(f"Failed to download {url}: {e}")
7784

7885

7986
def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Extracting") -> bool:
8087
"""
8188
Extract a tarball to the specified directory.
8289
90+
Creates .extracted marker file after successful extraction.
91+
8392
Args:
8493
tarball_path: Path to the tarball file
8594
output_dir: Directory where to extract
@@ -91,8 +100,10 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
91100
Raises:
92101
VotuDerepError: If extraction fails
93102
"""
103+
# Create marker file path (archive_name.extracted in same directory)
104+
extraction_marker = tarball_path.parent / f"{tarball_path.name}.extracted"
105+
94106
# Determine the expected extracted directory name
95-
# Remove .tar.gz or .tgz extensions
96107
if tarball_path.name.endswith(".tar.gz"):
97108
db_name = tarball_path.name[:-7]
98109
elif tarball_path.name.endswith(".tgz"):
@@ -101,14 +112,22 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
101112
db_name = tarball_path.stem
102113

103114
extracted_dir = output_dir / db_name
104-
extraction_marker = extracted_dir / ".extracted"
105115

106-
# Check if already extracted
107-
if extraction_marker.exists() and extracted_dir.exists():
116+
# Check if extraction marker exists (successful extraction)
117+
if extraction_marker.exists():
108118
logger.info(f"Skipping extraction (already complete): {db_name}")
109-
console.print(f"[yellow]⊙[/yellow] {db_name} already extracted")
119+
console.print(f"[yellow]⊙[/yellow] Skipping extraction of {db_name} (already extracted)")
110120
return False
111121

122+
# If no marker but directory exists, assume failed extraction and clean up
123+
if extracted_dir.exists() and not extraction_marker.exists():
124+
logger.warning(f"Found incomplete extraction, removing: {extracted_dir}")
125+
console.print(f"[yellow]![/yellow] Removing incomplete extraction: {extracted_dir}")
126+
# Remove the incomplete extraction
127+
import shutil
128+
129+
shutil.rmtree(extracted_dir)
130+
112131
try:
113132
with Progress(
114133
SpinnerColumn(),
@@ -124,16 +143,19 @@ def extract_tarball(tarball_path: Path, output_dir: Path, description: str = "Ex
124143

125144
progress.update(task, completed=100)
126145

127-
# Create extraction marker
128-
if extracted_dir.exists():
129-
extraction_marker.touch()
130-
146+
# Create extraction marker after successful extraction
147+
extraction_marker.touch()
131148
logger.info(f"Extracted: {tarball_path} to {output_dir}")
132149
console.print(f"[green]✓[/green] Extracted {db_name}")
133150

134151
return True
135152

136153
except Exception as e:
154+
# Clean up failed extraction
155+
if extracted_dir.exists():
156+
import shutil
157+
158+
shutil.rmtree(extracted_dir)
137159
raise VotuDerepError(f"Failed to extract {tarball_path}: {e}")
138160

139161

@@ -218,8 +240,9 @@ def getdbs(ctx, outdir: str, force: bool):
218240

219241
# Download
220242
try:
221-
download_file_with_marker(db["url"], tarball_path, f"Downloading {db['name']}")
222-
downloaded_tarballs.append(tarball_path)
243+
if download_file_with_marker(db["url"], tarball_path, f"Downloading {db['name']}"):
244+
# Only add to list if actually downloaded (not skipped)
245+
downloaded_tarballs.append(tarball_path)
223246
except VotuDerepError as e:
224247
console.print(f"[red]✗[/red] Failed to download {db['name']}: {e}")
225248
success = False
@@ -233,32 +256,33 @@ def getdbs(ctx, outdir: str, force: bool):
233256
success = False
234257
raise
235258

236-
# Success! Clean up tarballs
237-
if success:
238-
console.print("\n[bold blue]Cleaning up...[/bold blue]")
259+
# Success! Clean up tarballs (optional - only those we downloaded in this run)
260+
if success and downloaded_tarballs:
261+
console.print("\n[bold blue]Cleaning up newly downloaded tarballs...[/bold blue]")
239262
for tarball_path in downloaded_tarballs:
240263
if tarball_path.exists():
241264
tarball_path.unlink()
242265
logger.info(f"Removed tarball: {tarball_path}")
243266
console.print(f"[green]✓[/green] Removed {tarball_path.name}")
244267

245-
# Remove .done marker
246-
done_marker = Path(str(tarball_path) + ".done")
247-
if done_marker.exists():
248-
done_marker.unlink()
249-
250-
console.print("\n[bold green]✓ All databases downloaded successfully![/bold green]")
251-
console.print(f"[blue]Databases saved to:[/blue] {outdir_path}")
252-
253-
# Summary
254-
if verbose:
255-
console.print("\n[bold]Database directories:[/bold]")
256-
for item in sorted(outdir_path.iterdir()):
257-
if item.is_dir():
258-
# Calculate directory size
259-
total_size = sum(f.stat().st_size for f in item.rglob("*") if f.is_file())
260-
size_mb = total_size / (1024 * 1024)
261-
console.print(f" • {item.name}/ ({size_mb:.1f} MB)")
268+
# Also remove the .extracted marker since we removed the archive
269+
extraction_marker = tarball_path.parent / f"{tarball_path.name}.extracted"
270+
if extraction_marker.exists():
271+
extraction_marker.unlink()
272+
logger.info(f"Removed marker: {extraction_marker}")
273+
274+
console.print("\n[bold green]✓ All databases processed successfully![/bold green]")
275+
console.print(f"[blue]Databases saved to:[/blue] {outdir_path}")
276+
277+
# Summary
278+
if verbose:
279+
console.print("\n[bold]Database directories:[/bold]")
280+
for item in sorted(outdir_path.iterdir()):
281+
if item.is_dir() and not item.name.startswith("."):
282+
# Calculate directory size
283+
total_size = sum(f.stat().st_size for f in item.rglob("*") if f.is_file())
284+
size_mb = total_size / (1024 * 1024)
285+
console.print(f" • {item.name}/ ({size_mb:.1f} MB)")
262286

263287
except VotuDerepError as e:
264288
console.print(f"\n[bold red]Error:[/bold red] {e}")

0 commit comments

Comments
 (0)