|
11 | 11 | from archivebox.config import DATA_DIR, CONSTANTS, ARCHIVE_DIR |
12 | 12 | from archivebox.config.common import SHELL_CONFIG |
13 | 13 | from archivebox.misc.legacy import parse_json_links_details |
14 | | -from archivebox.misc.folders import ( |
15 | | - get_indexed_folders, |
16 | | - get_archived_folders, |
17 | | - get_invalid_folders, |
18 | | - get_unarchived_folders, |
19 | | - get_present_folders, |
20 | | - get_valid_folders, |
21 | | - get_duplicate_folders, |
22 | | - get_orphaned_folders, |
23 | | - get_corrupted_folders, |
24 | | - get_unrecognized_folders, |
25 | | -) |
26 | 14 | from archivebox.misc.system import get_dir_size |
27 | 15 | from archivebox.misc.logging_util import printable_filesize |
28 | 16 |
|
@@ -55,42 +43,40 @@ def status(out_dir: Path=DATA_DIR) -> None: |
55 | 43 | size = printable_filesize(num_bytes) |
56 | 44 | print(f' Size: {size} across {num_files} files in {num_dirs} directories') |
57 | 45 |
|
58 | | - num_indexed = len(get_indexed_folders(links, out_dir=out_dir)) |
59 | | - num_archived = len(get_archived_folders(links, out_dir=out_dir)) |
60 | | - num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir)) |
61 | | - print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})') |
62 | | - print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})') |
63 | | - print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})') |
64 | | - |
65 | | - num_present = len(get_present_folders(links, out_dir=out_dir)) |
66 | | - num_valid = len(get_valid_folders(links, out_dir=out_dir)) |
| 46 | + # Use DB as source of truth for snapshot status |
| 47 | + num_indexed = links.count() |
| 48 | + num_archived = links.filter(status='archived').count() or links.exclude(downloaded_at=None).count() |
| 49 | + num_unarchived = links.filter(status='queued').count() or links.filter(downloaded_at=None).count() |
| 50 | + print(f' > indexed: {num_indexed}'.ljust(36), '(total snapshots in DB)') |
| 51 | + print(f' > archived: {num_archived}'.ljust(36), '(snapshots with archived content)') |
| 52 | + print(f' > unarchived: {num_unarchived}'.ljust(36), '(snapshots pending archiving)') |
| 53 | + |
| 54 | + # Count directories on filesystem |
| 55 | + num_present = 0 |
| 56 | + orphaned_dirs = [] |
| 57 | + if ARCHIVE_DIR.exists(): |
| 58 | + for entry in ARCHIVE_DIR.iterdir(): |
| 59 | + if entry.is_dir(): |
| 60 | + num_present += 1 |
| 61 | + if not links.filter(timestamp=entry.name).exists(): |
| 62 | + orphaned_dirs.append(str(entry)) |
| 63 | + |
| 64 | + num_valid = min(num_present, num_indexed) # approximate |
67 | 65 | print() |
68 | | - print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})') |
69 | | - print(f' > [green]valid:[/green] {num_valid}'.ljust(36), f' ({get_valid_folders.__doc__})') |
70 | | - |
71 | | - duplicate = get_duplicate_folders(links, out_dir=out_dir) |
72 | | - orphaned = get_orphaned_folders(links, out_dir=out_dir) |
73 | | - corrupted = get_corrupted_folders(links, out_dir=out_dir) |
74 | | - unrecognized = get_unrecognized_folders(links, out_dir=out_dir) |
75 | | - num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized}) |
76 | | - print(f' > [red]invalid:[/red] {num_invalid}'.ljust(36), f' ({get_invalid_folders.__doc__})') |
77 | | - print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})') |
78 | | - print(f' > orphaned: {len(orphaned)}'.ljust(36), f'({get_orphaned_folders.__doc__})') |
79 | | - print(f' > corrupted: {len(corrupted)}'.ljust(36), f'({get_corrupted_folders.__doc__})') |
80 | | - print(f' > unrecognized: {len(unrecognized)}'.ljust(36), f'({get_unrecognized_folders.__doc__})') |
| 66 | + print(f' > present: {num_present}'.ljust(36), '(directories in archive/)') |
| 67 | + print(f' > [green]valid:[/green] {num_valid}'.ljust(36), ' (directories with matching DB entry)') |
| 68 | + |
| 69 | + num_orphaned = len(orphaned_dirs) |
| 70 | + print(f' > [red]orphaned:[/red] {num_orphaned}'.ljust(36), ' (directories without matching DB entry)') |
81 | 71 |
|
82 | 72 | if num_indexed: |
83 | | - print(' [violet]Hint:[/violet] You can list link data directories by status like so:') |
84 | | - print(' [green]archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)[/green]') |
| 73 | + print(' [violet]Hint:[/violet] You can list snapshots by status like so:') |
| 74 | + print(' [green]archivebox list --status=<status> (e.g. archived, queued, etc.)[/green]') |
85 | 75 |
|
86 | | - if orphaned: |
| 76 | + if orphaned_dirs: |
87 | 77 | print(' [violet]Hint:[/violet] To automatically import orphaned data directories into the main index, run:') |
88 | 78 | print(' [green]archivebox init[/green]') |
89 | 79 |
|
90 | | - if num_invalid: |
91 | | - print(' [violet]Hint:[/violet] You may need to manually remove or fix some invalid data directories, afterwards make sure to run:') |
92 | | - print(' [green]archivebox init[/green]') |
93 | | - |
94 | 80 | print() |
95 | 81 | print('[green]\\[*] Scanning recent archive changes and user logins:[/green]') |
96 | 82 | print(f'[yellow] {CONSTANTS.LOGS_DIR}/*[/yellow]') |
|
0 commit comments