Skip to content

Commit 4ad0d9a

Browse files
AlpinDalehanouticelinaWauplin
authored
feat: add --sort arg to delete-cache to sort by size (#2815)
* feat: add `--sort` arg to `delete-cache` to sort by size * update the arg to take `size` * implement `alphabetical`, `lastUpdated` and `lastUsed` sorting options * fixes * Apply suggestions from code review * fix test for python 3.8 * fix * fix * make the test less flaky * fix another test --------- Co-authored-by: Celina Hanouti <[email protected]> Co-authored-by: Lucain <[email protected]>
1 parent 9662bdc commit 4ad0d9a

File tree

2 files changed

+161
-37
lines changed

2 files changed

+161
-37
lines changed

src/huggingface_hub/commands/delete_cache.py

Lines changed: 66 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
huggingface-cli delete-cache
1919
huggingface-cli delete-cache --disable-tui
2020
huggingface-cli delete-cache --dir ~/.cache/huggingface/hub
21+
huggingface-cli delete-cache --sort=size
2122
2223
NOTE:
2324
This command is based on `InquirerPy` to build the multiselect menu in the terminal.
@@ -50,7 +51,6 @@
5051
TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)` ?
5152
TODO: add "--keep-last" arg to delete revisions that are not on `main` ref
5253
TODO: add "--filter" arg to filter repositories by name ?
53-
TODO: add "--sort" arg to sort by size ?
5454
TODO: add "--limit" arg to limit to X repos ?
5555
TODO: add "-y" arg for immediate deletion ?
5656
See discussions in https://github.com/huggingface/huggingface_hub/issues/1025.
@@ -60,7 +60,7 @@
6060
from argparse import Namespace, _SubParsersAction
6161
from functools import wraps
6262
from tempfile import mkstemp
63-
from typing import Any, Callable, Iterable, List, Optional, Union
63+
from typing import Any, Callable, Iterable, List, Literal, Optional, Union
6464

6565
from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir
6666
from . import BaseHuggingfaceCLICommand
@@ -76,6 +76,8 @@
7676
except ImportError:
7777
_inquirer_py_available = False
7878

79+
SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"]
80+
7981

8082
def require_inquirer_py(fn: Callable) -> Callable:
8183
"""Decorator to flag methods that require `InquirerPy`."""
@@ -120,11 +122,25 @@ def register_subcommand(parser: _SubParsersAction):
120122
),
121123
)
122124

125+
delete_cache_parser.add_argument(
126+
"--sort",
127+
nargs="?",
128+
choices=["alphabetical", "lastUpdated", "lastUsed", "size"],
129+
help=(
130+
"Sort repositories by the specified criteria. Options: "
131+
"'alphabetical' (A-Z), "
132+
"'lastUpdated' (newest first), "
133+
"'lastUsed' (most recent first), "
134+
"'size' (largest first)."
135+
),
136+
)
137+
123138
delete_cache_parser.set_defaults(func=DeleteCacheCommand)
124139

125140
def __init__(self, args: Namespace) -> None:
# Copy the parsed CLI options from `args` onto the command instance.
126141
# Value of `args.dir`; annotated as optional, so it may be None.
self.cache_dir: Optional[str] = args.dir
127142
# Value of `args.disable_tui`; when true, `run` uses the no-TUI manual review.
self.disable_tui: bool = args.disable_tui
143+
# Value of `args.sort`: one of the `--sort` choices, or None when the option is
# absent or passed without a value (it is declared with `nargs="?"`).
self.sort_by: Optional[SortingOption_T] = args.sort
128144

129145
def run(self):
130146
"""Run `delete-cache` command with or without TUI."""
@@ -133,9 +149,9 @@ def run(self):
133149

134150
# Manual review from the user
135151
if self.disable_tui:
136-
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[])
152+
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
137153
else:
138-
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[])
154+
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
139155

140156
# If deletion is not cancelled
141157
if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes:
@@ -163,14 +179,35 @@ def run(self):
163179
print("Deletion is cancelled. Do nothing.")
164180

165181

182+
def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T] = None) -> Any:
    """Return the key used to order a cached repo in the manual review list.

    The key is meant for an ascending `sorted(...)` call. Numeric criteria are negated
    so that the largest value (newest, most recently used, biggest) comes first.

    Args:
        repo (`CachedRepoInfo`):
            Cached repo to compute the sorting key for.
        sort_by (*Optional[SortingOption_T]*):
            Sorting criterion. Choices: "alphabetical", "lastUpdated", "lastUsed", "size".
            Any other value (including `None`) selects the default order.

    Return:
        A `(repo_type, repo_id)` tuple for "alphabetical" (case-insensitive id) and for
        the default order (id compared as-is), or a negated number for the other
        criteria.
    """
    if sort_by == "alphabetical":
        # Group by repo type first, then compare names case-insensitively.
        return (repo.repo_type, repo.repo_id.lower())
    if sort_by == "lastUpdated":
        # Newest first. `max()` raises ValueError on an empty iterable, so a repo
        # without any revision gets the fallback 0.0 instead of aborting the sort.
        # Assuming positive timestamps, such a repo ends up last.
        return -max((rev.last_modified for rev in repo.revisions), default=0.0)
    if sort_by == "lastUsed":
        # Most recently used first.
        return -repo.last_accessed
    if sort_by == "size":
        # Largest first.
        return -repo.size_on_disk
    # Default stable order: by repo type, then by repo id.
    return (repo.repo_type, repo.repo_id)
193+
194+
166195
@require_inquirer_py
167-
def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
196+
def _manual_review_tui(
197+
hf_cache_info: HFCacheInfo,
198+
preselected: List[str],
199+
sort_by: Optional[SortingOption_T] = None,
200+
) -> List[str]:
168201
"""Ask the user for a manual review of the revisions to delete.
169202
170203
Displays a multi-select menu in the terminal (TUI).
171204
"""
172205
# Define multiselect list
173-
choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)
206+
choices = _get_tui_choices_from_scan(
207+
repos=hf_cache_info.repos,
208+
preselected=preselected,
209+
sort_by=sort_by,
210+
)
174211
checkbox = inquirer.checkbox(
175212
message="Select revisions to delete:",
176213
choices=choices, # List of revisions with some pre-selection
@@ -213,22 +250,27 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
213250
return inquirer.confirm(message, default=default).execute()
214251

215252

216-
def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
253+
def _get_tui_choices_from_scan(
254+
repos: Iterable[CachedRepoInfo],
255+
preselected: List[str],
256+
sort_by: Optional[SortingOption_T] = None,
257+
) -> List:
217258
"""Build a list of choices from the scanned repos.
218259
219260
Args:
220261
repos (*Iterable[`CachedRepoInfo`]*):
221262
List of scanned repos on which we want to delete revisions.
222263
preselected (*List[`str`]*):
223264
List of revision hashes that will be preselected.
265+
sort_by (*Optional[SortingOption_T]*):
266+
Sorting criterion. Choices: "alphabetical", "lastUpdated", "lastUsed", "size".
224267
225268
Return:
226269
The list of choices to pass to `inquirer.checkbox`.
227270
"""
228271
choices: List[Union[Choice, Separator]] = []
229272

230-
# First choice is to cancel the deletion. If selected, nothing will be deleted,
231-
# no matter the other selected items.
273+
# First choice is to cancel the deletion
232274
choices.append(
233275
Choice(
234276
_CANCEL_DELETION_STR,
@@ -237,8 +279,10 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
237279
)
238280
)
239281

240-
# Display a separator per repo and a Choice for each revisions of the repo
241-
for repo in sorted(repos, key=_repo_sorting_order):
282+
# Sort repos based on specified criteria
283+
sorted_repos = sorted(repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
284+
285+
for repo in sorted_repos:
242286
# Repo as separator
243287
choices.append(
244288
Separator(
@@ -264,7 +308,11 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
264308
return choices
265309

266310

267-
def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
311+
def _manual_review_no_tui(
312+
hf_cache_info: HFCacheInfo,
313+
preselected: List[str],
314+
sort_by: Optional[SortingOption_T] = None,
315+
) -> List[str]:
268316
"""Ask the user for a manual review of the revisions to delete.
269317
270318
Used when TUI is disabled. Manual review happens in a separate tmp file that the
@@ -275,7 +323,10 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
275323
os.close(fd)
276324

277325
lines = []
278-
for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order):
326+
327+
sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
328+
329+
for repo in sorted_repos:
279330
lines.append(
280331
f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
281332
f" used {repo.last_accessed_str})"
@@ -314,9 +365,9 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
314365
):
315366
break
316367

317-
# 4. Return selected_hashes
368+
# 4. Return selected_hashes sorted to maintain stable order
318369
os.remove(tmp_path)
319-
return selected_hashes
370+
return sorted(selected_hashes) # Sort to maintain stable order
320371

321372

322373
def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
@@ -418,11 +469,6 @@ def _read_manual_review_tmp_file(tmp_path: str) -> List[str]:
418469
""".strip()
419470

420471

421-
def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
422-
# First split by Dataset/Model, then sort by last accessed (oldest first)
423-
return (repo.repo_type, repo.last_accessed)
424-
425-
426472
def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
    """Return the sort key of a cached revision.

    The key is the revision's `last_modified` value, so an ascending sort lists the
    oldest revision first.
    """
    last_modified = revision.last_modified
    return last_modified

0 commit comments

Comments
 (0)