Skip to content

Commit 17cf79a

Browse files
LysandreJikmuellerzrWauplinosanseviero
authored
Push to hub mixins that do not leverage git (#847)
* Propose a non-git mixin * Fix merge * Improvements non git mixin (#618) Add in commit_message + tests Co-authored-by: Lysandre <[email protected]> * Big refactor * FIX typos in contributing.md * Remove redefined logger in HfApi.py * Use upload_folder in both mixins + some docstring * moved back logger to top of hf_api >< * space in documentation can be ambiguous * WIP started deprecation * deprecate skip lfs file and use Path * added decorator to deprecate specific arguments + unittests for it * simplified tests * proper decorators * fix docstring * hubmixin: fixed existing tests + add http one * unique repo names across tests * make push_to_hub_keras work + tests * logs are not overwritten in push_to_hub_keras * flake8 * refactor push_to_hub from mixin.save_pretrained * deprecate positional argument in version 0.12 * remove docstring for deprecated skip_lfs_files * delete old logs when uploading keras model to hub * remove TODO in tests * remove useless todo * Update src/huggingface_hub/hub_mixin.py * flake8 * remove un-explicit _generate_url helper * exclude some folders from flake8 * clean pr * move out tests fixing to other issue * introduce decorator to deprecate tests * docstring * fix pattern in expect_deprecation decorator * docstring * remove extras['ml'] integration * Apply suggestions from code review Co-authored-by: Omar Sanseviero <[email protected]> * use expect_deprecation decorator in test * remove caret in comment * explicit mocked model * delete unused and failing test * optional path_in_repo for root * revert back token docstring * Update src/huggingface_hub/hf_api.py Co-authored-by: Lysandre Debut <[email protected]> Co-authored-by: Zachary Mueller <[email protected]> Co-authored-by: Wauplin <[email protected]> Co-authored-by: Omar Sanseviero <[email protected]>
1 parent 880408a commit 17cf79a

File tree

13 files changed

+701
-233
lines changed

13 files changed

+701
-233
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ venv/
109109
ENV/
110110
env.bak/
111111
venv.bak/
112+
.venv*
112113

113114
# Spyder project settings
114115
.spyderproject

CONTRIBUTING.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,13 +258,14 @@ repository they can be run with the following:
258258

259259
```bash
260260
$ HUGGINGFACE_CO_STAGING=1 python -m pytest -sv ./tests
261+
```
261262

262-
In fact, that's how `make test` is implemented (sans the `pip install` line)!
263+
In fact, that's how `make test` is implemented (without the `pip install` line)!
263264

264265
You can specify a smaller set of tests in order to test only the feature
265266
you're working on.
266267

267-
For example, the following will only run the tests hel in the `test_repository.py` file:
268+
For example, the following will only run the tests in the `test_repository.py` file:
268269

269270
```bash
270271
$ HUGGINGFACE_CO_STAGING=1 python -m pytest -sv ./tests/test_repository.py

docs/source/how-to-manage.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Create and manage a repository
22

3-
A repository is a space for you to store your model or dataset files. This guide will show you how to:
3+
A repository is a place where you can store your model or dataset files. This guide will show you how to:
44

55
* Create and delete a repository.
66
* Adjust repository visibility.

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ multi_line_output = 3
4848
use_parentheses = True
4949

5050
[flake8]
51+
exclude = .git,__pycache__,old,build,dist,.venv*
5152
ignore = E203, E501, E741, W503
5253
max-line-length = 88
5354

src/huggingface_hub/hf_api.py

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,8 @@
7373
else:
7474
from typing_extensions import Literal, TypedDict
7575

76-
77-
REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")
7876
USERNAME_PLACEHOLDER = "hf_user"
77+
_REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")
7978

8079
logger = logging.get_logger(__name__)
8180

@@ -1519,7 +1518,7 @@ def list_repo_files(
15191518
)
15201519
return [f.rfilename for f in repo_info.siblings]
15211520

1522-
@_deprecate_positional_args
1521+
@_deprecate_positional_args(version="0.12")
15231522
def create_repo(
15241523
self,
15251524
repo_id: str = None,
@@ -2006,6 +2005,11 @@ def create_commit(
20062005
" `CommitOperationDelete`"
20072006
)
20082007

2008+
logger.debug(
2009+
f"About to commit to the hub: {len(additions)} addition(s) and"
2010+
f" {len(deletions)} deletion(s)."
2011+
)
2012+
20092013
for addition in additions:
20102014
addition.validate()
20112015

@@ -2189,20 +2193,14 @@ def upload_file(
21892193
create_pr=create_pr,
21902194
parent_commit=parent_commit,
21912195
)
2192-
if pr_url is not None:
2193-
re_match = re.match(REGEX_DISCUSSION_URL, pr_url)
2194-
if re_match is None:
2195-
raise RuntimeError(
2196-
"Unexpected response from the hub, expected a Pull Request URL but"
2197-
f" got: '{pr_url}'"
2198-
)
2199-
revision = quote(f"refs/pr/{re_match[1]}", safe="")
22002196

2197+
if pr_url is not None:
2198+
revision = quote(_parse_revision_from_pr_url(pr_url), safe="")
22012199
if repo_type in REPO_TYPES_URL_PREFIXES:
22022200
repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
22032201
revision = revision if revision is not None else DEFAULT_REVISION
2202+
# Similar to `hf_hub_url` but it's "blob" instead of "resolve"
22042203
return f"{self.endpoint}/{repo_id}/blob/{revision}/{path_in_repo}"
2205-
# ^ Similar to `hf_hub_url` but it's "blob" instead of "resolve"
22062204

22072205
def upload_folder(
22082206
self,
@@ -2313,25 +2311,8 @@ def upload_folder(
23132311
if commit_message is not None
23142312
else f"Upload {path_in_repo} with huggingface_hub"
23152313
)
2316-
folder_path = os.path.normpath(os.path.expanduser(folder_path))
2317-
if not os.path.isdir(folder_path):
2318-
raise ValueError(f"Provided path: '{folder_path}' is not a directory")
2319-
2320-
files_to_add: List[CommitOperationAdd] = []
2321-
for dirpath, _, filenames in os.walk(folder_path):
2322-
for filename in filenames:
2323-
abs_path = os.path.join(dirpath, filename)
2324-
rel_path = os.path.relpath(abs_path, folder_path)
2325-
files_to_add.append(
2326-
CommitOperationAdd(
2327-
path_or_fileobj=abs_path,
2328-
path_in_repo=os.path.normpath(
2329-
os.path.join(path_in_repo, rel_path)
2330-
).replace(os.sep, "/"),
2331-
)
2332-
)
23332314

2334-
logger.debug(f"About to upload / commit {len(files_to_add)} files to the Hub")
2315+
files_to_add = _prepare_upload_folder_commit(folder_path, path_in_repo)
23352316

23362317
pr_url = self.create_commit(
23372318
repo_type=repo_type,
@@ -2346,20 +2327,12 @@ def upload_folder(
23462327
)
23472328

23482329
if pr_url is not None:
2349-
re_match = re.match(REGEX_DISCUSSION_URL, pr_url)
2350-
if re_match is None:
2351-
raise RuntimeError(
2352-
"Unexpected response from the hub, expected a Pull Request URL but"
2353-
f" got: '{pr_url}'"
2354-
)
2355-
revision = quote(f"refs/pr/{re_match[1]}", safe="")
2356-
2330+
revision = quote(_parse_revision_from_pr_url(pr_url), safe="")
23572331
if repo_type in REPO_TYPES_URL_PREFIXES:
23582332
repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
2359-
23602333
revision = revision if revision is not None else DEFAULT_REVISION
2334+
# Similar to `hf_hub_url` but it's "tree" instead of "resolve"
23612335
return f"{self.endpoint}/{repo_id}/tree/{revision}/{path_in_repo}"
2362-
# ^ Similar to `hf_hub_url` but it's "tree" instead of "resolve"
23632336

23642337
def delete_file(
23652338
self,
@@ -3231,8 +3204,8 @@ def get_token(cls) -> Optional[str]:
32313204
"""
32323205
Get token or None if not existent.
32333206
3234-
Note that a token can be also provided using the `HUGGING_FACE_HUB_TOKEN`
3235-
environment variable.
3207+
Note that a token can also be provided using the
3208+
`HUGGING_FACE_HUB_TOKEN` environment variable.
32363209
32373210
Returns:
32383211
`str` or `None`: The token, `None` if it doesn't exist.
@@ -3258,6 +3231,48 @@ def delete_token(cls):
32583231
pass
32593232

32603233

3234+
def _prepare_upload_folder_commit(
3235+
folder_path: str, path_in_repo: str
3236+
) -> List[CommitOperationAdd]:
3237+
"""Generate the list of Add operations for a commit to upload a folder."""
3238+
folder_path = os.path.normpath(os.path.expanduser(folder_path))
3239+
if not os.path.isdir(folder_path):
3240+
raise ValueError(f"Provided path: '{folder_path}' is not a directory")
3241+
3242+
files_to_add: List[CommitOperationAdd] = []
3243+
for dirpath, _, filenames in os.walk(folder_path):
3244+
for filename in filenames:
3245+
abs_path = os.path.join(dirpath, filename)
3246+
rel_path = os.path.relpath(abs_path, folder_path)
3247+
files_to_add.append(
3248+
CommitOperationAdd(
3249+
path_or_fileobj=abs_path,
3250+
path_in_repo=os.path.normpath(
3251+
os.path.join(path_in_repo, rel_path)
3252+
).replace(os.sep, "/"),
3253+
)
3254+
)
3255+
return files_to_add
3256+
3257+
3258+
def _parse_revision_from_pr_url(pr_url: str) -> str:
3259+
"""Safely parse the revision (`refs/pr/<number>`) from a PR url.
3260+
3261+
Example:
3262+
```py
3263+
>>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2")
3264+
'refs/pr/2'
3265+
```
3266+
"""
3267+
re_match = re.match(_REGEX_DISCUSSION_URL, pr_url)
3268+
if re_match is None:
3269+
raise RuntimeError(
3270+
"Unexpected response from the hub, expected a Pull Request URL but"
3271+
f" got: '{pr_url}'"
3272+
)
3273+
return f"refs/pr/{re_match[1]}"
3274+
3275+
32613276
api = HfApi()
32623277

32633278
set_access_token = api.set_access_token
@@ -3301,3 +3316,5 @@ def delete_token(cls):
33013316
edit_discussion_comment = api.edit_discussion_comment
33023317
rename_discussion = api.rename_discussion
33033318
merge_pull_request = api.merge_pull_request
3319+
3320+
_validate_or_retrieve_token = api._validate_or_retrieve_token

0 commit comments

Comments
 (0)