Skip to content

Commit d5d5300

Browse files
authored
No need for token to understand HF urls (#203)
* No need for token to understand HF urls
* Remove lack of namespace capabilities
1 parent 62d7e8d commit d5d5300

File tree

5 files changed

+115
-46
lines changed

5 files changed

+115
-46
lines changed

src/huggingface_hub/hf_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ def repo_type_and_id_from_hf_id(hf_id: str):
5858

5959
if is_hf_url:
6060
namespace, repo_id = url_segments[-2:]
61+
if namespace == "huggingface.co":
62+
namespace = None
6163
if len(url_segments) > 2 and "huggingface.co" not in url_segments[-3]:
6264
repo_type = url_segments[-3]
6365
else:

src/huggingface_hub/repository.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -223,24 +223,24 @@ def clone_from(self, repo_url: str, use_auth_token: Union[bool, str, None] = Non
223223
token = use_auth_token if use_auth_token is not None else self.huggingface_token
224224
api = HfApi()
225225

226-
if token is not None:
227-
whoami_info = api.whoami(token)
228-
user = whoami_info["name"]
229-
valid_organisations = [org["name"] for org in whoami_info["orgs"]]
230-
repo_type, namespace, repo_id = repo_type_and_id_from_hf_id(repo_url)
226+
repo_type, namespace, repo_id = repo_type_and_id_from_hf_id(repo_url)
231227

232-
if namespace is None:
233-
namespace = user
228+
if repo_type is not None:
229+
self.repo_type = repo_type
234230

235-
if repo_type is not None:
236-
self.repo_type = repo_type
231+
repo_url = ENDPOINT + "/"
237232

238-
repo_url = ENDPOINT + "/"
233+
if self.repo_type in REPO_TYPES_URL_PREFIXES:
234+
repo_url += REPO_TYPES_URL_PREFIXES[self.repo_type]
239235

240-
if self.repo_type in REPO_TYPES_URL_PREFIXES:
241-
repo_url += REPO_TYPES_URL_PREFIXES[self.repo_type]
236+
if token is not None:
237+
whoami_info = api.whoami(token)
238+
user = whoami_info["name"]
239+
valid_organisations = [org["name"] for org in whoami_info["orgs"]]
242240

243-
repo_url += f"{namespace}/{repo_id}"
241+
if namespace is not None:
242+
repo_url += f"{namespace}/"
243+
repo_url += repo_id
244244

245245
repo_url = repo_url.replace("https://", f"https://user:{token}@")
246246

@@ -252,6 +252,11 @@ def clone_from(self, repo_url: str, use_auth_token: Union[bool, str, None] = Non
252252
organization=namespace,
253253
exist_ok=True,
254254
)
255+
else:
256+
if namespace is not None:
257+
repo_url += f"{namespace}/"
258+
repo_url += repo_id
259+
255260
# For error messages, it's cleaner to show the repo url without the token.
256261
clean_repo_url = re.sub(r"https://.*@", "https://", repo_url)
257262
try:
@@ -265,7 +270,7 @@ def clone_from(self, repo_url: str, use_auth_token: Union[bool, str, None] = Non
265270

266271
# checks if repository is initialized in an empty repository or in one with files
267272
if len(os.listdir(self.local_dir)) == 0:
268-
logger.debug(f"Cloning {clean_repo_url} into local empty directory.")
273+
logger.warning(f"Cloning {clean_repo_url} into local empty directory.")
269274
subprocess.run(
270275
["git", "clone", repo_url, "."],
271276
stderr=subprocess.PIPE,

tests/test_hf_api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ def test_end_to_end_thresh_16M(self):
561561
class HfApiMiscTest(unittest.TestCase):
562562
def test_repo_type_and_id_from_hf_id(self):
563563
possible_values = {
564+
"https://huggingface.co/id": [None, None, "id"],
564565
"https://huggingface.co/user/id": [None, "user", "id"],
565566
"https://huggingface.co/datasets/user/id": ["dataset", "user", "id"],
566567
"https://huggingface.co/spaces/user/id": ["space", "user", "id"],

tests/test_repository.py

Lines changed: 87 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from huggingface_hub.repository import Repository, is_tracked_with_lfs
2727

2828
from .testing_constants import ENDPOINT_STAGING, PASS, USER
29-
from .testing_utils import set_write_permission_and_retry
29+
from .testing_utils import set_write_permission_and_retry, with_production_testing
3030

3131

3232
REPO_NAME = "repo-{}".format(int(time.time() * 10e3))
@@ -312,35 +312,65 @@ def test_clone_with_repo_name_and_user_namespace(self):
312312
self.assertTrue("model.bin" in files)
313313

314314
def test_clone_with_repo_name_and_no_namespace(self):
315-
clone = Repository(
316-
REPO_NAME,
315+
self.assertRaises(
316+
OSError,
317+
Repository,
318+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
317319
clone_from=REPO_NAME,
318320
use_auth_token=self._token,
319321
git_user="ci",
320322
git_email="[email protected]",
321323
)
322324

323-
with clone.commit("Commit"):
324-
# Create dummy files
325-
# one is lfs-tracked, the other is not.
326-
with open("dummy.txt", "w") as f:
327-
f.write("hello")
328-
with open("model.bin", "w") as f:
329-
f.write("hello")
325+
def test_clone_with_repo_name_user_and_no_auth_token(self):
326+
# Create repo
327+
Repository(
328+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
329+
clone_from=f"{USER}/{REPO_NAME}",
330+
git_user="ci",
331+
git_email="[email protected]",
332+
)
330333

331-
shutil.rmtree(REPO_NAME)
334+
# Instantiate it without token
335+
Repository(
336+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
337+
clone_from=f"{USER}/{REPO_NAME}",
338+
git_user="ci",
339+
git_email="[email protected]",
340+
)
332341

342+
def test_clone_with_repo_name_org_and_no_auth_token(self):
343+
# Create repo
333344
Repository(
334345
f"{WORKING_REPO_DIR}/{REPO_NAME}",
335-
clone_from=REPO_NAME,
336346
use_auth_token=self._token,
347+
clone_from=f"valid_org/{REPO_NAME}",
337348
git_user="ci",
338349
git_email="[email protected]",
339350
)
340351

341-
files = os.listdir(f"{WORKING_REPO_DIR}/{REPO_NAME}")
342-
self.assertTrue("dummy.txt" in files)
343-
self.assertTrue("model.bin" in files)
352+
# Instantiate it without token
353+
Repository(
354+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
355+
clone_from=f"valid_org/{REPO_NAME}",
356+
git_user="ci",
357+
git_email="[email protected]",
358+
)
359+
360+
@with_production_testing
361+
def test_clone_repo_at_root(self):
362+
os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
363+
Repository(
364+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
365+
clone_from="bert-base-cased",
366+
)
367+
368+
shutil.rmtree(f"{WORKING_REPO_DIR}/{REPO_NAME}")
369+
370+
Repository(
371+
f"{WORKING_REPO_DIR}/{REPO_NAME}",
372+
clone_from="https://huggingface.co/bert-base-cased",
373+
)
344374

345375

346376
class RepositoryAutoLFSTrackingTest(RepositoryCommonTest):
@@ -622,12 +652,15 @@ def tearDown(self):
622652
token=self._token, name=REPO_NAME, repo_type="dataset"
623653
)
624654
except requests.exceptions.HTTPError:
625-
self._api.delete_repo(
626-
token=self._token,
627-
organization="valid_org",
628-
name=REPO_NAME,
629-
repo_type="dataset",
630-
)
655+
try:
656+
self._api.delete_repo(
657+
token=self._token,
658+
organization="valid_org",
659+
name=REPO_NAME,
660+
repo_type="dataset",
661+
)
662+
except requests.exceptions.HTTPError:
663+
pass
631664

632665
shutil.rmtree(
633666
f"{WORKING_DATASET_DIR}/{REPO_NAME}", onerror=set_write_permission_and_retry
@@ -721,7 +754,9 @@ def test_clone_with_repo_name_and_user_namespace(self):
721754
self.assertTrue("test.py" in files)
722755

723756
def test_clone_with_repo_name_and_no_namespace(self):
724-
clone = Repository(
757+
self.assertRaises(
758+
OSError,
759+
Repository,
725760
f"{WORKING_DATASET_DIR}/{REPO_NAME}",
726761
clone_from=REPO_NAME,
727762
repo_type="dataset",
@@ -730,21 +765,42 @@ def test_clone_with_repo_name_and_no_namespace(self):
730765
git_email="[email protected]",
731766
)
732767

733-
with clone.commit("Commit"):
734-
for file in os.listdir(DATASET_FIXTURE):
735-
shutil.copyfile(pathlib.Path(DATASET_FIXTURE) / file, file)
768+
def test_clone_with_repo_name_user_and_no_auth_token(self):
769+
# Create repo
770+
Repository(
771+
f"{WORKING_DATASET_DIR}/{REPO_NAME}",
772+
clone_from=f"{USER}/{REPO_NAME}",
773+
repo_type="dataset",
774+
use_auth_token=self._token,
775+
git_user="ci",
776+
git_email="[email protected]",
777+
)
736778

737-
shutil.rmtree(f"{WORKING_DATASET_DIR}/{REPO_NAME}")
779+
# Instantiate it without token
780+
Repository(
781+
f"{WORKING_DATASET_DIR}/{REPO_NAME}",
782+
clone_from=f"{USER}/{REPO_NAME}",
783+
repo_type="dataset",
784+
git_user="ci",
785+
git_email="[email protected]",
786+
)
738787

788+
def test_clone_with_repo_name_org_and_no_auth_token(self):
789+
# Create repo
739790
Repository(
740791
f"{WORKING_DATASET_DIR}/{REPO_NAME}",
741-
clone_from=REPO_NAME,
742-
use_auth_token=self._token,
792+
clone_from=f"valid_org/{REPO_NAME}",
743793
repo_type="dataset",
794+
use_auth_token=self._token,
744795
git_user="ci",
745796
git_email="[email protected]",
746797
)
747798

748-
files = os.listdir(f"{WORKING_DATASET_DIR}/{REPO_NAME}")
749-
self.assertTrue("some_text.txt" in files)
750-
self.assertTrue("test.py" in files)
799+
# Instantiate it without token
800+
Repository(
801+
f"{WORKING_DATASET_DIR}/{REPO_NAME}",
802+
clone_from=f"valid_org/{REPO_NAME}",
803+
repo_type="dataset",
804+
git_user="ci",
805+
git_email="[email protected]",
806+
)

tests/testing_utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,9 @@ def with_production_testing(func):
164164
ENDPOINT_PRODUCTION,
165165
)
166166

167-
return hf_api(file_download(func))
167+
repository = patch(
168+
"huggingface_hub.repository.ENDPOINT",
169+
ENDPOINT_PRODUCTION,
170+
)
171+
172+
return repository(hf_api(file_download(func)))

0 commit comments

Comments
 (0)