@@ -103,6 +103,8 @@ def repo_type_and_id_from_hf_id(
103103
104104 - https://huggingface.co/<repo_type>/<namespace>/<repo_id>
105105 - https://huggingface.co/<namespace>/<repo_id>
106+ - hf://<repo_type>/<namespace>/<repo_id>
107+ - hf://<namespace>/<repo_id>
106108 - <repo_type>/<namespace>/<repo_id>
107109 - <namespace>/<repo_id>
108110 - <repo_id>
@@ -112,9 +114,21 @@ def repo_type_and_id_from_hf_id(
112114 Returns:
113115 A tuple with three items: repo_type (`str` or `None`), namespace (`str` or
114116 `None`) and repo_id (`str`).
117+
118+ Raises:
119+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
120+ If URL cannot be parsed.
121+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
122+ If `repo_type` is unknown.
115123 """
124+ input_hf_id = hf_id
116125 hub_url = re .sub (r"https?://" , "" , hub_url if hub_url is not None else ENDPOINT )
117126 is_hf_url = hub_url in hf_id and "@" not in hf_id
127+
128+ HFFS_PREFIX = "hf://"
129+ if hf_id .startswith (HFFS_PREFIX ): # Remove "hf://" prefix if exists
130+ hf_id = hf_id [len (HFFS_PREFIX ) :]
131+
118132 url_segments = hf_id .split ("/" )
119133 is_hf_id = len (url_segments ) <= 3
120134
@@ -144,9 +158,13 @@ def repo_type_and_id_from_hf_id(
144158 f"Unable to retrieve user and repo ID from the passed HF ID: { hf_id } "
145159 )
146160
161+ # Check if repo type is known (mapping "spaces" => "space" + empty value => `None`)
162+ if repo_type in REPO_TYPES_MAPPING :
163+ repo_type = REPO_TYPES_MAPPING [repo_type ]
164+ if repo_type == "" :
165+ repo_type = None
147166 if repo_type not in REPO_TYPES :
148- assert repo_type is not None , "repo_type `None` do not have mapping"
149- repo_type = REPO_TYPES_MAPPING .get (repo_type )
167+ raise ValueError (f"Unknown `repo_type`: '{ repo_type } ' ('{ input_hf_id } ')" )
150168
151169 return repo_type , namespace , repo_id
152170
@@ -234,12 +252,21 @@ class RepoUrl(str):
234252 >>> RepoUrl('https://huggingface.co/gpt2')
235253 RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2')
236254
237- >>> RepoUrl('https://hub-ci.huggingface.co/dataset/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co')
238- RepoUrl('https://hub-ci.huggingface.co/dataset/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset')
255+ >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co')
256+ RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset')
257+
258+ >>> RepoUrl('hf://datasets/my-user/my-dataset')
259+ RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='my-user/my-dataset')
239260
240261 >>> HfApi.create_repo("dummy_model")
241262 RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model')
242263 ```
264+
265+ Raises:
266+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
267+ If URL cannot be parsed.
268+ - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
269+ If `repo_type` is unknown.
243270 """
244271
245272 def __new__ (cls , url : Any , endpoint : Optional [str ] = None ):
0 commit comments