Skip to content

Commit e430693

Browse files
committed
dvcfs: use paths relative to the repo root
Currently we use both absolute and relative (depend on cwd) paths, which is terrible for an api, as their interpretation depends on external factors. This PR makes dvcfs accept only paths relative to the repo root for regular outputs and absolute paths for external outputs. Pre-requisite for making repofs use same relative paths.
1 parent e60ab00 commit e430693

File tree

6 files changed

+83
-84
lines changed

6 files changed

+83
-84
lines changed

dvc/api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ def get_url(path, repo=None, rev=None, remote=None):
2727
raise OutputNotFoundError(path, repo)
2828

2929
cloud = info["repo"].cloud
30-
md5 = info["repo"].dvcfs.info(fs_path)["md5"]
30+
dvc_path = _repo.fs.path.relpath(fs_path, info["repo"].root_dir)
31+
md5 = info["repo"].dvcfs.info(dvc_path)["md5"]
3132
return cloud.get_url_for(remote, checksum=md5)
3233

3334

dvc/fs/dvc.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,18 @@ def config(self):
3232
raise NotImplementedError
3333

3434
def _get_key(self, path):
35+
from dvc.fs.local import LocalFileSystem
36+
3537
from . import get_cloud_fs
3638

3739
cls, kwargs, fs_path = get_cloud_fs(None, url=path)
3840

39-
if not path.startswith(self.repo.root_dir):
41+
if cls != LocalFileSystem or os.path.isabs(path):
4042
fs = cls(**kwargs)
4143
return (cls.scheme, *fs.path.parts(fs_path))
4244

4345
fs_key = "repo"
44-
key = self.path.relparts(path, self.repo.root_dir)
46+
key = self.path.parts(path)
4547
if key == (".",):
4648
key = ()
4749

@@ -132,9 +134,8 @@ def _walk(self, root, topdown=True, **kwargs):
132134

133135
def walk(self, top, topdown=True, onerror=None, **kwargs):
134136
assert topdown
135-
root = os.path.abspath(top)
136137
try:
137-
info = self.info(root)
138+
info = self.info(top)
138139
except FileNotFoundError:
139140
if onerror is not None:
140141
onerror(FileNotFoundError(top))
@@ -145,7 +146,7 @@ def walk(self, top, topdown=True, onerror=None, **kwargs):
145146
onerror(NotADirectoryError(top))
146147
return
147148

148-
yield from self._walk(root, topdown=topdown, **kwargs)
149+
yield from self._walk(top, topdown=topdown, **kwargs)
149150

150151
def find(self, path, prefix=None):
151152
for root, _, files in self.walk(path):
@@ -165,9 +166,7 @@ def isdvc(self, path, recursive=False, strict=True):
165166
def info(self, path):
166167
from dvc.data.meta import Meta
167168

168-
abspath = os.path.abspath(path)
169-
170-
key = self._get_key(abspath)
169+
key = self._get_key(path)
171170

172171
try:
173172
outs = list(self.repo.index.tree.iteritems(key))

dvc/fs/repo.py

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
RepoFactory = Union[Callable[[str], "Repo"], Type["Repo"]]
1919

2020

21+
def _wrap_walk(dvc_fs, *args, **kwargs):
22+
for root, dnames, fnames in dvc_fs.walk(*args, **kwargs):
23+
yield dvc_fs.path.join(dvc_fs.repo.root_dir, root), dnames, fnames
24+
25+
2126
class RepoFileSystem(FileSystem): # pylint:disable=abstract-method
2227
"""DVC + git-tracked files fs.
2328
@@ -191,7 +196,9 @@ def _is_dvc_repo(self, dir_path):
191196
repo_path = os.path.join(dir_path, Repo.DVC_DIR)
192197
return self._main_repo.fs.isdir(repo_path)
193198

194-
def _get_fs_pair(self, path) -> Tuple[FileSystem, Optional[DvcFileSystem]]:
199+
def _get_fs_pair(
200+
self, path
201+
) -> Tuple[FileSystem, Optional[DvcFileSystem], str]:
195202
"""
196203
Returns a pair of fss based on repo the path falls in, using prefix.
197204
"""
@@ -202,27 +209,33 @@ def _get_fs_pair(self, path) -> Tuple[FileSystem, Optional[DvcFileSystem]]:
202209
repo = self._get_repo(path) or self._main_repo
203210

204211
dvc_fs = self._dvcfss.get(repo.root_dir)
205-
return repo.fs, dvc_fs
212+
213+
if path.startswith(repo.root_dir):
214+
dvc_path = path[len(repo.root_dir) + 1 :]
215+
else:
216+
dvc_path = path
217+
218+
return repo.fs, dvc_fs, dvc_path
206219

207220
def open(
208221
self, path, mode="r", encoding="utf-8", **kwargs
209222
): # pylint: disable=arguments-renamed
210223
if "b" in mode:
211224
encoding = None
212225

213-
fs, dvc_fs = self._get_fs_pair(path)
226+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
214227
try:
215228
return fs.open(path, mode=mode, encoding=encoding)
216229
except FileNotFoundError:
217230
if not dvc_fs:
218231
raise
219232

220-
return dvc_fs.open(path, mode=mode, encoding=encoding, **kwargs)
233+
return dvc_fs.open(dvc_path, mode=mode, encoding=encoding, **kwargs)
221234

222235
def exists(self, path) -> bool:
223236
path = os.path.abspath(path)
224237

225-
fs, dvc_fs = self._get_fs_pair(path)
238+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
226239

227240
if not dvc_fs:
228241
return fs.exists(path)
@@ -233,7 +246,7 @@ def exists(self, path) -> bool:
233246
if fs.exists(path):
234247
return True
235248

236-
if not dvc_fs.exists(path):
249+
if not dvc_fs.exists(dvc_path):
237250
return False
238251

239252
for p in self.path.parents(path):
@@ -248,7 +261,7 @@ def exists(self, path) -> bool:
248261
def isdir(self, path): # pylint: disable=arguments-renamed
249262
path = os.path.abspath(path)
250263

251-
fs, dvc_fs = self._get_fs_pair(path)
264+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
252265

253266
if dvc_fs and dvc_fs.repo.dvcignore.is_ignored_dir(path):
254267
return False
@@ -264,7 +277,7 @@ def isdir(self, path): # pylint: disable=arguments-renamed
264277
return False
265278

266279
try:
267-
info = dvc_fs.info(path)
280+
info = dvc_fs.info(dvc_path)
268281
except FileNotFoundError:
269282
return False
270283

@@ -278,13 +291,13 @@ def isdir(self, path): # pylint: disable=arguments-renamed
278291
return info["type"] == "directory"
279292

280293
def isdvc(self, path, **kwargs):
281-
_, dvc_fs = self._get_fs_pair(path)
282-
return dvc_fs is not None and dvc_fs.isdvc(path, **kwargs)
294+
_, dvc_fs, dvc_path = self._get_fs_pair(path)
295+
return dvc_fs is not None and dvc_fs.isdvc(dvc_path, **kwargs)
283296

284297
def isfile(self, path): # pylint: disable=arguments-renamed
285298
path = os.path.abspath(path)
286299

287-
fs, dvc_fs = self._get_fs_pair(path)
300+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
288301

289302
if dvc_fs and dvc_fs.repo.dvcignore.is_ignored_file(path):
290303
return False
@@ -300,7 +313,7 @@ def isfile(self, path): # pylint: disable=arguments-renamed
300313
return False
301314

302315
try:
303-
info = dvc_fs.info(path)
316+
info = dvc_fs.info(dvc_path)
304317
except FileNotFoundError:
305318
return False
306319

@@ -328,10 +341,10 @@ def _subrepo_walk(self, dir_path, **kwargs):
328341
NOTE: subrepo will only be discovered when walking if
329342
ignore_subrepos is set to False.
330343
"""
331-
fs, dvc_fs = self._get_fs_pair(dir_path)
344+
fs, dvc_fs, dvc_path = self._get_fs_pair(dir_path)
332345
fs_walk = fs.walk(dir_path, topdown=True)
333346
if dvc_fs:
334-
dvc_walk = dvc_fs.walk(dir_path, topdown=True, **kwargs)
347+
dvc_walk = _wrap_walk(dvc_fs, dvc_path, topdown=True, **kwargs)
335348
else:
336349
dvc_walk = None
337350
yield from self._walk(fs_walk, dvc_walk, **kwargs)
@@ -420,26 +433,26 @@ def walk(self, top, topdown=True, onerror=None, **kwargs):
420433
repo = self._get_repo(os.path.abspath(top))
421434
dvcfiles = kwargs.pop("dvcfiles", False)
422435

423-
fs, dvc_fs = self._get_fs_pair(top)
436+
fs, dvc_fs, dvc_path = self._get_fs_pair(top)
424437
repo_exists = fs.exists(top)
425438

426439
repo_walk = repo.dvcignore.walk(
427440
fs, top, topdown=topdown, onerror=onerror, **kwargs
428441
)
429442

430-
if not dvc_fs or (repo_exists and dvc_fs.isdvc(top)):
443+
if not dvc_fs or (repo_exists and dvc_fs.isdvc(dvc_path)):
431444
yield from self._walk(repo_walk, None, dvcfiles=dvcfiles)
432445
return
433446

434447
if not repo_exists:
435-
yield from dvc_fs.walk(
436-
top, topdown=topdown, onerror=onerror, **kwargs
448+
yield from _wrap_walk(
449+
dvc_fs, dvc_path, topdown=topdown, onerror=onerror, **kwargs
437450
)
438451

439452
dvc_walk = None
440-
if dvc_fs.exists(top):
441-
dvc_walk = dvc_fs.walk(
442-
top, topdown=topdown, onerror=onerror, **kwargs
453+
if dvc_fs.exists(dvc_path):
454+
dvc_walk = _wrap_walk(
455+
dvc_fs, dvc_path, topdown=topdown, onerror=onerror, **kwargs
443456
)
444457

445458
yield from self._walk(repo_walk, dvc_walk, dvcfiles=dvcfiles)
@@ -452,7 +465,7 @@ def find(self, path, prefix=None):
452465
def get_file(
453466
self, from_info, to_file, callback=DEFAULT_CALLBACK, **kwargs
454467
):
455-
fs, dvc_fs = self._get_fs_pair(from_info)
468+
fs, dvc_fs, dvc_path = self._get_fs_pair(from_info)
456469
try:
457470
fs.get_file( # pylint: disable=protected-access
458471
from_info, to_file, callback=callback, **kwargs
@@ -463,14 +476,14 @@ def get_file(
463476
raise
464477

465478
dvc_fs.get_file( # pylint: disable=protected-access
466-
from_info, to_file, callback=callback, **kwargs
479+
dvc_path, to_file, callback=callback, **kwargs
467480
)
468481

469482
def info(self, path):
470-
fs, dvc_fs = self._get_fs_pair(path)
483+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
471484

472485
try:
473-
dvc_info = dvc_fs.info(path)
486+
dvc_info = dvc_fs.info(dvc_path)
474487
except FileNotFoundError:
475488
dvc_info = None
476489

@@ -503,9 +516,9 @@ def info(self, path):
503516
return dvc_info
504517

505518
def checksum(self, path):
506-
fs, dvc_fs = self._get_fs_pair(path)
519+
fs, dvc_fs, dvc_path = self._get_fs_pair(path)
507520

508521
try:
509522
return fs.checksum(path)
510523
except FileNotFoundError:
511-
return dvc_fs.checksum(path)
524+
return dvc_fs.checksum(dvc_path)

tests/func/test_fs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def test_download_callbacks_on_dvc_git_fs(tmp_dir, dvc, scm, fs_type):
305305

306306
callback = fsspec.Callback()
307307
fs.download_file(
308-
(tmp_dir / "file").fs_path,
308+
"file",
309309
(tmp_dir / "file2").fs_path,
310310
callback=callback,
311311
)
@@ -317,7 +317,7 @@ def test_download_callbacks_on_dvc_git_fs(tmp_dir, dvc, scm, fs_type):
317317

318318
callback = fsspec.Callback()
319319
fs.download(
320-
(tmp_dir / "dir").fs_path,
320+
"dir",
321321
(tmp_dir / "dir2").fs_path,
322322
callback=callback,
323323
)

0 commit comments

Comments
 (0)