Skip to content

Commit 8497eda

Browse files
committed
dvcfs: use info() instead of metadata()
metadata() was introduced before our migration to fsspec and is no longer needed. This is a prerequisite for using the data index in dvcfs/repofs. Note that a number of awkward flags such as isdvc/isout/etc. are still kept for now and will be removed in follow-ups.
1 parent dc13179 commit 8497eda

File tree

8 files changed

+273
-390
lines changed

8 files changed

+273
-390
lines changed

dvc/api.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ def get_url(path, repo=None, rev=None, remote=None):
2121
with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as _repo:
2222
fs_path = _repo.fs.path.join(_repo.root_dir, path)
2323
with reraise(FileNotFoundError, PathMissingError(path, repo)):
24-
metadata = _repo.repo_fs.metadata(fs_path)
24+
info = _repo.repo_fs.info(fs_path)
2525

26-
if not metadata.is_dvc:
26+
if not info["isdvc"]:
2727
raise OutputNotFoundError(path, repo)
2828

29-
cloud = metadata.repo.cloud
30-
md5 = metadata.repo.dvcfs.info(fs_path)["md5"]
29+
cloud = info["repo"].cloud
30+
md5 = info["repo"].dvcfs.info(fs_path)["md5"]
3131
return cloud.get_url_for(remote, checksum=md5)
3232

3333

dvc/fs/_metadata.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

dvc/fs/dvc.py

Lines changed: 49 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from dvc.utils import relpath
77

88
from ._callback import DEFAULT_CALLBACK
9-
from ._metadata import Metadata
109
from .base import FileSystem
1110

1211
if typing.TYPE_CHECKING:
@@ -106,38 +105,20 @@ def open( # type: ignore
106105

107106
def exists(self, path): # pylint: disable=arguments-renamed
108107
try:
109-
self.metadata(path)
108+
self.info(path)
110109
return True
111110
except FileNotFoundError:
112111
return False
113112

114113
def isdir(self, path): # pylint: disable=arguments-renamed
115114
try:
116-
meta = self.metadata(path)
117-
return meta.isdir
115+
return self.info(path)["type"] == "directory"
118116
except FileNotFoundError:
119117
return False
120118

121-
def check_isdir(self, path, outs):
122-
if len(outs) != 1:
123-
return True
124-
125-
out = outs[0]
126-
if not out.is_dir_checksum:
127-
return out.fs_path != path
128-
if out.fs_path == path:
129-
return True
130-
131-
try:
132-
self._get_granular_hash(path, out)
133-
return False
134-
except FileNotFoundError:
135-
return True
136-
137119
def isfile(self, path): # pylint: disable=arguments-renamed
138120
try:
139-
meta = self.metadata(path)
140-
return meta.isfile
121+
return self.info(path)["type"] == "file"
141122
except FileNotFoundError:
142123
return False
143124

@@ -192,19 +173,19 @@ def walk(self, top, topdown=True, onerror=None, **kwargs):
192173
assert topdown
193174
root = os.path.abspath(top)
194175
try:
195-
meta = self.metadata(root)
176+
info = self.info(root)
196177
except FileNotFoundError:
197178
if onerror is not None:
198179
onerror(FileNotFoundError(top))
199180
return
200181

201-
if not meta.isdir:
182+
if info["type"] != "directory":
202183
if onerror is not None:
203184
onerror(NotADirectoryError(top))
204185
return
205186

206187
trie = Trie()
207-
for out in meta.outs:
188+
for out in info["outs"]:
208189
trie[out.fs.path.parts(out.fs_path)] = out
209190

210191
if out.is_dir_checksum and self.path.isin_or_eq(root, out.fs_path):
@@ -220,40 +201,64 @@ def find(self, path, prefix=None):
220201

221202
def isdvc(self, path, recursive=False, strict=True):
222203
try:
223-
meta = self.metadata(path)
204+
info = self.info(path)
224205
except FileNotFoundError:
225206
return False
226207

227208
recurse = recursive or not strict
228-
return meta.output_exists if recurse else meta.is_output
209+
return bool(info.get("outs") if recurse else info.get("isout"))
229210

230-
def metadata(self, fs_path):
231-
abspath = os.path.abspath(fs_path)
211+
def info(self, path):
212+
abspath = os.path.abspath(path)
232213

233214
try:
234215
outs = self._find_outs(abspath, strict=False, recursive=True)
235216
except OutputNotFoundError as exc:
236217
raise FileNotFoundError from exc
237218

238-
meta = Metadata(fs_path=abspath, outs=outs, repo=self.repo)
239-
meta.isdir = meta.isdir or self.check_isdir(meta.fs_path, meta.outs)
240-
return meta
219+
ret = {
220+
"type": "file",
221+
"outs": outs,
222+
"size": 0,
223+
"isexec": False,
224+
"isdvc": False,
225+
}
241226

242-
def info(self, path):
243-
meta = self.metadata(path)
244-
ret = {"type": "directory" if meta.isdir else "file"}
245-
if meta.is_output and len(meta.outs) == 1 and meta.outs[0].hash_info:
246-
out = meta.outs[0]
227+
if len(outs) > 1:
228+
ret["type"] = "directory"
229+
return ret
230+
231+
out = outs[0]
232+
233+
if not out.hash_info:
234+
ret["isexec"] = out.meta.isexec
235+
return ret
236+
237+
if abspath == out.fs_path:
238+
if out.hash_info.isdir:
239+
ret["type"] = "directory"
247240
ret["size"] = out.meta.size
241+
ret["isexec"] = out.meta.isexec
248242
ret[out.hash_info.name] = out.hash_info.value
249-
elif meta.part_of_output:
250-
(out,) = meta.outs
251-
key = self.path.parts(self.path.relpath(path, out.fs_path))
252-
(obj_meta, oid) = out.obj.trie.get(key) or (None, None)
253-
if oid:
254-
ret["size"] = obj_meta.size if obj_meta else 0
255-
ret[oid.name] = oid.value
243+
ret["isdvc"] = True
244+
ret["isout"] = True
245+
return ret
246+
247+
if out.fs_path.startswith(abspath + self.sep):
248+
ret["type"] = "directory"
249+
return ret
256250

251+
ret["isdvc"] = True
252+
253+
try:
254+
self._get_granular_hash(abspath, out)
255+
except FileNotFoundError:
256+
ret["type"] = "directory"
257+
return ret
258+
259+
key = self.repo.fs.path.relparts(abspath, out.fs_path)
260+
(_, oid) = out.obj.trie.get(key) or (None, None)
261+
ret[oid.name] = oid.value
257262
return ret
258263

259264
def get_file(

dvc/fs/repo.py

Lines changed: 35 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
import errno
21
import logging
32
import os
43
import threading
5-
from contextlib import suppress
64
from itertools import takewhile
75
from typing import TYPE_CHECKING, Callable, Optional, Tuple, Type, Union
86

@@ -66,7 +64,7 @@ def __init__(
6664

6765
self._main_repo = repo
6866
self.hash_jobs = repo.fs.hash_jobs
69-
self.root_dir = repo.root_dir
67+
self.root_dir: str = repo.root_dir
7068
self._traverse_subrepos = subrepos
7169

7270
self._subrepos_trie = PathStringTrie()
@@ -234,11 +232,11 @@ def exists(self, path) -> bool:
234232
return True
235233

236234
try:
237-
meta = dvc_fs.metadata(path)
235+
info = dvc_fs.info(path)
238236
except FileNotFoundError:
239237
return False
240238

241-
for out in meta.outs:
239+
for out in info["outs"]:
242240
if fs.exists(out.fs_path):
243241
return False
244242

@@ -261,15 +259,15 @@ def isdir(self, path): # pylint: disable=arguments-renamed
261259
return False
262260

263261
try:
264-
meta = dvc_fs.metadata(path)
262+
info = dvc_fs.info(path)
265263
except FileNotFoundError:
266264
return False
267265

268-
for out in meta.outs:
266+
for out in info["outs"]:
269267
if fs.exists(out.fs_path):
270268
return False
271269

272-
return meta.isdir
270+
return info["type"] == "directory"
273271

274272
def isdvc(self, path, **kwargs):
275273
_, dvc_fs = self._get_fs_pair(path)
@@ -292,15 +290,16 @@ def isfile(self, path): # pylint: disable=arguments-renamed
292290
return False
293291

294292
try:
295-
meta = dvc_fs.metadata(path)
293+
info = dvc_fs.info(path)
296294
except FileNotFoundError:
297295
return False
298296

299-
(out,) = meta.outs
300-
assert len(meta.outs) == 1
297+
(out,) = info["outs"]
298+
assert len(info["outs"]) == 1
301299
if fs.exists(out.fs_path):
302300
return False
303-
return meta.isfile
301+
302+
return info["type"] == "file"
304303

305304
def _dvc_walk(self, walk):
306305
try:
@@ -455,47 +454,36 @@ def get_file(
455454
from_info, to_file, callback=callback, **kwargs
456455
)
457456

458-
def metadata(self, path):
459-
fs_path = os.path.abspath(path)
460-
fs, dvc_fs = self._get_fs_pair(fs_path)
461-
462-
dvc_meta = None
463-
if dvc_fs:
464-
with suppress(FileNotFoundError):
465-
dvc_meta = dvc_fs.metadata(fs_path)
466-
467-
info_result = None
468-
with suppress(FileNotFoundError):
469-
info_result = fs.info(fs_path)
470-
471-
if not info_result and not dvc_meta:
472-
raise FileNotFoundError(
473-
errno.ENOENT, os.strerror(errno.ENOENT), fs_path
474-
)
475-
476-
from ._metadata import Metadata
477-
478-
meta = dvc_meta or Metadata(
479-
fs_path=fs_path,
480-
repo=self._get_repo(fs_path) or self._main_repo,
481-
)
457+
def info(self, path):
458+
fs, dvc_fs = self._get_fs_pair(path)
482459

483-
isdir = bool(info_result) and info_result["type"] == "directory"
484-
meta.isdir = meta.isdir or isdir
460+
try:
461+
dvc_info = dvc_fs.info(path)
462+
except FileNotFoundError:
463+
dvc_info = None
485464

486-
if not dvc_meta:
465+
try:
487466
from dvc.utils import is_exec
488467

489-
meta.is_exec = bool(info_result) and is_exec(info_result["mode"])
490-
return meta
491-
492-
def info(self, path):
493-
fs, dvc_fs = self._get_fs_pair(path)
468+
fs_info = fs.info(path)
469+
fs_info["repo"] = dvc_fs.repo
470+
fs_info["outs"] = dvc_info.get("outs", None) if dvc_info else None
471+
fs_info["isout"] = (
472+
dvc_info.get("isout", False) if dvc_info else False
473+
)
474+
fs_info["isdvc"] = dvc_info["isdvc"] if dvc_info else False
475+
fs_info["isexec"] = (
476+
dvc_info["isexec"] if dvc_info else is_exec(fs_info["mode"])
477+
)
478+
return fs_info
494479

495-
try:
496-
return fs.info(path)
497480
except FileNotFoundError:
498-
return dvc_fs.info(path)
481+
if not dvc_info:
482+
raise
483+
484+
dvc_info["repo"] = dvc_fs.repo
485+
dvc_info["isdvc"] = True
486+
return dvc_info
499487

500488
def checksum(self, path):
501489
fs, dvc_fs = self._get_fs_pair(path)

0 commit comments

Comments
 (0)