Skip to content

Commit a54a7bf

Browse files
danielvdende authored and andrewmchen committed
Add dbfs cat cli option (#226)
* Add support for DBFS-to-DBFS copies
  This commit enables these copies by using a temp file/dir on the local filesystem as a stepping stone.
* Add cat option to dbfs CLI
  This allows users to view the contents of a file in a single command, instead of forcing them to download it first and then use their default shell's cat functionality.
* Add test and fix up
* Fix lint
1 parent c054386 commit a54a7bf

File tree

3 files changed

+67
-5
lines changed

3 files changed

+67
-5
lines changed

databricks_cli/dbfs/api.py

Lines changed: 38 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@
2424
from base64 import b64encode, b64decode
2525

2626
import os
27+
import shutil
28+
import tempfile
29+
2730
import click
2831

2932
from requests.exceptions import HTTPError
@@ -221,8 +224,40 @@ def cp(self, recursive, overwrite, src, dst, headers=None):
221224
'To use this utility, one of the src or dst must be prefixed '
222225
'with dbfs:/')
223226
elif DbfsPath.is_valid(src) and DbfsPath.is_valid(dst):
224-
error_and_quit('Both paths provided are from the DBFS filesystem. '
225-
'To copy between the DBFS filesystem, you currently must copy the '
226-
'file from DBFS to your local filesystem and then back.')
227+
with TempDir() as temp_dir:
228+
# Always copy to <temp_dir>/temp since this will work no matter if it's a
229+
# recursive or a non-recursive copy.
230+
temp_path = temp_dir.path('temp')
231+
self.cp(recursive, True, src, temp_path)
232+
self.cp(recursive, overwrite, temp_path, dst)
227233
else:
228234
assert False, 'not reached'
235+
236+
def cat(self, src):
    """
    Echo the contents of the file at ``src``.

    The file is first copied (non-recursively, with overwrite enabled) into a
    fresh temporary directory via ``self.cp``; the local copy is then read in
    text mode and written out through ``click.echo`` without a trailing
    newline being appended.
    """
    with TempDir() as scratch:
        local_copy = scratch.path('temp')
        self.cp(False, True, src, local_copy)
        with open(local_copy) as handle:
            contents = handle.read()
        click.echo(contents, nl=False)
242+
243+
244+
class TempDir(object):
    """
    Context manager that creates a temporary directory on entry and, unless
    ``remove_on_exit`` is false, deletes it (with its contents) on exit.
    """

    def __init__(self, remove_on_exit=True):
        self._remove = remove_on_exit
        self._path = None
        self._dir = None

    def __enter__(self):
        # mkdtemp creates the directory; keep its absolute form for later joins.
        created = tempfile.mkdtemp()
        self._path = os.path.abspath(created)
        assert os.path.exists(self._path)
        return self

    def __exit__(self, tp, val, traceback):
        if self._remove:
            if os.path.exists(self._path):
                shutil.rmtree(self._path)

        # Post-conditions: a directory that should have been removed is gone,
        # and the current working directory still exists.
        leftover = self._remove and os.path.exists(self._path)
        assert not leftover
        assert os.path.exists(os.getcwd())

    def path(self, *path):
        """Join the given components onto the temporary directory's path."""
        base = self._path
        return os.path.join(base, *path)

databricks_cli/dbfs/cli.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,7 @@ def cp_cli(api_client, recursive, overwrite, src, dst):
102102
"""
103103
Copy files to and from DBFS.
104104
105-
Note that this function will fail if the src and dst are both on the local filesystem
106-
or if they are both DBFS paths.
105+
Note that this function will fail if the src and dst are both on the local filesystem.
107106
108107
For non-recursive copies, if the dst is a directory, the file will be placed inside the
109108
directory. For example ``dbfs cp dbfs:/apple.txt .`` will create a file at `./apple.txt`.
@@ -146,9 +145,23 @@ def dbfs_group():
146145
pass
147146

148147

148+
@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument('src')
@debug_option
@profile_option
@eat_exceptions
@provide_api_client
def cat_cli(api_client, src):
    """
    Show the contents of a file. Does not work for directories.
    """
    # Thin CLI wrapper: build the API object and delegate to its cat().
    dbfs_api = DbfsApi(api_client)
    dbfs_api.cat(src)
159+
160+
149161
# Register each subcommand on the dbfs group under its command-line name.
dbfs_group.add_command(configure_cli, name='configure')
dbfs_group.add_command(ls_cli, name='ls')
dbfs_group.add_command(mkdirs_cli, name='mkdirs')
dbfs_group.add_command(rm_cli, name='rm')
dbfs_group.add_command(cp_cli, name='cp')
dbfs_group.add_command(mv_cli, name='mv')
dbfs_group.add_command(cat_cli, name='cat')

tests/dbfs/test_api.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,17 @@ def test_get_file(self, dbfs_api, tmpdir):
151151

152152
with open(test_file_path, 'r') as f:
153153
assert f.read() == 'x'
154+
155+
def test_cat(self, dbfs_api):
    """cat() echoes the decoded file contents with nl=False."""
    file_status = {'path': '/test', 'is_dir': False, 'file_size': 1}
    read_response = {'bytes_read': 1, 'data': b64encode(b'a')}
    dbfs_api.client.get_status.return_value = file_status
    dbfs_api.client.read.return_value = read_response

    # Patch click inside the api module so the echo call can be inspected.
    with mock.patch('databricks_cli.dbfs.api.click') as patched_click:
        dbfs_api.cat('dbfs:/whatever-doesnt-matter')
        patched_click.echo.assert_called_with('a', nl=False)

0 commit comments

Comments
 (0)