3434from databricks_cli .dbfs .dbfs_path import DbfsPath
3535from databricks_cli .dbfs .exceptions import LocalFileExistsException
3636
# Shared constants for the DBFS API tests below.
#
# NOTE(review): the FileInfo constants are numbered 0/1 while the paths and
# JSON payloads they correspond to are numbered 1/2
# (TEST_FILE_INFO0 <-> TEST_DBFS_PATH1 / TEST_FILE_JSON1,
#  TEST_FILE_INFO1 <-> TEST_DBFS_PATH2 / TEST_FILE_JSON2).
# Consider aligning the suffixes in a follow-up.
TEST_DBFS_PATH1 = DbfsPath('dbfs:/test')
TEST_DBFS_PATH2 = DbfsPath('dbfs:/dir/test')
DUMMY_TIME = 1613158406000

# JSON payload describing a one-byte file at /test.
TEST_FILE_JSON1 = {
    'path': '/test',
    'is_dir': False,
    'file_size': 1,
    'modification_time': DUMMY_TIME
}
# JSON payload describing a one-byte file at /dir/test.
TEST_FILE_JSON2 = {
    'path': '/dir/test',
    'is_dir': False,
    'file_size': 1,
    'modification_time': DUMMY_TIME
}
# JSON payload describing the directory /dir.
TEST_DIR_JSON = {
    'path': '/dir',
    'is_dir': True,
    'file_size': 0,
    'modification_time': DUMMY_TIME
}

# FileInfo objects built with the same field values as TEST_FILE_JSON1 and
# TEST_FILE_JSON2 respectively.
TEST_FILE_INFO0 = api.FileInfo(TEST_DBFS_PATH1, False, 1, DUMMY_TIME)
TEST_FILE_INFO1 = api.FileInfo(TEST_DBFS_PATH2, False, 1, DUMMY_TIME)
4660
4761
def get_resource_does_not_exist_exception():
    """Build an ``HTTPError`` carrying a RESOURCE_DOES_NOT_EXIST error body.

    The attached response's content is a JSON object with a single
    ``error_code`` key set to ``api.DbfsErrorCodes.RESOURCE_DOES_NOT_EXIST``.
    No status code is set on the response.

    :return: a ``requests.exceptions.HTTPError`` wrapping that response.
    """
    response = requests.Response()
    body = (
        '{"error_code": "'
        + api.DbfsErrorCodes.RESOURCE_DOES_NOT_EXIST
        + '"}'
    )
    response._content = body.encode()
    return requests.exceptions.HTTPError(response=response)
5266
5367
def get_partial_delete_exception(message="[...] operation has deleted 10 files [...]"):
    """Build an ``HTTPError`` carrying a PARTIAL_DELETE error body.

    The attached response has status code 503 and a JSON body with an
    ``error_code`` of ``api.DbfsErrorCodes.PARTIAL_DELETE`` and the given
    ``message``.

    :param message: text placed verbatim in the body's ``message`` field.
        It is interpolated without escaping, so it must not contain
        characters that would break the surrounding JSON string.
    :return: a ``requests.exceptions.HTTPError`` wrapping that response.
    """
    response = requests.Response()
    response.status_code = 503
    body = '{{"error_code": "{}","message": "{}"}}'.format(
        api.DbfsErrorCodes.PARTIAL_DELETE, message)
    response._content = body.encode()
    return requests.exceptions.HTTPError(response=response)
5973
6074
class TestFileInfo(object):
    """Tests for ``api.FileInfo`` row rendering and JSON parsing."""

    def test_to_row_not_long_form_not_absolute(self):
        """Short form yields a single column holding the path's basename."""
        file_info = api.FileInfo(TEST_DBFS_PATH1, False, 1, DUMMY_TIME)
        row = file_info.to_row(is_long_form=False, is_absolute=False)
        assert len(row) == 1
        assert TEST_DBFS_PATH1.basename == row[0]

    def test_to_row_long_form_not_absolute(self):
        """Long form yields four columns: type, size, basename, and one more.

        Only the first three columns are checked here.
        """
        file_info = api.FileInfo(TEST_DBFS_PATH1, False, 1, DUMMY_TIME)
        row = file_info.to_row(is_long_form=True, is_absolute=False)
        assert len(row) == 4
        assert row[0] == 'file'
        assert row[1] == 1
        assert TEST_DBFS_PATH1.basename == row[2]

    def test_from_json(self):
        """``from_json`` maps path, is_dir and file_size from the payload."""
        file_info = api.FileInfo.from_json(TEST_FILE_JSON1)
        assert file_info.dbfs_path == TEST_DBFS_PATH1
        assert not file_info.is_dir
        assert file_info.file_size == 1
8195
@@ -89,41 +103,52 @@ def dbfs_api():
89103
90104
91105class TestDbfsApi (object ):
106+ def test_list_files_recursive (self , dbfs_api ):
107+ json = {
108+ 'files' : [TEST_FILE_JSON1 , TEST_DIR_JSON , TEST_FILE_JSON2 ]
109+ }
110+ dbfs_api .client .list .return_value = json
111+ files = dbfs_api .list_files ("dbfs:/" )
112+
113+ assert len (files ) == 2
114+ assert TEST_FILE_INFO0 == files [0 ]
115+ assert TEST_FILE_INFO1 == files [1 ]
116+
92117 def test_list_files_exists (self , dbfs_api ):
93118 json = {
94- 'files' : [TEST_FILE_JSON ]
119+ 'files' : [TEST_FILE_JSON1 ]
95120 }
96121 dbfs_api .client .list .return_value = json
97- files = dbfs_api .list_files (TEST_DBFS_PATH )
122+ files = dbfs_api .list_files (TEST_DBFS_PATH1 , is_recursive = True )
98123
99124 assert len (files ) == 1
100- assert TEST_FILE_INFO == files [0 ]
125+ assert TEST_FILE_INFO0 == files [0 ]
101126
102127 def test_list_files_does_not_exist (self , dbfs_api ):
103128 json = {}
104129 dbfs_api .client .list .return_value = json
105- files = dbfs_api .list_files (TEST_DBFS_PATH )
130+ files = dbfs_api .list_files (TEST_DBFS_PATH1 )
106131
107132 assert len (files ) == 0
108133
109134 def test_file_exists_true (self , dbfs_api ):
110- dbfs_api .client .get_status .return_value = TEST_FILE_JSON
111- assert dbfs_api .file_exists (TEST_DBFS_PATH )
135+ dbfs_api .client .get_status .return_value = TEST_FILE_JSON1
136+ assert dbfs_api .file_exists (TEST_DBFS_PATH1 )
112137
113138 def test_file_exists_false (self , dbfs_api ):
114139 exception = get_resource_does_not_exist_exception ()
115140 dbfs_api .client .get_status = mock .Mock (side_effect = exception )
116- assert not dbfs_api .file_exists (TEST_DBFS_PATH )
141+ assert not dbfs_api .file_exists (TEST_DBFS_PATH1 )
117142
118143 def test_get_status (self , dbfs_api ):
119- dbfs_api .client .get_status .return_value = TEST_FILE_JSON
120- assert dbfs_api .get_status (TEST_DBFS_PATH ) == TEST_FILE_INFO
144+ dbfs_api .client .get_status .return_value = TEST_FILE_JSON1
145+ assert dbfs_api .get_status (TEST_DBFS_PATH1 ) == TEST_FILE_INFO0
121146
122147 def test_get_status_fail (self , dbfs_api ):
123148 exception = get_resource_does_not_exist_exception ()
124149 dbfs_api .client .get_status = mock .Mock (side_effect = exception )
125150 with pytest .raises (exception .__class__ ):
126- dbfs_api .get_status (TEST_DBFS_PATH )
151+ dbfs_api .get_status (TEST_DBFS_PATH1 )
127152
128153 def test_put_file (self , dbfs_api , tmpdir ):
129154 test_file_path = os .path .join (tmpdir .strpath , 'test' )
@@ -133,7 +158,7 @@ def test_put_file(self, dbfs_api, tmpdir):
133158 api_mock = dbfs_api .client
134159 test_handle = 0
135160 api_mock .create .return_value = {'handle' : test_handle }
136- dbfs_api .put_file (test_file_path , TEST_DBFS_PATH , True )
161+ dbfs_api .put_file (test_file_path , TEST_DBFS_PATH1 , True )
137162
138163 # Should not call add-block since file is < 2GB
139164 assert api_mock .add_block .call_count == 0
@@ -148,10 +173,11 @@ def test_put_large_file(self, dbfs_api, tmpdir):
148173 dbfs_api .MULTIPART_UPLOAD_LIMIT = 2
149174 test_handle = 0
150175 api_mock .create .return_value = {'handle' : test_handle }
151- dbfs_api .put_file (test_file_path , TEST_DBFS_PATH , True )
176+ dbfs_api .put_file (test_file_path , TEST_DBFS_PATH1 , True )
152177 assert api_mock .add_block .call_count == 1
153178 assert test_handle == api_mock .add_block .call_args [0 ][0 ]
154- assert b64encode (b'test' ).decode () == api_mock .add_block .call_args [0 ][1 ]
179+ assert b64encode (b'test' ).decode (
180+ ) == api_mock .add_block .call_args [0 ][1 ]
155181 assert api_mock .close .call_count == 1
156182 assert test_handle == api_mock .close .call_args [0 ][0 ]
157183
@@ -160,18 +186,18 @@ def test_get_file_check_overwrite(self, dbfs_api, tmpdir):
160186 with open (test_file_path , 'w' ) as f :
161187 f .write ('test' )
162188 with pytest .raises (LocalFileExistsException ):
163- dbfs_api .get_file (TEST_DBFS_PATH , test_file_path , False )
189+ dbfs_api .get_file (TEST_DBFS_PATH1 , test_file_path , False )
164190
165191 def test_get_file (self , dbfs_api , tmpdir ):
166192 api_mock = dbfs_api .client
167- api_mock .get_status .return_value = TEST_FILE_JSON
193+ api_mock .get_status .return_value = TEST_FILE_JSON1
168194 api_mock .read .return_value = {
169195 'bytes_read' : 1 ,
170196 'data' : b64encode (b'x' ),
171197 }
172198
173199 test_file_path = os .path .join (tmpdir .strpath , 'test' )
174- dbfs_api .get_file (TEST_DBFS_PATH , test_file_path , True )
200+ dbfs_api .get_file (TEST_DBFS_PATH1 , test_file_path , True )
175201
176202 with open (test_file_path , 'r' ) as f :
177203 assert f .read () == 'x'
@@ -194,19 +220,23 @@ def test_cat(self, dbfs_api):
194220 def test_partial_delete (self , dbfs_api ):
195221 e_partial_delete = get_partial_delete_exception ()
196222 # Simulate 3 partial deletes followed by a full successful delete
197- exception_sequence = [e_partial_delete , e_partial_delete , e_partial_delete , None ]
223+ exception_sequence = [e_partial_delete ,
224+ e_partial_delete , e_partial_delete , None ]
198225 dbfs_api .client .delete = mock .Mock (side_effect = exception_sequence )
199226 dbfs_api .delete_retry_delay_millis = 1
200227 # Should succeed
201- dbfs_api .delete (DbfsPath ('dbfs:/whatever-doesnt-matter' ), recursive = True )
228+ dbfs_api .delete (
229+ DbfsPath ('dbfs:/whatever-doesnt-matter' ), recursive = True )
202230
203231 def test_partial_delete_exception_message_parse_error (self , dbfs_api ):
204232 message = "unexpected partial delete exception message"
205233 e_partial_delete = get_partial_delete_exception (message )
206- dbfs_api .client .delete = mock .Mock (side_effect = [e_partial_delete , None ])
234+ dbfs_api .client .delete = mock .Mock (
235+ side_effect = [e_partial_delete , None ])
207236 dbfs_api .delete_retry_delay_millis = 1
208237 # Should succeed
209- dbfs_api .delete (DbfsPath ('dbfs:/whatever-doesnt-matter' ), recursive = True )
238+ dbfs_api .delete (
239+ DbfsPath ('dbfs:/whatever-doesnt-matter' ), recursive = True )
210240
211241 def test_get_num_files_deleted (self ):
212242 e_partial_delete = get_partial_delete_exception ()
0 commit comments