11import pytest
22from unittest .mock import patch , MagicMock
33from pyspark .sql import DataFrame
4+ import uuid
45
56from dataworkbench .storage import DeltaStorage
67from dataworkbench .datacatalogue import DataCatalogue
78from dataworkbench .gateway import Gateway
89
10+ from requests .exceptions import RequestException
11+
912
1013@pytest .fixture
1114def mock_dependencies ():
@@ -17,6 +20,15 @@ def mock_dependencies():
1720 datacatalogue = DataCatalogue ()
1821 return datacatalogue , MockStorage .return_value , MockGateway .return_value
1922
@pytest.fixture
def storage_handler():
    """Provide a ``DataCatalogue`` with its storage and URL builder mocked.

    Both ``storage`` and the name-mangled private method
    ``__build_storage_table_root_url`` are replaced on the returned instance
    with fresh ``MagicMock`` objects, so tests can set return values on them
    and inspect the calls they receive.
    """
    catalogue = DataCatalogue()
    for attribute in ("storage", "_DataCatalogue__build_storage_table_root_url"):
        setattr(catalogue, attribute, MagicMock())
    return catalogue
30+
31+
2032@patch .object (DeltaStorage , "write" , return_value = "mock_write_success" )
2133@patch .object (Gateway , "import_dataset" , return_value = "mock_datacatalog_success" )
2234def test_save_dataset (mock_write , mock_gateway_import , mock_dependencies ):
@@ -32,3 +44,129 @@ def test_save_dataset(mock_write, mock_gateway_import, mock_dependencies):
3244 assert result == "mock_datacatalog_success"
3345 mock_write .assert_called_once ()
3446 mock_gateway_import .assert_called_once ()
47+
48+
@pytest.mark.parametrize(
    "folder_id",
    ["", 123, "5f69754e-37a0-431b-aa3e-3f5e361017fa"],
)
def test_invalid_folder_id_build_storage_table_root_url(mock_dependencies, folder_id):
    """The private URL builder raises ``TypeError`` for every parametrized id.

    Cases covered: an empty string, an integer, and a UUID spelled as a plain
    string.
    """
    catalogue, _storage, _gateway = mock_dependencies
    with pytest.raises(TypeError):
        # Name-mangled access to the private ``__build_storage_table_root_url``.
        catalogue._DataCatalogue__build_storage_table_root_url(folder_id)
54+
55+
56+
def test_save_dataset_invalid_df(mock_dependencies):
    """``save`` raises ``TypeError`` when ``df`` is a plain string."""
    catalogue, _storage, _gateway = mock_dependencies
    not_a_dataframe = "a string"
    with pytest.raises(TypeError):
        catalogue.save(not_a_dataframe, "name", "description")
62+
63+
@pytest.mark.parametrize("name", ["", 123])
def test_save_dataset_invalid_name(mock_dependencies, name):
    """``save`` raises ``TypeError`` for an empty or non-string dataset name."""
    catalogue, _storage, _gateway = mock_dependencies
    save_kwargs = {
        "df": MagicMock(spec=DataFrame),
        "dataset_name": name,
        "dataset_description": "test description",
    }
    with pytest.raises(TypeError):
        catalogue.save(**save_kwargs)
73+
def test_save_dataset_invalid_description(mock_dependencies):
    """``save`` raises ``TypeError`` when the description is an integer."""
    catalogue, _storage, _gateway = mock_dependencies
    save_kwargs = {
        "df": MagicMock(spec=DataFrame),
        "dataset_name": "name",
        "dataset_description": 123,
    }
    with pytest.raises(TypeError):
        catalogue.save(**save_kwargs)
83+
@pytest.mark.parametrize("tags", ["tags: test", "{tags: test}", 123])
def test_save_dataset_invalid_tags(mock_dependencies, tags):
    """``save`` raises ``TypeError`` when ``tags`` is a string or an integer."""
    catalogue, _storage, _gateway = mock_dependencies
    save_kwargs = {
        "df": MagicMock(spec=DataFrame),
        "dataset_name": "name",
        "dataset_description": "description",
        "tags": tags,
    }
    with pytest.raises(TypeError):
        catalogue.save(**save_kwargs)
94+
95+
def test_save_gateway_failure_triggers_rollback(mock_dependencies, storage_handler):
    """A failing gateway import makes ``save`` roll back and report an error.

    ``gateway.import_dataset`` is replaced with a mock that raises
    ``RequestException`` and ``_rollback_write`` with a plain mock. The test
    then checks that ``save`` returns a result containing ``"error"`` and
    ``"error_type"``, and that both mocks were called exactly once.

    ``storage_handler`` stays in the signature so the set of fixtures
    requested from pytest is unchanged, but the body does not touch it: that
    fixture builds its own ``DataCatalogue()``, separate from the
    ``datacatalogue`` exercised here, so configuring its mocked URL builder
    had no effect on this test.
    """
    datacatalogue, _storage, _gateway = mock_dependencies

    # Simulate the gateway call failing after the write step.
    datacatalogue.gateway.import_dataset = MagicMock(side_effect=RequestException())
    # Replace the rollback so the test can observe that it was invoked.
    datacatalogue._rollback_write = MagicMock()

    result = datacatalogue.save(
        df=MagicMock(spec=DataFrame),
        dataset_name="name",
        dataset_description="description",
    )

    assert "error" in result
    assert "error_type" in result

    datacatalogue.gateway.import_dataset.assert_called_once()
    datacatalogue._rollback_write.assert_called_once()
120+
121+
def test_save_gateway_failure_and_rollback_fails(mock_dependencies, storage_handler):
    """When the rollback also fails, ``save`` reports the rollback's error.

    ``gateway.import_dataset`` raises ``RequestException`` and the mocked
    ``_rollback_write`` raises ``RuntimeError``. The returned result must
    carry ``error_type == "RuntimeError"`` and the rollback's message as
    ``error``, and both mocks must have been called exactly once.

    ``storage_handler`` stays in the signature so the set of fixtures
    requested from pytest is unchanged, but the body does not touch it: that
    fixture builds its own ``DataCatalogue()``, separate from the
    ``datacatalogue`` exercised here, so configuring its mocked URL builder
    had no effect on this test.
    """
    datacatalogue, _storage, _gateway = mock_dependencies
    error_msg = "some type of error"

    # First failure: the gateway import.
    datacatalogue.gateway.import_dataset = MagicMock(side_effect=RequestException())
    # Second failure: the rollback triggered by the first one.
    datacatalogue._rollback_write = MagicMock(side_effect=RuntimeError(error_msg))

    result = datacatalogue.save(
        df=MagicMock(spec=DataFrame),
        dataset_name="name",
        dataset_description="description",
    )

    assert "error" in result
    assert "error_type" in result

    # The reported error is the rollback's RuntimeError.
    assert result["error_type"] == "RuntimeError"
    assert result["error"] == error_msg

    datacatalogue.gateway.import_dataset.assert_called_once()
    datacatalogue._rollback_write.assert_called_once()
150+
151+
def test_rollback_write_success(storage_handler):
    """``_rollback_write`` deletes the built table root URL recursively.

    The mocked URL builder returns a fixed path; the test expects exactly one
    ``storage.delete`` call with that path and ``recursive=True``.
    """
    folder_id = uuid.uuid4()
    target_path = f".../{ folder_id } "
    build_url = storage_handler._DataCatalogue__build_storage_table_root_url
    build_url.return_value = target_path

    storage_handler._rollback_write(folder_id)

    delete_mock = storage_handler.storage.delete
    delete_mock.assert_called_once_with(target_path, recursive=True)
159+
160+
161+
def test_rollback_write_delete_fails_logs_error(storage_handler):
    """A failing ``storage.delete`` makes ``_rollback_write`` raise.

    The mocked delete raises ``Exception``; the test expects an exception to
    come out of ``_rollback_write`` and the delete to have been attempted
    exactly once with the built path and ``recursive=True``.

    NOTE(review): the test name mentions logging, but only the raised
    exception and the delete call are asserted here; confirm whether a log
    assertion was intended.
    """
    folder_id = uuid.uuid4()
    target_path = f".../{ folder_id } "
    build_url = storage_handler._DataCatalogue__build_storage_table_root_url
    build_url.return_value = target_path

    delete_mock = storage_handler.storage.delete
    delete_mock.side_effect = Exception()

    with pytest.raises(Exception):
        storage_handler._rollback_write(folder_id)

    delete_mock.assert_called_once_with(target_path, recursive=True)
0 commit comments