Skip to content

Commit 6eb8113

Browse files
feat(datasets): BI-0 allow null as a connection ref in datasets and export-import w/o conn
1 parent ecbf2ad commit 6eb8113

File tree

16 files changed

+163
-92
lines changed

16 files changed

+163
-92
lines changed

lib/dl_api_client/dl_api_client/dsmaker/api/dataset_api.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,16 +225,16 @@ def load_dataset(self, dataset: Dataset) -> HttpDatasetApiResponse:
225225
dataset=dataset,
226226
)
227227

228-
def export_dataset(self, dataset: Dataset, data: dict, bi_headers: dict) -> HttpDatasetApiResponse:
229-
response = self._request(f"/api/v1/datasets/export/{dataset.id}", method="post", data=data, headers=bi_headers)
228+
def export_dataset(self, dataset: Dataset, data: dict, headers: dict) -> HttpDatasetApiResponse:
229+
response = self._request(f"/api/v1/datasets/export/{dataset.id}", method="post", data=data, headers=headers)
230230
return HttpDatasetApiResponse(
231231
json=response.json,
232232
status_code=response.status_code,
233233
dataset=None,
234234
)
235235

236-
def import_dataset(self, data: dict, bi_headers: dict) -> HttpDatasetApiResponse:
237-
response = self._request("/api/v1/datasets/import", method="post", data=data, headers=bi_headers)
236+
def import_dataset(self, data: dict, headers: dict) -> HttpDatasetApiResponse:
237+
response = self._request("/api/v1/datasets/import", method="post", data=data, headers=headers)
238238
return HttpDatasetApiResponse(
239239
json=response.json,
240240
status_code=response.status_code,

lib/dl_api_lib/dl_api_lib/app/control_api/resources/dataset.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
log_dataset_field_stats,
2929
)
3030
from dl_api_lib.enums import USPermissionKind
31-
from dl_api_lib.schemas import main as dl_api_main_schemas
31+
import dl_api_lib.schemas.main
3232
import dl_api_lib.schemas.data
3333
import dl_api_lib.schemas.dataset_base
3434
import dl_api_lib.schemas.validation
@@ -85,9 +85,9 @@ def generate_dataset_location(cls, body: dict) -> EntryLocation:
8585
@put_to_request_context(endpoint_code="DatasetCreate")
8686
@schematic_request(
8787
ns=ns,
88-
body=dl_api_main_schemas.CreateDatasetSchema(),
88+
body=dl_api_lib.schemas.main.CreateDatasetSchema(),
8989
responses={
90-
200: ("Success", dl_api_main_schemas.CreateDatasetResponseSchema()),
90+
200: ("Success", dl_api_lib.schemas.main.CreateDatasetResponseSchema()),
9191
},
9292
)
9393
def post(self, body: dict) -> dict:
@@ -310,7 +310,6 @@ def post(self, dataset_id: str, body: dict) -> dict:
310310

311311
ds, _ = self.get_dataset(dataset_id=dataset_id, body={})
312312
ds_dict = ds.as_dict()
313-
us_manager.load_dependencies(ds)
314313
ds_dict.update(
315314
self.make_dataset_response_data(
316315
dataset=ds, us_entry_buffer=us_manager.get_entry_buffer(), conn_id_mapping=body["id_mapping"]
@@ -322,7 +321,6 @@ def post(self, dataset_id: str, body: dict) -> dict:
322321
ds_dict["dataset"]["name"] = dl_loc.entry_name
323322

324323
ds_dict["dataset"]["revision_id"] = None
325-
del ds_dict["dataset"]["rls"]
326324

327325
notifications = []
328326
localizer = self.get_service_registry().get_localizer()
@@ -350,25 +348,29 @@ def generate_dataset_location(cls, body: dict) -> EntryLocation:
350348

351349
@classmethod
352350
def replace_conn_ids(cls, data: dict, conn_id_mapping: dict) -> None:
351+
if "sources" not in data["dataset"]:
352+
LOGGER.info("There are no sources in the passed dataset data, so nothing to replace")
353+
return
354+
353355
for source in data["dataset"]["sources"]:
354356
assert isinstance(source, dict)
355357
fake_conn_id = source["connection_id"]
356358
if fake_conn_id not in conn_id_mapping:
357359
LOGGER.info(
358-
'Can not find "%s" in conn id mapping for source with id %s, going to replace it with a fake connection',
360+
'Can not find "%s" in conn id mapping for source with id %s, going to replace it with None',
359361
fake_conn_id,
360362
source.get("id"),
361363
)
362-
source["connection_id"] = "0000000000000" # TODO which ID should we use here?
364+
source["connection_id"] = None
363365
else:
364366
source["connection_id"] = conn_id_mapping[fake_conn_id]
365367

366368
@put_to_request_context(endpoint_code="DatasetImport")
367369
@schematic_request(
368370
ns=ns,
369-
body=dl_api_main_schemas.DatasetImportRequestSchema(),
371+
body=dl_api_lib.schemas.main.DatasetImportRequestSchema(),
370372
responses={
371-
200: ("Success", dl_api_main_schemas.ImportResponseSchema()),
373+
200: ("Success", dl_api_lib.schemas.main.ImportResponseSchema()),
372374
},
373375
)
374376
def post(self, body: dict) -> dict:

lib/dl_api_lib/dl_api_lib/app/control_api/resources/dataset_base.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,16 @@ def dump_dataset_data(
146146
origin_dsrc = dsrc_coll.get_strict(role=DataSourceRole.origin)
147147
connection_id = dsrc_coll.get_connection_id(DataSourceRole.origin)
148148
if conn_id_mapping is not None:
149-
try:
149+
if connection_id not in conn_id_mapping:
150+
LOGGER.info(
151+
'Can not find "%s" in conn id mapping for source with id %s, going to replace it with None',
152+
connection_id,
153+
source_id,
154+
)
155+
connection_id = None
156+
else:
150157
connection_id = conn_id_mapping[connection_id]
151-
except KeyError:
152-
raise DatasetExportError(f"Error to find {connection_id} in connection_id_mapping")
158+
153159
sources.append(
154160
{
155161
"id": source_id,

lib/dl_api_lib/dl_api_lib/request_model/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ class DeleteObligatoryFilterAction(ObligatoryFilterActionBase):
141141

142142
@attr.s(frozen=True, kw_only=True, auto_attribs=True)
143143
class ReplaceConnection:
144-
id: str
144+
id: str | None
145145
new_id: str
146146

147147

lib/dl_api_lib/dl_api_lib/schemas/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,11 @@ class DashSQLRequestSchema(BaseSchema):
170170

171171

172172
class IdMappingContentSchema(BaseSchema):
173-
id_mapping = ma_fields.Dict(ma_fields.String(), ma_fields.String(), required=True)
173+
id_mapping = ma_fields.Dict(
174+
ma_fields.String(allow_none=True),
175+
ma_fields.String(allow_none=True),
176+
required=True,
177+
)
174178

175179

176180
class DatasetExportRequestSchema(IdMappingContentSchema):
@@ -185,6 +189,9 @@ class NotificationContentSchema(BaseSchema):
185189

186190
class DatasetExportResponseSchema(BaseSchema):
187191
class DatasetContentInternalExportSchema(DatasetContentInternalSchema):
192+
class Meta(DatasetContentInternalSchema.Meta):
193+
exclude = ("rls",) # not exporting rls at all, only rls2
194+
188195
name = ma_fields.String()
189196

190197
dataset = ma_fields.Nested(DatasetContentInternalExportSchema)

lib/dl_api_lib/dl_api_lib_tests/db/data_api/result/test_errors.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1+
from dl_api_client.dsmaker.api.data_api import SyncHttpDataApiV1
2+
from dl_api_client.dsmaker.api.dataset_api import SyncHttpDatasetApiV1
3+
from dl_api_commons.base_models import DLHeadersCommon
4+
from dl_api_lib.app_settings import ControlApiAppSettings
5+
from dl_core.us_manager.us_manager_sync import SyncUSManager
16
import pytest
27

38
from dl_api_client.dsmaker.primitives import (
9+
Dataset,
410
RequestLegendItem,
511
RequestLegendItemRef,
612
ResultField,
@@ -163,7 +169,52 @@ def test_dataset_with_deleted_connection(self, saved_dataset, saved_connection_i
163169
result_resp = data_api.get_result(dataset=saved_dataset, fields=[saved_dataset.result_schema[0]], fail_ok=True)
164170
assert result_resp.status_code == 400
165171
assert result_resp.bi_status_code == "ERR.DS_API.REFERENCED_ENTRY_NOT_FOUND"
166-
assert result_resp.json["message"] == f"Referenced connection {saved_connection_id} was deleted"
172+
assert result_resp.json["message"] == f"Referenced connection does not exist (connection id: {saved_connection_id})"
173+
174+
def test_dataset_with_null_connection(
175+
self,
176+
saved_dataset: Dataset,
177+
control_api: SyncHttpDatasetApiV1,
178+
control_api_app_settings: ControlApiAppSettings,
179+
data_api: SyncHttpDataApiV1,
180+
sync_us_manager: SyncUSManager,
181+
) -> None:
182+
# the only intended way to create such a dataset is via export-import, so let's create it that way
183+
export_import_headers = {
184+
DLHeadersCommon.US_MASTER_TOKEN.value: control_api_app_settings.US_MASTER_TOKEN,
185+
}
186+
export_req_data = {"id_mapping": {}}
187+
export_resp = control_api.export_dataset(dataset=saved_dataset, data=export_req_data, headers=export_import_headers)
188+
assert export_resp.status_code == 200, export_resp.json
189+
assert export_resp.json["dataset"]["sources"][0]["connection_id"] is None
190+
191+
import_req_data: dict = {
192+
"id_mapping": {},
193+
"data": {"workbook_id": None, "dataset": export_resp.json["dataset"]},
194+
}
195+
import_resp = control_api.import_dataset(data=import_req_data, headers=export_import_headers)
196+
assert import_resp.status_code == 200, f"{import_resp.json} vs {export_resp.json}"
197+
198+
ds = control_api.serial_adapter.load_dataset_from_response_body(Dataset(), export_resp.json)
199+
200+
query = data_api.serial_adapter.make_req_data_get_result(
201+
dataset=None,
202+
fields=[ds.result_schema[0]],
203+
)
204+
headers = {
205+
"Content-Type": "application/json",
206+
}
207+
result_resp = data_api.get_response_for_dataset_result(
208+
dataset_id=import_resp.json["id"],
209+
raw_body=query,
210+
headers=headers,
211+
)
212+
213+
assert result_resp.status_code == 400, result_resp.json
214+
assert result_resp.json["code"] == "ERR.DS_API.REFERENCED_ENTRY_NOT_FOUND"
215+
assert result_resp.json["message"] == "Referenced connection does not exist (connection id: empty)"
216+
217+
control_api.delete_dataset(dataset_id=import_resp.json["id"])
167218

168219

169220
class TestDashSQLErrors(DefaultApiTestBase):

lib/dl_api_lib_testing/dl_api_lib_testing/connector/dataset_suite.py

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from dl_core.us_manager.us_manager_sync import SyncUSManager
1212
from dl_testing.regulated_test import RegulatedTestCase
1313

14+
import pytest
15+
1416

1517
class DefaultConnectorDatasetTestSuite(DatasetTestBase, RegulatedTestCase, metaclass=abc.ABCMeta):
1618
def check_basic_dataset(self, ds: Dataset) -> None:
@@ -88,35 +90,40 @@ def test_replace_connection(
8890
assert dataset.sources
8991
assert all(source.connection_id == new_connection_id for source in dataset.sources)
9092

91-
def test_export_import_dataset(
93+
@pytest.fixture(scope="function")
94+
def export_import_headers(self, control_api_app_settings: ControlApiAppSettings) -> dict[str, str]:
95+
return {
96+
DLHeadersCommon.US_MASTER_TOKEN.value: control_api_app_settings.US_MASTER_TOKEN,
97+
}
98+
99+
def test_export_import_invalid_schema(
92100
self,
93101
control_api: SyncHttpDatasetApiV1,
94-
saved_connection_id: str,
95102
saved_dataset: Dataset,
96-
sync_us_manager: SyncUSManager,
97-
control_api_app_settings: ControlApiAppSettings,
98-
bi_headers: Optional[dict[str, str]],
99-
) -> None:
100-
us_master_token = control_api_app_settings.US_MASTER_TOKEN
101-
assert us_master_token
102-
103-
if bi_headers is None:
104-
bi_headers = dict()
105-
106-
bi_headers[DLHeadersCommon.US_MASTER_TOKEN.value] = us_master_token
103+
export_import_headers: dict[str, str],
104+
):
105+
export_data = dict()
106+
export_resp = control_api.export_dataset(dataset=saved_dataset, data=export_data, headers=export_import_headers)
107+
assert export_resp.status_code == 400, export_resp.json
107108

108-
# test invalid schema
109-
export_data: dict = dict()
110-
export_resp = control_api.export_dataset(saved_dataset, data=export_data, bi_headers=bi_headers)
111-
assert export_resp.status_code == 400
109+
import_data = dict()
110+
import_resp = control_api.import_dataset(data=import_data, headers=export_import_headers)
111+
assert import_resp.status_code == 400, import_resp.json
112112

113-
export_data = {"id_mapping": {}}
114-
export_resp = control_api.export_dataset(saved_dataset, data=export_data, bi_headers=bi_headers)
115-
assert export_resp.status_code == 400
113+
import_data = {"id_mapping": {}}
114+
import_resp = control_api.import_dataset(data=import_data, headers=export_import_headers)
115+
assert import_resp.status_code == 400, import_resp.json
116116

117+
def test_export_import_dataset(
118+
self,
119+
control_api: SyncHttpDatasetApiV1,
120+
saved_connection_id: str,
121+
saved_dataset: Dataset,
122+
export_import_headers: dict[str, str],
123+
) -> None:
117124
# test common export
118125
export_data = {"id_mapping": {saved_connection_id: "conn_id_1"}}
119-
export_resp = control_api.export_dataset(saved_dataset, data=export_data, bi_headers=bi_headers)
126+
export_resp = control_api.export_dataset(dataset=saved_dataset, data=export_data, headers=export_import_headers)
120127
assert export_resp.status_code == 200
121128
assert export_resp.json["dataset"]["sources"][0]["connection_id"] == "conn_id_1"
122129

@@ -125,17 +132,33 @@ def test_export_import_dataset(
125132
"id_mapping": {"conn_id_1": saved_connection_id},
126133
"data": {"workbook_id": None, "dataset": export_resp.json["dataset"]},
127134
}
128-
import_resp = control_api.import_dataset(data=import_data, bi_headers=bi_headers)
129-
assert import_resp.status_code == 200, f"{import_resp.json} != {export_resp.json}"
135+
import_resp = control_api.import_dataset(data=import_data, headers=export_import_headers)
136+
assert import_resp.status_code == 200, f"{import_resp.json} vs {export_resp.json}"
130137

131138
control_api.delete_dataset(dataset_id=import_resp.json["id"])
132139

133-
# test import without a connection
134-
import_without_conn_data: dict = {
135-
"id_mapping": {}, # empty
140+
def test_export_import_dataset_with_no_connection(
141+
self,
142+
control_api: SyncHttpDatasetApiV1,
143+
saved_connection_id: str,
144+
sync_us_manager: SyncUSManager,
145+
saved_dataset: Dataset,
146+
export_import_headers: dict[str, str],
147+
):
148+
sync_us_manager.delete(sync_us_manager.get_by_id(saved_connection_id))
149+
150+
# export with no connection
151+
export_req_data = {"id_mapping": {}}
152+
export_resp = control_api.export_dataset(dataset=saved_dataset, data=export_req_data, headers=export_import_headers)
153+
assert export_resp.status_code == 200, export_resp.json
154+
assert export_resp.json["dataset"]["sources"][0]["connection_id"] is None
155+
156+
# import with no connection
157+
import_req_data: dict = {
158+
"id_mapping": {},
136159
"data": {"workbook_id": None, "dataset": export_resp.json["dataset"]},
137160
}
138-
import_without_conn_resp = control_api.import_dataset(data=import_without_conn_data, bi_headers=bi_headers)
139-
assert import_without_conn_resp.status_code == 200, f"{import_without_conn_resp.json} != {export_resp.json}"
161+
import_resp = control_api.import_dataset(data=import_req_data, headers=export_import_headers)
162+
assert import_resp.status_code == 200, f"{import_resp.json} vs {export_resp.json}"
140163

141-
control_api.delete_dataset(dataset_id=import_without_conn_resp.json["id"])
164+
control_api.delete_dataset(dataset_id=import_resp.json["id"])

lib/dl_core/dl_core/base_models.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,8 @@ class DefaultConnectionRef(ConnectionRef):
3333
conn_id: str = attr.ib(kw_only=True)
3434

3535

36-
@attr.s(frozen=True, slots=True)
37-
class InternalMaterializationConnectionRef(ConnectionRef):
38-
pass
39-
40-
4136
def connection_ref_from_id(connection_id: Optional[str]) -> ConnectionRef:
42-
if connection_id is None:
43-
# TODO REMOVE: some sample source code still relies on mat con ref
44-
return InternalMaterializationConnectionRef()
45-
else:
46-
return DefaultConnectionRef(conn_id=connection_id)
37+
return DefaultConnectionRef(conn_id=connection_id)
4738

4839

4940
@attr.s()

lib/dl_core/dl_core/data_source/base.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -156,17 +156,6 @@ def spec(self) -> DataSourceSpec:
156156
return self._spec
157157

158158
def _validate_connection(self) -> None:
159-
if self._connection is not None and self._spec.connection_ref is None: # type: ignore # TODO: fix
160-
# TODO CONSIDER: extraction of connection ref
161-
pass
162-
elif self._spec.connection_ref is not None and self._connection is None: # type: ignore # TODO: fix
163-
pass
164-
else:
165-
raise ValueError(
166-
f"Unexpected combination of 'connection' and 'connection_ref':"
167-
f" {self._connection} and {self._spec.connection_ref}" # type: ignore # TODO: fix no attribute
168-
)
169-
170159
if self._connection is not None:
171160
self._validate_connection_cls(self._connection)
172161

lib/dl_core/dl_core/data_source/collection.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from dl_core.base_models import (
1212
ConnectionRef,
1313
DefaultConnectionRef,
14-
InternalMaterializationConnectionRef,
1514
)
1615
from dl_core.connection_executors.sync_base import SyncConnExecutorBase
1716
import dl_core.data_source.base as base
@@ -71,8 +70,6 @@ def get_connection_id(self, role: DataSourceRole | None = None) -> str | None:
7170
conn_ref = self.get_strict(role=role).connection_ref
7271
if isinstance(conn_ref, DefaultConnectionRef):
7372
return conn_ref.conn_id
74-
elif isinstance(conn_ref, InternalMaterializationConnectionRef):
75-
return None
7673
else:
7774
raise TypeError(f"Unexpected conn_ref class: {type(conn_ref)}")
7875

0 commit comments

Comments
 (0)