Skip to content

Commit 1f78a9e

Browse files
authored
feat: BI-6156 add dt postprocessing to dashsql (#1465)
* add dashsql postprocessing * some fixes * refactor
1 parent 1f483b9 commit 1f78a9e

File tree

5 files changed

+108
-10
lines changed
  • lib
    • dl_api_lib
    • dl_connector_clickhouse/dl_connector_clickhouse_tests/db/api
    • dl_connector_trino/dl_connector_trino_tests/db/api
    • dl_query_processing/dl_query_processing/postprocessing/postprocessors

5 files changed

+108
-10
lines changed

lib/dl_api_lib/dl_api_lib/app/data_api/resources/dashsql.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,22 @@
4949
IncomingDSQLParamType,
5050
IncomingDSQLParamTypeExt,
5151
)
52+
from dl_query_processing.postprocessing.postprocessors.all import (
53+
TYPE_PROCESSORS,
54+
postprocess_array,
55+
)
5256
from dl_query_processing.utils.datetime import parse_datetime
5357

5458

5559
if TYPE_CHECKING:
5660
from dl_constants.types import TJSONLike
5761
from dl_core.data_processing.dashsql import TResultEvents
5862

63+
import logging
64+
65+
66+
LOGGER = logging.getLogger(__name__)
67+
5968

6069
TRowProcessor = Callable[[TRow], TRow]
6170

@@ -180,12 +189,37 @@ def _postprocess_any(cls, value: Any) -> Any:
180189
return value
181190

182191
@classmethod
def _get_type_processor(cls, bi_type_name: str) -> Callable[[Any], Any]:
    """Return a callable that converts a raw cell value for the given BI type name.

    Falls back to an identity function when the name is not a known
    ``UserDataType`` member, when no type-specific processor is registered,
    or when the processor is the array one (arrays are passed through as-is).
    """
    bi_type = UserDataType.__members__.get(bi_type_name)

    if bi_type is None:
        # Unknown type name: pass the value through unchanged.
        return lambda val: val

    # Handle datetimetz as datetime (without timezone conversion).
    if bi_type == UserDataType.datetimetz:
        bi_type = UserDataType.datetime

    type_processor = TYPE_PROCESSORS.get(bi_type)

    # Return arrays as is (no need to dump), otherwise use the processor.
    # `is` — identity check on the function object, not value equality.
    if type_processor is None or type_processor is postprocess_array:
        return lambda val: val

    return type_processor
@classmethod
def _make_postprocess_row(cls, bi_type_names: tuple[str, ...]) -> TRowProcessor:
    """Build a row postprocessor for the given per-column BI type names.

    Each cell is first run through its type-specific processor, then through
    the generic ``_postprocess_any``; if the type-specific conversion fails,
    the raw value is used instead (with a warning logged).
    """
    processors = tuple(cls._get_type_processor(name) for name in bi_type_names)

    def _process_cell(proc: Callable[[Any], Any], val: Any) -> Any:
        try:
            return cls._postprocess_any(proc(val))
        except (ValueError, TypeError):
            LOGGER.warning("Failed to postprocess value %s (processor: %s). Using raw value.", val, proc)
            return cls._postprocess_any(val)

    def _postprocess_row(row: TRow) -> TRow:
        # strict=True: a row length that disagrees with the column types is a bug.
        return tuple(
            _process_cell(proc, val)
            for proc, val in zip(processors, row, strict=True)
        )

    return _postprocess_row
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,71 @@
1+
import re
2+
3+
from aiohttp.test_utils import TestClient
4+
import pytest
5+
16
from dl_api_lib_testing.connector.dashsql_suite import DefaultDashSQLTestSuite
27
from dl_api_lib_tests.db.base import DefaultApiTestBase
38
from dl_constants.enums import RawSQLLevel
49

510

611
class TestDashSQL(DefaultApiTestBase, DefaultDashSQLTestSuite):
    raw_sql_level = RawSQLLevel.dashsql

    # Accepts "YYYY-MM-DDTHH:MM:SS" with an optional fractional-seconds part.
    ISO_DATETIME_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$")

    @pytest.fixture(scope="class")
    def dashsql_datetime_query(self) -> str:
        # Covers DateTime/DateTime64, explicit timezones, epoch input, CAST and now().
        return """select
            toDateTime('2023-06-15 14:30:45'),
            toDateTime64('2023-06-15 14:30:45.123', 3),
            toDateTime('2023-06-15 14:30:45', 'Europe/Moscow'),
            toDateTime64('2023-06-15 14:30:45.123', 3, 'UTC'),
            toDateTime(1686838245),
            cast('2023-06-15 14:30:45' AS DateTime),
            now()
        """

    @pytest.fixture(scope="class")
    def dashsql_array_query(self) -> str:
        return "select [1, 2, 3]"

    @pytest.mark.asyncio
    async def test_postprocess_datetime(
        self,
        data_api_lowlevel_aiohttp_client: TestClient,
        saved_connection_id: str,
        dashsql_datetime_query: str,
        bi_headers: dict[str, str] | None,
    ) -> None:
        """Test that all datetime flavours are serialized as ISO 8601 strings."""
        resp = await self.get_dashsql_response(
            data_api_aio=data_api_lowlevel_aiohttp_client,
            conn_id=saved_connection_id,
            query=dashsql_datetime_query,
            headers=bi_headers,
        )
        data = await resp.json()
        row_data = data[1]["data"]
        for datetime_value in row_data:
            assert self.ISO_DATETIME_PATTERN.match(
                datetime_value
            ), f"Value '{datetime_value}' doesn't match ISO 8601 format"

    @pytest.mark.asyncio
    async def test_postprocess_array(
        self,
        data_api_lowlevel_aiohttp_client: TestClient,
        saved_connection_id: str,
        dashsql_array_query: str,
        bi_headers: dict[str, str] | None,
    ) -> None:
        """Test that array values are returned as-is (lists), not JSON-dumped to strings."""
        resp = await self.get_dashsql_response(
            data_api_aio=data_api_lowlevel_aiohttp_client,
            conn_id=saved_connection_id,
            query=dashsql_array_query,
            headers=bi_headers,
        )
        data = await resp.json()

        row_data = data[1]["data"]
        assert row_data[0] == [1, 2, 3], "Array should be returned as list"

lib/dl_connector_clickhouse/dl_connector_clickhouse_tests/db/api/test_dashsql.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ async def test_result(self, data_api_lowlevel_aiohttp_client: TestClient, saved_
2727
assert resp_data[0]["data"]["bi_types"] == ["integer", "unsupported", "genericdatetime"]
2828

2929
assert resp_data[0]["data"]["clickhouse_headers"]["X-ClickHouse-Timezone"] == "UTC", resp_data
30-
assert resp_data[1] == {"event": "row", "data": [11, [33, 44], "2020-01-02 03:04:16"]}, resp_data
31-
assert resp_data[2] == {"event": "row", "data": [22, [33, 44], "2020-01-02 03:04:27"]}, resp_data
30+
assert resp_data[1] == {"event": "row", "data": [11, [33, 44], "2020-01-02T03:04:16"]}, resp_data
31+
assert resp_data[2] == {"event": "row", "data": [22, [33, 44], "2020-01-02T03:04:27"]}, resp_data
3232
assert resp_data[-1]["event"] == "footer", resp_data
3333
assert isinstance(resp_data[-1]["data"], dict)
3434

lib/dl_connector_trino/dl_connector_trino_tests/db/api/test_dashsql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ async def test_result_with_params(self, data_api_lowlevel_aiohttp_client: TestCl
135135
True,
136136
False,
137137
"2021-07-19",
138-
"2021-07-19 19:35:43.000000",
138+
"2021-07-19T19:35:43",
139139
"11",
140140
"22",
141141
True,

lib/dl_query_processing/dl_query_processing/postprocessing/postprocessors/all.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def postprocess_array(value: Optional[Iterable[Any]]) -> Optional[Iterable[Optio
4141
return json.dumps(value, ensure_ascii=False) # Frontend fully supports non-ASCII JSON
4242

4343

44-
TYPE_PROCESSORS = {
44+
TYPE_PROCESSORS: dict[UserDataType, Callable[[Any], Any]] = {
4545
UserDataType.datetime: postprocess_datetime,
4646
# parametrized: UserDataType.datetimetz
4747
UserDataType.genericdatetime: postprocess_genericdatetime,
@@ -62,7 +62,7 @@ def get_type_processor(field_type_info: Optional[DetailedType]) -> Callable[[Any
6262
# Basic
6363
result = TYPE_PROCESSORS.get(field_type_info.data_type)
6464
if result is not None:
65-
return result # type: ignore # TODO: fix
65+
return result
6666

6767
# Parametrized
6868
if field_type_info.data_type == UserDataType.datetimetz:

0 commit comments

Comments
 (0)