Skip to content

Commit 541e428

Browse files
authored
chore: switch data warehouse describe table to cluster (#42198)
1 parent bc4d663 commit 541e428

File tree

2 files changed

+22
-48
lines changed

2 files changed

+22
-48
lines changed

mypy-baseline.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -923,7 +923,6 @@ products/data_warehouse/backend/models/modeling.py:0: error: No overload variant
923923
products/data_warehouse/backend/models/modeling.py:0: error: Value of type "Any | None" is not indexable [index]
924924
products/data_warehouse/backend/models/modeling.py:0: error: Value of type "Any | None" is not indexable [index]
925925
products/data_warehouse/backend/models/ssh_tunnel.py:0: error: Incompatible types in assignment (expression has type "NoEncryption", variable has type "BestAvailableEncryption") [assignment]
926-
products/data_warehouse/backend/models/table.py:0: error: Incompatible return value type (got "dict[str, dict[str, object]]", expected "dict[str, dict[str, str | bool]] | dict[str, str]") [return-value]
927926
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "Any | None" has no attribute "get" [union-attr]
928927
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "Any | None" has no attribute "keys" [union-attr]
929928
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "DataWarehouseCredential | None" has no attribute "access_key" [union-attr]
@@ -934,7 +933,6 @@ products/data_warehouse/backend/models/table.py:0: error: Item "None" of "DataWa
934933
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "DataWarehouseCredential | None" has no attribute "access_secret" [union-attr]
935934
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "DataWarehouseCredential | None" has no attribute "access_secret" [union-attr]
936935
products/data_warehouse/backend/models/table.py:0: error: Item "None" of "DataWarehouseCredential | None" has no attribute "access_secret" [union-attr]
937-
products/data_warehouse/backend/models/table.py:0: error: Subclass of "list[tuple[str, ...]]" and "int" cannot exist: have distinct disjoint bases [unreachable]
938936
products/data_warehouse/backend/models/table.py:0: error: Value of type "Any | None" is not indexable [index]
939937
products/data_warehouse/backend/models/table.py:0: error: Value of type "Any | None" is not indexable [index]
940938
products/data_warehouse/backend/test/utils.py:0: error: Dict entry 1 has incompatible type "str": "str | Mapping[str, str | bool]"; expected "str": "str | bool" [dict-item]

products/data_warehouse/backend/models/table.py

Lines changed: 22 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from django.db.models import Q
1010

1111
import chdb
12-
import structlog
1312

1413
from posthog.schema import DatabaseSerializedFieldType, HogQLQueryModifiers
1514

@@ -185,53 +184,30 @@ def get_columns(
185184
context=placeholder_context,
186185
table_size_mib=self.size_in_s3_mib,
187186
)
188-
logger = structlog.get_logger(__name__)
189-
try:
190-
# chdb hangs in CI during tests
191-
if TEST:
192-
raise Exception()
193187

194-
quoted_placeholders = {k: f"'{v}'" for k, v in placeholder_context.values.items()}
195-
# chdb doesn't support parameterized queries
196-
chdb_query = f"DESCRIBE TABLE (SELECT * FROM {s3_table_func} LIMIT 1)" % quoted_placeholders
188+
tag_queries(team_id=self.team.pk, table_id=self.id, warehouse_query=True, name="describe_wh_table")
197189

198-
# TODO: upgrade chdb once https://github.com/chdb-io/chdb/issues/342 is actually resolved
199-
# See https://github.com/chdb-io/chdb/pull/374 for the fix
200-
chdb_result = chdb.query(chdb_query, output_format="CSV")
201-
reader = csv.reader(StringIO(str(chdb_result)))
202-
result = [tuple(row) for row in reader]
203-
except Exception as chdb_error:
204-
if self._is_suppressed_chdb_error(chdb_error):
205-
logger.debug(chdb_error)
206-
else:
207-
capture_exception(chdb_error)
208-
209-
tag_queries(team_id=self.team.pk, table_id=self.id, warehouse_query=True)
210-
211-
# The cluster is a little broken right now, and so this can intermittently fail.
212-
# See https://posthog.slack.com/archives/C076R4753Q8/p1756901693184169 for context
213-
attempts = 5
214-
for i in range(attempts):
215-
try:
216-
result = sync_execute(
217-
f"""DESCRIBE TABLE (
218-
SELECT *
219-
FROM {s3_table_func}
220-
LIMIT 1
221-
)""",
222-
args=placeholder_context.values,
223-
)
224-
break
225-
except Exception as err:
226-
if i >= attempts - 1:
227-
capture_exception(err)
228-
if safe_expose_ch_error:
229-
self._safe_expose_ch_error(err)
230-
else:
231-
raise
232-
233-
# Pause execution slightly to not overload clickhouse
234-
time.sleep(2**i)
190+
# The cluster is a little broken right now, and so this can intermittently fail.
191+
# See https://posthog.slack.com/archives/C076R4753Q8/p1756901693184169 for context
192+
attempts = 5
193+
result = None
194+
for i in range(attempts):
195+
try:
196+
result = sync_execute(
197+
f"""DESCRIBE TABLE {s3_table_func}""",
198+
args=placeholder_context.values,
199+
)
200+
break
201+
except Exception as err:
202+
if i >= attempts - 1:
203+
capture_exception(err)
204+
if safe_expose_ch_error:
205+
self._safe_expose_ch_error(err)
206+
else:
207+
raise
208+
209+
# Pause execution slightly to not overload clickhouse
210+
time.sleep(2**i)
235211

236212
if result is None or isinstance(result, int):
237213
raise Exception("No columns types provided by clickhouse in get_columns")

0 commit comments

Comments
 (0)