Skip to content

Commit 0449ce4

Browse files
authored
CREATE TABLE AS support for BigQuery (#2487)
* Add CREATE TABLE AS compatibility for the BigQuery data source
* Fix issue with tabulate when the result contains non-ASCII characters
* Fix formatting of the string for tabulate
1 parent 6172589 commit 0449ce4

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
COLUMN,
1717
CONCAT_WS,
1818
COUNT,
19+
CREATE_TABLE_AS_SELECT,
1920
DISTINCT,
2021
LITERAL,
2122
REGEX_LIKE,
@@ -242,3 +243,13 @@ def get_preferred_number_of_rows_for_insert(self) -> int:
242243
def _build_concat_ws_sql(self, concat_ws: CONCAT_WS) -> str:
    """Render a CONCAT_WS node as a single ``CONCAT(...)`` call.

    Every expression in ``concat_ws.expressions`` is rendered with
    ``self.build_expression_sql`` and the separator, wrapped in single
    quotes, is placed as its own argument between consecutive expressions.

    Args:
        concat_ws: Node providing ``separator`` and ``expressions``.

    Returns:
        The ``CONCAT(...)`` SQL text.
    """
    rendered_parts = [self.build_expression_sql(expression) for expression in concat_ws.expressions]
    # The separator is inserted verbatim between the quotes (no escaping is applied here).
    quoted_separator = f"'{concat_ws.separator}'"
    glue = f", {quoted_separator}, "
    return "CONCAT(" + glue.join(rendered_parts) + ")"
246+
247+
# Exact copy from Postgres. So we can refactor this once more data sources support this.
248+
def build_create_table_as_select_sql(
    self, create_table_as_select: CREATE_TABLE_AS_SELECT, add_semicolon: bool = True
) -> str:
    """Build a ``CREATE TABLE <name> AS (<select>)`` statement.

    Args:
        create_table_as_select: Node providing ``fully_qualified_table_name``
            and the ``select_elements`` passed to ``self.build_select_sql``.
        add_semicolon: When true, a ``;`` is appended after the closing
            parenthesis of the statement.

    Returns:
        The complete statement as a string.
    """
    target_table = create_table_as_select.fully_qualified_table_name
    # The nested SELECT is always rendered without its own semicolon; the
    # optional terminator belongs to the outer statement only.
    select_sql = self.build_select_sql(create_table_as_select.select_elements, add_semicolon=False)
    terminator = ";" if add_semicolon else ""
    return f"CREATE TABLE {target_table} AS (\n{select_sql}){terminator}"

soda-core/src/soda_core/common/data_source_connection.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,17 @@ def execute_query(self, sql: str, log_query: bool = True) -> QueryResult:
8484
formatted_rows = self.format_rows(rows)
8585
truncated_rows = self.truncate_rows(formatted_rows)
8686
headers = [self._execute_query_get_result_row_column_name(c) for c in cursor.description]
87-
table_text: str = tabulate(
88-
truncated_rows,
89-
headers=headers,
90-
tablefmt="github",
91-
)
87+
# The tabulate can crash if the rows contain non-ASCII characters.
88+
# This is purely for debugging/logging purposes, so we can try/catch this.
89+
try:
90+
table_text: str = tabulate(
91+
truncated_rows,
92+
headers=headers,
93+
tablefmt="github",
94+
)
95+
except UnicodeDecodeError as e:
96+
logger.debug(f"Error formatting rows. These may contain non-ASCII characters. {e}")
97+
table_text = "Error formatting rows. These may contain non-ASCII characters."
9298

9399
logger.debug(
94100
f"SQL query result (max {self.MAX_ROWS} rows, {self.MAX_CHARS_PER_STRING} chars per string):\n{table_text}"

0 commit comments

Comments
 (0)