diff --git a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source.py b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source.py
index 1b9837783..2f2a6ee8f 100644
--- a/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source.py
+++ b/soda-bigquery/src/soda_bigquery/common/data_sources/bigquery_data_source.py
@@ -16,6 +16,7 @@
     COLUMN,
     CONCAT_WS,
     COUNT,
+    CREATE_TABLE_AS_SELECT,
     DISTINCT,
     LITERAL,
     REGEX_LIKE,
@@ -242,3 +243,13 @@ def get_preferred_number_of_rows_for_insert(self) -> int:
     def _build_concat_ws_sql(self, concat_ws: CONCAT_WS) -> str:
         elements: str = f", '{concat_ws.separator}', ".join(self.build_expression_sql(e) for e in concat_ws.expressions)
         return f"CONCAT({elements})"
+
+    # Exact copy from Postgres. So we can refactor this once more data sources support this.
+    def build_create_table_as_select_sql(
+        self, create_table_as_select: CREATE_TABLE_AS_SELECT, add_semicolon: bool = True
+    ) -> str:
+        result_sql: str = f"CREATE TABLE {create_table_as_select.fully_qualified_table_name} AS "
+        result_sql += f"(\n{self.build_select_sql(create_table_as_select.select_elements, add_semicolon=False)})" + (
+            ";" if add_semicolon else ""
+        )
+        return result_sql
diff --git a/soda-core/src/soda_core/common/data_source_connection.py b/soda-core/src/soda_core/common/data_source_connection.py
index fa5f97083..52d53510b 100644
--- a/soda-core/src/soda_core/common/data_source_connection.py
+++ b/soda-core/src/soda_core/common/data_source_connection.py
@@ -84,11 +84,17 @@ def execute_query(self, sql: str, log_query: bool = True) -> QueryResult:
                 formatted_rows = self.format_rows(rows)
                 truncated_rows = self.truncate_rows(formatted_rows)
                 headers = [self._execute_query_get_result_row_column_name(c) for c in cursor.description]
-                table_text: str = tabulate(
-                    truncated_rows,
-                    headers=headers,
-                    tablefmt="github",
-                )
+                # The tabulate can crash if the rows contain non-ASCII characters.
+                # This is purely for debugging/logging purposes, so we can try/catch this.
+                try:
+                    table_text: str = tabulate(
+                        truncated_rows,
+                        headers=headers,
+                        tablefmt="github",
+                    )
+                except UnicodeDecodeError as e:
+                    logger.debug(f"Error formatting rows. These may contain non-ASCII characters. {e}")
+                    table_text = "Error formatting rows. These may contain non-ASCII characters."
                 logger.debug(
                     f"SQL query result (max {self.MAX_ROWS} rows, {self.MAX_CHARS_PER_STRING} chars per string):\n{table_text}"