@@ -642,7 +642,7 @@ def describe_table(
642642
643643
644644@apply_configs
645- def create_ctas_table (
645+ def create_ctas_table ( # pylint: disable=too-many-locals
646646 sql : str ,
647647 database : str ,
648648 ctas_table : Optional [str ] = None ,
@@ -658,8 +658,10 @@ def create_ctas_table(
658658 data_source : Optional [str ] = None ,
659659 encryption : Optional [str ] = None ,
660660 kms_key : Optional [str ] = None ,
661+ categories : Optional [List [str ]] = None ,
662+ wait : bool = False ,
661663 boto3_session : Optional [boto3 .Session ] = None ,
662- ) -> Dict [str , str ]:
664+ ) -> Dict [str , Union [ str , _QueryMetadata ] ]:
663665 """Create a new table populated with the results of a SELECT query.
664666
665667 https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html
@@ -703,13 +705,19 @@ def create_ctas_table(
703705 Valid values: [None, 'SSE_S3', 'SSE_KMS']. Note: 'CSE_KMS' is not supported.
704706 kms_key : str, optional
705707 For SSE-KMS, this is the KMS key ARN or ID.
708+ categories: List[str], optional
709+ List of columns names that should be returned as pandas.Categorical.
710+ Recommended for memory restricted environments.
711+ wait : bool, default False
712+ Whether to wait for the query to finish and return a dictionary with the Query metadata.
706713 boto3_session : Optional[boto3.Session], optional
707714 Boto3 Session. The default boto3 session is used if boto3_session is None.
708715
709716 Returns
710717 -------
711- Dict[str, str]
712- A dictionary with the ID of the query, and the CTAS database and table names
718+ Dict[str, Union[str, _QueryMetadata]]
719+ A dictionary with the the CTAS database and table names.
720+ If `wait` is `False`, the query ID is included, otherwise a Query metadata object is added instead.
713721 """
714722 ctas_table = catalog .sanitize_table_name (ctas_table ) if ctas_table else f"temp_table_{ uuid .uuid4 ().hex } "
715723 ctas_database = ctas_database if ctas_database else database
@@ -753,7 +761,7 @@ def create_ctas_table(
753761 _logger .debug ("ctas sql: %s" , ctas_sql )
754762
755763 try :
756- query_id : str = _start_query_execution (
764+ query_execution_id : str = _start_query_execution (
757765 sql = ctas_sql ,
758766 wg_config = wg_config ,
759767 database = database ,
@@ -775,7 +783,35 @@ def create_ctas_table(
775783 f"It is not possible to wrap this query into a CTAS statement. Root error message: { error ['Message' ]} "
776784 )
777785 raise ex
778- return {"ctas_database" : ctas_database , "ctas_table" : ctas_table , "ctas_query_id" : query_id }
786+
787+ response : Dict [str , Union [str , _QueryMetadata ]] = {"ctas_database" : ctas_database , "ctas_table" : ctas_table }
788+ if wait :
789+ try :
790+ response ["ctas_query_metadata" ] = _get_query_metadata (
791+ query_execution_id = query_execution_id ,
792+ boto3_session = boto3_session ,
793+ categories = categories ,
794+ metadata_cache_manager = _cache_manager ,
795+ )
796+ except exceptions .QueryFailed as ex :
797+ msg : str = str (ex )
798+ if "Column name" in msg and "specified more than once" in msg :
799+ raise exceptions .InvalidCtasApproachQuery (
800+ f"Please, define distinct names for your columns. Root error message: { msg } "
801+ )
802+ if "Column name not specified" in msg :
803+ raise exceptions .InvalidArgumentValue (
804+ "Please, define all columns names in your query. (E.g. 'SELECT MAX(col1) AS max_col1, ...')"
805+ )
806+ if "Column type is unknown" in msg :
807+ raise exceptions .InvalidArgumentValue (
808+ "Please, don't leave undefined columns types in your query. You can cast to ensure it. "
809+ "(E.g. 'SELECT CAST(NULL AS INTEGER) AS MY_COL, ...')"
810+ )
811+ raise ex
812+ else :
813+ response ["ctas_query_id" ] = query_execution_id
814+ return response
779815
780816
781817@apply_configs
0 commit comments