@@ -506,6 +506,7 @@ def read_sql_query( # pylint: disable=too-many-branches,too-many-locals,too-man
506506 max_cache_seconds = max_cache_seconds ,
507507 max_cache_query_inspections = max_cache_query_inspections ,
508508 )
509+ _logger .debug ("cache_info: %s" , cache_info )
509510
510511 if cache_info ["has_valid_cache" ] is True :
511512 _logger .debug ("Valid cache found. Retrieving..." )
@@ -687,6 +688,7 @@ def _resolve_query_with_cache( # pylint: disable=too-many-return-statements
687688 session : Optional [boto3 .Session ],
688689):
689690 """Fetch cached data and return it as a pandas Dataframe (or list of Dataframes)."""
691+ _logger .debug ("cache_info: %s" , cache_info )
690692 if cache_info ["data_type" ] == "parquet" :
691693 manifest_path = cache_info ["query_execution_info" ]["Statistics" ]["DataManifestLocation" ]
692694 # this is needed just so we can access boto's modeled exceptions
@@ -970,7 +972,9 @@ def _prepare_query_string_for_comparison(query_string: str) -> str:
970972 """To use cached data, we need to compare queries. Returns a query string in canonical form."""
971973 # for now this is a simple complete strip, but it could grow into much more sophisticated
972974 # query comparison data structures
973- return "" .join (query_string .split ()).strip ("()" ).lower ()
975+ query_string = "" .join (query_string .split ()).strip ("()" ).lower ()
976+ query_string = query_string [:- 1 ] if query_string .endswith (";" ) is True else query_string
977+ return query_string
974978
975979
976980def _get_last_query_executions (
@@ -983,6 +987,7 @@ def _get_last_query_executions(
983987 args ["WorkGroup" ] = workgroup
984988 paginator = client_athena .get_paginator ("list_query_executions" )
985989 for page in paginator .paginate (** args ):
990+ _logger .debug ("paginating Athena's queries history..." )
986991 query_execution_id_list : List [str ] = page ["QueryExecutionIds" ]
987992 execution_data = client_athena .batch_get_query_execution (QueryExecutionIds = query_execution_id_list )
988993 yield execution_data .get ("QueryExecutions" )
@@ -1026,33 +1031,45 @@ def _check_for_cached_results(
10261031 num_executions_inspected : int = 0
10271032 if max_cache_seconds > 0 : # pylint: disable=too-many-nested-blocks
10281033 current_timestamp = datetime .datetime .now (datetime .timezone .utc )
1029- print (current_timestamp )
10301034 for query_executions in _get_last_query_executions (boto3_session = session , workgroup = workgroup ):
1035+
1036+ _logger .debug ("len(query_executions): %s" , len (query_executions ))
10311037 cached_queries : List [Dict [str , Any ]] = _sort_successful_executions_data (query_executions = query_executions )
10321038 comparable_sql : str = _prepare_query_string_for_comparison (sql )
1039+ _logger .debug ("len(cached_queries): %s" , len (cached_queries ))
10331040
10341041 # this could be mapreduced, but it is only 50 items long, tops
10351042 for query_info in cached_queries :
1036- if (current_timestamp - query_info ["Status" ]["CompletionDateTime" ]).total_seconds () > max_cache_seconds :
1037- break # pragma: no cover
1043+
1044+ query_timestamp : datetime .datetime = query_info ["Status" ]["CompletionDateTime" ]
1045+ _logger .debug ("current_timestamp: %s" , current_timestamp )
1046+ _logger .debug ("query_timestamp: %s" , query_timestamp )
1047+ if (current_timestamp - query_timestamp ).total_seconds () > max_cache_seconds :
1048+ return {"has_valid_cache" : False } # pragma: no cover
10381049
10391050 comparison_query : Optional [str ]
10401051 if query_info ["StatementType" ] == "DDL" and query_info ["Query" ].startswith ("CREATE TABLE" ):
10411052 parsed_query : Optional [str ] = _parse_select_query_from_possible_ctas (query_info ["Query" ])
10421053 if parsed_query is not None :
10431054 comparison_query = _prepare_query_string_for_comparison (query_string = parsed_query )
1055+ _logger .debug ("DDL - comparison_query: %s" , comparison_query )
1056+ _logger .debug ("DDL - comparable_sql: %s" , comparable_sql )
10441057 if comparison_query == comparable_sql :
10451058 data_type = "parquet"
10461059 return {"has_valid_cache" : True , "data_type" : data_type , "query_execution_info" : query_info }
10471060
10481061 elif query_info ["StatementType" ] == "DML" and not query_info ["Query" ].startswith ("INSERT" ):
10491062 comparison_query = _prepare_query_string_for_comparison (query_string = query_info ["Query" ])
1063+ _logger .debug ("DML - comparison_query: %s" , comparison_query )
1064+ _logger .debug ("DML - comparable_sql: %s" , comparable_sql )
10501065 if comparison_query == comparable_sql :
10511066 data_type = "csv"
10521067 return {"has_valid_cache" : True , "data_type" : data_type , "query_execution_info" : query_info }
10531068
10541069 num_executions_inspected += 1
1070+ _logger .debug ("num_executions_inspected: %s" , num_executions_inspected )
1071+ _logger .debug ("max_cache_query_inspections: %s" , max_cache_query_inspections )
10551072 if num_executions_inspected >= max_cache_query_inspections :
1056- break # pragma: no cover
 1073+ return {"has_valid_cache" : False } # pragma: no cover
10571074
10581075 return {"has_valid_cache" : False }