From 059823d289c88731ad9c2fd6248ceb060610df4b Mon Sep 17 00:00:00 2001 From: David Mateo Date: Wed, 23 Apr 2025 07:40:19 +0200 Subject: [PATCH] fix(athena): ensure thread safety when reading local Athena cache Wrapped the loop in `_LocalMetadataCacheManager.sorted_successful_generator` with a lock to ensure thread safety when accessing the cache. This prevents potential race conditions in multithreaded environments. --- awswrangler/athena/_cache.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/awswrangler/athena/_cache.py b/awswrangler/athena/_cache.py index 2e9252baf..f01c4e255 100644 --- a/awswrangler/athena/_cache.py +++ b/awswrangler/athena/_cache.py @@ -73,9 +73,10 @@ def sorted_successful_generator(self) -> list["QueryExecutionTypeDef"]: Returns successful DDL and DML queries sorted by query completion time. """ filtered: list["QueryExecutionTypeDef"] = [] - for query in self._cache.values(): - if (query["Status"].get("State") == "SUCCEEDED") and (query.get("StatementType") in ["DDL", "DML"]): - filtered.append(query) + with self._lock: + for query in self._cache.values(): + if (query["Status"].get("State") == "SUCCEEDED") and (query.get("StatementType") in ["DDL", "DML"]): + filtered.append(query) return sorted(filtered, key=lambda e: str(e["Status"]["CompletionDateTime"]), reverse=True) def __contains__(self, key: str) -> bool: