|
16 | 16 | import threading |
17 | 17 | from typing import List, Optional, Sequence |
18 | 18 | import uuid |
| 19 | +import warnings |
19 | 20 |
|
| 21 | +from google.api_core import retry as api_core_retry |
20 | 22 | import google.cloud.bigquery as bigquery |
21 | 23 |
|
22 | 24 | from bigframes import constants |
23 | 25 | import bigframes.core.events |
| 26 | +import bigframes.exceptions as bfe |
24 | 27 | from bigframes.session import temporary_storage |
25 | 28 | import bigframes.session._io.bigquery as bf_io_bigquery |
26 | 29 |
|
27 | 30 | _TEMP_TABLE_ID_FORMAT = "bqdf{date}_{session_id}_{random_id}" |
| 31 | +# UDFs older than this many days are considered stale and will be deleted |
| 32 | +# from the anonymous dataset before creating a new UDF. |
| 33 | +_UDF_CLEANUP_THRESHOLD_DAYS = 3 |
28 | 34 |
|
29 | 35 |
|
30 | 36 | class AnonymousDatasetManager(temporary_storage.TemporaryStorageManager): |
@@ -137,8 +143,46 @@ def generate_unique_resource_id(self) -> bigquery.TableReference: |
137 | 143 | ) |
138 | 144 | return self.dataset.table(table_id) |
139 | 145 |
|
| 146 | + def _cleanup_old_udfs(self): |
| 147 | + """Clean up old UDFs in the anonymous dataset.""" |
| 148 | + dataset = self.dataset |
| 149 | + routines = list(self.bqclient.list_routines(dataset)) |
| 150 | + cleanup_cutoff_time = datetime.datetime.now( |
| 151 | + datetime.timezone.utc |
| 152 | + ) - datetime.timedelta(days=_UDF_CLEANUP_THRESHOLD_DAYS) |
| 153 | + |
| 154 | + for routine in routines: |
| 155 | + if ( |
| 156 | + routine.created < cleanup_cutoff_time |
| 157 | + and routine._properties["routineType"] == "SCALAR_FUNCTION" |
| 158 | + ): |
| 159 | + try: |
| 160 | + self.bqclient.delete_routine( |
| 161 | + routine.reference, |
| 162 | + not_found_ok=True, |
| 163 | + retry=api_core_retry.Retry(timeout=0), |
| 164 | + ) |
| 165 | + except Exception as e: |
| 166 | + msg = bfe.format_message( |
| 167 | + f"Unable to clean this old UDF '{routine.reference}': {e}" |
| 168 | + ) |
| 169 | + warnings.warn(msg, category=bfe.CleanupFailedWarning) |
| 170 | + |
140 | 171 | def close(self): |
141 | 172 | """Delete tables that were created with this session's session_id.""" |
142 | 173 | for table_ref in self._table_ids: |
143 | 174 | self.bqclient.delete_table(table_ref, not_found_ok=True) |
144 | 175 | self._table_ids.clear() |
| 176 | + |
| 177 | + try: |
| 178 | + # Before closing the session, attempt to clean up any uncollected, |
| 179 | + # old Python UDFs residing in the anonymous dataset. These UDFs |
| 180 | + # accumulate over time and can eventually exceed resource limits. |
| 181 | + # See more from b/450913424. |
| 182 | + self._cleanup_old_udfs() |
| 183 | + except Exception as e: |
| 184 | + # Log a warning on the failure, do not interrupt the workflow. |
| 185 | + msg = bfe.format_message( |
| 186 | + f"Failed to clean up the old Python UDFs before closing the session: {e}" |
| 187 | + ) |
| 188 | + warnings.warn(msg, category=bfe.CleanupFailedWarning) |
0 commit comments