Skip to content

Commit 1d5373d

Browse files
authored
feat: cleanup temp resources with graceful session shutdown (#1068)
* chore: improve error handling, redefine recency to 6 hours * make recency cutoff a script param * add automatic cleanup in session and test build script * move cleanup to a nox session * run cleanup nox session only with doctest as doctest finishes faster
1 parent c5eccad commit 1d5373d

File tree

5 files changed

+64
-12
lines changed

5 files changed

+64
-12
lines changed

.kokoro/continuous/doctest.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Only run this nox session.
44
env_vars: {
55
key: "NOX_SESSION"
6-
value: "doctest"
6+
value: "doctest cleanup"
77
}
88

99
env_vars: {

.kokoro/presubmit/doctest.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Only run this nox session.
44
env_vars: {
55
key: "NOX_SESSION"
6-
value: "doctest"
6+
value: "doctest cleanup"
77
}
88

99
env_vars: {

bigframes/session/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ def __init__(
274274
metrics=self._metrics,
275275
)
276276

277+
def __del__(self):
    """Call ``close()`` automatically when the object is finalized.

    Errors raised by ``close()`` are suppressed here: a finalizer cannot
    propagate exceptions to any caller, and Python would otherwise only
    print an "Exception ignored in ..." message to stderr.
    """
    try:
        self.close()
    except Exception:
        # Best-effort cleanup only. NOTE(review): close() may fail if the
        # object was only partially initialized or if the interpreter is
        # shutting down -- confirm whether such failures should be logged.
        pass
280+
277281
@property
278282
def bqclient(self):
279283
return self._clients_provider.bqclient

noxfile.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,8 @@ def system_prerelease(session: nox.sessions.Session):
697697

698698
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
699699
def notebook(session: nox.Session):
700-
GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
701-
if not GOOGLE_CLOUD_PROJECT:
700+
google_cloud_project = os.getenv("GOOGLE_CLOUD_PROJECT")
701+
if not google_cloud_project:
702702
session.error(
703703
"Set GOOGLE_CLOUD_PROJECT environment variable to run notebook session."
704704
)
@@ -937,3 +937,31 @@ def release_dry_run(session):
937937
):
938938
env["PROJECT_ROOT"] = "."
939939
session.run(".kokoro/release-nightly.sh", "--dry-run", env=env)
940+
941+
942+
@nox.session(python=DEFAULT_PYTHON_VERSION)
def cleanup(session):
    """Clean up stale and/or temporary resources in the test project.

    Reads the target project from the ``GOOGLE_CLOUD_PROJECT`` environment
    variable, installs this package in editable mode, and then runs
    ``scripts/manage_cloud_functions.py`` with the ``cleanup`` subcommand.

    Args:
        session: The nox session object used to install and run commands.
    """
    google_cloud_project = os.getenv("GOOGLE_CLOUD_PROJECT")
    if not google_cloud_project:
        session.error(
            "Set GOOGLE_CLOUD_PROJECT environment variable to run cleanup session."
        )

    # Clean up a few stale (more than 12 hours old) temporary cloud run
    # functions created by bigframes. This helps keep the test GCP
    # project within the "Number of functions" quota
    # https://cloud.google.com/functions/quotas#resource_limits
    recency_cutoff_hours = 12
    cleanup_count_per_location = 10

    session.install("-e", ".")

    session.run(
        "python",
        "scripts/manage_cloud_functions.py",
        f"--project-id={google_cloud_project}",
        f"--recency-cutoff={recency_cutoff_hours}",
        "cleanup",
        f"--number={cleanup_count_per_location}",
    )

scripts/manage_cloud_functions.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
import argparse
16-
from datetime import datetime
16+
import datetime as dt
1717
import sys
1818
import time
1919

@@ -94,8 +94,10 @@ def summarize_gcfs(args):
9494
# Count how many GCFs are newer than the recency cutoff
9595
recent = 0
9696
for f in functions:
97-
age = datetime.now() - datetime.fromtimestamp(f.update_time.timestamp())
98-
if age.days <= 0:
97+
age = dt.datetime.now() - dt.datetime.fromtimestamp(
98+
f.update_time.timestamp()
99+
)
100+
if age.total_seconds() < args.recency_cutoff:
99101
recent += 1
100102

101103
region_counts[region] = (functions_count, recent)
@@ -106,7 +108,7 @@ def summarize_gcfs(args):
106108
region = item[0]
107109
count, recent = item[1]
108110
print(
109-
"{}: Total={}, Recent={}, OlderThanADay={}".format(
111+
"{}: Total={}, Recent={}, Older={}".format(
110112
region, count, recent, count - recent
111113
)
112114
)
@@ -120,8 +122,10 @@ def cleanup_gcfs(args):
120122
functions = get_bigframes_functions(args.project_id, region)
121123
count = 0
122124
for f in functions:
123-
age = datetime.now() - datetime.fromtimestamp(f.update_time.timestamp())
124-
if age.days > 0:
125+
age = dt.datetime.now() - dt.datetime.fromtimestamp(
126+
f.update_time.timestamp()
127+
)
128+
if age.total_seconds() >= args.recency_cutoff:
125129
try:
126130
count += 1
127131
GCF_CLIENT.delete_function(name=f.name)
@@ -134,12 +138,15 @@ def cleanup_gcfs(args):
134138
# that for this clean-up, i.e. 6 mutations per minute. So wait for
135139
# 60/6 = 10 seconds
136140
time.sleep(10)
141+
except google.api_core.exceptions.NotFound:
142+
# Most likely the function was already deleted by some other means
143+
pass
137144
except google.api_core.exceptions.ResourceExhausted:
138145
# Stop deleting in this region for now
139146
print(
140-
f"Cannot delete any more functions in region {region} due to quota exhaustion. Please try again later."
147+
f"Failed to delete function in region {region} due to quota exhaustion. Pausing for 2 minutes."
141148
)
142-
break
149+
time.sleep(120)
143150

144151

145152
def list_str(values):
@@ -168,6 +175,19 @@ def list_str(values):
168175
help="Cloud functions region(s). If multiple regions, Specify comma separated (e.g. region1,region2)",
169176
)
170177

178+
def hours_to_timedelta(hrs):
    """Convert a number of hours into the equivalent number of seconds.

    Used as an argparse ``type=`` converter, so invalid input is reported
    by raising ``ValueError``.

    Args:
        hrs: Number of hours as a string or number. Fractional values
            (e.g. ``"0.5"``) are accepted.

    Returns:
        The duration in seconds, as a float.

    Raises:
        ValueError: If ``hrs`` is not a number, is negative, or is too
            large (or non-finite) to be represented as a timedelta.
    """
    hours = float(hrs)
    if hours < 0:
        # A negative cutoff would make every age compare as "older than
        # the cutoff", so reject it up front.
        raise ValueError(f"hours must be non-negative, got {hrs!r}")
    try:
        return dt.timedelta(hours=hours).total_seconds()
    except OverflowError as exc:
        # Normalize to ValueError so argparse reports a usage error.
        raise ValueError(f"hours value out of range: {hrs!r}") from exc
180+
181+
# Staleness threshold option: the value is given in hours on the command
# line and converted by hours_to_timedelta; the default corresponds to
# 24 hours.
parser.add_argument(
    "-c",
    "--recency-cutoff",
    action="store",
    default=hours_to_timedelta("24"),
    help="Number of hours, cloud functions older than which should be considered stale (worthy of cleanup).",
    required=False,
    type=hours_to_timedelta,
)
190+
171191
subparsers = parser.add_subparsers(title="subcommands", required=True)
172192
parser_summary = subparsers.add_parser(
173193
"summary",

0 commit comments

Comments
 (0)