Skip to content

Commit 71a878c

Browse files
committed
[GCP Functions] Retry function creation on failure to improve reliability
1 parent e2b61b8 commit 71a878c

File tree

5 files changed

+16
-19
lines changed

5 files changed

+16
-19
lines changed

lithops/job/job.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,6 @@ def _create_job(
177177
"""
178178
Creates a new Job
179179
"""
180-
global FUNCTION_CACHE
181-
182180
ext_env = {} if extra_env is None else extra_env.copy()
183181
if ext_env:
184182
ext_env = utils.convert_bools_to_string(ext_env)

lithops/serverless/backends/gcp_functions/config.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@
2929
RUNTIME_MEMORY_MAX = 8192 # 8GB
3030
RUNTIME_MEMORY_OPTIONS = {128, 256, 512, 1024, 2048, 4096, 8192}
3131

32-
RETRIES = 5
33-
RETRY_SLEEP = 20
34-
3532
AVAILABLE_PY_RUNTIMES = {
3633
'3.7': 'python37',
3734
'3.8': 'python38',
@@ -49,7 +46,9 @@
4946
'max_workers': 1000,
5047
'worker_processes': 1,
5148
'invoke_pool_threads': 1000,
52-
'trigger': 'pub/sub'
49+
'trigger': 'pub/sub',
50+
'retries': 5,
51+
'retry_sleep': 10,
5352
}
5453

5554
REQUIREMENTS_FILE = """
@@ -103,8 +102,5 @@ def load_config(config_data=None):
103102
if config_data['gcp_functions']['runtime_memory'] > RUNTIME_MEMORY_MAX:
104103
config_data['gcp_functions']['runtime_memory'] = RUNTIME_MEMORY_MAX
105104

106-
config_data['gcp_functions']['retries'] = RETRIES
107-
config_data['gcp_functions']['retry_sleep'] = RETRY_SLEEP
108-
109105
if 'region' not in config_data['gcp']:
110106
config_data['gcp']['region'] = config_data['gcp_functions']['region']

lithops/serverless/backends/gcp_functions/gcp_functions.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,10 +244,19 @@ def _create_function(self, runtime_name, memory, timeout=60):
244244
'failurePolicy': {}
245245
}
246246

247-
operation = self._api_resource.projects().locations().functions().create(
248-
location=self._default_location,
249-
body=cloud_function
250-
).execute(num_retries=self.num_retries)
247+
logger.info(f'Deploying function {function_location}')
248+
for attempt in range(self.num_retries):
249+
try:
250+
operation = self._api_resource.projects().locations().functions().create(
251+
location=self._default_location,
252+
body=cloud_function
253+
).execute()
254+
break
255+
except Exception as e:
256+
if attempt < self.num_retries - 1:
257+
time.sleep(self.retry_sleep)
258+
else:
259+
raise Exception(f"Failed to create Cloud Function after {self.num_retries} attempts.") from e
251260

252261
# Wait until the function is completely deployed
253262
logger.info('Waiting for the function to be deployed')

lithops/standalone/worker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,8 +146,6 @@ def notify_task_done(job_key, call_id):
146146

147147

148148
def redis_queue_consumer(pid, work_queue_name, exec_mode, backend):
149-
global worker_threads
150-
151149
worker_threads[pid]['status'] = WorkerStatus.IDLE.value
152150

153151
logger.info(f"Redis consumer process {pid} started")
@@ -213,7 +211,6 @@ def redis_queue_consumer(pid, work_queue_name, exec_mode, backend):
213211
def run_worker():
214212
global redis_client
215213
global budget_keeper
216-
global worker_threads
217214

218215
os.makedirs(LITHOPS_TEMP_DIR, exist_ok=True)
219216

lithops/storage/storage.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,9 +462,6 @@ def get_runtime_meta(self, key):
462462
:param runtime: name of the runtime
463463
:return: runtime metadata
464464
"""
465-
466-
global RUNTIME_META_CACHE
467-
468465
path = [RUNTIMES_PREFIX, key + ".meta.json"]
469466
filename_local_path = os.path.join(CACHE_DIR, *path)
470467

0 commit comments

Comments
 (0)