Skip to content

Commit 76df88d

Browse files
authored
Merge pull request #1729 from GSA/main
05/22/2025 Production Deploy
2 parents 25886cc + d96b665 commit 76df88d

File tree

10 files changed

+335
-322
lines changed

10 files changed

+335
-322
lines changed

.github/dependabot.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ updates:
1111
interval: "daily"
1212
labels:
1313
- "dependabot" # Custom label to identify Dependabot PRs
14+
assignees:
15+
- "alexjanousekGSA"
16+
reviewers:
17+
- "alexjanousekGSA"

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ If you don't have a line for your `$PATH` environment variable, add it in like
221221
this, which will include the PostgreSQL binaries:
222222

223223
```
224-
export PATH="/opt/homebrew/opt/postgresql@15/bin:$PATH
224+
export PATH="/opt/homebrew/opt/postgresql@15/bin:$PATH"
225225
```
226226

227227
_NOTE: You don't want to overwrite your existing `$PATH` environment variable! Hence the reason why it is included on the end like this; paths are separated by a colon._

app/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import time
66
import uuid
77
from contextlib import contextmanager
8-
from multiprocessing import Manager
8+
from threading import Lock
99
from time import monotonic
1010

1111
from celery import Celery, Task, current_task
@@ -32,6 +32,9 @@
3232
from notifications_utils.clients.redis.redis_client import RedisClient
3333
from notifications_utils.clients.zendesk.zendesk_client import ZendeskClient
3434

35+
job_cache = {}
36+
job_cache_lock = Lock()
37+
3538

3639
class NotifyCelery(Celery):
3740
def init_app(self, app):
@@ -152,9 +155,6 @@ def create_app(application):
152155
redis_store.init_app(application)
153156
document_download_client.init_app(application)
154157

155-
manager = Manager()
156-
application.config["job_cache"] = manager.dict()
157-
158158
register_blueprint(application)
159159

160160
# avoid circular imports by importing this file later

app/aws/s3.py

Lines changed: 53 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
import datetime
33
import re
44
import time
5-
from concurrent.futures import ThreadPoolExecutor
65
from io import StringIO
76

87
import botocore
98
import eventlet
109
from boto3 import Session
1110
from flask import current_app
1211

12+
from app import job_cache, job_cache_lock
1313
from app.clients import AWS_CLIENT_CONFIG
1414
from notifications_utils import aware_utcnow
1515

@@ -25,31 +25,33 @@
2525
s3_resource = None
2626

2727

28+
def get_service_id_from_key(key):
29+
key = key.replace("service-", "")
30+
key = key.split("/")
31+
key = key[0].replace("-notify", "")
32+
return key
33+
34+
2835
def set_job_cache(key, value):
29-
current_app.logger.debug(f"Setting {key} in the job_cache.")
30-
job_cache = current_app.config["job_cache"]
31-
job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)
36+
# current_app.logger.debug(f"Setting {key} in the job_cache to {value}.")
37+
38+
with job_cache_lock:
39+
job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)
3240

3341

3442
def get_job_cache(key):
35-
job_cache = current_app.config["job_cache"]
43+
3644
ret = job_cache.get(key)
37-
if ret is None:
38-
current_app.logger.warning(f"Could not find {key} in the job_cache.")
39-
else:
40-
current_app.logger.debug(f"Got {key} from job_cache.")
4145
return ret
4246

4347

4448
def len_job_cache():
45-
job_cache = current_app.config["job_cache"]
4649
ret = len(job_cache)
4750
current_app.logger.debug(f"Length of job_cache is {ret}")
4851
return ret
4952

5053

5154
def clean_cache():
52-
job_cache = current_app.config["job_cache"]
5355
current_time = time.time()
5456
keys_to_delete = []
5557
for key, (_, expiry_time) in job_cache.items():
@@ -59,8 +61,9 @@ def clean_cache():
5961
current_app.logger.debug(
6062
f"Deleting the following keys from the job_cache: {keys_to_delete}"
6163
)
62-
for key in keys_to_delete:
63-
del job_cache[key]
64+
with job_cache_lock:
65+
for key in keys_to_delete:
66+
del job_cache[key]
6467

6568

6669
def get_s3_client():
@@ -74,7 +77,7 @@ def get_s3_client():
7477
aws_secret_access_key=secret_key,
7578
region_name=region,
7679
)
77-
s3_client = session.client("s3")
80+
s3_client = session.client("s3", config=AWS_CLIENT_CONFIG)
7881
return s3_client
7982

8083

@@ -185,23 +188,24 @@ def read_s3_file(bucket_name, object_key, s3res):
185188
"""
186189
try:
187190
job_id = get_job_id_from_s3_object_key(object_key)
191+
service_id = get_service_id_from_key(object_key)
192+
188193
if get_job_cache(job_id) is None:
189-
object = (
194+
job = (
190195
s3res.Object(bucket_name, object_key)
191196
.get()["Body"]
192197
.read()
193198
.decode("utf-8")
194199
)
195-
set_job_cache(job_id, object)
196-
set_job_cache(f"{job_id}_phones", extract_phones(object))
200+
set_job_cache(job_id, job)
201+
set_job_cache(f"{job_id}_phones", extract_phones(job, service_id, job_id))
197202
set_job_cache(
198203
f"{job_id}_personalisation",
199-
extract_personalisation(object),
204+
extract_personalisation(job),
200205
)
201206

202-
except LookupError:
203-
# perhaps our key is not formatted as we expected. If so skip it.
204-
current_app.logger.exception("LookupError #notify-debug-admin-1200")
207+
except Exception as e:
208+
current_app.logger.exception(str(e))
205209

206210

207211
def get_s3_files():
@@ -216,11 +220,21 @@ def get_s3_files():
216220
current_app.logger.info(
217221
f"job_cache length before regen: {len_job_cache()} #notify-debug-admin-1200"
218222
)
223+
count = 0
219224
try:
220-
with ThreadPoolExecutor() as executor:
221-
executor.map(lambda key: read_s3_file(bucket_name, key, s3res), object_keys)
225+
for object_key in object_keys:
226+
read_s3_file(bucket_name, object_key, s3res)
227+
count = count + 1
228+
eventlet.sleep(0.2)
222229
except Exception:
223-
current_app.logger.exception("Connection pool issue")
230+
current_app.logger.exception(
231+
f"Trouble reading {object_key} which is # {count} during cache regeneration"
232+
)
233+
except OSError as e:
234+
current_app.logger.exception(
235+
f"Egress proxy issue reading {object_key} which is # {count}"
236+
)
237+
raise e
224238

225239
current_app.logger.info(
226240
f"job_cache length after regen: {len_job_cache()} #notify-debug-admin-1200"
@@ -290,9 +304,7 @@ def file_exists(file_location):
290304

291305

292306
def get_job_location(service_id, job_id):
293-
current_app.logger.debug(
294-
f"#notify-debug-s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
295-
)
307+
296308
return (
297309
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
298310
NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id),
@@ -308,9 +320,7 @@ def get_old_job_location(service_id, job_id):
308320
but it will take a few days where we have to support both formats.
309321
Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
310322
"""
311-
current_app.logger.debug(
312-
f"#notify-debug-s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
313-
)
323+
314324
return (
315325
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
316326
FILE_LOCATION_STRUCTURE.format(service_id, job_id),
@@ -449,7 +459,6 @@ def extract_personalisation(job):
449459
def get_phone_number_from_s3(service_id, job_id, job_row_number):
450460
job = get_job_cache(job_id)
451461
if job is None:
452-
current_app.logger.debug(f"job {job_id} was not in the cache")
453462
job = get_job_from_s3(service_id, job_id)
454463
# Even if it is None, put it here to avoid KeyErrors
455464
set_job_cache(job_id, job)
@@ -463,8 +472,16 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number):
463472
)
464473
return "Unavailable"
465474

466-
phones = extract_phones(job, service_id, job_id)
467-
set_job_cache(f"{job_id}_phones", phones)
475+
phones = get_job_cache(f"{job_id}_phones")
476+
if phones is None:
477+
current_app.logger.debug("HAVE TO REEXTRACT PHONES!")
478+
phones = extract_phones(job, service_id, job_id)
479+
set_job_cache(f"{job_id}_phones", phones)
480+
current_app.logger.debug(f"SETTING PHONES TO {phones}")
481+
else:
482+
phones = phones[
483+
0
484+
] # we only want the phone numbers not the cache expiration time
468485

469486
# If we can find the quick dictionary, use it
470487
phone_to_return = phones[job_row_number]
@@ -483,7 +500,6 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
483500
# So this is a little recycling mechanism to reduce the number of downloads.
484501
job = get_job_cache(job_id)
485502
if job is None:
486-
current_app.logger.debug(f"job {job_id} was not in the cache")
487503
job = get_job_from_s3(service_id, job_id)
488504
# Even if it is None, put it here to avoid KeyErrors
489505
set_job_cache(job_id, job)
@@ -501,7 +517,9 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
501517
)
502518
return {}
503519

504-
set_job_cache(f"{job_id}_personalisation", extract_personalisation(job))
520+
personalisation = get_job_cache(f"{job_id}_personalisation")
521+
if personalisation is None:
522+
set_job_cache(f"{job_id}_personalisation", extract_personalisation(job))
505523

506524
return get_job_cache(f"{job_id}_personalisation")[0].get(job_row_number)
507525

app/service/rest.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import time
23
from datetime import datetime, timedelta
34
from zoneinfo import ZoneInfo
45

@@ -504,6 +505,10 @@ def get_all_notifications_for_service(service_id):
504505
if "page_size" in data
505506
else current_app.config.get("PAGE_SIZE")
506507
)
508+
# HARD CODE TO 100 for now. 1000 or 10000 causes reports to time out before they complete (if big)
509+
# Tests are relying on the value in config (20), whereas the UI seems to pass 10000
510+
if page_size > 100:
511+
page_size = 100
507512
limit_days = data.get("limit_days")
508513
include_jobs = data.get("include_jobs", True)
509514
include_from_test_key = data.get("include_from_test_key", False)
@@ -517,6 +522,8 @@ def get_all_notifications_for_service(service_id):
517522
f"get pagination with {service_id} service_id filters {data} \
518523
limit_days {limit_days} include_jobs {include_jobs} include_one_off {include_one_off}"
519524
)
525+
start_time = time.time()
526+
current_app.logger.debug(f"Start report generation with page.size {page_size}")
520527
pagination = notifications_dao.get_notifications_for_service(
521528
service_id,
522529
filter_dict=data,
@@ -528,9 +535,13 @@ def get_all_notifications_for_service(service_id):
528535
include_from_test_key=include_from_test_key,
529536
include_one_off=include_one_off,
530537
)
538+
current_app.logger.debug(f"Query complete at {int(time.time()-start_time)*1000}")
531539

532540
for notification in pagination.items:
533541
if notification.job_id is not None:
542+
current_app.logger.debug(
543+
f"Processing job_id {notification.job_id} at {int(time.time()-start_time)*1000}"
544+
)
534545
notification.personalisation = get_personalisation_from_s3(
535546
notification.service_id,
536547
notification.job_id,

app/socket_handlers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@
33

44

55
def register_socket_handlers(socketio):
6+
@socketio.on("connect")
7+
def on_connect():
8+
current_app.logger.info(
9+
f"Socket {request.sid} connected from {request.environ.get('HTTP_ORIGIN')}"
10+
)
11+
return True
12+
13+
@socketio.on("disconnect")
14+
def on_disconnect():
15+
current_app.logger.info(f"Socket {request.sid} disconnected")
16+
617
@socketio.on("join")
718
def on_join(data): # noqa: F401
819
room = data.get("room")

docs/openapi.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ paths:
570570
reference:
571571
type: string
572572
example:
573-
phone_number: "2028675309"
573+
phone_number: "800-555-0100"
574574
template_id: "85b58733-7ebf-494e-bee2-a21a4ce17d58"
575575
personalisation:
576576
variable: "value"

0 commit comments

Comments
 (0)