Skip to content

Commit a987f93

Browse files
authored
Merge pull request #1726 from GSA/blocking
change page size
2 parents aa24bde + 1b7c6c2 commit a987f93

File tree

4 files changed

+42
-44
lines changed

4 files changed

+42
-44
lines changed

app/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import time
66
import uuid
77
from contextlib import contextmanager
8-
from multiprocessing import Manager
8+
from threading import Lock
99
from time import monotonic
1010

1111
from celery import Celery, Task, current_task
@@ -32,6 +32,9 @@
3232
from notifications_utils.clients.redis.redis_client import RedisClient
3333
from notifications_utils.clients.zendesk.zendesk_client import ZendeskClient
3434

35+
job_cache = {}
36+
job_cache_lock = Lock()
37+
3538

3639
class NotifyCelery(Celery):
3740
def init_app(self, app):
@@ -152,9 +155,6 @@ def create_app(application):
152155
redis_store.init_app(application)
153156
document_download_client.init_app(application)
154157

155-
manager = Manager()
156-
application.config["job_cache"] = manager.dict()
157-
158158
register_blueprint(application)
159159

160160
# avoid circular imports by importing this file later

app/aws/s3.py

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from boto3 import Session
1010
from flask import current_app
1111

12+
from app import job_cache, job_cache_lock
1213
from app.clients import AWS_CLIENT_CONFIG
1314
from notifications_utils import aware_utcnow
1415

@@ -32,30 +33,25 @@ def get_service_id_from_key(key):
3233

3334

3435
def set_job_cache(key, value):
35-
current_app.logger.debug(f"Setting {key} in the job_cache to {value}.")
36-
job_cache = current_app.config["job_cache"]
37-
job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)
36+
# current_app.logger.debug(f"Setting {key} in the job_cache to {value}.")
37+
38+
with job_cache_lock:
39+
job_cache[key] = (value, time.time() + 8 * 24 * 60 * 60)
3840

3941

4042
def get_job_cache(key):
41-
job_cache = current_app.config["job_cache"]
43+
4244
ret = job_cache.get(key)
43-
if ret is None:
44-
current_app.logger.warning(f"Could not find {key} in the job_cache.")
45-
else:
46-
current_app.logger.debug(f"Got {key} from job_cache with value {ret}.")
4745
return ret
4846

4947

5048
def len_job_cache():
51-
job_cache = current_app.config["job_cache"]
5249
ret = len(job_cache)
5350
current_app.logger.debug(f"Length of job_cache is {ret}")
5451
return ret
5552

5653

5754
def clean_cache():
58-
job_cache = current_app.config["job_cache"]
5955
current_time = time.time()
6056
keys_to_delete = []
6157
for key, (_, expiry_time) in job_cache.items():
@@ -65,8 +61,9 @@ def clean_cache():
6561
current_app.logger.debug(
6662
f"Deleting the following keys from the job_cache: {keys_to_delete}"
6763
)
68-
for key in keys_to_delete:
69-
del job_cache[key]
64+
with job_cache_lock:
65+
for key in keys_to_delete:
66+
del job_cache[key]
7067

7168

7269
def get_s3_client():
@@ -207,9 +204,8 @@ def read_s3_file(bucket_name, object_key, s3res):
207204
extract_personalisation(job),
208205
)
209206

210-
except LookupError:
211-
# perhaps our key is not formatted as we expected. If so skip it.
212-
current_app.logger.exception("LookupError #notify-debug-admin-1200")
207+
except Exception as e:
208+
current_app.logger.exception(str(e))
213209

214210

215211
def get_s3_files():
@@ -308,9 +304,7 @@ def file_exists(file_location):
308304

309305

310306
def get_job_location(service_id, job_id):
311-
current_app.logger.debug(
312-
f"#notify-debug-s3-partitioning NEW JOB_LOCATION: {NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
313-
)
307+
314308
return (
315309
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
316310
NEW_FILE_LOCATION_STRUCTURE.format(service_id, job_id),
@@ -326,9 +320,7 @@ def get_old_job_location(service_id, job_id):
326320
but it will take a few days where we have to support both formats.
327321
Remove this when everything works with the NEW_FILE_LOCATION_STRUCTURE.
328322
"""
329-
current_app.logger.debug(
330-
f"#notify-debug-s3-partitioning OLD JOB LOCATION: {FILE_LOCATION_STRUCTURE.format(service_id, job_id)}"
331-
)
323+
332324
return (
333325
current_app.config["CSV_UPLOAD_BUCKET"]["bucket"],
334326
FILE_LOCATION_STRUCTURE.format(service_id, job_id),
@@ -467,7 +459,6 @@ def extract_personalisation(job):
467459
def get_phone_number_from_s3(service_id, job_id, job_row_number):
468460
job = get_job_cache(job_id)
469461
if job is None:
470-
current_app.logger.debug(f"job {job_id} was not in the cache")
471462
job = get_job_from_s3(service_id, job_id)
472463
# Even if it is None, put it here to avoid KeyErrors
473464
set_job_cache(job_id, job)
@@ -481,8 +472,16 @@ def get_phone_number_from_s3(service_id, job_id, job_row_number):
481472
)
482473
return "Unavailable"
483474

484-
phones = extract_phones(job, service_id, job_id)
485-
set_job_cache(f"{job_id}_phones", phones)
475+
phones = get_job_cache(f"{job_id}_phones")
476+
if phones is None:
477+
current_app.logger.debug("HAVE TO REEXTRACT PHONES!")
478+
phones = extract_phones(job, service_id, job_id)
479+
set_job_cache(f"{job_id}_phones", phones)
480+
current_app.logger.debug(f"SETTING PHONES TO {phones}")
481+
else:
482+
phones = phones[
483+
0
484+
] # we only want the phone numbers not the cache expiration time
486485

487486
# If we can find the quick dictionary, use it
488487
phone_to_return = phones[job_row_number]
@@ -501,7 +500,6 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
501500
# So this is a little recycling mechanism to reduce the number of downloads.
502501
job = get_job_cache(job_id)
503502
if job is None:
504-
current_app.logger.debug(f"job {job_id} was not in the cache")
505503
job = get_job_from_s3(service_id, job_id)
506504
# Even if it is None, put it here to avoid KeyErrors
507505
set_job_cache(job_id, job)
@@ -519,7 +517,9 @@ def get_personalisation_from_s3(service_id, job_id, job_row_number):
519517
)
520518
return {}
521519

522-
set_job_cache(f"{job_id}_personalisation", extract_personalisation(job))
520+
personalisation = get_job_cache(f"{job_id}_personalisation")
521+
if personalisation is None:
522+
set_job_cache(f"{job_id}_personalisation", extract_personalisation(job))
523523

524524
return get_job_cache(f"{job_id}_personalisation")[0].get(job_row_number)
525525

app/service/rest.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import itertools
2+
import time
23
from datetime import datetime, timedelta
34
from zoneinfo import ZoneInfo
45

@@ -504,6 +505,10 @@ def get_all_notifications_for_service(service_id):
504505
if "page_size" in data
505506
else current_app.config.get("PAGE_SIZE")
506507
)
508+
# HARD CODE TO 100 for now. 1000 or 10000 causes reports to time out before they complete (if big)
509+
# Tests are relying on the value in config (20), whereas the UI seems to pass 10000
510+
if page_size > 100:
511+
page_size = 100
507512
limit_days = data.get("limit_days")
508513
include_jobs = data.get("include_jobs", True)
509514
include_from_test_key = data.get("include_from_test_key", False)
@@ -517,6 +522,8 @@ def get_all_notifications_for_service(service_id):
517522
f"get pagination with {service_id} service_id filters {data} \
518523
limit_days {limit_days} include_jobs {include_jobs} include_one_off {include_one_off}"
519524
)
525+
start_time = time.time()
526+
current_app.logger.debug(f"Start report generation with page.size {page_size}")
520527
pagination = notifications_dao.get_notifications_for_service(
521528
service_id,
522529
filter_dict=data,
@@ -528,9 +535,13 @@ def get_all_notifications_for_service(service_id):
528535
include_from_test_key=include_from_test_key,
529536
include_one_off=include_one_off,
530537
)
538+
current_app.logger.debug(f"Query complete at {int(time.time()-start_time)*1000}")
531539

532540
for notification in pagination.items:
533541
if notification.job_id is not None:
542+
current_app.logger.debug(
543+
f"Processing job_id {notification.job_id} at {int(time.time()-start_time)*1000}"
544+
)
534545
notification.personalisation = get_personalisation_from_s3(
535546
notification.service_id,
536547
notification.job_id,

tests/app/aws/test_s3.py

Lines changed: 1 addition & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -221,27 +221,14 @@ def test_get_s3_file_makes_correct_call(notify_api, mocker):
221221
2,
222222
"5555555552",
223223
),
224-
(
225-
# simulate file saved with utf8withbom
226-
"\\ufeffPHONE NUMBER\n",
227-
"eee",
228-
2,
229-
"5555555552",
230-
),
231-
(
232-
# simulate file saved without utf8withbom
233-
"\\PHONE NUMBER\n",
234-
"eee",
235-
2,
236-
"5555555552",
237-
),
238224
],
239225
)
240226
def test_get_phone_number_from_s3(
241227
mocker, job, job_id, job_row_number, expected_phone_number
242228
):
243229
get_job_mock = mocker.patch("app.aws.s3.get_job_from_s3")
244230
get_job_mock.return_value = job
231+
245232
phone_number = get_phone_number_from_s3("service_id", job_id, job_row_number)
246233
assert phone_number == expected_phone_number
247234

0 commit comments

Comments (0)