Skip to content

Commit ac3b5ea

Browse files
steph-torres-nhs, RioKnightleyNHS, NogaNHS, bethany-kish-nhs
authored
PRMP-928 Add user login data and search to report (#439)
---------

Co-authored-by: Rio Knightley <rio.knightley2@nhs.net>
Co-authored-by: NogaNHS <noga.sasson1@nhs.net>
Co-authored-by: bethany-kish-nhs <beth.kish1@nhs.net>
1 parent 3b3a36a commit ac3b5ea

File tree

10 files changed

+171
-47
lines changed

10 files changed

+171
-47
lines changed

lambdas/models/statistics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class OrganisationData(StatisticData):
5656
daily_count_viewed: int = 0
5757
daily_count_downloaded: int = 0
5858
daily_count_deleted: int = 0
59+
daily_count_searched: int = 0
5960

6061

6162
class ApplicationData(StatisticData):

lambdas/services/data_collection_service.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
CloudwatchLogsQueryParams,
2121
LloydGeorgeRecordsDeleted,
2222
LloydGeorgeRecordsDownloaded,
23+
LloydGeorgeRecordsSearched,
2324
LloydGeorgeRecordsStored,
2425
LloydGeorgeRecordsViewed,
2526
UniqueActiveUserIds,
@@ -68,11 +69,14 @@ def collect_all_data_and_write_to_dynamodb(self):
6869
def collect_all_data(self) -> list[StatisticData]:
6970
dynamodb_scan_result = self.scan_dynamodb_tables()
7071
s3_list_objects_result = self.get_all_s3_files_info()
72+
record_store_data = []
7173

72-
record_store_data = self.get_record_store_data(
73-
dynamodb_scan_result, s3_list_objects_result
74-
)
74+
if dynamodb_scan_result:
75+
record_store_data = self.get_record_store_data(
76+
dynamodb_scan_result, s3_list_objects_result
77+
)
7578
organisation_data = self.get_organisation_data(dynamodb_scan_result)
79+
7680
application_data = self.get_application_data()
7781

7882
return record_store_data + organisation_data + application_data
@@ -126,6 +130,7 @@ def get_record_store_data(
126130
dynamodb_scan_result: list[dict],
127131
s3_list_objects_result: list[dict],
128132
) -> list[RecordStoreData]:
133+
129134
total_number_of_records = self.get_total_number_of_records(dynamodb_scan_result)
130135

131136
total_and_average_file_sizes = (
@@ -159,6 +164,7 @@ def get_record_store_data(
159164
def get_organisation_data(
160165
self, dynamodb_scan_result: list[dict]
161166
) -> list[OrganisationData]:
167+
162168
number_of_patients = self.get_number_of_patients(dynamodb_scan_result)
163169
average_records_per_patient = self.get_average_number_of_files_per_patient(
164170
dynamodb_scan_result
@@ -171,6 +177,9 @@ def get_organisation_data(
171177
LloydGeorgeRecordsDeleted
172178
)
173179
daily_count_stored = self.get_cloud_watch_query_result(LloydGeorgeRecordsStored)
180+
daily_count_searched = self.get_cloud_watch_query_result(
181+
LloydGeorgeRecordsSearched
182+
)
174183

175184
joined_query_result = self.join_results_by_ods_code(
176185
[
@@ -180,11 +189,15 @@ def get_organisation_data(
180189
daily_count_downloaded,
181190
daily_count_deleted,
182191
daily_count_stored,
192+
daily_count_searched,
183193
]
184194
)
185195

186196
organisation_data_for_all_ods_code = [
187-
OrganisationData(date=self.today_date, **organisation_data_properties)
197+
OrganisationData(
198+
date=self.today_date,
199+
**organisation_data_properties,
200+
)
188201
for organisation_data_properties in joined_query_result
189202
]
190203

@@ -208,11 +221,14 @@ def get_active_user_list(self) -> dict[str, list]:
208221
)
209222
user_ids_per_ods_code = defaultdict(list)
210223
for entry in query_result:
211-
ods_code = entry.get("ods_code")
212-
user_id = entry.get("user_id")
224+
ods_code = entry.get("ods_code", "")
225+
user_id = entry.get("user_id", "")
226+
user_role = entry.get("user_role", "No role description")
227+
role_code = entry.get("role_code", "No role code")
213228
hashed_user_id = hashlib.sha256(bytes(user_id, "utf8")).hexdigest()
214-
user_ids_per_ods_code[ods_code].append(hashed_user_id)
215-
229+
user_ids_per_ods_code[ods_code].append(
230+
hashed_user_id + " - " + user_role + " - " + role_code
231+
)
216232
return user_ids_per_ods_code
217233

218234
def get_cloud_watch_query_result(
@@ -309,6 +325,8 @@ def get_average_number_of_files_per_patient(
309325
self,
310326
dynamodb_scan_result: list[dict],
311327
) -> list[dict]:
328+
if not dynamodb_scan_result:
329+
return []
312330
dynamodb_df = pl.DataFrame(dynamodb_scan_result)
313331

314332
count_records = pl.len().alias("number_of_records")

lambdas/services/login_service.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
TooManyOrgsException,
2323
)
2424
from utils.lambda_exceptions import LoginException
25+
from utils.request_context import request_context
2526

2627
logger = LoggingService(__name__)
2728

@@ -240,6 +241,7 @@ def issue_auth_token(
240241
"ndr_session_id": session_id,
241242
"nhs_user_id": user_id,
242243
}
244+
request_context.authorization = ndr_token_content
243245

244246
authorisation_token = jwt.encode(
245247
ndr_token_content, private_key, algorithm="RS256"

lambdas/services/statistical_report_service.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,14 +154,21 @@ def summarise_application_data(
154154

155155
df = self.load_data_to_polars(application_data)
156156

157-
count_unique_ids = (
158-
pl.concat_list("active_user_ids_hashed")
159-
.flatten()
160-
.unique()
161-
.len()
162-
.alias("active_users_count")
157+
summarised_data = df.group_by("ods_code").agg(
158+
[
159+
pl.concat_list("active_user_ids_hashed")
160+
.flatten()
161+
.unique()
162+
.apply(lambda col: str(col.sort().to_list()))
163+
.alias("unique_active_user_ids_hashed"),
164+
pl.concat_list("active_user_ids_hashed")
165+
.flatten()
166+
.unique()
167+
.len()
168+
.alias("active_users_count"),
169+
]
163170
)
164-
summarised_data = df.group_by("ods_code").agg(count_unique_ids)
171+
165172
return summarised_data
166173

167174
def join_dataframes_by_ods_code(

lambdas/tests/unit/helpers/data/statistic/mock_collected_data.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
TOTAL_FILE_SIZE_FOR_Y12345,
77
)
88
from tests.unit.helpers.data.statistic.mock_logs_query_results import (
9-
HASHED_USER_ID_1,
10-
HASHED_USER_ID_2,
9+
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
10+
HASHED_USER_ID_1_WITH_PCSE_ROLE,
11+
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
1112
)
1213

1314
TODAY_DATE = datetime.today().strftime("%Y%m%d")
@@ -45,6 +46,7 @@
4546
daily_count_viewed=40,
4647
daily_count_downloaded=20,
4748
daily_count_deleted=2,
49+
daily_count_searched=30,
4850
),
4951
OrganisationData(
5052
statistic_id="mock_uuid",
@@ -56,6 +58,7 @@
5658
daily_count_viewed=20,
5759
daily_count_downloaded=10,
5860
daily_count_deleted=1,
61+
daily_count_searched=50,
5962
),
6063
]
6164

@@ -64,13 +67,16 @@
6467
statistic_id="mock_uuid",
6568
date=TODAY_DATE,
6669
ods_code="H81109",
67-
active_user_ids_hashed=[HASHED_USER_ID_1, HASHED_USER_ID_2],
70+
active_user_ids_hashed=[
71+
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
72+
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
73+
],
6874
),
6975
ApplicationData(
7076
statistic_id="mock_uuid",
7177
date=TODAY_DATE,
7278
ods_code="Y12345",
73-
active_user_ids_hashed=[HASHED_USER_ID_1],
79+
active_user_ids_hashed=[HASHED_USER_ID_1_WITH_PCSE_ROLE],
7480
),
7581
]
7682

lambdas/tests/unit/helpers/data/statistic/mock_logs_query_results.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,24 @@
22
USER_ID_2 = "9E7F1235-3DF1-4822-AFFB-C4FCC88C2690"
33
HASHED_USER_ID_1 = "3192b6cf7ef953cf1a1f0945a83b55ab2cb8bae95cac6548ae5412aaa4c67677"
44
HASHED_USER_ID_2 = "a89d1cb4ac0776e45131c65a69e8b1a48026e9b497c94409e480588418a016e4"
5+
HASHED_USER_ID_1_WITH_ADMIN_ROLE = f"{HASHED_USER_ID_1} - GP_ADMIN - RO76"
6+
HASHED_USER_ID_1_WITH_PCSE_ROLE = f"{HASHED_USER_ID_1} - PCSE - "
7+
HASHED_USER_ID_2_WITH_CLINICAL_ROLE = f"{HASHED_USER_ID_2} - GP_CLINICAL - RO76"
8+
59

610
MOCK_UNIQUE_ACTIVE_USER_IDS = [
7-
{
8-
"ods_code": "Y12345",
9-
"user_id": USER_ID_1,
10-
},
11+
{"ods_code": "Y12345", "user_id": USER_ID_1, "role_code": "", "user_role": "PCSE"},
1112
{
1213
"ods_code": "H81109",
1314
"user_id": USER_ID_1,
15+
"role_code": "RO76",
16+
"user_role": "GP_ADMIN",
1417
},
1518
{
1619
"ods_code": "H81109",
1720
"user_id": USER_ID_2,
21+
"role_code": "RO76",
22+
"user_role": "GP_CLINICAL",
1823
},
1924
]
2025

@@ -63,6 +68,18 @@
6368
},
6469
]
6570

71+
MOCK_PATIENT_SEARCHED = [
72+
{
73+
"ods_code": "Y12345",
74+
"daily_count_searched": "50",
75+
},
76+
{
77+
"ods_code": "H81109",
78+
"daily_count_searched": "30",
79+
},
80+
]
81+
82+
6683
MOCK_RESPONSE_QUERY_IN_PROGRESS = {"status": "Running"}
6784

6885
MOCK_RESPONSE_QUERY_FAILED = {"status": "Failed"}

lambdas/tests/unit/helpers/data/statistic/mock_statistic_data.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
"weekly_count_viewed": 35,
125125
"weekly_count_downloaded": 4,
126126
"weekly_count_deleted": 1,
127+
"weekly_count_searched": 0,
127128
"average_records_per_patient": 4.5,
128129
"number_of_patients": 4,
129130
},
@@ -133,6 +134,7 @@
133134
"weekly_count_viewed": 15 + 30,
134135
"weekly_count_downloaded": 1 + 5,
135136
"weekly_count_deleted": 1 + 1,
137+
"weekly_count_searched": 0,
136138
"average_records_per_patient": (3.51 + 2.78) / 2,
137139
"number_of_patients": 10,
138140
},
@@ -202,14 +204,6 @@
202204
"cf1af742e351ce63d8ed275d4bec8d8f",
203205
],
204206
)
205-
206-
EXPECTED_SUMMARY_APPLICATION_DATA = pl.DataFrame(
207-
[
208-
{"ods_code": "Z56789", "active_users_count": 1},
209-
{"ods_code": "Y12345", "active_users_count": 3},
210-
],
211-
)
212-
213207
SERIALISED_APPLICATION_DATA = [
214208
{
215209
"Date": "20240510",
@@ -239,6 +233,30 @@
239233
},
240234
]
241235

236+
EXPECTED_SUMMARY_APPLICATION_DATA = pl.DataFrame(
237+
[
238+
{
239+
"ods_code": "Z56789",
240+
"active_users_count": 1,
241+
"unique_active_user_ids_hashed": str(
242+
[str(SERIALISED_APPLICATION_DATA[0]["ActiveUserIdsHashed"][0])]
243+
),
244+
},
245+
{
246+
"ods_code": "Y12345",
247+
"active_users_count": 3,
248+
"unique_active_user_ids_hashed": str(
249+
[
250+
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][0]),
251+
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][1]),
252+
str(SERIALISED_APPLICATION_DATA[2]["ActiveUserIdsHashed"][1]),
253+
]
254+
),
255+
},
256+
],
257+
)
258+
259+
242260
ALL_MOCKED_STATISTIC_DATA = (
243261
[MOCK_RECORD_STORE_DATA_1, MOCK_RECORD_STORE_DATA_2, MOCK_RECORD_STORE_DATA_3],
244262
[MOCK_ORGANISATION_DATA_1, MOCK_ORGANISATION_DATA_2, MOCK_ORGANISATION_DATA_3],
@@ -274,8 +292,12 @@
274292
"Number of patients": 4,
275293
"Total number of records": 18,
276294
"Total size of records in megabytes": 1.75,
295+
"Unique active user ids hashed": str(
296+
[str(SERIALISED_APPLICATION_DATA[0]["ActiveUserIdsHashed"][0])]
297+
),
277298
"Weekly count deleted": 1,
278299
"Weekly count downloaded": 4,
300+
"Weekly count searched": 0,
279301
"Weekly count stored": 0,
280302
"Weekly count viewed": 35,
281303
},
@@ -289,8 +311,16 @@
289311
"Number of patients": 10,
290312
"Total number of records": 20,
291313
"Total size of records in megabytes": 2.34,
314+
"Unique active user ids hashed": str(
315+
[
316+
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][0]),
317+
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][1]),
318+
str(SERIALISED_APPLICATION_DATA[2]["ActiveUserIdsHashed"][1]),
319+
]
320+
),
292321
"Weekly count deleted": 1 + 1,
293322
"Weekly count downloaded": 1 + 5,
323+
"Weekly count searched": 0,
294324
"Weekly count stored": 0 + 2,
295325
"Weekly count viewed": 15 + 30,
296326
},

lambdas/tests/unit/services/test_data_collection_service.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,21 @@
3333
build_mock_results,
3434
)
3535
from tests.unit.helpers.data.statistic.mock_logs_query_results import (
36-
HASHED_USER_ID_1,
37-
HASHED_USER_ID_2,
36+
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
37+
HASHED_USER_ID_1_WITH_PCSE_ROLE,
38+
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
3839
MOCK_LG_DELETED,
3940
MOCK_LG_DOWNLOADED,
4041
MOCK_LG_STORED,
4142
MOCK_LG_VIEWED,
43+
MOCK_PATIENT_SEARCHED,
4244
MOCK_UNIQUE_ACTIVE_USER_IDS,
4345
)
4446
from utils.cloudwatch_logs_query import (
4547
CloudwatchLogsQueryParams,
4648
LloydGeorgeRecordsDeleted,
4749
LloydGeorgeRecordsDownloaded,
50+
LloydGeorgeRecordsSearched,
4851
LloydGeorgeRecordsStored,
4952
LloydGeorgeRecordsViewed,
5053
UniqueActiveUserIds,
@@ -99,6 +102,8 @@ def mock_implementation(query_params: CloudwatchLogsQueryParams, **_kwargs):
99102
return MOCK_LG_STORED
100103
elif query_params == UniqueActiveUserIds:
101104
return MOCK_UNIQUE_ACTIVE_USER_IDS
105+
elif query_params == LloydGeorgeRecordsSearched:
106+
return MOCK_PATIENT_SEARCHED
102107

103108
patched_instance = mocker.patch(
104109
"services.data_collection_service.CloudwatchService",
@@ -168,11 +173,12 @@ def test_collect_all_data_and_write_to_dynamodb(mock_service, mocker):
168173

169174

170175
def test_collect_all_data(mock_service, mock_uuid):
171-
actual = mock_service.collect_all_data()
172176
expected = unordered(
173177
MOCK_RECORD_STORE_DATA + MOCK_ORGANISATION_DATA + MOCK_APPLICATION_DATA
174178
)
175179

180+
actual = mock_service.collect_all_data()
181+
176182
assert actual == expected
177183

178184

@@ -250,10 +256,10 @@ def test_get_active_user_list(set_env, mock_query_logs):
250256
service = DataCollectionService()
251257
expected = {
252258
"H81109": [
253-
HASHED_USER_ID_1,
254-
HASHED_USER_ID_2,
259+
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
260+
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
255261
],
256-
"Y12345": [HASHED_USER_ID_1],
262+
"Y12345": [HASHED_USER_ID_1_WITH_PCSE_ROLE],
257263
}
258264
actual = service.get_active_user_list()
259265

0 commit comments

Comments
 (0)