@@ -64,23 +64,23 @@ def qa_check(si, ss, config, snapshot=False):
6464 si = pd .merge (si , sid_registry [['service_id' , 'org_id' ]], how = 'left' , on = 'service_id' , suffixes = ['' , '_sid_registry' ])
6565 si = pd .merge (si , dept .rename (columns = {'org_id' : 'org_id_sid_registry' }), how = 'left' , on = 'org_id_sid_registry' , suffixes = ['' , '_sid_registry' ])
6666
67- # QA check: unregistered service ID
67+ # === QA check: unregistered service ID
6868 # This service id is not registered in the service id registry
6969 si ['qa_unregistered_sid' ] = si ['org_id_sid_registry' ].isna ()
7070
71- # QA check: reused service ID
71+ # === QA check: reused service ID
7272 # This service id is registered to a different organization
7373 si ['qa_reused_sid' ] = (si ['org_id' ] != si ['org_id_sid_registry' ]) & ~ (si ['qa_unregistered_sid' ])
7474 si ['reused_sid_correct_org' ] = si ['org_id' ].astype (str ) + ' : ' + si ['department_en_sid_registry' ] + ' | ' + si ['department_fr_sid_registry' ]
7575
76- # QA check: Record is reported for a fiscal year that is incomplete or in the future.
76+ # === QA check: Record is reported for a fiscal year that is incomplete or in the future.
7777 si ['fiscal_yr_end_date' ] = pd .to_datetime (si ['fiscal_yr' ].str .split ('-' ).str [1 ]+ '-04-01' )
7878 si ['qa_si_fiscal_yr_out_of_scope' ] = si ['fiscal_yr_end_date' ].dt .date >= current_date
7979
8080 ss ['fiscal_yr_end_date' ] = pd .to_datetime (ss ['fiscal_yr' ].str .split ('-' ).str [1 ]+ '-04-01' )
8181 ss ['qa_ss_fiscal_yr_out_of_scope' ] = ss ['fiscal_yr_end_date' ].dt .date >= current_date
8282
83- # QA check: Record has contradiction between client feedback channels and online interaction points for feedback
83+ # === QA check: Record has contradiction between client feedback channels and online interaction points for feedback
8484 si ['qa_client_feedback_contradiction' ] = (
8585
8686 # Service accepts client feedback via the online channel (ONL) but online issue resolution or feedback is not applicable or not activated
@@ -98,7 +98,7 @@ def qa_check(si, ss, config, snapshot=False):
9898 )
9999 )
100100
101- # QA check: Service reports no volume, but associated Service standards have volume
101+ # === QA check: Service reports no volume, but associated Service standards have volume
102102 ss_vol_by_service = (
103103 ss .groupby (['fiscal_yr' , 'service_id' ])['total_volume' ]
104104 .sum ()
@@ -114,16 +114,16 @@ def qa_check(si, ss, config, snapshot=False):
114114
115115 si ['qa_no_si_app_volume' ] = (si ['num_applications_total' ] == 0 )
116116
117- # QA check: Service standard reports no volume
117+ # === QA check: Service standard reports no volume
118118 ss ['qa_no_ss_volume' ] = (ss ['total_volume' ] == 0 )
119119
120- # QA check: Services where 'econom' (business) are a client type should not be 'NA' for CRA BN as ID
120+ # === QA check: Services where 'econom' (business) are a client type should not be 'NA' for CRA BN as ID
121121 si ['qa_use_of_cra_bn_applicable' ] = (
122122 (si ['client_target_groups' ].str .contains ('ECONOM' )) &
123123 (si ['cra_bn_identifier_usage' ] == 'NA' )
124124 )
125125
126- # QA check for programs
126+ # === QA check: program IDs
127127 # Prepare a dataframe that splits service inventory into one-program-per-row: si_prog
128128 si ['org_id' ] = si ['org_id' ].astype (str )
129129 program ['org_id' ] = program ['org_id' ].astype (str )
@@ -141,12 +141,12 @@ def qa_check(si, ss, config, snapshot=False):
141141 # Join si_prog with program_list on program_id and org_id
142142 si_prog = si_prog .merge (program , on = ['program_id' , 'org_id' ], how = 'left' , suffixes = ('_si' , '_prog' ), indicator = True )
143143
144- # qa check: program id belongs to different department
144+ # === QA check: program id belongs to different department
145145 si_prog_wrong_org = si_prog [si_prog ['_merge' ] == 'left_only' ] # Keep only mismatched rows
146146 si_prog_wrong_org = si_prog_wrong_org .groupby (['fiscal_yr' , 'service_id' , 'org_id' ], as_index = False ).agg ({'program_id' : lambda x : '<>' .join (sorted (map (str , x .dropna ())))})
147147 si_prog_wrong_org .rename (columns = {'program_id' :'mismatched_program_ids' }, inplace = True )
148148
149- # qa check: program id is old/expired
149+ # === QA check: program id is old/expired
150150 si_prog ['latest_valid_fy_ending_in' ] = pd .to_numeric (si_prog ['latest_valid_fy' ].str .split ('-' ).str [1 ].fillna (0 ), errors = 'coerce' ).astype (int )
151151 si_prog ['reported_fy_ending_in' ] = pd .to_numeric (si_prog ['fiscal_yr' ].str .split ('-' ).str [1 ].fillna (0 ), errors = 'coerce' ).astype (int )
152152 si_prog ['program_id_latest_valid_fy' ] = si_prog ['program_id' ]+ ': ' + si_prog ['latest_valid_fy' ]
@@ -161,10 +161,10 @@ def qa_check(si, ss, config, snapshot=False):
161161 si = pd .merge (si , si_prog_wrong_org , on = ['fiscal_yr' , 'service_id' , 'org_id' ], how = 'left' )
162162 si ['qa_program_id_wrong_org' ] = ~ (si ['mismatched_program_ids' ].isnull ())
163163
164- # QA check: Service standard performance is greater than 100%
164+ # === QA check: Service standard performance is greater than 100%
165165 ss ['qa_performance_over_100' ] = ss ['volume_meeting_target' ]> ss ['total_volume' ]
166166
167- # QA check: Service volumes vary by a higher than expected amount
167+ # === QA check: Service volumes vary by a higher than expected amount
168168 def fy_to_num (fiscal_yr ): # Returns the year in which the fiscal year ends, as a number.
169169 return pd .to_numeric (fiscal_yr .split ('-' )[- 1 ])
170170
@@ -180,6 +180,9 @@ def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a
180180 suffixes = ['' , '_max' ]
181181 )
182182
183+ # Identify the rows belonging to the latest fiscal year
184+ si_variance_qa ['latest_fy_bool' ] = si_variance_qa ['fy_num' ] == si_variance_qa ['fy_num_max' ]
185+
183186 # Only consider records with at least 4 years of reported non-zero values (latest + 3)
184187 # Remove records without any application volume
185188 si_variance_qa = si_variance_qa .loc [si_variance_qa ['num_applications_total' ]> 0 ]
@@ -196,10 +199,6 @@ def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a
196199 # Then only keep records with 4 or more years
197200 si_variance_qa = si_variance_qa .loc [si_variance_qa ['years_reported' ]>= 4 ]
198201
199-
200- # Identify the rows belonging to the latest fiscal year
201- si_variance_qa ['latest_fy_bool' ] = si_variance_qa ['fy_num' ] == si_variance_qa ['fy_num_max' ]
202-
203202 # Determine the average number of applications and their standard deviation
204203 # by service and fiscal year, excluding the latest fiscal year
205204 si_variance_qa = pd .merge (
@@ -218,16 +217,19 @@ def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a
218217 si_variance_qa ['apps_stdevs_away_from_mean' ] = np .abs (si_variance_qa ['num_applications_total' ]- si_variance_qa ['mean' ])/ si_variance_qa ['std_dev' ]
219218
220219 # Issues to identify:
221- # 1. Standard deviation is 0 (std_dev = 0)
222- # this is when for all years (except the latest) the num_applications_total is the same
223- si_variance_qa ['qa_no_volume_variation' ] = (si_variance_qa ['std_dev' ] == 0 )
224-
225- # 2. The difference between the number of applications and the mean, in units of standard deviation, is greater than some threshold
220+ # 1. The difference between the number of applications and the mean, in units of standard deviation, is greater than some threshold
226221 # this is for big swings that would need to be investigated.
227222 stdevs_away_from_mean_threshold = 20
228- si_variance_qa ['qa_extreme_volume_variation' ] = ((si_variance_qa ['apps_stdevs_away_from_mean' ] > stdevs_away_from_mean_threshold ) & ~ si_variance_qa ['qa_no_volume_variation' ])
223+ si_variance_qa ['qa_extreme_volume_variation' ] = (si_variance_qa ['apps_stdevs_away_from_mean' ] > stdevs_away_from_mean_threshold )
224+
225+ # 2. Standard deviation is 0 (std_dev = 0)
226+ # this is when for all years (except the latest) the num_applications_total is the same
227+ si_variance_qa ['qa_no_volume_variation' ] = ((si_variance_qa ['std_dev' ] == 0 ) & ~ si_variance_qa ['qa_extreme_volume_variation' ])
229228
230229 # Add these checks into the si dataframe
230+ # The merge is there to generate an indicator that describes
231+ # whether the service in the si is part of the si_variance_qa dataframe, filtered for the
232+ # check in question; the indicator value ('both' when matched) is then converted to true/false
231233 si = pd .merge (
232234 si ,
233235 si_variance_qa .loc [
@@ -246,7 +248,7 @@ def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a
246248 si ,
247249 si_variance_qa .loc [
248250 (si_variance_qa ['latest_fy_bool' ] &
249- si_variance_qa ['qa_no_volume_variation ' ]),
251+ si_variance_qa ['qa_extreme_volume_variation ' ]),
250252 ['fiscal_yr' , 'service_id' , 'org_id' ]
251253 ],
252254 on = ['fiscal_yr' , 'service_id' , 'org_id' ],
@@ -256,6 +258,22 @@ def fy_to_num(fiscal_yr): # Returns the year in which the fiscal year ends, as a
256258
257259 si ['qa_extreme_volume_variation' ] = (si ['qa_extreme_volume_variation' ] == 'both' )
258260
261+ # Generate context for qa_report:
262+ # Display a field with all the reported application volumes and their fiscal years
263+
264+ # Create the field in the si
265+ si ['fy_num_applications_total' ] = "(" + si ['fiscal_yr' ]+ ": " + si ['num_applications_total' ].astype ('str' )+ ")"
266+
267+ # Create a grouped version with the contents of each field concatenated (joined)
268+ si_apps_by_fy = si .groupby (['org_id' , 'service_id' ], as_index = False ).agg ({'fy_num_applications_total' : lambda x : ', ' .join (sorted (x ))})
269+
270+ # Merge the concatenated values back into si, while dropping the original column.
271+ si = pd .merge (
272+ si .drop (columns = ['fy_num_applications_total' ]),
273+ si_apps_by_fy ,
274+ on = ['org_id' , 'service_id' ]
275+ )
276+
259277
260278 # === EXPORT DATA TO CSV ===
261279 # Define the DataFrames to export to csv and their corresponding names
@@ -292,7 +310,9 @@ def generate_context(row):
292310 'qa_program_id_old' : f"{ row ['program_id_latest_valid_fy' ]} " ,
293311 'qa_ss_vol_without_si_vol' : f"service applications: { row ['num_applications_total' ]} , standard volumes: { row ['total_volume_ss' ]} " ,
294312 'qa_si_fiscal_yr_out_of_scope' : f"{ row ['fiscal_yr' ]} " ,
295- 'qa_ss_fiscal_yr_out_of_scope' : f"{ row ['fiscal_yr' ]} "
313+ 'qa_ss_fiscal_yr_out_of_scope' : f"{ row ['fiscal_yr' ]} " ,
314+ 'qa_extreme_volume_variation' : f"{ row ['fy_num_applications_total' ]} " ,
315+ 'qa_no_volume_variation' : f"{ row ['fy_num_applications_total' ]} "
296316 }
297317
298318 return issue_messages .get (row ['qa_field_name' ])
@@ -343,7 +363,8 @@ def generate_context(row):
343363 'reused_sid_correct_org' ,
344364 'program_id' ,
345365 'program_id_latest_valid_fy' ,
346- 'mismatched_program_ids'
366+ 'mismatched_program_ids' ,
367+ 'fy_num_applications_total'
347368 ]
348369
349370 # Transform data to have all qa issues in a single column
@@ -388,7 +409,8 @@ def generate_context(row):
388409 'reused_sid_correct_org' , # replaced by context field
389410 'program_id' , # replaced by context field
390411 'program_id_latest_valid_fy' , # replaced by context field
391- 'mismatched_program_ids' # replaced by context field
412+ 'mismatched_program_ids' , # replaced by context field
413+ 'fy_num_applications_total' # replaced by context field
392414 ])
393415
394416 si_qa_report = si_qa_report .sort_values (by = ['org_id' , 'severity_en' , 'service_id' ])
0 commit comments