@@ -70,7 +70,7 @@ def handle_cache_update_request(event, context):
7070 # Cache the metrics
7171 metrics_to_cache = {
7272 'overallAccuracy' : aggregated_metrics .get ('overall_accuracy' ),
73- 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , [] ),
73+ 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , {} ),
7474 'averageConfidence' : aggregated_metrics .get ('average_confidence' ),
7575 'accuracyBreakdown' : aggregated_metrics .get ('accuracy_breakdown' , {}),
7676 'totalCost' : aggregated_metrics .get ('total_cost' , 0 ),
@@ -187,26 +187,33 @@ def get_test_results(test_run_id):
187187 cached_metrics = metadata .get ('testRunResult' )
188188 if cached_metrics is not None :
189189 logger .info (f"Retrieved cached metrics for test run: { test_run_id } " )
190- # Use cached metrics but get dynamic fields from current metadata
191- return {
192- 'testRunId' : test_run_id ,
193- 'testSetId' : metadata .get ('TestSetId' ),
194- 'testSetName' : metadata .get ('TestSetName' ),
195- 'status' : current_status ,
196- 'filesCount' : metadata .get ('FilesCount' , 0 ),
197- 'completedFiles' : metadata .get ('CompletedFiles' , 0 ),
198- 'failedFiles' : metadata .get ('FailedFiles' , 0 ),
199- 'overallAccuracy' : cached_metrics .get ('overallAccuracy' ),
200- 'weightedOverallScores' : cached_metrics .get ('weightedOverallScores' , []),
201- 'averageConfidence' : cached_metrics .get ('averageConfidence' ),
202- 'accuracyBreakdown' : cached_metrics .get ('accuracyBreakdown' , {}),
203- 'totalCost' : cached_metrics .get ('totalCost' , 0 ),
204- 'costBreakdown' : cached_metrics .get ('costBreakdown' , {}),
205- 'createdAt' : _format_datetime (metadata .get ('CreatedAt' )),
206- 'completedAt' : _format_datetime (metadata .get ('CompletedAt' )),
207- 'context' : metadata .get ('Context' ),
208- 'config' : _get_test_run_config (test_run_id )
209- }
190+
191+ # Check if cached weightedOverallScores is old array format - if so, recalculate
192+ cached_scores = cached_metrics .get ('weightedOverallScores' )
193+ if isinstance (cached_scores , list ):
194+ logger .info (f"Found old array format for weightedOverallScores, recalculating for test run: { test_run_id } " )
195+ # Force recalculation by falling through to aggregation logic
196+ else :
197+ # Use cached metrics but get dynamic fields from current metadata
198+ return {
199+ 'testRunId' : test_run_id ,
200+ 'testSetId' : metadata .get ('TestSetId' ),
201+ 'testSetName' : metadata .get ('TestSetName' ),
202+ 'status' : current_status ,
203+ 'filesCount' : metadata .get ('FilesCount' , 0 ),
204+ 'completedFiles' : metadata .get ('CompletedFiles' , 0 ),
205+ 'failedFiles' : metadata .get ('FailedFiles' , 0 ),
206+ 'overallAccuracy' : cached_metrics .get ('overallAccuracy' ),
207+ 'weightedOverallScores' : cached_metrics .get ('weightedOverallScores' , {}),
208+ 'averageConfidence' : cached_metrics .get ('averageConfidence' ),
209+ 'accuracyBreakdown' : cached_metrics .get ('accuracyBreakdown' , {}),
210+ 'totalCost' : cached_metrics .get ('totalCost' , 0 ),
211+ 'costBreakdown' : cached_metrics .get ('costBreakdown' , {}),
212+ 'createdAt' : _format_datetime (metadata .get ('CreatedAt' )),
213+ 'completedAt' : _format_datetime (metadata .get ('CompletedAt' )),
214+ 'context' : metadata .get ('Context' ),
215+ 'config' : _get_test_run_config (test_run_id )
216+ }
210217
211218 # Calculate aggregated metrics
212219 aggregated_metrics = _aggregate_test_run_metrics (test_run_id )
@@ -220,7 +227,7 @@ def get_test_results(test_run_id):
220227 'completedFiles' : metadata .get ('CompletedFiles' , 0 ),
221228 'failedFiles' : metadata .get ('FailedFiles' , 0 ),
222229 'overallAccuracy' : aggregated_metrics .get ('overall_accuracy' ),
223- 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , [] ),
230+ 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , {} ),
224231 'averageConfidence' : aggregated_metrics .get ('average_confidence' ),
225232 'accuracyBreakdown' : aggregated_metrics .get ('accuracy_breakdown' , {}),
226233 'totalCost' : aggregated_metrics .get ('total_cost' , 0 ),
@@ -238,7 +245,7 @@ def get_test_results(test_run_id):
238245 # Cache only static metrics
239246 metrics_to_cache = {
240247 'overallAccuracy' : aggregated_metrics .get ('overall_accuracy' ),
241- 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , [] ),
248+ 'weightedOverallScores' : aggregated_metrics .get ('weighted_overall_scores' , {} ),
242249 'averageConfidence' : aggregated_metrics .get ('average_confidence' ),
243250 'accuracyBreakdown' : aggregated_metrics .get ('accuracy_breakdown' , {}),
244251 'totalCost' : aggregated_metrics .get ('total_cost' , 0 ),
@@ -544,7 +551,7 @@ def _aggregate_test_run_metrics(test_run_id):
544551 total_cost = 0
545552 accuracy_count = 0
546553 confidence_count = 0
547- weighted_overall_scores = [] # List to collect individual weighted overall scores
554+ weighted_overall_scores = {} # Dict to collect document ID -> score mapping
548555 cost_breakdown = {}
549556
550557 # Accuracy metrics aggregation
@@ -582,7 +589,9 @@ def _aggregate_test_run_metrics(test_run_id):
582589
583590 # Extract weighted overall score
584591 if overall_metrics .get ('weighted_overall_score' ) is not None :
585- weighted_overall_scores .append (overall_metrics ['weighted_overall_score' ])
592+ # Extract document ID from the item PK (format: doc#{test_run_id}/{file_key})
593+ document_id = item ['PK' ].replace ('doc#' , '' , 1 )
594+ weighted_overall_scores [document_id ] = overall_metrics ['weighted_overall_score' ]
586595
587596 # Extract additional accuracy metrics
588597 if overall_metrics .get ('precision' ):
@@ -642,7 +651,7 @@ def _aggregate_test_run_metrics(test_run_id):
642651
643652 return {
644653 'overall_accuracy' : total_accuracy / accuracy_count if accuracy_count > 0 else None ,
645- 'weighted_overall_scores' : weighted_overall_scores if weighted_overall_scores else [] ,
654+ 'weighted_overall_scores' : weighted_overall_scores if weighted_overall_scores else {} ,
646655 'average_confidence' : total_confidence / confidence_count if confidence_count > 0 else None ,
647656 'accuracy_breakdown' : {
648657 'precision' : total_precision / precision_count if precision_count > 0 else None ,
0 commit comments