@@ -97,11 +97,11 @@ def test_composed_metrics(self):
                 size, batch_size, pred=1, label=1):
             evaluator.process(predictions, data_samples)
 
-        metrics = evaluator.evaluate(size=size)
+        metrics_results, averaged_results = evaluator.evaluate(size=size)
 
-        self.assertAlmostEqual(metrics['Fake/Toy/accuracy'], 1.0)
-        self.assertAlmostEqual(metrics['Fake/Toy/mAP'], 0.0)
-        self.assertEqual(metrics['Fake/Toy/size'], size)
+        self.assertAlmostEqual(metrics_results['Fake/Toy/accuracy'], 1.0)
+        self.assertAlmostEqual(metrics_results['Fake/Toy/mAP'], 0.0)
+        self.assertEqual(metrics_results['Fake/Toy/size'], size)
         with self.assertWarns(Warning):
             evaluator.evaluate(size=0)
 
@@ -124,9 +124,9 @@ def test_composed_metrics(self):
         for data_samples, predictions in generate_test_results(
                 size, batch_size, pred=1, label=1):
             evaluator.process(predictions, data_samples)
-        metrics = evaluator.evaluate(size=size)
-        self.assertIn('Fake/Toy/accuracy', metrics)
-        self.assertIn('Fake/accuracy', metrics)
+        metrics_results, averaged_results = evaluator.evaluate(size=size)
+        self.assertIn('Fake/Toy/accuracy', metrics_results)
+        self.assertIn('Fake/accuracy', metrics_results)
 
         metrics_results = OrderedDict({
             'dataset1/metric1/accuracy': 0.9,
@@ -135,7 +135,7 @@ def test_composed_metrics(self):
             'dataset2/metric2/f1_score': 0.75
         })
 
-        evaluator = MultiDatasetsEvaluator([], [])
+        evaluator = MultiDatasetsEvaluator(cfg, dataset_prefixes=['Fake'])
         averaged_results = evaluator.average_results(metrics_results)
 
         expected_averaged_results = {
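
The updated test exercises two behaviours: evaluate(size=size) now returning a (metrics_results, averaged_results) tuple, and average_results() collapsing the per-dataset scores built from '<dataset>/<metric>/<name>' keys. The sketch below shows one plausible grouping rule for such averaging, keyed on the trailing metric name; the helper name average_results_sketch, the grouping rule, and the printed output are assumptions for illustration only, not the implementation the test checks (its expected_averaged_results dict is cut off above).

from collections import OrderedDict, defaultdict
from statistics import mean


def average_results_sketch(metrics_results):
    """Average values of '<dataset>/<metric>/<name>' keys, grouped by the
    trailing metric name (assumed grouping rule, not taken from the diff)."""
    grouped = defaultdict(list)
    for key, value in metrics_results.items():
        # Keep only the final segment, e.g. 'accuracy' or 'f1_score'.
        metric_name = key.rsplit('/', 1)[-1]
        grouped[metric_name].append(value)
    return OrderedDict(
        (name, mean(values)) for name, values in grouped.items())


# Only the two entries visible in the diff; the middle entries of the
# test's metrics_results dict are elided there and omitted here.
sample = OrderedDict({
    'dataset1/metric1/accuracy': 0.9,
    'dataset2/metric2/f1_score': 0.75,
})
print(average_results_sketch(sample))
# OrderedDict([('accuracy', 0.9), ('f1_score', 0.75)])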