@@ -119,8 +119,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
119119 # Dictionary mapping EMF dimensions to A/B-testable metrics/properties
120120 processed_emf = {}
121121
122- distinct_values_per_dimenson = defaultdict (set )
123-
124122 report = json .loads (report_path .read_text ("UTF-8" ))
125123 for test in report ["tests" ]:
126124 for line in test ["teardown" ]["stdout" ].splitlines ():
@@ -140,9 +138,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
140138 if not dimensions :
141139 continue
142140
143- for dimension , value in dimensions .items ():
144- distinct_values_per_dimenson [dimension ].add (value )
145-
146141 dimension_set = frozenset (dimensions .items ())
147142
148143 if dimension_set not in processed_emf :
@@ -159,24 +154,27 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
159154
160155 values .extend (result [metric ][0 ])
161156
162- irrelevant_dimensions = set ()
157+ return processed_emf
163158
164- for dimension , distinct_values in distinct_values_per_dimenson .items ():
165- if len (distinct_values ) == 1 :
166- irrelevant_dimensions .add (dimension )
167159
168- post_processed_emf = {}
160+ def uninteresting_dimensions (processed_emf ):
161+ """
162+ Computes the set of cloudwatch dimensions that only ever take on a
163+ single value across the entire dataset.
164+ """
165+ values_per_dimension = defaultdict (set )
166+
167+ for dimension_set in processed_emf :
168+ for dimension , value in dimension_set :
169+ values_per_dimension [dimension ].add (value )
169170
170- for dimension_set , metrics in processed_emf .items ():
171- processed_key = frozenset (
172- (dim , value )
173- for (dim , value ) in dimension_set
174- if dim not in irrelevant_dimensions
175- )
171+ uninteresting = set ()
176172
177- post_processed_emf [processed_key ] = metrics
173+ for dimension , distinct_values in values_per_dimension .items ():
174+ if len (distinct_values ) == 1 :
175+ uninteresting .add (dimension )
178176
179- return post_processed_emf
177+ return uninteresting
180178
181179
182180def collect_data (binary_dir : Path , pytest_opts : str ):
@@ -304,6 +302,7 @@ def analyze_data(
304302 )
305303
306304 messages = []
305+ do_not_print_list = uninteresting_dimensions (processed_emf_a )
307306 for dimension_set , metric , result , unit in failures :
308307 # Sanity check as described above
309308 if abs (statistics .mean (relative_changes_by_metric [metric ])) <= noise_threshold :
@@ -325,7 +324,7 @@ def analyze_data(
325324 f"for metric \033 [1m{ metric } \033 [0m with \033 [0;31m\033 [1mp={ result .pvalue } \033 [0m. "
326325 f"This means that observing a change of this magnitude or worse, assuming that performance "
327326 f"characteristics did not change across the tested commits, has a probability of { result .pvalue :.2%} . "
328- f"Tested Dimensions:\n { json .dumps (dict (dimension_set ), indent = 2 , sort_keys = True )} "
327+ f"Tested Dimensions:\n { json .dumps (dict ({ k : v for k , v in dimension_set . items () if k not in do_not_print_list } ), indent = 2 , sort_keys = True )} "
329328 )
330329 messages .append (msg )
331330
0 commit comments