Skip to content

Commit a3dafb7

Browse files
glados-verma and copybara-github
authored and committed
Add from_dataframe method to Measurement to create multidim measurement from a DataFrame.
This also adds some symmetry with the existing to_dataframe method. PiperOrigin-RevId: 721549130
1 parent b8cfa7e commit a3dafb7

File tree

2 files changed

+142
-2
lines changed

2 files changed

+142
-2
lines changed

openhtf/core/measurements.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,35 @@ def to_dataframe(self, columns: Any = None) -> Any:
480480

481481
return dataframe
482482

483+
def from_dataframe(self, dataframe: Any, metric_column: str):
  """Populate this multi-dim measurement from a pandas DataFrame.

  Each row of the DataFrame supplies one measured value: the row's dimension
  values are used as the coordinates and the value in `metric_column` is
  assigned at those coordinates. Rows are written in DataFrame order via item
  assignment on `self.measured_value`. Columns other than the dimensions and
  `metric_column` are ignored.

  Args:
    dataframe: A pandas DataFrame. Every dimension of this measurement must
      be present by name, either as a column or as an index level (the index
      may be a MultiIndex).
    metric_column: The name of the column holding the values to record.

  Raises:
    TypeError: If this measurement is not dimensioned.
    ValueError: If the DataFrame is missing a dimension, or has no column
      named `metric_column`.
  """
  if not isinstance(self._measured_value, DimensionedMeasuredValue):
    raise TypeError(
        'Only a dimensioned measurement can be set from a DataFrame'
    )
  dimension_labels = [d.name for d in self.dimensions]
  # Move any index levels into columns first, so the dimensions are found
  # whether the caller supplied them as columns or as (multi-)index levels.
  # reset_index() returns a new frame; the caller's DataFrame is not modified.
  dimensioned_df = dataframe.reset_index()
  try:
    dimensioned_df = dimensioned_df.set_index(dimension_labels)
  except KeyError as e:
    raise ValueError('DataFrame is missing dimensions') from e
  if metric_column not in dimensioned_df.columns:
    raise ValueError(
        f'DataFrame does not have a column named {metric_column}'
    )
  # Iterate the metric column alone rather than DataFrame.iterrows():
  # iterrows() builds a Series per row, which does not preserve dtypes across
  # columns, so an unrelated extra column could change the type of the value
  # that gets recorded (e.g. int upcast to float).
  for coordinates, value in dimensioned_df[metric_column].items():
    self.measured_value[coordinates] = value
511+
483512

484513
@attr.s(slots=True)
485514
class MeasuredValue(object):

test/core/measurements_test.py

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from openhtf.core import measurements
2626
from examples import all_the_things
2727
from openhtf.util import test as htf_test
28+
import pandas
2829

2930
# Fields that are considered 'volatile' for record comparison.
3031
_VOLATILE_FIELDS = {
@@ -231,14 +232,19 @@ def test_to_dataframe__no_pandas(self):
231232
with self.assertRaises(RuntimeError):
232233
self.test_to_dataframe(units=True)
233234

234-
def test_to_dataframe(self, units=True):
235+
def _make_multidim_measurement(self, units=''):
  """Build the 'test_multidim' measurement shared by the DataFrame tests.

  The measurement is given the dimensions 'ms', 'assembly' and 'my_zone'.

  Args:
    units: Units handed to `with_units`. When falsy (the default), no units
      are set on the measurement.

  Returns:
    The configured `htf.Measurement`.
  """
  multidim = htf.Measurement('test_multidim')
  multidim.with_dimensions('ms', 'assembly', htf.Dimension('my_zone'))
  if not units:
    return multidim
  multidim.with_units(units)
  return multidim
237241

242+
def test_to_dataframe(self, units=True):
238243
if units:
239-
measurement.with_units('°C')
244+
measurement = self._make_multidim_measurement('°C')
240245
measure_column_name = 'degree Celsius'
241246
else:
247+
measurement = self._make_multidim_measurement()
242248
measure_column_name = 'value'
243249

244250
for t in range(5):
@@ -260,6 +266,111 @@ def test_to_dataframe(self, units=True):
260266
def test_to_dataframe__no_units(self):
  """Re-runs the to_dataframe test with `units=False`."""
  self.test_to_dataframe(units=False)
262268

269+
def test_from_dataframe_raises_if_dimensions_missing_in_dataframe(self):
  """Verifies a missing dimension raises ValueError chained from KeyError."""
  measurement = self._make_multidim_measurement('°C')
  # The 'assembly' dimension is deliberately absent from this frame.
  dataframe_without_assembly = pandas.DataFrame({
      'ms': [1, 2, 3],
      'my_zone': ['X', 'Y', 'Z'],
      'degree_celsius': [10, 20, 30],
  })
  with self.assertRaisesRegex(
      ValueError, 'DataFrame is missing dimensions'
  ) as cm:
    measurement.from_dataframe(
        dataframe_without_assembly,
        metric_column='degree_celsius',
    )
  # Re-raise the chained cause so its type and message can be checked too.
  # The square brackets are escaped because assertRaisesRegex treats the
  # expected text as a regular expression; unescaped, "['assembly']" would be
  # a single-character class and never match the literal pandas message.
  with self.assertRaisesRegex(
      KeyError, r"None of \['assembly'\] are in the columns"
  ):
    raise cm.exception.__cause__
286+
287+
def test_from_dataframe_raises_if_metric_missing_in_dataframe(self):
  """Verifies ValueError when the requested metric column is absent."""
  # All dimensions are present, but there is no 'degree_celsius' column.
  dataframe_without_metric = pandas.DataFrame({
      'ms': [1, 2, 3],
      'assembly': ['A', 'B', 'C'],
      'my_zone': ['X', 'Y', 'Z'],
      'degrees_fahrenheit': [10, 20, 30],
  })
  multidim = self._make_multidim_measurement('°C')
  expected_message = 'DataFrame does not have a column named degree_celsius'
  with self.assertRaisesRegex(ValueError, expected_message):
    multidim.from_dataframe(
        dataframe_without_metric, metric_column='degree_celsius'
    )
301+
302+
def test_from_flat_dataframe(self):
  """Verifies loading from a flat DataFrame and round-tripping it back."""
  source_dataframe = pandas.DataFrame({
      'ms': [1, 2, 3],
      'assembly': ['A', 'B', 'C'],
      'my_zone': ['X', 'Y', 'Z'],
      'degree_celsius': [10, 20, 30],
  })
  multidim = self._make_multidim_measurement('°C')
  multidim.from_dataframe(source_dataframe, metric_column='degree_celsius')
  multidim.outcome = measurements.Outcome.PASS

  expected_values = {
      (1, 'A', 'X'): 10,
      (2, 'B', 'Y'): 20,
      (3, 'C', 'Z'): 30,
  }
  for coordinates, expected in expected_values.items():
    self.assertEqual(multidim.measured_value[coordinates], expected)

  # The metric column name produced by to_dataframe comes from the unit, so
  # only that column needs renaming to line up with the source frame.
  round_tripped = multidim.to_dataframe().rename(
      columns={'degree Celsius': 'degree_celsius'}
  )
  pandas.testing.assert_frame_equal(round_tripped, source_dataframe)
327+
328+
def test_from_dataframe_with_multiindex_dataframe(self):
  """Verifies dimensions can be supplied as MultiIndex levels."""
  flat_dataframe = pandas.DataFrame({
      'ms': [1, 2, 3],
      'assembly': ['A', 'B', 'C'],
      'my_zone': ['X', 'Y', 'Z'],
      'degree_celsius': [10, 20, 30],
  })
  # Move the dimension columns into the index before handing the frame over.
  source_dataframe = flat_dataframe.set_index(['ms', 'assembly', 'my_zone'])
  multidim = self._make_multidim_measurement('°C')
  multidim.from_dataframe(source_dataframe, metric_column='degree_celsius')
  multidim.outcome = measurements.Outcome.PASS

  expected_values = {
      (1, 'A', 'X'): 10,
      (2, 'B', 'Y'): 20,
      (3, 'C', 'Z'): 30,
  }
  for coordinates, expected in expected_values.items():
    self.assertEqual(multidim.measured_value[coordinates], expected)
342+
343+
def test_from_dataframe_ignores_extra_columns(self):
  """Verifies columns other than dimensions and the metric are ignored."""
  source_dataframe = pandas.DataFrame({
      'ms': [1, 2, 3],
      'assembly': ['A', 'B', 'C'],
      'my_zone': ['X', 'Y', 'Z'],
      'degree_celsius': [10, 20, 30],
      # Neither a dimension nor the requested metric.
      'degrees_fahrenheit': [11, 21, 31],
  })
  multidim = self._make_multidim_measurement('°C')
  multidim.from_dataframe(source_dataframe, metric_column='degree_celsius')
  multidim.outcome = measurements.Outcome.PASS

  expected_values = {
      (1, 'A', 'X'): 10,
      (2, 'B', 'Y'): 20,
      (3, 'C', 'Z'): 30,
  }
  for coordinates, expected in expected_values.items():
    self.assertEqual(multidim.measured_value[coordinates], expected)
357+
358+
def test_from_dataframe_with_duplicate_dimensions_overwrites(self):
  """Verifies multi-dim measurement overwrite with duplicate dimensions."""
  # The last row repeats the coordinates of the first row with a new value.
  source_dataframe = pandas.DataFrame({
      'ms': [1, 2, 3, 1],
      'assembly': ['A', 'B', 'C', 'A'],
      'my_zone': ['X', 'Y', 'Z', 'X'],
      'degree_celsius': [10, 20, 30, 11],
  })
  multidim = self._make_multidim_measurement('°C')
  multidim.from_dataframe(source_dataframe, metric_column='degree_celsius')
  multidim.outcome = measurements.Outcome.PASS

  expected_values = {
      (1, 'A', 'X'): 11,  # Overwritten value.
      (2, 'B', 'Y'): 20,
      (3, 'C', 'Z'): 30,
  }
  for coordinates, expected in expected_values.items():
    self.assertEqual(multidim.measured_value[coordinates], expected)
373+
263374
def test_bad_validator(self):
264375
measurement = htf.Measurement('bad_measure')
265376
measurement.with_dimensions('a')

0 commit comments

Comments
 (0)