Skip to content

Commit 24e265a

Browse files
committed
feat!: improve performance by not sorting in _post_process_ts_df
1 parent 511cce8 commit 24e265a

File tree

4 files changed

+6
-9
lines changed

4 files changed

+6
-9
lines changed

meteora/clients/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def _ts_params(self, variable_ids, *args, **kwargs) -> dict:
146146
return {"variable_ids": variable_ids, **kwargs}
147147

148148
def _post_process_ts_df(self, ts_df: pd.DataFrame) -> pd.DataFrame:
149-
return ts_df.apply(pd.to_numeric, axis="columns").sort_index()
149+
return ts_df.apply(pd.to_numeric, axis="columns") # .sort_index()
150150

151151
def _rename_variables_cols(
152152
self, ts_df: pd.DataFrame, variable_id_ser: pd.Series

meteora/clients/iem.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ def _ts_df_from_content(self, response_content: io.StringIO) -> pd.DataFrame:
226226

227227
def _post_process_ts_df(self, ts_df: pd.DataFrame) -> pd.DataFrame:
228228
# In this case:
229-
# - avoid sorting on index as data is already sorted
230229
# - avoid to_numeric as data is already numeric
231230
return ts_df
232231

meteora/clients/noaa.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,6 @@ def _process_station_ts_df(year, station_id):
220220
)
221221
return pd.DataFrame(columns=variable_cols)
222222

223-
def _post_process_ts_df(self, ts_df: pd.DataFrame) -> pd.DataFrame:
224-
# no need to sort the index given the way the data has been requested
225-
return ts_df.apply(pd.to_numeric, axis="columns")
226-
227223
def get_ts_df(
228224
self,
229225
variables: VariablesType,

tests/test_meteora.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -692,9 +692,11 @@ def test_time_series(self):
692692
)
693693
# TODO: use "time" as `level` arg?
694694
assert is_datetime64_any_dtype(ts_df.index.get_level_values(1))
695-
# test that index is sorted (note that we need to test it as a multi-index
696-
# because otherwise the time index alone is not unique in long data frames
697-
assert ts_df.index.is_monotonic_increasing
695+
# test that index is sorted - note that we need to test it as a multi-index
696+
# for each station because (i) we do not care if station ids are sorted and
697+
# (ii) otherwise the time index alone is not unique in long data frames
698+
for _, _ts_df in ts_df.groupby(level="station_id"):
699+
assert _ts_df.droplevel("station_id").index.is_monotonic_increasing
698700
# test index labels
699701
assert ts_df.index.names == [settings.STATIONS_ID_COL, settings.TIME_COL]
700702

0 commit comments

Comments
 (0)