Skip to content

Commit 98f4ddd

Browse files
authored
Merge pull request #203 from ehinman/order-rows
Add row ordering by `time` and `monitoring_location_id`, if applicable, plus...
2 parents 50466c8 + 7c66421 commit 98f4ddd

File tree

2 files changed

+53
-3
lines changed

2 files changed

+53
-3
lines changed

dataretrieval/waterdata/utils.py

Lines changed: 50 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -662,9 +662,24 @@ def _arrange_cols(
662662
plural = output_id.replace("_id", "s_id")
663663
if plural in properties:
664664
df = df.rename(columns={"id": plural})
665-
return df.loc[:, [col for col in properties if col in df.columns]]
665+
df = df.loc[:, [col for col in properties if col in df.columns]]
666666
else:
667-
return df.rename(columns={"id": output_id})
667+
df = df.rename(columns={"id": output_id})
668+
669+
# Move meaningless-to-user, extra id columns to the end
670+
# of the dataframe, if they exist
671+
extra_id_cols = set(df.columns).intersection({
672+
"latest_continuous_id",
673+
"latest_daily_id",
674+
"daily_id",
675+
"continuous_id",
676+
"field_measurement_id"
677+
})
678+
if extra_id_cols:
679+
id_col_order = [col for col in df.columns if col not in extra_id_cols] + list(extra_id_cols)
680+
df = df.loc[:, id_col_order]
681+
682+
return df
668683

669684

670685
def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
@@ -712,6 +727,36 @@ def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
712727
return df
713728

714729

730+
def _sort_rows(df: pd.DataFrame) -> pd.DataFrame:
731+
"""
732+
Sorts rows by 'time' and 'monitoring_location_id' columns if they
733+
exist.
734+
735+
Parameters
736+
----------
737+
df : pd.DataFrame
738+
The input DataFrame containing water data.
739+
740+
Returns
741+
-------
742+
pd.DataFrame
743+
The DataFrame with rows ordered by time and site.
744+
745+
"""
746+
if "time" in df.columns and "monitoring_location_id" in df.columns:
747+
df = df.sort_values(
748+
by=["time", "monitoring_location_id"],
749+
ignore_index=True
750+
)
751+
elif "time" in df.columns:
752+
df = df.sort_values(
753+
by="time",
754+
ignore_index=True
755+
)
756+
757+
return df
758+
759+
715760
def get_ogc_data(
716761
args: Dict[str, Any], output_id: str, service: str
717762
) -> Tuple[pd.DataFrame, BaseMetadata]:
@@ -769,7 +814,10 @@ def get_ogc_data(
769814
return_list = _type_cols(return_list)
770815

771816
return_list = _arrange_cols(return_list, properties, output_id)
817+
818+
return_list = _sort_rows(return_list)
772819
# Create metadata object from response
773820
metadata = BaseMetadata(response)
774821
return return_list, metadata
775822

823+

tests/waterdata_test.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,16 +122,18 @@ def test_get_daily():
122122
)
123123
assert "daily_id" in df.columns
124124
assert "geometry" in df.columns
125+
assert df.columns[-1] == "daily_id"
125126
assert df.shape[1] == 12
126127
assert df.parameter_code.unique().tolist() == ["00060"]
127128
assert df.monitoring_location_id.unique().tolist() == ["USGS-05427718"]
128129
assert df["time"].apply(lambda x: isinstance(x, datetime.date)).all()
130+
assert df["time"].iloc[0] < df["time"].iloc[-1]
129131
assert hasattr(md, 'url')
130132
assert hasattr(md, 'query_time')
131133
assert df["value"].dtype == "float64"
132134

133135
def test_get_daily_properties():
134-
df, md = get_daily(
136+
df,_ = get_daily(
135137
monitoring_location_id="USGS-05427718",
136138
parameter_code="00060",
137139
time="2025-01-01/..",

0 commit comments

Comments
 (0)