Merge pull request #672 from nick-fournier-rsg/annotate_bugfix

jpn-- · web-flow · commit bdc9cac2859c · 2024-02-09T11:06:56.000-06:00
added stricter joining of annotated fields
diff --git a/activitysim/core/test/test_util.py b/activitysim/core/test/test_util.py
@@ -7,7 +7,7 @@
 import pandas.testing as pdt
 import pytest
 
-from ..util import other_than, quick_loc_df, quick_loc_series, reindex
+from ..util import other_than, quick_loc_df, quick_loc_series, reindex, df_from_dict
 
 
 @pytest.fixture(scope="module")
@@ -62,3 +62,30 @@ def test_quick_loc_series():
 
     assert list(quick_loc_series(loc_list, series)) == attrib_list
     assert list(quick_loc_series(loc_list, series)) == list(series.loc[loc_list])
+
+
+def test_df_from_dict():
+    """
+    Check that df_from_dict lines up Series whose indexes are ordered differently.
+
+    One input Series has its index scrambled by a sort and the other keeps the
+    original order; the resulting frame must hold both columns in the order of
+    the index passed to df_from_dict.
+    """
+    index = [1, 2, 3, 4, 5]
+    df = pd.DataFrame({"attrib": [1, 2, 2, 3, 1]}, index=index)
+
+    # scramble index order for one expression and not the other
+    # (local names deliberately avoid shadowing the builtin `sorted`)
+    sorted_attrib = df.eval("attrib.sort_values()")
+    unsorted_attrib = df.eval("attrib * 1")
+
+    # check above expressions
+    pdt.assert_series_equal(
+        sorted_attrib,
+        pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4]),
+        check_names=False,
+    )
+    pdt.assert_series_equal(unsorted_attrib, df.attrib, check_names=False)
+
+    # create a new dataframe from the above expressions
+    values = {"sorted": sorted_attrib, "not_sorted": unsorted_attrib}
+    new_df = df_from_dict(values, index)
+
+    # index should become unscrambled and back to the same order as before
+    expected_df = pd.DataFrame(
+        {"sorted": [1, 2, 2, 3, 1], "not_sorted": [1, 2, 2, 3, 1]}, index=index
+    )
+
+    pdt.assert_frame_equal(new_df, expected_df)
diff --git a/activitysim/core/util.py b/activitysim/core/util.py
@@ -26,7 +26,6 @@
 
 
 def si_units(x, kind="B", digits=3, shift=1000):
-
     #       nano micro milli    kilo mega giga tera peta exa  zeta yotta
     tiers = ["n", "µ", "m", "", "K", "M", "G", "T", "P", "E", "Z", "Y"]
 
@@ -342,7 +341,6 @@ def assign_in_place(df, df2):
         # this is a hack fix for a bug in pandas.update
         # github.com/pydata/pandas/issues/4094
         for c, old_dtype in zip(common_columns, old_dtypes):
-
             # if both df and df2 column were same type, but result is not
             if (old_dtype == df2[c].dtype) and (df[c].dtype != old_dtype):
                 try:
@@ -373,7 +371,20 @@ def assign_in_place(df, df2):
     df[new_columns] = df2[new_columns]
 
 
+def reindex_if_series(values, index):
+    """
+    Align a Series to a target index; pass anything else through unchanged.
+
+    Parameters
+    ----------
+    values : pandas.Series or object
+        Candidate column data. Only a pandas Series is ever reindexed.
+    index : array-like or None
+        Target index labels. When None there is nothing to align against.
+
+    Returns
+    -------
+    object
+        `values` itself when `index` is None, when `values` is not a Series,
+        or when its index already equals `index`; otherwise a new Series
+        reordered to `index`.
+
+    Raises
+    ------
+    AssertionError
+        If the number of distinct labels shared by `values.index` and `index`
+        differs from `len(index)` (e.g. a requested label is missing).
+    """
+    # Nothing to align against, or not an index-labelled object: hand the
+    # value back untouched rather than falling through and returning None.
+    if index is None or not isinstance(values, pd.Series):
+        return values
+
+    # every requested label must already be present in the series
+    assert len(set(values.index).intersection(index)) == len(
+        index
+    ), "index contains labels that are missing from the series"
+
+    # Index.equals compares labels and order and tolerates length differences,
+    # unlike an elementwise `!=`, which raises when the lengths do not match.
+    if values.index.equals(pd.Index(index)):
+        return values
+
+    return values.reindex(index=index)
+
+
 def df_from_dict(values, index=None):
+    # If value object is a series and has out of order index, reindex it
+    values = {k: reindex_if_series(v, index) for k, v in values.items()}
 
     df = pd.DataFrame.from_dict(values)
     if index is not None: