Skip to content

Commit bdc9cac

Browse files
authored
Merge pull request #672 from nick-fournier-rsg/annotate_bugfix
added stricter joining of annotated fields
2 parents 5141945 + a3f60a4 commit bdc9cac

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

activitysim/core/test/test_util.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pandas.testing as pdt
88
import pytest
99

10-
from ..util import other_than, quick_loc_df, quick_loc_series, reindex
10+
from ..util import other_than, quick_loc_df, quick_loc_series, reindex, df_from_dict
1111

1212

1313
@pytest.fixture(scope="module")
@@ -62,3 +62,30 @@ def test_quick_loc_series():
6262

6363
assert list(quick_loc_series(loc_list, series)) == attrib_list
6464
assert list(quick_loc_series(loc_list, series)) == list(series.loc[loc_list])
65+
66+
67+
def test_df_from_dict():
    """Check that df_from_dict realigns Series whose index order was scrambled."""
    row_labels = [1, 2, 3, 4, 5]
    attrib_values = [1, 2, 2, 3, 1]
    source = pd.DataFrame({"attrib": attrib_values}, index=row_labels)

    # one expression reorders the index (sort_values), the other keeps it as is
    reordered = source.eval("attrib.sort_values()")
    untouched = source.eval("attrib * 1")

    # sanity-check the two expression results before combining them
    expected_reordered = pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4])
    pdt.assert_series_equal(reordered, expected_reordered, check_names=False)
    pdt.assert_series_equal(untouched, source.attrib, check_names=False)

    # build a new dataframe out of both expression results
    result = df_from_dict(
        {"sorted": reordered, "not_sorted": untouched}, row_labels
    )

    # the index should be unscrambled, so both columns are back in the
    # original row order
    expected = pd.DataFrame(
        {"sorted": attrib_values, "not_sorted": attrib_values}, index=row_labels
    )
    pdt.assert_frame_equal(result, expected)

activitysim/core/util.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626

2727

2828
def si_units(x, kind="B", digits=3, shift=1000):
29-
3029
# nano micro milli kilo mega giga tera peta exa zeta yotta
3130
tiers = ["n", "µ", "m", "", "K", "M", "G", "T", "P", "E", "Z", "Y"]
3231

@@ -342,7 +341,6 @@ def assign_in_place(df, df2):
342341
# this is a hack fix for a bug in pandas.update
343342
# github.com/pydata/pandas/issues/4094
344343
for c, old_dtype in zip(common_columns, old_dtypes):
345-
346344
# if both df and df2 column were same type, but result is not
347345
if (old_dtype == df2[c].dtype) and (df[c].dtype != old_dtype):
348346
try:
@@ -373,7 +371,20 @@ def assign_in_place(df, df2):
373371
df[new_columns] = df2[new_columns]
374372

375373

374+
def reindex_if_series(values, index):
    """
    Align a pandas Series to ``index``; pass anything else through unchanged.

    Parameters
    ----------
    values : object
        Candidate column data. Only ``pd.Series`` instances are realigned;
        any other object (list, array, scalar, ...) is returned as is.
    index : array-like or None
        Target row labels, in the desired order. When None there is nothing
        to align against and ``values`` is returned as is.

    Returns
    -------
    object
        ``values`` itself when no realignment is needed or possible,
        otherwise a new Series reindexed to ``index``.

    Raises
    ------
    ValueError
        If ``values`` is a Series that lacks one or more labels of ``index``
        (reindexing would silently introduce NaN rows).
    """
    # Nothing to align: no target index, or not an index-carrying object.
    if index is None or not isinstance(values, pd.Series):
        return values

    target = pd.Index(index)

    # Every requested label must be present in the series; an explicit raise
    # is used rather than assert so the check survives ``python -O``.
    missing = target.difference(values.index)
    if len(missing) > 0:
        raise ValueError(
            f"series index is missing {len(missing)} label(s) "
            f"required by the target index"
        )

    # Same labels in the same order: avoid a needless copy.
    if values.index.equals(target):
        return values

    # Any difference in order (not just an all-positions mismatch) must be
    # repaired, otherwise positional assignment later would misalign rows.
    return values.reindex(index=target)
383+
384+
376385
def df_from_dict(values, index=None):
386+
# If value object is a series and has out of order index, reindex it
387+
values = {k: reindex_if_series(v, index) for k, v in values.items()}
377388

378389
df = pd.DataFrame.from_dict(values)
379390
if index is not None:

0 commit comments

Comments
 (0)