Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 25 additions & 45 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10893,61 +10893,41 @@ def infer(x):

def _append_internal(
self,
other,
other: Series,
ignore_index: bool = False,
verify_integrity: bool = False,
sort: bool = False,
) -> DataFrame:
if isinstance(other, (Series, dict)):
if isinstance(other, dict):
if not ignore_index:
raise TypeError("Can only append a dict if ignore_index=True")
other = Series(other)
if other.name is None and not ignore_index:
raise TypeError(
"Can only append a Series if ignore_index=True "
"or if the Series has a name"
)
assert isinstance(other, Series), type(other)

index = Index(
[other.name],
name=(
self.index.names
if isinstance(self.index, MultiIndex)
else self.index.name
),
if other.name is None and not ignore_index:
raise TypeError(
"Can only append a Series if ignore_index=True "
"or if the Series has a name"
)
row_df = other.to_frame().T
if isinstance(self.index.dtype, ExtensionDtype):
# GH#41626 retain e.g. CategoricalDtype if reached via
# df.loc[key] = item
row_df.index = self.index.array._cast_pointwise_result(
row_df.index._values
)

# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects().rename_axis(index.names)
elif isinstance(other, list):
if not other:
pass
elif not isinstance(other[0], DataFrame):
other = DataFrame(other)
if self.index.name is not None and not ignore_index:
other.index.name = self.index.name
index = Index(
[other.name],
name=(
self.index.names
if isinstance(self.index, MultiIndex)
else self.index.name
),
)

from pandas.core.reshape.concat import concat
row_df = other.to_frame().T
if isinstance(self.index.dtype, ExtensionDtype):
# GH#41626 retain e.g. CategoricalDtype if reached via
# df.loc[key] = item
row_df.index = self.index.array._cast_pointwise_result(row_df.index._values)

if isinstance(other, (list, tuple)):
to_concat = [self, *other]
else:
to_concat = [self, other]
# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
row_df = row_df.infer_objects().rename_axis(index.names)

from pandas.core.reshape.concat import concat

result = concat(
to_concat,
[self, row_df],
ignore_index=ignore_index,
verify_integrity=verify_integrity,
sort=sort,
)
return result.__finalize__(self, method="append")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ def _add_margins(
margin_dummy[cols] = margin_dummy[cols].apply(
maybe_downcast_to_dtype, args=(dtype,)
)
result = result._append_internal(margin_dummy)
result = concat([result, margin_dummy])
result.index.names = row_names

return result
Expand Down
16 changes: 2 additions & 14 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2989,22 +2989,10 @@ def searchsorted( # type: ignore[override]
# -------------------------------------------------------------------
# Combination

def _append_internal(
self, to_append, ignore_index: bool = False, verify_integrity: bool = False
):
def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Series:
from pandas.core.reshape.concat import concat

if isinstance(to_append, (list, tuple)):
to_concat = [self]
to_concat.extend(to_append)
else:
to_concat = [self, to_append]
if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]):
msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
raise TypeError(msg)
return concat(
to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
)
return concat([self, to_append], ignore_index=ignore_index)

@doc(
_shared_docs["compare"],
Expand Down
71 changes: 23 additions & 48 deletions pandas/tests/reshape/concat/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,23 @@ def test_append(self, sort, float_frame):
begin_frame = float_frame.reindex(begin_index)
end_frame = float_frame.reindex(end_index)

appended = begin_frame._append_internal(end_frame)
appended = concat([begin_frame, end_frame])
tm.assert_almost_equal(appended["A"], float_frame["A"])

del end_frame["A"]
partial_appended = begin_frame._append_internal(end_frame, sort=sort)
partial_appended = concat([begin_frame, end_frame], sort=sort)
assert "A" in partial_appended

partial_appended = end_frame._append_internal(begin_frame, sort=sort)
partial_appended = concat([end_frame, begin_frame], sort=sort)
assert "A" in partial_appended

# mixed type handling
appended = mixed_frame[:5]._append_internal(mixed_frame[5:])
appended = concat([mixed_frame[:5], mixed_frame[5:]])
tm.assert_frame_equal(appended, mixed_frame)

# what to test here
mixed_appended = mixed_frame[:5]._append_internal(float_frame[5:], sort=sort)
mixed_appended2 = float_frame[:5]._append_internal(mixed_frame[5:], sort=sort)
mixed_appended = concat([mixed_frame[:5], float_frame[5:]], sort=sort)
mixed_appended2 = concat([float_frame[:5], mixed_frame[5:]], sort=sort)

# all equal except 'foo' column
tm.assert_frame_equal(
Expand All @@ -55,18 +55,18 @@ def test_append(self, sort, float_frame):
def test_append_empty(self, float_frame):
empty = DataFrame()

appended = float_frame._append_internal(empty)
appended = concat([float_frame, empty])
tm.assert_frame_equal(float_frame, appended)
assert appended is not float_frame

appended = empty._append_internal(float_frame)
appended = concat([empty, float_frame])
tm.assert_frame_equal(float_frame, appended)
assert appended is not float_frame

def test_append_overlap_raises(self, float_frame):
msg = "Indexes have overlapping values"
with pytest.raises(ValueError, match=msg):
float_frame._append_internal(float_frame, verify_integrity=True)
concat([float_frame, float_frame], verify_integrity=True)

def test_append_new_columns(self):
# see gh-6129: new columns
Expand All @@ -85,7 +85,7 @@ def test_append_new_columns(self):
def test_append_length0_frame(self, sort):
df = DataFrame(columns=["A", "B", "C"])
df3 = DataFrame(index=[0, 1], columns=["A", "B"])
df5 = df._append_internal(df3, sort=sort)
df5 = concat([df, df3], sort=sort)

expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
tm.assert_frame_equal(df5, expected)
Expand All @@ -100,7 +100,7 @@ def test_append_records(self):
df1 = DataFrame(arr1)
df2 = DataFrame(arr2)

result = df1._append_internal(df2, ignore_index=True)
result = concat([df1, df2], ignore_index=True)
expected = DataFrame(np.concatenate((arr1, arr2)))
tm.assert_frame_equal(result, expected)

Expand All @@ -109,7 +109,7 @@ def test_append_sorts(self, sort):
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])

result = df1._append_internal(df2, sort=sort)
result = concat([df1, df2], sort=sort)

# for None / True
expected = DataFrame(
Expand All @@ -133,36 +133,18 @@ def test_append_different_columns(self, sort):
a = df[:5].loc[:, ["bools", "ints", "floats"]]
b = df[5:].loc[:, ["strings", "ints", "floats"]]

appended = a._append_internal(b, sort=sort)
appended = concat([a, b], sort=sort)
assert isna(appended["strings"][0:4]).all()
assert isna(appended["bools"][5:]).all()

def test_append_many(self, sort, float_frame):
chunks = [
float_frame[:5],
float_frame[5:10],
float_frame[10:15],
float_frame[15:],
]

result = chunks[0]._append_internal(chunks[1:])
tm.assert_frame_equal(result, float_frame)

chunks[-1] = chunks[-1].copy()
chunks[-1]["foo"] = "bar"
result = chunks[0]._append_internal(chunks[1:], sort=sort)
tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
assert (result["foo"][15:] == "bar").all()
assert result["foo"][:15].isna().all()

def test_append_preserve_index_name(self):
# #980
df1 = DataFrame(columns=["A", "B", "C"])
df1 = df1.set_index(["A"])
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
df2 = df2.set_index(["A"])

result = df1._append_internal(df2)
result = concat([df1, df2])
assert result.index.name == "A"

indexes_can_append = [
Expand Down Expand Up @@ -285,7 +267,7 @@ def test_append_dtype_coerce(self, sort):
axis=1,
sort=sort,
)
result = df1._append_internal(df2, ignore_index=True, sort=sort)
result = concat([df1, df2], ignore_index=True, sort=sort)
if sort:
expected = expected[["end_time", "start_time"]]
else:
Expand All @@ -297,7 +279,7 @@ def test_append_missing_column_proper_upcast(self, sort):
df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})

appended = df1._append_internal(df2, ignore_index=True, sort=sort)
appended = concat([df1, df2], sort=sort)
assert appended["A"].dtype == "f8"
assert appended["B"].dtype == "O"

Expand All @@ -323,27 +305,20 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
result_b = result_a._append_internal(ser, ignore_index=True)
tm.assert_frame_equal(result_b, expected)

result = df._append_internal([ser, ser], ignore_index=True)
tm.assert_frame_equal(result, expected)

def test_append_empty_tz_frame_with_datetime64ns(self):
# https://github.com/pandas-dev/pandas/issues/35460
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")

# pd.NaT gets inferred as tz-naive, so append result is tz-naive
result = df._append_internal({"a": pd.NaT}, ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
tm.assert_frame_equal(result, expected)

# also test with typed value to append
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
result = df._append_internal(other, ignore_index=True)
other = Series({"a": pd.NaT}, dtype="datetime64[ns]").to_frame().T
result = concat([df, other], ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}, dtype=object)
tm.assert_frame_equal(result, expected)

# mismatched tz
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
result = df._append_internal(other, ignore_index=True)
other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]").to_frame().T
result = concat([df, other], ignore_index=True)
expected = DataFrame({"a": [pd.NaT]}).astype(object)
tm.assert_frame_equal(result, expected)

Expand All @@ -356,7 +331,7 @@ def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val):
df = DataFrame(columns=["a"]).astype(dtype_str)

other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df._append_internal(other, ignore_index=True)
result = concat([df, other])

expected = other.astype(object)
tm.assert_frame_equal(result, expected)
Expand All @@ -370,7 +345,7 @@ def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
df = DataFrame({"a": pd.array([1], dtype=dtype_str)})

other = DataFrame({"a": [np.timedelta64(val, "ns")]})
result = df._append_internal(other, ignore_index=True)
result = concat([df, other], ignore_index=True)

expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
tm.assert_frame_equal(result, expected)
Loading
Loading