Skip to content

Commit be2c977

Browse files
committed
BUG: DataFrame constructor defaulting to float dtype on empty input
1 parent 34177d6 commit be2c977

21 files changed

+66
-48
lines changed

pandas/core/construction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -652,8 +652,8 @@ def sanitize_array(
652652
data = list(data)
653653

654654
if len(data) == 0 and dtype is None:
655-
# We default to float64, matching numpy
656-
subarr = np.array([], dtype=np.float64)
655+
# We default to object, diverging from NumPy
656+
subarr = np.array([], dtype=np.object_)
657657

658658
elif dtype is not None:
659659
subarr = _try_cast(data, dtype, copy)

pandas/core/frame.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13059,16 +13059,14 @@ def quantile(
1305913059
interpolation=interpolation,
1306013060
method=method,
1306113061
)
13062-
if method == "single":
13063-
res = res_df.iloc[0]
13064-
else:
13065-
# cannot directly iloc over sparse arrays
13066-
res = res_df.T.iloc[:, 0]
13062+
res = res_df.iloc[0]
1306713063
if axis == 1 and len(self) == 0:
1306813064
# GH#41544 try to get an appropriate dtype
13069-
dtype = find_common_type(list(self.dtypes))
13070-
if needs_i8_conversion(dtype):
13071-
return res.astype(dtype)
13065+
dtype = "float64"
13066+
cdtype = find_common_type(list(self.dtypes))
13067+
if needs_i8_conversion(cdtype):
13068+
dtype = cdtype
13069+
return res.astype(dtype)
1307213070
return res
1307313071

1307413072
q = Index(q, dtype=np.float64)

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ def _transform_general(
578578
concatenated = concat(results, ignore_index=True)
579579
result = self._set_result_index_ordered(concatenated)
580580
else:
581-
result = self.obj._constructor(dtype=np.float64)
581+
result = self.obj._constructor(dtype=self.obj.dtype)
582582

583583
result.name = self.obj.name
584584
return result

pandas/core/internals/managers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1778,7 +1778,7 @@ def as_array(
17781778
passed_nan = lib.is_float(na_value) and isna(na_value)
17791779

17801780
if len(self.blocks) == 0:
1781-
arr = np.empty(self.shape, dtype=float)
1781+
arr = np.empty(self.shape, dtype=object)
17821782
return arr.transpose()
17831783

17841784
if self.is_single_block:

pandas/tests/arrays/categorical/test_missing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,9 @@ def test_compare_categorical_with_missing(self, a1, a2, categories):
122122
"na_value, dtype",
123123
[
124124
(pd.NaT, "datetime64[ns]"),
125-
(None, "float64"),
125+
(None, "object"),
126126
(np.nan, "float64"),
127-
(pd.NA, "float64"),
127+
(pd.NA, "object"),
128128
],
129129
)
130130
def test_categorical_only_missing_values_no_cast(self, na_value, dtype):

pandas/tests/frame/methods/test_quantile.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_quantile(self, datetime_frame, interp_method, request):
8181
def test_empty(self, interp_method):
8282
interpolation, method = interp_method
8383
q = DataFrame({"x": [], "y": []}).quantile(
84-
0.1, axis=0, numeric_only=True, interpolation=interpolation, method=method
84+
0.1, axis=0, interpolation=interpolation, method=method
8585
)
8686
assert np.isnan(q["x"]) and np.isnan(q["y"])
8787

@@ -319,8 +319,11 @@ def test_quantile_multi_empty(self, interp_method):
319319
result = DataFrame({"x": [], "y": []}).quantile(
320320
[0.1, 0.9], axis=0, interpolation=interpolation, method=method
321321
)
322+
dtype = "float64" if method == "single" else "object"
322323
expected = DataFrame(
323-
{"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9]
324+
{"x": [np.nan, np.nan], "y": [np.nan, np.nan]},
325+
index=[0.1, 0.9],
326+
dtype=dtype,
324327
)
325328
tm.assert_frame_equal(result, expected)
326329

pandas/tests/frame/methods/test_reindex.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_setitem_reset_index_dtypes(self):
7777
df1["d"] = []
7878
result = df1.reset_index()
7979
expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype(
80-
{"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64}
80+
{"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.object_}
8181
)
8282
tm.assert_frame_equal(result, expected)
8383

pandas/tests/frame/test_reductions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1627,7 +1627,7 @@ def test_min_max_dt64_api_consistency_empty_df(self):
16271627
# check DataFrame/Series api consistency when calling min/max on an empty
16281628
# DataFrame/Series.
16291629
df = DataFrame({"x": []})
1630-
expected_float_series = Series([], dtype=float)
1630+
expected_float_series = Series([], dtype=object)
16311631
# check axis 0
16321632
assert np.isnan(df.min(axis=0).x) == np.isnan(expected_float_series.min())
16331633
assert np.isnan(df.max(axis=0).x) == np.isnan(expected_float_series.max())

pandas/tests/frame/test_stack_unstack.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,11 +1418,12 @@ def test_stack_timezone_aware_values(future_stack):
14181418
def test_stack_empty_frame(dropna, future_stack):
14191419
# GH 36113
14201420
levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)]
1421-
expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []]))
1421+
expected = Series(dtype=np.object_, index=MultiIndex(levels=levels, codes=[[], []]))
14221422
if future_stack and dropna is not lib.no_default:
14231423
with pytest.raises(ValueError, match="dropna must be unspecified"):
14241424
DataFrame(dtype=np.float64).stack(dropna=dropna, future_stack=future_stack)
14251425
else:
1426+
# dtype=np.float64 is lost since there are no columns
14261427
result = DataFrame(dtype=np.float64).stack(
14271428
dropna=dropna, future_stack=future_stack
14281429
)
@@ -1612,7 +1613,9 @@ def test_unstack(self, multiindex_year_month_day_dataframe_random_data):
16121613
(
16131614
[[1, 1, None, None, 30.0], [2, None, None, None, 30.0]],
16141615
["ix1", "ix2", "col1", "col2", "col3"],
1615-
None,
1616+
# Nones are used as floats in the presence of numeric data,
1617+
# resulting in np.nan for index level 1.
1618+
np.nan,
16161619
[None, None, 30.0],
16171620
),
16181621
],
@@ -1624,10 +1627,12 @@ def test_unstack_partial(
16241627
# https://github.com/pandas-dev/pandas/issues/19351
16251628
# make sure DataFrame.unstack() works when it's run on a subset of the DataFrame
16261629
# and the Index levels contain values that are not present in the subset
1627-
result = DataFrame(result_rows, columns=result_columns).set_index(
1628-
["ix1", "ix2"]
1630+
data = (
1631+
DataFrame(result_rows, columns=result_columns)
1632+
.set_index(["ix1", "ix2"])
1633+
.iloc[1:2]
16291634
)
1630-
result = result.iloc[1:2].unstack("ix2")
1635+
result = data.unstack("ix2")
16311636
expected = DataFrame(
16321637
[expected_row],
16331638
columns=MultiIndex.from_product(

pandas/tests/groupby/methods/test_quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_quantile_missing_group_values_no_segfaults():
192192
([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]),
193193
(["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]),
194194
([0], [42], [0], [42.0]),
195-
([], [], np.array([], dtype="float64"), np.array([], dtype="float64")),
195+
([], np.array([], dtype="float64"), [], np.array([], dtype="float64")),
196196
],
197197
)
198198
def test_quantile_missing_group_values_correct_results(

0 commit comments

Comments
 (0)