Skip to content

Commit 3a4b466

Browse files
Revert "perf: fix pandas PerformanceWarning caused due to frame.insert"
This reverts commit 413d41e.
1 parent fbfd4a8 commit 3a4b466

File tree

1 file changed

+24 / -37 lines changed
  • packages/python/plotly/plotly/express

1 file changed

+24 / -37 lines changed

packages/python/plotly/plotly/express/_core.py

Lines changed: 24 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
321321
and args["y"]
322322
and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
323323
):
324+
324325
# sorting is bad but trace_specs with "trendline" have no other attrs
325326
sorted_trace_data = trace_data.sort_values(by=args["x"])
326327
y = sorted_trace_data[args["y"]].values
@@ -561,6 +562,7 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
561562

562563

563564
def configure_cartesian_marginal_axes(args, fig, orders):
565+
564566
if "histogram" in [args["marginal_x"], args["marginal_y"]]:
565567
fig.layout["barmode"] = "overlay"
566568

@@ -883,8 +885,8 @@ def make_trace_spec(args, constructor, attrs, trace_patch):
883885
def make_trendline_spec(args, constructor):
884886
trace_spec = TraceSpec(
885887
constructor=go.Scattergl
886-
if constructor == go.Scattergl
887-
else go.Scatter, # could be contour
888+
if constructor == go.Scattergl # could be contour
889+
else go.Scatter,
888890
attrs=["trendline"],
889891
trace_patch=dict(mode="lines"),
890892
marginal=None,
@@ -1062,25 +1064,14 @@ def _escape_col_name(df_input, col_name, extra):
10621064
return col_name
10631065

10641066

1065-
def to_unindexed_series(x, name=None):
1067+
def to_unindexed_series(x):
10661068
"""
1067-
assuming x is list-like or even an existing pd.Series, return a new pd.DataFrame
1068-
with no index, without extracting the data from an existing Series via numpy, which
1069+
assuming x is list-like or even an existing pd.Series, return a new pd.Series with
1070+
no index, without extracting the data from an existing Series via numpy, which
10691071
seems to mangle datetime columns. Stripping the index from existing pd.Series is
1070-
required to get things to match up right in the new DataFrame we're building.
1071-
It's converted to a frame so that it can be concated easily and it contains
1072-
`columns` attribute, so `_get_cols` can be used.
1072+
required to get things to match up right in the new DataFrame we're building
10731073
"""
1074-
return pd.Series(x, name=name).reset_index(drop=True).to_frame()
1075-
1076-
1077-
def _get_cols(df_list):
1078-
"""
1079-
get all the columns in the current df_list.
1080-
Since this func is called when we raise error, the func is called once.
1081-
So inefficiency here can be tolerated.
1082-
"""
1083-
return [column for df in df_list for column in df.columns]
1074+
return pd.Series(x).reset_index(drop=True)
10841075

10851076

10861077
def process_args_into_dataframe(args, wide_mode, var_name, value_name):
@@ -1095,11 +1086,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
10951086
df_input = args["data_frame"]
10961087
df_provided = df_input is not None
10971088

1098-
# we use append it as list to avoid performance issues in pandas
1099-
# when dealing with large dataframes.
1100-
df_outputs = []
1101-
constants = {}
1102-
ranges = []
1089+
df_output = pd.DataFrame()
1090+
constants = dict()
1091+
ranges = list()
11031092
wide_id_vars = set()
11041093
reserved_names = _get_reserved_col_names(args) if df_provided else set()
11051094

@@ -1110,7 +1099,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
11101099
"No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
11111100
)
11121101
else:
1113-
df_outputs.append(df_input[df_input.columns])
1102+
df_output[df_input.columns] = df_input[df_input.columns]
11141103

11151104
# hover_data is a dict
11161105
hover_data_is_dict = (
@@ -1151,7 +1140,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
11511140
# argument_list and field_list ready, iterate over them
11521141
# Core of the loop starts here
11531142
for i, (argument, field) in enumerate(zip(argument_list, field_list)):
1154-
length = len(df_outputs[0]) if len(df_outputs) else 0
1143+
length = len(df_output)
11551144
if argument is None:
11561145
continue
11571146
col_name = None
@@ -1192,11 +1181,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
11921181
% (
11931182
argument,
11941183
len(real_argument),
1195-
str(_get_cols(df_outputs)),
1184+
str(list(df_output.columns)),
11961185
length,
11971186
)
11981187
)
1199-
df_outputs.append(to_unindexed_series(real_argument, col_name))
1188+
df_output[col_name] = to_unindexed_series(real_argument)
12001189
elif not df_provided:
12011190
raise ValueError(
12021191
"String or int arguments are only possible when a "
@@ -1225,13 +1214,13 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12251214
% (
12261215
field,
12271216
len(df_input[argument]),
1228-
str(_get_cols(df_outputs)),
1217+
str(list(df_output.columns)),
12291218
length,
12301219
)
12311220
)
12321221
else:
12331222
col_name = str(argument)
1234-
df_outputs.append(to_unindexed_series(df_input[argument], col_name))
1223+
df_output[col_name] = to_unindexed_series(df_input[argument])
12351224
# ----------------- argument is likely a column / array / list.... -------
12361225
else:
12371226
if df_provided and hasattr(argument, "name"):
@@ -1258,9 +1247,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12581247
"All arguments should have the same length. "
12591248
"The length of argument `%s` is %d, whereas the "
12601249
"length of previously-processed arguments %s is %d"
1261-
% (field, len(argument), str(_get_cols(df_outputs)), length)
1250+
% (field, len(argument), str(list(df_output.columns)), length)
12621251
)
1263-
df_outputs.append(to_unindexed_series(argument, str(col_name)))
1252+
df_output[str(col_name)] = to_unindexed_series(argument)
12641253

12651254
# Finally, update argument with column name now that column exists
12661255
assert col_name is not None, (
@@ -1278,14 +1267,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12781267
if field_name != "wide_variable":
12791268
wide_id_vars.add(str(col_name))
12801269

1281-
length = len(df_outputs[0])
1282-
df_outputs.extend([pd.Series(range(length), name=col_name) for col_name in ranges])
1270+
for col_name in ranges:
1271+
df_output[col_name] = range(len(df_output))
12831272

1284-
df_outputs.extend(
1285-
[pd.Series(constants[col_name], name=col_name) for col_name in constants]
1286-
)
1273+
for col_name in constants:
1274+
df_output[col_name] = constants[col_name]
12871275

1288-
df_output = pd.concat(df_outputs, axis=1)
12891276
return df_output, wide_id_vars
12901277

12911278

0 commit comments

Comments
 (0)