@@ -321,6 +321,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     and args["y"]
                     and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
                 ):
+
                     # sorting is bad but trace_specs with "trendline" have no other attrs
                     sorted_trace_data = trace_data.sort_values(by=args["x"])
                     y = sorted_trace_data[args["y"]].values
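Side note on the guard above (illustrative only, not part of this commit): the trendline branch only runs when both `x` and `y` are mapped and more than one complete `(x, y)` pair survives `dropna()`. A minimal sketch with made-up data:

```python
import pandas as pd

# Hypothetical toy data: the middle row has a missing y, so two complete pairs remain.
trace_data = pd.DataFrame({"x": [1, 2, 3], "y": [2.0, None, 6.0]})
args = {"x": "x", "y": "y"}

# Same shape as the condition in the hunk above.
fit_trendline = (
    args["x"]
    and args["y"]
    and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
)
print(fit_trendline)  # True: rows (1, 2.0) and (3, 6.0) survive dropna()

# As in the hunk, the data is sorted by x before the trendline fit.
sorted_trace_data = trace_data.sort_values(by=args["x"])
```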
@@ -561,6 +562,7 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
 
 
 def configure_cartesian_marginal_axes(args, fig, orders):
+
     if "histogram" in [args["marginal_x"], args["marginal_y"]]:
         fig.layout["barmode"] = "overlay"
 
@@ -883,8 +885,8 @@ def make_trace_spec(args, constructor, attrs, trace_patch):
 def make_trendline_spec(args, constructor):
     trace_spec = TraceSpec(
         constructor=go.Scattergl
-        if constructor == go.Scattergl
-        else go.Scatter,  # could be contour
+        if constructor == go.Scattergl  # could be contour
+        else go.Scatter,
         attrs=["trendline"],
         trace_patch=dict(mode="lines"),
         marginal=None,
@@ -1062,25 +1064,14 @@ def _escape_col_name(df_input, col_name, extra):
     return col_name
 
 
-def to_unindexed_series(x, name=None):
+def to_unindexed_series(x):
     """
-    assuming x is list-like or even an existing pd.Series, return a new pd.DataFrame
-    with no index, without extracting the data from an existing Series via numpy, which
+    assuming x is list-like or even an existing pd.Series, return a new pd.Series with
+    no index, without extracting the data from an existing Series via numpy, which
     seems to mangle datetime columns. Stripping the index from existing pd.Series is
-    required to get things to match up right in the new DataFrame we're building.
-    It's converted to a frame so that it can be concated easily and it contains
-    `columns` attribute, so `_get_cols` can be used.
+    required to get things to match up right in the new DataFrame we're building
     """
-    return pd.Series(x, name=name).reset_index(drop=True).to_frame()
-
-
-def _get_cols(df_list):
-    """
-    get all the columns in the current df_list.
-    Since this func is called when we raise error, the func is called once.
-    So inefficiency here can be tolerated.
-    """
-    return [column for df in df_list for column in df.columns]
+    return pd.Series(x).reset_index(drop=True)
 
 
 def process_args_into_dataframe(args, wide_mode, var_name, value_name):
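A minimal usage sketch (illustrative, not part of the diff) of the slimmed-down `to_unindexed_series`, which now returns a plain `pd.Series` instead of a one-column DataFrame:

```python
import pandas as pd

def to_unindexed_series(x):
    # Body as added above: keep the data and dtype, drop any existing index
    # so that downstream assignment into df_output aligns positionally.
    return pd.Series(x).reset_index(drop=True)

s = pd.Series([10, 20, 30], index=[7, 8, 9], name="val")
out = to_unindexed_series(s)
print(list(out.index))  # [0, 1, 2] -- original index stripped
print(out.tolist())     # [10, 20, 30] -- values kept without a numpy round-trip
```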
@@ -1095,11 +1086,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     df_input = args["data_frame"]
     df_provided = df_input is not None
 
-    # we use append it as list to avoid performance issues in pandas
-    # when dealing with large dataframes.
-    df_outputs = []
-    constants = {}
-    ranges = []
+    df_output = pd.DataFrame()
+    constants = dict()
+    ranges = list()
     wide_id_vars = set()
     reserved_names = _get_reserved_col_names(args) if df_provided else set()
 
@@ -1110,7 +1099,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                 "No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
             )
         else:
-            df_outputs.append(df_input[df_input.columns])
+            df_output[df_input.columns] = df_input[df_input.columns]
 
     # hover_data is a dict
     hover_data_is_dict = (
@@ -1151,7 +1140,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     # argument_list and field_list ready, iterate over them
     # Core of the loop starts here
     for i, (argument, field) in enumerate(zip(argument_list, field_list)):
-        length = len(df_outputs[0]) if len(df_outputs) else 0
+        length = len(df_output)
         if argument is None:
             continue
         col_name = None
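The simplified `length` above leans on `len()` of an empty DataFrame being 0, so the old `if len(df_outputs) else 0` guard has no equivalent. A quick sketch of that assumption (illustrative only):

```python
import pandas as pd

df_output = pd.DataFrame()
print(len(df_output))  # 0 -- empty frame, no guard needed

# Assigning the first column to an empty frame sets its length,
# mirroring how df_output is grown elsewhere in this function.
df_output["a"] = pd.Series([1, 2, 3])
print(len(df_output))  # 3
```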
@@ -1192,11 +1181,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             argument,
                             len(real_argument),
-                            str(_get_cols(df_outputs)),
+                            str(list(df_output.columns)),
                             length,
                         )
                     )
-                df_outputs.append(to_unindexed_series(real_argument, col_name))
+                df_output[col_name] = to_unindexed_series(real_argument)
             elif not df_provided:
                 raise ValueError(
                     "String or int arguments are only possible when a "
@@ -1225,13 +1214,13 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             field,
                             len(df_input[argument]),
-                            str(_get_cols(df_outputs)),
+                            str(list(df_output.columns)),
                             length,
                         )
                     )
                 else:
                     col_name = str(argument)
-                    df_outputs.append(to_unindexed_series(df_input[argument], col_name))
+                    df_output[col_name] = to_unindexed_series(df_input[argument])
         # ----------------- argument is likely a column / array / list.... -------
         else:
             if df_provided and hasattr(argument, "name"):
@@ -1258,9 +1247,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     "All arguments should have the same length. "
                     "The length of argument `%s` is %d, whereas the "
                     "length of previously-processed arguments %s is %d"
-                    % (field, len(argument), str(_get_cols(df_outputs)), length)
+                    % (field, len(argument), str(list(df_output.columns)), length)
                 )
-            df_outputs.append(to_unindexed_series(argument, str(col_name)))
+            df_output[str(col_name)] = to_unindexed_series(argument)
 
         # Finally, update argument with column name now that column exists
         assert col_name is not None, (
@@ -1278,14 +1267,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
         if field_name != "wide_variable":
             wide_id_vars.add(str(col_name))
 
-    length = len(df_outputs[0])
-    df_outputs.extend([pd.Series(range(length), name=col_name) for col_name in ranges])
+    for col_name in ranges:
+        df_output[col_name] = range(len(df_output))
 
-    df_outputs.extend(
-        [pd.Series(constants[col_name], name=col_name) for col_name in constants]
-    )
+    for col_name in constants:
+        df_output[col_name] = constants[col_name]
 
-    df_output = pd.concat(df_outputs, axis=1)
     return df_output, wide_id_vars
 
 
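For context (not part of the commit), a small sketch contrasting the two construction styles this diff switches between: the removed list-plus-`pd.concat` accumulation and the restored column-by-column assignment. Column names such as `row_number` and `constant` are made up for illustration:

```python
import pandas as pd

cols = {"x": [1, 2, 3], "y": [4.0, 5.0, 6.0]}

# Removed approach: collect one-column frames, concatenate once at the end.
df_outputs = [
    pd.Series(values, name=name).reset_index(drop=True).to_frame()
    for name, values in cols.items()
]
via_concat = pd.concat(df_outputs, axis=1)

# Restored approach: assign each column directly on a growing DataFrame,
# as the loops over `ranges` and `constants` above do.
df_output = pd.DataFrame()
for name, values in cols.items():
    df_output[name] = pd.Series(values).reset_index(drop=True)
df_output["row_number"] = range(len(df_output))  # like the `ranges` loop
df_output["constant"] = "c"                      # like the `constants` loop

print(via_concat.equals(df_output[["x", "y"]]))  # True -- same data either way
```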