@@ -934,88 +934,16 @@ def _is_col_list(df_input, arg):
934
934
return True
935
935
936
936
937
- def build_dataframe (args , constructor ):
937
+ def process_args_into_dataframe (args , wide_mode , var_name ):
938
938
"""
939
- Constructs a dataframe and modifies `args` in-place.
940
-
941
- The argument values in `args` can be either strings corresponding to
942
- existing columns of a dataframe, or data arrays (lists, numpy arrays,
943
- pandas columns, series).
944
-
945
- Parameters
946
- ----------
947
- args : OrderedDict
948
- arguments passed to the px function and subsequently modified
949
- constructor : graph_object trace class
950
- the trace type selected for this figure
939
+ After this function runs, the `all_attrables` keys of `args` all contain only
940
+ references to columns of `df_output`. This function handles the extraction of data
941
+ from `args["attrable"]` and column-name-generation as appropriate, and adds the
942
+ data to `df_output` and then replaces `args["attrable"]` with the appropriate
943
+ reference.
951
944
"""
952
-
953
- # make copies of all the fields via dict() and list()
954
- for field in args :
955
- if field in array_attrables and args [field ] is not None :
956
- args [field ] = (
957
- dict (args [field ])
958
- if isinstance (args [field ], dict )
959
- else list (args [field ])
960
- )
961
-
962
- # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
963
- df_provided = args ["data_frame" ] is not None
964
- if df_provided and not isinstance (args ["data_frame" ], pd .DataFrame ):
965
- args ["data_frame" ] = pd .DataFrame (args ["data_frame" ])
966
945
df_input = args ["data_frame" ]
967
-
968
- no_x = args .get ("x" , None ) is None
969
- no_y = args .get ("y" , None ) is None
970
- wide_x = False if no_x else _is_col_list (df_input , args ["x" ])
971
- wide_y = False if no_y else _is_col_list (df_input , args ["y" ])
972
-
973
- wide_mode = False
974
- if constructor in [go .Scatter , go .Bar , go .Violin , go .Box , go .Histogram ]:
975
- wide_cross_name = None
976
- if wide_x and wide_y :
977
- raise ValueError (
978
- "Cannot accept list of column references or list of columns for both `x` and `y`."
979
- )
980
- if df_provided and no_x and no_y :
981
- wide_mode = True
982
- args ["_column_" ] = list (df_input .columns )
983
- var_name = df_input .columns .name or "_column_"
984
- wide_orientation = args .get ("orientation" , None ) or "v"
985
- args ["orientation" ] = wide_orientation
986
- args ["wide_cross" ] = None
987
- elif wide_x != wide_y :
988
- wide_mode = True
989
- args ["_column_" ] = args ["y" ] if wide_y else args ["x" ]
990
- var_name = "_column_"
991
- if constructor == go .Histogram :
992
- wide_orientation = "v" if wide_x else "h"
993
- else :
994
- wide_orientation = "v" if wide_y else "h"
995
- args ["y" if wide_y else "x" ] = None
996
- args ["wide_cross" ] = None
997
- if not no_x and not no_y :
998
- wide_cross_name = "__x__" if wide_y else "__y__"
999
-
1000
- missing_bar_dim = None
1001
- if constructor in [go .Scatter , go .Bar ]:
1002
- if not wide_mode and (no_x != no_y ):
1003
- for ax in ["x" , "y" ]:
1004
- if args .get (ax , None ) is None :
1005
- args [ax ] = df_input .index if df_provided else Range ()
1006
- if constructor == go .Scatter :
1007
- if args ["orientation" ] is None :
1008
- args ["orientation" ] = "v" if ax == "x" else "h"
1009
- if constructor == go .Bar :
1010
- missing_bar_dim = ax
1011
- if wide_mode and wide_cross_name is None :
1012
- if df_provided :
1013
- args ["wide_cross" ] = df_input .index
1014
- wide_cross_name = df_input .index .name or "index"
1015
- else :
1016
- args ["wide_cross" ] = Range (label = "index" )
1017
- wide_cross_name = "index"
1018
-
946
+ df_provided = df_input is not None
1019
947
df_output = pd .DataFrame ()
1020
948
constants = dict ()
1021
949
ranges = list ()
@@ -1031,7 +959,6 @@ def build_dataframe(args, constructor):
1031
959
else :
1032
960
df_output [df_input .columns ] = df_input [df_input .columns ]
1033
961
1034
-
1035
962
# Loop over possible arguments
1036
963
for field_name in all_attrables :
1037
964
# Massaging variables
@@ -1158,6 +1085,108 @@ def build_dataframe(args, constructor):
1158
1085
if field_name != "_column_" :
1159
1086
wide_id_vars .add (str (col_name ))
1160
1087
1088
+ for col_name in ranges :
1089
+ df_output [col_name ] = range (len (df_output ))
1090
+
1091
+ for col_name in constants :
1092
+ df_output [col_name ] = constants [col_name ]
1093
+
1094
+ return df_output , wide_id_vars
1095
+
1096
+
1097
+ def build_dataframe (args , constructor ):
1098
+ """
1099
+ Constructs a dataframe and modifies `args` in-place.
1100
+
1101
+ The argument values in `args` can be either strings corresponding to
1102
+ existing columns of a dataframe, or data arrays (lists, numpy arrays,
1103
+ pandas columns, series).
1104
+
1105
+ Parameters
1106
+ ----------
1107
+ args : OrderedDict
1108
+ arguments passed to the px function and subsequently modified
1109
+ constructor : graph_object trace class
1110
+ the trace type selected for this figure
1111
+ """
1112
+
1113
+ # make copies of all the fields via dict() and list()
1114
+ for field in args :
1115
+ if field in array_attrables and args [field ] is not None :
1116
+ args [field ] = (
1117
+ dict (args [field ])
1118
+ if isinstance (args [field ], dict )
1119
+ else list (args [field ])
1120
+ )
1121
+
1122
+ # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
1123
+ df_provided = args ["data_frame" ] is not None
1124
+ if df_provided and not isinstance (args ["data_frame" ], pd .DataFrame ):
1125
+ args ["data_frame" ] = pd .DataFrame (args ["data_frame" ])
1126
+ df_input = args ["data_frame" ]
1127
+
1128
+ # now we handle special cases like wide-mode or x-xor-y specification
1129
+ # by rearranging args to tee things up for process_args_into_dataframe to work
1130
+ no_x = args .get ("x" , None ) is None
1131
+ no_y = args .get ("y" , None ) is None
1132
+ wide_x = False if no_x else _is_col_list (df_input , args ["x" ])
1133
+ wide_y = False if no_y else _is_col_list (df_input , args ["y" ])
1134
+
1135
+ wide_mode = False
1136
+ var_name = None
1137
+ if constructor in [go .Scatter , go .Bar , go .Violin , go .Box , go .Histogram ]:
1138
+ wide_cross_name = None
1139
+ if wide_x and wide_y :
1140
+ raise ValueError (
1141
+ "Cannot accept list of column references or list of columns for both `x` and `y`."
1142
+ )
1143
+ if df_provided and no_x and no_y :
1144
+ wide_mode = True
1145
+ args ["_column_" ] = list (df_input .columns )
1146
+ var_name = df_input .columns .name or "_column_"
1147
+ wide_orientation = args .get ("orientation" , None ) or "v"
1148
+ args ["orientation" ] = wide_orientation
1149
+ args ["wide_cross" ] = None
1150
+ elif wide_x != wide_y :
1151
+ wide_mode = True
1152
+ args ["_column_" ] = args ["y" ] if wide_y else args ["x" ]
1153
+ var_name = "_column_"
1154
+ if constructor == go .Histogram :
1155
+ wide_orientation = "v" if wide_x else "h"
1156
+ else :
1157
+ wide_orientation = "v" if wide_y else "h"
1158
+ args ["y" if wide_y else "x" ] = None
1159
+ args ["wide_cross" ] = None
1160
+ if not no_x and not no_y :
1161
+ wide_cross_name = "__x__" if wide_y else "__y__"
1162
+
1163
+ missing_bar_dim = None
1164
+ if constructor in [go .Scatter , go .Bar ]:
1165
+ if not wide_mode and (no_x != no_y ):
1166
+ for ax in ["x" , "y" ]:
1167
+ if args .get (ax , None ) is None :
1168
+ args [ax ] = df_input .index if df_provided else Range ()
1169
+ if constructor == go .Scatter :
1170
+ if args ["orientation" ] is None :
1171
+ args ["orientation" ] = "v" if ax == "x" else "h"
1172
+ if constructor == go .Bar :
1173
+ missing_bar_dim = ax
1174
+ if wide_mode and wide_cross_name is None :
1175
+ if df_provided :
1176
+ args ["wide_cross" ] = df_input .index
1177
+ wide_cross_name = df_input .index .name or "index"
1178
+ else :
1179
+ args ["wide_cross" ] = Range (label = "index" )
1180
+ wide_cross_name = "index"
1181
+
1182
+ # now that things have been prepped, we do the systematic rewriting of `args`
1183
+
1184
+ df_output , wide_id_vars = process_args_into_dataframe (args , wide_mode , var_name )
1185
+
1186
+ # now that `df_output` exists and `args` contains only references, we complete
1187
+ # the special-case and wide-mode handling by further rewriting args and/or mutating
1188
+ # df_output
1189
+
1161
1190
if not wide_mode and missing_bar_dim and constructor == go .Bar :
1162
1191
# now that we've populated df_output, we check to see if the non-missing
1163
1192
# dimension is categorical: if so, then setting the missing dimension to a
@@ -1166,20 +1195,17 @@ def build_dataframe(args, constructor):
1166
1195
other_dim = "x" if missing_bar_dim == "y" else "y"
1167
1196
if not _is_continuous (df_output , args [other_dim ]):
1168
1197
args [missing_bar_dim ] = "_count_"
1169
- constants ["_count_" ] = 1
1198
+ df_output ["_count_" ] = 1
1170
1199
else :
1171
1200
# on the other hand, if the non-missing dimension is continuous, then we
1172
1201
# can use this information to override the normal auto-orientation code
1173
1202
if args ["orientation" ] is None :
1174
1203
args ["orientation" ] = "v" if missing_bar_dim == "x" else "h"
1175
1204
1176
- for col_name in ranges :
1177
- df_output [col_name ] = range (len (df_output ))
1178
-
1179
- for col_name in constants :
1180
- df_output [col_name ] = constants [col_name ]
1181
-
1182
1205
if wide_mode :
1206
+ # at this point, `df_output` is semi-long/semi-wide, but we know which columns
1207
+ # are which, so we melt it and reassign `args` to refer to the newly-tidy
1208
+ # columns, keeping track of various names and manglings set up above
1183
1209
wide_value_vars = [c for c in args ["_column_" ] if c not in wide_id_vars ]
1184
1210
del args ["_column_" ]
1185
1211
del args ["wide_cross" ]
0 commit comments