Skip to content

Commit 553bd0b

Browse files
wip
1 parent 977dbcf commit 553bd0b

File tree

5 files changed

+149
-89
lines changed

5 files changed

+149
-89
lines changed

packages/python/plotly/plotly/data/__init__.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,31 @@ def carshare():
102102
return _get_dataset("carshare")
103103

104104

105-
def _get_dataset(d):
105+
def timeseries():
106+
"""
107+
Each row in this wide dataset represents values from 6 random walk time-series. The
108+
index contains dates.
109+
110+
Returns:
111+
A `pandas.DataFrame` with 100 rows and the following columns:
112+
`['MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`.
113+
"""
114+
return _get_dataset("timeseries", index_col=0)
115+
116+
117+
def experiment():
118+
"""
119+
Each row in this wide dataset represents the results of one of 100 simulated participants
120+
on three hypothetical experiments, along with their gender and smoker status.
121+
122+
Returns:
123+
A `pandas.DataFrame` with 100 rows and the following columns:
124+
`['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'smoker']`.
125+
"""
126+
return _get_dataset("experiment")
127+
128+
129+
def _get_dataset(d, index_col=None):
106130
import pandas
107131
import os
108132

@@ -112,5 +136,6 @@ def _get_dataset(d):
112136
"package_data",
113137
"datasets",
114138
d + ".csv.gz",
115-
)
139+
),
140+
index_col=index_col,
116141
)

packages/python/plotly/plotly/express/_core.py

Lines changed: 113 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -934,88 +934,16 @@ def _is_col_list(df_input, arg):
934934
return True
935935

936936

937-
def build_dataframe(args, constructor):
937+
def process_args_into_dataframe(args, wide_mode, var_name):
938938
"""
939-
Constructs a dataframe and modifies `args` in-place.
940-
941-
The argument values in `args` can be either strings corresponding to
942-
existing columns of a dataframe, or data arrays (lists, numpy arrays,
943-
pandas columns, series).
944-
945-
Parameters
946-
----------
947-
args : OrderedDict
948-
arguments passed to the px function and subsequently modified
949-
constructor : graph_object trace class
950-
the trace type selected for this figure
939+
After this function runs, the `all_attrables` keys of `args` all contain only
940+
references to columns of `df_output`. This function handles the extraction of data
941+
from `args["attrable"]` and column-name generation as appropriate, adds the
942+
data to `df_output` and then replaces `args["attrable"]` with the appropriate
943+
reference.
951944
"""
952-
953-
# make copies of all the fields via dict() and list()
954-
for field in args:
955-
if field in array_attrables and args[field] is not None:
956-
args[field] = (
957-
dict(args[field])
958-
if isinstance(args[field], dict)
959-
else list(args[field])
960-
)
961-
962-
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
963-
df_provided = args["data_frame"] is not None
964-
if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
965-
args["data_frame"] = pd.DataFrame(args["data_frame"])
966945
df_input = args["data_frame"]
967-
968-
no_x = args.get("x", None) is None
969-
no_y = args.get("y", None) is None
970-
wide_x = False if no_x else _is_col_list(df_input, args["x"])
971-
wide_y = False if no_y else _is_col_list(df_input, args["y"])
972-
973-
wide_mode = False
974-
if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]:
975-
wide_cross_name = None
976-
if wide_x and wide_y:
977-
raise ValueError(
978-
"Cannot accept list of column references or list of columns for both `x` and `y`."
979-
)
980-
if df_provided and no_x and no_y:
981-
wide_mode = True
982-
args["_column_"] = list(df_input.columns)
983-
var_name = df_input.columns.name or "_column_"
984-
wide_orientation = args.get("orientation", None) or "v"
985-
args["orientation"] = wide_orientation
986-
args["wide_cross"] = None
987-
elif wide_x != wide_y:
988-
wide_mode = True
989-
args["_column_"] = args["y"] if wide_y else args["x"]
990-
var_name = "_column_"
991-
if constructor == go.Histogram:
992-
wide_orientation = "v" if wide_x else "h"
993-
else:
994-
wide_orientation = "v" if wide_y else "h"
995-
args["y" if wide_y else "x"] = None
996-
args["wide_cross"] = None
997-
if not no_x and not no_y:
998-
wide_cross_name = "__x__" if wide_y else "__y__"
999-
1000-
missing_bar_dim = None
1001-
if constructor in [go.Scatter, go.Bar]:
1002-
if not wide_mode and (no_x != no_y):
1003-
for ax in ["x", "y"]:
1004-
if args.get(ax, None) is None:
1005-
args[ax] = df_input.index if df_provided else Range()
1006-
if constructor == go.Scatter:
1007-
if args["orientation"] is None:
1008-
args["orientation"] = "v" if ax == "x" else "h"
1009-
if constructor == go.Bar:
1010-
missing_bar_dim = ax
1011-
if wide_mode and wide_cross_name is None:
1012-
if df_provided:
1013-
args["wide_cross"] = df_input.index
1014-
wide_cross_name = df_input.index.name or "index"
1015-
else:
1016-
args["wide_cross"] = Range(label="index")
1017-
wide_cross_name = "index"
1018-
946+
df_provided = df_input is not None
1019947
df_output = pd.DataFrame()
1020948
constants = dict()
1021949
ranges = list()
@@ -1031,7 +959,6 @@ def build_dataframe(args, constructor):
1031959
else:
1032960
df_output[df_input.columns] = df_input[df_input.columns]
1033961

1034-
1035962
# Loop over possible arguments
1036963
for field_name in all_attrables:
1037964
# Massaging variables
@@ -1158,6 +1085,108 @@ def build_dataframe(args, constructor):
11581085
if field_name != "_column_":
11591086
wide_id_vars.add(str(col_name))
11601087

1088+
for col_name in ranges:
1089+
df_output[col_name] = range(len(df_output))
1090+
1091+
for col_name in constants:
1092+
df_output[col_name] = constants[col_name]
1093+
1094+
return df_output, wide_id_vars
1095+
1096+
1097+
def build_dataframe(args, constructor):
1098+
"""
1099+
Constructs a dataframe and modifies `args` in-place.
1100+
1101+
The argument values in `args` can be either strings corresponding to
1102+
existing columns of a dataframe, or data arrays (lists, numpy arrays,
1103+
pandas columns, series).
1104+
1105+
Parameters
1106+
----------
1107+
args : OrderedDict
1108+
arguments passed to the px function and subsequently modified
1109+
constructor : graph_object trace class
1110+
the trace type selected for this figure
1111+
"""
1112+
1113+
# make copies of all the fields via dict() and list()
1114+
for field in args:
1115+
if field in array_attrables and args[field] is not None:
1116+
args[field] = (
1117+
dict(args[field])
1118+
if isinstance(args[field], dict)
1119+
else list(args[field])
1120+
)
1121+
1122+
# Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
1123+
df_provided = args["data_frame"] is not None
1124+
if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
1125+
args["data_frame"] = pd.DataFrame(args["data_frame"])
1126+
df_input = args["data_frame"]
1127+
1128+
# now we handle special cases like wide-mode or x-xor-y specification
1129+
# by rearranging args to tee things up for process_args_into_dataframe to work
1130+
no_x = args.get("x", None) is None
1131+
no_y = args.get("y", None) is None
1132+
wide_x = False if no_x else _is_col_list(df_input, args["x"])
1133+
wide_y = False if no_y else _is_col_list(df_input, args["y"])
1134+
1135+
wide_mode = False
1136+
var_name = None
1137+
if constructor in [go.Scatter, go.Bar, go.Violin, go.Box, go.Histogram]:
1138+
wide_cross_name = None
1139+
if wide_x and wide_y:
1140+
raise ValueError(
1141+
"Cannot accept list of column references or list of columns for both `x` and `y`."
1142+
)
1143+
if df_provided and no_x and no_y:
1144+
wide_mode = True
1145+
args["_column_"] = list(df_input.columns)
1146+
var_name = df_input.columns.name or "_column_"
1147+
wide_orientation = args.get("orientation", None) or "v"
1148+
args["orientation"] = wide_orientation
1149+
args["wide_cross"] = None
1150+
elif wide_x != wide_y:
1151+
wide_mode = True
1152+
args["_column_"] = args["y"] if wide_y else args["x"]
1153+
var_name = "_column_"
1154+
if constructor == go.Histogram:
1155+
wide_orientation = "v" if wide_x else "h"
1156+
else:
1157+
wide_orientation = "v" if wide_y else "h"
1158+
args["y" if wide_y else "x"] = None
1159+
args["wide_cross"] = None
1160+
if not no_x and not no_y:
1161+
wide_cross_name = "__x__" if wide_y else "__y__"
1162+
1163+
missing_bar_dim = None
1164+
if constructor in [go.Scatter, go.Bar]:
1165+
if not wide_mode and (no_x != no_y):
1166+
for ax in ["x", "y"]:
1167+
if args.get(ax, None) is None:
1168+
args[ax] = df_input.index if df_provided else Range()
1169+
if constructor == go.Scatter:
1170+
if args["orientation"] is None:
1171+
args["orientation"] = "v" if ax == "x" else "h"
1172+
if constructor == go.Bar:
1173+
missing_bar_dim = ax
1174+
if wide_mode and wide_cross_name is None:
1175+
if df_provided:
1176+
args["wide_cross"] = df_input.index
1177+
wide_cross_name = df_input.index.name or "index"
1178+
else:
1179+
args["wide_cross"] = Range(label="index")
1180+
wide_cross_name = "index"
1181+
1182+
# now that things have been prepped, we do the systematic rewriting of `args`
1183+
1184+
df_output, wide_id_vars = process_args_into_dataframe(args, wide_mode, var_name)
1185+
1186+
# now that `df_output` exists and `args` contains only references, we complete
1187+
# the special-case and wide-mode handling by further rewriting args and/or mutating
1188+
# df_output
1189+
11611190
if not wide_mode and missing_bar_dim and constructor == go.Bar:
11621191
# now that we've populated df_output, we check to see if the non-missing
11631192
# dimension is categorical: if so, then setting the missing dimension to a
@@ -1166,20 +1195,17 @@ def build_dataframe(args, constructor):
11661195
other_dim = "x" if missing_bar_dim == "y" else "y"
11671196
if not _is_continuous(df_output, args[other_dim]):
11681197
args[missing_bar_dim] = "_count_"
1169-
constants["_count_"] = 1
1198+
df_output["_count_"] = 1
11701199
else:
11711200
# on the other hand, if the non-missing dimension is continuous, then we
11721201
# can use this information to override the normal auto-orientation code
11731202
if args["orientation"] is None:
11741203
args["orientation"] = "v" if missing_bar_dim == "x" else "h"
11751204

1176-
for col_name in ranges:
1177-
df_output[col_name] = range(len(df_output))
1178-
1179-
for col_name in constants:
1180-
df_output[col_name] = constants[col_name]
1181-
11821205
if wide_mode:
1206+
# at this point, `df_output` is semi-long/semi-wide, but we know which columns
1207+
# are which, so we melt it and reassign `args` to refer to the newly-tidy
1208+
# columns, keeping track of various names and manglings set up above
11831209
wide_value_vars = [c for c in args["_column_"] if c not in wide_id_vars]
11841210
del args["_column_"]
11851211
del args["wide_cross"]
Binary file not shown.
Binary file not shown.

packages/python/plotly/plotly/tests/test_core/test_px/test_px_wide.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,15 @@ def test_wide_x_or_y():
168168
df_out.sort_index(axis=1), pd.DataFrame(expected).sort_index(axis=1),
169169
)
170170

171+
"""
172+
for each orientation
173+
for each trace type
174+
with and without df
175+
wide x or y
176+
numerical or categorical wide values
177+
with and without cross value
178+
"""
179+
171180

172181
@pytest.mark.parametrize(
173182
"orientation", [None, "v", "h"],

0 commit comments

Comments
 (0)