Skip to content

Commit b788721

Browse files
wide datasets
1 parent 18276cf commit b788721

File tree

3 files changed

+55
-9
lines changed

3 files changed

+55
-9
lines changed

packages/python/plotly/plotly/data/__init__.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def election_geojson():
9393
def carshare():
9494
"""
9595
Each row represents the availability of car-sharing services near the centroid of a zone
96-
in Montreal.
96+
in Montreal over a month-long period.
9797
9898
Returns:
9999
A `pandas.DataFrame` with 249 rows and the following columns:
@@ -102,31 +102,78 @@ def carshare():
102102
return _get_dataset("carshare")
103103

104104

105-
def timeseries():
105+
def timeseries(indexed=False):
    """
    Each row in this wide dataset holds the values of 6 random walk time-series
    for one date.

    Returns:
        A `pandas.DataFrame` with 100 rows and the following columns:
        `['day', 'MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`.
        If `indexed` is True, the 'day' column is used as the index and the column index
        is named 'ticker'.
    """
    frame = _get_dataset("timeseries")
    if not indexed:
        # Flat layout: 'day' stays a regular column.
        return frame
    frame = frame.set_index("day")
    # Label the column axis so wide-form consumers know what the columns represent.
    frame.columns.name = "ticker"
    return frame
115121

116122

117-
def experiment():
123+
def experiment(indexed=False):
    """
    Each row in this wide dataset represents the results of 100 simulated participants
    on three hypothetical experiments, along with their gender and control/treatment group.

    Returns:
        A `pandas.DataFrame` with 100 rows and the following columns:
        `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
        If `indexed` is True, the data frame index is named "participant".
    """
    frame = _get_dataset("experiment")
    if not indexed:
        return frame
    # Only the index label changes; the index values themselves are left as loaded.
    frame.index.name = "participant"
    return frame
138+
139+
140+
def short_track_wide(indexed=False):
    """
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020.

    Returns:
        A `pandas.DataFrame` with 3 rows and the following columns:
        `['nation', 'gold', 'silver', 'bronze']`.
        If `indexed` is True, the 'nation' column is used as the index and the column index
        is named 'medal'.
    """
    df = _get_dataset("short_track")
    if indexed:
        df = df.set_index("nation")
        # Name the *column* axis, not the row index: setting `df.index.name` here
        # would overwrite the 'nation' label that `set_index` just assigned and
        # leave the column index unnamed, contradicting the documented contract.
        df.columns.name = "medal"
    return df
156+
157+
158+
def short_track_long(indexed=False):
    """
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020, in long form (one row per nation/medal pair).

    Returns:
        A `pandas.DataFrame` with 9 rows and the following columns:
        `['nation', 'medal', 'count']`.
        If `indexed` is True, the 'nation' column is used as the index.
    """
    wide = _get_dataset("short_track")
    # Unpivot the medal columns into 'medal'/'count' pairs keyed by nation.
    long_form = wide.melt(id_vars=["nation"], value_name="count", var_name="medal")
    return long_form.set_index("nation") if indexed else long_form
127174

128175

129-
def _get_dataset(d, index_col=None):
176+
def _get_dataset(d):
130177
import pandas
131178
import os
132179

@@ -136,6 +183,5 @@ def _get_dataset(d, index_col=None):
136183
"package_data",
137184
"datasets",
138185
d + ".csv.gz",
139-
),
140-
index_col=index_col,
186+
)
141187
)
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)