@@ -93,7 +93,7 @@ def election_geojson():
93
93
def carshare ():
94
94
"""
95
95
Each row represents the availability of car-sharing services near the centroid of a zone
96
- in Montreal.
96
+ in Montreal over a month-long period .
97
97
98
98
Returns:
99
99
A `pandas.DataFrame` with 249 rows and the following columns:
@@ -102,31 +102,78 @@ def carshare():
102
102
return _get_dataset ("carshare" )
103
103
104
104
105
def timeseries(indexed=False):
    """
    Each row in this wide dataset represents values from 6 random walk time-series. The
    'day' column contains dates.

    Args:
        indexed: when True, move the 'day' column into the index and name the column
            index 'ticker'. Defaults to False, which leaves 'day' as a regular column.

    Returns:
        A `pandas.DataFrame` with 100 rows and the following columns:
        `['day', 'MOL.AA', 'JJK.OB', 'LFF.KP', 'UJS.PX', 'BTK.HH', 'SHX.QQ']`.
        If `indexed` is True, the 'day' column is used as the index and the column index
        is named 'ticker'
    """
    frame = _get_dataset("timeseries")
    if not indexed:
        return frame

    # Wide form: dates on the row axis, one column per ticker.
    by_day = frame.set_index("day")
    by_day.columns.name = "ticker"
    return by_day
115
121
116
122
117
def experiment(indexed=False):
    """
    Each row in this wide dataset represents the results of 100 simulated participants
    on three hypothetical experiments, along with their gender and control/treatment group.

    Args:
        indexed: when True, name the data frame index "participant". Defaults to False,
            which leaves the index unnamed.

    Returns:
        A `pandas.DataFrame` with 100 rows and the following columns:
        `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
        If `indexed` is True, the data frame index is named "participant"
    """
    frame = _get_dataset("experiment")
    if not indexed:
        return frame

    # Only the index label changes; the rows and columns are left as loaded.
    frame.index.name = "participant"
    return frame
138
+
139
+
140
def short_track_wide(indexed=False):
    """
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020.

    Args:
        indexed: when True, move the 'nation' column into the index and name the column
            index 'medal'. Defaults to False, which leaves 'nation' as a regular column.

    Returns:
        A `pandas.DataFrame` with 3 rows and the following columns:
        `['nation', 'gold', 'silver', 'bronze']`.
        If `indexed` is True, the 'nation' column is used as the index and the column index
        is named 'medal'
    """
    df = _get_dataset("short_track")
    if indexed:
        df = df.set_index("nation")
        # Name the *column* axis (gold/silver/bronze), not the row index: the row index
        # holds nations and must keep the name "nation" given to it by set_index.
        df.columns.name = "medal"
    return df
156
+
157
+
158
def short_track_long(indexed=False):
    """
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020.

    Args:
        indexed: when True, move the 'nation' column into the index. Defaults to False,
            which leaves 'nation' as a regular column.

    Returns:
        A `pandas.DataFrame` with 9 rows and the following columns:
        `['nation', 'medal', 'count']`.
        If `indexed` is True, the 'nation' column is used as the index.
    """
    wide = _get_dataset("short_track")
    # Unpivot every non-'nation' column into ('medal', 'count') pairs.
    long_form = wide.melt(id_vars=["nation"], var_name="medal", value_name="count")
    return long_form.set_index("nation") if indexed else long_form
127
174
128
175
129
- def _get_dataset (d , index_col = None ):
176
+ def _get_dataset (d ):
130
177
import pandas
131
178
import os
132
179
@@ -136,6 +183,5 @@ def _get_dataset(d, index_col=None):
136
183
"package_data" ,
137
184
"datasets" ,
138
185
d + ".csv.gz" ,
139
- ),
140
- index_col = index_col ,
186
+ )
141
187
)
0 commit comments