Skip to content

Commit c4920ea

Browse files
committed
Replacing pd.Series with dict for simplicity
1 parent 8eae8b7 commit c4920ea

File tree

7 files changed

+97
-91
lines changed

7 files changed

+97
-91
lines changed

openml/datasets/dataset.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -178,23 +178,23 @@ def __str__(self):
178178
header = '{}\n{}\n'.format(header, '=' * len(header))
179179

180180
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
181-
fields = pd.Series({"Name": self.name,
182-
"Version": self.version,
183-
"Format": self.format,
184-
"Upload Date": self.upload_date.replace('T', ' '),
185-
"Licence": self.licence,
186-
"Download URL": self.url,
187-
"OpenML URL": "{}d/{}".format(base_url, self.dataset_id),
188-
"Data file": self.data_file,
189-
"Pickle file": self.data_pickle_file,
190-
"# of features": len(self.features)})
191-
181+
fields = {"Name": self.name,
182+
"Version": self.version,
183+
"Format": self.format,
184+
"Upload Date": self.upload_date.replace('T', ' '),
185+
"Licence": self.licence,
186+
"Download URL": self.url,
187+
"OpenML URL": "{}d/{}".format(base_url, self.dataset_id),
188+
"Data file": self.data_file,
189+
"Pickle file": self.data_pickle_file,
190+
"# of features": len(self.features)}
192191
if self.qualities['NumberOfInstances'] is not None:
193-
fields.append(pd.Series({"# of instances": int(self.qualities['NumberOfInstances'])}))
192+
fields["# of instances"] = int(self.qualities['NumberOfInstances'])
194193

194+
# determines the order in which the information will be printed
195195
order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL",
196-
"OpenML URL", "Data File", "Pickle File", "# of features"]
197-
fields = list(fields.reindex(order).dropna().iteritems())
196+
"OpenML URL", "Data File", "Pickle File", "# of features", "# of instances"]
197+
fields = [(key, fields[key]) for key in order if key in fields]
198198

199199
longest_field_name_length = max(len(name) for name, value in fields)
200200
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

openml/evaluations/evaluation.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,24 +56,24 @@ def __str__(self):
5656
header = '{}\n{}\n'.format(header, '=' * len(header))
5757

5858
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
59-
fields = pd.Series({"Upload Date": self.upload_time,
60-
"Run ID": self.run_id,
61-
"OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
62-
"Task ID": self.task_id,
63-
"OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
64-
"Flow ID": self.flow_id,
65-
"OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
66-
"Setup ID": self.setup_id,
67-
"Data ID": self.data_id,
68-
"Data Name": self.data_name,
69-
"OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
70-
"Metric Used": self.function,
71-
"Result": self.value})
59+
fields = {"Upload Date": self.upload_time,
60+
"Run ID": self.run_id,
61+
"OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
62+
"Task ID": self.task_id,
63+
"OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
64+
"Flow ID": self.flow_id,
65+
"OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
66+
"Setup ID": self.setup_id,
67+
"Data ID": self.data_id,
68+
"Data Name": self.data_name,
69+
"OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
70+
"Metric Used": self.function,
71+
"Result": self.value}
7272

7373
order = ["Uploader Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL"
7474
"Flow ID", "OpenML Flow URL", "Setup ID", "Data ID", "Data Name",
7575
"OpenML Data URL", "Metric Used", "Result"]
76-
fields = list(fields.reindex(order).dropna().iteritems())
76+
fields = [(key, fields[key]) for key in order if key in fields]
7777

7878
longest_field_name_length = max(len(name) for name, value in fields)
7979
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

openml/flows/flow.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -140,18 +140,19 @@ def __str__(self):
140140
header = '{}\n{}\n'.format(header, '=' * len(header))
141141

142142
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
143-
fields = pd.Series({"Flow ID": "{} (version {})".format(self.flow_id, self.version),
144-
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
145-
"Flow Name": self.name,
146-
"Flow Description": self.description,
147-
"Upload Date": self.upload_date.replace('T', ' '),
148-
"Dependencies": self.dependencies})
143+
fields = {"Flow ID": "{} (version {})".format(self.flow_id, self.version),
144+
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
145+
"Flow Name": self.name,
146+
"Flow Description": self.description,
147+
"Upload Date": self.upload_date.replace('T', ' '),
148+
"Dependencies": self.dependencies}
149149
if self.binary_url is not None:
150-
fields = fields.append(pd.Series({"Binary URL": self.binary_url}))
150+
fields["Binary URL"] = self.binary_url
151151

152+
# determines the order in which the information will be printed
152153
order = ["Flow ID", "Flow URL", "Flow Name", "Flow Description", "Binary URL",
153154
"Upload Date", "Dependencies"]
154-
fields = list(fields.reindex(order).dropna().iteritems())
155+
fields = [(key, fields[key]) for key in order if key in fields]
155156

156157
longest_field_name_length = max(len(name) for name, value in fields)
157158
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

openml/runs/run.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,29 +69,29 @@ def __str__(self):
6969
header = '{}\n{}\n'.format(header, '=' * len(header))
7070

7171
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
72-
fields = pd.Series({"Uploader Name": self.uploader_name,
73-
"Uploader Profile": "{}u/{}".format(base_url, self.uploader),
74-
"Metric": self.task_evaluation_measure,
75-
"Run ID": self.run_id,
76-
"Run URL": "{}r/{}".format(base_url, self.run_id),
77-
"Task ID": self.task_id,
78-
"Task Type": self.task_type,
79-
"Task URL": "{}t/{}".format(base_url, self.run_id),
80-
"Flow ID": self.flow_id,
81-
"Flow Name": self.flow_name,
82-
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
83-
"Setup ID": self.setup_id,
84-
"Setup String": self.setup_string,
85-
"Dataset ID": self.dataset_id,
86-
"Dataset URL": "{}d/{}".format(base_url, self.dataset_id)})
72+
fields = {"Uploader Name": self.uploader_name,
73+
"Uploader Profile": "{}u/{}".format(base_url, self.uploader),
74+
"Metric": self.task_evaluation_measure,
75+
"Run ID": self.run_id,
76+
"Run URL": "{}r/{}".format(base_url, self.run_id),
77+
"Task ID": self.task_id,
78+
"Task Type": self.task_type,
79+
"Task URL": "{}t/{}".format(base_url, self.run_id),
80+
"Flow ID": self.flow_id,
81+
"Flow Name": self.flow_name,
82+
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
83+
"Setup ID": self.setup_id,
84+
"Setup String": self.setup_string,
85+
"Dataset ID": self.dataset_id,
86+
"Dataset URL": "{}d/{}".format(base_url, self.dataset_id)}
8787
if self.task_evaluation_measure in self.evaluations:
88-
value = self.evaluations[self.task_evaluation_measure]
89-
fields = fields.append(pd.Series({"Result": value}))
88+
fields["Result"] = self.evaluations[self.task_evaluation_measure]
9089

90+
# determines the order in which the information will be printed
9191
order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
9292
"Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
9393
"Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
94-
fields = list(fields.reindex(order).dropna().iteritems())
94+
fields = [(key, fields[key]) for key in order if key in fields]
9595

9696
longest_field_name_length = max(len(name) for name, value in fields)
9797
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

openml/setups/setup.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@ def __str__(self):
3333
header = '{}\n{}\n'.format(header, '=' * len(header))
3434

3535
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
36-
fields = pd.Series({"Setup ID": self.setup_id,
37-
"Flow ID": self.flow_id,
38-
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
39-
"# of Parameters": len(self.parameters)})
36+
fields = {"Setup ID": self.setup_id,
37+
"Flow ID": self.flow_id,
38+
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
39+
"# of Parameters": len(self.parameters)}
40+
41+
# determines the order in which the information will be printed
4042
order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
41-
fields = list(fields.reindex(order).dropna().iteritems())
43+
fields = [(key, fields[key]) for key in order if key in fields]
4244

4345
longest_field_name_length = max(len(name) for name, value in fields)
4446
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
@@ -86,26 +88,27 @@ def __str__(self):
8688
header = '{}\n{}\n'.format(header, '=' * len(header))
8789

8890
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
89-
fields = pd.Series({"ID": self.id,
90-
"Flow ID": self.flow_id,
91-
# "Flow Name": self.flow_name,
92-
"Flow Name": self.full_name,
93-
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
94-
"Parameter Name": self.parameter_name})
91+
fields = {"ID": self.id,
92+
"Flow ID": self.flow_id,
93+
# "Flow Name": self.flow_name,
94+
"Flow Name": self.full_name,
95+
"Flow URL": "{}f/{}".format(base_url, self.flow_id),
96+
"Parameter Name": self.parameter_name}
9597
# indented prints for parameter attributes
9698
# indentation = 2 spaces + 1 | + 2 underscores
9799
indent = "{}|{}".format(" " * 2, "_" * 2)
98100
parameter_data_type = "{}Data Type".format(indent)
101+
fields[parameter_data_type] = self.data_type
99102
parameter_default = "{}Default".format(indent)
103+
fields[parameter_default] = self.default_value
100104
parameter_value = "{}Value".format(indent)
101-
fields = fields.append(pd.Series({parameter_data_type: self.data_type,
102-
parameter_default: self.default_value,
103-
parameter_value: self.value}))
105+
fields[parameter_value] = self.value
104106

107+
# determines the order in which the information will be printed
105108
order = ["ID", "Flow ID", "Flow Name", "Flow URL", "Parameter Name",
106109
parameter_data_type, parameter_default, parameter_value]
107-
fields = list(fields.reindex(order).dropna().iteritems())
108-
110+
fields = [(key, fields[key]) for key in order if key in fields]
111+
109112
longest_field_name_length = max(len(name) for name, value in fields)
110113
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
111114
body = '\n'.join(field_line_format.format(name, value) for name, value in fields)

openml/study/study.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -93,26 +93,27 @@ def __init__(
9393
def __str__(self):
9494
# header is provided by the sub classes
9595
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
96-
fields = pd.Series({"ID": self.id,
97-
"Name": self.name,
98-
"Status": self.status,
99-
"Main Entity Type": self.main_entity_type,
100-
"Study URL": "{}s/{}".format(base_url, self.id),
101-
"Creator": "{}u/{}".format(base_url, self.creator),
102-
"Upload Time": self.creation_date.replace('T', ' ')})
96+
fields = {"ID": self.id,
97+
"Name": self.name,
98+
"Status": self.status,
99+
"Main Entity Type": self.main_entity_type,
100+
"Study URL": "{}s/{}".format(base_url, self.id),
101+
"Creator": "{}u/{}".format(base_url, self.creator),
102+
"Upload Time": self.creation_date.replace('T', ' ')}
103103
if self.data is not None:
104-
fields = fields.append(pd.Series({"# of Data": len(self.data)}))
104+
fields["# of Data"] = len(self.data)
105105
if self.tasks is not None:
106-
fields = fields.append(pd.Series({"# of Tasks": len(self.tasks)}))
106+
fields["# of Tasks"] = len(self.tasks)
107107
if self.flows is not None:
108-
fields = fields.append(pd.Series({"# of Flows": len(self.flows)}))
108+
fields["# of Flows"] = len(self.flows)
109109
if self.runs is not None:
110-
fields = fields.append(pd.Series({"# of Runs": len(self.runs)}))
110+
fields["# of Runs"] = len(self.runs)
111111

112+
# determines the order in which the information will be printed
112113
order = ["ID", "Name", "Status", "Main Entity Type", "Study URL",
113114
"# of Data", "# of Tasks", "# of Flows", "# of Runs",
114115
"Creator", "Upload Time"]
115-
fields = list(fields.reindex(order).dropna().iteritems())
116+
fields = [(key, fields[key]) for key in order if key in fields]
116117

117118
longest_field_name_length = max(len(name) for name, value in fields)
118119
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

openml/tasks/task.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,24 @@ def __str__(self):
4747
header = '{}\n{}\n'.format(header, '=' * len(header))
4848

4949
base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
50-
fields = pd.Series({"Task Type": self.task_type,
51-
"Task ID": self.task_id,
52-
"Task URL": "{}t/{}".format(base_url, self.task_id)})
50+
fields = {"Task Type": self.task_type,
51+
"Task ID": self.task_id,
52+
"Task URL": "{}t/{}".format(base_url, self.task_id)}
5353
if self.evaluation_measure is not None:
54-
fields = fields.append(pd.Series({"Evaluation Measure": self.evaluation_measure}))
54+
fields["Evaluation Measure"] = self.evaluation_measure
5555
if self.estimation_procedure is not None:
56-
fields = fields.append(pd.Series({"Estimation Procedure": self.estimation_procedure['type']}))
56+
fields["Estimation Procedure"] = self.estimation_procedure['type']
5757
if self.target_name is not None:
58-
fields = fields.append(pd.Series({"Target Feature": self.target_name}))
58+
fields["Target Feature"] = self.target_name
5959
if hasattr(self, 'class_labels'):
60-
fields = fields.append(pd.Series({"# of Classes": len(self.class_labels)}))
60+
fields["# of Classes"] = len(self.class_labels)
6161
if hasattr(self, 'cost_matrix'):
62-
fields = fields.append(pd.Series({"Cost Matrix": "Available"}))
62+
fields["Cost Matrix"] = "Available"
6363

64+
# determines the order in which the information will be printed
6465
order = ["Task Type", "Task ID", "Task URL", "Estimation Procedure", "Evaluation Measure",
6566
"Target Feature", "# of Classes", "Cost Matrix"]
66-
fields = list(fields.reindex(order).dropna().iteritems())
67+
fields = [(key, fields[key]) for key in order if key in fields]
6768

6869
longest_field_name_length = max(len(name) for name, value in fields)
6970
field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)

0 commit comments

Comments
 (0)