Skip to content

Commit 6ec4ad1

Browse files
authored
Merge pull request #701 from openml/fix_653
Adding object summary (str representation)
2 parents cfac473 + 0ab7cd3 commit 6ec4ad1

File tree

7 files changed

+253
-6
lines changed

7 files changed

+253
-6
lines changed

openml/datasets/dataset.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,36 @@ def __init__(self, name, description, format=None,
173173
else:
174174
self.data_pickle_file = None
175175

176+
def __str__(self):
    """Return a human-readable, multi-line summary of the dataset.

    The result is a title underlined with ``=`` followed by one
    ``name....: value`` line per field, where every field name is
    dot-padded to the width of the longest name.

    Returns
    -------
    str
    """
    header = "OpenML Dataset"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop len('api/v1/xml') trailing characters from the configured server
    # address to obtain the prefix for entity URLs (assumes the address
    # ends with that suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Name": self.name,
              "Version": self.version,
              "Format": self.format,
              "Licence": self.licence,
              "Download URL": self.url,
              "Data file": self.data_file,
              "Pickle file": self.data_pickle_file,
              "# of features": len(self.features)}
    if self.upload_date is not None:
        fields["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.dataset_id is not None:
        fields["OpenML URL"] = "{}d/{}".format(base_url, self.dataset_id)
    # Guarded lookup: a summary string should not fail just because the
    # qualities are unavailable or lack the instance count.
    qualities = self.qualities
    if qualities is not None and qualities.get('NumberOfInstances') is not None:
        fields["# of instances"] = int(qualities['NumberOfInstances'])

    # determines the order in which the information will be printed;
    # every entry must match a key of ``fields`` exactly (case-sensitive),
    # otherwise that row is silently left out
    order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL",
             "OpenML URL", "Data file", "Pickle file", "# of features", "# of instances"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    # '{:.<N}' left-aligns the field name and pads it with dots to width N
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
205+
176206
def _data_arff_to_pickle(self, data_file):
177207
data_pickle_file = data_file.replace('.arff', '.pkl.py3')
178208
if os.path.exists(data_pickle_file):

openml/evaluations/evaluation.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import openml.config
2+
13

24
class OpenMLEvaluation(object):
35
"""
@@ -47,3 +49,32 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
4749
self.value = value
4850
self.values = values
4951
self.array_data = array_data
52+
53+
def __str__(self):
    """Return a human-readable, multi-line summary of the evaluation.

    The result is a title underlined with ``=`` followed by one
    ``name....: value`` line per field, where every field name is
    dot-padded to the width of the longest name.

    Returns
    -------
    str
    """
    header = "OpenML Evaluation"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop len('api/v1/xml') trailing characters from the configured server
    # address to obtain the prefix for entity URLs (assumes the address
    # ends with that suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Upload Date": self.upload_time,
              "Run ID": self.run_id,
              "OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
              "Task ID": self.task_id,
              "OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Data ID": self.data_id,
              "Data Name": self.data_name,
              "OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
              "Metric Used": self.function,
              "Result": self.value}

    # determines the order in which the information will be printed;
    # every entry must match a key of ``fields`` exactly, otherwise that
    # row is silently left out
    order = ["Upload Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL",
             "Flow ID", "OpenML Flow URL", "Setup ID", "Data ID", "Data Name",
             "OpenML Data URL", "Metric Used", "Result"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    # '{:.<N}' left-aligns the field name and pads it with dots to width N
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body

openml/flows/flow.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from ..extensions import get_extension_by_flow
88
from ..utils import extract_xml_tags, _tag_entity
99

10+
import openml.config
11+
1012

1113
class OpenMLFlow(object):
1214
"""OpenML Flow. Stores machine learning models.
@@ -132,6 +134,35 @@ def __init__(self, name, description, model, components, parameters,
132134

133135
self.extension = get_extension_by_flow(self)
134136

137+
def __str__(self):
    """Build the printable summary of this flow.

    Output is a title line, an ``=`` underline of the same length, and
    then one dot-padded ``label: value`` row per available field.
    """
    title = "OpenML Flow"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    # server address minus len('api/v1/xml') trailing characters
    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    info = {"Flow Name": self.name,
            "Flow Description": self.description,
            "Dependencies": self.dependencies}
    if self.flow_id is not None:
        # the version, when known, is shown next to the id
        if self.version is None:
            info["Flow ID"] = self.flow_id
        else:
            info["Flow ID"] = "{} (version {})".format(self.flow_id, self.version)
        info["Flow URL"] = "{}f/{}".format(server_root, self.flow_id)
    if self.upload_date is not None:
        info["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.binary_url is not None:
        info["Binary URL"] = self.binary_url

    # rows appear in this order; labels without a value are skipped
    display_order = ("Flow ID", "Flow URL", "Flow Name", "Flow Description",
                     "Binary URL", "Upload Date", "Dependencies")
    rows = [(label, info[label]) for label in display_order if label in info]

    label_width = max(len(label) for label, _ in rows)
    row_template = "{{:.<{}}}: {{}}".format(label_width)
    return banner + '\n'.join(row_template.format(label, value) for label, value in rows)
165+
135166
def _to_xml(self) -> str:
136167
"""Generate xml representation of self for upload to server.
137168

openml/runs/run.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,40 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
6868
self.predictions_url = predictions_url
6969

7070
def __str__(self):
    """Return a human-readable, multi-line summary of the run.

    The result is a title underlined with ``=`` followed by one
    ``name....: value`` line per field, where every field name is
    dot-padded to the width of the longest name.

    Returns
    -------
    str
    """
    header = "OpenML Run"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop len('api/v1/xml') trailing characters from the configured server
    # address to obtain the prefix for entity URLs (assumes the address
    # ends with that suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Uploader Name": self.uploader_name,
              "Metric": self.task_evaluation_measure,
              "Run ID": self.run_id,
              "Task ID": self.task_id,
              "Task Type": self.task_type,
              "Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "Flow Name": self.flow_name,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Setup String": self.setup_string,
              "Dataset ID": self.dataset_id,
              "Dataset URL": "{}d/{}".format(base_url, self.dataset_id)}
    if self.uploader is not None:
        fields["Uploader Profile"] = "{}u/{}".format(base_url, self.uploader)
    if self.run_id is not None:
        fields["Run URL"] = "{}r/{}".format(base_url, self.run_id)
    # A membership test against None raises TypeError; skip the result row
    # instead of failing when no evaluations are attached to the run.
    if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
        fields["Result"] = self.evaluations[self.task_evaluation_measure]

    # determines the order in which the information will be printed
    order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
             "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
             "Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    # '{:.<N}' left-aligns the field name and pads it with dots to width N
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
77105

78106
def _repr_pretty_(self, pp, cycle):
79107
pp.text(str(self))

openml/setups/setup.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import openml.config
2+
13

24
class OpenMLSetup(object):
35
"""Setup object (a.k.a. Configuration).
@@ -25,6 +27,25 @@ def __init__(self, setup_id, flow_id, parameters):
2527
self.flow_id = flow_id
2628
self.parameters = parameters
2729

30+
def __str__(self):
    """Build the printable summary of this setup.

    Output is a title line, an ``=`` underline of the same length, and
    then one dot-padded ``label: value`` row per field.
    """
    title = "OpenML Setup"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    # server address minus len('api/v1/xml') trailing characters
    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    info = {}
    info["Setup ID"] = self.setup_id
    info["Flow ID"] = self.flow_id
    info["Flow URL"] = "{}f/{}".format(server_root, self.flow_id)
    info["# of Parameters"] = len(self.parameters)

    # rows appear in this order
    display_order = ("Setup ID", "Flow ID", "Flow URL", "# of Parameters")
    rows = [(label, info[label]) for label in display_order if label in info]

    label_width = max(len(label) for label, _ in rows)
    row_template = "{{:.<{}}}: {{}}".format(label_width)
    return banner + '\n'.join(row_template.format(label, value) for label, value in rows)
48+
2849

2950
class OpenMLParameter(object):
3051
"""Parameter object (used in setup).
@@ -60,3 +81,34 @@ def __init__(self, input_id, flow_id, flow_name, full_name, parameter_name,
6081
self.data_type = data_type
6182
self.default_value = default_value
6283
self.value = value
84+
85+
def __str__(self):
    """Build the printable summary of this parameter.

    Output is a title line, an ``=`` underline of the same length, and
    then one dot-padded ``label: value`` row per field. The data type,
    default and value rows carry a ``  |__`` prefix so they read as
    children of the "Parameter Name" row.
    """
    title = "OpenML Parameter"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    # server address minus len('api/v1/xml') trailing characters
    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    # prefix for the nested rows: two spaces, a pipe, two underscores
    nested = "  |__"
    data_type_label = nested + "Data Type"
    default_label = nested + "Default"
    value_label = nested + "Value"

    info = {
        "ID": self.id,
        "Flow ID": self.flow_id,
        # the row labelled "Flow Name" displays full_name, not flow_name
        "Flow Name": self.full_name,
        "Flow URL": "{}f/{}".format(server_root, self.flow_id),
        "Parameter Name": self.parameter_name,
        data_type_label: self.data_type,
        default_label: self.default_value,
        value_label: self.value,
    }

    # rows appear in this order
    display_order = ("ID", "Flow ID", "Flow Name", "Flow URL", "Parameter Name",
                     data_type_label, default_label, value_label)
    rows = [(label, info[label]) for label in display_order if label in info]

    label_width = max(len(label) for label, _ in rows)
    row_template = "{{:.<{}}}: {{}}".format(label_width)
    return banner + '\n'.join(row_template.format(label, value) for label, value in rows)

openml/study/study.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,39 @@ def __init__(
8989
self.runs = runs
9090
pass
9191

92+
def __str__(self):
    """Build the field listing shared by all study-like objects.

    Only the body is produced here: one dot-padded ``label: value`` row
    per available field, without a title. Subclasses prepend their own
    header.
    """
    # server address minus len('api/v1/xml') trailing characters
    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    info = {"Name": self.name,
            "Status": self.status,
            "Main Entity Type": self.main_entity_type}
    if self.id is not None:
        info["ID"] = self.id
        info["Study URL"] = "{}s/{}".format(server_root, self.id)
    if self.creator is not None:
        info["Creator"] = "{}u/{}".format(server_root, self.creator)
    if self.creation_date is not None:
        info["Upload Time"] = self.creation_date.replace('T', ' ')

    # entity collections are reported by size, and only when present
    collections = (("# of Data", self.data),
                   ("# of Tasks", self.tasks),
                   ("# of Flows", self.flows),
                   ("# of Runs", self.runs))
    for label, members in collections:
        if members is not None:
            info[label] = len(members)

    # rows appear in this order; labels without a value are skipped
    display_order = ("ID", "Name", "Status", "Main Entity Type", "Study URL",
                     "# of Data", "# of Tasks", "# of Flows", "# of Runs",
                     "Creator", "Upload Time")
    rows = [(label, info[label]) for label in display_order if label in info]

    label_width = max(len(label) for label, _ in rows)
    row_template = "{{:.<{}}}: {{}}".format(label_width)
    return '\n'.join(row_template.format(label, value) for label, value in rows)
124+
92125
def publish(self) -> int:
93126
"""
94127
Publish the study on the OpenML server.
@@ -235,6 +268,12 @@ def __init__(
235268
setups=setups,
236269
)
237270

271+
def __str__(self):
    """Return the parent class's field listing under an "OpenML Study" title."""
    title = "OpenML Study"
    underline = '=' * len(title)
    listing = super(OpenMLStudy, self).__str__()
    # title, underline and listing each start on their own line
    return '\n'.join((title, underline, listing))
276+
238277

239278
class OpenMLBenchmarkSuite(BaseStudy):
240279
"""
@@ -306,3 +345,9 @@ def __init__(
306345
runs=None,
307346
setups=None,
308347
)
348+
349+
def __str__(self):
    """Return the parent class's field listing under an "OpenML Benchmark Suite" title."""
    title = "OpenML Benchmark Suite"
    underline = '=' * len(title)
    listing = super(OpenMLBenchmarkSuite, self).__str__()
    # title, underline and listing each start on their own line
    return '\n'.join((title, underline, listing))

openml/tasks/task.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,36 @@ def __init__(
5555
self.estimation_procedure_id = estimation_procedure_id
5656
self.split = None # type: Optional[OpenMLSplit]
5757

58+
def __str__(self):
    """Return a human-readable, multi-line summary of the task.

    The result is a title underlined with ``=`` followed by one
    ``name....: value`` line per field, where every field name is
    dot-padded to the width of the longest name. Fields whose value is
    unavailable are left out.

    Returns
    -------
    str
    """
    header = "OpenML Task"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop len('api/v1/xml') trailing characters from the configured server
    # address to obtain the prefix for entity URLs (assumes the address
    # ends with that suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Task Type": self.task_type}
    if self.task_id is not None:
        fields["Task ID"] = self.task_id
        fields["Task URL"] = "{}t/{}".format(base_url, self.task_id)
    if self.evaluation_measure is not None:
        fields["Evaluation Measure"] = self.evaluation_measure
    if self.estimation_procedure is not None:
        fields["Estimation Procedure"] = self.estimation_procedure['type']
    # target_name, class_labels and cost_matrix are probed defensively:
    # presumably only some task kinds define them (TODO confirm), and
    # printing a task must not fail when one is missing or None.
    target_name = getattr(self, 'target_name', None)
    if target_name is not None:
        fields["Target Feature"] = target_name
    class_labels = getattr(self, 'class_labels', None)
    if class_labels is not None:
        fields["# of Classes"] = len(class_labels)
    if hasattr(self, 'cost_matrix'):
        fields["Cost Matrix"] = "Available"

    # determines the order in which the information will be printed
    order = ["Task Type", "Task ID", "Task URL", "Estimation Procedure", "Evaluation Measure",
             "Target Feature", "# of Classes", "Cost Matrix"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    # '{:.<N}' left-aligns the field name and pads it with dots to width N
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
87+
5888
def get_dataset(self) -> datasets.OpenMLDataset:
5989
"""Download dataset associated with task"""
6090
return datasets.get_dataset(self.dataset_id)

0 commit comments

Comments
 (0)