Skip to content

Commit 0f36642

Browse files
authored
Merge pull request #765 from openml/develop
Release 0.10
2 parents 8efcf9d + 0f99118 commit 0f36642

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+1670
-296
lines changed

.travis.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ env:
1515
- TEST_DIR=/tmp/test_dir/
1616
- MODULE=openml
1717
matrix:
18-
- DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.20.0"
19-
- DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.0"
20-
- DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" RUN_FLAKE8="true" SKIP_TESTS="true"
21-
- DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.0" COVERAGE="true" DOCPUSH="true"
18+
- DISTRIB="conda" PYTHON_VERSION="3.5" SKLEARN_VERSION="0.21.2"
19+
- DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2"
20+
- DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true"
21+
- DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true"
22+
- DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2"
2223
# Checks for older scikit-learn versions (which also don't nicely work with
2324
# Python3.7)
2425
- DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2"

CONTRIBUTING.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ following rules before you submit a pull request:
8181
Drafts often benefit from the inclusion of a
8282
[task list](https://github.com/blog/1375-task-lists-in-gfm-issues-pulls-comments)
8383
in the PR description.
84+
85+
- Add [unit tests](https://github.com/openml/openml-python/tree/develop/tests) and [examples](https://github.com/openml/openml-python/tree/develop/examples) for any new functionality being introduced.
86+
- If a unit test contains an upload to the test server, please ensure that it is followed by a file collection for deletion, to prevent the test server from bulking up. For example, `TestBase._mark_entity_for_removal('data', dataset.dataset_id)`, `TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))`.
87+
- Please ensure that the example is run on the test server by beginning with the call to `openml.config.start_using_configuration_for_example()`.
8488

8589
- All tests pass when running `pytest`. On
8690
Unix-like systems, check with (from the toplevel source folder):

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
BSD 3-Clause License
22

3-
Copyright (c) 2014-2018, Matthias Feurer, Jan van Rijn, Andreas Müller,
3+
Copyright (c) 2014-2019, Matthias Feurer, Jan van Rijn, Andreas Müller,
44
Joaquin Vanschoren and others.
55
All rights reserved.
66

PULL_REQUEST_TEMPLATE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Please make sure that:
99
* for any new function or class added, please add it to doc/api.rst
1010
* the list of classes and functions should be alphabetical
1111
* for any new functionality, consider adding a relevant example
12+
* add unit tests for new functionalities
13+
* collect files uploaded to test server using _mark_entity_for_removal()
1214
-->
1315

1416
#### Reference Issue

ci_scripts/test.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
set -e
22

3+
# check status and branch before running the unit tests
4+
before="`git status --porcelain -b`"
5+
before="$before"
6+
# storing current working directory
7+
curr_dir=`pwd`
8+
39
run_tests() {
410
# Get into a temp directory to run test from the installed scikit learn and
511
# check if we do not leave artifacts
@@ -22,7 +28,7 @@ run_tests() {
2228
PYTEST_ARGS=''
2329
fi
2430

25-
pytest -n 4 --duration=20 --timeout=600 --timeout-method=thread -sv --ignore='test_OpenMLDemo.py' $PYTEST_ARGS $test_dir
31+
pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread -sv --ignore='test_OpenMLDemo.py' $PYTEST_ARGS $test_dir
2632
}
2733

2834
if [[ "$RUN_FLAKE8" == "true" ]]; then
@@ -32,3 +38,15 @@ fi
3238
if [[ "$SKIP_TESTS" != "true" ]]; then
3339
run_tests
3440
fi
41+
42+
# changing directory to stored working directory
43+
cd $curr_dir
44+
# check status and branch after running the unit tests
45+
# compares with $before to check for remaining files
46+
after="`git status --porcelain -b`"
47+
if [[ "$before" != "$after" ]]; then
48+
echo 'git status from before: '$before
49+
echo 'git status from after: '$after
50+
echo "All generated files have not been deleted!"
51+
exit 1
52+
fi

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Modules
7272
get_dataset
7373
get_datasets
7474
list_datasets
75+
list_qualities
7576
status_update
7677

7778
:mod:`openml.evaluations`: Evaluation Functions
@@ -83,6 +84,7 @@ Modules
8384
:template: function.rst
8485

8586
list_evaluations
87+
list_evaluation_measures
8688

8789
:mod:`openml.flows`: Flow Functions
8890
-----------------------------------

doc/conf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import os
1616
import sys
1717
import sphinx_bootstrap_theme
18+
import time
1819
import openml
1920

2021
# If extensions (or modules to document with autodoc) are in another directory,
@@ -65,7 +66,7 @@
6566
# General information about the project.
6667
project = u'OpenML'
6768
copyright = (
68-
u'2014-2019, the OpenML-Python team.'
69+
u'2014-{}, the OpenML-Python team.'.format(time.strftime("%Y,%m,%d,%H,%M,%S").split(',')[0])
6970
)
7071

7172
# The version info for the project you're documenting, acts as replacement for

doc/index.rst

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,12 @@ Example
2121
.. code:: python
2222
2323
import openml
24-
from sklearn import preprocessing, tree, pipeline
25-
26-
# Set the OpenML API Key which is required to upload your runs.
27-
# You can get your own API by signing up to OpenML.org.
28-
openml.config.apikey = 'ABC'
24+
from sklearn import impute, tree, pipeline
2925
3026
# Define a scikit-learn classifier or pipeline
3127
clf = pipeline.Pipeline(
3228
steps=[
33-
('imputer', preprocessing.Imputer()),
29+
('imputer', impute.SimpleImputer()),
3430
('estimator', tree.DecisionTreeClassifier())
3531
]
3632
)
@@ -39,10 +35,13 @@ Example
3935
task = openml.tasks.get_task(31)
4036
# Run the scikit-learn model on the task.
4137
run = openml.runs.run_model_on_task(clf, task)
42-
# Publish the experiment on OpenML (optional, requires an API key).
38+
# Publish the experiment on OpenML (optional, requires an API key.
39+
# You can get your own API key by signing up to OpenML.org)
4340
run.publish()
4441
print('View the run online: %s/run/%d' % (openml.config.server, run.run_id))
4542
43+
You can find more examples in our `examples gallery <examples/index.html>`_.
44+
4645
----------------------------
4746
How to get OpenML for python
4847
----------------------------

doc/progress.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,27 @@
66
Changelog
77
=========
88

9+
0.10.0
10+
~~~~~~
11+
* ADD #737: Add list_evaluations_setups to return hyperparameters along with list of evaluations.
12+
* FIX #261: Test server is cleared of all files uploaded during unit testing.
13+
* FIX #447: All files created by unit tests no longer persist in local.
14+
* FIX #608: Fixing dataset_id referenced before assignment error in get_run function.
15+
* FIX #447: All files created by unit tests are deleted after the completion of all unit tests.
16+
* FIX #589: Fixing a bug that did not successfully upload the columns to ignore when creating and publishing a dataset.
17+
* FIX #608: Fixing dataset_id referenced before assignment error in get_run function.
18+
* DOC #639: More descriptive documentation for function to convert array format.
19+
* DOC #719: Add documentation on uploading tasks.
20+
* ADD #687: Adds a function to retrieve the list of evaluation measures available.
21+
* ADD #695: A function to retrieve all the data quality measures available.
22+
* ADD #412: Add a function to trim flow names for scikit-learn flows.
23+
* ADD #715: `list_evaluations` now has an option to sort evaluations by score (value).
24+
* ADD #722: Automatic reinstantiation of flow in `run_model_on_task`. Clearer errors if that's not possible.
25+
* ADD #412: The scikit-learn extension populates the short name field for flows.
26+
* MAINT #726: Update examples to remove deprecation warnings from scikit-learn.
27+
* MAINT #752: Update OpenML-Python to be compatible with sklearn 0.21.
28+
29+
930
0.9.0
1031
~~~~~
1132
* ADD #560: OpenML-Python can now handle regression tasks as well.
@@ -21,6 +42,7 @@ Changelog
2142
* ADD #659: Lazy loading of task splits.
2243
* ADD #516: `run_flow_on_task` flow uploading is now optional.
2344
* ADD #680: Adds `openml.config.start_using_configuration_for_example` (and resp. stop) to easily connect to the test server.
45+
* ADD #75, #653: Adds a pretty print for objects of the top-level classes.
2446
* FIX #642: `check_datasets_active` now correctly also returns active status of deactivated datasets.
2547
* FIX #304, #636: Allow serialization of numpy datatypes and list of lists of more types (e.g. bools, ints) for flows.
2648
* FIX #651: Fixed a bug that would prevent openml-python from finding the user's config file.

examples/fetch_evaluations_tutorial.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
############################################################################
2222
import openml
23-
from pprint import pprint
2423

2524
############################################################################
2625
# Listing evaluations
@@ -37,7 +36,7 @@
3736
output_format='dataframe')
3837

3938
# Querying the returned results for precision above 0.98
40-
pprint(evals[evals.value > 0.98])
39+
print(evals[evals.value > 0.98])
4140

4241
#############################################################################
4342
# Viewing a sample task
@@ -47,7 +46,7 @@
4746
# We will start by displaying a simple *supervised classification* task:
4847
task_id = 167140 # https://www.openml.org/t/167140
4948
task = openml.tasks.get_task(task_id)
50-
pprint(vars(task))
49+
print(task)
5150

5251
#############################################################################
5352
# Obtaining all the evaluations for the task
@@ -60,11 +59,11 @@
6059
evals = openml.evaluations.list_evaluations(function=metric, task=[task_id],
6160
output_format='dataframe')
6261
# Displaying the first 10 rows
63-
pprint(evals.head(n=10))
62+
print(evals.head(n=10))
6463
# Sorting the evaluations in decreasing order of the metric chosen
6564
evals = evals.sort_values(by='value', ascending=False)
6665
print("\nDisplaying head of sorted dataframe: ")
67-
pprint(evals.head())
66+
print(evals.head())
6867

6968
#############################################################################
7069
# Obtaining CDF of metric for chosen task
@@ -147,4 +146,4 @@ def plot_flow_compare(evaluations, top_n=10, metric='predictive_accuracy'):
147146
flow_ids = evals.flow_id.unique()[:top_n]
148147
flow_names = evals.flow_name.unique()[:top_n]
149148
for i in range(top_n):
150-
pprint((flow_ids[i], flow_names[i]))
149+
print((flow_ids[i], flow_names[i]))

0 commit comments

Comments
 (0)