Skip to content

Commit cefd097

Browse files
author
Matthias Feurer
authored
Fix issues (#618)
* TST add connection retries test-wise * Improve file style * MAINT update changelog * MAINT simplify unit test, change code as requested by Jan * TST fix python2/3 bug * please flake
1 parent b71325c commit cefd097

File tree

5 files changed

+85
-30
lines changed

5 files changed

+85
-30
lines changed

doc/progress.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ Changelog
2020
OpenML.
2121
* ADD #564: New helpers to access the structure of a flow (and find its
2222
subflows).
23+
* ADD #618: The software will from now on retry to connect to the server if a
24+
connection failed. The number of retries can be configured.
2325
* FIX #538: Support loading clustering tasks.
2426
* FIX #464: Fixes a bug related to listing functions (returns correct listing
2527
size).

openml/_api_calls.py

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,17 @@
1-
import io
2-
import os
1+
import time
32
import requests
43
import warnings
54

6-
import arff
75
import xmltodict
86

97
from . import config
108
from .exceptions import (OpenMLServerError, OpenMLServerException,
119
OpenMLServerNoResult)
1210

1311

14-
def _perform_api_call(call, data=None, file_elements=None,
15-
add_authentication=True):
12+
def _perform_api_call(call, data=None, file_elements=None):
1613
"""
1714
Perform an API call at the OpenML server.
18-
return self._read_url(url, data=data, filePath=filePath,
19-
def _read_url(self, url, add_authentication=False, data=None, filePath=None):
2015
2116
Parameters
2217
----------
@@ -27,8 +22,6 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
2722
file_elements : dict
2823
Mapping of {filename: str} of strings which should be uploaded as
2924
files to the server.
30-
add_authentication : bool
31-
Whether to add authentication (api key) to the request.
3225
3326
Returns
3427
-------
@@ -50,12 +43,12 @@ def _read_url(self, url, add_authentication=False, data=None, filePath=None):
5043

5144

5245
def _file_id_to_url(file_id, filename=None):
53-
'''
46+
"""
5447
Presents the URL how to download a given file id
5548
filename is optional
56-
'''
49+
"""
5750
openml_url = config.server.split('/api/')
58-
url = openml_url[0] + '/data/download/%s' %file_id
51+
url = openml_url[0] + '/data/download/%s' % file_id
5952
if filename is not None:
6053
url += '/' + filename
6154
return url
@@ -71,7 +64,12 @@ def _read_url_files(url, data=None, file_elements=None):
7164
file_elements = {}
7265
# Using requests.post sets header 'Accept-encoding' automatically to
7366
# 'gzip,deflate'
74-
response = requests.post(url, data=data, files=file_elements)
67+
response = send_request(
68+
request_method='post',
69+
url=url,
70+
data=data,
71+
files=file_elements,
72+
)
7573
if response.status_code != 200:
7674
raise _parse_server_exception(response, url=url)
7775
if 'Content-Encoding' not in response.headers or \
@@ -87,12 +85,16 @@ def _read_url(url, data=None):
8785
data['api_key'] = config.apikey
8886

8987
if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
90-
# do a GET
91-
response = requests.get(url, params=data)
92-
else: # an actual post request
88+
response = send_request(
89+
request_method='get', url=url, data=data,
90+
)
91+
92+
else:
9393
# Using requests.post sets header 'Accept-encoding' automatically to
9494
# 'gzip,deflate'
95-
response = requests.post(url, data=data)
95+
response = send_request(
96+
request_method='post', url=url, data=data,
97+
)
9698

9799
if response.status_code != 200:
98100
raise _parse_server_exception(response, url=url)
@@ -102,12 +104,44 @@ def _read_url(url, data=None):
102104
return response.text
103105

104106

107+
def send_request(
    request_method,
    url,
    data,
    files=None,
):
    """Send an HTTP request, retrying if the connection fails.

    The number of attempts is read from ``config.connection_n_retries``.
    Between two attempts the function sleeps for ``0.1 * attempt`` seconds,
    so the waiting time grows linearly with the number of failed attempts.

    Parameters
    ----------
    request_method : str
        Either ``'get'`` or ``'post'``.
    url : str
        The URL to send the request to.
    data : dict
        Sent as query parameters for ``'get'`` and as form data for
        ``'post'``.
    files : dict, optional
        Files to upload; only used for ``'post'`` requests.

    Returns
    -------
    The response object returned by the ``requests`` session.

    Raises
    ------
    NotImplementedError
        If ``request_method`` is neither ``'get'`` nor ``'post'``.
    requests.exceptions.ConnectionError, requests.exceptions.SSLError
        If the last attempt still fails to connect.
    """
    if request_method not in ('get', 'post'):
        raise NotImplementedError(
            'Unsupported request method: %r' % (request_method,)
        )

    # The setting may arrive as a string (e.g. when read from a config file)
    # and may be zero or negative; always perform at least one attempt
    # instead of falling through without ever contacting the server.
    n_retries = max(1, int(config.connection_n_retries))

    with requests.Session() as session:
        # Start at one to have a non-zero multiplier for the sleep
        for i in range(1, n_retries + 1):
            try:
                if request_method == 'get':
                    return session.get(url, params=data)
                return session.post(url, data=data, files=files)
            except (
                requests.exceptions.ConnectionError,
                requests.exceptions.SSLError,
            ):
                if i == n_retries:
                    # Out of attempts: re-raise with the original traceback.
                    raise
                time.sleep(0.1 * i)
137+
138+
105139
def _parse_server_exception(response, url=None):
106140
# OpenML has a sophisticated error system
107141
# where information about failures is provided. try to parse this
108142
try:
109143
server_exception = xmltodict.parse(response.text)
110-
except:
144+
except Exception:
111145
raise OpenMLServerError(('Unexpected server error. Please '
112146
'contact the developers!\nStatus code: '
113147
'%d\n' % response.status_code) + response.text)
@@ -117,7 +151,7 @@ def _parse_server_exception(response, url=None):
117151
additional = None
118152
if 'oml:additional_information' in server_exception['oml:error']:
119153
additional = server_exception['oml:error']['oml:additional_information']
120-
if code in [372, 512, 500, 482, 542, 674]: # datasets,
154+
if code in [372, 512, 500, 482, 542, 674]:
121155
# 512 for runs, 372 for datasets, 500 for flows
122156
# 482 for tasks, 542 for evaluations, 674 for setups
123157
return OpenMLServerNoResult(code, message, additional)

openml/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
'verbosity': 0,
2222
'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')),
2323
'avoid_duplicate_runs': 'True',
24+
'connection_n_retries': 2,
2425
}
2526

2627
config_file = os.path.expanduser(os.path.join('~', '.openml' 'config'))
@@ -32,6 +33,9 @@
3233
# The current cache directory (without the server name)
3334
cache_directory = ""
3435

36+
# Number of retries if the connection breaks
37+
connection_n_retries = 2
38+
3539

3640
def _setup():
3741
"""Setup openml package. Called on first import.
@@ -46,6 +50,7 @@ def _setup():
4650
global server
4751
global cache_directory
4852
global avoid_duplicate_runs
53+
global connection_n_retries
4954
# read config file, create cache directory
5055
try:
5156
os.mkdir(os.path.expanduser(os.path.join('~', '.openml')))
@@ -57,6 +62,12 @@ def _setup():
5762
server = config.get('FAKE_SECTION', 'server')
5863
cache_directory = os.path.expanduser(config.get('FAKE_SECTION', 'cachedir'))
5964
avoid_duplicate_runs = config.getboolean('FAKE_SECTION', 'avoid_duplicate_runs')
65+
connection_n_retries = config.get('FAKE_SECTION', 'connection_n_retries')
66+
if connection_n_retries > 20:
67+
raise ValueError(
68+
'A higher number of retries than 20 is not allowed to keep the '
69+
'server load reasonable'
70+
)
6071

6172

6273
def _parse_config():

openml/testing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ def setUp(self):
6565
with open(openml.config.config_file, 'w') as fh:
6666
fh.write('apikey = %s' % openml.config.apikey)
6767

68+
# Increase the number of retries to avoid spurious server failures
69+
self.connection_n_retries = openml.config.connection_n_retries
70+
openml.config.connection_n_retries = 10
71+
6872
def tearDown(self):
6973
os.chdir(self.cwd)
7074
try:
@@ -76,6 +80,7 @@ def tearDown(self):
7680
else:
7781
raise
7882
openml.config.server = self.production_server
83+
openml.config.connection_n_retries = self.connection_n_retries
7984

8085
def _get_sentinel(self, sentinel=None):
8186
if sentinel is None:

tests/test_runs/test_run_functions.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -735,17 +735,20 @@ def test_get_run_trace(self):
735735
if 'Run already exists in server' not in e.message:
736736
# in this case the error was not the one we expected
737737
raise e
738-
# run was already
739-
flow = openml.flows.sklearn_to_flow(clf)
740-
flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
741-
self.assertIsInstance(flow_exists, int)
742-
self.assertGreater(flow_exists, 0)
743-
downloaded_flow = openml.flows.get_flow(flow_exists,
744-
reinstantiate=True)
745-
setup_exists = openml.setups.setup_exists(downloaded_flow)
746-
self.assertIsInstance(setup_exists, int)
747-
self.assertGreater(setup_exists, 0)
748-
run_ids = _run_exists(task.task_id, setup_exists)
738+
# run was already performed
739+
message = e.message
740+
if sys.version_info[0] == 2:
741+
# Parse a string like:
742+
# 'Run already exists in server. Run id(s): set([37501])'
743+
run_ids = (
744+
message.split('[')[1].replace(']', '').
745+
replace(')', '').split(',')
746+
)
747+
else:
748+
# Parse a string like:
749+
# "Run already exists in server. Run id(s): {36980}"
750+
run_ids = message.split('{')[1].replace('}', '').split(',')
751+
run_ids = [int(run_id) for run_id in run_ids]
749752
self.assertGreater(len(run_ids), 0)
750753
run_id = random.choice(list(run_ids))
751754

0 commit comments

Comments
 (0)