Skip to content

Commit a01408d

Browse files
authored
Merge pull request #228 from DataBiosphere/dev2
PR for 0.4.5 release
2 parents 8278b37 + ad1c654 commit a01408d

File tree

15 files changed: +139 additions, -126 deletions (lines changed)

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
language: python
22
python:
3-
- "3.7"
3+
- "3.8"
44
# command to install dependencies
55
install: "python setup.py install"
66
# command to run tests

dsub/_dsub_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,4 +26,4 @@
2626
0.1.3.dev0 -> 0.1.3 -> 0.1.4.dev0 -> ...
2727
"""
2828

29-
DSUB_VERSION = '0.4.4'
29+
DSUB_VERSION = '0.4.5'

dsub/commands/dsub.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,15 +25,14 @@
2525
import sys
2626
import time
2727
import uuid
28-
from dateutil.tz import tzlocal
2928

29+
import dateutil
3030
from ..lib import dsub_errors
3131
from ..lib import dsub_util
3232
from ..lib import job_model
3333
from ..lib import output_formatter
3434
from ..lib import param_util
3535
from ..lib import resources
36-
from ..lib.dsub_util import print_error
3736
from ..providers import google_base
3837
from ..providers import provider_base
3938

@@ -660,7 +659,8 @@ def _get_job_metadata(provider, user_id, job_name, script, task_ids,
660659
Returns:
661660
A dictionary of job-specific metadata (such as job id, name, etc.)
662661
"""
663-
create_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
662+
create_time = dsub_util.replace_timezone(datetime.datetime.now(),
663+
dateutil.tz.tzlocal())
664664
user_id = user_id or dsub_util.get_os_user()
665665
job_metadata = provider.prepare_job_metadata(script.name, job_name, user_id)
666666
if unique_job_id:
@@ -811,7 +811,7 @@ def _wait_after(provider, job_ids, poll_interval, stop_on_failure, summary):
811811
jobs_not_found = jobs_completed.difference(jobs_found)
812812
for j in jobs_not_found:
813813
error = '%s: not found' % j
814-
print_error(' %s' % error)
814+
dsub_util.print_error(' %s' % error)
815815
error_messages += [error]
816816

817817
# Print the dominant task for the completed jobs
@@ -997,7 +997,8 @@ def _importance_of_task(task):
997997
return (importance[task.get_field('task-status')],
998998
task.get_field(
999999
'end-time',
1000-
dsub_util.replace_timezone(datetime.datetime.max, tzlocal())))
1000+
dsub_util.replace_timezone(datetime.datetime.max,
1001+
dateutil.tz.tzlocal())))
10011002

10021003

10031004
def _wait_for_any_job(provider, job_ids, poll_interval, summary):
@@ -1289,7 +1290,7 @@ def run(provider,
12891290
summary)
12901291
if error_messages:
12911292
for msg in error_messages:
1292-
print_error(msg)
1293+
dsub_util.print_error(msg)
12931294
raise dsub_errors.PredecessorJobFailureError(
12941295
'One or more predecessor jobs completed but did not succeed.',
12951296
error_messages, None)
@@ -1331,7 +1332,7 @@ def run(provider,
13311332
poll_interval, False, summary)
13321333
if error_messages:
13331334
for msg in error_messages:
1334-
print_error(msg)
1335+
dsub_util.print_error(msg)
13351336
raise dsub_errors.JobExecutionError(
13361337
'One or more jobs finished with status FAILURE or CANCELED'
13371338
' during wait.', error_messages, launched_job)

dsub/lib/dsub_util.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,14 @@
2121
import pwd
2222
import sys
2323
import warnings
24-
from . import retry_util
2524

25+
from . import retry_util
26+
import google.auth
2627
import googleapiclient.discovery
2728
import googleapiclient.errors
2829
import googleapiclient.http
2930
import tenacity
3031

31-
import google.auth
32-
3332

3433
# this is the Job ID for jobs that are skipped.
3534
NO_JOB = 'NO_JOB'
@@ -56,6 +55,9 @@ def __init__(self, fileobj):
5655
def write(self, buf):
5756
self._fileobj.write(buf)
5857

58+
def flush(self):
59+
self._fileobj.flush()
60+
5961

6062
@contextlib.contextmanager
6163
def replace_print(fileobj=sys.stderr):
@@ -140,23 +142,25 @@ def get_storage_service(credentials):
140142
'ignore', 'Your application has authenticated using end user credentials')
141143
if credentials is None:
142144
credentials, _ = google.auth.default()
145+
# Set cache_discovery to False because we use google-auth
146+
# See https://github.com/googleapis/google-api-python-client/issues/299
143147
return googleapiclient.discovery.build(
144-
'storage', 'v1', credentials=credentials)
148+
'storage', 'v1', credentials=credentials, cache_discovery=False)
145149

146150

147151
# Exponential backoff retrying downloads of GCS object chunks.
148152
# Maximum 23 retries. Wait 1, 2, 4 ... 64, 64, 64... seconds.
149153
@tenacity.retry(
150154
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
151155
retry=retry_util.retry_api_check,
152-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
156+
wait=tenacity.wait_exponential(multiplier=1, max=64),
153157
retry_error_callback=retry_util.on_give_up)
154158
# For API errors dealing with auth, we want to retry, but not as often
155159
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
156160
@tenacity.retry(
157161
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
158162
retry=retry_util.retry_auth_check,
159-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
163+
wait=tenacity.wait_exponential(multiplier=1, max=8),
160164
retry_error_callback=retry_util.on_give_up)
161165
def _downloader_next_chunk(downloader):
162166
"""Downloads the next chunk."""
@@ -214,14 +218,14 @@ def load_file(file_path, credentials=None):
214218
@tenacity.retry(
215219
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
216220
retry=retry_util.retry_api_check,
217-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
221+
wait=tenacity.wait_exponential(multiplier=1, max=64),
218222
retry_error_callback=retry_util.on_give_up)
219223
# For API errors dealing with auth, we want to retry, but not as often
220224
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
221225
@tenacity.retry(
222226
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
223227
retry=retry_util.retry_auth_check,
224-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
228+
wait=tenacity.wait_exponential(multiplier=1, max=8),
225229
retry_error_callback=retry_util.on_give_up)
226230
def _file_exists_in_gcs(gcs_file_path, credentials=None, storage_service=None):
227231
"""Check whether the file exists, in GCS.
@@ -252,14 +256,14 @@ def _file_exists_in_gcs(gcs_file_path, credentials=None, storage_service=None):
252256
@tenacity.retry(
253257
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
254258
retry=retry_util.retry_api_check,
255-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
259+
wait=tenacity.wait_exponential(multiplier=1, max=64),
256260
retry_error_callback=retry_util.on_give_up)
257261
# For API errors dealing with auth, we want to retry, but not as often
258262
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
259263
@tenacity.retry(
260264
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
261265
retry=retry_util.retry_auth_check,
262-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
266+
wait=tenacity.wait_exponential(multiplier=1, max=8),
263267
retry_error_callback=retry_util.on_give_up)
264268
def _prefix_exists_in_gcs(gcs_prefix, credentials=None, storage_service=None):
265269
"""Check whether there is a GCS object whose name starts with the prefix.
@@ -302,14 +306,14 @@ def folder_exists(folder_path, credentials=None, storage_service=None):
302306
@tenacity.retry(
303307
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
304308
retry=retry_util.retry_api_check,
305-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
309+
wait=tenacity.wait_exponential(multiplier=1, max=64),
306310
retry_error_callback=retry_util.on_give_up)
307311
# For API errors dealing with auth, we want to retry, but not as often
308312
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
309313
@tenacity.retry(
310314
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
311315
retry=retry_util.retry_auth_check,
312-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
316+
wait=tenacity.wait_exponential(multiplier=1, max=8),
313317
retry_error_callback=retry_util.on_give_up)
314318
def simple_pattern_exists_in_gcs(file_pattern,
315319
credentials=None,

dsub/lib/job_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -595,7 +595,7 @@ def get_complete_descriptor(cls, task_metadata, task_params, task_resources):
595595
return task_descriptor
596596

597597
def __str__(self):
598-
return 'task-id: {}'.format(self.job_metadata.get('task-id'))
598+
return 'task-id: {}'.format(self.task_metadata.get('task-id'))
599599

600600
def __repr__(self):
601601
return ('task_metadata: {}, task_params: {}').format(

dsub/lib/output_formatter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,7 @@ def prepare_summary_table(rows):
271271
# Use the original table as the driver in order to preserve the order.
272272
new_rows = []
273273
for job_key in sorted(grouped.keys()):
274-
group = grouped.get(job_key, None)
274+
group = grouped[job_key]
275275
canonical_status = ['RUNNING', 'SUCCESS', 'FAILURE', 'CANCEL']
276276
# Written this way to ensure that if somehow a new status is introduced,
277277
# it shows up in our output.

dsub/lib/retry_util.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
import sys
2222

2323
import googleapiclient.errors
24-
from httplib2 import ServerNotFoundError
24+
import httplib2
2525
import tenacity
2626

2727
import google.auth
@@ -127,7 +127,7 @@ def retry_api_check(retry_state: tenacity.RetryCallState) -> bool:
127127

128128
# This has been observed as a transient error:
129129
# ServerNotFoundError: Unable to find the server at genomics.googleapis.com
130-
if isinstance(exception, ServerNotFoundError):
130+
if isinstance(exception, httplib2.ServerNotFoundError):
131131
_print_retry_error(attempt_number, MAX_API_ATTEMPTS, exception)
132132
return True
133133

dsub/providers/base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ def get_tasks_completion_messages(self, tasks):
184184
raise NotImplementedError()
185185

186186

187-
class Task(object):
187+
class Task(object, metaclass=abc.ABCMeta):
188188
"""Basic container for task metadata."""
189189

190190
@abc.abstractmethod

dsub/providers/google_base.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -300,7 +300,7 @@ def parse_rfc3339_utc_string(rfc3339_utc_string):
300300
# When nanoseconds are provided, we round
301301
micros = int(round(int(fraction) // 1000))
302302
else:
303-
assert False, 'Fraction length not 0, 6, or 9: {}'.len(fraction)
303+
assert False, 'Fraction length not 0, 6, or 9: {}'.format(len(fraction))
304304

305305
try:
306306
return datetime.datetime(
@@ -424,14 +424,14 @@ def cancel(batch_fn, cancel_fn, ops):
424424
@tenacity.retry(
425425
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
426426
retry=retry_util.retry_api_check,
427-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
427+
wait=tenacity.wait_exponential(multiplier=1, max=64),
428428
retry_error_callback=retry_util.on_give_up)
429429
# For API errors dealing with auth, we want to retry, but not as often
430430
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
431431
@tenacity.retry(
432432
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
433433
retry=retry_util.retry_auth_check,
434-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
434+
wait=tenacity.wait_exponential(multiplier=1, max=8),
435435
retry_error_callback=retry_util.on_give_up)
436436
def setup_service(api_name, api_version, credentials=None):
437437
"""Configures genomics API client.
@@ -449,8 +449,10 @@ def setup_service(api_name, api_version, credentials=None):
449449
'ignore', 'Your application has authenticated using end user credentials')
450450
if not credentials:
451451
credentials, _ = google.auth.default()
452+
# Set cache_discovery to False because we use google-auth
453+
# See https://github.com/googleapis/google-api-python-client/issues/299
452454
return googleapiclient.discovery.build(
453-
api_name, api_version, credentials=credentials)
455+
api_name, api_version, cache_discovery=False, credentials=credentials)
454456

455457

456458
def credentials_from_service_account_info(credentials_file):
@@ -467,14 +469,14 @@ class Api(object):
467469
@tenacity.retry(
468470
stop=tenacity.stop_after_attempt(retry_util.MAX_API_ATTEMPTS),
469471
retry=retry_util.retry_api_check,
470-
wait=tenacity.wait_exponential(multiplier=0.5, max=64),
472+
wait=tenacity.wait_exponential(multiplier=1, max=64),
471473
retry_error_callback=retry_util.on_give_up)
472474
# For API errors dealing with auth, we want to retry, but not as often
473475
# Maximum 4 retries. Wait 1, 2, 4, 8 seconds.
474476
@tenacity.retry(
475477
stop=tenacity.stop_after_attempt(retry_util.MAX_AUTH_ATTEMPTS),
476478
retry=retry_util.retry_auth_check,
477-
wait=tenacity.wait_exponential(multiplier=0.5, max=8),
479+
wait=tenacity.wait_exponential(multiplier=1, max=8),
478480
retry_error_callback=retry_util.on_give_up)
479481
def execute(self, api):
480482
"""Executes operation.

dsub/providers/google_v2_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,7 @@ def get_filtered_normalized_events(self):
409409
continue
410410

411411
if name == 'pulling-image':
412-
if match.group(1) != user_image:
412+
if match and match.group(1) != user_image:
413413
continue
414414

415415
events[name] = mapped

0 commit comments

Comments
 (0)