Skip to content

Commit 5a09501

Browse files
authored
Standardize Cromwell timestamps to follow analysis JSON schema (#117)
* Standardize Cromwell datetime format: because the format of Cromwell timestamps is inconsistent, reformat them to follow the JSON date-time format as defined in the analysis process schema. * Add tests for formatting timestamps * Update docker images for testing * Update requirements * Update pipeline-tools version
1 parent a2ab84a commit 5a09501

File tree

8 files changed

+49
-13
lines changed

8 files changed

+49
-13
lines changed

adapter_pipelines/Optimus/adapter.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ workflow AdapterOptimus {
126126
Int max_cromwell_retries = 0
127127
Boolean add_md5s = false
128128

129-
String pipeline_tools_version = "v0.46.0"
129+
String pipeline_tools_version = "v0.46.1"
130130

131131
call GetInputs as prep {
132132
input:

adapter_pipelines/cellranger/adapter.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ workflow Adapter10xCount {
150150
Int max_cromwell_retries = 0
151151
Boolean add_md5s = false
152152

153-
String pipeline_tools_version = "v0.46.0"
153+
String pipeline_tools_version = "v0.46.1"
154154

155155
call GetInputs {
156156
input:

adapter_pipelines/ss2_single_sample/adapter.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ workflow AdapterSmartSeq2SingleCell{
8282
Int max_cromwell_retries = 0
8383
Boolean add_md5s = false
8484

85-
String pipeline_tools_version = "v0.46.0"
85+
String pipeline_tools_version = "v0.46.1"
8686

8787
call GetInputs as prep {
8888
input:

pipeline_tools/create_analysis_metadata.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from google.cloud import storage
99
from typing import List
1010
import re
11+
import arrow
1112

1213

1314
def create_analysis_process(raw_schema_url,
@@ -60,8 +61,8 @@ def create_analysis_process(raw_schema_url,
6061
'schema_type': SCHEMA_TYPE,
6162
'process_core': get_analysis_process_core(analysis_workflow_id=analysis_id),
6263
'process_type': get_analysis_process_type(),
63-
'timestamp_start_utc': workflow_metadata.get('start'),
64-
'timestamp_stop_utc': workflow_metadata.get('end'),
64+
'timestamp_start_utc': format_timestamp(workflow_metadata.get('start')),
65+
'timestamp_stop_utc': format_timestamp(workflow_metadata.get('end')),
6566
'input_bundles': input_bundles_string.split(','),
6667
'reference_bundle': reference_bundle,
6768
'tasks': workflow_tasks,
@@ -352,8 +353,8 @@ def get_workflow_tasks(workflow_metadata):
352353
'disk_size': runtime['disks'],
353354
'docker_image': runtime['docker'],
354355
'zone': runtime['zones'],
355-
'start_time': task['start'],
356-
'stop_time': task['end'],
356+
'start_time': format_timestamp(task['start']),
357+
'stop_time': format_timestamp(task['end']),
357358
'log_out': task['stdout'],
358359
'log_err': task['stderr']
359360
}
@@ -362,6 +363,21 @@ def get_workflow_tasks(workflow_metadata):
362363
return sorted_output_tasks
363364

364365

366+
def format_timestamp(timestamp):
    """Standardize a Cromwell timestamp to the date-time format required by the analysis process schema.

    The parsed value is converted to UTC before it is formatted, so that the literal 'Z' (UTC)
    suffix is accurate even when the input carries a non-UTC offset such as '-05:00'.

    Args:
        timestamp (str): A datetime string that `arrow.get` can parse (e.g. ISO 8601, with or without
            seconds, milliseconds or a UTC offset). May be None or empty.

    Returns:
        str or None: The datetime in UTC formatted as 'YYYY-MM-DDTHH:mm:ss.SSSZ', or None when
            `timestamp` is None or empty.

    """
    if not timestamp:
        # Missing timestamps (e.g. a workflow with no recorded 'end') are passed through as None.
        return None
    # Normalize to UTC first; otherwise appending 'Z' would mislabel offset-bearing local times.
    utc_datetime = arrow.get(timestamp).to('utc')
    return '{}Z'.format(utc_datetime.format('YYYY-MM-DDTHH:mm:ss.SSS'))
379+
380+
365381
def get_file_format(path, extension_to_format):
366382
"""Returns the file type of the file at the given path.
367383

pipeline_tools/tests/test_create_analysis_metadata.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,3 +253,21 @@ def verify_tasks(self, tasks):
253253
assert first_task['cpus'] == 1
254254
assert first_task['disk_size'] == 'local-disk 10 HDD'
255255
assert first_task['docker_image'] == 'humancellatlas/picard:2.10.10'
256+
257+
def test_format_timestamp_without_seconds(self):
    """A timestamp given without seconds should come back with zeroed seconds and milliseconds."""
    expected = '2019-02-11T01:15:00.000Z'
    assert cam.format_timestamp('2019-02-11T01:15Z') == expected
262+
263+
def test_format_timestamp_without_milliseconds(self):
    """A timestamp given without milliseconds should come back with zeroed milliseconds."""
    expected = '2019-02-11T01:15:00.000Z'
    assert cam.format_timestamp('2019-02-11T01:15:00Z') == expected
268+
269+
def test_formatting_correct_timestamp(self):
    """A timestamp already in the target format should be returned unchanged."""
    already_formatted = '2019-02-11T01:15:00.000Z'
    assert cam.format_timestamp(already_formatted) == '2019-02-11T01:15:00.000Z'

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
requests==2.20.0
1+
arrow>=0.12.1
2+
requests>=2.20.0,<3
23
google-auth>=1.6.1,<2
3-
google-cloud-storage==1.8.0
4+
google-cloud-storage>=1.10.0,<2
45
tenacity==4.10.0
56
PyJWT==1.6.4
67
git+git://github.com/HumanCellAtlas/metadata-api@release/1.0b4#egg=hca-metadata-api[dss]

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@
1515
license='BSD 3-clause "New" or "Revised" License',
1616
packages=['pipeline_tools'],
1717
install_requires=[
18+
'arrow>=0.12.1',
1819
'google-auth>=1.6.1,<2',
19-
'google-cloud-storage>=1.8.0,<2',
20+
'google-cloud-storage>=1.10.0,<2',
2021
'hca>=4.5.0,<5',
2122
'hca-metadata-api',
2223
'mock>=2.0.0,<3',
2324
'requests>=2.20.0,<3',
2425
'requests-mock>=1.5.2,<2',
2526
'setuptools_scm>=2.0.0,<3',
2627
'tenacity>=4.10.0,<5',
27-
'google-cloud-storage>=1.10.0,<2',
2828
'PyJWT==1.6.4'
2929
],
3030
entry_points={

test-requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
requests-mock==1.5.2
1+
backports.tempfile==1.0
2+
mock>=2.0.0,<3
23
pytest==3.6.3
4+
requests-mock>=1.5.2,<2
35
tenacity==4.10.0
4-
backports.tempfile==1.0

0 commit comments

Comments
 (0)