|
| 1 | +"""custom curate script to add URLs""" |
| 2 | +import sys |
| 3 | +import argparse |
| 4 | +from typing import Iterable |
| 5 | + |
| 6 | +from augur.curate import validate_records |
| 7 | +from augur.io.json import dump_ndjson, load_ndjson |
| 8 | + |
def run(args: argparse.Namespace, records: Iterable[dict]) -> Iterable[dict]:
    """Yield a copy of each record with accession URL fields added.

    Two keys are set on a shallow copy of every input record:

    * ``PPX_accession__url``: ``https://pathoplexus.org/seq/<PPX_accession>``
    * ``INSDC_accession__url``:
      ``https://www.ncbi.nlm.nih.gov/nuccore/<INSDC_accession>``

    A URL field is set to the empty string when its accession is missing
    or falsy (for example ``None`` or ``""``).

    Args:
        args: Parsed command-line arguments; not used by this function.
        records: Records to annotate. The input dicts are not modified.

    Yields:
        One annotated shallow copy per input record, in input order.
    """
    for record in records:
        # Work on a shallow copy so the caller's dict is left untouched.
        record = record.copy()

        ppx_accession = record.get('PPX_accession')  # versioned
        insdc_accession = record.get('INSDC_accession')  # versioned

        # Add INSDC_accession__url and PPX_accession__url fields to the record;
        # an absent or empty accession produces an empty URL, not a broken link.
        record['PPX_accession__url'] = (
            f"https://pathoplexus.org/seq/{ppx_accession}"
            if ppx_accession
            else ""
        )
        record['INSDC_accession__url'] = (
            f"https://www.ncbi.nlm.nih.gov/nuccore/{insdc_accession}"
            if insdc_accession
            else ""
        )

        yield record
| 26 | + |
| 27 | + |
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description=__doc__)
    options = cli.parse_args()

    # Read NDJSON records from stdin and validate them as input records
    # (third argument True) before they reach the curation step.
    incoming = validate_records(load_ndjson(sys.stdin), __doc__, True)

    # Apply this custom curate command, then validate the modified records
    # as output records (third argument False).
    outgoing = validate_records(run(options, incoming), __doc__, False)

    dump_ndjson(outgoing)
0 commit comments