|
1 | 1 | import argparse |
2 | | -import csv |
3 | 2 | from argparse import Namespace |
4 | 3 | import os |
5 | 4 | import re |
| 5 | +from shlex import quote as shquote |
| 6 | +from shutil import which |
6 | 7 | from textwrap import dedent |
7 | | -from typing import Sequence, Set |
| 8 | +from typing import Sequence |
8 | 9 | import numpy as np |
9 | 10 | from collections import defaultdict |
10 | | -from xopen import xopen |
11 | 11 |
|
12 | 12 | from augur.errors import AugurError |
13 | 13 | from augur.io.file import open_file |
14 | | -from augur.io.metadata import Metadata, METADATA_DATE_COLUMN |
| 14 | +from augur.io.metadata import METADATA_DATE_COLUMN |
15 | 15 | from augur.io.print import print_err |
| 16 | +from augur.io.shell_command_runner import run_shell_command |
| 17 | +from augur.utils import augur |
16 | 18 | from .constants import GROUP_BY_GENERATED_COLUMNS |
17 | 19 | from .include_exclude_rules import extract_variables, parse_filter_query |
18 | 20 |
|
@@ -96,25 +98,29 @@ def constant_factory(value): |
96 | 98 | raise AugurError(f"missing or malformed priority scores file {fname}") |
97 | 99 |
|
98 | 100 |
|
def write_output_metadata(input_filename: str, id_column: str, output_filename: str, ids_file: str):
    """
    Write output metadata file given input metadata information and a file
    containing ids to write.

    The work is delegated to a shell pipeline: ``augur read-file`` streams the
    input, ``tsv-join`` keeps the rows whose key field matches an entry in the
    filter file, and ``augur write-file`` writes the result.

    Parameters
    ----------
    input_filename
        Path of the input metadata file, passed to ``augur read-file``.
    id_column
        Name of the column used as the ``tsv-join`` key field.
    output_filename
        Destination passed to ``augur write-file``.
    ids_file
        Path of the file passed to ``tsv-join --filter-file``.

    Raises
    ------
    AugurError
        If ``tsv-join`` cannot be found on the PATH, or if the pipeline fails.
    """
    # FIXME: make this a function like augur() and seqkit()
    tsv_join = which("tsv-join")
    if tsv_join is None:
        # Without this check the shell would be asked to run a command
        # literally named "None", producing a confusing error.
        raise AugurError("Unable to find the tsv-join executable on the PATH. Is tsv-utils installed?")

    # Every user-controlled value is shell-quoted so that paths and column
    # names containing spaces or shell metacharacters are passed through as
    # single arguments instead of being interpreted by the shell.
    # NOTE(review): augur() is interpolated as-is; presumably it already
    # returns a shell-ready command string - confirm.
    # NOTE(review): -H makes tsv-join treat the first line of both the input
    # and the filter file as a header - confirm ids_file is written with one.
    command = f"""
        {augur()} read-file {shquote(input_filename)} |
        {shquote(tsv_join)} -H --filter-file {shquote(ids_file)} --key-fields {shquote(id_column)} |
        {augur()} write-file {shquote(output_filename)}
    """

    try:
        run_shell_command(command, raise_errors=True)
    except Exception as error:
        if os.path.isfile(output_filename):
            # Remove the partial output file.
            os.remove(output_filename)
            raise AugurError("Metadata output failed, see error(s) above.") from error
        else:
            # No regular file to clean up (e.g. output went to stdout), so
            # the caller has to deal with whatever was already emitted.
            raise AugurError("Metadata output failed, see error(s) above. The command may have already written data to stdout. You may want to clean up any partial outputs.") from error
118 | 124 |
|
119 | 125 |
|
120 | 126 | # These are the types accepted in the following function. |
|
0 commit comments