Skip to content

Commit db8937a

Browse files
authored
Metadata JSON design (#30)
* added metadataJSON.py and query_mapper.yaml
* addressing design comments: clean up init function, connect to db once, merged create_json_file and write_json_file, moving away from manual queries towards using pypika to generate SQL
* submitter details added to JSON.
* started to add project.
* set up unit tests
* moved sql queries to _fetch functions. started on samples. unit tests added.
* test.config file
* added _get_analysis, _get_files function. changed placeholders to UNSPECIFIED. added validate to fetch functions
* added unit tests
* added gather_metadata function
* edit fetching file name
* remove TODO
* requirements.txt: add oracledb
* requirements.txt: add pypika
* change _get_sample_new
* removed query mapper
* fix(metadataJSON.py): correct the dot notation for sample_status
* fix(metadataJSON.py): change the return value
* fix(metadataJSON.py): change how unregistered samples are handled
* fix(metadataJSON.py): change how preregistered samples are handled
* fix(metadataJSON.py): change use of next() to calling validate_fetch_result
* fix(metadataJSON.py): change use of next() to calling validate_fetch_result
* fix(convertGVFtoVCF.py): check if config is present before obtaining metadata
* fix(convertGVFtoVCF.py): make arguments optional
* fix(metadataJSON.py): validate the project accession
* fix(metadataJSON.py): change the sampleinvcf to match the sample id
* fix(test_metadataJSON.py): edit tests
* fix(test_metadataJSON.py): remove placeholder and raise ValueError for missing results
* fix(test_metadataJSON.py): fetch methods changed to fetch what is named in the function name
* fix(test_metadataJSON.py): edit mock data to represent the real return type
* feat(gather_metadata.py): make gather_metadata its own executable
* edit comment
* fix(metadataJSON.py): improve error handling
* fix(metadataJSON.py): correct typo
* fix(metadataJSON.py): replace UNSPECIFIED placeholders with empty strings
* fix(metadataJSON.py): edit unit test
* fix(metadataJSON.py, test_metadataJSON.py): fix typos, remove dead code
* fix(metadataJSON.py, test_metadataJSON.py): fix typos
1 parent 8b255f0 commit db8937a

File tree

7 files changed

+914
-4
lines changed

7 files changed

+914
-4
lines changed

convert_gvf_to_vcf/convertGVFtoVCF.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
from ebi_eva_common_pyutils.logger import logging_config as log_cfg
55

66
from convert_gvf_to_vcf.conversionstatistics import FileStatistics
7+
from convert_gvf_to_vcf.gather_metadata import gather_metadata
78
from convert_gvf_to_vcf.lookup import Lookup
89
from convert_gvf_to_vcf.utils import read_in_gvf_header, read_in_gvf_data
910
from convert_gvf_to_vcf.vcfline import VcfLineBuilder
10-
11+
from convert_gvf_to_vcf.metadataJSON import DGVaMetadataRetriever
1112

1213
logger = log_cfg.get_logger(__name__)
1314

@@ -377,13 +378,19 @@ def cleanup_temp_files(list_of_temp_files):
377378
if os.path.exists(temp_file):
378379
os.remove(temp_file)
379380

381+
382+
380383
def main():
381384
# Parse command line arguments
382385
parser = argparse.ArgumentParser()
383386
parser.add_argument("gvf_input", help="GVF input file.")
384387
parser.add_argument("vcf_output", help="VCF output file.")
388+
parser.add_argument("--json_output", help="JSON output file.")
389+
parser.add_argument("--study_accession", help="DGVa Study Accession")
385390
parser.add_argument("-a", "--assembly", help="FASTA assembly file")
386391
parser.add_argument("--log", help="Path to log file")
392+
parser.add_argument("--config", help="Path to config file")
393+
387394
args = parser.parse_args()
388395

389396
# Set up logging functionality
@@ -393,8 +400,11 @@ def main():
393400
else:
394401
log_cfg.add_stdout_handler()
395402
convert(args.gvf_input, args.vcf_output, args.assembly)
396-
397-
403+
if args.config:
404+
logger.info(f"The config file is {args.config}. Gathering metadata")
405+
gather_metadata(args.config, args.json_output, args.study_accession, args.vcf_output)
406+
else:
407+
logger.info(f"No config file provided. Unable to gather metadata.")
398408

399409
if __name__ == "__main__":
400410
main()
convert_gvf_to_vcf/gather_metadata.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import argparse
2+
3+
from convert_gvf_to_vcf.metadataJSON import DGVaMetadataRetriever
4+
5+
6+
def gather_metadata(config_input, json_output, study_accession, vcf_output):
    """Create the metadata JSON file for a study via DGVaMetadataRetriever.

    A DGVaMetadataRetriever is constructed from ``config_input`` and used as
    a context manager while its ``create_json_file`` method is called.

    Args:
        config_input: Value handed to the DGVaMetadataRetriever constructor
            (the command line describes it as the path to a config file).
        json_output: Forwarded as ``json_file_path`` to ``create_json_file``.
        study_accession: Forwarded as ``study_accession`` to
            ``create_json_file``.
        vcf_output: Forwarded as ``vcf_output`` to ``create_json_file``.
    """
    retriever = DGVaMetadataRetriever(config_input)
    json_request = {
        "json_file_path": json_output,
        "study_accession": study_accession,
        "vcf_output": vcf_output,
    }
    # NOTE(review): the retriever presumably acquires and releases its
    # resources in __enter__/__exit__ -- confirm in metadataJSON.py.
    with retriever:
        retriever.create_json_file(**json_request)
11+
12+
13+
def main():
    """Parse the command-line options and hand them to gather_metadata.

    Recognised options (all optional, each taking one value): ``--config``,
    ``--json_output``, ``--study_accession`` and ``--vcf_output``.
    """
    parser = argparse.ArgumentParser()
    # Register the flags in the same order they are passed on below.
    for flag in ("--config", "--json_output", "--study_accession", "--vcf_output"):
        parser.add_argument(flag)
    options = parser.parse_args()

    gather_metadata(
        options.config,
        options.json_output,
        options.study_accession,
        options.vcf_output,
    )
22+
23+
if __name__ == "__main__":
24+
main()

0 commit comments

Comments
 (0)