Skip to content

Commit c4ecc8b

Browse files
committed
Don't use original VCF header in vcz
1 parent ed737c0 commit c4ecc8b

File tree

1 file changed

+4
-41
lines changed

1 file changed

+4
-41
lines changed

vcztools/vcf_writer.py

Lines changed: 4 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import io
22
import logging
3-
import re
43
import sys
54
from datetime import datetime
65
from typing import Optional
@@ -183,10 +182,8 @@ def write_vcf(
183182
reader = retrieval.VariantChunkReader(root)
184183

185184
if not no_header:
186-
original_header = root.attrs.get("vcf_header", None)
187185
vcf_header = _generate_header(
188186
root,
189-
original_header,
190187
sample_ids,
191188
no_version=no_version,
192189
force_ac_an=force_ac_an_header,
@@ -429,7 +426,6 @@ def c_chunk_to_vcf(
429426

430427
def _generate_header(
431428
ds,
432-
original_header,
433429
sample_ids,
434430
*,
435431
no_version: bool = False,
@@ -469,45 +465,12 @@ def _generate_header(
469465
if key in ("genotype", "genotype_phased"):
470466
continue
471467
format_fields.append(key)
472-
if original_header is None: # generate entire header
473-
# [1.4.1 File format]
474-
print("##fileformat=VCFv4.3", file=output)
475468

476-
if "source" in ds.attrs:
477-
print(f'##source={ds.attrs["source"]}', file=output)
469+
# [1.4.1 File format]
470+
print("##fileformat=VCFv4.3", file=output)
478471

479-
else: # use original header fields where appropriate
480-
unstructured_pattern = re.compile("##([^=]+)=([^<].*)")
481-
structured_pattern = re.compile("##([^=]+)=(<.*)")
482-
483-
for line in original_header.split("\n"):
484-
if re.fullmatch(unstructured_pattern, line):
485-
print(line, file=output)
486-
else:
487-
match = re.fullmatch(structured_pattern, line)
488-
if match:
489-
category = match.group(1)
490-
id_pattern = re.compile("ID=([^,>]+)")
491-
key = id_pattern.findall(line)[0]
492-
if category not in ("contig", "FILTER", "INFO", "FORMAT"):
493-
# output other structured fields
494-
print(line, file=output)
495-
# only output certain categories if in dataset
496-
elif category == "contig" and key in contigs:
497-
contigs.remove(key)
498-
print(line, file=output)
499-
elif category == "FILTER" and key in filters:
500-
filters.remove(key)
501-
print(line, file=output)
502-
elif category == "INFO" and key in info_fields:
503-
info_fields.remove(key)
504-
print(line, file=output)
505-
elif category == "FORMAT" and key in format_fields:
506-
format_fields.remove(key)
507-
print(line, file=output)
508-
509-
# add all fields that are not in the original header
510-
# or all fields if there was no original header
472+
if "source" in ds.attrs:
473+
print(f'##source={ds.attrs["source"]}', file=output)
511474

512475
# [1.4.2 Information field format]
513476
for key in info_fields:

0 commit comments

Comments
 (0)