Skip to content

Commit 3079a41

Browse files
committed
Don't use original VCF header in vcz
1 parent 67adc3a commit 3079a41

File tree

1 file changed

+6
-43
lines changed

1 file changed

+6
-43
lines changed

vcztools/vcf_writer.py

Lines changed: 6 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
import io
22
import logging
3-
import re
43
import sys
54
from datetime import datetime
65

@@ -144,11 +143,9 @@ def write_vcf(
144143
)
145144

146145
if not no_header:
147-
original_header = root.attrs.get("vcf_header", None)
148146
force_ac_an_header = not drop_genotypes and samples_selection is not None
149147
vcf_header = _generate_header(
150148
root,
151-
original_header,
152149
sample_ids,
153150
no_version=no_version,
154151
force_ac_an=force_ac_an_header,
@@ -299,7 +296,6 @@ def c_chunk_to_vcf(
299296

300297
def _generate_header(
301298
ds,
302-
original_header,
303299
sample_ids,
304300
*,
305301
no_version: bool = False,
@@ -339,45 +335,12 @@ def _generate_header(
339335
if key in ("genotype", "genotype_phased"):
340336
continue
341337
format_fields.append(key)
342-
if original_header is None: # generate entire header
343-
# [1.4.1 File format]
344-
print("##fileformat=VCFv4.3", file=output)
345-
346-
if "source" in ds.attrs:
347-
print(f'##source={ds.attrs["source"]}', file=output)
348-
349-
else: # use original header fields where appropriate
350-
unstructured_pattern = re.compile("##([^=]+)=([^<].*)")
351-
structured_pattern = re.compile("##([^=]+)=(<.*)")
352-
353-
for line in original_header.split("\n"):
354-
if re.fullmatch(unstructured_pattern, line):
355-
print(line, file=output)
356-
else:
357-
match = re.fullmatch(structured_pattern, line)
358-
if match:
359-
category = match.group(1)
360-
id_pattern = re.compile("ID=([^,>]+)")
361-
key = id_pattern.findall(line)[0]
362-
if category not in ("contig", "FILTER", "INFO", "FORMAT"):
363-
# output other structured fields
364-
print(line, file=output)
365-
# only output certain categories if in dataset
366-
elif category == "contig" and key in contigs:
367-
contigs.remove(key)
368-
print(line, file=output)
369-
elif category == "FILTER" and key in filters:
370-
filters.remove(key)
371-
print(line, file=output)
372-
elif category == "INFO" and key in info_fields:
373-
info_fields.remove(key)
374-
print(line, file=output)
375-
elif category == "FORMAT" and key in format_fields:
376-
format_fields.remove(key)
377-
print(line, file=output)
378-
379-
# add all fields that are not in the original header
380-
# or all fields if there was no original header
338+
339+
# [1.4.1 File format]
340+
print("##fileformat=VCFv4.3", file=output)
341+
342+
if "source" in ds.attrs:
343+
print(f'##source={ds.attrs["source"]}', file=output)
381344

382345
# [1.4.2 Information field format]
383346
for key in info_fields:

0 commit comments

Comments (0)