|
1 | 1 | import io
|
2 | 2 | import logging
|
3 |
| -import re |
4 | 3 | import sys
|
5 | 4 | from datetime import datetime
|
6 | 5 |
|
@@ -144,11 +143,9 @@ def write_vcf(
|
144 | 143 | )
|
145 | 144 |
|
146 | 145 | if not no_header:
|
147 |
| - original_header = root.attrs.get("vcf_header", None) |
148 | 146 | force_ac_an_header = not drop_genotypes and samples_selection is not None
|
149 | 147 | vcf_header = _generate_header(
|
150 | 148 | root,
|
151 |
| - original_header, |
152 | 149 | sample_ids,
|
153 | 150 | no_version=no_version,
|
154 | 151 | force_ac_an=force_ac_an_header,
|
@@ -299,7 +296,6 @@ def c_chunk_to_vcf(
|
299 | 296 |
|
300 | 297 | def _generate_header(
|
301 | 298 | ds,
|
302 |
| - original_header, |
303 | 299 | sample_ids,
|
304 | 300 | *,
|
305 | 301 | no_version: bool = False,
|
@@ -339,45 +335,12 @@ def _generate_header(
|
339 | 335 | if key in ("genotype", "genotype_phased"):
|
340 | 336 | continue
|
341 | 337 | format_fields.append(key)
|
342 |
| - if original_header is None: # generate entire header |
343 |
| - # [1.4.1 File format] |
344 |
| - print("##fileformat=VCFv4.3", file=output) |
345 |
| - |
346 |
| - if "source" in ds.attrs: |
347 |
| - print(f'##source={ds.attrs["source"]}', file=output) |
348 |
| - |
349 |
| - else: # use original header fields where appropriate |
350 |
| - unstructured_pattern = re.compile("##([^=]+)=([^<].*)") |
351 |
| - structured_pattern = re.compile("##([^=]+)=(<.*)") |
352 |
| - |
353 |
| - for line in original_header.split("\n"): |
354 |
| - if re.fullmatch(unstructured_pattern, line): |
355 |
| - print(line, file=output) |
356 |
| - else: |
357 |
| - match = re.fullmatch(structured_pattern, line) |
358 |
| - if match: |
359 |
| - category = match.group(1) |
360 |
| - id_pattern = re.compile("ID=([^,>]+)") |
361 |
| - key = id_pattern.findall(line)[0] |
362 |
| - if category not in ("contig", "FILTER", "INFO", "FORMAT"): |
363 |
| - # output other structured fields |
364 |
| - print(line, file=output) |
365 |
| - # only output certain categories if in dataset |
366 |
| - elif category == "contig" and key in contigs: |
367 |
| - contigs.remove(key) |
368 |
| - print(line, file=output) |
369 |
| - elif category == "FILTER" and key in filters: |
370 |
| - filters.remove(key) |
371 |
| - print(line, file=output) |
372 |
| - elif category == "INFO" and key in info_fields: |
373 |
| - info_fields.remove(key) |
374 |
| - print(line, file=output) |
375 |
| - elif category == "FORMAT" and key in format_fields: |
376 |
| - format_fields.remove(key) |
377 |
| - print(line, file=output) |
378 |
| - |
379 |
| - # add all fields that are not in the original header |
380 |
| - # or all fields if there was no original header |
| 338 | + |
| 339 | + # [1.4.1 File format] |
| 340 | + print("##fileformat=VCFv4.3", file=output) |
| 341 | + |
| 342 | + if "source" in ds.attrs: |
| 343 | + print(f'##source={ds.attrs["source"]}', file=output) |
381 | 344 |
|
382 | 345 | # [1.4.2 Information field format]
|
383 | 346 | for key in info_fields:
|
|
0 commit comments