|
1 | 1 | import io
|
2 | 2 | import logging
|
3 |
| -import re |
4 | 3 | import sys
|
5 | 4 | from datetime import datetime
|
6 | 5 | from typing import Optional
|
@@ -145,11 +144,9 @@ def write_vcf(
|
145 | 144 | )
|
146 | 145 |
|
147 | 146 | if not no_header:
|
148 |
| - original_header = root.attrs.get("vcf_header", None) |
149 | 147 | force_ac_an_header = not drop_genotypes and samples_selection is not None
|
150 | 148 | vcf_header = _generate_header(
|
151 | 149 | root,
|
152 |
| - original_header, |
153 | 150 | sample_ids,
|
154 | 151 | no_version=no_version,
|
155 | 152 | force_ac_an=force_ac_an_header,
|
@@ -300,7 +297,6 @@ def c_chunk_to_vcf(
|
300 | 297 |
|
301 | 298 | def _generate_header(
|
302 | 299 | ds,
|
303 |
| - original_header, |
304 | 300 | sample_ids,
|
305 | 301 | *,
|
306 | 302 | no_version: bool = False,
|
@@ -340,45 +336,12 @@ def _generate_header(
|
340 | 336 | if key in ("genotype", "genotype_phased"):
|
341 | 337 | continue
|
342 | 338 | format_fields.append(key)
|
343 |
| - if original_header is None: # generate entire header |
344 |
| - # [1.4.1 File format] |
345 |
| - print("##fileformat=VCFv4.3", file=output) |
346 |
| - |
347 |
| - if "source" in ds.attrs: |
348 |
| - print(f'##source={ds.attrs["source"]}', file=output) |
349 |
| - |
350 |
| - else: # use original header fields where appropriate |
351 |
| - unstructured_pattern = re.compile("##([^=]+)=([^<].*)") |
352 |
| - structured_pattern = re.compile("##([^=]+)=(<.*)") |
353 |
| - |
354 |
| - for line in original_header.split("\n"): |
355 |
| - if re.fullmatch(unstructured_pattern, line): |
356 |
| - print(line, file=output) |
357 |
| - else: |
358 |
| - match = re.fullmatch(structured_pattern, line) |
359 |
| - if match: |
360 |
| - category = match.group(1) |
361 |
| - id_pattern = re.compile("ID=([^,>]+)") |
362 |
| - key = id_pattern.findall(line)[0] |
363 |
| - if category not in ("contig", "FILTER", "INFO", "FORMAT"): |
364 |
| - # output other structured fields |
365 |
| - print(line, file=output) |
366 |
| - # only output certain categories if in dataset |
367 |
| - elif category == "contig" and key in contigs: |
368 |
| - contigs.remove(key) |
369 |
| - print(line, file=output) |
370 |
| - elif category == "FILTER" and key in filters: |
371 |
| - filters.remove(key) |
372 |
| - print(line, file=output) |
373 |
| - elif category == "INFO" and key in info_fields: |
374 |
| - info_fields.remove(key) |
375 |
| - print(line, file=output) |
376 |
| - elif category == "FORMAT" and key in format_fields: |
377 |
| - format_fields.remove(key) |
378 |
| - print(line, file=output) |
379 |
| - |
380 |
| - # add all fields that are not in the original header |
381 |
| - # or all fields if there was no original header |
| 339 | + |
| 340 | + # [1.4.1 File format] |
| 341 | + print("##fileformat=VCFv4.3", file=output) |
| 342 | + |
| 343 | + if "source" in ds.attrs: |
| 344 | + print(f'##source={ds.attrs["source"]}', file=output) |
382 | 345 |
|
383 | 346 | # [1.4.2 Information field format]
|
384 | 347 | for key in info_fields:
|
|
0 commit comments