14
14
regions_to_chunk_indexes ,
15
15
regions_to_selection ,
16
16
)
17
+ from vcztools .samples import parse_samples
17
18
from vcztools .utils import (
18
19
open_file_like ,
19
- search ,
20
20
)
21
21
22
22
from . import _vcztools , constants , retrieval
@@ -134,48 +134,16 @@ def write_vcf(
134
134
root = zarr .open (vcz , mode = "r" )
135
135
136
136
with open_file_like (output ) as output :
137
- force_ac_an_header = False
138
137
if samples and drop_genotypes :
139
138
raise ValueError ("Cannot select samples and drop genotypes." )
140
139
elif drop_genotypes :
141
140
sample_ids = []
142
141
samples_selection = np .array ([])
143
- elif samples is None :
144
- sample_ids = root ["sample_id" ][:]
145
- samples_selection = None
146
142
else :
147
- force_ac_an_header = True
148
143
all_samples = root ["sample_id" ][:]
149
- exclude_samples = samples .startswith ("^" )
150
- samples = samples .lstrip ("^" )
151
- sample_ids = np .array (samples .split ("," ))
152
- if np .all (sample_ids == np .array ("" )):
153
- sample_ids = np .empty ((0 ,))
154
-
155
- unknown_samples = np .setdiff1d (sample_ids , all_samples )
156
- if len (unknown_samples ) > 0 :
157
- if force_samples :
158
- # remove unknown samples from sample_ids
159
- logger .warning (
160
- "subset called for sample(s) not in header: "
161
- f'{ "," .join (unknown_samples )} .'
162
- )
163
- sample_ids = np .delete (
164
- sample_ids , search (sample_ids , unknown_samples )
165
- )
166
- else :
167
- raise ValueError (
168
- "subset called for sample(s) not in header: "
169
- f'{ "," .join (unknown_samples )} . '
170
- 'Use "--force-samples" to ignore this error.'
171
- )
172
-
173
- samples_selection = search (all_samples , sample_ids )
174
- if exclude_samples :
175
- samples_selection = np .setdiff1d (
176
- np .arange (all_samples .size ), samples_selection
177
- )
178
- sample_ids = all_samples [samples_selection ]
144
+ sample_ids , samples_selection = parse_samples (
145
+ samples , all_samples , force_samples = force_samples
146
+ )
179
147
180
148
filter_expr = filter_mod .FilterExpression (
181
149
field_names = set (root ), include = include , exclude = exclude
@@ -184,6 +152,7 @@ def write_vcf(
184
152
185
153
if not no_header :
186
154
original_header = root .attrs .get ("vcf_header" , None )
155
+ force_ac_an_header = not drop_genotypes and samples_selection is not None
187
156
vcf_header = _generate_header (
188
157
root ,
189
158
original_header ,
@@ -336,7 +305,7 @@ def c_chunk_to_vcf(
336
305
if (
337
306
"call_genotype_phased" in root
338
307
and not drop_genotypes
339
- and (samples_selection is None or num_samples > 0 )
308
+ and (samples_selection is None or num_samples != 0 )
340
309
):
341
310
gt_phased = get_vchunk_array (
342
311
root ["call_genotype_phased" ],
0 commit comments