Skip to content

Commit 6b9f88c

Browse files
Will-Tyler authored and jeromekelleher committed
Push variant-wise for-loop to C extension
1 parent ea2e591 commit 6b9f88c

File tree

2 files changed

+56
-13
lines changed

2 files changed

+56
-13
lines changed

vcztools/_vcztoolsmodule.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,57 @@ VcfEncoder_encode(VcfEncoder *self, PyObject *args)
476476
return ret;
477477
}
478478

479+
static PyObject *
480+
VcfEncoder_encode_all(VcfEncoder *self, PyObject *args)
481+
{
482+
bool allowed_threads = false;
483+
484+
if (VcfEncoder_check_state(self) != 0) {
485+
goto out;
486+
}
487+
488+
Py_BEGIN_ALLOW_THREADS
489+
allowed_threads = true;
490+
const size_t num_variants = self->vcf_encoder->num_variants;
491+
size_t bufsize = 1024;
492+
493+
for (size_t row = 0; row < num_variants; row++) {
494+
while (true) {
495+
char* const buf = PyMem_RawMalloc(bufsize);
496+
497+
if (buf == NULL) {
498+
PyErr_NoMemory();
499+
goto out;
500+
}
501+
502+
const int64_t line_length = vcz_variant_encoder_encode(
503+
self->vcf_encoder, row, buf, bufsize);
504+
505+
if (line_length < 0) {
506+
PyMem_RawFree(buf);
507+
508+
if (line_length == VCZ_ERR_BUFFER_OVERFLOW) {
509+
bufsize *= 2;
510+
} else {
511+
handle_library_error((int) line_length);
512+
goto out;
513+
}
514+
} else {
515+
puts(buf);
516+
PyMem_RawFree(buf);
517+
break;
518+
} // if (line_length < 0)
519+
} // while (true)
520+
}
521+
522+
out:
523+
if (allowed_threads) {
524+
Py_END_ALLOW_THREADS
525+
}
526+
527+
Py_RETURN_NONE;
528+
}
529+
479530
static PyObject *
480531
VcfEncoder_print_state(VcfEncoder *self, PyObject *args)
481532
{
@@ -546,6 +597,10 @@ static PyMethodDef VcfEncoder_methods[] = {
546597
.ml_meth = (PyCFunction) VcfEncoder_encode,
547598
.ml_flags = METH_VARARGS,
548599
.ml_doc = "Return the specified row of VCF text" },
600+
{ .ml_name = "encode_all",
601+
.ml_meth = (PyCFunction) VcfEncoder_encode_all,
602+
.ml_flags = METH_VARARGS,
603+
.ml_doc = "Print all rows of VCF text" },
549604
{ NULL } /* Sentinel */
550605
};
551606

vcztools/vcf_writer.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -415,19 +415,7 @@ def c_chunk_to_vcf(
415415
if preceding_future:
416416
concurrent.futures.wait((preceding_future,))
417417

418-
# TODO: (1) make a guess at this based on number of fields and samples,
419-
# and (2) log a DEBUG message when we have to double.
420-
buflen = 1024
421-
for j in range(num_variants):
422-
failed = True
423-
while failed:
424-
try:
425-
line = encoder.encode(j, buflen)
426-
failed = False
427-
except _vcztools.VczBufferTooSmall:
428-
buflen *= 2
429-
# print("Bumping buflen to", buflen)
430-
print(line, file=output)
418+
encoder.encode_all()
431419

432420

433421
def _generate_header(ds, original_header, sample_ids, *, no_version: bool = False):

0 commit comments

Comments
 (0)