@@ -1540,15 +1540,6 @@ def summary_table(self):
15401540 return data
15411541
15421542
1543- @dataclasses .dataclass
1544- class EncodingWork :
1545- func : callable = dataclasses .field (repr = False )
1546- start : int
1547- stop : int
1548- columns : list [str ]
1549- memory : int = 0
1550-
1551-
15521543def parse_max_memory (max_memory ):
15531544 if max_memory is None :
15541545 # Effectively unbounded
@@ -1640,7 +1631,7 @@ def init(
16401631 ):
16411632 self .icf = icf
16421633 if self .path .exists ():
1643- raise ValueError ("Zarr path already exists" )
1634+ raise ValueError ("Zarr path already exists" ) # NEEDS TEST
16441635 partitions = VcfZarrPartition .generate_partitions (
16451636 self .icf .num_records ,
16461637 schema .variants_chunk_size ,
@@ -1807,6 +1798,7 @@ def finalise_partition_array(self, partition_index, name):
18071798 wip_path = self .wip_partition_array_path (partition_index , name )
18081799 final_path = self .partition_array_path (partition_index , name )
18091800 if final_path .exists ():
1801+ # NEEDS TEST
18101802 logger .warning (f"Removing existing { final_path } " )
18111803 shutil .rmtree (final_path )
18121804 # Atomic swap
@@ -1923,7 +1915,7 @@ def encode_filters_partition(self, partition_index):
19231915 var_filter .buff [j , lookup [f ]] = True
19241916 except KeyError :
19251917 raise ValueError (
1926- f"Filter '{ f } ' was not defined " f" in the header."
1918+ f"Filter '{ f } ' was not defined in the header."
19271919 ) from None
19281920 var_filter .flush ()
19291921
@@ -1956,6 +1948,7 @@ def finalise_array(self, name):
19561948 logger .info (f"Finalising { name } " )
19571949 final_path = self .path / name
19581950 if final_path .exists ():
1951+ # NEEDS TEST
19591952 raise ValueError (f"Array { name } already exists" )
19601953 for partition in range (len (self .metadata .partitions )):
19611954 # Move all the files in partition dir to dest dir
@@ -1992,7 +1985,12 @@ def finalise(self, show_progress=False):
19921985 # NOTE: it's not clear that adding more workers will make this quicker,
19931986 # as it's just going to be causing contention on the file system.
19941987 # Something to check empirically in some deployments.
1995- with core .ParallelWorkManager (1 , progress_config ) as pwm :
1988+ # FIXME we're just using worker_processes=0 here to hook into the
1989+ # SynchronousExecutor which is intended for testing purposes so
1990+ # that we get test coverage. Should fix this either by allowing
1991+ # for multiple workers, or making a standard wrapper for tqdm
1992+ # that allows us to have a consistent look and feel.
1993+ with core .ParallelWorkManager (0 , progress_config ) as pwm :
19961994 for name in self .metadata .schema .columns :
19971995 pwm .submit (self .finalise_array , name )
19981996 zarr .consolidate_metadata (self .path )
@@ -2131,11 +2129,9 @@ def encode_init(
21312129 )
21322130
21332131
2134- def encode_partition (zarr_path , partition , * , show_progress = False , worker_processes = 1 ):
2132+ def encode_partition (zarr_path , partition ):
21352133 writer = VcfZarrWriter (zarr_path )
2136- writer .encode_partition (
2137- partition , show_progress = show_progress , worker_processes = worker_processes
2138- )
2134+ writer .encode_partition (partition )
21392135
21402136
21412137def encode_finalise (zarr_path , show_progress = False ):
0 commit comments