Skip to content

Commit f1d38d1

Browse files
Merge pull request #207 from jeromekelleher/move-to-db-for-reconsidered
Use match DB
2 parents f7771bb + cb0e445 commit f1d38d1

File tree

6 files changed: 333 additions (+333) and 195 deletions (-195).

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,10 @@ authors = [
 ]
 requires-python = ">=3.9"
 dependencies = [
-    "tsinfer",
+    "tsinfer==0.3.3", # https://github.com/jeromekelleher/sc2ts/issues/201
     "pyfaidx",
     "tskit>=0.5.3",
     "tszip",
-    "tsinfer>=0.3.0",
     "pandas",
     "numba",
     "tqdm",

sc2ts/cli.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -199,15 +199,6 @@ def dump_samples(samples, output_file):
         "is greater than this, randomly subsample."
     ),
 )
-@click.option(
-    "--excluded_samples_dir",
-    default=None,
-    type=click.Path(file_okay=False, dir_okay=True),
-    help=(
-        "Directory containing pickled files of excluded samples. "
-        "By default, it is set to output_prefx."
-    ),
-)
 @click.option("--num-threads", default=0, type=int, help="Number of match threads")
 @click.option("--random-seed", default=42, type=int, help="Random seed for subsampling")
 @click.option("-p", "--precision", default=None, type=int, help="Match precision")
@@ -225,7 +216,6 @@ def daily_extend(
     num_past_days,
     max_submission_delay,
     max_daily_samples,
-    excluded_samples_dir,
     num_threads,
     random_seed,
     precision,
@@ -238,21 +228,23 @@ def daily_extend(
     """
     setup_logging(verbose, log_file)
     rng = random.Random(random_seed)
+
+    match_db_path = f"{output_prefix}match.db"
     if base is None:
         base_ts = inference.initial_ts()
+        match_db = inference.MatchDb.initialise(match_db_path)
     else:
         base_ts = tskit.load(base)

-    if excluded_samples_dir is None:
-        excluded_samples_dir = output_prefix
-
     with contextlib.ExitStack() as exit_stack:
         alignment_store = exit_stack.enter_context(sc2ts.AlignmentStore(alignments))
         metadata_db = exit_stack.enter_context(sc2ts.MetadataDb(metadata))
+        match_db = exit_stack.enter_context(inference.MatchDb(match_db_path))
         ts_iter = inference.daily_extend(
             alignment_store=alignment_store,
             metadata_db=metadata_db,
             base_ts=base_ts,
+            match_db=match_db,
             num_mismatches=num_mismatches,
             max_hmm_cost=max_hmm_cost,
             min_group_size=min_group_size,
@@ -263,13 +255,10 @@ def daily_extend(
             precision=precision,
             num_threads=num_threads,
             show_progress=not no_progress,
-            excluded_sample_dir=excluded_samples_dir,
         )
-        for ts, excluded_samples, date in ts_iter:
+        for ts, date in ts_iter:
             output_ts = output_prefix + date + ".ts"
             add_provenance(ts, output_ts)
-            output_excluded_samples = output_prefix + date + ".excluded_samples.pickle"
-            dump_samples(excluded_samples, output_excluded_samples)


 @click.command()

0 commit comments

Comments
 (0)