Skip to content

Commit 245a8a9

Browse files
Delete some validation tests for SMCK/GC
1 parent 384498c commit 245a8a9

File tree

1 file changed

+0
-184
lines changed

1 file changed

+0
-184
lines changed

verification.py

Lines changed: 0 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -4949,190 +4949,6 @@ def test_smck_vs_smckapprox_multiple_bottleneck(self):
49494949
sequence_length=5e7,
49504950
)
49514951

4952-
def test_gc_tract_length_smc(self):
4953-
"""
4954-
Runs the check for the mean length of gene conversion tracts.
4955-
"""
4956-
models = {
4957-
"SMC": msprime.SmcApproxCoalescent(),
4958-
"SMCK": msprime.SMCK(k=0.0),
4959-
"Hudson": msprime.StandardCoalescent(),
4960-
}
4961-
num_replicates = 10
4962-
n = 10
4963-
gene_conversion_rate = 5
4964-
gc_tract_lengths = np.append(np.arange(1, 5.25, 0.25), [10, 50])
4965-
4966-
for discrete_genome in [True, False]:
4967-
records = []
4968-
4969-
for k, l in enumerate(gc_tract_lengths):
4970-
num_gc_events = np.zeros(num_replicates)
4971-
num_internal_gc_events = np.zeros(num_replicates)
4972-
sum_internal_gc_tract_lengths = np.zeros(num_replicates)
4973-
4974-
for model_name, model in models.items():
4975-
4976-
sim = msprime.ancestry._parse_sim_ancestry(
4977-
samples=n,
4978-
sequence_length=100,
4979-
gene_conversion_rate=gene_conversion_rate,
4980-
gene_conversion_tract_length=gc_tract_lengths[k],
4981-
discrete_genome=discrete_genome,
4982-
ploidy=1,
4983-
model=model,
4984-
additional_nodes=(
4985-
msprime.NodeType.RECOMBINANT
4986-
| msprime.NodeType.CENSUS
4987-
| msprime.NodeType.GENE_CONVERSION
4988-
| msprime.NodeType.PASS_THROUGH
4989-
| msprime.NodeType.COMMON_ANCESTOR
4990-
),
4991-
coalescing_segments_only=False,
4992-
)
4993-
4994-
for j, _ts in enumerate(sim.run_replicates(num_replicates)):
4995-
num_gc_events[j] = sim.num_gene_conversion_events
4996-
num_internal_gc_events[j] = (
4997-
sim.num_internal_gene_conversion_events
4998-
)
4999-
sum_internal_gc_tract_lengths[j] = (
5000-
sim.sum_internal_gc_tract_lengths
5001-
)
5002-
sim.reset()
5003-
5004-
for j in range(num_replicates):
5005-
records.append(
5006-
{
5007-
"tract_length": l,
5008-
"model": model_name,
5009-
"num_gc_events": num_gc_events[j],
5010-
"num_internal_gc_events": num_internal_gc_events[j],
5011-
"normalized_tract_length": (
5012-
sum_internal_gc_tract_lengths[j]
5013-
/ num_internal_gc_events[j]
5014-
/ l
5015-
if num_internal_gc_events[j] > 0
5016-
else np.nan
5017-
),
5018-
}
5019-
)
5020-
5021-
df = pd.DataFrame(records)
5022-
5023-
self.plot_gc_metric(
5024-
df, "num_gc_events", "Number of GC events", discrete_genome
5025-
)
5026-
self.plot_gc_metric(
5027-
df,
5028-
"num_internal_gc_events",
5029-
"Number of internal GC events",
5030-
discrete_genome,
5031-
)
5032-
self.plot_gc_metric(
5033-
df,
5034-
"normalized_tract_length",
5035-
"Normalized tract length (mean / l)",
5036-
discrete_genome,
5037-
)
5038-
5039-
def test_smc_k_num_trees_gc(self):
5040-
"""
5041-
Runs the check for number of trees in the SMC and full coalescent
5042-
using the API, but with gene conversion instead of recombination.
5043-
"""
5044-
L = 100
5045-
Ne = 1000
5046-
n = 10
5047-
gene_conversion_rate = 0.00001
5048-
gc_tract_lengths = np.arange(1, 5.25, 0.25)
5049-
5050-
num_replicates = 10_000
5051-
results = []
5052-
5053-
models_to_run = [
5054-
(msprime.SmcApproxCoalescent(), "msprime (hudson)"),
5055-
(msprime.SmcApproxCoalescent(), "smc"),
5056-
(msprime.SmcPrimeApproxCoalescent(), "smc_prime"),
5057-
(msprime.SMCK(k=0.0), "smc_k(0)"),
5058-
(msprime.SMCK(k=1.0), "smc_k(1)"),
5059-
(msprime.SMCK(k=L), "smc_k(inf)"),
5060-
]
5061-
5062-
for gc_tract_length in gc_tract_lengths:
5063-
for model_obj, model_name in models_to_run:
5064-
sim = msprime.ancestry._parse_sim_ancestry(
5065-
samples=n,
5066-
population_size=Ne,
5067-
sequence_length=L,
5068-
gene_conversion_rate=gene_conversion_rate,
5069-
gene_conversion_tract_length=gc_tract_length,
5070-
model=model_obj,
5071-
)
5072-
5073-
for rep in range(num_replicates):
5074-
sim.run()
5075-
results.append(
5076-
{
5077-
"tract_length": gc_tract_length,
5078-
"model": model_name,
5079-
"replicate": rep,
5080-
"num_breakpoints": sim.num_breakpoints,
5081-
}
5082-
)
5083-
sim.reset()
5084-
5085-
smc_df = pd.DataFrame(results)
5086-
5087-
models = smc_df["model"].unique()
5088-
tract_lengths = sorted(smc_df["tract_length"].unique())
5089-
model_colors = pyplot.cm.tab10(np.linspace(0, 1, len(models)))
5090-
5091-
fig, ax = pyplot.subplots(figsize=(10, 6))
5092-
5093-
box_width = 0.15
5094-
legend_handles = {}
5095-
5096-
for i, tl in enumerate(tract_lengths):
5097-
for j, model in enumerate(models):
5098-
subset = smc_df[
5099-
(smc_df["tract_length"] == tl) & (smc_df["model"] == model)
5100-
]
5101-
pos = i + (j - len(models) / 2) * box_width
5102-
bp = ax.boxplot(
5103-
subset["num_breakpoints"].values,
5104-
positions=[pos],
5105-
widths=box_width * 0.8,
5106-
patch_artist=True,
5107-
boxprops=dict(facecolor=model_colors[j], alpha=0.6),
5108-
medianprops=dict(color="black"),
5109-
whiskerprops=dict(color=model_colors[j]),
5110-
capprops=dict(color=model_colors[j]),
5111-
flierprops=dict(
5112-
markerfacecolor=model_colors[j],
5113-
marker="o",
5114-
markersize=4,
5115-
alpha=0.5,
5116-
),
5117-
)
5118-
if model not in legend_handles:
5119-
legend_handles[model] = bp["boxes"][0]
5120-
5121-
ax.set_xticks(range(len(tract_lengths)))
5122-
ax.set_xticklabels(tract_lengths)
5123-
ax.set_xlabel("GC tract length")
5124-
ax.set_ylabel("Number of breakpoints")
5125-
ax.set_title(
5126-
f"Distribution of breakpoints by model and GC tract length. \n"
5127-
f"GC rate: {gene_conversion_rate}, L: {L}, Ne: {Ne}, samples: {n}"
5128-
f", num_replicates: {num_replicates}"
5129-
)
5130-
ax.legend(legend_handles.values(), legend_handles.keys(), title="Model")
5131-
5132-
pyplot.tight_layout()
5133-
pyplot.savefig(self.output_dir / "breakpoints_boxplot.png")
5134-
pyplot.close()
5135-
51364952
def test_out_of_africa_migration_model(self):
51374953
s_no = 10
51384954
samples = {

0 commit comments

Comments
 (0)