From 902179db27bc200d82a323296e001493eafd9e98 Mon Sep 17 00:00:00 2001 From: hossam26644 Date: Fri, 12 Dec 2025 12:21:43 +0000 Subject: [PATCH] first pass update documentation and float k values hull offset a required positional arg remove string remapping fix verifications added lost test not remove named argument and add a test fix test fix doc --- docs/ancestry.md | 2 +- docs/api.md | 3 +-- msprime/ancestry.py | 19 ++++++++++--------- tests/test_ancestry.py | 16 ++++++++-------- tests/test_demography.py | 8 ++++++-- tests/test_models.py | 14 +++++++------- verification.py | 12 ++++++------ 7 files changed, 39 insertions(+), 35 deletions(-) diff --git a/docs/ancestry.md b/docs/ancestry.md index 8a9bdd09f..8755b3b25 100644 --- a/docs/ancestry.md +++ b/docs/ancestry.md @@ -2259,7 +2259,7 @@ In this example, we use the {class}`SMC(k) <.SmcKApproxCoalescent>` model to run simulations: ```{code-cell} ts = msprime.sim_ancestry(4, population_size=10, - model=msprime.SmcKApproxCoalescent(hull_offset=1), + model=msprime.SmcKApproxCoalescent(1), random_seed=1) SVG(ts.draw_svg(y_axis=True, time_scale="log_time")) ``` diff --git a/docs/api.md b/docs/api.md index cdb1397de..32708111b 100644 --- a/docs/api.md +++ b/docs/api.md @@ -21,8 +21,7 @@ for discussion and examples of individual features. sim_ancestry SampleSet StandardCoalescent - SmcApproxCoalescent - SmcPrimeApproxCoalescent + SmcKApproxCoalescent DiscreteTimeWrightFisher FixedPedigree BetaCoalescent diff --git a/msprime/ancestry.py b/msprime/ancestry.py index 1be0bd982..5cbcd08bb 100644 --- a/msprime/ancestry.py +++ b/msprime/ancestry.py @@ -1798,7 +1798,7 @@ class SmcApproxCoalescent(AncestryModel): This model is implemented using a naive rejection sampling approach and so it may not be any more efficient to simulate than the standard Hudson model. 
- We recommend using the ``SmcKApproxCoalescent(hull_offset=0)`` instead + We recommend using the ``SmcKApproxCoalescent(0)`` instead. The string ``"smc"`` can be used to refer to this model. """ @@ -1822,7 +1822,8 @@ class SmcPrimeApproxCoalescent(AncestryModel): This model is implemented using a naive rejection sampling approach and so it may not be any more efficient to simulate than the standard Hudson model. We recommend using the - ``SmcKApproxCoalescent(hull_offset=1)`` instead. + ``SmcKApproxCoalescent(1)`` for discrete genomes instead, or + ``SmcKApproxCoalescent(1e-14)`` for continuous genomes. The string ``"smc_prime"`` can be used to refer to this model. """ @@ -1845,15 +1846,15 @@ class SmcKApproxCoalescent(ParametricAncestryModel): Specifically, if the hull_offset is set to 0, then only overlapping genomic tracts can be joined by a common ancestor event (this is equivalent to the - SMC model). If the hull_offset is set to 1, then overlapping or adjacent - genomic tracts can be joined by a common ancestor (this is equivalent to the - SMC' model). If the hull_offset is set to full the sequence length, then any - segments can share a common ancestor, which is equivalent to the standard Hudson - coalescent. + SMC model). If the hull_offset is set to 1 (for discrete genomes), then + overlapping or adjacent genomic tracts can be joined by a common ancestor + (this is equivalent to the SMC' model). If the hull_offset is set to the + full sequence length, then any segments can share a common ancestor, which + is equivalent to the standard Hudson coalescent. :param float hull_offset: Determines the maximum distance between genomic tracts of ancestral material that can be joined by a common ancestor event. - Defaults to 0 (equivalent to the SMC model). 
+ """ name = "smc_k" @@ -1861,7 +1862,7 @@ class SmcKApproxCoalescent(ParametricAncestryModel): hull_offset: float # We have to define an __init__ to enforce keyword-only behaviour - def __init__(self, *, duration=None, hull_offset=0.0): + def __init__(self, hull_offset, *, duration=None): self.duration = duration self.hull_offset = hull_offset diff --git a/tests/test_ancestry.py b/tests/test_ancestry.py index 6bf217ed5..2a74293b4 100644 --- a/tests/test_ancestry.py +++ b/tests/test_ancestry.py @@ -3052,7 +3052,7 @@ class TestSMCK: def test_discrete(self, seed): tss = msprime.sim_ancestry( samples=10, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(1), recombination_rate=0.005, sequence_length=1000, random_seed=seed, @@ -3064,7 +3064,7 @@ def test_discrete(self, seed): def test_continuous(self): tss = msprime.sim_ancestry( samples=10, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(1), recombination_rate=0.005, sequence_length=1000, num_replicates=10, @@ -3086,7 +3086,7 @@ def test_ancient_samples(self): ts = msprime.sim_ancestry( initial_state=tables, population_size=10_000, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(1), recombination_rate=1e-6, ) for tree in ts.trees(): @@ -3098,7 +3098,7 @@ def test_model_switch(self): population_size=10_000, model=[ msprime.StandardCoalescent(duration=10), - msprime.SmcKApproxCoalescent(duration=10), + msprime.SmcKApproxCoalescent(1, duration=10), msprime.StandardCoalescent(), ], random_seed=10, @@ -3114,7 +3114,7 @@ def test_model_switch_high_rec(self): population_size=10_000, model=[ msprime.StandardCoalescent(duration=100), - msprime.SmcKApproxCoalescent(), + msprime.SmcKApproxCoalescent(1), ], random_seed=10, recombination_rate=1e-4, @@ -3129,7 +3129,7 @@ def test_smc_k_plus(self, hull_offset, discrete_genome): tss = msprime.sim_ancestry( samples=10, population_size=10_000, - 
model=msprime.SmcKApproxCoalescent(hull_offset=hull_offset), + model=msprime.SmcKApproxCoalescent(hull_offset), random_seed=10, recombination_rate=1e-5, sequence_length=100, @@ -3149,7 +3149,7 @@ def test_two_pops(self): ts = msprime.sim_ancestry( samples={0: 2, 1: 2}, demography=demography, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(1), random_seed=74024, recombination_rate=1e-5, sequence_length=100, @@ -3166,7 +3166,7 @@ def test_gc(self): ): msprime.sim_ancestry( samples=10, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(1), sequence_length=100, gene_conversion_rate=1.0, gene_conversion_tract_length=5, diff --git a/tests/test_demography.py b/tests/test_demography.py index 0bbd3035d..1b3f9d39b 100644 --- a/tests/test_demography.py +++ b/tests/test_demography.py @@ -2521,7 +2521,7 @@ class TestMigrationRecordsSmcPrime(MigrationRecordsMixin): class TestMigrationRecordsSmcK(MigrationRecordsMixin): - model = msprime.SmcKApproxCoalescent() + model = msprime.SmcKApproxCoalescent(1) class TestMigrationRecordsDtwf(MigrationRecordsMixin): @@ -2576,7 +2576,11 @@ def test_full_arg_migration(self): self.verify_two_pops_full_arg(ts) def test_full_arg_migration_smc(self): - for model in ["smc", "smc_prime", msprime.SmcKApproxCoalescent()]: + for model in [ + msprime.SmcApproxCoalescent(), + msprime.SmcPrimeApproxCoalescent(), + msprime.SmcKApproxCoalescent(1), + ]: population_configurations = [ msprime.PopulationConfiguration(10), msprime.PopulationConfiguration(10), diff --git a/tests/test_models.py b/tests/test_models.py index 23f3f5ef0..e82cddf76 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -60,7 +60,7 @@ def test_smc_models(self): assert repr(model) == repr_s assert str(model) == repr_s - model = msprime.SmcKApproxCoalescent() + model = msprime.SmcKApproxCoalescent(0.0) repr_s = "SmcKApproxCoalescent(duration=None, hull_offset=0.0)" assert repr(model) == repr_s assert str(model) == repr_s @@ 
-144,7 +144,7 @@ def test_model_instances(self): msprime.StandardCoalescent(), msprime.SmcApproxCoalescent(), msprime.SmcPrimeApproxCoalescent(), - msprime.SmcKApproxCoalescent(), + msprime.SmcKApproxCoalescent(1), msprime.DiscreteTimeWrightFisher(), msprime.FixedPedigree(), msprime.SweepGenicSelection( @@ -332,11 +332,11 @@ def test_sweep_genic_selection(self): msprime.SweepGenicSelection(1) def test_smck_coalescent(self): - model = msprime.SmcKApproxCoalescent() - assert model.duration is None - assert model.hull_offset == 0.0 - model = msprime.SmcKApproxCoalescent(hull_offset=1.1) + with pytest.raises(TypeError, match="hull_offset"): + model = msprime.SmcKApproxCoalescent() + + model = msprime.SmcKApproxCoalescent(1.1) assert model.duration is None assert model.hull_offset == 1.1 @@ -471,7 +471,7 @@ def test_dirac_coalescent_parameters(self): def test_smck_coalescent_parameters(self): for hull_offset in [0.01, 10.0, 0.99]: - model = msprime.SmcKApproxCoalescent(hull_offset=hull_offset) + model = msprime.SmcKApproxCoalescent(hull_offset) assert model.hull_offset == hull_offset d = model._as_lowlevel() assert d == {"name": "smc_k", "hull_offset": hull_offset, "duration": None} diff --git a/verification.py b/verification.py index 4414a2c7a..018e2ffb2 100644 --- a/verification.py +++ b/verification.py @@ -4535,7 +4535,7 @@ class SmckvsSmcKApproxCoalescent(Test): """ models = { - "SmcKApprox": msprime.SmcKApproxCoalescent(), + "SmcKApprox": msprime.SmcKApproxCoalescent(hull_offset=0.0), "smc": msprime.SmcApproxCoalescent(), } @@ -4955,9 +4955,9 @@ def test_gc_tract_length_smc(self): Runs the check for the mean length of gene conversion tracts. 
""" models = { - "Hudson": msprime.SmcApproxCoalescent(), - "SMC": msprime.SmcKApproxCoalescent(), - "SMCK": msprime.StandardCoalescent(), + "SMC": msprime.SmcApproxCoalescent(), + "SMCK": msprime.SmcKApproxCoalescent(hull_offset=0.0), + "Hudson": msprime.StandardCoalescent(), } num_replicates = 10 n = 10 @@ -5053,7 +5053,7 @@ def test_smc_k_num_trees_gc(self): models_to_run = [ (msprime.SmcApproxCoalescent(), "msprime (hudson)"), - (msprime.SmcKApproxCoalescent(), "smc"), + (msprime.SmcApproxCoalescent(), "smc"), (msprime.SmcPrimeApproxCoalescent(), "smc_prime"), (msprime.SmcKApproxCoalescent(hull_offset=0.0), "smc_k(0)"), (msprime.SmcKApproxCoalescent(hull_offset=1.0), "smc_k(1)"), @@ -5618,7 +5618,7 @@ def test_above_root_smck(self): samples=10, sequence_length=100, recombination_rate=0.1, - model=msprime.SmcKApproxCoalescent(), + model=msprime.SmcKApproxCoalescent(hull_offset=0.0), num_replicates=300, )