@@ -4949,190 +4949,6 @@ def test_smck_vs_smckapprox_multiple_bottleneck(self):
49494949 sequence_length = 5e7 ,
49504950 )
49514951
4952- def test_gc_tract_length_smc (self ):
4953- """
4954- Runs the check for the mean length of gene conversion tracts.
4955- """
4956- models = {
4957- "SMC" : msprime .SmcApproxCoalescent (),
4958- "SMCK" : msprime .SMCK (k = 0.0 ),
4959- "Hudson" : msprime .StandardCoalescent (),
4960- }
4961- num_replicates = 10
4962- n = 10
4963- gene_conversion_rate = 5
4964- gc_tract_lengths = np .append (np .arange (1 , 5.25 , 0.25 ), [10 , 50 ])
4965-
4966- for discrete_genome in [True , False ]:
4967- records = []
4968-
4969- for k , l in enumerate (gc_tract_lengths ):
4970- num_gc_events = np .zeros (num_replicates )
4971- num_internal_gc_events = np .zeros (num_replicates )
4972- sum_internal_gc_tract_lengths = np .zeros (num_replicates )
4973-
4974- for model_name , model in models .items ():
4975-
4976- sim = msprime .ancestry ._parse_sim_ancestry (
4977- samples = n ,
4978- sequence_length = 100 ,
4979- gene_conversion_rate = gene_conversion_rate ,
4980- gene_conversion_tract_length = gc_tract_lengths [k ],
4981- discrete_genome = discrete_genome ,
4982- ploidy = 1 ,
4983- model = model ,
4984- additional_nodes = (
4985- msprime .NodeType .RECOMBINANT
4986- | msprime .NodeType .CENSUS
4987- | msprime .NodeType .GENE_CONVERSION
4988- | msprime .NodeType .PASS_THROUGH
4989- | msprime .NodeType .COMMON_ANCESTOR
4990- ),
4991- coalescing_segments_only = False ,
4992- )
4993-
4994- for j , _ts in enumerate (sim .run_replicates (num_replicates )):
4995- num_gc_events [j ] = sim .num_gene_conversion_events
4996- num_internal_gc_events [j ] = (
4997- sim .num_internal_gene_conversion_events
4998- )
4999- sum_internal_gc_tract_lengths [j ] = (
5000- sim .sum_internal_gc_tract_lengths
5001- )
5002- sim .reset ()
5003-
5004- for j in range (num_replicates ):
5005- records .append (
5006- {
5007- "tract_length" : l ,
5008- "model" : model_name ,
5009- "num_gc_events" : num_gc_events [j ],
5010- "num_internal_gc_events" : num_internal_gc_events [j ],
5011- "normalized_tract_length" : (
5012- sum_internal_gc_tract_lengths [j ]
5013- / num_internal_gc_events [j ]
5014- / l
5015- if num_internal_gc_events [j ] > 0
5016- else np .nan
5017- ),
5018- }
5019- )
5020-
5021- df = pd .DataFrame (records )
5022-
5023- self .plot_gc_metric (
5024- df , "num_gc_events" , "Number of GC events" , discrete_genome
5025- )
5026- self .plot_gc_metric (
5027- df ,
5028- "num_internal_gc_events" ,
5029- "Number of internal GC events" ,
5030- discrete_genome ,
5031- )
5032- self .plot_gc_metric (
5033- df ,
5034- "normalized_tract_length" ,
5035- "Normalized tract length (mean / l)" ,
5036- discrete_genome ,
5037- )
5038-
5039- def test_smc_k_num_trees_gc (self ):
5040- """
5041- Runs the check for number of trees in the SMC and full coalescent
5042- using the API, but with gene conversion instead of recombination.
5043- """
5044- L = 100
5045- Ne = 1000
5046- n = 10
5047- gene_conversion_rate = 0.00001
5048- gc_tract_lengths = np .arange (1 , 5.25 , 0.25 )
5049-
5050- num_replicates = 10_000
5051- results = []
5052-
5053- models_to_run = [
5054- (msprime .SmcApproxCoalescent (), "msprime (hudson)" ),
5055- (msprime .SmcApproxCoalescent (), "smc" ),
5056- (msprime .SmcPrimeApproxCoalescent (), "smc_prime" ),
5057- (msprime .SMCK (k = 0.0 ), "smc_k(0)" ),
5058- (msprime .SMCK (k = 1.0 ), "smc_k(1)" ),
5059- (msprime .SMCK (k = L ), "smc_k(inf)" ),
5060- ]
5061-
5062- for gc_tract_length in gc_tract_lengths :
5063- for model_obj , model_name in models_to_run :
5064- sim = msprime .ancestry ._parse_sim_ancestry (
5065- samples = n ,
5066- population_size = Ne ,
5067- sequence_length = L ,
5068- gene_conversion_rate = gene_conversion_rate ,
5069- gene_conversion_tract_length = gc_tract_length ,
5070- model = model_obj ,
5071- )
5072-
5073- for rep in range (num_replicates ):
5074- sim .run ()
5075- results .append (
5076- {
5077- "tract_length" : gc_tract_length ,
5078- "model" : model_name ,
5079- "replicate" : rep ,
5080- "num_breakpoints" : sim .num_breakpoints ,
5081- }
5082- )
5083- sim .reset ()
5084-
5085- smc_df = pd .DataFrame (results )
5086-
5087- models = smc_df ["model" ].unique ()
5088- tract_lengths = sorted (smc_df ["tract_length" ].unique ())
5089- model_colors = pyplot .cm .tab10 (np .linspace (0 , 1 , len (models )))
5090-
5091- fig , ax = pyplot .subplots (figsize = (10 , 6 ))
5092-
5093- box_width = 0.15
5094- legend_handles = {}
5095-
5096- for i , tl in enumerate (tract_lengths ):
5097- for j , model in enumerate (models ):
5098- subset = smc_df [
5099- (smc_df ["tract_length" ] == tl ) & (smc_df ["model" ] == model )
5100- ]
5101- pos = i + (j - len (models ) / 2 ) * box_width
5102- bp = ax .boxplot (
5103- subset ["num_breakpoints" ].values ,
5104- positions = [pos ],
5105- widths = box_width * 0.8 ,
5106- patch_artist = True ,
5107- boxprops = dict (facecolor = model_colors [j ], alpha = 0.6 ),
5108- medianprops = dict (color = "black" ),
5109- whiskerprops = dict (color = model_colors [j ]),
5110- capprops = dict (color = model_colors [j ]),
5111- flierprops = dict (
5112- markerfacecolor = model_colors [j ],
5113- marker = "o" ,
5114- markersize = 4 ,
5115- alpha = 0.5 ,
5116- ),
5117- )
5118- if model not in legend_handles :
5119- legend_handles [model ] = bp ["boxes" ][0 ]
5120-
5121- ax .set_xticks (range (len (tract_lengths )))
5122- ax .set_xticklabels (tract_lengths )
5123- ax .set_xlabel ("GC tract length" )
5124- ax .set_ylabel ("Number of breakpoints" )
5125- ax .set_title (
5126- f"Distribution of breakpoints by model and GC tract length. \n "
5127- f"GC rate: { gene_conversion_rate } , L: { L } , Ne: { Ne } , samples: { n } "
5128- f", num_replicates: { num_replicates } "
5129- )
5130- ax .legend (legend_handles .values (), legend_handles .keys (), title = "Model" )
5131-
5132- pyplot .tight_layout ()
5133- pyplot .savefig (self .output_dir / "breakpoints_boxplot.png" )
5134- pyplot .close ()
5135-
51364952 def test_out_of_africa_migration_model (self ):
51374953 s_no = 10
51384954 samples = {
0 commit comments