44from scipy .stats import binom
55
def e2q(e):
    '''
    Converts an error probability into a Phred-scaled quality score.

    Parameters
    ----------
    e : `float` or `numpy.ndarray`
        Error probability (or array of error probabilities).

    Returns
    -------
    q : `float` or `numpy.ndarray`
        Quality score computed as -10 * log10(e).
    '''
    # The Phred scale is defined with a base-10 logarithm, not the natural one.
    return -10 * np.log10(e)
88
def q2e(q):
    '''
    Converts a Phred-scaled quality score into an error probability.

    Parameters
    ----------
    q : `float` or `numpy.ndarray`
        Quality score (or array of quality scores).

    Returns
    -------
    e : `float` or `numpy.ndarray`
        Error probability computed as 10 ** (-q / 10).
    '''
    # Base-10 inverse of the transform -10 * log10(e).
    return np.power(10, -(q / 10))
1111
1212def incorporate_monomorphic (gm , pos , start , end ):
1313 '''
@@ -46,7 +46,7 @@ def refalt(ref, alt, n_sit):
4646 if ref is None and alt is None :
4747 ref = np .full (n_sit , "A" )
4848 alt = np .full (n_sit , "C" )
49- return ref , alt
49+ return ref , alt
5050
5151def depth_per_haplotype (rng , mean_depth , std_depth , n_hap ):
5252 if isinstance (mean_depth , np .ndarray ):
@@ -66,7 +66,7 @@ def refalt_int_encoding(gm, ref, alt):
6666 refalt_int [refalt_str == "T" ] = 3
6767 return refalt_int [gm .reshape (- 1 ), np .repeat (np .arange (gm .shape [0 ]), gm .shape [1 ])].reshape (gm .shape )
6868
def linked_depth(rng, DPh, read_length, n_sit):
    '''
    Simulates reads in a contiguous genomic region to compute the depth per position.

    Parameters
    ----------
    rng : `numpy.random._generator.Generator`
        Random number generator used to draw the read start positions.
    DPh : `numpy.ndarray`
        Numpy array with the depth per haplotype
    read_length : `int`
        Read length in base pair units
    n_sit : `int`
        number of sites that depth has to be simulated for

    Returns
    -------
    DP : `numpy.ndarray`
        Depth per site per haplotype
    '''
    DP = []
    # Number of reads per haplotype needed to reach its depth over the region.
    read_n = ((DPh * n_sit) / read_length).astype("int")
    for r in read_n:
        # Start positions are drawn so that every read fits inside the region.
        starts = rng.integers(low=0, high=n_sit - read_length + 1, size=r)
        # Difference array: +1 where a read starts, -1 just past where it ends.
        # `np.add.at` accumulates correctly when several reads share a start.
        delta = np.zeros((n_sit + 1,), dtype=int)
        np.add.at(delta, starts, 1)
        np.add.at(delta, starts + read_length, -1)
        # The running sum of the differences is the number of reads per site.
        DP.append(np.cumsum(delta[:n_sit]))
    # One row per haplotype so far; transpose to (sites, haplotypes).
    return np.array(DP).T
@@ -150,7 +150,7 @@ def sim_allelereadcounts(gm, mean_depth, e, ploidy, seed = None, std_depth = Non
150150 (haplotypic samples, )) and the order must be the same as the second dimension of `gm`.
151151
152152 ploidy : `int`
153- Number of haplotypic chromosomes per individual.
153+ Number of haplotypic chromosomes per individual. It is recommended to read the Notes about ploidy.
154154
155155 ref : `numpy.ndarray`, optional
156156 Reference alleles list per site. The size of the array must be (sites, ) and the order has to
@@ -181,6 +181,14 @@ def sim_allelereadcounts(gm, mean_depth, e, ploidy, seed = None, std_depth = Non
181181 must be 15.
182182 - If monomorphic sites are included, the `alt` values corresponding to those sites are not taken into account,
183183 but they must be still indicated.
184+ - Regarding ploidy, if the error parameter is specified as a constant for all individuals, the user can specify
185+ the desired ploidy of the simulated organisms.
186+ If a different error rate per haplotype is input and the user wants to compute Genotype Likelihoods (GL) for
187+ organisms with ploidy > 1, ploidy should be equal to 1 for this function, and the desired ploidy can then be
188+ specified when the later function `allelereadcounts_to_GL()` is used. This is because the error values
189+ must be input again to compute GL and, if ploidy > 1 is specified for this function, the dimensions of `arc`
190+ will be smaller than the dimensions of `e`. Nonetheless, if the user desires to obtain the output `arc` in
191+ a certain ploidy, one can use the `ploidy_sum(arc, ploidy)` function.
184192 '''
185193 #Checks
186194 assert check_gm (gm )
0 commit comments