@@ -30,15 +30,15 @@ def generate_count_matrix(
3030):
3131 """
3232 Generate count matrix for groups of samples by sampling from a
33- negative binomiaal distribution.
33+ negative binomial distribution.
3434 """
3535 import patsy
3636
3737 if isinstance (coefficient_stds , (int , float )):
3838 coefficient_stds = [coefficient_stds ] * n_factors
3939
4040 if dispersion_function is None :
41- dispersion_function = disp
41+ dispersion_function = _disp
4242
4343 # Build sample vs factors table
4444 dcat = pd .DataFrame (
@@ -118,8 +118,8 @@ def generate_data(
118118
119119 Default is "hg38"
120120 **kwargs : :obj:`dict`
121- Additional keyword arguments will be passed to the
122- makeExampleDESeqDataSet function of DESeq2 .
121+ Additional keyword arguments will be passed to
122+ :func:`ngs_toolkit.demo.data_generator.generate_count_matrix` .
123123
124124 Returns
125125 -------
@@ -239,7 +239,8 @@ def generate_project(
239239 dcat ["protocol" ] = data_type
240240 dcat ["organism" ] = organism
241241 # now save it
242- dcat .to_csv (os .path .join (output_dir , project_name , "metadata" , "annotation.csv" ))
242+ dcat .to_csv (
243+ os .path .join (output_dir , project_name , "metadata" , "annotation.csv" ))
243244
244245 # Make comparison table
245246 comp_table_file = os .path .join (
@@ -249,7 +250,9 @@ def generate_project(
249250 factors = list (string .ascii_uppercase [: n_factors ])
250251 for factor in factors :
251252 for side , f in [(1 , "2" ), (0 , "1" )]:
252- ct2 = dcat .query ("{} == '{}'" .format (factor , factor + f )).index .to_frame ()
253+ ct2 = dcat .query (
254+ "{} == '{}'" .format (factor , factor + f )
255+ ).index .to_frame ()
253256 ct2 ["comparison_side" ] = side
254257 ct2 ["comparison_name" ] = "Factor_" + factor + "_" + "2vs1"
255258 ct2 ["sample_group" ] = "Factor_" + factor + f
@@ -280,15 +283,17 @@ def generate_project(
280283 bed = location_index_to_bed (dnum .index )
281284 bed .to_csv (
282285 os .path .join (
283- output_dir , project_name , "results" , project_name + ".peak_set.bed"
286+ output_dir , project_name , "results" ,
287+ project_name + ".peak_set.bed"
284288 ),
285289 index = False ,
286290 sep = "\t " ,
287291 header = False ,
288292 )
289293 dnum .to_csv (
290294 os .path .join (
291- output_dir , project_name , "results" , project_name + ".matrix_raw.csv"
295+ output_dir , project_name , "results" ,
296+ project_name + ".matrix_raw.csv"
292297 )
293298 )
294299 prev_level = _LOGGER .getEffectiveLevel ()
@@ -452,8 +457,7 @@ def initialize_analysis_of_data_type(data_type, pep_config, *args, **kwargs):
452457
453458
454459def get_random_genomic_locations (
455- n_regions , width_mean = 500 , width_std = 400 , min_width = 300 , genome_assembly = "hg38"
456- ):
460+ n_regions , width_mean = 500 , width_std = 400 , min_width = 300 , genome_assembly = "hg38" ):
457461 """Get ``n_regions`` number of random genomic locations respecting the boundaries of the ``genome_assembly``"""
458462 from ngs_toolkit .utils import bed_to_index
459463
@@ -502,5 +506,5 @@ def get_genomic_bins(n_bins, distribution="normal", genome_assembly="hg38"):
502506 return bed_to_index (w .head (n_bins ))
503507
504508
505- def disp (x ):
509+ def _disp (x ):
506510 return 4 / x + .1
0 commit comments