Skip to content

Commit e9d4d6d

Browse files
Fix max_seqlets_subsample (#123)
* Fix max_seqlets_subsample bug

  Mirror the fix in: jmschrei/tfmodisco-lite#60

* Remove unused trim_min_length argument in report functions

* Update descriptive_report calls

* Changelog

---------

Co-authored-by: Austin Wang <austin.wang1357@gmail.com>
1 parent 2cb3294 commit e9d4d6d

File tree

8 files changed

+29
-21
lines changed

8 files changed

+29
-21
lines changed

CHANGELOG

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@
22
Release History
33
===============
44

5+
Version 2.5.2
6+
==============
7+
8+
9+
modisco
10+
------
11+
12+
- Properly expose the `max_seqlets_subsample` parameter. Sets the default value to 1000,
13+
which is what was hardcoded before.
14+
- Cleaned up some unused parameters in the reporting code.
15+
Thanks @caenrigen!
16+
17+
518
Version 2.5.1
619
==============
720

examples/MergeMotifsAcrossRuns.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@
628628
"min_num = 30 # also called min_num_to_trim_to\n",
629629
"flank_to_add = 5 # also called initial_flank_to_add\n",
630630
"window_size = 20 # also called trim_to_window_size\n",
631-
"max_seqlets_subsample = 300 # also called merging_max_seqlets_subsample\n",
631+
"max_seqlets_subsample = 1000 # also called merging_max_seqlets_subsample\n",
632632
"\n",
633633
"pattern_group = 'pos_patterns' #pos_patterns or neg_patterns\n",
634634
"\n",

modiscolite/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
from . import fasta_writer
1414
from . import descriptive_report
1515

16-
__version__ = '2.5.1'
16+
__version__ = '2.5.2'

modiscolite/aggregator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
186186
prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
187187
prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
188188
min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size,
189-
bg_freq=bg_freq, max_seqlets_subsample=1000)
189+
bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)
190190

191191
to_return.extend(refined_subpatterns[0])
192192
else:
@@ -197,7 +197,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
197197
prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
198198
prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
199199
min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size,
200-
bg_freq=bg_freq, max_seqlets_subsample=1000)
200+
bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)
201201

202202
def SimilarPatternsCollapser(patterns, track_set,
203203
min_overlap, prob_and_pertrack_sim_merge_thresholds,

modiscolite/descriptive_report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ def generate_descriptive_report(modisco_h5py: str, output_dir: str,
478478
if meme_motif_db is not None:
479479
from pathlib import Path
480480
if ttl:
481-
tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(output_dir), Path(meme_motif_db) if meme_motif_db else None,
481+
tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(meme_motif_db) if meme_motif_db else None,
482482
pattern_groups=pattern_groups, top_n_matches=top_n_matches,
483483
trim_threshold=trim_threshold)
484484
else:

modiscolite/report.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def write_meme_file(ppm, bg, fname):
4343

4444
def fetch_tomtom_matches(ppm, cwm, is_writing_tomtom_matrix, output_dir,
4545
pattern_name, motifs_db, background=[0.25, 0.25, 0.25, 0.25],
46-
tomtom_exec_path='tomtom', trim_threshold=0.3, trim_min_length=3):
46+
tomtom_exec_path='tomtom', trim_threshold=0.3):
4747

4848
"""Fetches top matches from a motifs database using TomTom.
4949
Args:
@@ -103,7 +103,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
103103
output_dir: os.PathLike, meme_motif_db: Union[os.PathLike, None],
104104
is_writing_tomtom_matrix: bool, pattern_groups: List[str],
105105
top_n_matches=3, tomtom_exec: str="tomtom", trim_threshold=0.3,
106-
trim_min_length=3):
106+
):
107107

108108
tomtom_results = {}
109109

@@ -131,8 +131,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
131131
is_writing_tomtom_matrix=is_writing_tomtom_matrix,
132132
output_dir=output_dir, pattern_name=pattern_name,
133133
motifs_db=meme_motif_db, tomtom_exec_path=tomtom_exec,
134-
trim_threshold=trim_threshold,
135-
trim_min_length=trim_min_length)
134+
trim_threshold=trim_threshold)
136135

137136
i = -1
138137
for i, (target, qval) in r.iloc[:top_n_matches].iterrows():
@@ -150,12 +149,10 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
150149

151150
def tomtomlite_dataframe(
152151
modisco_h5py: os.PathLike,
153-
output_dir: os.PathLike,
154152
meme_motif_db: Union[os.PathLike, None],
155153
pattern_groups: List[str],
156154
top_n_matches=3,
157-
trim_threshold=0.3,
158-
trim_min_length=3):
155+
trim_threshold=0.3):
159156
"""Use tomtom-lite to match patterns to a motif database."""
160157

161158
tomtom_results = {}
@@ -276,7 +273,7 @@ def create_modisco_logos(modisco_h5py: os.PathLike, modisco_logo_dir, trim_thres
276273

277274
def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix: os.PathLike,
278275
meme_motif_db: Union[os.PathLike, None], is_writing_tomtom_matrix: bool, top_n_matches=3,
279-
trim_threshold=0.3, trim_min_length=3, ttl=False):
276+
trim_threshold=0.3, ttl=False):
280277

281278
if not os.path.isdir(output_dir):
282279
os.mkdir(output_dir)
@@ -316,17 +313,16 @@ def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix:
316313
motifs = {name: pwm.T for name, pwm in motifs.items()}
317314

318315
if ttl:
319-
tomtom_df = tomtomlite_dataframe(modisco_h5py, output_dir, meme_motif_db,
316+
tomtom_df = tomtomlite_dataframe(modisco_h5py, meme_motif_db,
320317
top_n_matches=top_n_matches, pattern_groups=pattern_groups,
321-
trim_threshold=trim_threshold, trim_min_length=trim_min_length)
318+
trim_threshold=trim_threshold)
322319
else:
323320
motifs = {key.split()[0]: value for key, value in motifs.items()}
324321

325322
tomtom_df = generate_tomtom_dataframe(modisco_h5py, output_dir, meme_motif_db,
326323
is_writing_tomtom_matrix,
327324
top_n_matches=top_n_matches, tomtom_exec='tomtom',
328-
pattern_groups=pattern_groups, trim_threshold=trim_threshold,
329-
trim_min_length=trim_min_length)
325+
pattern_groups=pattern_groups, trim_threshold=trim_threshold)
330326

331327
patterns_df = pandas.concat([patterns_df, tomtom_df], axis=1)
332328

modiscolite/tfmodisco.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import scipy
88
import scipy.sparse
99

10-
from collections import OrderedDict
1110
from collections import defaultdict
1211

1312
from . import affinitymat
@@ -161,7 +160,7 @@ def seqlets_to_patterns(seqlets, track_set, track_signs=None,
161160
final_flank_to_add=0,
162161
prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
163162
prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
164-
subcluster_perplexity=50, merging_max_seqlets_subsample=300,
163+
subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
165164
final_min_cluster_size=20,min_ic_in_window=0.6, min_ic_windowsize=6,
166165
ppm_pseudocount=0.001):
167166

@@ -275,7 +274,7 @@ def TFMoDISco(one_hot, hypothetical_contribs, sliding_window_size=21,
275274
initial_flank_to_add=10, final_flank_to_add=0,
276275
prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
277276
prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
278-
subcluster_perplexity=50, merging_max_seqlets_subsample=300,
277+
subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
279278
final_min_cluster_size=20, min_ic_in_window=0.6, min_ic_windowsize=6,
280279
ppm_pseudocount=0.001, verbose=False):
281280

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='modisco',
5-
version='2.5.1',
5+
version='2.5.2',
66
author='Jacob Schreiber',
77
author_email='jmschreiber91@gmail.com',
88
packages=['modiscolite'],

0 commit comments

Comments
 (0)