Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
Release History
===============

Version 2.5.2
==============


modisco
-------

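- Properly exposed the `max_seqlets_subsample` parameter (`merging_max_seqlets_subsample` in `TFMoDISco` and `seqlets_to_patterns`), which was previously overridden by a hardcoded value in the aggregator.
The default value changes from 300 to 1000, which is what was hardcoded before.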
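- Cleaned up some unused parameters in the reporting code: removed `trim_min_length` from the reporting functions and `output_dir` from `tomtomlite_dataframe`.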
Thanks @caenrigen!


Version 2.5.1
==============

Expand Down
2 changes: 1 addition & 1 deletion examples/MergeMotifsAcrossRuns.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@
"min_num = 30 # also called min_num_to_trim_to\n",
"flank_to_add = 5 # also called initial_flank_to_add\n",
"window_size = 20 # also called trim_to_window_size\n",
"max_seqlets_subsample = 300 # also called merging_max_seqlets_subsample\n",
"max_seqlets_subsample = 1000 # also called merging_max_seqlets_subsample\n",
"\n",
"pattern_group = 'pos_patterns' #pos_patterns or neg_patterns\n",
"\n",
Expand Down
2 changes: 1 addition & 1 deletion modiscolite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
from . import fasta_writer
from . import descriptive_report

__version__ = '2.5.1'
__version__ = '2.5.2'
4 changes: 2 additions & 2 deletions modiscolite/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size,
bg_freq=bg_freq, max_seqlets_subsample=1000)
bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)

to_return.extend(refined_subpatterns[0])
else:
Expand All @@ -197,7 +197,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size,
bg_freq=bg_freq, max_seqlets_subsample=1000)
bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)

def SimilarPatternsCollapser(patterns, track_set,
min_overlap, prob_and_pertrack_sim_merge_thresholds,
Expand Down
2 changes: 1 addition & 1 deletion modiscolite/descriptive_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def generate_descriptive_report(modisco_h5py: str, output_dir: str,
if meme_motif_db is not None:
from pathlib import Path
if ttl:
tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(output_dir), Path(meme_motif_db) if meme_motif_db else None,
tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(meme_motif_db) if meme_motif_db else None,
pattern_groups=pattern_groups, top_n_matches=top_n_matches,
trim_threshold=trim_threshold)
else:
Expand Down
20 changes: 8 additions & 12 deletions modiscolite/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def write_meme_file(ppm, bg, fname):

def fetch_tomtom_matches(ppm, cwm, is_writing_tomtom_matrix, output_dir,
pattern_name, motifs_db, background=[0.25, 0.25, 0.25, 0.25],
tomtom_exec_path='tomtom', trim_threshold=0.3, trim_min_length=3):
tomtom_exec_path='tomtom', trim_threshold=0.3):

"""Fetches top matches from a motifs database using TomTom.
Args:
Expand Down Expand Up @@ -103,7 +103,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
output_dir: os.PathLike, meme_motif_db: Union[os.PathLike, None],
is_writing_tomtom_matrix: bool, pattern_groups: List[str],
top_n_matches=3, tomtom_exec: str="tomtom", trim_threshold=0.3,
trim_min_length=3):
):

tomtom_results = {}

Expand Down Expand Up @@ -131,8 +131,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
is_writing_tomtom_matrix=is_writing_tomtom_matrix,
output_dir=output_dir, pattern_name=pattern_name,
motifs_db=meme_motif_db, tomtom_exec_path=tomtom_exec,
trim_threshold=trim_threshold,
trim_min_length=trim_min_length)
trim_threshold=trim_threshold)

i = -1
for i, (target, qval) in r.iloc[:top_n_matches].iterrows():
Expand All @@ -150,12 +149,10 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,

def tomtomlite_dataframe(
modisco_h5py: os.PathLike,
output_dir: os.PathLike,
meme_motif_db: Union[os.PathLike, None],
pattern_groups: List[str],
top_n_matches=3,
trim_threshold=0.3,
trim_min_length=3):
trim_threshold=0.3):
"""Use tomtom-lite to match patterns to a motif database."""

tomtom_results = {}
Expand Down Expand Up @@ -276,7 +273,7 @@ def create_modisco_logos(modisco_h5py: os.PathLike, modisco_logo_dir, trim_thres

def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix: os.PathLike,
meme_motif_db: Union[os.PathLike, None], is_writing_tomtom_matrix: bool, top_n_matches=3,
trim_threshold=0.3, trim_min_length=3, ttl=False):
trim_threshold=0.3, ttl=False):

if not os.path.isdir(output_dir):
os.mkdir(output_dir)
Expand Down Expand Up @@ -316,17 +313,16 @@ def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix:
motifs = {name: pwm.T for name, pwm in motifs.items()}

if ttl:
tomtom_df = tomtomlite_dataframe(modisco_h5py, output_dir, meme_motif_db,
tomtom_df = tomtomlite_dataframe(modisco_h5py, meme_motif_db,
top_n_matches=top_n_matches, pattern_groups=pattern_groups,
trim_threshold=trim_threshold, trim_min_length=trim_min_length)
trim_threshold=trim_threshold)
else:
motifs = {key.split()[0]: value for key, value in motifs.items()}

tomtom_df = generate_tomtom_dataframe(modisco_h5py, output_dir, meme_motif_db,
is_writing_tomtom_matrix,
top_n_matches=top_n_matches, tomtom_exec='tomtom',
pattern_groups=pattern_groups, trim_threshold=trim_threshold,
trim_min_length=trim_min_length)
pattern_groups=pattern_groups, trim_threshold=trim_threshold)

patterns_df = pandas.concat([patterns_df, tomtom_df], axis=1)

Expand Down
5 changes: 2 additions & 3 deletions modiscolite/tfmodisco.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import scipy
import scipy.sparse

from collections import OrderedDict
from collections import defaultdict

from . import affinitymat
Expand Down Expand Up @@ -161,7 +160,7 @@ def seqlets_to_patterns(seqlets, track_set, track_signs=None,
final_flank_to_add=0,
prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
subcluster_perplexity=50, merging_max_seqlets_subsample=300,
subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
final_min_cluster_size=20,min_ic_in_window=0.6, min_ic_windowsize=6,
ppm_pseudocount=0.001):

Expand Down Expand Up @@ -275,7 +274,7 @@ def TFMoDISco(one_hot, hypothetical_contribs, sliding_window_size=21,
initial_flank_to_add=10, final_flank_to_add=0,
prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
subcluster_perplexity=50, merging_max_seqlets_subsample=300,
subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
final_min_cluster_size=20, min_ic_in_window=0.6, min_ic_windowsize=6,
ppm_pseudocount=0.001, verbose=False):

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='modisco',
version='2.5.1',
version='2.5.2',
author='Jacob Schreiber',
author_email='jmschreiber91@gmail.com',
packages=['modiscolite'],
Expand Down