kundajelab · austintwang · Oct 12, 2025 · Oct 6, 2025 · Oct 6, 2025 · Oct 12, 2025
diff --git a/CHANGELOG b/CHANGELOG
@@ -2,6 +2,19 @@
 Release History
 ===============
 
+Version 2.5.2
+==============
+
+
+modisco
+-------
+
+	- Properly exposed the `max_seqlets_subsample` parameter, which was previously hardcoded
+	to 1000. The default of `merging_max_seqlets_subsample` changes from 300 to 1000 to match.
+	- Cleaned up some unused parameters in the reporting code.
+	Thanks @caenrigen!
+
+
 Version 2.5.1
 ==============
 

diff --git a/examples/MergeMotifsAcrossRuns.ipynb b/examples/MergeMotifsAcrossRuns.ipynb
@@ -628,7 +628,7 @@
         "min_num = 30 # also called min_num_to_trim_to\n",
         "flank_to_add = 5 # also called initial_flank_to_add\n",
         "window_size = 20 # also called trim_to_window_size\n",
-        "max_seqlets_subsample = 300 # also called merging_max_seqlets_subsample\n",
+        "max_seqlets_subsample = 1000 # also called merging_max_seqlets_subsample\n",
         "\n",
         "pattern_group = 'pos_patterns' #pos_patterns or neg_patterns\n",
         "\n",

diff --git a/modiscolite/__init__.py b/modiscolite/__init__.py
@@ -13,4 +13,4 @@
 from . import fasta_writer
 from . import descriptive_report
 
-__version__ = '2.5.1'
+__version__ = '2.5.2'
diff --git a/modiscolite/aggregator.py b/modiscolite/aggregator.py
@@ -186,7 +186,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
 				prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
 				prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
 				min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size, 
-				bg_freq=bg_freq, max_seqlets_subsample=1000)
+				bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)
 
 			to_return.extend(refined_subpatterns[0]) 
 		else:
@@ -197,7 +197,7 @@ def _detect_spurious_merging(patterns, track_set, perplexity,
 				prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
 				prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
 				min_frac=min_frac, min_num=min_num, flank_to_add=flank_to_add, window_size=window_size, 
-				bg_freq=bg_freq, max_seqlets_subsample=1000)
+				bg_freq=bg_freq, max_seqlets_subsample=max_seqlets_subsample)
 
 def SimilarPatternsCollapser(patterns, track_set,
 	min_overlap, prob_and_pertrack_sim_merge_thresholds,

diff --git a/modiscolite/descriptive_report.py b/modiscolite/descriptive_report.py
@@ -478,7 +478,7 @@ def generate_descriptive_report(modisco_h5py: str, output_dir: str,
     if meme_motif_db is not None:
         from pathlib import Path
         if ttl:
-            tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(output_dir), Path(meme_motif_db) if meme_motif_db else None,
+            tomtom_df = tomtomlite_dataframe(Path(modisco_h5py), Path(meme_motif_db) if meme_motif_db else None,
                 pattern_groups=pattern_groups, top_n_matches=top_n_matches,
                 trim_threshold=trim_threshold)
         else:

diff --git a/modiscolite/report.py b/modiscolite/report.py
@@ -43,7 +43,7 @@ def write_meme_file(ppm, bg, fname):
 
 def fetch_tomtom_matches(ppm, cwm, is_writing_tomtom_matrix, output_dir,
 	pattern_name, motifs_db, background=[0.25, 0.25, 0.25, 0.25],
-	tomtom_exec_path='tomtom', trim_threshold=0.3, trim_min_length=3):
+	tomtom_exec_path='tomtom', trim_threshold=0.3):
 
 	"""Fetches top matches from a motifs database using TomTom.
 	Args:
@@ -103,7 +103,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
 		output_dir: os.PathLike, meme_motif_db: Union[os.PathLike, None],
 		is_writing_tomtom_matrix: bool, pattern_groups: List[str], 
 		top_n_matches=3, tomtom_exec: str="tomtom", trim_threshold=0.3,
-		trim_min_length=3):
+	):
 
 	tomtom_results = {}
 
@@ -131,8 +131,7 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
 			     	is_writing_tomtom_matrix=is_writing_tomtom_matrix,
 					output_dir=output_dir, pattern_name=pattern_name,
 					motifs_db=meme_motif_db, tomtom_exec_path=tomtom_exec,
-					trim_threshold=trim_threshold,
-					trim_min_length=trim_min_length)
+					trim_threshold=trim_threshold)
 
 				i = -1
 				for i, (target, qval) in r.iloc[:top_n_matches].iterrows():
@@ -150,12 +149,10 @@ def generate_tomtom_dataframe(modisco_h5py: os.PathLike,
 
 def tomtomlite_dataframe(
 	modisco_h5py: os.PathLike,
-	output_dir: os.PathLike, 
 	meme_motif_db: Union[os.PathLike, None],
 	pattern_groups: List[str], 
 	top_n_matches=3, 
-	trim_threshold=0.3,
-	trim_min_length=3):
+	trim_threshold=0.3):
 	"""Use tomtom-lite to match patterns to a motif database."""
 
 	tomtom_results = {}
@@ -276,7 +273,7 @@ def create_modisco_logos(modisco_h5py: os.PathLike, modisco_logo_dir, trim_thres
 
 def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix: os.PathLike, 
 	meme_motif_db: Union[os.PathLike, None], is_writing_tomtom_matrix: bool, top_n_matches=3,
-	trim_threshold=0.3, trim_min_length=3, ttl=False):
+	trim_threshold=0.3, ttl=False):
 
 	if not os.path.isdir(output_dir):
 		os.mkdir(output_dir)
@@ -316,17 +313,16 @@ def report_motifs(modisco_h5py: Path, output_dir: os.PathLike, img_path_suffix:
 		motifs = {name: pwm.T for name, pwm in motifs.items()}
 
 		if ttl:
-			tomtom_df = tomtomlite_dataframe(modisco_h5py, output_dir, meme_motif_db,
+			tomtom_df = tomtomlite_dataframe(modisco_h5py, meme_motif_db,
 				top_n_matches=top_n_matches, pattern_groups=pattern_groups, 
-				trim_threshold=trim_threshold, trim_min_length=trim_min_length)
+				trim_threshold=trim_threshold)
 		else:
 			motifs = {key.split()[0]: value for key, value in motifs.items()}
 
 			tomtom_df = generate_tomtom_dataframe(modisco_h5py, output_dir, meme_motif_db,
 				is_writing_tomtom_matrix,
 				top_n_matches=top_n_matches, tomtom_exec='tomtom', 
-				pattern_groups=pattern_groups, trim_threshold=trim_threshold,
-				trim_min_length=trim_min_length)
+				pattern_groups=pattern_groups, trim_threshold=trim_threshold)
 
 		patterns_df = pandas.concat([patterns_df, tomtom_df], axis=1)
 

diff --git a/modiscolite/tfmodisco.py b/modiscolite/tfmodisco.py
@@ -7,7 +7,6 @@
 import scipy
 import scipy.sparse
 
-from collections import OrderedDict
 from collections import defaultdict
 
 from . import affinitymat
@@ -161,7 +160,7 @@ def seqlets_to_patterns(seqlets, track_set, track_signs=None,
 	final_flank_to_add=0,
 	prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
 	prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
-	subcluster_perplexity=50, merging_max_seqlets_subsample=300,
+	subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
 	final_min_cluster_size=20,min_ic_in_window=0.6, min_ic_windowsize=6,
 	ppm_pseudocount=0.001):
 
@@ -275,7 +274,7 @@ def TFMoDISco(one_hot, hypothetical_contribs, sliding_window_size=21,
 	initial_flank_to_add=10, final_flank_to_add=0,
 	prob_and_pertrack_sim_merge_thresholds=[(0.8,0.8), (0.5, 0.85), (0.2, 0.9)],
 	prob_and_pertrack_sim_dealbreaker_thresholds=[(0.4, 0.75), (0.2,0.8), (0.1, 0.85), (0.0,0.9)],
-	subcluster_perplexity=50, merging_max_seqlets_subsample=300,
+	subcluster_perplexity=50, merging_max_seqlets_subsample=1000,
 	final_min_cluster_size=20, min_ic_in_window=0.6, min_ic_windowsize=6,
 	ppm_pseudocount=0.001, verbose=False):
 

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
 	name='modisco',
-	version='2.5.1',
+	version='2.5.2',
 	author='Jacob Schreiber',
 	author_email='jmschreiber91@gmail.com',
 	packages=['modiscolite'],