diff --git a/modisco b/modisco index 5088aef..3ef7ecd 100755 --- a/modisco +++ b/modisco @@ -185,7 +185,7 @@ if args.cmd == "motifs": if sequences.shape[1] < args.window: raise ValueError("Window ({}) cannot be ".format(args.window) + - "longer than the sequences".format(sequences.shape)) + "longer than the sequences {}".format(sequences.shape[1])) sequences = sequences.astype('float32') attributions = attributions.astype('float32') diff --git a/modiscolite/aggregator.py b/modiscolite/aggregator.py index 876fc3e..933f8c1 100644 --- a/modiscolite/aggregator.py +++ b/modiscolite/aggregator.py @@ -41,7 +41,6 @@ def polish_pattern(pattern, min_frac, min_num, track_set, flank, window_size, bg def _expand_seqlets_to_fill_pattern(pattern, track_set, left_flank_to_add, right_flank_to_add): - new_seqlets = [] for seqlet in pattern.seqlets: left_expansion = left_flank_to_add @@ -55,11 +54,11 @@ def _expand_seqlets_to_fill_pattern(pattern, track_set, left_flank_to_add, end = seqlet.end + left_expansion if start >= 0 and end <= track_set.length: - seqlet = track_set.create_seqlets( + seqlets = track_set.create_seqlets( seqlets=[core.Seqlet(example_idx=seqlet.example_idx, - start=start, end=end, is_revcomp=seqlet.is_revcomp)])[0] - - new_seqlets.append(seqlet) + start=start, end=end, is_revcomp=seqlet.is_revcomp)]) + if len(seqlets) > 0: + new_seqlets.append(seqlets[0]) if len(new_seqlets) > 0: return core.SeqletSet(seqlets=new_seqlets) @@ -299,7 +298,7 @@ def SimilarPatternsCollapser(patterns, track_set, within_pattern1_sims = affinitymat.jaccard( flat_pattern1_fwdseqdata[:, :, None], - flat_pattern1_fwdseqdata[:, :, None])[:, :, 0].flatten() + flat_pattern1_fwdseqdata[:, :, None])[:, :, 0].flatten() auroc = roc_auc_score( y_true=[0 for x in between_pattern_sims] @@ -502,4 +501,4 @@ def SimilarPatternsCollapser(patterns, track_set, current_level_nodes = next_level_nodes return patterns, PatternMergeHierarchy(root_nodes=current_level_nodes) - \ No newline at end of file + diff --git a/modiscolite/core.py b/modiscolite/core.py index 639531b..3d0e7f4 100644 --- a/modiscolite/core.py +++ b/modiscolite/core.py @@ -21,7 +21,8 @@ def __init__(self, one_hot, contrib_scores, hypothetical_contribs): self.length = len(one_hot[0]) def create_seqlets(self, seqlets): - for seqlet in seqlets: + out_seqlets = [] + for i,seqlet in enumerate(seqlets): idx = seqlet.example_idx s, e = seqlet.start, seqlet.end @@ -33,8 +34,11 @@ def create_seqlets(self, seqlets): seqlet.sequence = self.one_hot[idx][s:e] seqlet.contrib_scores = self.contrib_scores[idx][s:e] seqlet.hypothetical_contribs = self.hypothetical_contribs[idx][s:e] - - return seqlets + + # verify that seqlet is not at the end of the sequence and has some bp set + if seqlet.sequence.sum()>0: + out_seqlets.append(seqlet) + return out_seqlets class Seqlet(object): def __init__(self, example_idx, start, end, is_revcomp):