@@ -155,7 +155,7 @@ def scan_sequences(
155155
156156 # Format motifs
157157 if isinstance (motifs , str ):
158- motifs = read_meme_file (motifs )
158+ motifs = read_meme_file (motifs , names = names )
159159
160160 import tempfile
161161
@@ -319,8 +319,6 @@ def compare_motifs(
319319 motifs: A dictionary whose values are Position Probability Matrices
320320 (PPMs) of shape (4, L), or the path to a MEME file.
321321 alt_seq: The alternate sequence as a string
322- ref_allele: The alternate allele as a string. Only used if
323- alt_seq is not supplied.
324322 alt_allele: The alternate allele as a string. Only needed if
325323 alt_seq is not supplied.
326324 pos: The position at which to substitute the alternate allele.
@@ -345,24 +343,39 @@ def compare_motifs(
345343 names = names ,
346344 seq_ids = ["ref" , "alt" ],
347345 pthresh = pthresh ,
348- rc = True , # Scan both strands
346+ rc = rc , # Scan both strands
349347 )
350-
351- # Compare the results for alt and ref sequences
352- scan = (
353- scan .pivot_table (
354- index = ["motif" , "start" , "end" , "strand" ],
355- columns = ["sequence" ],
356- values = "score" ,
348+ if len (scan ) > 0 :
349+
350+ # Compare the results for alt and ref sequences
351+ scan = (
352+ scan .pivot_table (
353+ index = ["motif" , "start" , "end" , "strand" ],
354+ columns = ["sequence" ],
355+ values = ["score" , "p-value" ],
356+ )
357+ .reset_index ()
357358 )
358- .fillna (0 )
359- .reset_index ()
360- )
361-
362- # Compute fold change
363- scan ["foldChange" ] = scan .alt / scan .ref
364- scan = scan .sort_values ("foldChange" ).reset_index (drop = True )
365- return scan
359+ scan .columns = [col [0 ] if col [1 ] == '' else '_' .join (col ) for col in scan .columns ]
360+ for col in ["p-value_alt" , "p-value_ref" , "score_alt" , "score_ref" ]:
361+ if col not in scan .columns :
362+ scan [col ] = np .nan
363+
364+ # Fill in empty positions
365+ for row in scan [scan .score_alt .isna ()].itertuples ():
366+ sc = scan_sequences (seqs = alt_seq [row .start :row .end + 1 ], motifs = motifs , names = [row .motif ], pthresh = 1 , rc = row .strand == '-' ).iloc [0 ]
367+ scan .loc [row .Index , 'score_alt' ] = sc .score
368+ scan .loc [row .Index , 'p-value_alt' ] = sc ['p-value' ]
369+
370+ for row in scan [scan .score_ref .isna ()].itertuples ():
371+ sc = scan_sequences (seqs = ref_seq [row .start :row .end + 1 ], motifs = motifs , names = [row .motif ], pthresh = 1 , rc = row .strand == '-' ).iloc [0 ]
372+ scan .loc [row .Index , 'score_ref' ] = sc .score
373+ scan .loc [row .Index , 'p-value_ref' ] = sc ['p-value' ]
374+
375+ # Compute the score difference (alt - ref)
376+ scan ["score_diff" ] = scan .score_alt - scan .score_ref
377+ scan = scan .sort_values ("score_diff" ).reset_index (drop = True )
378+ return scan
366379
367380
368381def run_tomtom (motifs : Dict [str , np .ndarray ], meme_file : str ) -> pd .DataFrame :
0 commit comments