1 file changed: +9 -4 lines changed
Original file line number Diff line number Diff line change
66import argparse
77import glob
88import os
9+ import re
910from collections import Counter , defaultdict , OrderedDict
1011from collections .abc import Set
1112from typing import Dict
@@ -50,14 +51,18 @@ def discover_transcript_attribute(gtf_file: str, transcripts: Set[str]) -> str:
5051 Returns:
5152 str: The attribute name that corresponds to transcripts in the GTF file.
5253 """
54+
5355 votes = Counter ()
5456 with open (gtf_file ) as inh :
55- # Read GTF file, skipping header lines
57+ # Read GTF file, skipping header lines
5658 for line in filter (lambda x : not x .startswith ("#" ), inh ):
5759 cols = line .split ("\t " )
58- # Parse attribute column and update votes for each attribute found
59- attributes = dict (item .strip ().split (" " , 1 ) for item in cols [8 ].split (";" ) if item .strip ())
60- votes .update (key for key , value in attributes .items () if value .strip ('"' ) in transcripts )
60+
61+ # Use regular expression to correctly split the attributes string
62+ attributes_str = cols [8 ]
63+ attributes = dict (re .findall (r'(\S+) "(.*?)(?<!\\)";' , attributes_str ))
64+
65+ votes .update (key for key , value in attributes .items ())
6166
6267 if not votes :
6368 # Log a warning if no matching attribute is found
You can’t perform that action at this time.
0 commit comments