2525import pandas as pd
2626import itertools
2727import gzip
28-
28+ import pdb
2929def getGraph (graph_file , paths_file ):
3030 # Load graph file into memory
3131 S_lines = list ()
@@ -81,14 +81,14 @@ def getGraph(graph_file, paths_file):
8181 # This is the start of a scaffold
8282 record = True
8383 current_scaffold = line .rstrip ()
84- scaffolds .add (current_scaffold )
84+ scaffolds .append (current_scaffold )
8585 current_seg_list = list ()
8686 elif len (line ) > 5 and line [0 :5 ] == 'NODE_' and line .rstrip ()[- 1 ] == "'" :
8787 record = False
8888 else :
8989 if record == True :
9090 clean_line = line .rstrip (';\n ' )
91- initial_path_list = line_list [ 2 ] .split (',' )
91+ initial_path_list = clean_line .split (',' )
9292 for segment_string in initial_path_list :
9393 segment = int (segment_string [:- 1 ])
9494 orientation = segment_string [- 1 ]
@@ -97,7 +97,7 @@ def getGraph(graph_file, paths_file):
9797 # Record last path list
9898 scaffold_paths [current_scaffold ] = current_seg_list
9999
100- paths_file .close ()
100+ paths .close ()
101101 else :
102102 # In this case we get the paths directly from the gfa file
103103 for line in P_lines :
@@ -130,27 +130,29 @@ def getGraph(graph_file, paths_file):
130130 for i in range (0 , len (scaffold_path_list )):
131131 if length_traversed < 100 :
132132 if scaffold_path_list [1 ][1 ] == '+' :
133- seg_ends .append (scaffold_path_list [i ][0 ] + 's' )
133+ seg_ends .append (str ( scaffold_path_list [i ][0 ]) + 's' )
134134 else :
135- seg_ends .append (scaffold_path_list [i ][0 ] + 'e' )
135+ seg_ends .append (str ( scaffold_path_list [i ][0 ]) + 'e' )
136136 scaffold_ends .append (scaffold_name + 's' )
137137 else :
138138 break
139-
140- length_traversed += scaffold_lengths [scaffold_path_list [i ][0 ]]
139+ try :
140+ length_traversed += segment_lengths [scaffold_path_list [i ][0 ]]
141+ except :
142+ pdb .set_trace ()
141143
142144 length_traversed = 0
143145 for i in reversed (range (0 , len (scaffold_path_list ))):
144146 if length_traversed < 100 :
145147 if segment_lengths [scaffold_path_list [i ][1 ]] == '+' :
146- seg_ends .append (scaffold_path_list [i ][0 ] + 'e' )
148+ seg_ends .append (str ( scaffold_path_list [i ][0 ]) + 'e' )
147149 else :
148- seg_ends .append (scaffold_path_list [i ][0 ] + 's' )
150+ seg_ends .append (str ( scaffold_path_list [i ][0 ]) + 's' )
149151 scaffold_ends .append (scaffold_name + 'e' )
150152 else :
151153 break
152154
153- length_traversed += scaffold_lengths [scaffold_path_list [i ][0 ]]
155+ length_traversed += segment_lengths [scaffold_path_list [i ][0 ]]
154156
155157 for i in range (len (seg_ends )):
156158 if seg_ends [i ] in end_segments :
@@ -283,7 +285,7 @@ def bfs(graph, start_set):
283285 exit (1 )
284286
285287graph_filename = graph_file_path .split ('/' )[- 1 ]
286- if ! (graph_filename == 'assembly_graph.gfa' or graph_filename == 'assembly_graph.gfa.gz' ) and ! (graph_filename == 'assembly_graph_with_scaffolds.gfa' or graph_filename == 'assembly_graph_with_scaffolds.gfa.gz' ):
288+ if not (graph_filename == 'assembly_graph.gfa' or graph_filename == 'assembly_graph.gfa.gz' ) and not (graph_filename == 'assembly_graph_with_scaffolds.gfa' or graph_filename == 'assembly_graph_with_scaffolds.gfa.gz' ):
287289 print ('Error! You must provide either the file assembly_graph.gfa or assembly_graph_with_scaffolds.gfa as the graph file' )
288290 exit (1 )
289291
0 commit comments