From ccac12a275744c7fd5135dc053e242f9047e813e Mon Sep 17 00:00:00 2001 From: Rob Egan Date: Tue, 4 Apr 2017 13:40:39 -0700 Subject: [PATCH 1/3] minor changes to make compatible with python3 install --- poretools/Fast5File.py | 32 ++++++++++++++++---------------- poretools/events.py | 8 ++++---- poretools/fasta.py | 2 +- poretools/fastq.py | 2 +- poretools/formats.py | 4 ++-- poretools/index.py | 6 +++--- poretools/metadata.py | 8 ++++---- poretools/nucdist.py | 4 ++-- poretools/occupancy.py | 6 +++--- poretools/qualdist.py | 4 ++-- poretools/readstats.py | 4 ++-- poretools/stats.py | 40 ++++++++++++++++++++-------------------- poretools/tabular.py | 6 +++--- poretools/times.py | 8 ++++---- poretools/winner.py | 2 +- setup.py | 2 +- 16 files changed, 69 insertions(+), 69 deletions(-) diff --git a/poretools/Fast5File.py b/poretools/Fast5File.py index be9e69f..0156c10 100644 --- a/poretools/Fast5File.py +++ b/poretools/Fast5File.py @@ -224,7 +224,7 @@ def open(self): try: self.hdf5file = h5py.File(self.filename, 'r') return True - except Exception, e: + except Exception as e: logger.warning("Cannot open file: %s. Perhaps it is corrupt? 
Moving on.\n" % self.filename) return False @@ -565,7 +565,7 @@ def get_duration(self): if node: try: return int(node.attrs['duration']) / self.get_sample_frequency() - except Exception, e: + except Exception as e: logger.error(str(e)) pass @@ -589,7 +589,7 @@ def get_start_time(self): try: frequency = int(self.get_sample_frequency()) return int(exp_start_time) + int(node.attrs['start_time'] / frequency) - except Exception, e: + except Exception as e: logger.error(str(e)) pass @@ -777,7 +777,7 @@ def get_sample_name(self): try: return self.keyinfo['context_tags'].attrs['user_filename_input'] - except Exception, e: + except Exception as e: return None def get_sample_frequency(self): @@ -791,7 +791,7 @@ def get_sample_frequency(self): try: return int(self.keyinfo['context_tags'].attrs['sample_frequency']) - except Exception, e: + except Exception as e: return None def get_script_name(self): @@ -800,7 +800,7 @@ def get_script_name(self): self.have_metdata = True try: return self.keyinfo['tracking_id'].attrs['exp_script_name'] - except Exception, e: + except Exception as e: return None def get_template_events_count(self): @@ -810,7 +810,7 @@ def get_template_events_count(self): try: table = self.hdf5file[fastq_paths[self.version]['template'] % self.group] return len(table['Events'][()]) - except Exception, e: + except Exception as e: return 0 def get_complement_events_count(self): @@ -820,7 +820,7 @@ def get_complement_events_count(self): try: table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group] return len(table['Events'][()]) - except Exception, e: + except Exception as e: return 0 def is_high_quality(self): @@ -851,7 +851,7 @@ def get_best_type(self): return 'template' else: return 'complement' - except Exception, e: + except Exception as e: return None #################################################################### @@ -868,7 +868,7 @@ def _extract_fastqs_from_fast5(self): fq = formats.Fastq(table['Fastq'][()]) fq.name += " " + 
self.filename self.fastqs[id] = fq - except Exception, e: + except Exception as e: pass def _extract_fastas_from_fast5(self): @@ -881,7 +881,7 @@ def _extract_fastas_from_fast5(self): fa = formats.Fasta(table['Fastq'][()]) fa.name += " " + self.filename self.fastas[id] = fa - except Exception, e: + except Exception as e: pass def _extract_template_events(self): @@ -891,7 +891,7 @@ def _extract_template_events(self): try: table = self.hdf5file[fastq_paths[self.version]['template'] % self.group] self.template_events = [Event(x) for x in table['Events'][()]] - except Exception, e: + except Exception as e: self.template_events = [] def _extract_complement_events(self): @@ -901,7 +901,7 @@ def _extract_complement_events(self): try: table = self.hdf5file[fastq_paths[self.version]['complement'] % self.group] self.complement_events = [Event(x) for x in table['Events'][()]] - except Exception, e: + except Exception as e: self.complement_events = [] def _extract_pre_basecalled_events(self): @@ -914,15 +914,15 @@ def _extract_pre_basecalled_events(self): for read in table: events.extend(table[read]["Events"][()]) self.pre_basecalled_events = [Event(x) for x in events] - # except Exception, e: + # except Exception as e: # self.pre_basecalled_events = [] def _get_metadata(self): try: self.keyinfo = self.hdf5file['/UniqueGlobalKey'] - except Exception, e: + except Exception as e: try: self.keyinfo = self.hdf5file['/Key'] - except Exception, e: + except Exception as e: self.keyinfo = None logger.warning("Cannot find keyinfo. 
Exiting.\n") diff --git a/poretools/events.py b/poretools/events.py index c0315ec..fe22543 100644 --- a/poretools/events.py +++ b/poretools/events.py @@ -7,18 +7,18 @@ def run(parser, args): 'length', 'model_state', 'model_level', 'move', \ 'p_model_state', 'mp_model_state', 'p_mp_model_state', \ 'p_A', 'p_C', 'p_G', 'p_T', 'raw_index'] - print "\t".join(keys) + print("\t".join(keys)) if args.pre_basecalled: for fast5 in Fast5File.Fast5FileSet(args.files): for event in fast5.get_pre_basecalled_events(): - print '\t'.join([fast5.filename, 'pre_basecalled', str(event)]) + print('\t'.join([fast5.filename, 'pre_basecalled', str(event)])) else: for fast5 in Fast5File.Fast5FileSet(args.files): for event in fast5.get_template_events(): - print '\t'.join([fast5.filename, 'template', str(event)]) + print('\t'.join([fast5.filename, 'template', str(event)])) for event in fast5.get_complement_events(): - print '\t'.join([fast5.filename, 'complement', str(event)]) + print('\t'.join([fast5.filename, 'complement', str(event)])) fast5.close() diff --git a/poretools/fasta.py b/poretools/fasta.py index c65b58f..13da930 100644 --- a/poretools/fasta.py +++ b/poretools/fasta.py @@ -42,7 +42,7 @@ def run(parser, args): args.max_length > 0): continue - print fa + print(fa) fast5.close() diff --git a/poretools/fastq.py b/poretools/fastq.py index 314d264..1362fd0 100644 --- a/poretools/fastq.py +++ b/poretools/fastq.py @@ -42,7 +42,7 @@ def run(parser, args): args.max_length > 0): continue - print fa + print(fa) fast5.close() diff --git a/poretools/formats.py b/poretools/formats.py index 29bc7b5..07eef24 100644 --- a/poretools/formats.py +++ b/poretools/formats.py @@ -19,7 +19,7 @@ def est_error_rate(self): phred = ord(score) - 33 error_count += 10.0 ** (-phred / 10.0) return error_count / len(self.qual) - except Exception, e: + except Exception: return 0.0 @@ -34,4 +34,4 @@ def parse(self): self.name = self.name.lstrip('@') def __repr__(self): - return '\n'.join(['>'+self.name, self.seq]) 
\ No newline at end of file + return '\n'.join(['>'+self.name, self.seq]) diff --git a/poretools/index.py b/poretools/index.py index abc1af0..194e366 100644 --- a/poretools/index.py +++ b/poretools/index.py @@ -17,7 +17,7 @@ def run(parser, args): - print "source_filename\ttemplate_fwd_length\tcomplement_rev_length\t2d_length\tasic_id\tasic_temp\theatsink_temp\tchannel\texp_start_time\texp_start_time_string_date\texp_start_time_string_time\tstart_time\tstart_time_string_date\tstart_time_string_time\tduration\tfast5_version" + print("source_filename\ttemplate_fwd_length\tcomplement_rev_length\t2d_length\tasic_id\tasic_temp\theatsink_temp\tchannel\texp_start_time\texp_start_time_string_date\texp_start_time_string_time\tstart_time\tstart_time_string_date\tstart_time_string_time\tduration\tfast5_version") for fast5 in Fast5File.Fast5FileSet(args.files): @@ -56,11 +56,11 @@ def run(parser, args): length_complement = len(fastq_reads[1].seq) length_2d = len(fastq_reads[2].seq) - print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % ( + print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % ( fast5.filename, length_template, length_complement, length_2d, - asic_id, asic_temp, heatsink_temp,channel_number,exp_start_time,exp_start_time_string,start_time,start_time_string,duration,fast5_version) + asic_id, asic_temp, heatsink_temp,channel_number,exp_start_time,exp_start_time_string,start_time,start_time_string,duration,fast5_version)) fast5.close() diff --git a/poretools/metadata.py b/poretools/metadata.py index 958a675..062849d 100644 --- a/poretools/metadata.py +++ b/poretools/metadata.py @@ -7,16 +7,16 @@ def run(parser, args): for metadata_dict in fast5.read_metadata: if i == 0: header = metadata_dict.keys() - print "\t".join(["filename"] + header) - print "\t".join([fast5.filename] + [str( metadata_dict[k] ) for k in header]) + print("\t".join(["filename"] + list(header))) + print("\t".join([fast5.filename] + [str( metadata_dict[k] ) for k in header]))
else: - print "asic_id\tasic_temp\theatsink_temp" + print("asic_id\tasic_temp\theatsink_temp") for fast5 in Fast5File.Fast5FileSet(args.files): asic_temp = fast5.get_asic_temp() asic_id = fast5.get_asic_id() heatsink_temp = fast5.get_heatsink_temp() - print "%s\t%s\t%s" % (asic_id, asic_temp, heatsink_temp) + print("%s\t%s\t%s" % (asic_id, asic_temp, heatsink_temp)) fast5.close() diff --git a/poretools/nucdist.py b/poretools/nucdist.py index 94a7fae..aa0680f 100644 --- a/poretools/nucdist.py +++ b/poretools/nucdist.py @@ -15,5 +15,5 @@ def run(parser, args): fast5.close() for n in nuc_count: - print '\t'.join(str(s) for s in [n, nuc_count[n], - total_nucs, float(nuc_count[n]) / float(total_nucs)]) \ No newline at end of file + print('\t'.join(str(s) for s in [n, nuc_count[n], + total_nucs, float(nuc_count[n]) / float(total_nucs)])) diff --git a/poretools/occupancy.py b/poretools/occupancy.py index 532d6c2..a54d718 100644 --- a/poretools/occupancy.py +++ b/poretools/occupancy.py @@ -55,7 +55,7 @@ def run(parser, args): tot_reads_per_pore = Counter() tot_bp_per_pore = Counter() - print "\t".join(['channel_number', 'start_time', 'duration']) + print("\t".join(['channel_number', 'start_time', 'duration'])) for fast5 in Fast5File.Fast5FileSet(args.files): if fast5.is_open: fq = fast5.get_fastq() @@ -70,10 +70,10 @@ def run(parser, args): tot_reads_per_pore[int(pore_id)] += 1 tot_bp_per_pore[int(pore_id)] += len(fq.seq) - print "\t".join([ + print("\t".join([ str(pore_id), str(start_time), - str(fast5.get_duration())]) + str(fast5.get_duration())])) fast5.close() if args.plot_type == 'read_count': diff --git a/poretools/qualdist.py b/poretools/qualdist.py index 66fbb26..4d58a9c 100644 --- a/poretools/qualdist.py +++ b/poretools/qualdist.py @@ -15,5 +15,5 @@ def run(parser, args): fast5.close() for q in qual_count: - print '\t'.join(str(s) for s in [chr(q+33), q, qual_count[q], - total_nucs, float(qual_count[q]) / float(total_nucs)]) \ No newline at end of file + 
print('\t'.join(str(s) for s in [chr(q+33), q, qual_count[q], + total_nucs, float(qual_count[q]) / float(total_nucs)])) diff --git a/poretools/readstats.py b/poretools/readstats.py index 3d7658f..1050844 100644 --- a/poretools/readstats.py +++ b/poretools/readstats.py @@ -2,7 +2,7 @@ def run(parser, args): - print "start_time\tchannel_number\tread_number\ttemplate_events\tcomplement_events" + print("start_time\tchannel_number\tread_number\ttemplate_events\tcomplement_events") for fast5 in Fast5File.Fast5FileSet(args.files): @@ -22,6 +22,6 @@ def run(parser, args): else: complement_len = 0 - print "%s\t%s\t%s\t%s\t%s" % (start_time, channel_number, read_number, template_len, complement_len) + print("%s\t%s\t%s\t%s\t%s" % (start_time, channel_number, read_number, template_len, complement_len)) fast5.close() diff --git a/poretools/stats.py b/poretools/stats.py index d22082a..20ec707 100644 --- a/poretools/stats.py +++ b/poretools/stats.py @@ -23,22 +23,22 @@ def run(parser, args): fast5.close() - print "files\ttotal reads\t%d" % (files) - print "files\ttotal base-called reads\t%d" % (basecalled_files) + print("files\ttotal reads\t%d" % (files)) + print("files\ttotal base-called reads\t%d" % (basecalled_files)) for category in sorted(stats.keys()): sizes = stats[category] if len(sizes) > 0: - print "%s\ttotal reads\t%d" % (category, len(sizes)) - print "%s\ttotal base pairs\t%d" % (category, sum(sizes)) - print "%s\tmean\t%.2f" % (category, stat.mean(sizes)) - print "%s\tmedian\t%d" % (category, stat.median(sizes)) - print "%s\tmin\t%d" % (category, min(sizes)) - print "%s\tmax\t%d" % (category, max(sizes)) + print("%s\ttotal reads\t%d" % (category, len(sizes))) + print("%s\ttotal base pairs\t%d" % (category, sum(sizes))) + print("%s\tmean\t%.2f" % (category, stat.mean(sizes))) + print("%s\tmedian\t%d" % (category, stat.median(sizes))) + print("%s\tmin\t%d" % (category, min(sizes))) + print("%s\tmax\t%d" % (category, max(sizes))) nxvalues = stat.NX(sizes, [25,50,75]) - 
print "%s\tN25\t%d" % (category, nxvalues[25]) - print "%s\tN50\t%d" % (category, nxvalues[50]) - print "%s\tN75\t%d" % (category, nxvalues[75]) + print("%s\tN25\t%d" % (category, nxvalues[25])) + print("%s\tN50\t%d" % (category, nxvalues[50])) + print("%s\tN75\t%d" % (category, nxvalues[75])) else: logger.warning("No valid sequences observed.\n") else: @@ -49,15 +49,15 @@ def run(parser, args): fast5.close() if len(sizes) > 0: - print "total reads\t%d" % (len(sizes)) - print "total base pairs\t%d" % (sum(sizes)) - print "mean\t%.2f" % (stat.mean(sizes)) - print "median\t%d" % (stat.median(sizes)) - print "min\t%d" % (min(sizes)) - print "max\t%d" % (max(sizes)) + print("total reads\t%d" % (len(sizes))) + print("total base pairs\t%d" % (sum(sizes))) + print("mean\t%.2f" % (stat.mean(sizes))) + print("median\t%d" % (stat.median(sizes))) + print("min\t%d" % (min(sizes))) + print("max\t%d" % (max(sizes))) nxvalues = stat.NX(sizes, [25,50,75]) - print "N25\t%d" % (nxvalues[25]) - print "N50\t%d" % (nxvalues[50]) - print "N75\t%d" % (nxvalues[75]) + print("N25\t%d" % (nxvalues[25])) + print("N50\t%d" % (nxvalues[50])) + print("N75\t%d" % (nxvalues[75])) else: logger.warning("No valid sequences observed.\n") diff --git a/poretools/tabular.py b/poretools/tabular.py index 24a6a2e..e48be71 100644 --- a/poretools/tabular.py +++ b/poretools/tabular.py @@ -2,7 +2,7 @@ def run(parser, args): - print '\t'.join(['length', 'name', 'sequence', 'quals']) + print('\t'.join(['length', 'name', 'sequence', 'quals'])) for fast5 in Fast5File.Fast5FileSet(args.files): fqs = fast5.get_fastqs(args.type) @@ -10,5 +10,5 @@ def run(parser, args): if fq is None: fast5.close() continue - print '\t'.join([str(len(fq.seq)), fq.name, fq.seq, fq.qual]) - fast5.close() \ No newline at end of file + print('\t'.join([str(len(fq.seq)), fq.name, fq.seq, fq.qual])) + fast5.close() diff --git a/poretools/times.py b/poretools/times.py index d568cc0..6263ad6 100644 --- a/poretools/times.py +++ 
b/poretools/times.py @@ -7,10 +7,10 @@ logger = logging.getLogger('poretools') def run(parser, args): - print '\t'.join(['channel', 'filename', 'read_length', + print('\t'.join(['channel', 'filename', 'read_length', 'exp_starttime', 'unix_timestamp', 'duration', 'unix_timestamp_end', 'iso_timestamp', 'day', - 'hour', 'minute']) + 'hour', 'minute'])) for fast5 in Fast5File.Fast5FileSet(args.files): if fast5.is_open: @@ -29,7 +29,7 @@ def run(parser, args): read_length = 0 lt = localtime(start_time) - print "\t".join([fast5.get_channel_number(), + print("\t".join([fast5.get_channel_number(), fast5.filename, str(read_length), str(fast5.get_exp_start_time()), @@ -39,5 +39,5 @@ def run(parser, args): strftime('%Y-%m-%dT%H:%M:%S%z', lt), strftime('%d', lt), strftime('%H', lt), - strftime('%M', lt)]) + strftime('%M', lt)])) fast5.close() diff --git a/poretools/winner.py b/poretools/winner.py index a6671ac..4e96e05 100644 --- a/poretools/winner.py +++ b/poretools/winner.py @@ -21,5 +21,5 @@ def run(parser, args): fast5.close() logger.info("Wow, it's a whopper: your longest read is %d bases." 
% (longest_size,)) - print longest_read + print(longest_read) diff --git a/setup.py b/setup.py index e5479f0..c5978d2 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ version_py = os.path.join(os.path.dirname(__file__), 'poretools', 'version.py') version = open(version_py).read().strip().split('=')[-1].replace('"','').strip() -print version +print(version) long_description = """ ``poretools`` is a toolset for working with nanopore sequencing data' """ From 47d739b2d265dfc6bb9f8c956375742f640c3f3b Mon Sep 17 00:00:00 2001 From: Rob Egan Date: Tue, 4 Apr 2017 16:09:04 -0700 Subject: [PATCH 2/3] more fixes - some spaces to tabs - argparse compatibility - import fully qualified names --- poretools/Fast5File.py | 78 ++++++++++++++++++------------------- poretools/__init__.py | 6 +-- poretools/poretools_main.py | 14 ++++--- 3 files changed, 51 insertions(+), 47 deletions(-) diff --git a/poretools/Fast5File.py b/poretools/Fast5File.py index 0156c10..8383d00 100644 --- a/poretools/Fast5File.py +++ b/poretools/Fast5File.py @@ -18,8 +18,8 @@ ### and must be converted to seconds by dividing by sample frequency. # poretools imports -import formats -from Event import Event +import poretools.formats +from poretools.Event import Event fastq_paths = { 'closed' : {}, @@ -245,11 +245,11 @@ def guess_version(self): pass # less likely - try: - self.hdf5file["/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template" % (self.group)] - return 'r9rnn' - except KeyError: - pass + try: + self.hdf5file["/Analyses/Basecall_RNN_1D_%03d/BaseCalled_template" % (self.group)] + return 'r9rnn' + except KeyError: + pass return 'prebasecalled' @@ -333,13 +333,13 @@ def get_fastas(self, choice): return fas def get_fastas_dict(self): + """ + Return the set of base called sequences in the FAST5 + in FASTQ format. """ - Return the set of base called sequences in the FAST5 - in FASTQ format. 
- """ - if self.have_fastas is False: - self._extract_fastas_from_fast5() - self.have_fastas = True + if self.have_fastas is False: + self._extract_fastas_from_fast5() + self.have_fastas = True return self.fastas @@ -430,7 +430,7 @@ def get_exp_start_time(self): # Unix time stamp from MinKNOW < 1.4 timestamp = int(self.keyinfo['tracking_id'].attrs['exp_start_time']) return timestamp - except KeyError, e: + except KeyError as e: return None def get_channel_number(self): @@ -486,7 +486,7 @@ def hdf_internal_error(self,reason): https://github.com/arq5x/poretools/issues""" % (self.filename, reason) sys.exit(msg) - def find_read_number_block_fixed_raw(self): + def find_read_number_block_fixed_raw(self): """ New-style FAST5/HDF5 structure: There is a fixed 'Raw/Reads' node with only one 'read_NNN' item @@ -510,7 +510,7 @@ def find_read_number_block_fixed_raw(self): self.hdf_internal_error("Failed to get HDF5 item '%s'"% (path)) return node - def find_read_number_block(self): + def find_read_number_block(self): """Returns the node of the 'Read_NNN' information, or None if not found""" node = self.find_read_number_block_link() @@ -735,31 +735,31 @@ def get_asic_id(self): self._get_metadata() self.have_metadata = True - def get_host_name(self): - """ - Return the MinKNOW host computer name. - """ - if self.have_metadata is False: - self._get_metadata() - self.have_metadata = True + def get_host_name(self): + """ + Return the MinKNOW host computer name. + """ + if self.have_metadata is False: + self._get_metadata() + self.have_metadata = True - try: - return self.keyinfo['tracking_id'].attrs['hostname'] - except: - return None + try: + return self.keyinfo['tracking_id'].attrs['hostname'] + except: + return None - if self.have_metadata is False: - self._get_metadata() - self.have_metadata = True + if self.have_metadata is False: + self._get_metadata() + self.have_metadata = True def get_device_id(self): """ Return the flowcell's device id. 
""" - if self.have_metadata is False: - self._get_metadata() - self.have_metadata = True + if self.have_metadata is False: + self._get_metadata() + self.have_metadata = True try: return self.keyinfo['tracking_id'].attrs['device_id'] @@ -771,9 +771,9 @@ def get_sample_name(self): Return the user supplied sample name """ - if self.have_metadata is False: - self._get_metadata() - self.have_metadata = True + if self.have_metadata is False: + self._get_metadata() + self.have_metadata = True try: return self.keyinfo['context_tags'].attrs['user_filename_input'] @@ -785,9 +785,9 @@ def get_sample_frequency(self): Return the user supplied sample name """ - if self.have_metadata is False: - self._get_metadata() - self.have_metadata = True + if self.have_metadata is False: + self._get_metadata() + self.have_metadata = True try: return int(self.keyinfo['context_tags'].attrs['sample_frequency']) diff --git a/poretools/__init__.py b/poretools/__init__.py index c5c2412..84851c6 100644 --- a/poretools/__init__.py +++ b/poretools/__init__.py @@ -1,5 +1,5 @@ import os import sys -import scripts -from Fast5File import * -from version import __version__ +import poretools.scripts +from poretools.Fast5File import * +from poretools.version import __version__ diff --git a/poretools/poretools_main.py b/poretools/poretools_main.py index a4108e5..2e23fef 100755 --- a/poretools/poretools_main.py +++ b/poretools/poretools_main.py @@ -50,6 +50,9 @@ def run_subtool(parser, args): import index as submodule elif args.command == 'organise': import organise as submodule + else: + parser.print_help() + exit() # run the chosen submodule. 
submodule.run(parser, args) @@ -57,9 +60,10 @@ def run_subtool(parser, args): class ArgumentParserWithDefaults(argparse.ArgumentParser): def __init__(self, *args, **kwargs): super(ArgumentParserWithDefaults, self).__init__(*args, **kwargs) - self.add_argument("-q", "--quiet", help="Do not output warnings to stderr", - action="store_true", - dest="quiet") + self.add_argument("-q", "--quiet", help="Do not output warnings to stderr", + action="store_true", + dest="quiet") + self.set_defaults(func=run_subtool) def main(): logging.basicConfig() @@ -67,7 +71,7 @@ def main(): ######################################### # create the top-level parser ######################################### - parser = argparse.ArgumentParser(prog='poretools', formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = ArgumentParserWithDefaults(prog='poretools', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-v", "--version", help="Installed poretools version", action="version", version="%(prog)s " + str(poretools.version.__version__)) @@ -536,7 +540,7 @@ def main(): try: args.func(parser, args) - except IOError, e: + except IOError as e: if e.errno != 32: # ignore SIGPIPE raise From c026dda410b03dca6d7ad2fffcf64596e7c821e4 Mon Sep 17 00:00:00 2001 From: Rob Egan Date: Tue, 4 Apr 2017 18:46:02 -0700 Subject: [PATCH 3/3] fixed import in python3 fixed unicode -> string for fastq & fasta in python3 fixed iterators in python3 --- poretools/Fast5File.py | 19 +++++++++++------ poretools/__init__.py | 4 ++-- poretools/combine.py | 2 +- poretools/events.py | 2 +- poretools/fasta.py | 2 +- poretools/fastq.py | 2 +- poretools/formats.py | 4 ++-- poretools/hist.py | 2 +- poretools/index.py | 2 +- poretools/metadata.py | 2 +- poretools/nucdist.py | 2 +- poretools/occupancy.py | 2 +- poretools/organise.py | 2 +- poretools/poretools_main.py | 42 ++++++++++++++++++------------------- poretools/qual_v_pos.py | 2 +- poretools/qualdist.py | 2 +- poretools/readstats.py | 2 
+- poretools/squiggle.py | 2 +- poretools/stats.py | 4 ++-- poretools/tabular.py | 2 +- poretools/times.py | 2 +- poretools/winner.py | 2 +- poretools/yield_plot.py | 2 +- 23 files changed, 58 insertions(+), 51 deletions(-) diff --git a/poretools/Fast5File.py b/poretools/Fast5File.py index 8383d00..e79e82f 100644 --- a/poretools/Fast5File.py +++ b/poretools/Fast5File.py @@ -18,8 +18,8 @@ ### and must be converted to seconds by dividing by sample frequency. # poretools imports -import poretools.formats -from poretools.Event import Event +from . import formats +from .Event import Event fastq_paths = { 'closed' : {}, @@ -77,6 +77,8 @@ def next(self): else: raise StopIteration() + __next__ = next + class Fast5FileSet(object): @@ -103,13 +105,16 @@ def __iter__(self): def next(self): try: - return Fast5File(self.files.next(), self.group) + nextFile = next(self.files) + return Fast5File(nextFile, self.group) except Exception as e: # cleanup our mess if self.set_type == FAST5SET_TARBALL: shutil.rmtree(PORETOOLS_TMPDIR) raise StopIteration + __next__ = next + def _extract_fast5_files(self): # return as-is if list of files @@ -171,7 +176,7 @@ def __iter__(self): def next(self): while True: - tarinfo = self._tarfile.next() + tarinfo = next(self._tarfile) if tarinfo is None: raise StopIteration elif self._fast5_filename_filter(tarinfo.name): @@ -179,6 +184,8 @@ def next(self): self._tarfile.extract(tarinfo, path=PORETOOLS_TMPDIR) return os.path.join(PORETOOLS_TMPDIR, tarinfo.name) + __next__ = next + def __len__(self): with tarfile.open(self._tarball) as tar: return len(tar.getnames()) @@ -862,7 +869,7 @@ def _extract_fastqs_from_fast5(self): """ Return the sequence in the FAST5 file in FASTQ format """ - for id, h5path in fastq_paths[self.version].iteritems(): + for (id, h5path) in fastq_paths[self.version].items(): try: table = self.hdf5file[h5path % self.group] fq = formats.Fastq(table['Fastq'][()]) @@ -875,7 +882,7 @@ def _extract_fastas_from_fast5(self): """ Return the
sequence in the FAST5 file in FASTA format """ - for id, h5path in fastq_paths[self.version].iteritems(): + for (id, h5path) in fastq_paths[self.version].items(): try: table = self.hdf5file[h5path % self.group] fa = formats.Fasta(table['Fastq'][()]) diff --git a/poretools/__init__.py b/poretools/__init__.py index 84851c6..f24e3af 100644 --- a/poretools/__init__.py +++ b/poretools/__init__.py @@ -1,5 +1,5 @@ import os import sys import poretools.scripts -from poretools.Fast5File import * -from poretools.version import __version__ +from . import Fast5File +from . import version diff --git a/poretools/combine.py b/poretools/combine.py index 4d165d3..e962d9b 100644 --- a/poretools/combine.py +++ b/poretools/combine.py @@ -1,6 +1,6 @@ import tarfile import sys -import Fast5File +from . import Fast5File #logging import logging diff --git a/poretools/events.py b/poretools/events.py index fe22543..8af6a2f 100644 --- a/poretools/events.py +++ b/poretools/events.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File def run(parser, args): diff --git a/poretools/fasta.py b/poretools/fasta.py index 13da930..f1374b9 100644 --- a/poretools/fasta.py +++ b/poretools/fasta.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File import sys def run(parser, args): diff --git a/poretools/fastq.py b/poretools/fastq.py index 1362fd0..359964e 100644 --- a/poretools/fastq.py +++ b/poretools/fastq.py @@ -1,4 +1,4 @@ -import Fast5File +from . 
import Fast5File import sys def run(parser, args): diff --git a/poretools/formats.py b/poretools/formats.py index 07eef24..570a2c7 100644 --- a/poretools/formats.py +++ b/poretools/formats.py @@ -1,6 +1,6 @@ class Fastq(object): def __init__(self, s): - self.s = s + self.s = s.decode('UTF-8') self.parse() def parse(self): @@ -26,7 +26,7 @@ def est_error_rate(self): class Fasta(object): def __init__(self, s): - self.s = s + self.s = s.decode('UTF-8') self.parse() def parse(self): diff --git a/poretools/hist.py b/poretools/hist.py index b16c683..1130c05 100644 --- a/poretools/hist.py +++ b/poretools/hist.py @@ -6,7 +6,7 @@ from matplotlib import pyplot as plt import seaborn as sns -import Fast5File +from . import Fast5File import logging logger = logging.getLogger('poretools') diff --git a/poretools/index.py b/poretools/index.py index 194e366..212d133 100644 --- a/poretools/index.py +++ b/poretools/index.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File import datetime ############ diff --git a/poretools/metadata.py b/poretools/metadata.py index 062849d..83746c6 100644 --- a/poretools/metadata.py +++ b/poretools/metadata.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File def run(parser, args): diff --git a/poretools/nucdist.py b/poretools/nucdist.py index aa0680f..65144a4 100644 --- a/poretools/nucdist.py +++ b/poretools/nucdist.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File from collections import Counter def run(parser, args): diff --git a/poretools/occupancy.py b/poretools/occupancy.py index a54d718..a50c915 100644 --- a/poretools/occupancy.py +++ b/poretools/occupancy.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File from collections import Counter import sys import pandas as pd diff --git a/poretools/organise.py b/poretools/organise.py index 7b3e578..9da112b 100644 --- a/poretools/organise.py +++ b/poretools/organise.py @@ -1,4 +1,4 @@ -import Fast5File +from . 
import Fast5File import sys import os from os import makedirs diff --git a/poretools/poretools_main.py b/poretools/poretools_main.py index 2e23fef..9a98b55 100755 --- a/poretools/poretools_main.py +++ b/poretools/poretools_main.py @@ -9,47 +9,47 @@ logger = logging.getLogger('poretools') # poretools imports -import poretools.version +from . import version def run_subtool(parser, args): if args.command == 'combine': - import combine as submodule + from . import combine as submodule elif args.command == 'events': - import events as submodule + from . import events as submodule elif args.command == 'fasta': - import fasta as submodule + from . import fasta as submodule elif args.command == 'fastq': - import fastq as submodule + from . import fastq as submodule elif args.command == 'hist': - import hist as submodule + from . import hist as submodule elif args.command == 'metadata': - import metadata as submodule + from . import metadata as submodule elif args.command == 'nucdist': - import nucdist as submodule + from . import nucdist as submodule elif args.command == 'occupancy': - import occupancy as submodule + from . import occupancy as submodule elif args.command == 'qualdist': - import qualdist as submodule + from . import qualdist as submodule elif args.command == 'qualpos': - import qual_v_pos as submodule + from . import qual_v_pos as submodule elif args.command == 'readstats': - import readstats as submodule + from . import readstats as submodule elif args.command == 'stats': - import stats as submodule + from . import stats as submodule elif args.command == 'tabular': - import tabular as submodule + from . import tabular as submodule elif args.command == 'times': - import times as submodule + from . import times as submodule elif args.command == 'squiggle': - import squiggle as submodule + from . import squiggle as submodule elif args.command == 'winner': - import winner as submodule + from . 
import winner as submodule elif args.command == 'yield_plot': - import yield_plot as submodule + from . import yield_plot as submodule elif args.command == 'index': - import index as submodule + from . import index as submodule elif args.command == 'organise': - import organise as submodule + from . import organise as submodule else: parser.print_help() exit() @@ -74,7 +74,7 @@ def main(): parser = ArgumentParserWithDefaults(prog='poretools', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-v", "--version", help="Installed poretools version", action="version", - version="%(prog)s " + str(poretools.version.__version__)) + version="%(prog)s " + str(version.__version__)) subparsers = parser.add_subparsers(title='[sub-commands]', dest='command', parser_class=ArgumentParserWithDefaults) ######################################### diff --git a/poretools/qual_v_pos.py b/poretools/qual_v_pos.py index c310b1e..f37ccec 100644 --- a/poretools/qual_v_pos.py +++ b/poretools/qual_v_pos.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File from collections import defaultdict import pandas import matplotlib.pyplot as plt diff --git a/poretools/qualdist.py b/poretools/qualdist.py index 4d58a9c..7661cab 100644 --- a/poretools/qualdist.py +++ b/poretools/qualdist.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File from collections import Counter def run(parser, args): diff --git a/poretools/readstats.py b/poretools/readstats.py index 1050844..d192ecf 100644 --- a/poretools/readstats.py +++ b/poretools/readstats.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File def run(parser, args): diff --git a/poretools/squiggle.py b/poretools/squiggle.py index cd8d667..3f2e3ba 100644 --- a/poretools/squiggle.py +++ b/poretools/squiggle.py @@ -10,7 +10,7 @@ import logging logger = logging.getLogger('poretools') -import Fast5File +from . 
import Fast5File def plot_squiggle(args, filename, start_times, mean_signals): """ diff --git a/poretools/stats.py b/poretools/stats.py index 20ec707..fda406b 100644 --- a/poretools/stats.py +++ b/poretools/stats.py @@ -1,5 +1,5 @@ import statistics as stat -import Fast5File +from . import Fast5File import logging from collections import defaultdict logger = logging.getLogger('poretools') @@ -14,7 +14,7 @@ def run(parser, args): fas = fast5.get_fastas_dict() if len(fas) > 0: basecalled_files += 1 - for category, fa in fas.iteritems(): + for (category, fa) in fas.items(): if fa is not None: stats[category].append(len(fa.seq)) if category == 'twodirections': diff --git a/poretools/tabular.py b/poretools/tabular.py index e48be71..90b9706 100644 --- a/poretools/tabular.py +++ b/poretools/tabular.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File def run(parser, args): diff --git a/poretools/times.py b/poretools/times.py index 6263ad6..98db966 100644 --- a/poretools/times.py +++ b/poretools/times.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File from time import strftime, localtime import sys diff --git a/poretools/winner.py b/poretools/winner.py index 4e96e05..3266d52 100644 --- a/poretools/winner.py +++ b/poretools/winner.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File import sys #logging diff --git a/poretools/yield_plot.py b/poretools/yield_plot.py index af4b69b..b748102 100644 --- a/poretools/yield_plot.py +++ b/poretools/yield_plot.py @@ -1,4 +1,4 @@ -import Fast5File +from . import Fast5File import matplotlib #matplotlib.use('Agg') # Must be called before any other matplotlib calls from matplotlib import pyplot as plt