diff --git a/common_scripts/clade_translator.py b/common_scripts/clade_translator.py index cb9266c..5e12f84 100644 --- a/common_scripts/clade_translator.py +++ b/common_scripts/clade_translator.py @@ -1,8 +1,9 @@ #!/bin/env python3 -import os import copy -from Bio import SeqIO, AlignIO, SeqRecord, Seq +import os + import numpy as np +from Bio import AlignIO, Seq, SeqIO, SeqRecord def parse_args(): @@ -51,23 +52,30 @@ def get_coordinate_map(ref, qry): coord_maps = {'nuc': get_coordinate_map(orig_ref.seq, new_ref.seq)} for f in orig_features: - coord_maps[f] = get_coordinate_map(orig_features[f].extract(orig_ref).translate().seq, - new_features[f].extract(new_ref).translate().seq) + try: + coord_maps[f] = get_coordinate_map(orig_features[f].extract(orig_ref).translate().seq, + new_features[f].extract(new_ref).translate().seq) + except: + print(f"Could not map {f}") with open(args.clades) as f: clades = [l.strip().split('\t') for l in f] with open(args.output_clades, 'w') as f: + f.write("clade\tgene\tsite\talt\n") for clade in clades: if (len(clade) < 4) or clades[0][0]=='#': f.write('\t'.join(clade) + '\n') continue if clade[1] in coord_maps: - new_pos = max(0,coord_maps[clade[1]][0][int(clade[2])-1])+1 - f.write('\t'.join([clade[0], clade[1], str(new_pos),clade[3]]) + '\n') - - else: + try: + new_pos = max(0,coord_maps[clade[1]][0][int(clade[2])-1])+1 + f.write('\t'.join([clade[0], clade[1], str(new_pos),clade[3]]) + '\n') + except: + print(f"Could not map {clade}") + + if clade[1] == 'clade': f.write('\t'.join(clade) + '\n') diff --git a/flu/Snakefile b/flu/Snakefile index cd7e860..67fb878 100644 --- a/flu/Snakefile +++ b/flu/Snakefile @@ -9,7 +9,6 @@ wildcard_constraints: reference="[^_/]+", - rule all: input: [ @@ -19,23 +18,23 @@ rule all: for reference in config["builds"][strain][segment] ], + rule fetch_data: output: - raw_sequences="data/{strain}/raw_{segment}.fasta" + raw_sequences="data/{strain}/raw_{segment}.fasta", shell: """ scp -r neher@transfer.scicore.unibas.ch:/scicore/home/neher/neher/nextstrain/seasonal-flu/data/{wildcards.strain}/raw_{wildcards.segment}.fasta {output.raw_sequences} """ + rule download_clades: message: "Downloading clade definitions for {wildcards.strain} from {params.source} -> {output}" output: "data/clades_{strain}_{segment}_{reference}_raw.tsv", params: - source=lambda w: config["builds"][w.strain][w.segment][w.reference][ - "clade_url" - ], + source=lambda w: config["builds"][w.strain][w.segment][w.reference]["clade_url"], shell: "curl {params.source} | sed '/V1A\\tHA1\\t146\\tI/d' >{output}" @@ -58,6 +57,7 @@ rule offset_clades: >{output} """ + rule download_includes: output: "data/includes_{strain}_{segment}_{reference}.tsv", @@ -99,8 +99,10 @@ rule parse: def genes(w): - if w.segment=='ha': return ["SigPep", "HA1", "HA2"] - if w.segment=='na': return ["NA"] + if w.segment == "ha": + return ["SigPep", "HA1", "HA2"] + if w.segment == "na": + return ["NA"] rule subsample: @@ -112,13 +114,15 @@ rule subsample: sampled_sequences="build/{strain}/{segment}/{reference}/subsample_raw.fasta", sampled_strains="build/{strain}/{segment}/{reference}/subsample_raw.txt", params: - filter_arguments=lambda w: config["builds"][w.strain][w.segment][ - w.reference - ]["filter"], - reference_EPI_ISL=lambda w: config["builds"][w.strain][w.segment][ - w.reference - ]["reference_EPI_ISL"], - other_include = lambda w:config["builds"][w.strain][w.segment][w.reference].get("include_file","") + filter_arguments=lambda w: config["builds"][w.strain][w.segment][w.reference][ + "filter" + ], + reference_EPI_ISL=lambda w: config["builds"][w.strain][w.segment][w.reference][ + "reference_EPI_ISL" + ], + other_include=lambda w: config["builds"][w.strain][w.segment][w.reference].get( + "include_file", "" + ), shell: """ augur filter \ @@ -225,15 +229,16 @@ rule tree: > /dev/null """ + # root using dates in treetime, use 1500 as sequence length (good enough, doesn't matter) rule root: input: tree=rules.tree.output.tree, - metadata = rules.parse.output.metadata, + metadata=rules.parse.output.metadata, output: tree="build/{strain}/{segment}/{reference}/tree_rooted.nwk", params: - outdir = "build/{strain}/{segment}/{reference}/tt_out" + outdir="build/{strain}/{segment}/{reference}/tt_out", shell: """ treetime clock \ @@ -245,6 +250,7 @@ rule root: cp {params.outdir}/rerooted.newick {output.tree} """ + # refine while keeping the root rule refine: input: @@ -344,21 +350,26 @@ rule clades: > /dev/null """ + ## TODO explicitly relabel clade branch labels to differentiate long and short ones # currently long ones are overwritten by short ones. rule make_short_clades: input: clades=rules.offset_clades.output, output: - clades = "data/clades-short_{strain}_{segment}_{reference}.tsv" + clades="data/clades-short_{strain}_{segment}_{reference}.tsv", run: with open(str(input.clades)) as fh: clades = fh.readlines() - for contraction in config["builds"][wildcards.strain][wildcards.segment][wildcards.reference].get("clade_contractions", []): - clades = [x.replace(contraction['orig'], contraction['short']) for x in clades] + for contraction in config["builds"][wildcards.strain][wildcards.segment][ + wildcards.reference + ].get("clade_contractions", []): + clades = [ + x.replace(contraction["orig"], contraction["short"]) for x in clades + ] - with open(str(output.clades), 'w') as fh: + with open(str(output.clades), "w") as fh: for line in clades: fh.write(line) @@ -370,7 +381,7 @@ rule clades_short: tree=rules.refine.output.tree, aa_muts=rules.aa_muts_explicit.output.node_data, nuc_muts=rules.ancestral.output.node_data, - clades = "data/clades-short_{strain}_{segment}_{reference}.tsv" + clades="data/clades-short_{strain}_{segment}_{reference}.tsv", output: node_data="build/{strain}/{segment}/{reference}/clades-short.json", shell: @@ -383,18 +394,19 @@ rule clades_short: sed -i 's/clade_membership/short_clade/' {output.node_data} """ + # make sure all differences between the alignment reference and the root are attached as mutations to the root rule attach_root_mutations: input: aa_muts=rules.aa_muts_explicit.output.node_data, nuc_muts=rules.ancestral.output.node_data, - translations = rules.align.output.alignment, - tree = rules.refine.output.tree + translations=rules.align.output.alignment, + tree=rules.refine.output.tree, output: aa_muts="build/{strain}/{segment}/{reference}/aa_muts_adapted.json", - nuc_muts="build/{strain}/{segment}/{reference}/nuc_muts_adapted.json" + nuc_muts="build/{strain}/{segment}/{reference}/nuc_muts_adapted.json", params: - genes = genes, + genes=genes, translations=lambda w: expand( "build/{strain}/{segment}/{reference}/aligned.gene.{genes}.fasta", strain=w.strain, @@ -402,7 +414,7 @@ rule attach_root_mutations: genes=genes(w), reference=w.reference, ), - reference = lambda w: w.reference + reference=lambda w: w.reference, shell: """ python3 ../common_scripts/attach_root_mutations.py \ @@ -428,7 +440,9 @@ def get_node_data(w): node_data.append("build/{strain}/{segment}/{reference}/clades.json".format(**w)) if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference]: - node_data.append("build/{strain}/{segment}/{reference}/clades-short.json".format(**w)) + node_data.append( + "build/{strain}/{segment}/{reference}/clades-short.json".format(**w) + ) return node_data @@ -439,8 +453,10 @@ rule export: input: tree=rules.refine.output.tree, metadata=rules.parse.output.metadata, - node_data = get_node_data, - auspice_config=lambda w: config["files"]["auspice_config_shortclade"] if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference] else config["files"]["auspice_config"], + node_data=get_node_data, + auspice_config=lambda w: config["files"]["auspice_config_shortclade"] + if "clade_contractions" in config["builds"][w.strain][w.segment][w.reference] + else config["files"]["auspice_config"], output: auspice_json="auspice/{strain}/{segment}/{reference}/auspice_raw.json", params: @@ -466,7 +482,7 @@ rule swap_strain_accession: output: auspice_json="auspice/{strain}/{segment}/{reference}/auspice.json", params: - fake_clade = lambda w: '--add-fake-clade none' if w.segment != 'ha' else '' + fake_clade=lambda w: "--add-fake-clade none" if w.segment != "ha" else "", shell: """ python3 scripts/swap_strain_accession.py \ @@ -526,10 +542,11 @@ rule assemble_folder: cp {input.tree} {output.tree}; """ -if 'timestamp' not in config: + +if "timestamp" not in config: timestamp = datetime.datetime.utcnow().isoformat()[:-7] + "Z" else: - timestamp = config['timestamp'] + timestamp = config["timestamp"] rule test_nextclade: @@ -558,7 +575,6 @@ rule test_nextclade: """ - rule clean: shell: """ diff --git a/rsv/Snakefile b/rsv/Snakefile index 784aa2b..a8ba326 100644 --- a/rsv/Snakefile +++ b/rsv/Snakefile @@ -68,9 +68,11 @@ rule lift_clades_to_reference: input: clade_file="data/{a_or_b}/{reference}/clades_{clade_type}_raw.tsv", reference="references/{a_or_b}/{reference}/reference.gbk", - orig_reference="data/{a_or_b}/{reference}/clade_reference.gbk", + orig_reference=lambda w: config["builds"][w.a_or_b][w.reference]["clades"][ + w.clade_type + ]["ref_path"], output: - clade_file="data/{a_or_b}/{reference}/clades_{clade_type}.tsv", + clade_file="data/{a_or_b}/{reference}/clades_{clade_type,[A-Za-z0-9]+}.tsv", shell: """ python3 ../common_scripts/clade_translator.py \ diff --git a/rsv/profiles/auspice_config.json b/rsv/profiles/auspice_config.json index 360d766..a0c3488 100644 --- a/rsv/profiles/auspice_config.json +++ b/rsv/profiles/auspice_config.json @@ -26,6 +26,11 @@ "title": "G Clades (Goya et al)", "type": "categorical" }, + { + "key": "pango", + "title": "Proposed lineages", + "type": "categorical" + }, { "key": "date", "title": "Sample Date", @@ -57,6 +62,12 @@ "displayName": "G_clade (Goya et al)", "description": "Clades based on the G gene and Goya et al, IRV, 2019.", "hideInWeb": false + }, + { + "name": "pango", + "displayName": "Proposed lineages", + "description": "Proposed lineages based on Pango", + "hideInWeb": false } ] } diff --git a/rsv/profiles/builds.yaml b/rsv/profiles/builds.yaml index a7050a1..c0ffd6f 100644 --- a/rsv/profiles/builds.yaml +++ b/rsv/profiles/builds.yaml @@ -4,7 +4,7 @@ auspice_config: "profiles/auspice_config.json" color_schemes: "profiles/color_schemes.tsv" exclude: "profiles/exclude.txt" -timestamp: "2023-02-03T12:00:00Z" +timestamp: "2023-05-10T12:00:00Z" builds: a: @@ -17,10 +17,18 @@ builds: key: clade_membership label_key: clade_annotation def: "references/a/EPI_ISL_412866/clades_genome.tsv" + ref_path: "data/a/EPI_ISL_412866/clade_reference.gbk" G: key: G_clade label_key: G_clade_label def: "references/a/EPI_ISL_412866/clades_G.tsv" + ref_path: "data/a/EPI_ISL_412866/clade_reference.gbk" + pango: + key: pango + label_key: pango_label + excel_path: "profiles/pango/amino-acid-genotypes.xlsx" + excel_sheet: "AA_RSVA" + ref_path: "profiles/pango/REFROOTA.gb" b: EPI_ISL_1653999: filter: "--min-date 1965 --probabilistic-sampling --group-by year --subsample-max-sequences 1500 --query 'genome_coverage>0.95'" @@ -31,10 +39,18 @@ builds: key: clade_membership label_key: clade_annotation def: "references/b/EPI_ISL_1653999/clades_genome.tsv" + ref_path: "data/b/EPI_ISL_1653999/clade_reference.gbk" G: key: G_clade label_key: G_clade_label def: "references/b/EPI_ISL_1653999/clades_G.tsv" + ref_path: "data/b/EPI_ISL_1653999/clade_reference.gbk" + pango: + key: pango + label_key: pango_label + excel_path: "profiles/pango/amino-acid-genotypes.xlsx" + excel_sheet: "AA_RSVB" + ref_path: "profiles/pango/REFROOTB.gb" unused_builds: diff --git a/rsv/profiles/color_ordering_a.tsv b/rsv/profiles/color_ordering_a.tsv index 144001b..108c59f 100644 --- a/rsv/profiles/color_ordering_a.tsv +++ b/rsv/profiles/color_ordering_a.tsv @@ -44,3 +44,38 @@ G_clade GA3.0.4a G_clade GA3.0.3b G_clade GA3.0.4b G_clade GA3.0.5b + +pango A.1 +pango A.2 +pango A.2.1 +pango A.3 +pango A.4 +pango A.5 +pango A.6 +pango A.6.1 +pango A.6.10 +pango A.6.10.1 +pango A.6.10.2 +pango A.6.11 +pango A.6.12 +pango A.6.12.1 +pango A.6.12.2 +pango A.6.13 +pango A.6.14 +pango A.6.14.1 +pango A.6.15 +pango A.6.15.1 +pango A.6.15.2 +pango A.6.16 +pango A.6.2 +pango A.6.3 +pango A.6.3.1 +pango A.6.4 +pango A.6.5 +pango A.6.5.1 +pango A.6.6 +pango A.6.7 +pango A.6.8 +pango A.6.9 +pango not-assigned + diff --git a/rsv/profiles/color_ordering_b.tsv b/rsv/profiles/color_ordering_b.tsv index c6ac645..ad41156 100644 --- a/rsv/profiles/color_ordering_b.tsv +++ b/rsv/profiles/color_ordering_b.tsv @@ -19,4 +19,42 @@ clade_membership B2 clade_membership B3 clade_membership B4 clade_membership B5 -clade_membership B6 \ No newline at end of file +clade_membership B6 + +pango B.1 +pango B.1.1 +pango B.2 +pango B.2.1 +pango B.2.1.1 +pango B.2.2 +pango B.2.3 +pango B.2.4 +pango B.2.5 +pango B.2.6 +pango B.2.7 +pango B.2.8 +pango B.2.9 +pango B.2.9.1 +pango B.2.9.2 +pango B.2.9.3 +pango B.2.9.4 +pango B.2.9.5 +pango B.2.9.6 +pango B.2.9.7 +pango B.2.9.8 +pango B.2.9.9 +pango B.2.9.10 +pango B.2.9.10.1 (B.A) +pango B.2.9.10.2 (B.B) +pango B.2.9.10.3 (B.C) +pango B.2.9.10.4 (B.D) +pango B.2.9.10.5 (B.E) +pango B.2.9.10.6 (B.F) +pango B.2.9.10.7 (B.G) +pango B.G.1 +pango B.G.2 +pango B.G.3 +pango B.G.4 +pango B.G.5 +pango B.G.6 +pango not assigned diff --git a/rsv/profiles/pango/REFROOTA.gb b/rsv/profiles/pango/REFROOTA.gb new file mode 100644 index 0000000..416bc30 --- /dev/null +++ b/rsv/profiles/pango/REFROOTA.gb @@ -0,0 +1,511 @@ +LOCUS REFROOTA 15052 bp cRNA UNK 01-JAN-1980 +DEFINITION REFROOTA. +ACCESSION REFROOTA +VERSION REFROOTA +KEYWORDS . +SOURCE . + ORGANISM Human orthopneumovirus + Viruses; Riboviria; Orthornavirae; Negarnaviricota; Haploviricotina; + Monjiviricetes; Mononegavirales; Pneumoviridae; Orthopneumovirus. +FEATURES Location/Qualifiers + source 1..15052 + /organism="Human orthopneumovirus" + /mol_type="viral cRNA" + gene 1..484 + /gene="NS1" + /note="formerly called 1C" + /db_xref="GeneID:37607636" + mRNA 1..484 + /gene="NS1" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607636" + CDS 1..420 + /gene="NS1" + /codon_start=1 + /product="nonstructural protein 1" + /protein_id="YP_009518850.1" + /db_xref="GeneID:37607636" + /translation="MGSNSLSMIKVRLQNLFDNDEVALLKITCYTDKLIHLTNALAKAV + IHTIKLNGIVFVHVITSSDICPNNNIVVKSNFTTMPVLQNGGYIWEMMELTHCSQPNGL + IDDNCEIKFSKKLSDSTMTNYMNQLSELLGFDLNP*" + gene 504..1008 + /gene="NS2" + /note="Formerly called 1B" + /db_xref="GeneID:37607637" + mRNA 504..1008 + /gene="NS2" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607637" + CDS 536..910 + /gene="NS2" + /codon_start=1 + /product="nonstructural protein 2" + /protein_id="YP_009518851.1" + /db_xref="GeneID:37607637" + /translation="MDTTHNDTTPQRLMITDMRPLSLETIITSLTRDIITHRFIYLINH + ECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHDGFLE + CIGIKPTKHTPIIYKYDLNP*" + gene 1035..2246 + /gene="N" + /db_xref="GeneID:37607638" + mRNA 1035..2246 + /gene="N" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607638" + CDS 1050..2225 + /gene="N" + /codon_start=1 + /product="nucleoprotein" + /protein_id="YP_009518852.1" + /db_xref="GeneID:37607638" + /translation="MALSKVKLNDTLNKDQLLSSSKYTIQRSTGDSIDTPNYDVQKHIN + KLCGMLLITEDANHKFTGLIGMLYAMSRLGREDTIKILRDAGYHVKANGVDVTTHRQDI + NGKEMKFEVLTLASLTTEIQINIEIESRKSYKKMLKEMGEVAPEYRHDSPDCGMIILCI + AALVITKLAAGDRSGLTAVIRRANNVLKNEMKRYKGLLPKDIANSFYEVFEKYPHFIDV + FVHFGIAQSSTRGGSRVEGIFAGLFMNAYGAGQVMLRWGVLAKSVKNIMLGHASVQAEM + EQVVEVYEYAQKLGGEAGFYHILNNPKASLLSLTQFPHFSSVVLGNAAGLGIMGEYRGT + PRNQDLYDAAKAYAEQLKENGVINYSVLDLTAEELEAIKHQLNPKDNDVEL*" + gene 2248..3188 + /gene="P" + /db_xref="GeneID:37607639" + mRNA 2248..3188 + /gene="P" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607639" + CDS 2265..2990 + /gene="P" + /codon_start=1 + /product="phosphoprotein" + /protein_id="YP_009518853.1" + /db_xref="GeneID:37607639" + /translation="MEKFAPEFHGEDANNRATKFLESIKGKFTSPKDPKKKDSIISVNS + IDIEVTKESPITSNSTIINPTNETDDTVGNKPNYQRKPLVSFKEDPTPSDNPFSKLYKE + TIETFDNNEEESSYSYEEINDQTNDNITARLDRIDEKLSEILGMLHTLVVASAGPTSAR + DGIRDAMVGLREEMIEKIRTEALMTNDRLEAMARLRNEESEKMAKDTSDEVSLNPTSEK + LNNLLEGNDSDNDLSLEDF*" + gene 3198..4161 + /gene="M" + /db_xref="GeneID:37607640" + mRNA 3198..4161 + /gene="M" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607640" + CDS 3207..3977 + /gene="M" + /codon_start=1 + /product="matrix protein" + /protein_id="YP_009518854.1" + /db_xref="GeneID:37607640" + /translation="METYVNKLHEGSTYTAAVQYNVLEKDDDPASLTIWVPMFQSSMPA + DLLIKELANVNILVKQISTPKGPSLRVMINSRSAVLAQMPSKFTICANVSLDERSKLAY + DVTTPCEIKACSLTCLKSKNMLTTVKDLTMKTLNPTHDIIALCEFENIVTSKKVIIPTY + LRSISVRNKDLNTLENITTTEFKNAITNAKIIPYSGLLLVITVTDNKGAFKYIKPQSQF + IVDLGAYLEKESIYYVTTNWKHTATRFAIKPMED*" + gene 4172..4589 + /gene="SH" + /db_xref="GeneID:37607641" + mRNA 4172..4589 + /gene="SH" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607641" + CDS 4256..4450 + /gene="SH" + /codon_start=1 + /product="small hydrophobic protein" + /protein_id="YP_009518855.1" + /db_xref="GeneID:37607641" + /translation="MENTSITIEFSSKFWPYFTLIHMITTIISLLIIISIMIAILNKLC + EYNVFHNKTFELPRARVNT*" + gene 4637..5639 + /gene="G" + /db_xref="GeneID:37607642" + mRNA 4637..5639 + /gene="G" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607642" + CDS 4652..5626 + /gene="G" + /codon_start=1 + /product="attachment glycoprotein" + /protein_id="YP_009518856.1" + /db_xref="GeneID:37607642" + /translation="MSKNKDQRTAKTLEKTWDTLNHLLFISSCLYKLNLKSIAQITLSI + LAMIISTSLIIAAIIFIASANHKVTLTTAIIQDATSQIKNTTPTYLTQNPQLGISFSNL + SEITSQTTTILASTTPRVKSTLQSTTVKTKNTTTTQIQPSKPTTKQRQNKPPNKPNNDF + HFEVFNFVPCSICSNNPTCWAICKRIPXXNKKPGKKTTTKPTKKPTIKTTKKDLKPQTT + KPKEVPTTKPTEKPTINTTKTNIITTLLTNNTTGNPEHTSQXXXXXXXXXXXXXXXXXX + XXXXXXKETFHSTSSEGNPSPSQVYTTSEYLSQPSSPSNTTNQ*" + gene 5693..7600 + /gene="F" + /db_xref="GeneID:37607643" + mRNA 5693..7600 + /gene="F" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + /db_xref="GeneID:37607643" + CDS 5706..7430 + /gene="F" + /codon_start=1 + /product="fusion glycoprotein" + /protein_id="YP_009518857.1" + /db_xref="GeneID:37607643" + /translation="MELPILKTNAITTILAAVTFCFASSQNITEEFYQSTCSAVSKGYL + SALRTGWYTSVITIELSNIKENKCNGTDAKVKLIKQELDKYKNAVTELQLLMQSTPAAN + NRARRELPRFMNYTLNNTKKTNVTLSKKRKRRFLGFLLGVGSAIASGIAVSKVLHLEGE + VNKIKSALLSTNKAVVSLSNGVSVLTSKVLDLKNYIDKQLLPIVNKQSCSISNIETVIE + FQQKNNRLLEITREFSVNAGVTTPVSTYMLTNSELLSLINDMPITNDQKKLMSNNVQIV + RQQSYSIMSIIKEEVLAYVVQLPLYGVIDTPCWKLHTSPLCTTNTKEGSNICLTRTDRG + WYCDNAGSVSFFPQAETCKVQSNRVFCDTMNSLTLPSEVNLCNIDIFNPKYDCKIMTSK + TDVSSSVITSLGAIVSCYGKTKCTASNKNRGIIKTFSNGCDYVSNKGVDTVSVGNTLYY + VNKQEGKSLYVKGEPIINFYDPLVFPSDEFDASISQVNEKINQSLAFIRKSDELLHNVN + AGKSTTNIMITTIIIVIIVILLSLIAVGLLLYCKARSTPVTLSKDQLSGINNIAFSN*" + gene 7654..8614 + /gene="M2" + /db_xref="GeneID:37607644" + mRNA 7654..8614 + /product="22K/M2 protein gene" + /experiment="experimental evidence, no additional details + recorded" + /citation=[3] + CDS 7663..8247 + /gene="M2-1" + /note="ORF 1, matrix protein 2" + /codon_start=1 + /product="M2-1 protein" + /protein_id="YP_009518858.1" + /db_xref="GeneID:37607644" + /translation="MSRRNPCKFEIRGHCLNGKRCHFSHNYFEWPPHALLVRQNFMLNR + ILKSMDKSIDTLSEISGAAELDRTEEYALGVVGVLESYIGSINNITKQSACVAMSKLLT + ELNSDDIKKLRDNEELNSPKIRVYNTVISYIESNRKNNKQTIHLLKRLPADVLKKTIKN + TLDIHKSITINNPKELTVSDTNDHAKNNDTT*" + CDS 8216..8488 + /gene="M2-2" + /note="ORF 2, RNA processivity factor" + /codon_start=1 + /product="M2-2 protein" + /protein_id="YP_009518859.1" + /db_xref="GeneID:37607644" + /translation="MTMPKIMILPDKYPCSINSILITSRCRVTMYNQKNTLYFNQNNQN + NHIYSPNQTFNEIHWTSQDLIDTIQNFLQHLGITDDIYTIYILVS*" + gene 8546..15052 + /gene="L" + /db_xref="GeneID:37607645" + mRNA 8546..15052 + /gene="L" + /experiment="experimental evidence, no additional details + recorded" + /citation=[2] + /function="viral polymerase" + /db_xref="GeneID:37607645" + CDS 8555..15052 + /gene="L" + /note="RNA dependant RNA polymerase; RdRp" + /codon_start=1 + /product="polymerase protein" + /protein_id="YP_009518860.1" + /db_xref="GeneID:37607645" + /translation="MDPIINGNSANVYLTDSYLKGVISFSECNALGSYIFNGPYLKNDY + TNLISRQNPLIEHINLKKLNITQSLISKYHKGEIKIEEPTYFQSLLMTYKSMTSSEQIT + TTNLLKKIIRRAIEISDVKVYAILNKLGLKEKDKIKSNNGQDEDNSVITTIIKDDILLA + VKDNQSHLKADKNHSTKQKDTIKTTLLKKLMCSMQHPPSWLIHWFNLYTKLNNILTQYR + SNEVKNHGFILIDNQTLNGFQFILNQYGCIVYHKELKRITVTTYNQFLTWKDISLSRLN + VCLITWISNCLNTLNKSLGLRCGFNNVILTQLFLYGDCILKLFHNEGFYIIKEVEGFIM + SLILNITEEDQFRKRFYNSMLNNITDAANKAQKNLLSRVCHTLLDKTVSDNIINGRWII + LLSKFLKLIKLAGDNNLNNLSELYFLFRIFGHPMVDERQAMDAVKVNCNETKFYLLSSL + SMLRGAFIYRIIKGFVNNYNRWPTLRNAIVLPLRWLTYYKLNTYPSLLELTERDLIVLS + GLRFYREFRLPKKVDLEMIINDKAISPPKNLIWTSFPRNYMPSHIQNYIEHEKLKFSES + DKSRRVLEYYLRDNKFNECDLYNCVVNQSYLNNPNHVVSLTGKERELSVGRMFAMQPGM + FRQVQILAEKMIAENILQFFPESLTRYGDLELQKILELKAGISNKSNRYNDNYNNYISK + CSIITDLSKFNQAFRYETSCICSDVLDELHGVQSLFSWLHLTIPHVTIICTYRHAPPYI + RDHIVDLNNVDEQSGLYRYHMGGIEGWCQKLWTIEAISLLDLISLKGKFSITALINGDN + QSIDISKPVRLMEGQTHAQADYLLALNSLKLLYKEYAGIGHKLKGTETYISRDMQFMSK + TIQHNGVYYPASIKKVLRVGPWINTILDDFKVSLESIGSLTQELEYRGESLLCSLIFRN + VWLYNQIALQLKNHALCNNKLYLDILKVLKHLKTFFNLDNIDTALTLYMNLPMLFGGGD + PNLLYRSFYRRTPDFLTEAIVHSVFILSYYTNHDLKDKLQDLSDDRLNKFLTCIITFDK + NPNAEFVTLMRDPQALGSERQAKITSEINRLAVTEVLSTAPNKIFSKSAQHYTTTEIDL + NDIMQNIEPTYPHGLRVVYESLPFYKAEKIVNLISGTKSITNILEKTSAIDLTDIDRAT + EMMRKNITLLIRIFPLDCNRDKREILSMENLSITELSKYVRERSWSLSNIVGVTSPSIM + YTMDIKYTTSTIASGIIIEKYNVNSLTRGERGPTKPWVGSSTQEKKTMPVYNRQVLTKK + QRDQIDLLAKLDWVYASIDNKDEFMEELSIGTLGLTYEKAKKLFPQYLSVNYLHRLTVS + SRPCEFPASIPAYRTTNYHFDTSPINRILTEKYGDEDIDIVFQNCISFGLSLMSVVEQF + TNVCPNRIILIPKLNEIHLMKPPIFTGDVDIHKLKQVIQKQHMFLPDKISLTQYVELFL + SNKTLKSGSHVNSNLILAHKISDYFHNTYILSTNLAGHWILIIQLMKDSKGIFEKDWGE + GYITDHMFINLKVFFNAYKTYLLCFHKGYGKAKLECDMNTSDLLCVLELIDSSYWKSMS + KVFLEQKVIKYILSQDASLHRVKGCHSFKLWFLKRLNVAEFTVCPWVVNIDYHPTHMKA + ILTYIDLVRMGLINIDRIYIKNKHKFNDEFYTSNLFYINYNFSDNTHLLTKHIRIANSE + LENNYNKLYHPTPETLENILTNPIKSNDKKTLNDYCIGKNVDSIMLPLLSNKKLIKSST + MIRTNYSKQDLYNLFPTVVIDKIIDHSGNTAKSNQLYTTTSHQISLVHNSTSLYCMLPW + HHINRFNFVFSSTGCKISIEYILKDLKIKDPNCIAFIGEGAGNLLLRTVVELHPDIRYI + YRSLKDCNDHSLPIEFLRLYNGHINIDYGENLTIPATDATNNIHWSYLHIKFAEPISLF + VCDAELPVTVNWSKIIIEWSKHVRKCKYCSSVNKCTLIVKYHAQDDIDFKLDNITILKT + YVCLGSKLKGSEVYLVLTIGPANVFPVFNVVQNAKLILSRTKNFIMPKKADKESIDANI + KSLIPFLCYPITKKGINTALSKLKSVVSGDILSYSIAGRNEVFSNKLINHKHMNILKWF + NHVLNFRSTELNYNHLYMVESTYPYLSELLNSLTTNELKKLIKITGSLLYNFHNE*" +ORIGIN + 1 atgggcagca attcattgag tatgataaaa gttagattac aaaatttgtt tgacaatgat + 61 gaagtagcat tgttaaaaat aacatgctat actgacaaat taatacattt aactaatgct + 121 ttggctaagg cagtgataca tacaatcaaa ttgaatggca ttgtatttgt gcatgttatt + 181 acaagtagtg atatttgccc taataataat attgtagtaa aatccaattt cacaacaatg + 241 ccagtgttac aaaatggagg ttatatatgg gaaatgatgg aattaacaca ttgctctcaa + 301 cctaatggtc taatagatga caattgtgaa attaaattct ccaaaaaact aagtgattca + 361 acaatgacca attatatgaa tcaattatct gaattacttg gatttgatct taatccataa + 421 attataataa atatcaacta gcaaatcaat gtcactaaca ccattagtta atatnnnaaa + 481 annncttaac agaagataaa aatggggcaa ataaatcaac tcagccaacc caaccatgga + 541 cacaacacac aatgatacca caccacaaag actgatgatc acagacatga gaccattgtc + 601 acttgagact ataataacat cactaaccag agacatcata acacacagat ttatatactt + 661 gataaatcat gaatgcatag tgagaaaact tgatgaaaga caggccacat ttacattcct + 721 ggtcaactat gaaatgaaac tattgcacaa agtaggaagc actaaatata aaaaatatac + 781 tgaatacaac acaaaatatg gcactttccc tatgccaata tttatcaatc atgatgggtt + 841 cttagaatgc attggcatta agcctacaaa gcatactccc ataatataca agtatgatct + 901 caatccatga atttcaacac aagattcaca caatccaaaa taacaacttt atgcataact + 961 acactccata gtccaaatgg agcctgaaaa ttatagtaat ttnnaaaatt aaggagagac + 1021 ataagataaa agatggggca aatacaaaaa tggctcttag caaagtcaag ttgaatgata + 1081 cactcaacaa agatcaactt ctgtcatcta gcaaatacac catccaacgg agcacaggag + 1141 atagtattga tactcctaat tatgatgtgc agaaacacat caataagtta tgtggcatgt + 1201 tattaatcac agaagatgct aatcataaat tcactgggtt aataggtatg ttatatgcta + 1261 tgtctagatt aggaagagaa gacaccataa aaatactcag agatgcggga tatcatgtaa + 1321 aagcaaatgg agtagatgta acaacacatc gtcaagacat taatgggaaa gaaatgaaat + 1381 ttgaagtgtt aacattggca agcttaacaa ctgaaattca aatcaacatt gagatagaat + 1441 ctagaaaatc ctacaaaaaa atgctaaaag aaatgggaga ggtagctcca gaatacaggc + 1501 atgactctcc tgattgtggg atgataatat tatgtatagc agcattagta ataaccaaat + 1561 tagcagcagg ggatagatct ggtcttacag ctgtgattag gagagctaat aatgtcctaa + 1621 aaaatgaaat gaaacgttat aaaggcttac tacccaagga tatagccaac agcttctatg + 1681 aagtgtttga aaaatatcct cactttatag atgtttttgt tcattttggt atagcacaat + 1741 cttctaccag aggtggcagt agagttgaag ggatttttgc aggattgttt atgaatgcct + 1801 atggtgcagg gcaagtgatg ttacggtggg gagtcttagc aaaatcagtt aaaaatatta + 1861 tgttaggaca tgctagtgtg caagcagaaa tggaacaagt tgttgaggtt tatgaatatg + 1921 cccaaaaatt gggtggagaa gcaggattct accatatatt gaacaaccca aaagcatcat + 1981 tattatcttt gactcaattt cctcacttct ccagtgtagt attaggcaat gctgctggcc + 2041 taggcataat gggagaatac agaggtacac caaggaatca agatctatat gatgcagcaa + 2101 aggcatatgc tgaacaactc aaagaaaatg gtgtgattaa ctacagtgta ttagacttga + 2161 cagcagaaga actagaggct atcaaacatc agcttaatcc aaaagataat gatgtagagc + 2221 tttgagttaa tnnannnnna aaaaantggg gcaaataaat catcatggaa aagtttgctc + 2281 ctgaattcca tggagaagat gcaaacaaca gagctactaa attcctagaa tcaataaagg + 2341 gcaaattcac atcacctaaa gatcccaaga aaaaagatag tatcatatct gtcaactcaa + 2401 tagatataga agtaaccaaa gaaagcccta taacatcaaa ttcaaccatt ataaacccaa + 2461 caaatgagac agatgatact gtagggaaca agcccaatta tcaaagaaaa cctctagtaa + 2521 gtttcaaaga agaccctaca ccaagtgata atcccttttc aaaactatac aaagaaacca + 2581 tagaaacatt tgataacaat gaagaagaat ctagctattc atatgaagaa ataaatgatc + 2641 agacaaacga taatataaca gcaagattag ataggattga tgaaaaatta agtgaaatac + 2701 taggaatgct tcacacatta gtagtagcaa gtgcaggacc tacatctgct cgggatggta + 2761 taagagatgc catggttggt ttaagagaag aaatgataga aaaaatcaga actgaagcat + 2821 taatgaccaa tgacagatta gaagctatgg caagactcag gaatgaggaa agtgaaaaga + 2881 tggcaaaaga cacatcagat gaagtgtctc tcaatccaac atcagagaaa ttgaacaacc + 2941 tgttggaagg gaatgatagt gacaatgatc tatcacttga agatttctga ttagttacca + 3001 atctgcacat caacacacaa caccaacaga agaccaacaa acaaaccaac tcactcatcc + 3061 aaccaaacat ccatctgcca atcagcnnnn nnnnnnnnnn nncaaacagc cnnaaaaann + 3121 naacaaccag ccaatccaaa actagccacc cnggnaaaaa atcgacaata tagttacaaa + 3181 annnnnaaga aaaagatggg gcaaatatgg aaacatacgt gaacaaactt cacgaaggct + 3241 ccacatacac agctgctgtt caatacaatg tcctagaaaa agacgatgac cctgcatcac + 3301 ttacaatatg ggtgcccatg ttccaatcat ctatgccagc agatttactt ataaaagaac + 3361 tagctaatgt caacatacta gtgaaacaaa tatccacacc caagggacct tcattaagag + 3421 tcatgataaa ctcaagaagt gcagtgctag cacaaatgcc cagcaaattt accatatgtg + 3481 ctaatgtgtc cttggatgaa agaagcaaac tggcatatga tgtaaccaca ccctgtgaaa + 3541 tcaaggcatg tagtctaaca tgcctaaaat caaaaaatat gttaactaca gttaaagatc + 3601 tcactatgaa aacactcaac ccaacacatg atattattgc tttatgtgaa tttgaaaata + 3661 tagtaacatc aaaaaaagtc ataataccaa catacctaag atccatcagt gtcagaaata + 3721 aagatctgaa cacacttgaa aatataacaa ccactgaatt caaaaatgcc atcacaaatg + 3781 caaaaatcat cccttactca ggattactat tagtcatcac agtgactgac aacaaaggag + 3841 cattcaaata cataaagcca caaagtcaat tcatagtaga tcttggagct tacctagaaa + 3901 aagaaagtat atattatgtt acaacaaatt ggaagcacac agctacacga tttgcaatca + 3961 aacccatgga agattaaccn tttttcctct acatcagtga gttaattcat acaaactttc + 4021 tacctacatt cttcacttca caatcataat cacaaacact ctgtggttca accaatcnnn + 4081 nnaaacaaaa cttatctgaa gtctcagatc atcccaagtc attgttcatc agatctagta + 4141 ctcaaataag ttaatnaaaa atatnacaca tggggcaaat aatcatcgga ggaaatccaa + 4201 ctaatcacaa tatctgtcaa catagacaag tcaacacacc agacaaaatc aaccaatgga + 4261 aaatacatcc ataacaatag aattctcaag caaattctgg ccttacttta cactaataca + 4321 catgatcaca acaataatct ctttgctaat cataatctcc atcatgattg caatactaaa + 4381 caaactctgt gaatataacg tattccataa caaaaccttt gagttaccaa gagctcgagt + 4441 caatacatag cattcatcaa tctaatagct caaaacagta accttgcatt taaaagtgaa + 4501 caaccctcac ctctttacaa aaccacatca acatctcacc atgcaaacca tcatccatac + 4561 tataaagtag ttaattnnnn aaaannnnat agtcataaca atgaactnna agatattaag + 4621 actnaacaat aacgttgggg caaatgcaaa catgtccaaa aacaaggacc aacgcaccgc + 4681 taagacacta gaaaagacct gggacactct caatcattta ttattcatat catcgtgctt + 4741 atataagtta aatcttaaat ctatagcaca aatcacatta tccattctgg caatgataat + 4801 ctcaacttca cttataattg cagccatcat attcatagcc tcggcaaacc acaaagtcac + 4861 actaacaact gcaatcatac aagatgcaac aagccagatc aagaacacaa ccccaacata + 4921 cctcacccag aatcctcagc ttggaatcag cttctccaat ctgtctgaaa ttacatcaca + 4981 aaccaccacc atactagctt caacaacacc aagagtcaag tcaaccctgc aatccacaac + 5041 agtcaagacc aaaaacacaa caacaaccca aatacaaccc agcaagccca ccacaaaaca + 5101 acgccaaaac aaaccaccaa acaaacccaa taatgatttt cactttgaag tgttcaactt + 5161 tgtaccctgc agcatatgca gcaacaatcc aacctgctgg gctatctgca aaagaatacc + 5221 nnnnnnaaac aaaaaaccag gaaagaaaac caccaccaag cccacaaaaa aaccaaccat + 5281 caagacaacc aaaaaagatc tcaaacctca aaccacaaaa ccaaaggaag tacctaccac + 5341 caagcccaca gaaaagccaa ccatcaacac caccaaaaca aacatcataa ctacactgct + 5401 caccaacaat accacaggaa atccagaaca cacaagtcaa nnnnnnnnnn nnnnnnnnnn + 5461 nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nnaaggaaac + 5521 cttccactca acctcctccg aaggcaatcc aagcccttca caagtctata caacatccga + 5581 gtacctatca caaccttcat ctccatccaa cacaacaaac cagtagttat tnnaaaaaac + 5641 atattattac naaaaagcca tgaccaaatc aaacagaatc aaaataaact ctggggcaaa + 5701 taacaatgga gttgccaatc ctcaaaacaa atgcaattac cacaatcctt gctgcagtca + 5761 cattttgttt tgcttctagt caaaacatca ctgaagaatt ttatcaatca acatgcagtg + 5821 cagttagcaa aggctatctt agtgctctaa gaactggttg gtatactagt gttataacta + 5881 tagaattaag taatatcaag gaaaataagt gtaatggaac agatgctaag gtaaaattga + 5941 taaaacaaga attagataaa tataaaaatg ctgtaacaga attgcagttg ctcatgcaaa + 6001 gcacaccagc agcaaacaat cgagccagaa gagaactacc aaggtttatg aattatacac + 6061 tcaacaatac caaaaaaacc aatgtaacat taagcaagaa aaggaaaaga agatttcttg + 6121 gttttttgtt aggtgttgga tctgcaatcg ccagtggcat tgctgtatct aaggtcctgc + 6181 acctagaagg ggaagtgaac aaaatcaaaa gtgctctact atccacaaac aaggctgtag + 6241 tcagcttatc aaatggagtt agtgtcttaa ccagcaaagt gttagacctc aaaaactata + 6301 tagataaaca attgttacct attgtgaaca agcaaagctg cagcatatca aatatagaaa + 6361 ctgtgataga gttccaacaa aagaacaaca gactactaga gattaccagg gaatttagtg + 6421 ttaatgcagg tgtaactaca cctgtaagca cttatatgtt aactaatagt gaattattat + 6481 cattaatcaa tgatatgcct ataacaaatg atcagaaaaa gttaatgtcc aacaatgttc + 6541 aaatagttag acagcaaagt tactctatca tgtccataat aaaagaggaa gtcttagcat + 6601 atgtagtaca attaccacta tatggtgtaa tagatacacc ttgttggaaa ttacacacat + 6661 cccctctatg tacaaccaac acaaaagaag ggtccaacat ctgtttaaca agaaccgaca + 6721 gaggatggta ctgtgacaat gcaggatcag tatctttctt cccacaagct gaaacatgta + 6781 aagttcaatc gaatcgagta ttttgtgaca caatgaacag tttaacatta ccaagtgaag + 6841 taaatctctg caatattgac atattcaatc ccaaatatga ttgtaaaatt atgacttcaa + 6901 aaacagatgt aagcagctcc gttatcacat ctctaggagc cattgtgtca tgctatggca + 6961 aaactaaatg tacagcatcc aataaaaatc gtggaatcat aaagacattt tctaacgggt + 7021 gtgattatgt atcaaataaa ggggtggaca ctgtgtctgt aggtaataca ttatattatg + 7081 taaataagca agaaggcaaa agtctctatg taaaaggtga accaataata aatttctatg + 7141 acccattagt attcccctct gatgaatttg atgcatcaat atctcaagtc aatgagaaga + 7201 ttaaccagag cctagcattt attcgtaaat ccgatgaatt attacataat gtaaatgctg + 7261 gtaaatccac cacaaatatc atgataacta ctataattat agtgattata gtaatattgt + 7321 tatcattaat tgcagttgga ctgctcctat actgtaaggc cagaagcaca ccagtcacac + 7381 taagcaagga tcaactgagt ggtataaata atattgcatt tagtaactga ataaaaatag + 7441 cacctaatca tgttcttaca atggtttact atctgatcat agacaaccca tctatcattg + 7501 gattttcttn naaaatctga acttcatcga aactctcatc tataaaccat ctcacttaca + 7561 ctatttaagt agattcctat tttatagtta tatnnnaaaa cacaattgaa taccagatta + 7621 acttactatt tgnnnnnntn aaaaatgaga actggggcaa atatgtcacg aaggaatcct + 7681 tgcaaatttg aaattcgagg tcattgcttg aatggtaaga ggtgtcattt tagtcataat + 7741 tattttgaat ggccacccca tgcactgctt gtaagacaaa actttatgtt aaacagaata + 7801 cttaagtcta tggataaaag tatagatact ttatcagaaa taagtggagc tgcagagttg + 7861 gacagaacag aagagtatgc tcttggtgta gttggagtgc tagagagtta tataggatca + 7921 ataaataata taactaaaca atcagcatgt gttgccatga gcaaactcct cactgaactc + 7981 aatagtgatg atatcaaaaa actaagggac aatgaagagc taaattcacc caagataaga + 8041 gtgtacaata ctgtcatatc atatattgaa agcaacagga aaaacaataa acaaactatc + 8101 catctgttaa aaagattgcc agcagacgta ttgaagaaaa ccatcaaaaa cacattggat + 8161 atccacaaga gcataaccat caataaccca aaagaattaa ctgttagtga tacaaatgac + 8221 catgccaaaa ataatgatac tacctgacaa atatccttgt agtataaatt ccatactaat + 8281 aacaagtaga tgtagagtta ctatgtataa tcaaaagaac acactatatt tcaatcaaaa + 8341 caaccaaaat aaccatatat actcaccgaa tcaaacattc aatgaaatcc attggacctc + 8401 tcaagacttg attgatacaa ttcaaaattt tctacaacat ctaggtatta ctgatgatat + 8461 atatacaata tatatattag tgtcataaca ctcaatccta atacttacca catcattaaa + 8521 ttattaactc aaacaattca agctatggga caaaatggat cccattatta atggaaattc + 8581 tgctaatgtt tatctaaccg atagttattt aaaaggtgtt atttctttct cagaatgtaa + 8641 tgctttagga agttacatat tcaatggtcc ttatctcaaa aatgattata ccaacttaat + 8701 tagtagacaa aatccattaa tagaacacat aaatctaaag aaactaaata taacacagtc + 8761 cttaatatct aagtatcata aaggtgaaat aaaaatagaa gaacctactt attttcagtc + 8821 attacttatg acatacaaga gtatgacctc gtcagaacag attactacca ctaatttact + 8881 taaaaagata ataagaagag ctatagaaat aagtgatgtc aaagtctatg ctatattgaa + 8941 taaactaggg cttaaagaaa aagacaagat taaatccaac aatggacaag atgaagacaa + 9001 ctcagttatt acaaccataa tcaaagatga tatactttta gctgttaagg ataatcaatc + 9061 tcatcttaaa gcagacaaaa atcactctac aaaacaaaaa gatacaatca aaacaacact + 9121 cttgaagaaa ttaatgtgtt caatgcaaca tcctccatca tggttaatac attggtttaa + 9181 tttatacaca aaattaaaca acatattaac acagtatcga tcaaatgagg taaaaaacca + 9241 tggttttata ttgatagata atcaaactct taatggattt caatttattt tgaatcaata + 9301 tggttgtata gtttatcata aggaactcaa aagaattact gtgacaacct ataatcaatt + 9361 cttgacatgg aaagatatta gccttagtag attaaatgtt tgtttaatta catggattag + 9421 taactgtttg aacacattaa ataaaagctt aggcttaaga tgcggattca ataatgttat + 9481 cttgacacaa ctattccttt atggagattg tatactaaaa ctatttcaca atgaggggtt + 9541 ctacataata aaagaggtag agggatttat tatgtctcta attttaaata taacagaaga + 9601 agatcaattc agaaaacgat tttataatag tatgctcaac aacatcacag atgctgctaa + 9661 taaagctcag aaaaatctgc tatcaagagt atgtcataca ttattagata agacagtatc + 9721 cgataatata ataaatggca gatggataat tctattaagt aagttcctta aattaattaa + 9781 gcttgcaggt gacaataacc ttaacaatct gagtgaatta tattttttgt tcagaatatt + 9841 tggacaccca atggtagatg aaagacaagc catggatgct gttaaagtta attgcaatga + 9901 gaccaaattt tacttgttaa gcagtttgag tatgttaaga ggtgccttta tatatagaat + 9961 tataaaaggg tttgtaaata attacaacag atggcctact ttaagaaatg ctattgtttt + 10021 acccttaaga tggttaactt actataaact aaacacttat ccttctttgt tggaacttac + 10081 agaaagagat ttgattgttt tatcaggact acgtttctat cgtgagtttc ggttgcctaa + 10141 aaaagtggat cttgaaatga ttataaatga taaagctata tcacctccta aaaatttgat + 10201 atggactagt ttccctagaa attatatgcc gtcacacata caaaattata tagaacatga + 10261 aaaattaaaa ttttccgaga gtgataaatc aagaagagta ttagagtatt atttaagaga + 10321 taacaaattc aatgaatgtg atttatacaa ctgtgtagtt aatcaaagtt atcttaacaa + 10381 ccctaatcat gtggtatcat tgacaggcaa agaaagagaa ctcagtgtag gtagaatgtt + 10441 tgcaatgcaa ccaggaatgt tcagacaagt tcaaatattg gcagagaaaa tgatagctga + 10501 aaacatttta caattctttc ctgaaagtct tacaagatat ggtgatctag aactacaaaa + 10561 aatattagaa ttgaaagcag gaataagtaa caaatcaaat cgttacaatg ataattacaa + 10621 caattacatt agtaagtgct ctatcatcac agatctcagc aaattcaatc aagcatttcg + 10681 atatgaaaca tcatgtattt gtagtgatgt actggatgaa ctgcatggtg tacaatctct + 10741 attttcctgg ttacatttaa ctattcctca tgtcacaata atatgcacat ataggcatgc + 10801 acccccctat ataagagatc atattgtaga tcttaacaat gtagatgaac aaagtggatt + 10861 atatagatat catatgggtg gtatcgaagg gtggtgtcaa aaactatgga ccatagaagc + 10921 tatatcacta ttggatctaa tatctctcaa agggaaattc tcaattactg ctttaattaa + 10981 tggtgacaat caatcaatag atataagtaa accagtcaga ctcatggaag gtcaaactca + 11041 tgctcaagca gattatttgc tagcattaaa tagtcttaaa ttactgtata aagagtatgc + 11101 aggcataggc cacaaattaa aaggaactga gacttatata tcaagagata tgcaatttat + 11161 gagtaaaaca attcaacata acggtgtata ttacccagct agtataaaga aagtcctaag + 11221 agtgggaccg tggataaaca ctatacttga tgatttcaaa gtgagtctag aatctatagg + 11281 tagtttgaca caagaattag aatatagagg tgaaagtcta ttatgcagtt taatatttag + 11341 aaatgtatgg ttatataatc aaattgcttt acaattaaaa aatcatgcat tatgtaacaa + 11401 taaattatat ttggacatat taaaggttct gaaacactta aaaacctttt ttaatcttga + 11461 taatattgat acagcattaa cattgtatat gaatttaccc atgttatttg gtggtggtga + 11521 tcccaacttg ttatatcgaa gtttctatag aagaactcct gatttcctca cagaggctat + 11581 agttcactct gtgttcatac ttagttatta tacaaaccat gatttaaaag ataaacttca + 11641 agatctgtca gatgatagat tgaataagtt cttaacatgc ataatcacgt ttgacaaaaa + 11701 ccctaatgct gaattcgtaa cattgatgag agatcctcaa gctttagggt ctgagagaca + 11761 agctaaaatt actagcgaaa tcaatagact ggcagttaca gaggttttga gtacagctcc + 11821 aaacaaaata ttctccaaaa gtgcacaaca ttataccact acagagatag atctaaatga + 11881 tattatgcaa aatatagaac ctacatatcc tcacgggcta agagttgttt atgaaagttt + 11941 acccttttat aaagcagaga aaatagtaaa tcttatatca ggtacaaaat ctataactaa + 12001 catactggaa aagacttctg ccatagactt aacagatatt gatagagcca ctgagatgat + 12061 gaggaaaaac ataactttgc ttataaggat atttccatta gattgtaaca gagataaaag + 12121 agaaatattg agtatggaaa acctaagtat tactgaatta agcaaatatg ttagagaaag + 12181 atcttggtct ttatccaata tagttggtgt tacatcaccc agtatcatgt atacaatgga + 12241 catcaaatat acaacaagca ctatagctag tggcataatc atagagaaat ataatgttaa + 12301 cagtttaaca cgtggtgaga gaggacccac taaaccatgg gttggttcat ctacacaaga + 12361 gaaaaaaaca atgccagttt ataatagaca agttttaacc aaaaaacaga gagatcaaat + 12421 agatctatta gcaaaattgg attgggtgta tgcatctata gataacaagg atgaattcat + 12481 ggaagaactt agcataggaa ctcttgggtt aacatatgaa aaggccaaaa aattatttcc + 12541 acaatattta agtgttaact atttgcatcg ccttacagtc agtagtagac catgtgaatt + 12601 ccctgcatca ataccagctt atagaacaac aaattatcac tttgatacta gccctattaa + 12661 tcgcatatta acagaaaagt atggtgatga agatattgat atagtattcc aaaactgtat + 12721 aagctttggc cttagcttaa tgtcagtagt agaacaattt actaatgtat gtcctaacag + 12781 aattattctc atacctaagc ttaatgagat acatttgatg aaacctccca tattcacagg + 12841 tgatgttgat attcacaagt taaaacaagt gatacaaaaa cagcatatgt ttttaccaga + 12901 caaaataagt ttgactcaat atgtggaatt attcttaagt aataaaacac tcaaatctgg + 12961 atctcatgtt aattctaatt taatattggc acataaaata tctgactatt ttcataatac + 13021 ttacatttta agtactaatt tagctggaca ttggattctg attatacaac ttatgaaaga + 13081 ttctaagggt atttttgaaa aagattgggg agagggatat ataactgatc atatgtttat + 13141 taatttgaaa gttttcttca atgcttataa gacctatctc ttgtgttttc ataaaggtta + 13201 tggcaaagca aagctggagt gtgatatgaa tacttcagat ctcctatgtg tattggaatt + 13261 aatagacagt agttattgga agtctatgtc taaggtattt ttagaacaaa aagttatcaa + 13321 atacattctt agccaagatg caagtttaca tagagtaaaa ggatgtcata gcttcaaatt + 13381 atggtttctt aaacgtctta atgtagcaga attcacagtt tgcccttggg ttgttaacat + 13441 agattatcat ccaacacata tgaaagcaat attaacttat atagatcttg ttagaatggg + 13501 attgataaat atagatagaa tatacattaa aaataaacac aaattcaatg atgaatttta + 13561 tacttctaat ctcttttaca ttaattataa cttctcagat aatactcatc tattaactaa + 13621 acatataagg attgctaatt ctgaattaga aaataattac aacaaattat atcatcctac + 13681 accagaaacc ctagaaaata tactaaccaa tccgattaaa agtaatgaca aaaagacact + 13741 gaatgactat tgtataggta aaaatgttga ctcaataatg ttaccattgt tatctaataa + 13801 gaagcttatt aaatcgtcta caatgattag aaccaattac agcaaacaag atttgtataa + 13861 tttatttcct acggttgtga ttgataaaat tatagatcat tcaggtaata cagccaaatc + 13921 taaccaactt tacactacta cttctcatca aatatcttta gtgcacaata gcacatcact + 13981 ttattgcatg cttccttggc atcatattaa tagattcaat tttgtattta gttctacagg + 14041 ttgtaaaatt agtatagagt atattttaaa agaccttaaa attaaagatc ctaattgtat + 14101 agcattcata ggtgaaggag cagggaattt attattgcgt acagtagtgg aacttcatcc + 14161 tgacataaga tatatttaca gaagtctgaa agattgcaat gatcatagtt tacctattga + 14221 gtttttaagg ctgtacaatg gacatatcaa cattgattat ggtgaaaatt tgaccattcc + 14281 tgctacagat gcaaccaaca acattcattg gtcttattta catataaagt ttgctgaacc + 14341 tatcagtctt tttgtctgtg atgctgaatt gcctgtaaca gtcaactgga gtaaaattat + 14401 aatagaatgg agcaagcatg taagaaaatg caagtactgt tcctcagtta ataaatgtac + 14461 gttaatagta aaatatcatg ctcaagatga tattgatttc aaattagaca atataactat + 14521 attaaaaact tatgtatgct taggcagtaa gttaaaggga tctgaagttt acttagtcct + 14581 tacaataggt cctgcaaatg tattcccagt atttaatgta gtacaaaatg ctaaattgat + 14641 actatcaaga accaaaaatt tcatcatgcc taagaaagct gataaagagt ctattgatgc + 14701 aaatattaaa agtttgatac cctttctttg ttaccctata acaaaaaaag gaattaatac + 14761 tgcattgtca aaactaaaga gtgttgttag tggagatata ctatcatatt ctatagctgg + 14821 acgtaatgaa gttttcagca ataaacttat aaatcataag catatgaaca tcttaaagtg + 14881 gttcaatcat gttttaaatt tcagatcaac agaactaaac tataatcatt tatatatggt + 14941 agaatctaca tatccttatc taagtgaatt gttaaacagc ttgacaacta atgaacttaa + 15001 aaaactgatt aaaatcacag gtagtttgtt atacaacttt cataatgaat aa +// diff --git a/rsv/profiles/pango/REFROOTB.gb b/rsv/profiles/pango/REFROOTB.gb new file mode 100644 index 0000000..ccaaa97 --- /dev/null +++ b/rsv/profiles/pango/REFROOTB.gb @@ -0,0 +1,479 @@ +LOCUS REFROOTB 15033 bp RNA UNK 01-JAN-1980 +DEFINITION REFROOTB. +ACCESSION REFROOTB +VERSION REFROOTB +KEYWORDS . +SOURCE . + ORGANISM Human orthopneumovirus + Viruses; Riboviria; Orthornavirae; Negarnaviricota; Haploviricotina; + Monjiviricetes; Mononegavirales; Pneumoviridae; Orthopneumovirus. +FEATURES Location/Qualifiers + source 1..15033 + /organism="Human orthopneumovirus" + /mol_type="genomic RNA" + gene 1..479 + /gene="NS1" + /db_xref="GeneID:1489818" + mRNA 1..479 + /gene="NS1" + /db_xref="GeneID:1489818" + CDS 1..420 + /gene="NS1" + /codon_start=1 + /product="nonstructural protein 1" + /protein_id="NP_056856.1" + /db_xref="GeneID:1489818" + /translation="MGCNSLSMIKVRLQNLFDNDEVALLKITCYTDKLILLTNALAKAA + IHTIKLNGIVFIHVITSSEVCPDNNIVVKSNFTTMPILQNGGYIWELIELTHCSQLNGL + MDDNCEIKFSKRLSDSVMTDYMNQISDLLGLDLNS*" + gene 496..1002 + /gene="NS2" + /db_xref="GeneID:1489819" + mRNA 496..1002 + /gene="NS2" + /db_xref="GeneID:1489819" + CDS 528..902 + /gene="NS2" + /codon_start=1 + /product="nonstructural protein 2" + /protein_id="NP_056857.1" + /db_xref="GeneID:1489819" + /translation="MSTTNDNTTMQRLMITDMRPLSMESIITSLTKEIITHKFIYLINN + ECIVRKLDERQATFTFLVNYEMKLLHKVGSTKYKKYTEYNTKYGTFPMPIFINHGGFLE + CIGIKPTKHTPIIYKYDLNP*" + gene 1029..2233 + /gene="N" + /db_xref="GeneID:1489820" + mRNA 1029..2233 + /gene="N" + /db_xref="GeneID:1489820" + CDS 1044..2219 + /gene="N" + /codon_start=1 + /product="nucleoprotein" + /protein_id="NP_056858.1" + /db_xref="GeneID:1489820" + /translation="MALSKVKLNDTLNKDQLLSSSKYTIQRSTGDNIDTPNYDVQKHLN + KLCGMLLITEDANHKFTGLIGMLYAMSRLGREDTIKILKDAGYHVKANGVDITTYRQDI + NGKEMKFEVLTLSSLTSEIQVNIEIESRKSYKKMLKEMGEVAPEYRHDSPDCGMIILCI + AALVITKLAAGDRSGLTAVIRRANNVLKNEIKRYKGLIPKDIANSFYEVFEKHPHLIDV + FVHFGIAQSSTRGGSRVEGIFAGLFMNAYGSGQVMLRWGVLAKSVKNIMLGHASVQAEM + EQVVEVYEYAQKLGGEAGFYHILNNPKASLLSLTQFPNFSSVVLGNAAGLGIMGEYRGT + PRNQDLYDAAKAYAEQLKENGVINYSVLDLTAEELEAIKHQLNPKEDDVEL*" + gene 2237..3176 + /gene="P" + /db_xref="GeneID:1489821" + mRNA 2237..3176 + /gene="P" + /db_xref="GeneID:1489821" + CDS 2254..2979 + /gene="P" + /codon_start=1 + /product="phosphoprotein" + /protein_id="NP_056859.1" + /db_xref="GeneID:1489821" + /translation="MEKFAPEFHGEDANNKATKFLESIKGKFASSKDPKKKDSIISVNS + IDIEVTKESPITSGTNIINPISEADSTPETKANYPRKPLVSFKEDLTPSDNPFSKLYKE + TIETFDNNEEESSYSYEEINDQTNDNITARLDRIDEKLSEILGMLHTLVVASAGPTSAR + DGIRDAMVGLREEMIEKIRAEALMTNDRLEAMARLRNEESEKMAKDTSDEVSLNPTSKK + LSDLLEDNDSDNDLSLDDF*" + gene 3186..4140 + /gene="M" + /db_xref="GeneID:1489822" + mRNA 3186..4140 + /gene="M" + /db_xref="GeneID:1489822" + CDS 3195..3965 + /gene="M" + /codon_start=1 + /product="matrix protein" + /protein_id="NP_056860.1" + /db_xref="GeneID:1489822" + /translation="METYVNKLHEGSTYTAAVQYNVLEKDDDPASLTIWVPMFQSSVPA + DLLIKELASINILVKQISTPKGPSLRVTINSRSAVLAQMPSNFIISANVSLDERSKLAY + DVTTPCEIKACSLTCLKVKSMLTTVKDLTMKTFNPTHEIIALCEFENIMTSKRVIIPTY + LRSISVKNKDLNSLENIATTEFKNAITNAKIIPYAGLVLVITVTDNKGAFKYIKPQSQF + IVDLGAYLEKESIYYVTTNWKHTATRFSIKPLED*" + gene 4150..4570 + /gene="SH" + /db_xref="GeneID:1489823" + mRNA 4150..4570 + /gene="SH" + /db_xref="GeneID:1489823" + CDS 4235..4432 + /gene="SH" + /codon_start=1 + /product="small hydrophobic protein" + /protein_id="NP_056861.1" + /db_xref="GeneID:1489823" + /translation="MGNTSITIEFTSKFWPYFTLIHMILTLISLLIIITIMIAILNKLS + EHKTFCNKTLELGQMYQINT*" + gene 4621..5612 + /gene="G" + /db_xref="GeneID:1489824" + mRNA 4621..5612 + /gene="G" + /db_xref="GeneID:1489824" + CDS 4636..5601 + /gene="G" + /codon_start=1 + /product="attachment glycoprotein" + /protein_id="NP_056862.1" + /db_xref="GeneID:1489824" + /translation="MSKHKNQRTARTLEKTWDTLNHLIVISSCLYRLNLKSIAQIALSV + LAMIISTSLIIAAIIFIISANHKVTLTTVTVQTIKNHTEKNITTYLTQVSPERVSSSKQ + PTTTSPIHTNSATISPNTKSETHHTTAQTKGRITTSTQTNKPSTKSRSKNPPKKPKDDY + HFEVFNFVPCSICGNNQLCKSICKTIPSNKPKKKPTIKPTNKPTIKTTNKRDPKTPAKM + PKKEXXXTTNPTKKPTLKTTERDXXXXXXXXXXXXXXXXXXXXXSTSQSTVLDTTTSKH + TIQQQYLHSTTSENTPNSTQIPTASEPSTSNST*KT*SHT*" + gene 5665..7570 + /gene="F" + /db_xref="GeneID:1489825" + mRNA 5665..7570 + /gene="F" + /db_xref="GeneID:1489825" + CDS 5678..7402 + /gene="F" + /codon_start=1 + /product="fusion glycoprotein" + /protein_id="NP_056863.1" + /db_xref="GeneID:1489825" + /translation="MELLIHRSSAIFLTLAINALYLTSSQNITEEFYQSTCSAVSRGYF + SALRTGWYTSVITIELSNIKETKCNGTDTKVKLIKQELDKYKNAVTELQLLMQNTPAVN + NRARREAPQYMNYTINTTKNLNVSISKKRKRRFLGFLLGVGSAIASGIAVSKVLHLEGE + VNKIKNALLSTNKAVVSLSNGVSVLTSKVLDLKNYINNQLLPIVNKQSCRISNIETVIE + FQQKNSRLLEITREFSVNAGVTTPLSTYMLTNSELLSLINDMPITNDQKKLMSSNVQIV + RQQSYSIMSIIKEEVLAYVVQLPIYGVIDTHCWKLHTSPLCTTNIKEGSNICLTRTDRG + WYCDNAGSVSFFPQADTCKVQSNRVFCDTMNSLTLPSEVSLCNTDIFNSKYDCKIMTSK + TDISSSVITSLGAIVSCYGKTKCTASNKNRGIIKTFSNGCDYVSNKGVDTVSVGNTLYY + VNKLEGKNLYVKGEPIINYYDPLVFPSDEFDASISQVNEKINQSLAFIRRSDELLHNVN + TGKSTTNIMITTIIIVIIVVLLSLIAIGLLLYCKAKNTPVTLSKDQLSGINNIAFSK*" + gene 7627..8586 + /gene="M2" + /db_xref="GeneID:1489826" + mRNA 7627..8586 + /gene="M2" + /db_xref="GeneID:1489826" + CDS 7636..8223 + /gene="M2-1" + /note="ORF 1, matrix protein 2" + /codon_start=1 + /product="M2-1 protein" + /protein_id="NP_056864.1" + /db_xref="GeneID:1489826" + /translation="MSRRNPCKFEIRGHCLNGRRCHYSHNYFEWPPHALLVRQNFMLNK + ILKSMDKSIDTLSEISGAAELDRTEEYALGIVGVLESYIGSINNITKQSACVAMSKLLI + EINSDDIKKLRDNEEPNSPKIRVYNTVISYIESNRKNNKQTIHLLKRLPADVLKKTIKN + TLDIHKSITISNPKESTVNDQNDQTKNNDITG*" + CDS 8189..8461 + /gene="M2-2" + /note="ORF 2, RNA processivity factor" + /codon_start=1 + /product="M2-2 protein" + /protein_id="NP_056865.3" + /db_xref="GeneID:1489826" + /translation="MTKPKIMILPDKYPCSISSILISSESMVATFNHKNILQFNYNHLD + NHQCLLNHIFDEIHWTPKNLLDTTQQFLQHLNIPEDIYTVYILVS*" + gene 8519..15033 + /gene="L" + /db_xref="GeneID:1489827" + mRNA 8519..15033 + /gene="L" + /db_xref="GeneID:1489827" + CDS 8527..15033 + /gene="L" + /codon_start=1 + /product="polymerase protein" + /protein_id="NP_056866.1" + /db_xref="GeneID:1489827" + /translation="MDPIINGNSANVYLTDSYLKGVISFSECNALGSYLFNGPYLKNDY + TNLISRQSPLLEHMNLKKLTITQSLISRYHKGELKLEEPTYFQSLLMTYKSMSSSEQIA + TTNLLKKIIRRAIEISDVKVYAILNKLGLKEKDRVKPNNNSGDENSVLTTIIKDDILSA + VENNQSYTNSDKNHSVNQNITIKTTLLKKLMCSMQHPPSWLIHWFNLYTKLNNILTQYR + SNEVKSHGFILIDNQTLSGFQFILNQYGCIVYHKGLKKITTTTYNQFLTWKDISLSRLN + VCLITWISNCLNTLNKSLGLRCGFNNIVLSQLFLYGDCILKLFHNEGFYIIKEVEGFIM + SLILNITEEDQFRKRFYNSMLNNITDAAIKAQKNLLSRVCHTLLDKTVSDNIINGKWII + LLSKFLKLIKLAGDNNLNNLSELYFLFRIFGHPMVDERQAMDAVRINCNETKFYLLSSL + STLRGAFIYRIIKGFVNTYNRWPTLRNAIVLPLRWLNYYKLNTYPSLLEITENDLIILS + GLRFYREFHLPKKVDLEMIINDKAISPPKDLIWTSFPRNYMPSHIQNYIEHEKLKFSES + DRSRRVLEYYLRDNKFNECDLYNCVVNQSYLNNSNHVVSLTGKERELSVGRMFAMQPGM + FRQIQILAEKMIAENILQFFPESLTRYGDLELQKILELKAGISNKSNRYNDNYNNYISK + CSIITDLSKFNQAFRYETSCICSDVLDELHGVQSLFSWLHLTIPLVTIICTYRHAPPFI + KDHVVNLNEVDEQSGLYRYHMGGIEGWCQKLWTIEAISLLDLISLKGKFSITALINGDN + QSIDISKPVRLIEGQTHAQADYLLALNSLKLLYKEYAGIGHKLKGTETYISRDMQFMSK + TIQHNGVYYPASIKKVLRVGPWINTILDDFKVSLESIGSLTQELEYRGESLLCSLIFRN + IWLYNQIALQLRNHALCNNKLYLDILKVLKHLKTFFNLDSIDMALSLYMNLPMLFGGGD + PNLLYRSFYRRTPDFLTEAIVHSVFVLSYYTGHDLQDKLQDLPDDRLNKFLTCIITFDK + NPNAEFVTLMRDPQALGSERQAKITSEINRLAVTEVLSIAPNKIFSKSAQHYTTTEIDL + NDIMQNIEPTYPHGLRVVYESLPFYKAEKIVNLISGTKSITNILEKTSAIDTTDINRAT + DMMRKNITLLIRILPLDCNKDKRELLSLENLSITELSKYVRERSWSLSNIVGVTSPSIM + FTMDIKYTTSTIASGIIIEKYNVNSLTRGERGPTKPWVGSSTQEKKTMPVYNRQVLTKK + QRDQIDLLAKLDWVYASIDNKDEFMEELSTGTLGLSYEKAKKLFPQYLSVNYLHRLTVS + SRPCEFPASIPAYRTTNYHFDTSPINHVLTEKYGDEDIDIVFQNCISFGLSLMSVVEQF + TNICPNRIILIPKLNEIHLMKPPIFTGDVDIIKLKQVIQKQHMFLPDKISLTQYVELFL + SNKALKSGSHINSNLILVHKMSDYFHNAYILSTNLAGHWILIIQLMKDSKGIFEKDWGE + GYITDHMFINLNVFFNAYKTYLLCFHKGYGKAKLECDMNTSDLLCVLELIDSSYWKSMS + KVFLEQKVIKYIVNQDTSLHRIKGCHSFKLWFLKRLNNAKFTVCPWVVNIDYHPTHMKA + ILSYIDLVRMGLINVDKLTXXXKNKNKFNDEFYTSNLFYISYNFSDNTHLLTKQIRIAN + SELENNYNKLYHPTPETLENMSLIPVKSNNSNKPKFCISGNTESMMTSTFSNKMHIKSS + TVTTRFNYSKQDLYNLFPIVVIDRIIDHSGNTAKSNQLYTTTSHQTSLVRNSASLYCML + PWHHVNRFNFVFSSTGCKISIEYILKDLKIKDPSCIAFIGEGAGNLLLRTVVELHPDIR + YIYRSLKDCNDHSLPIEFLRLYNGHINIDYGENLTIPATDATNNIHWSYLHIKFAEPIS + IFVCDAELPVTANWSKIIIEWSKHVRKCKYCSSVNRCILIAKYHAQDDIDFKLDNITIL + KTYVCLGSKLKGSEVYLVLTIGPANILPVFNVVQNAKLILSRTKNFIMPKKTDKESIDA + NIKSLIPFLCYPITKKGIKTSLSKLKSVVNGDILSYSIAGRNEVFSNKLINHKHMNILK + WLDHVLNFRSAELNYNHLYMIESTYPYLSELLNSLTTNELKKLIKITGSVLYNLPNEQ* + " +ORIGIN + 1 atggggtgca attcactgag catgataaag gttagattac aaaatttatt tgacaatgat + 61 gaagtagcat tgttaaaaat aacatgttat actgacaaat taattcttct gactaatgca + 121 ttagccaaag cagcaataca tacaattaaa ttaaacggca tagtttttat acatgttata + 181 acaagcagtg aagtgtgccc tgataacaat attgtagtga aatctaactt tacaacaatg + 241 ccaatattac aaaatggagg atacatatgg gaattgattg agttgacaca ctgctctcaa + 301 ttaaatggtc taatggatga taattgtgaa atcaaatttt ctaaaagact aagtgactca + 361 gtaatgactg attatatgaa tcaaatatct gatttacttg ggcttgatct caattcatga + 421 attatgttta gtctaattca atagacatat gtttattacc attttagtta atataaaaac + 481 tcatcaaagg gaaatggggc aaataaactc acctaatcaa tcaaaccatg agcactacaa + 541 atgacaacac tactatgcaa agattgatga tcacagacat gagacccctg tcgatggaat + 601 caataataac atctctcacc aaagaaatca taacacacaa attcatatac ttgataaaca + 661 atgaatgtat tgtaagaaaa cttgatgaaa gacaagctac atttacattc ttagtcaatt + 721 atgagatgaa gctactgcac aaagtaggga gtaccaaata caaaaaatac actgaatata + 781 atacaaaata tggcactttc cccatgccta tatttatcaa tcatggcggg tttctagaat + 841 gtattggcat taagcctaca aaacacactc ctataatata caaatatgac ctcaacccgt + 901 aaattccaac nnaaaaaaan ctaaccaatc caaactaagc tattccttaa acaacagtga + 961 tcaacagtta agaaggagct aatccatttt agtaattaaa aataaaggta aagccaataa + 1021 cataaattgg ggcaaataca aagatggctc ttagcaaagt caagttaaat gatacattaa + 1081 ataaggatca gctgctgtca tccagcaaat acactattca acgtagtaca ggagataata + 1141 ttgacactcc caattatgat gtgcaaaaac acctaaacaa actatgtggt atgctattaa + 1201 tcactgaaga tgcaaatcat aaattcacag gattaatagg tatgttatat gctatgtcca + 1261 ggttaggaag ggaagacact ataaagatac ttaaagatgc tggatatcat gttaaagcta + 1321 atggagtaga tataacaaca tatcgtcaag atataaatgg aaaggaaatg aaattcgaag + 1381 tattaacatt atcaagcttg acatcagaaa tacaagtcaa tattgagata gaatctagaa + 1441 agtcctacaa aaaaatgcta aaagagatgg gagaagtggc tccagaatat aggcatgatt + 1501 ctccagactg tgggatgata atactgtgta tagctgcact tgtaataacc aaattagcag + 1561 caggagatag atcaggtctt acagcagtaa ttaggagggc aaacaatgtc ttaaaaaacg + 1621 aaataaaacg ctacaagggc ctcataccaa aggatatagc taacagtttt tatgaagtgt + 1681 ttgaaaaaca ccctcatctt atagatgttt ttgtgcactt tggcattgca caatcatcca + 1741 caagaggggg tagtagagtt gaaggaatct ttgcaggatt atttatgaat gcctatggtt + 1801 cagggcaagt aatgctaaga tggggagttt tagccaaatc tgtaaaaaat atcatgctag + 1861 gacatgctag tgtccaggca gaaatggagc aagttgtgga agtctatgaa tatgcacaga + 1921 agttgggagg agaagctgga ttctaccata tattgaacaa tccaaaagca tcattgctgt + 1981 cattaactca atttcctaac ttctcaagtg tggtcctagg caatgcagca ggtctaggca + 2041 taatgggaga gtatagaggt acaccaagaa accaggatct ttatgatgca gccaaagcat + 2101 atgcagagca actcaaagaa aatggagtaa taaactacag tgtattagac ttaacagcag + 2161 aagaattgga agccataaag catcaactca accccaaaga agatgatgta gagctttaag + 2221 ttaacnnaaa aaatacgggg caaataagtc aacatggaga agtttgcacc tgaatttcat + 2281 ggagaagatg caaataacaa agctaccaaa ttcctagaat caataaaggg caagttcgca + 2341 tcatccaaag atcctaagaa gaaagatagc ataatatctg ttaactcaat agatatagaa + 2401 gtaactaaag agagcccgat aacatctggc accaacatca tcaatccaat aagtgaagct + 2461 gatagtaccc cagaaaccaa agccaactac ccaagaaaac ccctagtaag cttcaaagaa + 2521 gatctcaccc caagtgacaa ccctttttct aaattgtaca aagaaacaat agaaacattt + 2581 gataacaatg aagaagaatc tagctactca tatgaagaaa taaatgatca aacaaatgac + 2641 aacattacag caagactaga tagaattgat gaaaaattaa gtgaaatatt aggaatgctc + 2701 catacattag tagttgcaag tgcaggaccc acttcagctc gcgatggaat aagagatgct + 2761 atggttggtc taagagaaga aatgatagaa aaaataagag cggaagcatt aatgaccaat + 2821 gataggttag aggctatggc aagacttagg aatgaggaaa gcgaaaaaat ggcaaaagac + 2881 acctcagatg aagtgtctct taatccaact tccaaaaaat tgagtgactt gttggaagac + 2941 aacgatagtg acaatgatct atcacttgat gatttttgat cagtgatcaa ctcactcagc + 3001 aatcaacaac atcaataaaa cagacatcaa tccattgaat caactgccag accanaacan + 3061 naacaaacgt ccatcagcag aaccaccaac caatcaatca accaattgat caatcagcaa + 3121 cctaacaaaa ttaacaatat agtaacnnnn nnnnaaannn nnaannnnan nangnnnaac + 3181 aagatggggc aaatatggaa acatacgtga acaagcttca cgaaggctcc acatacacag + 3241 cagctgttca gtacaatgtt ctagaaaaag atgatgatcc tgcatcacta acaatatggg + 3301 tgcctatgtt ccagtcatct gtgccagcag acttgctcat aaaagaactt gcaagcatca + 3361 acatactagt gaagcagatc tctacgccca aaggaccttc actacgagtc acgattaact + 3421 caagaagtgc tgtgctggca caaatgccta gtaattttat cataagcgca aatgtatcat + 3481 tagatgaaag aagcaaatta gcatatgatg taactacacc ttgtgaaatc aaagcatgca + 3541 gtctaacatg cttaaaagta aaaagtatgt taactacagt caaagatctt accatgaaaa + 3601 cattcaaccc cactcatgag attattgctc tatgtgaatt tgaaaatatt atgacatcaa + 3661 aaagagtaat aataccaacc tatctaagat caattagtgt caaaaacaag gatctgaact + 3721 cactagaaaa tatagcaacc accgaattca aaaatgctat caccaatgcg aaaattattc + 3781 cttatgcagg attagtatta gttatcacag ttactgacaa taaaggagca ttcaaatata + 3841 tcaagccaca gagtcaattt atagtagatc ttggagccta cctagaaaaa gagagcatat + 3901 attatgtgac tacaaattgg aagcatacag ctacacgttt ttcaatcaaa ccactagaag + 3961 attaaactta attatcaaca ctaaatgaca ggtccacata tatcctcaaa ctacacacta + 4021 tatccaaaca tcatgaacat atacactaca cacttcatca cacaaaccaa tcccactcaa + 4081 aatccaaaat cacttccagc cactatctgc tagacctaga gtgcgaatag gtaaataaaa + 4141 ccaaaatatg gggtaaatag acattagtta gagttcaatc aatctcaaca accatttata + 4201 ctgctaattc aatacatata ctataaattt caaaatggga aatacatcca tcacaataga + 4261 attcacaagc aaattttggc cttattttac actaatacat atgatcttaa ctctaatctc + 4321 tttactaatt ataatcacta ttatgattgc aatactaaat aagctaagtg aacataaaac + 4381 attctgtaac aaaactcttg aactaggaca gatgtatcaa atcaacacat agtgttctac + 4441 cattatgctg tgtcaaatta taatcctgta tatataaaca aacaaatcta atcttctcac + 4501 agagtcatgg tggtgcaaaa ccatgccaac tatcatggta gcatagagta gttnnnnnnn + 4561 natttnnaaa aannnttaac ataatgatga attattagta tgggatcaaa aacanaaatt + 4621 ggggcaaatg caaccatgtc caaacacaag aatcaacgca ctgccaggac tctagaaaag + 4681 acctgggata ctcttaatca tctaattgta atatcctctt gtttatacag attaaattta + 4741 aaatctatag cacaaatagc actatcagtt ttggcaatga taatctcaac ctctctcata + 4801 attgcagcca taatattcat catctctgcc aatcacaaag ttacactaac aacagttaca + 4861 gttcaaacaa taaaaaacca cactgaaaaa aacatcacca cctaccttac tcaagtctca + 4921 ccagaaaggg ttagctcatc caaacaacct acaaccacat caccaatcca cacaaattca + 4981 gctacaatat caccaaatac aaaatcagaa acacaccata caacagcaca aaccaaaggc + 5041 agaatcacca cttcaacaca gaccaacaag ccaagcacaa aatcacgttc aaaaaatcca + 5101 ccaaaaaaac caaaagatga ttaccatttt gaagtgttca attttgttcc atgtagtata + 5161 tgtggcaaca atcaactttg caaatccatc tgcaaaacaa taccaagcaa caaaccaaag + 5221 aaaaaaccaa ccatcaaacc cacaaacaaa ccaaccatca aaaccacaaa caaaagagac + 5281 ccaaaaacac cagccaaaat gccgaaaaaa gaaannnnnn ccaccaccaa cccaacaaaa + 5341 aaaccaaccc tcaagaccac agaaagagac annnnnnnnn nnnnnnnnnn nnnnnnnnnn + 5401 nnnnnnnnnn nnnnnnnnnn nnnnnnnnnn nccagcacct cacaatctac tgtgctcgac + 5461 acaaccacat caaaacacac aatccaacag caatacctcc actcaaccac ctctgaaaac + 5521 acacccaact ccacacaaat acccacagca tccgagccct ccacatcaaa ttctacttaa + 5581 aaaacctagt cacatactta gttattcaaa aactacatct tagcagagaa ccgtgatcta + 5641 tcaagcaaga acgaaattaa acctggggca aataaccatg gagttgctga tccacaggtc + 5701 aagtgcaatc ttcctaactc ttgctattaa tgcattgtac ctcacctcaa gtcagaacat + 5761 aactgaggag ttttaccaat cgacatgtag tgcagttagc agaggttatt ttagtgcttt + 5821 aagaacaggt tggtatacca gtgtcataac aatagaatta agtaatataa aagaaaccaa + 5881 atgcaatgga actgacacta aagtaaaact tataaaacaa gaattagata agtataagaa + 5941 tgcagtaaca gaattacagc tacttatgca aaacacacca gctgtcaaca accgggccag + 6001 aagagaagca ccacagtata tgaactacac aatcaatacc actaaaaacc taaatgtatc + 6061 aataagcaag aagaggaaac gaagatttct gggcttcttg ttaggtgtag gatctgcaat + 6121 agcaagtggt atagctgtat ccaaagttct acaccttgaa ggagaagtga acaaaatcaa + 6181 aaatgctttg ttgtctacaa acaaagctgt agtcagtcta tcaaatgggg ttagtgtttt + 6241 aaccagcaaa gtgttagatc tcaagaatta cataaataac caattattac ccatagtaaa + 6301 taaacagagc tgtcgcatct ccaacattga aacagttata gaattccaac agaagaatag + 6361 cagattgttg gaaatcacca gagaatttag tgtcaatgca ggtgtaacaa cacctttaag + 6421 cacttacatg ttaacaaaca gtgagttact atcattgatc aatgatatgc ctataacaaa + 6481 tgatcagaaa aaattaatgt caagcaatgt tcagatagta aggcaacaaa gttattctat + 6541 catgtctata ataaaggaag aagtccttgc atatgttgta cagctaccta tctatggtgt + 6601 aatagataca cattgctgga aattacacac atcacctcta tgcaccacca acatcaaaga + 6661 aggatcaaat atttgtttaa caaggactga tagaggatgg tattgtgata atgcaggatc + 6721 agtatccttc tttccacagg ctgacacttg taaagtacag tccaatcgag tattttgtga + 6781 cactatgaac agtttgacat taccaagtga agtcagcctt tgtaacactg acatattcaa + 6841 ttccaagtat gactgcaaaa ttatgacatc aaaaacagac ataagtagct cagtaattac + 6901 ttctcttgga gctatagtgt catgttatgg taaaactaaa tgcactgcat ccaataaaaa + 6961 tcgtgggatt ataaagacat tttctaatgg ttgtgactat gtgtcaaaca aaggagtaga + 7021 tactgtgtca gtgggcaaca ctttatacta tgtaaacaag ctggaaggca agaaccttta + 7081 tgtaaaaggg gaacctataa taaattacta tgatcctcta gtgtttcctt ctgatgagtt + 7141 tgatgcatca atatctcaag tcaatgaaaa aatcaatcaa agtttagctt ttattcgtag + 7201 atctgatgaa ttactacata atgtaaatac tggcaaatct actacaaata ttatgataac + 7261 tacaattatt atagtaatca ttgtagtatt gttatcatta atagctattg gtttactgtt + 7321 gtattgcaaa gccaaaaaca caccagttac actaagcaaa gaccaactaa gtggaatcaa + 7381 taatattgca ttcagcaaat agaanaaaaa actacttgat catgtttcaa caacaatctg + 7441 ctgaccacca atcccaaatc aacttaacaa taaatatttc aacatcatag cacaggctga + 7501 atcatttcct cacatcatgc tacctacaca actaagctag atccttaact catagttaca + 7561 tnnnnaaaaa cctcaagtat cacaatcaac cactaaatca acacatcatt cacaaaatta + 7621 acaactgggg caaatatgtc gcgaagaaat ccttgtaaat ttgagattag aggtcattgc + 7681 ttgaatggta gaagatgtca ctacagtcat aattattttg aatggcctcc tcatgcatta + 7741 ctagtgaggc aaaacttcat gttaaacaag atacttaagt caatggacaa aagcatagac + 7801 actttgtcag aaataagtgg agctgctgaa ctggatagaa cagaagaata tgctcttggt + 7861 atagttggag tgctagagag ttacatagga tctataaaca acataacaaa acaatcagca + 7921 tgtgttgcta tgagtaaact tcttattgag attaatagtg atgacattaa aaaactgaga + 7981 gataatgaag aacccaattc acctaagata agagtgtaca atactgttat atcatacatt + 8041 gagagcaata gaaaaaacaa caagcaaacc atccatctgc ttaaaagact accagcagac + 8101 gtgctgaaga agacaataaa gaacacatta gatatccaca aaagcataac cataagcaac + 8161 ccaaaagagt caactgtgaa tgatcaaaat gaccaaacca aaaataatga tattaccgga + 8221 taaatatcct tgtagtatat catccatatt gatttcaagt gaaagcatgg ttgctacatt + 8281 caatcataaa aacatattac aatttaacta taaccatttg gataaccacc agtgtttatt + 8341 aaatcatata tttgatgaaa ttcattggac acctaaaaac ttattagata ccactcaaca + 8401 atttctccaa catcttaaca tccctgaaga tatatataca gtatatatat tagtgtcata + 8461 atgcttgacc ataacgattt tatatcatcc aaccataaaa ctatcataat aaggttatgg + 8521 gacaaaatgg atcccattat taatggaaac tctgctaatg tgtatctaac tgatagttat + 8581 ctaaaaggtg ttatctcttt ttcagaatgt aatgctttag ggagttacct ttttaacggc + 8641 ccttatctta aaaatgatta caccaactta attagtagac aaagcccact actagagcat + 8701 atgaatctaa aaaaactaac tataacacag tcattaatat ctagatatca taaaggtgaa + 8761 ctgaaattag aagaaccaac ttatttccag tcattactta tgacatataa aagtatgtcc + 8821 tcgtctgaac aaattgctac aactaactta cttaaaaaaa taatacgaag agctatagaa + 8881 ataagtgatg taaaggtata cgccatcttg aataaactag gactaaagga aaaggacaga + 8941 gttaagccca acaataattc aggtgatgaa aactcagtac ttacaaccat aattaaagat + 9001 gatatacttt cggctgtgga aaacaatcaa tcatatacaa attcagacaa aaatcactca + 9061 gtaaatcaaa atatcactat caaaacaaca ctcttgaaaa aattgatgtg ttcaatgcaa + 9121 catcctccat catggttaat acactggttc aatttatata caaaattaaa taacatatta + 9181 acacaatatc gatcaaatga ggtaaaaagt catgggttta tattaataga taatcaaact + 9241 ttaagtggtt ttcagtttat tttaaatcaa tatggttgta tcgtttatca taaaggactc + 9301 aaaaaaatca caactactac ttacaatcaa tttttgacat ggaaagacat cagccttagc + 9361 agattaaatg tttgcttaat tacttggata agtaattgtt taaatacatt aaataaaagc + 9421 ttagggctga gatgtggatt caataatatt gtgttatcac aattatttct ttatggagat + 9481 tgtatactga aattatttca taatgaaggc ttctacataa taaaagaagt agagggattt + 9541 attatgtctt taattctaaa cataacagaa gaagatcaat ttaggaaacg attttataat + 9601 agcatgctaa ataacataac agatgcagct attaaggctc aaaaaaacct actatcaaga + 9661 gtatgtcaca ctttattaga caagacagtg tctgataata tcataaatgg taaatggata + 9721 atcctattaa gtaaatttct taaattgatt aagcttgcag gtgataataa tctcaataac + 9781 ttgagtgagc tatattttct cttcagaatc tttggacatc caatggtcga tgaaagacaa + 9841 gcaatggatg ctgtaagaat taactgtaat gaaactaagt tctacttatt aagtagtcta + 9901 agtacgttaa gaggtgcttt tatttataga atcataaaag ggtttgtaaa tacctataac + 9961 agatggccca ctttaaggaa tgctattgtt ctacctctaa gatggttaaa ctattataaa + 10021 cttaatactt atccatctct acttgaaatc acagaaaatg atttgattat tttatcagga + 10081 ttgcggttct atcgtgagtt tcatctgcct aaaaaagtgg atcttgaaat gataataaat + 10141 gacaaagcca tttcacctcc aaaagatcta atatggacta gttttcctag aaattacatg + 10201 ccatcacata tacaaaatta tatagaacat gaaaagttga agttctctga aagcgacaga + 10261 tcaagaagag tactagagta ttacttgaga gataataaat tcaatgaatg cgatctatac + 10321 aattgtgtag tcaatcaaag ctatctcaac aactctaatc acgtggtatc actaactggt + 10381 aaagaaagag agcttagtgt aggtagaatg tttgctatgc aaccaggtat gtttaggcaa + 10441 attcaaatct tagcagagaa aatgatagcc gaaaatattt tacaattctt ccctgagagt + 10501 ttgacaagat atggtgatct agagcttcaa aagatattag aattaaaagc aggaataagc + 10561 aacaagtcaa atcgttataa tgataactac aacaattata tcagtaaatg ttctatcatt + 10621 acagatctta gcaaattcaa tcaagcattt agatatgaaa catcatgtat ctgcagtgat + 10681 gtattagatg aactgcatgg agtacaatct ctgttctctt ggttgcattt aacaatacct + 10741 cttgtcacaa taatatgtac atatagacat gcacctcctt tcataaagga tcatgttgtt + 10801 aatcttaatg aagttgatga acaaagtgga ttatacagat atcatatggg tggtattgag + 10861 ggctggtgtc aaaaactgtg gaccattgaa gctatatcat tattagatct aatatctctt + 10921 aaagggaaat tctctatcac agctctaata aatggtgata atcagtcaat tgatataagt + 10981 aaaccagtta gacttataga gggtcagact catgctcaag cagattattt gttagcatta + 11041 aatagcctta aattgctata taaagagtat gcaggcatag gccataagct taagggaaca + 11101 gagacctata tatcccgaga tatgcaattc atgagcaaaa caatccagca caatggagtg + 11161 tattatccag ccagtatcaa aaaagtcctg agagtaggtc catggataaa tacaatactt + 11221 gatgatttta aagttagttt agaatctata ggcagcttaa cacaggagtt agaatacaga + 11281 ggagaaagct tattatgcag tttaatattt aggaacattt ggttatacaa tcaaattgct + 11341 ttgcaacttc gaaatcatgc attatgtaac aataagctat atttagatat attgaaagta + 11401 ttaaaacact taaaaacttt ttttaatctt gatagtattg atatggcgtt atcattgtat + 11461 atgaatttgc ctatgctgtt tggtggtggt gatcctaatt tgttatatcg aagcttttat + 11521 aggagaactc cagacttcct tacagaagct atagtacatt cagtgtttgt gttgagctat + 11581 tatactggtc acgatttaca agataagctc caggatcttc cagatgatag actgaacaaa + 11641 ttcttgacat gtatcatcac atttgataaa aatcccaatg ccgagtttgt aacattgatg + 11701 agggatccac aggctttagg gtctgaaagg caagctaaaa ttactagtga gattaataga + 11761 ttagcagtaa cagaagtctt aagtatagct ccaaacaaaa tattttctaa aagtgcacaa + 11821 cattatacta ccactgagat tgatctaaat gatattatgc aaaatataga accaacttac + 11881 cctcatggat taagagttgt ttatgaaagt ttaccttttt ataaagcaga aaaaatagtt + 11941 aatcttatat caggaacaaa atccataact aatatacttg aaaaaacatc agcaatagat + 12001 acaactgata ttaatagggc tactgatatg atgaggaaaa atataacttt acttataagg + 12061 atacttccac tagattgtaa caaagacaaa agagagttat taagtttaga aaatcttagt + 12121 ataactgaat taagcaagta tgtaagagaa agatcttggt cattatccaa tatagtagga + 12181 gtaacatcgc caagtattat gttcacaatg gacattaaat atacaactag cactatagcc + 12241 agtggtataa ttatagaaaa atataatgtt aatagtttaa ctcgtggtga aagaggacct + 12301 actaagccat gggtaggttc atctacgcaa gagaaaaaaa caatgccagt gtacaataga + 12361 caagttttaa ccaaaaaaca aagagaccaa atagatttat tagcaaaatt agactgggta + 12421 tatgcatcca tagacaacaa agatgaattc atggaagaac tgagtactgg aacacttgga + 12481 ctgtcatatg aaaaagccaa aaaattgttt ccacaatatc taagtgtcaa ttatttacac + 12541 cgtttaacag tcagtagtag accatgtgaa ttccctgcat caataccagc ttatagaaca + 12601 acaaattatc attttgatac tagtcctatc aatcatgtat taacagaaaa gtatggagat + 12661 gaagatatcg acattgtgtt tcaaaattgc ataagttttg gtcttagcct aatgtcggtt + 12721 gtggaacaat tcacaaacat atgtcctaat agaattattc tcataccgaa gctgaatgag + 12781 atacatttga tgaaacctcc tatatttaca ggagatgttg atatcatcaa gttgaagcaa + 12841 gtgatacaaa aacagcacat gttcctacca gataaaataa gtttaaccca atatgtagaa + 12901 ttattcctaa gtaacaaagc acttaaatct ggatctcaca tcaactctaa tttaatatta + 12961 gtacataaaa tgtctgatta ttttcataat gcttatattt taagtactaa tttagctgga + 13021 cattggattc tgattattca acttatgaaa gattcaaaag gtatttttga aaaagattgg + 13081 ggagaggggt atataactga tcatatgttc attaatttga atgttttctt taatgcttat + 13141 aagacttatt tgctatgttt tcataaaggt tatggtaaag caaaattaga atgtgatatg + 13201 aacacttcag atcttctttg tgttttggag ttaatagaca gtagctactg gaaatctatg + 13261 tctaaagttt tcctagaaca aaaagtcata aaatacatag tcaatcaaga cacaagtttg + 13321 catagaataa aaggttgtca tagttttaag ttgtggtttt taaaacgcct taataatgct + 13381 aaatttaccg tatgcccttg ggttgttaac atagattatc acccaacaca catgaaagct + 13441 atattatctt acatagattt agttagaatg gggttaataa atgtagataa attaaccatn + 13501 nnnnntaaaa ataaaaacaa attcaatgat gaattttaca catcaaatct cttttatatt + 13561 agttataact tttcagacaa cactcatttg ctaacaaaac aaataagaat tgctaattca + 13621 gaattagaaa ataattataa caaactatat cacccaaccc cagaaacttt agaaaatatg + 13681 tcattaattc ctgttaaaag taataatagt aacaaaccta aattttgtat aagtggaaat + 13741 accgaatcta tgatgacgtc aacattctct aataaaatgc atattaaatc ttccactgtt + 13801 accacaagat tcaattatag caaacaagac ttgtacaatt tatttccaat tgttgtgata + 13861 gacaggatta tagatcattc aggtaataca gcaaaatcta accaacttta cactaccact + 13921 tcacatcaga catctttagt aaggaatagt gcatcacttt attgcatgct tccttggcat + 13981 catgtcaata gatttaactt tgtatttagt tccacaggat gcaagattag tatagagtat + 14041 attttaaaag atcttaagat taaggacccc agttgtatag cattcatagg tgaaggagct + 14101 ggtaacttat tattacgtac ggtagtagaa cttcatccag acataagata catttacaga + 14161 agtttaaaag attgcaatga tcatagttta cctattgaat ttctaaggtt atacaacggg + 14221 catataaaca tagattatgg tgagaattta accattcctg ctacagatgc aactaataac + 14281 attcattggt cttatttaca tataaaattt gcagaaccta ttagcatctt tgtctgcgat + 14341 gctgaattac ctgttacagc caattggagt aaaattataa ttgaatggag taagcatgta + 14401 agaaagtgta agtactgttc ttctgtaaat agatgcattt taattgcaaa atatcatgct + 14461 caagatgata ttgatttcaa attagataac attactatat taaaaactta cgtgtgccta + 14521 ggtagcaagt taaaaggatc tgaagtttac ttagtcctta caataggccc tgcaaatata + 14581 cttcctgttt ttaatgttgt gcaaaatgct aaattgattc tttcaagaac taaaaatttc + 14641 attatgccta aaaaaactga caaagaatct atcgatgcaa atattaaaag cttaatacct + 14701 ttcctttgtt accctataac aaaaaaagga attaagactt cattgtcaaa attgaagagt + 14761 gtagttaatg gagatatatt atcatattct atagctggac gtaatgaagt attcagcaac + 14821 aagcttataa accacaagca tatgaatatc ctaaaatggc tagatcatgt tttaaacttt + 14881 agatcagctg aacttaatta caatcattta tatatgatag agtccacata tccttactta + 14941 agtgaattgt taaatagttt aacaaccaat gagctcaaga agctgattaa aataacaggt + 15001 agtgtactat acaaccttcc caacgaacag taa +// diff --git a/rsv/profiles/pango/amino-acid-genotypes.xlsx b/rsv/profiles/pango/amino-acid-genotypes.xlsx new file mode 100644 index 0000000..cb843e8 Binary files /dev/null and b/rsv/profiles/pango/amino-acid-genotypes.xlsx differ diff --git a/rsv/scripts/excel-to-clades.py b/rsv/scripts/excel-to-clades.py new file mode 100644 index 0000000..b821a26 --- /dev/null +++ b/rsv/scripts/excel-to-clades.py @@ -0,0 +1,33 @@ +from typing import Annotated + +import polars as pl +import typer + + +def main( + excel: Annotated[str, typer.Option(help="Excel input")] = "/Users/corneliusromer/code/nextclade_data_workflows/rsv/profiles/pango/amino-acid-genotypes.xlsx", + sheet: Annotated[str, typer.Option(help="Sheet name")] = "AA_RSVA", + outfile: Annotated[str, typer.Option(help="Clades.tsv output")] = "/Users/corneliusromer/code/nextclade_data_workflows/rsv/data/a/EPI_ISL_412866/clades_pango_raw.tsv", +): + df = pl.read_excel(excel, sheet_name=sheet) + df = df.with_columns([ + pl.col("Signature Mutations").str.split(",").alias("mutations"), + ]) + + # Output into clades.tsv + with open(outfile, "w") as f: + f.write("clade\tgene\tsite\talt\n") + for row in df.rows(named=True): + if row["mutations"] is None: + continue + print(row) + for mut in row["mutations"]: + gene, rest = mut.split(":") + site = rest[1:-1] + alt = rest[-1] + if gene == "G": + continue + f.write(f"{row['RSV genotype']}\t{gene}\t{site}\t{alt}\n") + +if __name__ == "__main__": + typer.run(main) \ No newline at end of file diff --git a/sars-cov-2/profiles/clades/lineage_overwrite.tsv b/sars-cov-2/profiles/clades/lineage_overwrite.tsv index 9f69357..ecb74f0 100644 --- a/sars-cov-2/profiles/clades/lineage_overwrite.tsv +++ b/sars-cov-2/profiles/clades/lineage_overwrite.tsv @@ -1,5 +1,7 @@ lineage pos char +DV.7.1 22927 T + B.1.411 1519 T B.1.411 14774 G B.1.411 15438 T