From ba255464dae260d8133ab5a33dff8a9e6228aa48 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 30 Jul 2025 11:36:00 -0700 Subject: [PATCH 1/9] refactor: begin --- datasets/hiv/.gitignore | 3 +- datasets/hiv/Scripts/Data_Prep.py | 31 --------- datasets/hiv/Scripts/Kegg_Orthology.py | 64 ------------------- datasets/hiv/Scripts/SPRAS_Formatting.py | 28 -------- datasets/hiv/Snakefile | 8 +-- datasets/hiv/scripts/kegg_orthology.py | 61 ++++++++++++++++++ .../name_mapping.py} | 32 +++------- datasets/hiv/scripts/prepare.py | 33 ++++++++++ datasets/hiv/scripts/spras_formatting.py | 31 +++++++++ 9 files changed, 141 insertions(+), 150 deletions(-) delete mode 100644 datasets/hiv/Scripts/Data_Prep.py delete mode 100644 datasets/hiv/Scripts/Kegg_Orthology.py delete mode 100644 datasets/hiv/Scripts/SPRAS_Formatting.py create mode 100644 datasets/hiv/scripts/kegg_orthology.py rename datasets/hiv/{Scripts/Name_Mapping.py => scripts/name_mapping.py} (89%) create mode 100644 datasets/hiv/scripts/prepare.py create mode 100644 datasets/hiv/scripts/spras_formatting.py diff --git a/datasets/hiv/.gitignore b/datasets/hiv/.gitignore index e6a1bb2..390e292 100644 --- a/datasets/hiv/.gitignore +++ b/datasets/hiv/.gitignore @@ -1 +1,2 @@ -processed \ No newline at end of file +/processed +/Pickles diff --git a/datasets/hiv/Scripts/Data_Prep.py b/datasets/hiv/Scripts/Data_Prep.py deleted file mode 100644 index 1aca432..0000000 --- a/datasets/hiv/Scripts/Data_Prep.py +++ /dev/null @@ -1,31 +0,0 @@ -import pandas -import pickle -import os - -prize_05 = pandas.read_csv('raw/prize_05.csv', sep='\t', lineterminator='\n') -prize_060 = pandas.read_csv('raw/prize_060.csv', sep='\t', lineterminator='\n') - -prize_05['Uniprot'] = prize_05['Uniprot'].str.split('-', expand=False).str[0] -prize_05 = prize_05.sort_values('Prize', - ascending=False).drop_duplicates('Uniprot').sort_index() - -prize_060['Uniprot'] = prize_060['Uniprot'].str.split('-', expand=False).str[0] -prize_060 = 
prize_060.sort_values('Prize', - ascending=False).drop_duplicates('Uniprot').sort_index() - -prize_060_nodes = prize_060['Uniprot'].tolist() -prize_05_nodes = prize_05['Uniprot'].tolist() - -nodeset = list(set(prize_05_nodes+prize_060_nodes)) - -df = { - "NodeIDs": nodeset, - "prize_05": prize_05, - "prize_060": prize_060 -} - -if not os.path.exists('./Pickles'): - os.makedirs('./Pickles') - -with open("Pickles/NodeIDs.pkl","wb") as file: - pickle.dump(df,file) diff --git a/datasets/hiv/Scripts/Kegg_Orthology.py b/datasets/hiv/Scripts/Kegg_Orthology.py deleted file mode 100644 index 9f1de1c..0000000 --- a/datasets/hiv/Scripts/Kegg_Orthology.py +++ /dev/null @@ -1,64 +0,0 @@ -from Bio.KEGG.KGML.KGML_parser import read -from bioservices import UniProt, KEGG -import pandas as pd -from more_itertools import chunked - -pathway = read(open("Raw_Data/ko03250.xml", 'r')) - -#Read in Kegg pathway data and keep only orthologs -entries_data = [] -for entry in pathway.entries.values(): - if entry.type == 'ortholog': - entries_data.append({ - 'name': entry.name - }) -entries_df = pd.DataFrame(entries_data) - -#Some orthologs have multiple ko codes in the same row -#The following two lines move all ko codes to individual rows -orthology_ids = entries_df['name'].str.split(' ').explode() -orthology_ids = orthology_ids.apply(lambda x: x.split(':')[1]).tolist() - -#Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes -k = KEGG() -ko_hsa_map = k.link('hsa', '+'.join(orthology_ids)) -ko_hsa_dict = {x.split('\t')[0].split(':')[1]: x.split('\t')[1] for x in ko_hsa_map.split('\n')[:-1]} -ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(),columns= ['KEGG_Orthology','HSA']) - -#Kegg .get is limited to 10 entries per call -#The following code chunks the hsa list into sets of 10 -#then calls the .get function on each which returns kegg api data in string format -hsa_chunked = list(chunked(ko_hsa_df['HSA'].tolist(),10)) -raw_uniprot = [] -for entry in hsa_chunked: - 
raw_uniprot.append(k.get('+'.join(entry)).split('\n///\n\n')) - -#Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein -#Note: Although bioservices .link and .conv return cleaner outputs, they do not support -#one to many relationships at this time. -#Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. -processed_uniprot = [] -for chunk in raw_uniprot: - for item in chunk: - item = item.split('\n') - processed_uniprot.append([(x.strip().split(' ')[1:],'hsa:'+(item[0].split(' '*7)[1])) - for x in item if 'UniProt' in x][0]) - -#Creates a dictionary where uniprot ids are keys and hsa ids are values -hsa_uniprot_dict = {} -for item in processed_uniprot : - for entry in item[0]: - hsa_uniprot_dict.update({'up:'+entry:item[1]}) - -#Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa -hsa_uniprot_map = pd.DataFrame.from_dict(hsa_uniprot_dict.items()) -hsa_uniprot_map.columns = ['Uniprot','HSA'] -final_df = ko_hsa_df.merge(hsa_uniprot_map,on = 'HSA') -uniprotIDs = final_df['Uniprot'].apply(lambda x: x.split(':')[1]).tolist() - -#Filters the combined dataframe to include only rows where the uniprot code is in swissprot -u = UniProt() -tst = u.mapping(fr='UniProtKB', to='UniProtKB-Swiss-Prot',query = ','.join(uniprotIDs)) -failed_uniprot = pd.Series(list(set(tst['failedIds']))).apply(lambda x: 'up:'+x) - -final_df = final_df[~final_df['Uniprot'].isin(failed_uniprot)] diff --git a/datasets/hiv/Scripts/SPRAS_Formatting.py b/datasets/hiv/Scripts/SPRAS_Formatting.py deleted file mode 100644 index 837f52b..0000000 --- a/datasets/hiv/Scripts/SPRAS_Formatting.py +++ /dev/null @@ -1,28 +0,0 @@ -import pickle -from pathlib import Path -import os - -current_directory = Path(os.path.dirname(os.path.realpath(__file__))) -PROCESSED_DIR = current_directory.parent / 'processed' - -with open('Pickles/UniprotIDs.pkl', 'rb') as file: - UniprotIDs = pickle.load(file) - -UIDs = 
UniprotIDs["UniprotIDs"] -UMap= UniprotIDs["UniprotMap"] - -with open('Pickles/NodeIDs.pkl','rb') as file2: - prizes = pickle.load(file2) - -prize_05 = prizes["prize_05"] -prize_060 = prizes["prize_060"] - -prize_05['Uniprot'] = prize_05['Uniprot'].apply(lambda x: UMap.get(x)) -prize_060['Uniprot'] = prize_060['Uniprot'].apply(lambda x: UMap.get(x)) - -prize_05.columns = ['NODEID','prize'] -prize_060.columns = ['NODEID','prize'] - - -prize_05.to_csv(PROCESSED_DIR / 'processed_prize_05.txt', sep='\t', header=True, index=False) -prize_060.to_csv(PROCESSED_DIR / 'processed_prize_060.txt', sep='\t', header=True, index=False) diff --git a/datasets/hiv/Snakefile b/datasets/hiv/Snakefile index 14c1d6b..a09afb9 100644 --- a/datasets/hiv/Snakefile +++ b/datasets/hiv/Snakefile @@ -11,7 +11,7 @@ rule data_prep: output: "Pickles/NodeIDs.pkl" shell: - "uv run Scripts/Data_Prep.py" + "uv run scripts/prepare.py" rule name_mapping: input: @@ -19,7 +19,7 @@ rule name_mapping: output: "Pickles/UniprotIDs.pkl" shell: - "uv run Scripts/Name_Mapping.py" + "uv run scripts/name_mapping.py" rule spras_formatting: input: @@ -29,7 +29,7 @@ rule spras_formatting: "processed/processed_prize_05.txt", "processed/processed_prize_060.txt" shell: - "uv run Scripts/SPRAS_Formatting.py" + "uv run scripts/spras_formatting.py" rule copy_network: input: @@ -37,4 +37,4 @@ rule copy_network: output: "processed/phosphosite-irefindex13.0-uniprot.txt" shell: - "cp raw/phosphosite-irefindex13.0-uniprot.txt processed/phosphosite-irefindex13.0-uniprot.txt" \ No newline at end of file + "cp raw/phosphosite-irefindex13.0-uniprot.txt processed/phosphosite-irefindex13.0-uniprot.txt" diff --git a/datasets/hiv/scripts/kegg_orthology.py b/datasets/hiv/scripts/kegg_orthology.py new file mode 100644 index 0000000..647ffec --- /dev/null +++ b/datasets/hiv/scripts/kegg_orthology.py @@ -0,0 +1,61 @@ +from Bio.KEGG.KGML.KGML_parser import read +from bioservices import UniProt, KEGG +import pandas as pd +from more_itertools 
import chunked + +pathway = read(open("Raw_Data/ko03250.xml", "r")) + +# Read in Kegg pathway data and keep only orthologs +entries_data = [] +for entry in pathway.entries.values(): + if entry.type == "ortholog": + entries_data.append({"name": entry.name}) +entries_df = pd.DataFrame(entries_data) + +# Some orthologs have multiple ko codes in the same row +# The following two lines move all ko codes to individual rows +orthology_ids = entries_df["name"].str.split(" ").explode() +orthology_ids = orthology_ids.apply(lambda x: x.split(":")[1]).tolist() + +# Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes +k = KEGG() +ko_hsa_map = k.link("hsa", "+".join(orthology_ids)) +ko_hsa_dict = {x.split("\t")[0].split(":")[1]: x.split("\t")[1] for x in ko_hsa_map.split("\n")[:-1]} +ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(), columns=["KEGG_Orthology", "HSA"]) + +# Kegg .get is limited to 10 entries per call +# The following code chunks the hsa list into sets of 10 +# then calls the .get function on each which returns kegg api data in string format +hsa_chunked = list(chunked(ko_hsa_df["HSA"].tolist(), 10)) +raw_uniprot = [] +for entry in hsa_chunked: + raw_uniprot.append(k.get("+".join(entry)).split("\n///\n\n")) + +# Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein +# Note: Although bioservices .link and .conv return cleaner outputs, they do not support +# one to many relationships at this time. +# Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. 
+processed_uniprot = [] +for chunk in raw_uniprot: + for item in chunk: + item = item.split("\n") + processed_uniprot.append([(x.strip().split(" ")[1:], "hsa:" + (item[0].split(" " * 7)[1])) for x in item if "UniProt" in x][0]) + +# Creates a dictionary where uniprot ids are keys and hsa ids are values +hsa_uniprot_dict = {} +for item in processed_uniprot: + for entry in item[0]: + hsa_uniprot_dict.update({"up:" + entry: item[1]}) + +# Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa +hsa_uniprot_map = pd.DataFrame.from_dict(hsa_uniprot_dict.items()) +hsa_uniprot_map.columns = ["Uniprot", "HSA"] +final_df = ko_hsa_df.merge(hsa_uniprot_map, on="HSA") +uniprotIDs = final_df["Uniprot"].apply(lambda x: x.split(":")[1]).tolist() + +# Filters the combined dataframe to include only rows where the uniprot code is in swissprot +u = UniProt() +tst = u.mapping(fr="UniProtKB", to="UniProtKB-Swiss-Prot", query=",".join(uniprotIDs)) +failed_uniprot = pd.Series(list(set(tst["failedIds"]))).apply(lambda x: "up:" + x) + +final_df = final_df[~final_df["Uniprot"].isin(failed_uniprot)] diff --git a/datasets/hiv/Scripts/Name_Mapping.py b/datasets/hiv/scripts/name_mapping.py similarity index 89% rename from datasets/hiv/Scripts/Name_Mapping.py rename to datasets/hiv/scripts/name_mapping.py index 9cace88..6fb6e6a 100644 --- a/datasets/hiv/Scripts/Name_Mapping.py +++ b/datasets/hiv/scripts/name_mapping.py @@ -17,36 +17,28 @@ def main(): - - with open('Pickles/NodeIDs.pkl', 'rb') as file: + with open("Pickles/NodeIDs.pkl", "rb") as file: NodeIDs = pickle.load(file)["NodeIDs"] - job_id = submit_id_mapping( - from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids= NodeIDs - ) + job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=NodeIDs) if check_id_mapping_results_ready(job_id): link = get_id_mapping_results_link(job_id) uniprot_results = get_id_mapping_results_search(link) uniprot_IDs = [] uniprot_map = {} - for i in 
uniprot_results.get('results'): + for i in uniprot_results.get("results"): uniprot_IDs.append((i.get("to").get("uniProtkbId"))) - uniprot_map.update({i.get("from"):i.get("to").get("uniProtkbId")}) + uniprot_map.update({i.get("from"): i.get("to").get("uniProtkbId")}) - df ={ - "UniprotIDs": uniprot_IDs, - "UniprotMap": uniprot_map - } + df = {"UniprotIDs": uniprot_IDs, "UniprotMap": uniprot_map} - with open("Pickles/UniprotIDs.pkl","wb") as file: - pickle.dump(df,file) + with open("Pickles/UniprotIDs.pkl", "wb") as file: + pickle.dump(df, file) return - - def check_response(response): try: response.raise_for_status() @@ -169,9 +161,7 @@ def get_id_mapping_results_search(url): else: size = 500 query["size"] = size - compressed = ( - query["compressed"][0].lower() == "true" if "compressed" in query else False - ) + compressed = query["compressed"][0].lower() == "true" if "compressed" in query else False parsed = parsed._replace(query=urlencode(query, doseq=True)) url = parsed.geturl() request = session.get(url) @@ -195,11 +185,9 @@ def get_id_mapping_results_stream(url): parsed = urlparse(url) query = parse_qs(parsed.query) file_format = query["format"][0] if "format" in query else "json" - compressed = ( - query["compressed"][0].lower() == "true" if "compressed" in query else False - ) + compressed = query["compressed"][0].lower() == "true" if "compressed" in query else False return decode_results(request, file_format, compressed) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/datasets/hiv/scripts/prepare.py b/datasets/hiv/scripts/prepare.py new file mode 100644 index 0000000..de57b39 --- /dev/null +++ b/datasets/hiv/scripts/prepare.py @@ -0,0 +1,33 @@ +import pandas +from pathlib import Path +import pickle +import os + +# https://stackoverflow.com/a/5137509/7589775 +hiv_path = Path(os.path.dirname(os.path.realpath(__file__)), '..') + +def main(): + prize_05 = pandas.read_csv(hiv_path / "raw" / "prize_05.csv", sep="\t", 
lineterminator="\n") + prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_060.csv", sep="\t", lineterminator="\n") + + prize_05["Uniprot"] = prize_05["Uniprot"].str.split("-", expand=False).str[0] + prize_05 = prize_05.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() + + prize_060["Uniprot"] = prize_060["Uniprot"].str.split("-", expand=False).str[0] + prize_060 = prize_060.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() + + prize_060_nodes = prize_060["Uniprot"].tolist() + prize_05_nodes = prize_05["Uniprot"].tolist() + + nodeset = list(set(prize_05_nodes + prize_060_nodes)) + + df = {"NodeIDs": nodeset, "prize_05": prize_05, "prize_060": prize_060} + + (hiv_path / "Pickles").mkdir(exist_ok=True) + + with open(hiv_path / "Pickles" / "NodeIDs.pkl", "wb") as file: + pickle.dump(df, file) + +if __name__ == '__main__': + main() + diff --git a/datasets/hiv/scripts/spras_formatting.py b/datasets/hiv/scripts/spras_formatting.py new file mode 100644 index 0000000..82c0d0d --- /dev/null +++ b/datasets/hiv/scripts/spras_formatting.py @@ -0,0 +1,31 @@ +import pickle +from pathlib import Path +import os + +current_directory = Path(os.path.dirname(os.path.realpath(__file__))) +processed_directory = current_directory.parent / "processed" + +def main(): + with open("Pickles/UniprotIDs.pkl", "rb") as file: + UniprotIDs = pickle.load(file) + + UIDs = UniprotIDs["UniprotIDs"] + UMap = UniprotIDs["UniprotMap"] + + with open("Pickles/NodeIDs.pkl", "rb") as file2: + prizes = pickle.load(file2) + + prize_05 = prizes["prize_05"] + prize_060 = prizes["prize_060"] + + prize_05["Uniprot"] = prize_05["Uniprot"].apply(lambda x: UMap.get(x)) + prize_060["Uniprot"] = prize_060["Uniprot"].apply(lambda x: UMap.get(x)) + + prize_05.columns = ["NODEID", "prize"] + prize_060.columns = ["NODEID", "prize"] + + prize_05.to_csv(processed_directory / "processed_prize_05.txt", sep="\t", header=True, index=False) + 
prize_060.to_csv(processed_directory / "processed_prize_060.txt", sep="\t", header=True, index=False) + +if __name__ == '__main__': + main() From c80c56145aa7336a64b6ac22058a7ee9aa131202 Mon Sep 17 00:00:00 2001 From: Aden O'Brien Date: Wed, 30 Jul 2025 12:44:05 -0700 Subject: [PATCH 2/9] updated documentation --- datasets/hiv/README.md | 15 + datasets/hiv/Snakefile | 12 +- datasets/hiv/raw/.gitignore | 3 + datasets/hiv/raw/README.md | 10 + datasets/hiv/raw/prize_05.csv | 1127 -------------- datasets/hiv/raw/prize_060.csv | 1789 ---------------------- datasets/hiv/scripts/README.md | 89 ++ datasets/hiv/scripts/fetch.py | 36 + datasets/hiv/scripts/kegg_orthology.py | 121 +- datasets/hiv/scripts/name_mapping.py | 8 + datasets/hiv/scripts/prepare.py | 15 +- datasets/hiv/scripts/spras_formatting.py | 5 +- 12 files changed, 248 insertions(+), 2982 deletions(-) create mode 100644 datasets/hiv/README.md create mode 100644 datasets/hiv/raw/.gitignore create mode 100644 datasets/hiv/raw/README.md delete mode 100644 datasets/hiv/raw/prize_05.csv delete mode 100644 datasets/hiv/raw/prize_060.csv create mode 100644 datasets/hiv/scripts/README.md create mode 100644 datasets/hiv/scripts/fetch.py diff --git a/datasets/hiv/README.md b/datasets/hiv/README.md new file mode 100644 index 0000000..7c9ffc5 --- /dev/null +++ b/datasets/hiv/README.md @@ -0,0 +1,15 @@ +# HIV dataset + +## Raw files + +See `raw/README.md`. + +## File organization + +See `Snakefile` for the way that all of the IO files are connected. + +1. `fetch.py` - This grabs the score files from https://doi.org/10.1371/journal.ppat.1011492 - see `fetch.py` for more info. +1. `prepare.py` - This cleans up the prize files in `raw`; specifically to remove duplicates. +1. `name_mapping.py` - Converts from UniProt KB-ACID to UniProt KB to meet in the middle with `kegg_ortholog.py`. We chose UniProt KB for its generality. +1. `spras_formatting.py` - Formats the input files into a SPRAS-ready format. +1. 
`kegg_orthology.py` - This is used to generate the KEGG ortholog file for gold standards, but this has yet to be finalized. diff --git a/datasets/hiv/Snakefile b/datasets/hiv/Snakefile index a09afb9..7fd43aa 100644 --- a/datasets/hiv/Snakefile +++ b/datasets/hiv/Snakefile @@ -4,10 +4,18 @@ rule all: "processed/processed_prize_060.txt", "processed/phosphosite-irefindex13.0-uniprot.txt" +rule fetch: + output: + "raw/prize_05.tsv" + "raw/prize_060.tsv" + "raw/ko03250.xml" + shell: + "uv run scripts/fetch.py" + rule data_prep: input: - "raw/prize_05.csv", - "raw/prize_060.csv" + "raw/prize_05.tsv", + "raw/prize_060.tsv" output: "Pickles/NodeIDs.pkl" shell: diff --git a/datasets/hiv/raw/.gitignore b/datasets/hiv/raw/.gitignore new file mode 100644 index 0000000..1bf7803 --- /dev/null +++ b/datasets/hiv/raw/.gitignore @@ -0,0 +1,3 @@ +prizes_05.tsv +prizes_060.tsv +ko03250.xml diff --git a/datasets/hiv/raw/README.md b/datasets/hiv/raw/README.md new file mode 100644 index 0000000..e6b74a9 --- /dev/null +++ b/datasets/hiv/raw/README.md @@ -0,0 +1,10 @@ +# raw + +Some `raw` files are fetched from `../scripts/fetch.py`. + +The `phosphosite-irefindex13.0-uniprot.txt` is +a background interactome provided by SPRAS: https://github.com/Reed-CompBio/spras/blob/be8bc7f8d71880d7ce9c9ceeeddfefa6eb60c522/input/phosphosite-irefindex13.0-uniprot.txt. + +The `ko03250.xml` is from `https://www.kegg.jp/entry/ko03250`. Specifically, if you click on the pathway image in the entry, +you'll get to https://www.kegg.jp/pathway/ko03250, where you download the KGML file (which is formatted as a `.xml` file) +under `Download` -> `KGML`. (The final file is at https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml). 
diff --git a/datasets/hiv/raw/prize_05.csv b/datasets/hiv/raw/prize_05.csv deleted file mode 100644 index b0bc193..0000000 --- a/datasets/hiv/raw/prize_05.csv +++ /dev/null @@ -1,1127 +0,0 @@ -Uniprot Prize -B0YIW2 5.029365599650682 -Q99081 4.582688156260696 -P02765 4.027455886706566 -P69905 3.9558170656097413 -P06732 3.885427876677981 -B7ZKJ8 3.885427876677981 -P62805 3.885427876677981 -P23142 3.885427876677981 -H3BLZ8 3.7376592766836954 -Q9BU76 3.6456113515037165 -Q2KHT3 3.6456113515037165 -P02768 3.495543593199523 -Q8NBM4 3.4400540448812222 -Q5TEC6 3.4400540448812222 -Q92576 3.324208299637505 -Q9Y2R4 3.324208299637505 -P68363-2 3.324208299637505 -P61978-2 3.3188421301096285 -Q5T5H1 3.3188421301096285 -O75152 3.3188421301096285 -P11171 3.3010716433184752 -Q6PKG0 3.3010716433184752 -Q49A26 3.2919252118261477 -Q5T200 3.2919252118261477 -P02787 3.2382104940279395 -A0A0A0MTS7 3.238051370574229 -P49207 3.1995871637886015 -Q8IX15-3 3.1604645579223023 -P01023 3.1604645579223023 -P19823 3.1443051364121026 -P00747 3.1395052780156214 -Q9Y6V0-5 3.1395052780156214 -P02788 3.1395052780156214 -P49790-3 3.1383736075715567 -P31942 3.1358609655646563 -P16403 3.096027032739838 -A0A087WVP1 3.0343829212843927 -Q16718-2 3.029621522802649 -P20742 3.029621522802649 -Q13049 3.029008185470572 -P19827 3.029008185470572 -P05452 3.029008185470572 -Q15063 3.029008185470572 -P26599-3 2.962566624533064 -P36955 2.962566624533064 -A0A0G2JQJ7 2.962566624533064 -P10643 2.962566624533064 -P01024 2.962566624533064 -P08697 2.962566624533064 -P01008 2.9454736787752185 -P05543 2.938925530905134 -O75494 2.829700827078314 -Q9UIQ6 2.829700827078314 -Q01130 2.829700827078314 -Q86V48 2.829700827078314 -Q9Y5B9 2.829700827078314 -Q99490-2 2.829700827078314 -A0A087X0X3 2.795249183187452 -C9JV77 2.7916197949376103 -P16401 2.747877665335442 -A8MXP9 2.7306756910406835 -Q9Y2W2 2.7306756910406835 -Q86YV0 2.7306756910406835 -E9PCW1 2.7306756910406835 -P02647 2.720750771925425 -A9Z1X7 2.7206557198521564 -O95297-3 
2.7206557198521564 -P18031 2.7206557198521564 -Q14152 2.7053973652732433 -P02748 2.6422702227204438 -P18858 2.62905096626394 -A0A087WX41 2.62527679827281 -Q7L4I2 2.62527679827281 -Q9Y5V3-2 2.6037488593668185 -K4DI81 2.5854596173332802 -P35443 2.5781570496516366 -Q6DN12 2.5739996934436533 -Q9UQ35 2.5599794882543963 -Q9H814 2.55229335257594 -Q9H6F5 2.5433868130220105 -P52657 2.5153505347187393 -Q96PK6 2.5123326043136593 -Q8TAD8 2.5123326043136593 -P46783 2.4894305938870938 -Q9Y520-7 2.4894305938870938 -Q9UK76 2.48882925856076 -A0A075B738 2.481386971939166 -Q9NX58 2.481386971939166 -Q8IZP0 2.481386971939166 -Q93100-3 2.481386971939166 -A0A0G2JPR0 2.460765792624727 -M0QYC1 2.432755782368094 -O75151 2.415789642322741 -P48634 2.4140889675670696 -P51957 2.4140889675670696 -Q00403 2.4140889675670696 -Q8NCD3 2.4140889675670696 -P16949 2.4140889675670696 -P06396-2 2.4104977645875323 -Q7Z5J4 2.407172399141249 -O95801 2.380388595749011 -Q9NYF3 2.349945242744137 -H0Y5F5 2.3235758982106964 -P04350 2.323008637528439 -Q92619 2.320724815309965 -P16150 2.3002413241447113 -Q5JTH9 2.296341778114889 -P02774-3 2.287877464838444 -Q8WWI1 2.283650895240041 -Q96BH1 2.283650895240041 -Q99622 2.281967707725145 -Q15477 2.281967707725145 -P20648 2.281967707725145 -Q9BQ70 2.281967707725145 -Q9NX00 2.2816943670374368 -Q9UKV3-5 2.2724349590916813 -C9JYS8 2.2724349590916813 -P07996 2.2656102638472038 -Q15154 2.2486198914855287 -Q7Z460 2.24046014360604 -A0A0C4DGB9 2.236380744008414 -P62995 2.2320502169087955 -Q9Y5Z4 2.2211603377352622 -Q53GS7 2.2142665101729353 -Q13619 2.2123687456514425 -Q8NDT2 2.208371655746705 -Q9UPP1 2.20252042150617 -P46013 2.198038970223691 -H0Y449 2.1959846069805935 -Q9BSK4 2.1906023175925395 -Q92614-2 2.175819836002309 -P16520 2.175819836002309 -Q92804 2.153112950890257 -Q9NVG8 2.1478379363448883 -O75533 2.1439001320846174 -P31751 2.1439001320846174 -Q9NWH9 2.1439001320846174 -Q9NW64 2.1439001320846174 -P17544-6 2.1439001320846174 -Q92747 2.1439001320846174 -O95169 
2.114945948576723 -Q9P0U3 2.114945948576723 -Q9Y5K6 2.1123920250904837 -P50750-2 2.105059488765234 -Q9ULF5 2.101534906059656 -O95644 2.101534906059656 -P06400 2.100687896308613 -B7WPE2 2.100687896308613 -Q8ND56-2 2.100687896308613 -P15336 2.100160602610354 -P10412 2.096770743840173 -P22694-2 2.0930040732608277 -P18615 2.0930040732608277 -P19338 2.0930040732608277 -Q8N6H7 2.0930040732608277 -O76021 2.0930040732608277 -Q16666 2.0930040732608277 -Q9ULU4-19 2.0930040732608277 -Q8NFC6 2.0930040732608277 -Q9UPR3 2.0930040732608277 -P18507-3 2.0930040732608277 -O94782 2.0930040732608277 -Q08AE8 2.0930040732608277 -Q6UUV9-2 2.0930040732608277 -Q9UJU6 2.0930040732608277 -O14578-4 2.0930040732608277 -P17844 2.0930040732608277 -Q09161 2.0930040732608277 -O00472 2.0930040732608277 -Q08117-2 2.0930040732608277 -P22626 2.0930040732608277 -Q01518 2.0930040732608277 -P06748 2.0930040732608277 -O43148-2 2.0930040732608277 -E9PGC8 2.0930040732608277 -Q96IZ7 2.0930040732608277 -Q15555 2.0930040732608277 -Q99879 2.0930040732608277 -Q9C0C2 2.0930040732608277 -Q9Y2W1 2.0930040732608277 -J3QR29 2.0930040732608277 -Q13769 2.0930040732608277 -A0A096LP69 2.0930040732608277 -Q14978-2 2.0930040732608277 -O60563 2.0930040732608277 -A0A087X188 2.0930040732608277 -A0A075B6G3 2.0930040732608277 -P38159 2.0930040732608277 -P23588-2 2.0930040732608277 -Q13151 2.0930040732608277 -P35269 2.0930040732608277 -Q9H4A3-7 2.0930040732608277 -O94964-2 2.0930040732608277 -Q08170 2.0930040732608277 -Q14669-3 2.0930040732608277 -Q15036 2.0930040732608277 -O94913 2.0930040732608277 -Q9Y6G9 2.0930040732608277 -Q15906-2 2.0930040732608277 -Q9H000 2.0930040732608277 -Q6ZUT1-2 2.0930040732608277 -Q15334 2.0930040732608277 -P62753 2.0930040732608277 -Q12872-2 2.0930040732608277 -O75385 2.0930040732608277 -E9PN89 2.0930040732608277 -Q92797 2.0930040732608277 -P62979 2.0930040732608277 -Q92733 2.0930040732608277 -Q96F86 2.0930040732608277 -Q9HB90 2.0930040732608277 -Q13428-8 2.0930040732608277 -F8W7T1 
2.0930040732608277 -Q9ULR3 2.0930040732608277 -Q86Y91-4 2.0930040732608277 -Q5VT06 2.091231409713808 -Q8NG31 2.091231409713808 -Q8WXE0 2.091231409713808 -Q9UPU5 2.091231409713808 -O75182 2.091231409713808 -Q15758 2.091231409713808 -Q86U86 2.091231409713808 -Q9NWB6 2.091231409713808 -Q53HC0 2.091231409713808 -E7EQT4 2.091231409713808 -Q9BRD0 2.091231409713808 -Q8N3X1-2 2.091231409713808 -P61956 2.091231409713808 -Q99729-3 2.091231409713808 -O60307 2.091231409713808 -O94874-2 2.091231409713808 -P36915 2.0892707200089293 -Q9HBD4 2.0892707200089293 -Q9BWJ5 2.0844344915355792 -Q92854 2.0844344915355792 -Q92688 2.0813998527817237 -O95104 2.080073763404992 -Q8TAQ2 2.0770885338720166 -Q14980 2.0741363450505035 -Q9NPD8 2.0741363450505035 -E7EVA0 2.0741363450505035 -H9KVB4 2.0741363450505035 -E9PG73 2.0741363450505035 -Q5W0B1 2.0741363450505035 -Q9HCD5 2.0741363450505035 -P39880-3 2.0741363450505035 -P42345 2.0741363450505035 -Q9ULW0 2.0741363450505035 -P35612 2.0741363450505035 -Q96MU7 2.0741363450505035 -P56211 2.0741363450505035 -Q9Y250 2.0741363450505035 -Q9H4L7-2 2.0741363450505035 -Q13573 2.0741363450505035 -Q92504 2.0741363450505035 -Q99623 2.0741363450505035 -O75554 2.0741363450505035 -A1X283 2.0741363450505035 -Q7Z5R6 2.0741363450505035 -P25686 2.0741363450505035 -Q12789 2.0741363450505035 -Q86V59 2.0741363450505035 -P23528 2.0741363450505035 -Q96ST2 2.0741363450505035 -Q07960 2.0741363450505035 -Q15149-2 2.0741363450505035 -Q8IYW5 2.0741363450505035 -Q9NR30 2.0741363450505035 -O96028 2.0741363450505035 -F5H7W8 2.0741363450505035 -Q9BVI0 2.0741363450505035 -Q5VTL8 2.0741363450505035 -P50219 2.0741363450505035 -P32519 2.0741363450505035 -Q86WB0 2.0741363450505035 -P49711 2.0741363450505035 -E7ESS2 2.0741363450505035 -Q14677-3 2.0741363450505035 -P62820 2.0741363450505035 -Q12888-2 2.0741363450505035 -P49792 2.0713486005782946 -P52756 2.0713486005782946 -Q8NEZ4-3 2.0713486005782946 -A0A087WUE4 2.0713486005782946 -Q9Y3Q8 2.0713486005782946 -C9JCC6 2.070445474815505 
-P04264 2.070445474815505 -O75791 2.0694380337606075 -P45973 2.0694380337606075 -P29374 2.0694380337606075 -Q86X95 2.066314476087648 -Q8IWB9 2.066247893108421 -P28908 2.06313711752553 -Q9C0K0 2.063074993592968 -G5E9I4 2.057475184452913 -Q01433 2.057475184452913 -O60271-3 2.057475184452913 -O95429 2.057475184452913 -Q13506 2.057475184452913 -Q99590 2.057475184452913 -Q16537 2.053529256826423 -Q69YQ0 2.053529256826423 -Q9UHB6-4 2.053529256826423 -Q8IU60 2.0479931944263523 -P33991 2.0479931944263523 -Q9NYF8 2.0476353441228454 -Q1W6H9 2.0476353441228454 -Q8N163 2.0476353441228454 -Q2M2I8 2.0476353441228454 -A0A0C4DGT3 2.0476353441228454 -Q9NZ32 2.0476353441228454 -Q8IUC4 2.0476353441228454 -P60174 2.0443402002488544 -P57740 2.0437288702099448 -Q14155-5 2.036554522091055 -Q96PY6-3 2.036554522091055 -Q9Y2H2 2.036554522091055 -Q9ULT8 2.036554522091055 -A0A075B7F8 2.0258840676012166 -Q02790 2.0258840676012166 -Q9H1E3 2.0238476248551227 -Q8N5I9 2.023668755442608 -O43399 2.023668755442608 -Q8NC56 2.0195708250945215 -Q13469 2.019474619149151 -Q9UII2 2.0162849025813614 -O76071 2.0162849025813614 -P20226 2.0162849025813614 -Q8ND61 2.0162849025813614 -Q659C4 2.0162849025813614 -Q9NU22 2.0162849025813614 -P42684 2.010141366952319 -P20290 2.0096256649264213 -J3QS41 2.0060888000963253 -Q8IWZ8 2.0050115390474197 -Q6ZSZ6 2.0050115390474197 -H0YLM1 2.004925899348196 -Q9H4A6 2.004925899348196 -Q9C0D5 2.003893158665265 -P26358 2.001547002460393 -P50443 2.001547002460393 -Q68DK7 2.001547002460393 -Q9BUQ8 2.001547002460393 -Q9NSK0-3 2.001547002460393 -Q96KB5-2 2.001547002460393 -Q6WKZ4 2.001547002460393 -Q9NRA8-3 2.001547002460393 -P37802-2 2.0000647481974987 -Q13111 2.0000647481974987 -P38935 2.0000647481974987 -B7ZL14 1.9993647345450238 -Q3T8J9-3 1.9993647345450238 -O00571-2 1.9993647345450238 -P01106 1.995364993831002 -Q13905-3 1.995364993831002 -P09564 1.9952164715454883 -P04114 1.9952164715454883 -P15169 1.992890123029605 -Q14C86-4 1.9917206849966782 -Q5UIP0 1.9917206849966782 
-Q9H4E7 1.9917206849966782 -Q8IWX8 1.9917206849966782 -O95453 1.9917206849966782 -Q6KC79 1.9917206849966782 -Q00325-2 1.991375722886299 -O75449 1.991375722886299 -Q15651 1.991375722886299 -Q96T51 1.9856617408191743 -P26358-2 1.980659922732739 -P51452 1.978294679290043 -Q29RF7 1.9775401751552404 -P17483 1.9750550166567504 -Q9Y2F5 1.9750550166567504 -P60468 1.969143829670624 -P49454 1.969143829670624 -H0YL70 1.9683029334710678 -O75396 1.9601876093993842 -Q9H582 1.9556137936327183 -Q56VL3 1.9486769534370176 -Q0ZGT2 1.9481548894726342 -Q49AR2 1.9480945422071272 -Q14847 1.9480945422071272 -Q9UPT8 1.9470950307992143 -Q15365 1.944984803261588 -Q70CQ4 1.9445373394280647 -Q9BYW2 1.9434305219033923 -Q8TF61 1.9434305219033923 -O95218-2 1.9434305219033923 -O75122 1.9434305219033923 -Q8TDY2 1.9434305219033923 -Q8N5U6-2 1.9418115434283743 -Q7Z3C6 1.9415669480747135 -Q1KMD3 1.9415669480747135 -P78371 1.9368901870733022 -Q15052 1.9368901870733022 -O95071 1.9368901870733022 -P35251 1.9368901870733022 -F5H0R1 1.9368901870733022 -P78414 1.9368901870733022 -Q92620 1.9368901870733022 -P78527 1.9340489974114472 -P42331-4 1.9340489974114472 -Q7Z2Z1 1.9340489974114472 -A0A1B0GV45 1.9340489974114472 -Q92833 1.9340489974114472 -Q7L9B9 1.9340489974114472 -Q9H307 1.9340489974114472 -Q15172 1.9323419277671952 -Q9UBF8-2 1.9323419277671952 -Q9BXB4 1.9323419277671952 -P52292 1.9323419277671952 -Q8TEK3 1.9301631333073044 -V5IRT4 1.9287952975837162 -P62328 1.9283777316613235 -O75064 1.9271301716229814 -A0A0U1RRM6 1.9271301716229814 -P56182 1.9271301716229814 -Q3MHD2-2 1.9254733389302683 -O60524 1.9254733389302683 -P46100-4 1.9217182069478533 -Q53F19 1.9217182069478533 -J3KMZ8 1.918483687108067 -O75909 1.9177379301127873 -O15117-3 1.9177379301127873 -Q9H4L4 1.9177379301127873 -A0A0C4DFX9 1.9177379301127873 -Q92615 1.9177379301127873 -Q96QE3 1.9177379301127873 -Q1ED39 1.9177379301127873 -Q92994 1.9177379301127873 -P27708 1.9177379301127873 -Q5SRE5 1.9177379301127873 -Q8TF01 1.9134882382668335 -Q6VY07 
1.9134882382668335 -A0A0A0MRV0 1.9126016199016322 -Q5VUA4 1.9126016199016322 -Q8WWQ0 1.9126016199016322 -Q13627 1.9125880555053754 -C9J2V2 1.9125880555053754 -Q13112 1.9125880555053754 -Q765P7 1.9125880555053754 -G5E9M7 1.9125880555053754 -Q8IZD4 1.9125880555053754 -Q9H410 1.9096467796825596 -O00418 1.9096467796825596 -O60504 1.9096467796825596 -O15042 1.9092246320613344 -Q15121-2 1.9068102597006436 -O60293 1.9068102597006436 -Q8IX90 1.9068102597006436 -P49006 1.9064228740215896 -Q92794 1.9056028560293767 -Q1RMZ1 1.8994200131133023 -Q8WUA4 1.896650097868306 -O43290 1.896650097868306 -D6RIF6 1.8928651212222507 -P49761 1.8925827513787215 -Q9BVA0 1.8925827513787215 -Q684P5 1.8833492355924442 -Q8WUM0 1.8818650852676617 -I3L0U5 1.8818650852676617 -A0A140T9T7 1.8755427857898597 -Q96PC5 1.8721924141342208 -Q96G01 1.8721924141342208 -G3V4K3 1.872162332032529 -Q9NQL2 1.872162332032529 -Q9H019 1.871047222852938 -Q6PJT7 1.869044120260192 -Q9Y4B6 1.869044120260192 -O75175 1.8680222366670973 -Q9BZZ5-2 1.856455021578958 -O43516-3 1.8554918125016646 -P78347 1.8554918125016646 -Q5T5Y3-3 1.8554918125016646 -Q96EV8 1.8554918125016646 -Q99442 1.8554918125016646 -P08311 1.8553794198461875 -P52597 1.8446537391239852 -Q9Y478 1.8446537391239852 -Q8WVC6 1.8446537391239852 -Q9NSU2 1.8446537391239852 -P49321-3 1.8437965826752318 -Q9UKS7-7 1.8432577831038797 -P30414 1.8429899049403593 -H0Y4E8 1.842425461713462 -F8W0Q9 1.842425461713462 -Q9NPI6 1.840430126933067 -P17152 1.8399900893860353 -P08779 1.8399432472278279 -A0A087X1R1 1.8380453078961314 -Q15596 1.8359056314622086 -Q9H4Z2 1.8359056314622086 -V9GYM8 1.8359056314622086 -Q15025 1.8359056314622086 -O60784-2 1.8359056314622086 -P52948 1.8290791644626694 -Q8WYQ5 1.827307091291376 -Q96EP0 1.825511192580273 -Q8WWM7 1.8190036768064894 -K7ELC2 1.8159384695411844 -Q86X53 1.8159384695411844 -Q96GN5 1.8159384695411844 -Q15751 1.815501713477811 -Q5TCZ1 1.815501713477811 -Q7Z7G8 1.815501713477811 -O00767 1.8150340171635877 -Q9NU19 1.813795383823543 
-Q13595 1.8134821764910776 -P13598 1.8128129802259039 -Q8WW12 1.8128129802259039 -Q9NR12 1.8128129802259039 -P16104 1.8124777079272067 -J3KPC5 1.8115344911869387 -P12956 1.8115344911869387 -A0FGR8-2 1.8087656672268064 -Q9Y6Q9 1.806211545700895 -Q92667 1.8053604267553998 -Q9Y2H0 1.8046647169767462 -Q15648 1.8027453719934066 -P41002 1.8027453719934066 -Q86YV5 1.8027453719934066 -Q9Y519 1.8027453719934066 -Q9NQW6 1.8014868709712135 -Q96N16-2 1.8014868709712135 -Q92843-2 1.8007998786172206 -Q9Y487 1.7994284332160366 -Q96L91 1.7992020816383723 -O60336 1.7992020816383723 -Q9BW61 1.7992020816383723 -P13994 1.796675590645714 -Q9BTT6 1.795911398762831 -Q96EY5 1.7940052918727143 -P21359 1.7933085596410143 -P0C1Z6 1.7933085596410143 -P13639 1.7933085596410143 -Q9P2R6 1.7933085596410143 -Q14671 1.7927955325322298 -Q05519 1.7927955325322298 -Q9NZ63 1.7922326814840743 -Q9NRY4 1.792165796529125 -Q96RT1-8 1.7919187298648103 -Q9Y385 1.7907974703379206 -I3L2J0 1.7907974703379206 -P46779 1.7867712786372327 -Q13263 1.7867712786372327 -Q99952 1.7867712786372327 -C9JQE8 1.7856684494305761 -Q9NZN8 1.784613430637171 -Q9H910-3 1.7785735346084048 -Q15003 1.778246202637384 -Q9ULC8-3 1.778246202637384 -A0A140TA76 1.778246202637384 -Q96I23 1.7775522661491632 -O75420 1.7757877143324041 -Q9NPQ8 1.774172424035765 -O75995 1.773513627853916 -P49750-4 1.7720618765978158 -Q9HC52 1.7720618765978158 -Q9Y2D5 1.7693290668469035 -Q5SW79 1.7688201117820157 -O76031 1.7682012671239087 -Q9P013 1.7682012671239087 -Q8WXX7 1.7668267820920907 -A6NMQ1 1.7668267820920907 -Q6AI39 1.7668267820920907 -P42166 1.7668267820920907 -O60343-2 1.7668267820920907 -P04004 1.7660954889737666 -P40227 1.7639058211203502 -Q9HC62 1.7619178688595742 -Q96T37 1.7619178688595742 -Q8IUW5 1.7604442112214442 -P60891 1.7604442112214442 -Q9UK61 1.7595362564644987 -P23246 1.7594791229630506 -A2RRD8 1.7594791229630506 -Q92610 1.7594791229630506 -Q6P2E9 1.7594791229630506 -Q8NI08-2 1.7594791229630506 -Q9BTA9 1.758477834900746 -P19174 
1.7536469168550288 -Q13085-4 1.751804598112101 -F5GX28 1.7493077191635205 -Q96Q15 1.7493077191635205 -P62701 1.7447763945422439 -Q8TBC3 1.7447763945422439 -Q9P275-2 1.7447763945422439 -O14974 1.743972249080854 -A0A1B0GVL4 1.7421036403940267 -Q9NXR1 1.7347057683454448 -Q13185 1.7319626633555625 -Q14739 1.7319626633555625 -P40855 1.7313850342383876 -Q8TDY4 1.7299577009918496 -P16333 1.7299577009918496 -Q96SK2 1.7299577009918496 -Q12830 1.7299577009918496 -A6NIW2 1.7299577009918496 -Q9Y2X7-3 1.7293642143059356 -Q92900 1.7277438934967115 -Q6ZV73 1.7265598328201814 -O75781 1.7258711686866466 -Q8TBB5 1.720045341334363 -Q8N6T3 1.720045341334363 -Q9NQS7 1.7192877172491672 -A0A0C4DGZ1 1.7192877172491672 -Q04637-8 1.7184322502679668 -Q15056 1.717590657754205 -P51587 1.7141251254542167 -Q9H299 1.7092399321618308 -G3XAN8 1.7092399321618308 -Q3KQU3 1.7092399321618308 -Q9BQ61 1.7092399321618308 -Q15051 1.7092399321618308 -Q9Y3D0 1.7092399321618308 -Q13242 1.7080785314356497 -Q9UKM9-2 1.7080785314356497 -Q9NWZ5 1.7080785314356497 -R4GMX8 1.7080785314356497 -Q8IWI9-3 1.7080785314356497 -Q9BZ95-5 1.7029043597981288 -Q9NQX3-2 1.7000852357484855 -Q5VT52 1.7000852357484855 -P04053 1.6966551417840128 -Q9H1A4 1.694898972781371 -O43583 1.694898972781371 -O43166 1.6919854649545663 -P50991 1.690617938729064 -Q8IXT5 1.6901698599848254 -O43896 1.6901698599848254 -O75362 1.688737299297794 -Q9H8V3-3 1.688737299297794 -Q8TB72-3 1.68758461340343 -Q58A45 1.6869718435986893 -P04921 1.6862254152495693 -Q9UPR0 1.6831225762746234 -P46109 1.6818237841265384 -Q14011 1.681141112634762 -P30291 1.6804466318356368 -O95758-4 1.6800357982877165 -P29084 1.679223063064028 -Q86W56 1.679223063064028 -Q8TC07-3 1.6772080389536972 -Q9H6L4-2 1.6772080389536972 -P08651 1.6772080389536972 -P51003 1.6772080389536972 -O43572 1.6772080389536972 -Q3ZAQ7-2 1.6772080389536972 -A0A087WZV0 1.6771463933324673 -A0A1B0GTW1 1.6771463933324673 -X6R7X0 1.6771463933324673 -Q9C0B0 1.6771463933324673 -Q9BQF6 1.6771463933324673 -O95747 
1.6753615114019746 -Q7Z7A4 1.6710146392964635 -Q07666 1.6710146392964635 -Q96DV4 1.6708431432717943 -Q2TAZ0-3 1.670572014013327 -Q13435 1.6698092197126786 -P98171-2 1.668670898676557 -P02686 1.6643778204226511 -Q8NBZ0 1.6643778204226511 -P49585 1.6642517673671366 -O75717 1.663274744014383 -P50747 1.663274744014383 -Q4LE39 1.663274744014383 -Q86XK3-2 1.663274744014383 -P47974 1.663274744014383 -Q8IWS0 1.663274744014383 -Q96D15 1.6619734426235824 -Q32MZ4-3 1.6599301052774371 -P29590-9 1.6564200532197424 -Q13247 1.6556264804570962 -Q96C90 1.6556264804570962 -Q6DKI7 1.6556264804570962 -P49790 1.6556264804570962 -Q9HBM6 1.6529959642169378 -Q7LBC6 1.6527500850207928 -O15014 1.651172937797034 -O94761 1.6502214136897413 -P27815 1.6502214136897413 -P16402 1.6487484134643091 -B4DY08 1.6487484134643091 -J3KNL6 1.6487484134643091 -Q99549-2 1.644622719177054 -B9EGE7 1.644622719177054 -Q96DF8 1.644622719177054 -Q6ZNJ1 1.64269992345517 -Q9Y2U5 1.6367104797579248 -Q14151 1.6358338110561552 -P35236-3 1.6349593665241 -Q12979 1.6342311178769535 -A0A0B4J2E5 1.6330199652404895 -Q96HA1 1.6324656025906694 -Q9NWQ8 1.6323040840368914 -Q9H9P5-6 1.630771279510544 -P28749 1.630588813414638 -O14874 1.630588813414638 -J3QQJ0 1.6283890950706992 -O75475 1.6252713233319813 -Q9ULL5-3 1.625181279988604 -Q8IYH5 1.625181279988604 -Q8NCN4 1.625181279988604 -Q9HCH0 1.625181279988604 -Q01167 1.6250089821690419 -P63313 1.623366530478342 -Q9Y314 1.6231317053084626 -Q9Y383 1.6231317053084626 -Q8N999 1.622993700947564 -P63272 1.6203457919362025 -Q96MF7 1.618118054418137 -Q7L2E3-2 1.6168480219685337 -Q9GZT9 1.614854310072849 -P48651 1.6139697076023756 -Q8NDD1 1.6139697076023756 -Q13422 1.6139697076023756 -Q9BXW9 1.611266629431071 -Q7KYR7 1.610393863373781 -Q13370 1.6098048778662402 -O60516 1.608898667913959 -Q86UK7-2 1.6066377596960149 -P08670 1.6047294192141037 -A0A5E8 1.603907509952331 -Q76FK4-2 1.603907509952331 -Q15185 1.603907509952331 -P29966 1.6016262103462995 -Q7Z589-7 1.6011078030337247 -Q15363 
1.6010646244535973 -Q9NPG3 1.6010646244535973 -P17181 1.6004867491643342 -Q86UE8 1.6004867491643342 -Q53HL2 1.5987537264652687 -A4D2B0 1.596365603301689 -Q86YS7 1.594359049740105 -Q68DH5 1.593257221651549 -Q9NYA4 1.5867062539232397 -Q14699 1.5867062539232397 -A8K727 1.5867062539232397 -Q96GE4 1.5863495754611554 -B5MCU0 1.5863495754611554 -O60333-3 1.5863495754611554 -Q8WWW0 1.5860233224503277 -Q9UHB7 1.5860233224503277 -Q8NI27 1.5846150343713394 -Q14188 1.5846150343713394 -J3KN59 1.5838005455677533 -P31483 1.5838005455677533 -Q6P3S6 1.5836658366009186 -A0A0J9YWL0 1.5774447536145375 -Q8WVC0 1.576851257996113 -O14908 1.5764803833226075 -Q7Z5L9-2 1.5758455443241708 -Q6NW29 1.5752653922867537 -O15021 1.574150295475778 -Q66K74 1.5697896741692863 -Q7Z569 1.562145663762073 -P55201 1.5613449534678685 -Q14149 1.5597182751399368 -Q9Y2Y9 1.5593568988240107 -Q5T4S7-4 1.5593568988240107 -Q9HCE1-2 1.5587155374692414 -P49757 1.5583262707564272 -Q6P6C2 1.5576469662179655 -Q9BUJ2 1.5576469662179655 -O43295 1.55630259723018 -I1E4Y6 1.55630259723018 -Q99856 1.553229166435797 -P60709 1.553229166435797 -Q9UKL0 1.553229166435797 -Q96II8 1.553229166435797 -Q9Y6R0 1.553229166435797 -Q9UQR0 1.553229166435797 -H7C494 1.553229166435797 -J3KPH8 1.553229166435797 -O95475 1.553229166435797 -E7ERS3 1.551636854535616 -P42858 1.551636854535616 -Q99640 1.551636854535616 -Q14498 1.551511002861387 -Q9UBP0 1.551511002861387 -Q9NTM9 1.549335131113205 -Q9UPX8-3 1.5484813148031706 -Q01196 1.5467490697478377 -Q8WUQ7-2 1.544011596513742 -Q9NRQ5 1.5425987598831323 -Q15058 1.5421741826769957 -P17096 1.5414008330832916 -P23396 1.5409742972892138 -Q9Y2Z0 1.5374294639604158 -Q9Y6I3 1.5348290417612744 -H7C2Q8 1.53334867204404 -P46100-3 1.53334867204404 -Q8ND82 1.5317923423127593 -Q96BY7 1.5312360416149495 -Q9Y4A5 1.530978566649734 -Q9Y3B9 1.530978566649734 -P18065 1.530585166912765 -P42766 1.530585166912765 -Q9H0E3-3 1.5299457846891635 -O00139-4 1.529193301956704 -Q13057-2 1.5278819476884955 -Q3V6T2 
1.5254789263504873 -Q96I24 1.525285823427137 -Q86W92 1.52366361999421 -Q5SQI0 1.5233702471669925 -O60496 1.5228442562900155 -D3DQV9 1.521921608227538 -O95674 1.521837471774267 -P15822 1.521837471774267 -Q8WUH2 1.518843374947793 -Q9H6H4 1.5153527618285414 -Q92888-3 1.5151231030186672 -A0A0J9YYD9 1.5151231030186672 -Q9BRL6 1.5126987508397562 -Q9UKJ3 1.5095282736803597 -P12270 1.5067032141687422 -O00716 1.5067032141687422 -P54278 1.5004501242853219 -Q8WVZ9 1.5001236856081273 -Q96N67 1.4991713079622904 -Q27J81 1.4991713079622904 -O15054-1 1.4991713079622904 -P0C7P0 1.4989888127394944 -Q9NYP9 1.4986862331154844 -Q96PN7-5 1.4986862331154844 -Q16186 1.498638296123168 -X6RLX0 1.498638296123168 -H3BQL3 1.498638296123168 -Q9UG63-2 1.4981130172751085 -O95239 1.4962166717747136 -P56589 1.4938112941438502 -Q14103-4 1.4937444379141003 -Q53QZ3 1.4927819189940181 -Q15776 1.4917764239854727 -Q9UPU7 1.4917764239854727 -Q9BW19 1.4917764239854727 -O15534 1.4872646212864058 -P82094-2 1.4844341997257475 -A0A0G2JNZ2 1.4831620582394172 -O60664 1.481077721531142 -Q01082 1.4805861772647626 -Q969R8 1.480219922066905 -A0A0U1RRH7 1.4787436390436959 -Q68E01 1.47704849491289 -A0A024R4E5 1.4744322137737111 -E9PJ55 1.4741661518581666 -O96013 1.4733488915683681 -Q96JM7 1.4664663535517233 -Q14684 1.4659087622757931 -Q15181 1.4651206340344114 -Q5JSZ5 1.462102021086222 -P00519-2 1.4600214671135892 -O43861 1.4595842003311976 -Q14807 1.4594594113221078 -Q2NKX9 1.4576375808977051 -Q92922 1.4576375808977051 -Q9H6A9 1.4576375808977051 -P49915 1.4574508741014023 -J3KPF0 1.4570838456002488 -Q92685 1.4569641682536676 -P20963 1.4562735899353279 -Q8WXG6 1.455795107080136 -Q8N1F7 1.4506568843932597 -Q6ZU65 1.4452368742932693 -Q8WWW8-2 1.4424549898390118 -Q96NY9 1.4371602289363188 -P16455 1.4364871925598512 -Q8TF74 1.4364871925598512 -Q9BVC5 1.4364871925598512 -Q96F46 1.436131077602721 -Q86YE8 1.4357694665997545 -C9J7T7 1.4332804246591722 -P14921 1.432391983160914 -P28066 1.432391983160914 -Q9H496 
1.4315219662451866 -O75410-2 1.4274553490164978 -P07766 1.4270205175131458 -P17480 1.4265158906429753 -Q2KHR2 1.4245729184327107 -Q9Y388 1.4221072948726277 -Q9Y3S2 1.4203653481358378 -Q12968 1.4200481489287875 -Q6P3S1 1.4200481489287875 -A0A0C4DFM7 1.4177864760910868 -Q9ULH0 1.4170719101299214 -O43439-4 1.4170719101299214 -Q9NQT8 1.4170719101299214 -Q7Z422 1.4167301823964789 -Q9UIG0 1.4141155725530086 -Q9Y248 1.4141155725530086 -A0A0A0MRR7 1.4141148092544977 -Q9Y2V2 1.4140634399725864 -Q9UJF2-2 1.4140634399725864 -Q14693-7 1.4139384363862515 -P06127 1.4134962460604465 -Q8N488 1.4130726650170624 -H7BZJ3 1.4079714054211705 -Q9UHD8 1.4077462420892584 -Q7KZF4 1.4077462420892584 -P11387 1.407165160730767 -P22314 1.4055026873551124 -Q9H2Y7 1.4042785122991548 -O94888 1.4027498920199595 -Q9BWT3 1.4012255401070153 -Q14839-2 1.398338110192834 -P17275 1.398338110192834 -P49768 1.3955225663490063 -M0QXA7 1.3954546462122803 -Q9UNE7 1.3951713002688575 -G5EA09 1.3945555670580556 -P42025 1.3929551436380998 -Q9NRH2 1.386059858172826 -A0A0A0MTC5 1.3857922789405932 -P46379-3 1.381734440271269 -F5H8D7 1.381311096353027 -Q8NF99 1.3790844369432236 -Q9Y3X0 1.3790844369432236 -Q86XN7 1.3790708641788663 -O95466-2 1.3787085313713439 -Q15027 1.3787085313713439 -Q96T58 1.3740579455411437 -Q02078 1.372105870811973 -Q86TB9-4 1.372105870811973 -Q9Y2K7 1.372105870811973 -Q9HB21 1.370439810950771 -B4E0Y9 1.3650013905568206 -Q86U70 1.3638978446465067 -O15355 1.3638978446465067 -B1AM27 1.3638978446465067 -P53801 1.3626210124844773 -Q96T23 1.3597869554337356 -Q14814 1.3556876285013741 -E9PNT2 1.3510999693925152 -Q9UNN5 1.3507891200859448 -O75132 1.3462298959658738 -Q9Y3P9 1.3459376771082716 -Q5T6F2 1.3459376771082716 -O14639 1.3459376771082716 -O43491 1.3459376771082716 -Q8NDI1 1.3445089856069514 -Q7Z2W4 1.3445089856069514 -Q9NSY1 1.3421465341377863 -Q96S53 1.342136945246755 -E7EPT4 1.3399916693835972 -Q9GZY8-5 1.3393202241362625 -Q96EB1 1.3390674987148612 -P0DI81-3 1.336601117149929 -Q969T9 
1.336601117149929 -A0A0A0MT60 1.3303932882739926 -Q15032-3 1.325699598575591 -Q8IV63 1.325699598575591 -O95628-6 1.3225681523274027 -A0A087WWF6 1.321912323523786 -Q6ZRI6 1.321912323523786 -Q13158 1.3219024088198772 -O00231 1.3182975778427108 -Q96RR4 1.3180526639899415 -Q15464 1.3153136536263887 -Q3ZCQ8-2 1.3124760170954974 -Q9NUA8 1.3119173775121502 -Q92598 1.311829224711046 -Q3B726 1.3114040854518267 -Q8NHQ9 1.3099985711203703 -Q9BX63 1.3054621506029551 -P30041 1.3036944847038232 -O14523-2 1.3025993654945351 -P35527 1.3012384398573682 -Q9BXI6-2 1.291913505353536 -Q5QJE6 1.289184704701846 -Q15424-4 1.289184704701846 -Q9H4L5 1.2849550851075413 -Q96A65 1.2834606945086136 -E7EQS8 1.277233313737906 -Q8WXD9 1.2766428307525945 -A0A0A0MQU4 1.2747712441742238 -Q6P0Q8 1.2747712441742238 -E7EVB6 1.2747712441742238 -Q14676 1.2736999749815556 -Q14181 1.273410703338121 -Q9Y508 1.2718500041542673 -Q8N201 1.2692161465227425 -Q96C36 1.2692161465227425 -O43395 1.26312637564306 -Q9UJZ1 1.2606316627801153 -Q00537-2 1.2606316627801153 -F5H527 1.2600223449685395 -Q8N490-2 1.259550361396689 -M0R226 1.25874242722772 -Q15813 1.25874242722772 -Q6NZY4 1.254357705738553 -Q9BZX2 1.254357705738553 -Q6JBY9 1.254357705738553 -O60869 1.254357705738553 -O95696-2 1.2528130397867507 -Q96ST3 1.2522578417768275 -Q8IYL3 1.2515272999610367 -Q06945 1.2478890430709089 -Q15291 1.2475593922253896 -Q96SL8 1.2470766183852005 -Q7Z5K2-3 1.2437444782694829 -Q5T8P6 1.2426795081873598 -Q96A57-2 1.2418536980236525 -P39687 1.2396318901092034 -Q8NHM5 1.2395522882104963 -Q15390 1.2392179222024022 -O60264 1.2390939190066692 -J3KTL2 1.2314350027874297 -Q8WV93 1.2214395107571645 -X6RAL5 1.2214395107571645 -Q12846 1.2198285070067099 -Q96DC7-2 1.2170234258276496 -A0A0A0MT22 1.213667888116882 -Q8N103 1.211178504714197 -P54259 1.211178504714197 -P78332 1.2103665780676767 -Q9P2B4 1.2103665780676767 -P43403 1.2090669777370142 -S4R347 1.2086702565688232 -Q7L273 1.2074600584781505 -Q16204 1.2008769173364633 -A0A0D9SF60 
1.2008769173364633 -Q9BVA1 1.1988524735897537 -Q06546 1.1965087177551361 -P05387 1.1958261054040045 -Q13200 1.1954554880085198 -Q9NPA5 1.1935372180611366 -Q9UGP4 1.190607333317505 -Q8NBX0 1.188872309904105 -P13807 1.188872309904105 -Q13123 1.1878250534090182 -Q9BUR4 1.1865290485394648 -P07108-5 1.1839392587237012 -O75391 1.1834390080524702 -Q16539-2 1.1834390080524702 -Q15366 1.179739158975082 -B0QYS7 1.1790711506753633 -P06239-3 1.1790711506753633 -P84243 1.1783755536979346 -Q8N4V1-2 1.173901663292787 -P48382 1.171520785487593 -Q9NTJ3 1.171520785487593 -Q96P11-2 1.170050718503766 -O75925-2 1.1681182680530657 -P13645 1.1652764267874487 -Q14657 1.1652764267874487 -Q8TDJ6-3 1.1647377477877605 -P35626 1.1626041254547055 -F8W9L8 1.1626041254547055 -Q9UM11 1.1603735977113108 -Q9H3N1 1.1593642064456227 -P62070-4 1.1564272523291241 -C9JA08 1.1562087789717341 -Q8N2F6-2 1.1562087789717341 -Q5H9R7 1.1562087789717341 -Q9H0G5 1.1534793335105478 -P62633-6 1.1525590608925138 -P52943-2 1.152390933348122 -P04637 1.151807870311012 -Q9Y3T9 1.1442551046759946 -J3KNR0 1.143363662798499 -Q9HCJ3 1.143048359008893 -Q9UBB4 1.1412670168811663 -Q99704 1.1355799997588671 -A0A0A0MQS2 1.1299340550653791 -Q7Z4G1-2 1.128900486251755 -Q03252 1.1275374703528738 -O14681 1.1243593918876376 -A0A087WTW0 1.1180470836417982 -Q96G03 1.1178546649914076 -Q5HY81 1.1121118109072745 -O43561 1.1115393335893018 -P25205-2 1.111027922417744 -Q9UKS6 1.1105882253298058 -D4PHA4 1.1098582831892392 -Q9BT25 1.1054250730101307 -Q92538-2 1.1054250730101307 -P31749 1.104840709561845 -Q96HY6 1.1002324397496726 -Q9H7N4 1.0967187621892156 -Q6UB35 1.0940639419500142 -Q9Y6D0 1.0940639419500142 -E9PFI5 1.0926831622151392 -P55036 1.0910671549306885 -Q14141 1.0880741151441737 -O00567 1.087160586916168 -P62877 1.086769181236134 -Q9BRF8 1.0860017014311272 -Q13523 1.0829321344921685 -A0A087WUT6 1.0807938165422941 -P26639-2 1.0770238031876582 -Q6NUK1 1.0730463251789988 -P28290 1.0719684141051082 -Q9NPF5 1.071577068546023 -Q5QP82 
1.0694030417898757 -Q8IWA0 1.0677297524298175 -P10809 1.0657634082295693 -Q9UI08-2 1.0644695104547544 -Q96IF1 1.063666825180044 -O60303 1.0627849111635628 -Q9NYL2 1.0519697889423663 -G1UD79 1.04739233499352 -P04049 1.0456681597101953 -Q96D05-2 1.0441422484606757 -Q69YN4 1.0423962692803261 -Q96T49 1.0390585238828396 -J3QTA6 1.0386996097261536 -Q9C0C9 1.0284889541021813 -Q9Y4I1-3 1.0254285457955026 -E7EV07 1.0221829329849241 -Q13542 1.0196538521636485 -O75376-2 1.0185667849408584 -P51397 1.016504798868468 -Q96D71 1.0162008775157336 -O00570 1.0153622127624502 -Q9Y2I7 1.0142648815187703 -A0A1B0GTN9 1.0109627568592905 -Q9HD15 1.0096828639556032 -Q96QD9 1.0096828639556032 -Q15652 1.0094992600098456 -Q7Z2E3 1.0084603804047647 -J3KQL8 1.0077614527324577 -O15169 1.005808582397118 -Q9NQC3 1.0051920614387835 -J3KR72 1.0037550381491787 -Q2NL67 1.003196032647505 -Q9NVA4 1.0022879202716266 diff --git a/datasets/hiv/raw/prize_060.csv b/datasets/hiv/raw/prize_060.csv deleted file mode 100644 index 4f66021..0000000 --- a/datasets/hiv/raw/prize_060.csv +++ /dev/null @@ -1,1789 +0,0 @@ -Uniprot Prize -A9Z1X7 6.343096753401681 -Q99081 6.341176225843264 -Q14152 6.2788510120134875 -B0YIW2 5.581820586044439 -P69905 5.581820586044439 -H3BLZ8 5.477823239269641 -P02787 5.334125824240235 -Q9Y2R4 5.174845117869905 -Q9UKV3-5 5.158390930270963 -Q96DC7-2 5.0865083998731 -P23142 5.0865083998731 -P37802-2 4.898987333103724 -Q9NZ63 4.898987333103724 -Q9NRA8-3 4.898987333103724 -P02765 4.898987333103724 -O60869 4.753524914278521 -Q15388 4.753524914278521 -Q9H1E3 4.753524914278521 -Q92576 4.753524914278521 -Q9Y2W1 4.732635903457529 -P18031 4.649058136710953 -P51957 4.649058136710953 -Q6PKG0 4.649058136710953 -Q92922 4.649058136710953 -Q9UQ35 4.649058136710953 -Q5T5H1 4.649058136710953 -Q9H6F5 4.649058136710953 -O96013 4.649058136710953 -Q13740-2 4.630059193615658 -Q8N684-3 4.6039456274308534 -Q8WWM7 4.57995222468541 -P61978-2 4.57995222468541 -P49207 4.568697931367667 -Q14738 4.553439376520597 
-A0A075B738 4.553439376520597 -Q5JTH9 4.525949613864842 -P52594-4 4.497219015813258 -P02786 4.497219015813258 -Q8NBM4 4.441375748603867 -O60749 4.434437246356287 -Q96BH1 4.401484592892618 -Q15027 4.401484592892618 -Q99490-2 4.401410413245209 -Q8IU60 4.401410413245209 -Q13243 4.401410413245209 -P48634 4.401410413245209 -P06748 4.388776771651659 -A0A087X0X3 4.388776771651659 -O43719 4.388776771651659 -P16949 4.388776771651659 -G0XQ39 4.388776771651659 -P01106 4.371909274401753 -Q7Z5J4 4.371909274401753 -Q9NQC3 4.352386472972332 -O75151 4.338487243836551 -Q96T58 4.333357363918116 -Q00403 4.302349340291357 -Q49A26 4.302349340291357 -O14646 4.302349340291357 -Q8TEA8 4.302349340291357 -F8VX04 4.296308796231895 -Q93100-3 4.256576053583031 -P29350-2 4.256576053583031 -P52756 4.256576053583031 -Q9NX58 4.250159505368727 -Q9UIQ6 4.247227922154882 -P02768 4.232255423400988 -P62805 4.232255423400988 -P68363-2 4.2136464273032646 -Q9HBD4 4.211317356325475 -Q9Y2W2 4.2080914315625275 -Q86Y91-4 4.186742180098104 -F8W0Q9 4.1783681130751615 -Q15154 4.1783681130751615 -Q01082 4.1783681130751615 -Q9H4L5 4.17829685567155 -O75995 4.17829685567155 -P38935 4.17829685567155 -O15127 4.17829685567155 -O15042 4.153933358800745 -O75396 4.152515054060739 -Q9Y2D5 4.152515054060739 -Q9BXB4 4.152515054060739 -P04004 4.120723433343331 -B7ZL14 4.1120240197381825 -P49790-3 4.111796391926523 -P18858 4.091509234016306 -P98179 4.07578779438785 -Q14739 4.07099390650613 -Q96HY6 4.07099390650613 -Q8NCD3 4.068226501317466 -Q9BVC5 4.068226501317466 -E7ESS2 4.068226501317466 -P20742 4.068150407428992 -Q9Y5B9 4.066397214904211 -Q9H814 4.064125401548571 -P16383 4.051435663285689 -P07766 4.051435663285689 -Q9UPP1 4.051435663285689 -O75449 4.051435663285689 -P46013 4.032997482233417 -Q6JBY9 4.028916213096974 -P11171 4.016588518455186 -Q9Y6X9 4.0106948331092696 -P29590-9 3.9920272692459497 -Q13094 3.9918565901522243 -A8MXP9 3.9890007464151327 -P04637 3.986936425830184 -A0A0A0MTS7 3.9844445443194014 -Q5TEC6 
3.960658462798587 -Q9ULU4-19 3.960056522033299 -M0QYC1 3.959480419609159 -Q9Y314 3.9542424847257545 -Q9BTA9 3.911579870736385 -Q16718-2 3.9085984456603344 -Q92609-2 3.8952393043172115 -P06396-2 3.8704427712703438 -P19823 3.8704427712703438 -Q7Z5R6 3.84470125099561 -O75533 3.8418096567452302 -Q13242 3.8418096567452302 -Q9Y520-7 3.83967142642704 -Q6DN12 3.83967142642704 -Q3KQU3 3.833836160262521 -P06493 3.833836160262521 -Q86V48 3.833836160262521 -Q9UKJ3 3.833836160262521 -O14681 3.8328762818784976 -Q8TBB5 3.8039477122984495 -O94874-2 3.8039477122984495 -Q01130 3.7961469536001258 -P38159 3.7886902447132824 -Q96DF8 3.772763518572416 -Q8WUM0 3.772763518572416 -P42166 3.772763518572416 -Q16566 3.7641746801720632 -Q14684 3.7530508396671123 -O95231 3.7530508396671123 -Q14677-3 3.7506192770943407 -Q13428-8 3.736683906762259 -O75791 3.7299875110275424 -P17483 3.7295111161000642 -Q04760 3.726570937832407 -Q13185 3.7202457191566873 -O60293 3.7103758360372376 -Q6P3S6 3.7103758360372376 -E9PCW1 3.7038502787789156 -P16150 3.7038502787789156 -Q9NWQ8 3.6894782213710084 -Q9ULR3 3.6894782213710084 -Q9Y2V2 3.6894782213710084 -O75494 3.6894782213710084 -Q8N999 3.6876040053311003 -Q9H4L4 3.670024161869914 -A0A0G2JNZ2 3.662081284722043 -Q66PJ3 3.661493292298249 -Q07666 3.661493292298249 -P42167 3.647766935759595 -B7ZKJ8 3.6385360638094326 -P08697 3.6385360638094326 -P01024 3.636573293729437 -P02788 3.636573293729437 -Q9Y6G9 3.6324468985975114 -P49750-4 3.6324468985975114 -P22626 3.6324468985975114 -Q6UN15 3.631000353453803 -P06400 3.6261879133906634 -Q8ND56-2 3.6258385285883468 -P16402 3.623421166516008 -O95644 3.623421166516008 -O14545 3.6070470127990206 -Q14847 3.6063938853950277 -Q2KHT3 3.6056913015762313 -Q96D05-2 3.6008409341145913 -P27816-5 3.6008409341145913 -O95793 3.6004296174071797 -M0QXA7 3.5908748668239943 -Q9H400 3.588092477415545 -Q9NX00 3.575353156831956 -P23588-2 3.575353156831956 -Q92733 3.575353156831956 -Q14149 3.575353156831956 -E7EQT4 3.575353156831956 -Q92667 
3.575353156831956 -P43487 3.5748349831619954 -O00571-2 3.564645989850563 -Q9Y4P8 3.5637338388107627 -Q6FI81 3.5637338388107627 -P33991 3.5586613811896384 -Q9UN86-2 3.548553190934899 -Q7L2J0 3.5455679145723744 -Q92769 3.5455679145723744 -Q7Z460 3.5455679145723744 -Q03252 3.5386476752235962 -E7EQZ4 3.517204355209181 -P02748 3.517204355209181 -P50750-2 3.516057833486569 -Q15172 3.516057833486569 -O75152 3.5073572194946134 -Q15149-2 3.494788599301796 -Q6P2E9 3.494788599301796 -Q12986 3.4735819185057712 -Q13619 3.459130272877953 -B7WPE2 3.459130272877953 -C9JV77 3.4436972171003912 -O95625 3.4320497781881105 -P17096 3.4320497781881105 -Q9UHD8 3.4261670156433603 -P62753 3.4261670156433603 -Q14643 3.4261670156433603 -Q9UI08-2 3.425341959321777 -P08670 3.4201576861668763 -P25205-2 3.420089571135709 -A0A0C4DGT3 3.420089571135709 -Q5T200 3.420089571135709 -Q15063 3.4134039075494336 -P05543 3.4134039075494336 -Q96GN5 3.413395827264136 -Q9Y2H0 3.3960732772028073 -Q12849 3.3816661060042734 -Q9UK76 3.3774200891787642 -Q9HCD5 3.372330414233952 -P28749 3.3693968233714715 -P21291 3.368626896839261 -P02647 3.364265154316689 -P02774-3 3.3635831829668335 -Q8WV93 3.3635831829668335 -O15294 3.3635831829668335 -P84085 3.3635831829668335 -Q12834 3.3612437200008953 -Q8N6H7 3.3573059164791 -Q13547 3.353284197844687 -O14974 3.349119808276538 -Q9HCN8 3.33063379713005 -P55209 3.33063379713005 -P20962 3.33063379713005 -Q86VZ1 3.3287314734817786 -P36955 3.3244291839285243 -Q13098-7 3.323302577866717 -F8WA39 3.314748749215954 -P24534 3.311261386839016 -Q96PQ6 3.308222512365894 -O94906 3.308222512365894 -Q9BWD1-2 3.302267119982475 -Q15629 3.2912574672496295 -Q9P270 3.288347923069404 -Q9Y4F3-5 3.288347923069404 -Q14671 3.288347923069404 -Q9NQW6 3.288347923069404 -Q7Z2W4 3.272291398277376 -Q9BYW2 3.2722368623821807 -P26358 3.2722368623821807 -Q8NDT2 3.2722368623821807 -Q5T5C0 3.2718809474355157 -O76031 3.2571361276247517 -Q9HC52 3.2461901817457517 -Q9UPN4 3.2414042120621387 -Q8TCJ2 3.210363373294723 
-Q2KHR2 3.2051935538764518 -P13807 3.200731064878577 -Q9UIG0 3.1998724251452733 -P49792 3.1989946829845137 -P57740 3.196758764445284 -Q9Y5L4 3.196152667824138 -Q7L2H7 3.196152667824138 -Q3B7T1 3.184729660549928 -Q96PK6 3.183154820255894 -O75145 3.183154820255894 -Q9NYF8 3.182325579091621 -E9PFI5 3.1756661941859967 -P15153 3.1756661941859967 -Q14181 3.1751442119066233 -P63220 3.1746642551842688 -P62995 3.174225934200866 -Q13574 3.172875137451181 -P06239-3 3.172875137451181 -Q9Y5J1 3.172875137451181 -Q15032-3 3.1721484673372498 -Q92794 3.171504173729173 -J3KTL2 3.171504173729173 -Q04637-8 3.171504173729173 -B4DY08 3.1690920543895156 -Q96N67 3.1624955108401056 -Q9NYF3 3.1596782970322166 -Q9NQS7 3.1471331468742276 -P60709 3.1461775953543336 -P01023 3.1461775953543336 -E9PGC8 3.1397706923158037 -Q9H2K8 3.128908505191591 -P09693 3.128908505191591 -Q5VT52 3.1163766142179288 -Q32MZ4-3 3.102032133600808 -Q9C0C9 3.10197222951546 -Q15287 3.10197222951546 -Q92615 3.0952598293741524 -Q6R327-3 3.0931531761760143 -Q13283 3.0931531761760143 -O00303 3.0923378805912427 -Q9NPQ8 3.0923378805912427 -Q15056 3.0893776814980867 -P20700 3.0849615632751934 -Q13111 3.0832250567178034 -Q8NG31 3.0832250567178034 -Q14157-5 3.0832250567178034 -Q96T37 3.0790928432733895 -Q9H4E7 3.076175194305481 -Q7Z417 3.074828125470865 -Q12968 3.052224257371509 -Q9Y385 3.0379043550731826 -O43318 3.0379043550731826 -P49915 3.0379043550731826 -C9JQE8 3.0379043550731826 -Q9BZI7 3.0342391193114375 -P36915 3.0228260202225354 -Q14692 3.0228260202225354 -P29590-2 2.9977933255414815 -F8W8D3 2.9832140640473255 -P42677 2.9776224782214022 -P18615 2.973008187319219 -Q92522 2.9702237590072382 -Q9NU19 2.9643627794173883 -Q14980 2.9597387852660417 -P26368 2.956328120758974 -Q86YV0 2.9536709659580604 -Q53EL6 2.949670994594595 -Q75N03 2.9486957246323016 -Q06546 2.9465232017182568 -O60832 2.9458437757500837 -Q1MSJ5 2.945225853807718 -Q9NYP9 2.944427988955578 -P17152 2.943849448129372 -P00747 2.9394753087147225 -A0A5E8 
2.933988728065752 -I1E4Y6 2.931855559472644 -Q99590 2.9220695108673373 -J3KPD3 2.91593368553183 -O60496 2.8984675262792234 -Q9Y2U5 2.8984675262792234 -Q5UIP0 2.8975798342405636 -O00308 2.8901067746404236 -E7ERS3 2.8890984183579715 -A1L0T0 2.866956886649357 -A0FGR8-2 2.86507366537191 -Q01196 2.856550719233104 -Q13595 2.850096276296079 -Q13435 2.848132942575468 -Q9NVM6 2.846320230664697 -Q92608 2.8447470699774837 -A6NMQ1 2.8423630443254466 -P05452 2.8337809317710136 -Q6XZF7 2.8284287032593722 -Q9Y2I7 2.8273019635280763 -P00492 2.8253881941849808 -P16989 2.8225364630128387 -Q13247 2.8168366046908817 -P62750 2.8165975150198572 -Q99729-3 2.815128862027249 -O75190 2.813719121184025 -P55011 2.8125517993486873 -O00231 2.811200866390171 -Q9BXI6-2 2.811200866390171 -Q9BRA2 2.810462680348271 -Q9UH03 2.810462680348271 -F5H8D7 2.8097966330524167 -P08238 2.8069947873220866 -Q8IZD4 2.8069947873220866 -P19827 2.8009652765947375 -Q8TC07-3 2.798815504047829 -Q8WY36 2.798815504047829 -Q06945 2.7981630786118195 -Q9UI10-3 2.7916817416191106 -P61927 2.7834920424511873 -A0A0U1RRH7 2.776597240080683 -P07996 2.776597240080683 -Q8ND61 2.776597240080683 -Q6KC79-2 2.770612115493293 -Q9C0C7-3 2.764017995653386 -Q15003 2.764017995653386 -Q6PJI9 2.764017995653386 -Q14966 2.7618986760443587 -Q7LBC6 2.756032352923451 -V9GYM8 2.7554518502099064 -O14828 2.7554518502099064 -B4DNK4 2.7513650187869705 -Q96FS4 2.749854139717848 -Q9Y3U8 2.7481923873666387 -E9PRY8 2.7481923873666387 -Q9HB71 2.7445815636606303 -O60784-2 2.7349005881513886 -Q9NYZ3 2.734508923866527 -Q96L91 2.722718898351411 -Q00839 2.7225202323227413 -P53634 2.7205715939563104 -Q13422 2.705766898088222 -Q13148 2.705766898088222 -Q7Z5L9-2 2.704273981724401 -P19174 2.685806242746805 -Q99613 2.6838834581102877 -P62310 2.6834251397979436 -P09496-2 2.6834251397979436 -Q92979 2.6834251397979436 -X6RAL5 2.6834251397979436 -Q9HCE1-2 2.6833436357789227 -F8VP89 2.6833436357789227 -P50747 2.6755600065638925 -Q6IQ49 2.6755600065638925 -Q6P4F7 
2.6755600065638925 -P51452 2.66235003372713 -O95801 2.659548924352871 -Q9H992 2.6526279256872063 -Q96S55 2.6526279256872063 -J3KNR0 2.6526279256872063 -Q3ZCQ8-2 2.645423080459946 -Q9NW64 2.645276291202411 -Q15751 2.6414861603166297 -Q5SRN5 2.6414861603166297 -A0A0B4J1V8 2.6414861603166297 -Q53GS9 2.6414861603166297 -A0A0G2JNG9 2.640748903625856 -A0A0C4DG17 2.6364501950266392 -Q9NWH9 2.6355354695575612 -P27540 2.633593043150965 -O75179 2.633593043150965 -Q8ND76 2.6300235507006127 -Q3V6T2 2.6300235507006127 -Q92747 2.628136008038071 -Q8NE71 2.6267255596291212 -P50914 2.623959476423252 -P26599-3 2.6194002133637784 -P48730-2 2.618603072279847 -Q9Y383 2.617137973772385 -Q8TF74 2.6132693051799283 -Q8NC44 2.6132693051799283 -Q96B97 2.6132693051799283 -Q14209 2.6132693051799283 -O00418 2.6132693051799283 -Q9UHB7 2.6132693051799283 -Q9H307 2.6121402058732497 -Q9NTI5 2.61177179246528 -P18621-3 2.605381124212529 -Q9Y277 2.6036317228636645 -Q8NHG8 2.6001654139076047 -Q8IWI9-3 2.6001654139076047 -P06753 2.5999560805560638 -Q9NZI8 2.5999560805560638 -P31942 2.5953831426907596 -O43809 2.5953831426907596 -O75081-2 2.5912818476609134 -A0A0A0MR07 2.591148053290532 -Q9NR30 2.581918378610299 -O60264 2.5782412117103597 -Q9BRD0 2.575225488876934 -P78414 2.570807213558587 -P52732 2.5665004567840213 -P49796 2.5664130602433195 -Q5JSZ5 2.5647758193564623 -P32519 2.5647758193564623 -P84103 2.5544669526939963 -P19338 2.5544669526939963 -P49585 2.5544669526939963 -Q8NFA0 2.5544669526939963 -P10412 2.5544669526939963 -Q9UJU6 2.5544669526939963 -P41236 2.551348449904725 -P41440 2.549420952717478 -P29966 2.5460514662873712 -P53801 2.5439828704279166 -Q13469 2.541120987972749 -Q8WWQ0 2.5366587460915517 -J3QR29 2.5366587460915517 -O94804 2.5364389616953584 -Q9HBU6 2.5346696973625593 -Q96ST2 2.5346696973625593 -O43566 2.5296297240129686 -Q86TB9-4 2.5294212306813564 -P14625 2.527418763370007 -O75531 2.522093879674677 -Q9BTD8 2.515469763793852 -Q9UJK0 2.501798274705546 -A0A0A0MT22 2.496618461875327 
-P37108 2.481770230111571 -O14641 2.4767706785392063 -O75347 2.4765180169028795 -Q9BZE4 2.4764410084091497 -Q96GK7 2.4759465075681835 -P61006 2.472191946223454 -Q8TAP9 2.468815989645967 -O15047 2.468815989645967 -Q9BSW2-2 2.468815989645967 -Q5T3J3 2.468815989645967 -Q9Y3R5 2.468815989645967 -Q9NZJ0 2.468815989645967 -C9JYS8 2.468815989645967 -Q9Y4B5 2.468815989645967 -O76064 2.468815989645967 -Q96A65 2.468815989645967 -Q12789 2.468551760338201 -P05204 2.468551760338201 -X6RAB3 2.4674617147829934 -Q8IYW5 2.4674617147829934 -P52948 2.4674617147829934 -Q13573 2.4674617147829934 -Q8NFC6 2.4674617147829934 -Q14571 2.4674617147829934 -P17480 2.4674617147829934 -A0A0A0MT60 2.4674617147829934 -Q08AE8 2.4674617147829934 -M0R2Z9 2.4674617147829934 -P52566 2.4674617147829934 -Q9NXR1 2.4674617147829934 -O95104 2.467210480455078 -A0A0A0MRJ7 2.467210480455078 -Q49AR2 2.4660140462198124 -P13639 2.460629325043291 -O75323 2.460024012381305 -P63241-2 2.4584120005528907 -Q9Y3S1 2.458141005847922 -Q16539-2 2.453479812351802 -K7ER00 2.451678077731941 -Q9BU76 2.449133822612877 -Q9NWQ4 2.446991942218826 -P62273 2.4445307339638433 -Q9UQ80 2.4443541391971433 -Q5VTL8 2.4423788327663156 -Q9BVA0 2.4284676107671443 -Q9UP95 2.4225847452847526 -P30304 2.4225847452847526 -Q6NUK1 2.4225847452847526 -Q9H211 2.4224779775039638 -P13598 2.421118892619319 -P06732 2.421118892619319 -Q5FBB7 2.4181972190476997 -H0Y449 2.4171764320816247 -Q96BY6-3 2.4134080656976864 -O95071 2.411329919464377 -O94964-2 2.411329919464377 -H0Y4E8 2.4106411743220497 -Q96ER9 2.409789650464054 -Q15648 2.404168701619811 -E7EVA0 2.403213018226533 -O95562 2.403213018226533 -O60669 2.401186584834538 -Q9H6A9 2.4003811710852485 -P27815 2.4003811710852485 -P16401 2.4003811710852485 -Q9BTE6 2.399368338742995 -Q7L4I2 2.399353894136525 -P10242-4 2.3940349516397097 -P42345 2.3940349516397097 -Q9BSK4 2.391555891833587 -I3L2J0 2.387851225630014 -Q86WJ1 2.387851225630014 -Q9ULJ3 2.3841756517597137 -Q92797 2.3836445561407307 -P28482 
2.379460658438463 -P26358-2 2.379460658438463 -Q8N5U6-2 2.379460658438463 -A0A0A0MRV0 2.379460658438463 -Q9BW71 2.379460658438463 -O95218-2 2.379460658438463 -P46019 2.379460658438463 -X6R7X0 2.379460658438463 -Q9UHB6-4 2.379460658438463 -Q9ULW0 2.379460658438463 -O94913 2.379460658438463 -Q9Y2F5 2.379460658438463 -M0R088 2.379460658438463 -P52565 2.374939246474548 -Q96GM8 2.3733874213209147 -A0A075B746 2.3733874213209147 -P27701 2.37162272333702 -Q92506 2.37162272333702 -Q96EY5 2.371143496958736 -O95685 2.362081783466086 -Q92538-2 2.3607129875173016 -Q96JM3 2.3607129875173016 -P30041 2.3601818305414803 -Q9NPA8 2.3601818305414803 -Q5VTU8 2.3490738871320884 -Q8N3F8 2.3481915494629093 -P30291 2.3481915494629093 -Q8TF50 2.3481915494629093 -A0A1B0GTW1 2.3481487994663968 -Q14978-2 2.345750344550942 -Q96P11-2 2.3454254207826253 -Q9NRY4 2.3453800692503988 -O60563 2.3453800692503988 -A2RRD8 2.3453800692503988 -Q5VV67 2.3453800692503988 -A0A0J9YYD9 2.3453800692503988 -Q07864 2.3433908886146164 -E9PDJ2 2.341567079008073 -Q9H0B6 2.339877471765953 -O95248-4 2.338193060122805 -P15814 2.337050124765354 -P31946 2.3357168081286237 -A6QL63 2.3346504091989 -Q96T23 2.3346504091989 -O75937 2.3346504091989 -O60343-2 2.3346504091989 -Q9BQG0-2 2.3346504091989 -Q5JTV8 2.3346504091989 -Q15052 2.3346504091989 -Q86U86 2.3346504091989 -Q9BY43-2 2.3346504091989 -Q86X95 2.3346504091989 -Q96HA1 2.3346504091989 -Q9H3N1 2.3346504091989 -Q8IY63 2.3346504091989 -O43776 2.3346504091989 -P54278 2.3346504091989 -P14921 2.3346504091989 -Q9P265 2.3346504091989 -P78371 2.3346504091989 -O75717 2.3346504091989 -Q86XK3-2 2.3346504091989 -Q7LDG7-2 2.3346504091989 -Q5TGY3 2.330746483796545 -A0A087WZG4 2.326913529775188 -Q7Z5K2-3 2.326125572781434 -Q6ZU80 2.3239206503505017 -D6RAF8 2.323894146175129 -O75385 2.3233691974776733 -Q9Y5W3 2.320967995683317 -P84101 2.3191552675638136 -Q9UMN6 2.3191552675638136 -Q92973 2.3191552675638136 -B5MCU0 2.3191552675638136 -Q02078 2.3191552675638136 -Q96PY6-3 
2.3191552675638136 -O15355 2.3191552675638136 -O43586 2.318573793224209 -P04350 2.314982216298125 -P06127 2.314561515384489 -Q9UBF8-2 2.314322638231197 -Q9NVG8 2.314099472964523 -Q7Z7G8 2.313383283281896 -Q15555 2.3120067881397053 -Q9BTC0 2.310518141178195 -Q8TAD8 2.3074064909657377 -P18077 2.3040839094701653 -P43403 2.303453581412009 -Q96Q45-3 2.299075970419505 -O43166 2.299075970419505 -O94782 2.299075970419505 -Q9Y6B7 2.299075970419505 -Q16875-4 2.299075970419505 -O76021 2.299075970419505 -P13994 2.299075970419505 -P62979 2.299075970419505 -Q9P2N5 2.299075970419505 -O14578-4 2.299075970419505 -P55199 2.299075970419505 -Q69YQ0 2.299075970419505 -Q6W2J9 2.299075970419505 -P35269 2.299075970419505 -P62851 2.299075970419505 -P49902 2.299075970419505 -Q13769 2.299075970419505 -Q8IWZ8 2.299075970419505 -Q13112 2.299075970419505 -Q8IVT5 2.299075970419505 -Q00613 2.299075970419505 -O94915 2.299075970419505 -P25054 2.299075970419505 -A0A0A0MQU4 2.299075970419505 -F5H527 2.2976035638794907 -Q96EV2 2.296377216576487 -O15117-3 2.296377216576487 -Q9BZX2 2.296377216576487 -A0A140T9T7 2.296377216576487 -A0A1B0GW41 2.296377216576487 -O00161 2.296377216576487 -Q96F86 2.2924345925914555 -Q5H9F3-3 2.2924345925914555 -H0Y5F5 2.291924178154625 -A0A0G2JPR0 2.2918670259954865 -P18065 2.2918670259954865 -P20290 2.290532015929821 -P40692 2.2892202950754537 -Q9C0C2 2.287912009318913 -O75909 2.2870496331806054 -Q96DI7 2.2864803544997585 -Q14669-3 2.285729237090503 -X6RLX0 2.285729237090503 -A0A087WXK8 2.285729237090503 -Q14241 2.2830773959782498 -Q8WWY3 2.2828182211073975 -Q16666 2.2806692853163013 -Q9NZR1 2.2801174402900424 -O75182 2.2789446091132675 -P16403 2.2788533740202284 -Q5SQI0 2.2779184769070056 -Q13576 2.2779184769070056 -Q8IZP0 2.2779184769070056 -B8ZZS0 2.2771236985359664 -Q96MU7 2.2771236985359664 -P12270 2.2771236985359664 -P49790 2.276197614199379 -Q9BWH2 2.2761753687407147 -P09960-2 2.2754610466223038 -P56385 2.274746360801074 -Q8WUA4 2.273207627865623 -Q9BVG9 
2.271997726104445 -A0A024R4E5 2.266642712619027 -Q15424-4 2.266642712619027 -P00519-2 2.266642712619027 -P27708 2.266642712619027 -Q96EP0 2.2644827436430095 -Q14232 2.262694492819737 -Q8N9N7 2.258939498430969 -Q9NSY1 2.256800668588044 -P31751 2.256356422189658 -P16104 2.256103854743705 -O43399 2.256103854743705 -O60307 2.253712080399724 -Q9Y4B6 2.253712080399724 -Q9NVC6 2.2488856062003055 -Q53F19 2.2481223008904165 -Q9P209 2.2468356012020068 -Q14188 2.2468356012020068 -P42331-4 2.246602127121684 -Q02880 2.245894113900068 -Q16186 2.2441475250594975 -Q9Y2Z0 2.2441475250594975 -P78549 2.2441475250594975 -P23246 2.2441475250594975 -Q96CB8 2.2437355734484297 -A0A0J9YWL0 2.2420910926726214 -O75368 2.2383274205822623 -P18085 2.2363689638413686 -P35443 2.2337704031300354 -Q14657 2.2337704031300354 -Q96I23 2.2337704031300354 -P04114 2.2337704031300354 -P15169 2.2337704031300354 -Q96S15 2.2337704031300354 -P02751 2.2337704031300354 -A0AVF1 2.2337704031300354 -Q16659 2.233505938411529 -O43395 2.233505938411529 -Q13627 2.232795901289166 -Q9BUQ8 2.232268449696564 -Q9ULT8 2.231850616890981 -P61956 2.231850616890981 -Q96A19 2.231850616890981 -Q86V59 2.229659160358248 -Q9NSC5 2.2291724357357787 -Q9H1B7 2.2255590449321456 -Q9NPG3 2.2248700523389306 -Q5SW79 2.2248700523389306 -P46783 2.2243089739892663 -Q9ULX3 2.2243089739892663 -Q92890-1 2.220951883678657 -Q9H6D3 2.220457390306835 -Q9NPA5 2.220457390306835 -O15014 2.215141560511576 -Q5W0V3 2.2131654802235623 -Q92844 2.2131654802235623 -A0A087WTF0 2.2131654802235623 -P22694-2 2.2131654802235623 -A0A087X1Z1 2.2131654802235623 -O00257 2.2131654802235623 -A0A1B0GV70 2.21253930016725 -Q86X53 2.21253930016725 -Q9UNL2-2 2.2123967206190396 -O94762 2.209560664353656 -Q9NUQ3 2.2075830259238174 -B9EGE7 2.2075830259238174 -Q14011 2.2075830259238174 -Q8N163 2.2075830259238174 -P35268 2.2074101015457988 -Q9UKT7 2.206588464449987 -O43290 2.206588464449987 -P17544-6 2.2064876313546926 -Q13506 2.205796891825875 -Q96R06 2.205796891825875 -P21359 
2.205796891825875 -Q13118 2.2022506666900687 -Q765P7 2.201298938577048 -O94761 2.20117238998352 -Q15906-2 2.2002029014346296 -Q14155-5 2.2002029014346296 -Q8TF01 2.2002029014346296 -Q96N16-2 2.2002029014346296 -Q8IV04 2.199634220644328 -Q9C0B5 2.1996119453418634 -O15173-2 2.1996119453418634 -Q76N32 2.1996119453418634 -O75592 2.1996119453418634 -Q8TB72-3 2.1971915408111116 -Q15459 2.1971915408111116 -O00472 2.1971915408111116 -Q13131-2 2.1964874068502436 -P20674 2.1956961458131237 -O14672 2.1944671655094194 -Q8IUW5 2.1944671655094194 -Q6NUT3-3 2.1944671655094194 -Q01433 2.1944671655094194 -Q13415 2.1919535392926974 -J3QS41 2.188874046142944 -Q7L9B9 2.186498260136675 -P53367-2 2.184545879684002 -J3KQ96 2.184545879684002 -P61221 2.182981006177341 -Q5SSJ5 2.181043306859131 -Q92804 2.181043306859131 -A2RU30 2.1803063070089026 -Q9Y6Q9 2.1803063070089026 -Q9NYV4 2.1803063070089026 -Q7KZ85 2.1779965957899776 -P46109 2.1779965957899776 -Q7Z569 2.1779965957899776 -Q15398 2.1776611004183932 -H0YEM9 2.177387813228976 -Q53HC0 2.1769670173326676 -Q969S3 2.175139594436965 -Q96RT1-8 2.175139594436965 -P60174 2.174599667619271 -F5H2A4 2.1716865645979713 -O14757 2.16964076975173 -Q02040 2.169570523462526 -O00479 2.169570523462526 -Q9H9B1 2.169570523462526 -Q9C0K0 2.168372120180288 -Q7Z2Z1 2.1639374258407877 -Q96EZ8-2 2.1639374258407877 -H7BZJ3 2.1631522638114964 -P52597 2.1623535941748315 -Q9P1Y6 2.1575526324802183 -O60499 2.157472214061376 -Q09161 2.1535841944572303 -Q15334 2.151184367357258 -A0A087X188 2.151184367357258 -P49773 2.150937476023596 -O75175 2.1502680638116165 -A0A1C7CYX9 2.147634536571454 -P78347 2.1443531124255264 -P42684 2.1443531124255264 -Q6UUV9-2 2.1443531124255264 -Q5BKY9 2.1414909307538643 -J3KMZ8 2.139357795416215 -Q5QJE6 2.1388767056303064 -E7ETA6 2.1357959156410353 -Q04759 2.1355174838017414 -P48651 2.133917148684859 -Q86UK7-2 2.132637726566628 -J3KNL2 2.132637726566628 -J3KPC5 2.13148193844429 -O75376-2 2.1314274567968576 -P63244 2.1256488598308514 -Q9C0D5 
2.1228849043594265 -Q9Y3Q8 2.1169414847507215 -Q6P4R8-2 2.1153446040349304 -P02686 2.1153446040349304 -P51451 2.1153446040349304 -Q29RF7 2.1153446040349304 -P31749 2.115205836871803 -P20020 2.115175057066785 -Q96S44 2.115101885595147 -Q96D71 2.114307490386863 -P52292 2.1141175455399233 -A2VDJ0-5 2.1128627190139317 -Q7Z6E9 2.1120166167617134 -P21281 2.1120166167617134 -Q9H4A3-7 2.1120166167617134 -Q9UBH6 2.1120166167617134 -P29084 2.1120166167617134 -F5H1Z8 2.1120166167617134 -Q9BQF6 2.1093699848810448 -Q9Y2H2 2.1093699060861972 -Q99567 2.1093699060861972 -P24928 2.1093699060861972 -Q99717 2.1093699060861972 -Q14498 2.1093699060861972 -Q6NUJ5 2.1093699060861972 -Q7Z6Z7 2.1091063059644304 -Q9NUY8 2.108856472906373 -P25686 2.108856332351709 -Q9HAZ1 2.108856332351709 -O75122 2.108856332351709 -Q8IWB9 2.108856332351709 -Q9BVI0 2.108856332351709 -O96028 2.108856332351709 -Q15366 2.1035064109427077 -Q15121-2 2.102947334439146 -F5H7W8 2.102947334439146 -Q8TDB6 2.102179609376134 -Q8NBZ0 2.102179609376134 -C9JCP7 2.1007689450120735 -Q53GL0 2.100598551485609 -Q68DK7 2.100598551485609 -Q1KMD3 2.100598551485609 -Q5T5Y3-3 2.100598551485609 -P34972 2.100598551485609 -P56589 2.099961865993031 -Q8TAE8 2.0969151322673714 -Q99952 2.09680082725643 -Q6ZUT1-2 2.0965620919406924 -Q6WKZ4 2.0940746930227516 -P40337 2.0940746930227516 -Q9UPT5 2.0940746930227516 -E9PES4 2.091970519290764 -Q9H4Z2 2.091970519290764 -Q9Y519 2.091970519290764 -Q86W92 2.0911962133381135 -Q76L83 2.090872163480254 -Q9HCG8 2.0906774593530906 -Q9NPI6 2.090513105009064 -Q5W0Z9 2.090513105009064 -Q96IZ7 2.090513105009064 -Q9NS23-5 2.087748673289812 -O60292 2.087748673289812 -Q96A57-2 2.0871938065321616 -Q9Y2I8-2 2.082037822230754 -Q9BUJ2 2.082037822230754 -Q86YP4-3 2.082037822230754 -R4GN35 2.0792630811480173 -Q6ZTW0 2.076126736623318 -Q9NR81-2 2.076126736623318 -S4R347 2.074498970192415 -Q86YA3 2.070140731192561 -P85037 2.068923751831252 -A0A0C4DGZ1 2.067761260554672 -Q13905-3 2.067761260554672 -Q9Y6X4 
2.067761260554672 -Q8IXT5 2.0672868878993915 -Q08945 2.0649751490014143 -P38432 2.0623654773138638 -Q8IYB1 2.059796597379046 -Q99442 2.0596773113537092 -Q9UKE5 2.0596773113537092 -P51003 2.0596773113537092 -Q14141 2.0596773113537092 -Q9Y6R1 2.0596773113537092 -Q68CP9 2.0596773113537092 -Q9UN36 2.058857732574496 -H7C0J3 2.0569217983037253 -P14866 2.0566283279346407 -P24723 2.056181680721583 -Q6ZNJ1 2.053553258172669 -O95777 2.0516852149464335 -P30414 2.0511705158995266 -Q9H910-3 2.050309220240459 -Q9H0E3-3 2.0502876986582717 -Q86U06 2.0444070823056384 -H7BZ55 2.0442375783148776 -Q7KZF4 2.0442375783148776 -Q8N0Z3 2.0441078857689914 -Q16537 2.044027819799674 -Q8IZT6 2.044027819799674 -Q6NXT6 2.044027819799674 -Q15036 2.041963362578797 -P41182 2.041963362578797 -Q8ND82 2.041963362578797 -P52701 2.041963362578797 -Q13303 2.041963362578797 -Q9P275-2 2.041963362578797 -O00512 2.041963362578797 -P29374 2.0400661110774783 -O00203 2.0397413778619256 -Q02224 2.0397413778619256 -Q8NC56 2.037296056958541 -Q5W0B1 2.037296056958541 -Q12846 2.036181414263432 -Q9H7N4 2.0284998413815085 -Q15233-2 2.028482100348224 -O60336 2.028482100348224 -E9PNI7 2.0282540999774312 -Q641Q2 2.0282540999774312 -Q8IWX8 2.023409087697932 -Q8NCF5 2.023195317711924 -Q6ZRI6 2.023195317711924 -Q6P1L5 2.0229474827644864 -A0A0G2JLV7 2.0226061530419397 -O15231-6 2.0211647221102447 -Q9H410 2.0210634500435134 -P29762 2.0187473893625043 -Q9H7F0 2.018453559736117 -Q7Z4H7 2.018453559736117 -Q9GZR2 2.0178844958883446 -Q86XZ4 2.0166004499098658 -Q9Y5B0 2.0160495196100743 -Q2M2I8 2.015226233917232 -P49815 2.014830844492049 -P26641-2 2.0147818804919995 -Q53EZ4 2.013154470011907 -P35236-3 2.0111310600104124 -Q9NZ32 2.0100950372823636 -P04053 2.009624306881172 -Q6L9W6 2.008786491289889 -A0A0C4DFX9 2.007387438926113 -G3V1A6 2.0058521910104714 -O95696-2 2.0058521910104714 -Q9Y4E8-3 2.0058521910104714 -Q9H2G2 2.0058521910104714 -P50548 2.0058521910104714 -Q9P2N6-3 2.0058521910104714 -Q8IYB4 2.0058521910104714 -O60683-2 
2.0058521910104714 -Q6NW29 2.0058521910104714 -A0A0G2JNW7 2.0058521910104714 -Q6PJT7 2.0058521910104714 -Q96JH7 2.0058521910104714 -Q9BQ52 2.004833183336829 -Q9HCK8 2.002481472673527 -Q96Q89-2 2.0006825998478672 -F5H0R1 2.0004896940206867 -Q14207 2.0004896940206867 -Q0VDF9 1.9977890694842837 -Q15025 1.9977890694842837 -A0A0B4J2E5 1.9977890694842837 -Q8IX90 1.9977890694842837 -P55265-4 1.9977890694842837 -D6RIF6 1.9971092688947787 -A0A087X1R1 1.9968773503723312 -Q96KB5-2 1.9956983409132483 -P31146 1.9955203209803059 -Q08752 1.9955203209803059 -Q9UPR3 1.9955203209803059 -P42575 1.9955203209803059 -Q92556 1.994718445912385 -Q92934 1.9940402620975821 -P11388-4 1.9936771130586195 -G8JLB6 1.9935080122812758 -P61073-2 1.9925845465047842 -O60486 1.9925845465047842 -P11274 1.9870681450856689 -Q9Y6R0 1.9859979698209045 -Q92614-2 1.9849695021527582 -Q9ULL5-3 1.9825700826091797 -Q08378 1.9812157473100451 -Q15643 1.9808076255153675 -Q9GZP4 1.9807055879415203 -P78332 1.9781680226108629 -P60866 1.975725801332516 -A0A1B0GW05 1.975004137378945 -O75170 1.973134765183257 -Q6KC79 1.972153319692893 -C9JA08 1.972153319692893 -O00139-4 1.972153319692893 -O95613 1.9721316639544582 -P68133 1.9714906543123707 -P20774 1.9697757333691355 -Q9H1I8 1.9693105653433045 -A0A0A6YY96 1.9662278530665926 -Q9HAW4 1.9662278530665926 -Q6PL18 1.9662278530665926 -H7C0H2 1.9662278530665926 -Q9UKS7-7 1.9662278530665926 -O43896 1.9641871757374834 -P01733 1.9641178791349176 -Q9NQX3-2 1.9621558029521944 -Q7Z591 1.9616776683260024 -Q5VUA4 1.9608770508360702 -G1UD79 1.9581319598186329 -F8W7T1 1.9563362787973602 -Q9NXG2 1.9563362787973602 -Q05519 1.9555751931622782 -P35251 1.9555751931622782 -Q86U70 1.9555751931622782 -Q9NZN5 1.9538724812508126 -P08621 1.9528536388425435 -Q86UE8 1.9527326113537566 -Q6IA17 1.9517998970839894 -Q8IZC7 1.948214785106097 -P55081 1.9474036631895797 -P35611-6 1.946549874492993 -P49454 1.94643754978836 -H9KVB4 1.9463790874981766 -Q9UQQ2 1.9441509244800759 -Q96G01 1.941848263441864 -P54105 
1.9405740725846252 -Q5T4S7-4 1.939447864804523 -Q9UBC3 1.9376698511597183 -Q9Y4X4 1.9371487533688523 -G3V4K3 1.9371487533688523 -Q96AY4 1.936729419509988 -O00567 1.9353415006208459 -Q96PE3 1.9340349347519035 -Q9UK61 1.9340349347519035 -Q9BWT3 1.9340349347519035 -O60504 1.9338244808981946 -E7EMB3 1.9330467768070907 -J3KNL6 1.92839569236804 -O75037 1.9261241455852476 -Q9HB58-6 1.9225780734463032 -Q8TAQ2 1.9225780734463032 -P51114 1.9204679840543855 -Q2NKX9 1.9204679840543855 -A0A1B0GTU4 1.9196844447060242 -P16520 1.9196092474372508 -O75962 1.9186528868578685 -Q92597 1.9152769658281898 -O75362 1.91488933856793 -Q9UKA4 1.9125636542054327 -Q9UGN5 1.9118817340409031 -P60953 1.9113026823085444 -Q6ZV73 1.9111872438405775 -P24666 1.9100407337324943 -Q92621 1.909901338212433 -Q99741 1.9048638363103287 -P30622 1.903661099141827 -Q8TDY2 1.903661099141827 -Q1W6H9 1.903661099141827 -Q96K83 1.8999631738643223 -P57088 1.8996958537048327 -P43686 1.8990250440390657 -Q1ED39 1.8990250440390657 -Q00537-2 1.8990250440390657 -Q9BW19 1.897771045597418 -O43143 1.897771045597418 -Q6PD62 1.897771045597418 -Q9NWS9 1.896735227825509 -Q9P107 1.8964189171841284 -O60238 1.8962186714258185 -Q86UE4 1.895390973557809 -Q9H8Y5 1.8949877859881998 -Q15111 1.8945886214178447 -A0A0D9SFK2 1.8935697937757385 -Q4LE39 1.8934040029807349 -Q7Z4S6-4 1.8914382620685781 -Q9UEY8 1.8914079828034591 -Q9Y3P8 1.8914079828034591 -Q96GA3 1.889866976842714 -Q8IWZ3-6 1.888409870070193 -Q14C86-4 1.8873990749187397 -P23528 1.8873502053234212 -Q92833 1.886847231548412 -Q00987-11 1.886486028061935 -O75132 1.886486028061935 -Q9P2R6 1.886486028061935 -P50219 1.886486028061935 -A6NIW2 1.8848540669960945 -Q00536-2 1.8845415229048914 -J3KR72 1.8837655430786786 -Q562F6 1.8830844188318439 -Q9P260 1.883011167829557 -P09651 1.882737851832501 -Q99622 1.8823692198544568 -P45973 1.8813181462003392 -Q99623 1.8796474890052843 -Q14814 1.8796474890052843 -Q9UPT8 1.8796474890052843 -E9PN30 1.8792562407805726 -Q4G0F5 1.879211068366587 -Q14CW9 
1.8775431368457043 -Q9NU22 1.8775431368457043 -P46531 1.877237685342001 -P18583-5 1.8740956882765514 -O00716 1.8724724280059326 -Q8WVC0 1.8719854017854187 -Q6UB98 1.871935817637404 -Q9Y4I1-3 1.8709234416488483 -Q9NXV2 1.8700133079676442 -P35626 1.8689248039109314 -P62820 1.8689248039109314 -Q9NY27 1.86842884179613 -Q7Z460-4 1.8672867932619466 -Q8N5I9 1.8671174898109877 -Q9Y5B6 1.8668029063280263 -Q96E39 1.8658186985764118 -Q9NUA8 1.8658186985764118 -Q86TL0 1.8633569757161088 -Q9P225 1.8625403111619427 -Q8NCN4 1.8622530797369996 -Q9Y6I3 1.8615460085836233 -Q9UM11 1.8596148242247816 -Q16760 1.858230261246268 -Q9H8U3 1.8551393216501397 -O95714 1.8551393216501397 -Q96JK2 1.854290273132139 -O95466-2 1.854290273132139 -Q9P246-2 1.8541642771844984 -P43307 1.8534512145544326 -Q8NBX0 1.8534512145544326 -P53985 1.8530703410341138 -Q9BPX3 1.851538111190216 -Q96BT3 1.8513592966006567 -Q15021 1.8513592966006567 -Q9UKY7 1.8511751263200067 -Q8N5G2 1.8491590241913138 -Q8TDM6 1.8481353173656336 -Q8N9T8 1.8476017559347993 -O60701 1.8473685584702397 -Q9NZN5-2 1.8473345037674773 -E7EV07 1.8466997921960726 -Q15814 1.8466997921960726 -Q15596 1.8464464252492203 -D3DQV9 1.8449020656854573 -Q13541 1.8445738079358835 -P37275-2 1.8427420942076063 -P56182 1.8419443916959364 -P60468 1.8415406809273345 -Q9H6L5 1.8415406809273345 -Q14318-2 1.8411761745188369 -Q86XL3 1.8401359943190263 -Q04727-3 1.8395148709869915 -Q96K37 1.8379990461874238 -Q7Z4W1 1.8360303172367103 -Q8ND04-2 1.8359132145762926 -E9PCH4 1.8359132145762926 -G5E9I4 1.8355724400101325 -Q8N3X1-2 1.8355724400101325 -P05198 1.8326064928994803 -Q9Y2X7-3 1.8301890029537908 -Q9HC62 1.8297842682389116 -P28908 1.8286316113602068 -P49711 1.827642972984798 -Q12802-2 1.825769060324459 -Q8IZ21-3 1.8257412157133237 -O14639 1.8257412157133237 -Q13263 1.8257412157133237 -A0A0G2JH68 1.8257412157133237 -Q8TEM1 1.8252341061126196 -O95429 1.8244905051074116 -P49768 1.8203538251836016 -I3L1I5 1.8203538251836016 -H7C494 1.8194002646928604 -B3KS98 
1.8193798470359166 -Q13813 1.8193798470359166 -P05141 1.8177637869374714 -Q9Y266 1.8134298343206936 -Q02086 1.8124070597379403 -Q96G23 1.8124070597379403 -P12956 1.8124070597379403 -Q5T2D3 1.8124070597379403 -Q8WX93 1.812254492088731 -Q6UWD8 1.8121415199445068 -Q9UEE9 1.8112602031282 -P28066 1.8112602031282 -Q13523 1.8112602031282 -P46379-3 1.8112602031282 -Q9Y679 1.8112602031282 -Q96E09 1.8112319439076676 -Q9NTJ3 1.8081024380740605 -Q99549-2 1.8081024380740605 -Q76FK4-2 1.8081024380740605 -O15054-1 1.8081024380740605 -P51784 1.8081024380740605 -P15336 1.8080195867474464 -Q6P6C2 1.8053678009232488 -O00255 1.805216852556065 -Q92619 1.805216852556065 -Q8WU90 1.805216852556065 -O00264 1.805216852556065 -O15151 1.7985871531656756 -Q9NZ52 1.7974934522612709 -P62699 1.797090104104933 -Q15773 1.7969334119600464 -P38398-7 1.7960018676860092 -O43900 1.794848497001336 -Q9Y237-2 1.7947551844009606 -P62854 1.793030440105996 -P11831 1.7901716198421307 -Q9NRQ5 1.7901716198421307 -A0A1B0GV45 1.7879598744498 -Q7L590 1.7869525947906078 -O75400-3 1.7861044389578091 -Q9H165 1.7857631083921943 -Q9NQ29 1.7856618124506727 -P61970 1.7843984558489752 -Q6PCT2-2 1.783699214384715 -P58546 1.782049370064308 -Q86WR7 1.7814971839266904 -Q8N5C8 1.7806331781935534 -Q9UPU5 1.7781611433059177 -Q9NS91 1.777343829188853 -Q5HY81 1.7761776882440239 -Q5TC82 1.7754920331387507 -O75064 1.775156522545704 -Q92688 1.7750894518227847 -Q9UKM9 1.7746251470057748 -Q9H1A4 1.77357180291106 -P41162 1.77357180291106 -Q9NZM3 1.77357180291106 -P49761 1.7735655299578044 -Q9H6K5 1.7735655299578044 -Q3L8U1 1.7725757176226735 -Q17RY0 1.7693755454994993 -A0A087WUE4 1.7686992617212867 -Q5T6C5 1.7686992617212867 -P42224 1.7661891764741557 -Q8N490-2 1.7626061260283152 -H0YL70 1.7603041093412912 -Q5THJ4 1.7578191224899795 -P19838-2 1.7565353791338274 -Q9UNY4 1.7565353791338274 -Q96RY7 1.7536429532132964 -Q9UBD5-2 1.752458052128842 -Q9BTE3 1.752458052128842 -Q8N573 1.752361310862412 -Q8WXX5 1.752361310862412 -Q9NV70-2 
1.752361310862412 -P29597 1.752361310862412 -Q96II8 1.752361310862412 -Q8N9N8 1.752361310862412 -Q96PV7 1.752361310862412 -O14924 1.752361310862412 -P51965 1.752361310862412 -Q9Y2S0-2 1.7520714343059673 -A0A1B0GTN9 1.7485474069880655 -Q92545 1.7479549372255476 -Q99986 1.7479549372255476 -Q659C4 1.7410790476493874 -B7Z1P2 1.7409374588274027 -Q96HR8 1.7390644518380052 -Q86XP3 1.73893433388079 -P05386 1.7374648056529645 -Q14687 1.7372332598669846 -Q9Y6K9-2 1.7343429036885483 -O14561 1.7337720933027978 -Q9Y6H1 1.7306496172539747 -Q96DU3 1.7301857513959167 -P23511 1.7280478829048984 -P49327 1.7278430117838308 -Q14242-2 1.7268702024680425 -J3QTA6 1.7262615564494324 -Q8N1K5-4 1.725959434588072 -Q16512-2 1.723931270247094 -Q96AV8 1.7229690842120107 -O94876 1.7203089953793032 -P31323 1.7197271245341366 -Q15527 1.7195083814592058 -Q86TI0 1.7184496543497305 -Q96PN7-5 1.7177641409659705 -Q08117-2 1.717000035510205 -A0A0C4DFM7 1.7163696134264412 -Q14676 1.7160906547478465 -A2ABF8 1.7136028937708117 -Q86XN7 1.7126554558223508 -P21333 1.7126554558223508 -Q9BVJ6 1.7126228640753052 -B8ZZ87 1.7119214051269198 -Q9Y294 1.7115031429363325 -Q9NRJ4 1.709223807269717 -P13051 1.7083049596547497 -P50851 1.70719506688214 -Q96B01 1.70719506688214 -Q5JSH3 1.7062365506005044 -O43861 1.7058908923864156 -Q9HCH0 1.7058908923864156 -Q99683 1.7058908923864156 -Q9UNE7 1.7054933052489143 -Q2NL67 1.7032342924386423 -Q96D15 1.7032342924386423 -Q9UHJ3 1.7026341709398347 -E9PN89 1.7026341709398347 -Q01518 1.7026341709398347 -Q17R89 1.7026341709398347 -Q99459 1.700304226801232 -Q9Y2L5 1.700239610810821 -H7BYN4 1.6935460890002627 -P25445 1.692660352832085 -P56211 1.6917169612995973 -P20963 1.6892956971870206 -Q9H4L7-2 1.6886879618941708 -Q9BZF2 1.6854656243647503 -Q9H7X3 1.6854171698814588 -P42356 1.684845087000013 -Q8N9F8 1.6823074490926753 -P49916 1.6810685247034622 -Q5FWF4-2 1.6790591086717306 -O43257 1.6790591086717306 -P02649 1.6790591086717306 -O00767 1.6790591086717306 -Q8WWI1 1.6777145859486238 
-Q9Y3D3 1.676657494563034 -Q9BVV6-3 1.6765393640830388 -O95721 1.6761443608369515 -O15347 1.6758157304570875 -Q7Z422 1.674918267147864 -O75554 1.674231063625865 -P22415 1.6740105011808257 -Q9NRL2 1.6735950383878917 -Q9UQE7 1.671327567434748 -P30405 1.6708877591120552 -Q07866-4 1.670385039822689 -Q6WCQ1-2 1.6702246543917967 -P51692 1.6702246543917967 -Q86Z02 1.6702246543917967 -P17706 1.6702246543917967 -Q9BWF3-4 1.6676700963650766 -Q9HB90 1.6668307661689883 -Q53GS7 1.6665869443271135 -P30305-4 1.6653688059093044 -Q8IWE5 1.6653688059093044 -Q9UEW8 1.6653688059093044 -P36542 1.6653674923236563 -P61803 1.665362739494293 -A0A0U1RRB6 1.665362739494293 -Q12873-3 1.664721899083161 -Q96FJ0-2 1.6646627083993766 -F8W130 1.6641132069663522 -Q9UKG1 1.6611098397053683 -Q9UNX3 1.6606062062403686 -Q9BUR4 1.6601323187112542 -Q5HYJ3 1.6600823743491557 -Q12872-2 1.6599785359993766 -P14317 1.6594720977321182 -Q9NZN8 1.6593129808626967 -E7EUN2 1.659119992072103 -Q68E01 1.6590801388799619 -Q99640 1.658608189992973 -E9PG73 1.6584978585788994 -P49321-3 1.6584978585788994 -Q14151 1.6581081893684835 -Q13625-3 1.6579730692224444 -P28715 1.6579389034381258 -Q06587 1.6571965139808917 -Q6UUV7 1.656830661044035 -Q6AI39 1.6539951285262302 -P22059 1.6520042020879702 -Q9P0L0 1.6513029492179827 -O96007 1.6500844003700044 -Q12830 1.6492789495885976 -Q9P2Y5 1.6492789495885976 -O43639 1.648647264234571 -Q96L94 1.647983903924502 -Q12874 1.647983903924502 -Q9H501 1.647983903924502 -Q8NHZ8 1.6471410347508786 -Q15776 1.6471410347508786 -Q9H8M2 1.6471410347508786 -Q70EL2 1.6471410347508786 -Q9H9J4 1.6457504281862554 -Q96B36-3 1.6457504281862554 -Q12888-2 1.6457504281862554 -A0A087WTJ2 1.6457504281862554 -J3KNN5 1.6443003144245338 -F8WAL6 1.6443003144245338 -Q71DI3 1.6442353395771292 -Q8N883 1.6440101797346887 -Q04721 1.6440101797346887 -Q6UB35 1.6427386613060824 -Q96F63 1.6378556503503088 -P51816 1.6363175266233958 -Q8WVM8 1.6363175266233958 -Q8NDD1 1.6363175266233958 -Q9BSJ8 1.635374493425764 -Q13045 
1.6341613785810227 -Q9NYV6 1.6316583988139468 -B4DT28 1.6315086186821506 -Q8IV63 1.6296733595210375 -O15021 1.6296733595210375 -Q9NXL9 1.6293233190319771 -Q6ZN06 1.6293233190319771 -B3KTM8 1.6292777130936889 -Q8TEV9 1.6269145783689967 -Q9Y6R4-2 1.6268959723388179 -P35240 1.6265714269249545 -Q15700-2 1.6263911649091207 -Q14693-7 1.626130295135159 -Q9H2P0 1.624808828870062 -P05114 1.6242492965600805 -Q14CB8-2 1.620997837512863 -Q96JM7 1.6195933949371673 -Q96NJ6 1.6189018733681717 -Q9ULH0 1.6189018733681717 -Q9UKS6 1.6159988983932818 -Q9H582 1.6133054617811813 -Q9NQC7 1.612106355371317 -Q05655-2 1.6109940866703802 -P51116 1.609781829954048 -Q8IYH5 1.609408094700522 -A8K727 1.609408094700522 -Q9P2B7 1.609408094700522 -Q8NCY6 1.6086830081843049 -P57737-3 1.6040234376510798 -Q5VYS8 1.603875551962769 -O94986 1.6035349761331743 -P11117 1.603321048635197 -E7ERR0 1.603321048635197 -Q9Y608-4 1.6007679399387202 -P30281 1.598223062354585 -Q9NQT8 1.598223062354585 -Q96K21 1.5972069967098041 -P26639-2 1.5965909100744242 -Q92698 1.593848238666057 -Q70CQ4 1.593848238666057 -Q8N183 1.5933840305592093 -P54132 1.586746263324474 -Q9Y6D5 1.5865397631548432 -P50402 1.5862693413866356 -O00232 1.5862693413866356 -Q7Z7L8 1.584833632571175 -H7C0P6 1.584007209422455 -Q9BWW4 1.5831131916035361 -Q9ULH7-5 1.5819601534414822 -P33527-9 1.5815710539796546 -Q6VY07 1.5815710539796546 -A0A0U1RRM6 1.5812143192503216 -Q9Y2K7 1.5794421458666126 -C9JI98 1.5794302325169345 -Q9GZY8-2 1.5784028296532207 -Q9ULX6 1.5781175997143473 -Q9NQL2 1.5764779505286213 -Q9BRR9-2 1.5748943789559953 -P20339 1.5741488951097151 -A0A075B6G3 1.5738583906146597 -M0QZI3 1.573236768623413 -P35573 1.569845333811154 -Q01780 1.5654252779075117 -Q9Y4E5 1.564564406742409 -O95758-4 1.564564406742409 -Q9UPX8-3 1.563473464203731 -Q86UP2 1.5624422525425712 -Q9UPT9 1.5614287238940694 -X6R3V9 1.561059474299261 -A0A087WUT6 1.5577475982668993 -Q92585 1.5569767269569084 -Q8WV99 1.5552026005705635 -Q9UER7 1.5552026005705635 -Q92766-2 
1.5519883247095958 -O43439-4 1.5519558789851258 -Q9Y6Y0 1.551667930031181 -C9IZ08 1.5511424357157702 -P69892 1.5511424357157702 -P51397 1.5511424357157702 -O95400 1.5508017118733144 -Q9UHV7 1.5486616485183236 -Q504T8 1.5486616485183236 -A1X283 1.5460366657486526 -Q92859 1.5460076236345228 -Q969R8 1.5441133773478535 -Q8WXI9 1.542970011245797 -Q9UIF9 1.5424798953471341 -Q7L2E3-2 1.5391164541772189 -O43663 1.5349196671674303 -Q9Y2H6 1.5322453979214572 -Q9Y5V3-2 1.5316077109711947 -O43561 1.5310360302947401 -Q13177 1.5305309453028033 -A0A087WZY3 1.5292878805357475 -A0A087WV86 1.526398369254283 -Q92620 1.518580786239318 -Q01664 1.5166205053708042 -O43379-4 1.5159558609772275 -Q9H9A7 1.5146332476217013 -P35527 1.5133087732824906 -Q9H875 1.5133087732824906 -Q9NVF7 1.5090585248038344 -Q9Y3B9 1.5057204321829378 -P10075 1.5051452418007956 -Q96QR8 1.5051452418007956 -Q9C0J8 1.5036151479308775 -Q8WXG6 1.5036151479308775 -Q8NC51-2 1.5004065333374912 -Q5FWF5 1.4984736574738056 -Q13838-2 1.4984736574738056 -J3KNZ9 1.4962686667011433 -Q14807 1.492495687642211 -O60271-3 1.4917591670315173 -Q9UH99-2 1.4910184665446684 -Q9UKD2 1.490566356290414 -Q14573 1.4874803925384024 -G5E9M7 1.485453553400625 -Q9UBB9 1.4845903185473337 -Q9UBB4 1.4830861750988265 -Q9H0J9 1.482250391253681 -Q99856 1.4802545717879971 -Q9BXS6 1.4799452133175632 -E9PIM0 1.479298314879221 -P31629 1.4791063446641404 -Q96TA1 1.4786344439388601 -Q9Y324 1.4772699135355203 -P05164-3 1.4753451526771215 -Q8N8D1 1.4747859000771348 -O43572 1.4713939308389674 -Q9UBL0 1.4705552858568849 -E9PNT2 1.4685877289738674 -P54578 1.4685877289738674 -Q9NYL2 1.468322699381004 -Q00013 1.4643853322860894 -Q8IUE6 1.463073450993377 -P07305 1.46162488602898 -Q93073 1.459522672553388 -A6PW57 1.4593914397177195 -Q96P16 1.4570079787476788 -Q13409-2 1.4561773765895465 -Q13614 1.4557777149519568 -Q14865 1.4540825670777604 -I3L0U5 1.4522765572785907 -P19634 1.4520605611797321 -Q92630 1.4483397637574744 -P10643 1.447981758087282 -Q86UU0 
1.4466764150796212 -Q96EV8 1.445395737579544 -P84243 1.4448977505570288 -P33316-2 1.4409851777563056 -Q8IXQ3 1.440933892151441 -H7C2Q8 1.4391985010946566 -P55201 1.4385316455772108 -Q01167 1.4356866027497721 -Q9BUA3 1.431505752902634 -Q96DY7 1.4297511736283954 -Q8IWB7 1.4260845252781738 -J3KN59 1.4260845252781738 -P01008 1.4260845252781738 -A0A024R214 1.4196687311660439 -Q68DH5 1.417546886728212 -Q9Y6A5 1.4075026391766297 -Q6P0N0 1.4030029785508336 -Q86XN8-3 1.3995639333086007 -P53350 1.3987182982633413 -B4DTS2 1.396884699572115 -B0QXZ6 1.3951262396816184 -Q15717-2 1.394052548400427 -Q96DV4 1.3932332382438077 -Q70J99 1.392582018177835 -Q9Y2X3 1.3922352507423341 -Q8N3U4 1.391034451100885 -Q8TDG4 1.3868053259935338 -O60318 1.383021618733968 -O95376 1.3827887121728022 -Q9NP80 1.3827887121728022 -O60725 1.3797541403764413 -Q8N9N5 1.3792142393579827 -E9PC69 1.3738731176774814 -Q92854 1.3730524480497879 -Q9Y3Q3 1.3730524480497879 -Q9UII2 1.3730524480497879 -Q8N9B5 1.3729406425650623 -O60547 1.3723040340242356 -P47914 1.369442498918776 -Q8TAT5 1.3687215498106968 -Q15942 1.3678001355168583 -U3KQ54 1.3670046475134838 -P00441 1.36453645056204 -Q8WTW3 1.3638146224972307 -O43293 1.363017464370094 -B0QYS7 1.3591922797356877 -O60216 1.3588165413056754 -Q9BV73 1.356943830485934 -Q9UHI6 1.3560062749673367 -Q01831 1.3560062749673367 -Q8WW12 1.3560062749673367 -Q02790 1.3544069214372674 -Q9BZL1 1.3542207072484604 -Q9P0U3 1.3542207072484604 -Q99595 1.3542207072484604 -Q13895 1.353599237453952 -Q8TF76 1.3535930799199831 -Q9H147 1.3531830757111039 -P20226 1.3531830757111039 -P62861 1.3531830757111039 -P13645 1.3531830757111039 -Q01826-2 1.3513036880649827 -P05455 1.3506311030982134 -O75475 1.3461461134384227 -Q9NST1 1.3454491641186934 -Q9Y6X3 1.3449984454653359 -O94925 1.3449984454653359 -Q9NPF2 1.3449984454653359 -Q66PJ3-2 1.3449984454653359 -Q53HL2 1.3429467152620067 -Q99755 1.3403532330874004 -P55084 1.3295945326940692 -B5MDQ6 1.32041181506991 -Q15435 1.316905460319621 -Q8TBC3 
1.3138920520517516 -Q9UKL3 1.3138553798059365 -Q9NPF5 1.3116792709689393 -Q9BVS4 1.311402438471763 -P48729-2 1.3106679012476816 -Q96PZ0 1.3082322523829244 -Q9BW61 1.306724169347866 -Q8IWA0 1.3063491562376395 -Q9NVU0 1.305301920685258 -Q7L0J3 1.3022847363865186 -Q8IXM2-3 1.3002350105944236 -Q8IX21-2 1.2931184671763987 -Q99704 1.2900094764731345 -Q8IWV1 1.2833187401789021 -Q9NQZ2 1.2785564923681236 -A0A0A0MTR7 1.2725009323440952 -P0C7T5 1.2706591844841157 -X5D2R7 1.2692461950729055 -D6REX3 1.268615360830644 -A0A1C7CYX1 1.265693749859266 -Q14161 1.2639556171574793 -P54259 1.2582784414396775 -Q96HQ2 1.2546567887730067 -Q96F24 1.2546258190074373 -B4E0Y9 1.2537515976875266 -Q8IXM6 1.2487713092174104 -Q86VR2 1.2487713092174104 -O43678 1.2487713092174104 -P07919 1.2487713092174104 -O43295 1.2487713092174104 -Q15059 1.248401418397103 -Q66K14-2 1.2480552702487933 -Q8WVK2 1.2477710365474972 -Q9Y3T9 1.2457608213067715 -P33993 1.2447422085125182 -Q92993-3 1.2430484597748601 -J3QQJ0 1.2396932488946057 -O95425-3 1.236838284888995 -Q9BWG6 1.2361786294953083 -O94776 1.2361786294953083 -Q96BR1 1.2333561447804091 -Q7Z628 1.2251408732317681 -Q3T8J9-3 1.2152987791831276 -Q8NHQ9 1.2111266788778736 -Q9UGP4 1.2109504951110637 -G3XAH0 1.2108885437236678 -O43677 1.2097781751415768 -O75616 1.2059595457245773 -Q6ZR52-2 1.2023439697638574 -Q86WB0 1.2014851607627997 -O95684-2 1.1976463369632704 -Q9P2K3-3 1.1953538477513326 -P51858 1.1945326026194714 -O00115 1.1938949900799105 -Q7Z4G1-2 1.1896832592757838 -Q9Y6D0 1.1896832592757838 -P04264 1.1896832592757838 -Q9H6L4-2 1.1886614493510455 -P04921 1.1880136792145806 -Q9P219 1.1876388263379467 -Q96SI1 1.1845998644456244 -P49756 1.1841314076024123 -Q8WXE0 1.1840293890285647 -B2RBV5 1.182122536225889 -Q9Y462 1.182122536225889 -Q9H7P9 1.1810648173550151 -O75369-8 1.1805960753218467 -Q5T7W7 1.1805579318956934 -Q8IX15-3 1.177793827873462 -O14647 1.176455518274309 -Q4KMP7 1.1723160562396677 -Q8WUX9 1.1686473753412865 -Q58A45 1.164020197090878 -P27824-2 
1.1566280992775522 -P54274 1.1556992090228102 -Q9NQT4 1.1553431721857779 -Q9BRP8 1.1552301761343235 -Q9NUP7 1.1543390042722876 -P54725 1.1540058558732107 -Q8TEW0-11 1.1526090696062488 -P13647 1.1505966596611905 -Q99570 1.1479911707216974 -Q969T9 1.1434355826778384 -P68366 1.1434184215259944 -O75528 1.1430278072133924 -Q8TBK6-2 1.1426653714980075 -K4DI81 1.1408581221090033 -Q9GZT6 1.138251826399812 -A0A0D9SF58 1.1374137915645597 -Q9UBP6 1.1287236934044158 -A0A024R0Y4 1.1237732877265372 -Q8IWS0 1.1217186217864514 -Q8TF68 1.120121554444549 -O43396 1.117211254321597 -Q8NAV1 1.1149979039289826 -Q9NZJ4 1.112390752037016 -P08779 1.111224601631182 -K7EM46 1.1082349592990537 -Q8NI22 1.09866842396838 -Q8IWY9 1.0977158783726109 -Q16563-2 1.090574678522846 -P17026 1.087773389601758 -Q9BWJ5 1.0744270981393589 -Q9NRX4 1.0744270981393589 -Q12770 1.0744270981393589 -P35908 1.0697104891688962 -Q8IWB1 1.0690633923691257 -Q9BXP5 1.0683710059022202 -A6NHB5 1.063404420910747 -Q96JN0-3 1.0621280866256055 -Q32MZ4-4 1.0598074913659896 -Q9NQR1 1.0590922024073208 -O94763 1.050435894519489 -F6VDE0 1.0495878951195952 -Q14839-2 1.0429281891321034 -P16333 1.041571328726673 -A0A140T9E9 1.0365936993727805 -Q05823 1.0342089009492175 -Q14938 1.0293924163164294 -O43516-3 1.025233860148135 -Q96QE3 1.020156913567794 -Q7Z4V5 1.017989757193611 -Q9BS16 1.0158356816677774 -Q86YS7 1.0153806241700578 -P62328 1.0131189259244653 -O60524 1.0085293397608732 -Q8WUH2 1.0071196237056412 -Q7L8J4 1.0013990784807703 diff --git a/datasets/hiv/scripts/README.md b/datasets/hiv/scripts/README.md new file mode 100644 index 0000000..cfd34b2 --- /dev/null +++ b/datasets/hiv/scripts/README.md @@ -0,0 +1,89 @@ +## HIV benchmarking + +This folder contains raw data, processed data, and SPRAS results pertaining to the dataset taken from the following research article: **[HIV-1 virological synapse formation enhances infection spread by dysregulating Aurora Kinase B](https://doi.org/10.1371/journal.ppat.1011492)** - Bruce JW, Park 
E, Magnano C, Horswill M, Richards A, Potts G, et al. (2023) + +The study examines human immune cells responding to viral infection as well as the changes that take place inside the already infected cells, which is the focus here. +The data is from protein abundance and phosphorylation experiments, which will be the input to pathway reconstruction. + +**Overarching goal:** Recreate published biological case study on HIV data using SPRAS. This will help in identifying nodes, i.e., proteins that are relevant to the disease. + +#### requirements.txt: +- Pins the versions of the important packages for data analysis but assumes the Jupyter notebook environment packages are already available. + +#### 1. compare_prizes_network.ipynb + +This notebook performs data analysis that compares the corrected prize files (5 min and 60 min) from the research article to the original prize files: +- Some proteins in the original prize files have the syntax `majorIdentifier-N` where N denotes isoforms. +- Data analysis involves checking how many proteins in the original prize files are repeats of the same majorIdentifier +- Additionally, it involves checking if the network file used (`phosphosite-irefindex13.0-uniprot.txt`) contains secondary identifiers with the -N syntax. +- If it does not, we will want to strip that syntax from the prize file as part of preprocessing. + + +#### 2. generate_protein_mapping_input.ipynb +This notebook creates the list of proteins to upload to UniProt as a .txt file. +- The proteins from the `prize05.csv` and `prize060.csv` files are combined together and this is saved as a csv. The file is saved as `prize_list_proteins.txt`. + + +#### 3. preprocess_prize_file.ipynb + +This notebook preprocesses the original prize files into a SPRAS-compatible format. The following preprocessing steps are done: +- Node Identifier Simplification: + - Original Format: Columns - UniprotID (i.e. protein IDs) and prize.
Some proteins had the syntax majorIdentifier-N (N denotes isoforms). + - Modification: Remove the -N suffix to retain only the majorIdentifier. + - Prize Selection: Retain the maximum prize number associated with each major protein identifier. +- Protein Mapping: + - Network file used: `hiv_raw_data/phosphosite-irefindex13.0-uniprot.txt` + - Issue: Node file and network file use different protein codes. + - Solution: Map and replace protein identifiers in the node file to match the network file using UniProt database. The mapping file used was `hiv_raw_data/idmapping_2024_06_26.tsv`, which was downloaded from UniProt. To get the most recent UniProt mapping, run `generate_protein_mapping_input.ipynb`, upload that to Uniprot, and then use the resulting `idmapping`. +- Column Header Update: + - Original Headers: UniProtID and prize + - New Headers: NODEID and prize +- These modified files are saved as `hiv_processed_data/modified_prize_xx.csv` + + +#### 4. filter_empty_pathways.ipynb +- Removes any empty pathways that were created from the SPRAS output in place +- Recommended: make a copy of the directory and then execute code on duplicate directory + + +#### 5. plot_num_nodes_summary_table.ipynb +- using the `XXX-pathway-summary.txt` file, this notebook produces histograms for the number of nodes present in all the pathways combined + - i.e. Number of Nodes vs. Count, where count represents the number of total pathways that have a particular number of nodes in their pathway + + +#### 6. build_kegg-orthology_to_swissprot_map.ipynb +This notebook creates an output of KEGG Orthology mapped to Uniprot and KEGG Orthology mapped to Swissprot, both saved as CSVs. This is done through: +- Using the `biopython` KGMLParser module, the downloaded KGML pathway is parsed to produce a dataframe of proteins (KEGG Orthology IDs) +- Using API calls to the [genome.jp](https://www.genome.jp/) database, the KEGG orthology IDs are mapped to the KEGG Human protein IDs i.e. 
the HSA IDs +- Using the `hsa_uniprot.list` file from [LinkDB](https://www.genome.jp/linkdb/) (this was done by downloading the link information between HSA and Uniprot), the HSA IDs are mapped to all the UniProt IDs corresponding to the HSA IDs. Note that this produced a 1-to-many mapping for some proteins +- To remove the 1-to-many mapping and produce a 1-to-1 mapping instead, API calls to [genome.jp](https://www.genome.jp/) are done again to filter out UniProt IDs that *don't* have SwissProt IDs i.e. protein IDs that haven't been 'manually reviewed'. +- All these mappings are saved locally as csvs. + + +#### 7. hiv05_comparison_ratios +This notebook contains code to create comparison ratios for SPRAS ensemble pathways vs. the original publication pathway *and* SPRAS ensemble pathways vs. the KEGG pathway +- This is done by intersecting the nodes found in both pathways, and then comparing the number of nodes found in both to the number of nodes found in the individual original pathways + + +#### 8. ensemble_node_maxfreq.ipynb +This notebook contains code that loads the ensemble pathway and assigns the highest frequency associated with each node to the respective node. +- The original SPRAS output ensemble pathway file assigns frequencies to edges. +- The result produced by this notebook assigns the maximum edge frequency associated with each node to the node, creating a max node frequency list that is saved as a CSV. + + +#### 9. build_prc_kegg_hiv05.ipynb +This notebook produces a precision recall curve using `scikit-learn`. This is done through the following steps: +- Building the PRC uses the max node frequency produced from the ensemble pathway, and the KEGG to Swissprot/Uniprot mapping. +- A vector of 0s and 1s is built that indicates whether the particular protein in the ensemble pathway is found in the KEGG pathway or not. This vector is called `y_kegg` and is attached to the ensemble pathway dataframe. 
+ - The `max_freq` column in the dataframe is considered as the probabilities/scores and the `y_kegg` column in the dataframe is considered as the true label +- Then, a precision recall curve is built. + + +#### 10. build_prc_gene-ontology_hiv05_hiv060 +This notebook also produces several precision recall curves using `scikit-learn`. These are done from selecting 3 biological processes from the publication's list of observed biological processes and then taking the Uniprot list of all Gene Ontology proteins (for the human taxonomy) for that particular biological process. +- A vector of 0s and 1s is built that indicates whether the particular protein in the ensemble pathway is found in the Gene Ontology list or not. This vector is called `y_go` and is attached to the ensemble pathway dataframe. + - The `max_freq` column in the dataframe is considered as the probabilities/scores and the `y_go` column in the dataframe is considered as the true label +- Then, a precision recall curve is built. + + + diff --git a/datasets/hiv/scripts/fetch.py b/datasets/hiv/scripts/fetch.py new file mode 100644 index 0000000..3283487 --- /dev/null +++ b/datasets/hiv/scripts/fetch.py @@ -0,0 +1,36 @@ +""" +Fetches the `prize_05.tsv` and `prize_060.tsv` files from +https://github.com/gitter-lab/hiv1-aurkb, as well as the KGML file +for `kegg_orthology.py` + +Associated paper: https://doi.org/10.1371/journal.ppat.1011492 +""" + +import urllib.request +import os +from pathlib import Path + +# https://stackoverflow.com/a/5137509/7589775 +hiv_path = Path(os.path.dirname(os.path.realpath(__file__))).parent + +base_url = "https://raw.githubusercontent.com/gitter-lab/hiv1-aurkb/ac9278d447e4188eea3bf4b24c4c4e0c19b0c6d9/Results/base_analysis/" +prizes_05_url = base_url + "prize_05.csv" +prizes_060_url = base_url + "prize_060.csv" + +def main(): + # Note: These files are .tsv, but have the wrong file extension .csv. 
+ urllib.request.urlretrieve(prizes_05_url, hiv_path / "raw" / "prizes_05.tsv") + urllib.request.urlretrieve(prizes_060_url, hiv_path / "raw" / "prizes_060.tsv") + + # and our final KGML file for the HIV pathway. + # KEGG requires a Referrer (server-CORS enforcement?) + # https://stackoverflow.com/a/46511429/7589775 + opener = urllib.request.build_opener() + opener.addheaders = [('Referer', 'https://www.kegg.jp/pathway/ko03250')] + urllib.request.install_opener(opener) + urllib.request.urlretrieve( + "https://www.kegg.jp/kegg-bin/download?entry=ko03250&format=kgml", + hiv_path / "raw" / "ko03250.xml") + +if __name__ == '__main__': + main() diff --git a/datasets/hiv/scripts/kegg_orthology.py b/datasets/hiv/scripts/kegg_orthology.py index 647ffec..6b34351 100644 --- a/datasets/hiv/scripts/kegg_orthology.py +++ b/datasets/hiv/scripts/kegg_orthology.py @@ -2,60 +2,69 @@ from bioservices import UniProt, KEGG import pandas as pd from more_itertools import chunked +from pathlib import Path +import os -pathway = read(open("Raw_Data/ko03250.xml", "r")) - -# Read in Kegg pathway data and keep only orthologs -entries_data = [] -for entry in pathway.entries.values(): - if entry.type == "ortholog": - entries_data.append({"name": entry.name}) -entries_df = pd.DataFrame(entries_data) - -# Some orthologs have multiple ko codes in the same row -# The following two lines move all ko codes to individual rows -orthology_ids = entries_df["name"].str.split(" ").explode() -orthology_ids = orthology_ids.apply(lambda x: x.split(":")[1]).tolist() - -# Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes -k = KEGG() -ko_hsa_map = k.link("hsa", "+".join(orthology_ids)) -ko_hsa_dict = {x.split("\t")[0].split(":")[1]: x.split("\t")[1] for x in ko_hsa_map.split("\n")[:-1]} -ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(), columns=["KEGG_Orthology", "HSA"]) - -# Kegg .get is limited to 10 entries per call -# The following code chunks the hsa list into sets of 10 -# then calls 
the .get function on each which returns kegg api data in string format -hsa_chunked = list(chunked(ko_hsa_df["HSA"].tolist(), 10)) -raw_uniprot = [] -for entry in hsa_chunked: - raw_uniprot.append(k.get("+".join(entry)).split("\n///\n\n")) - -# Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein -# Note: Although bioservices .link and .conv return cleaner outputs, they do not support -# one to many relationships at this time. -# Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. -processed_uniprot = [] -for chunk in raw_uniprot: - for item in chunk: - item = item.split("\n") - processed_uniprot.append([(x.strip().split(" ")[1:], "hsa:" + (item[0].split(" " * 7)[1])) for x in item if "UniProt" in x][0]) - -# Creates a dictionary where uniprot ids are keys and hsa ids are values -hsa_uniprot_dict = {} -for item in processed_uniprot: - for entry in item[0]: - hsa_uniprot_dict.update({"up:" + entry: item[1]}) - -# Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa -hsa_uniprot_map = pd.DataFrame.from_dict(hsa_uniprot_dict.items()) -hsa_uniprot_map.columns = ["Uniprot", "HSA"] -final_df = ko_hsa_df.merge(hsa_uniprot_map, on="HSA") -uniprotIDs = final_df["Uniprot"].apply(lambda x: x.split(":")[1]).tolist() - -# Filters the combined dataframe to include only rows where the uniprot code is in swissprot -u = UniProt() -tst = u.mapping(fr="UniProtKB", to="UniProtKB-Swiss-Prot", query=",".join(uniprotIDs)) -failed_uniprot = pd.Series(list(set(tst["failedIds"]))).apply(lambda x: "up:" + x) - -final_df = final_df[~final_df["Uniprot"].isin(failed_uniprot)] +# https://stackoverflow.com/a/5137509/7589775 +hiv_path = Path(os.path.dirname(os.path.realpath(__file__)), '..') + +def main(): + pathway = read(open(hiv_path / "raw" / "ko03250.xml", "r")) + + # Read in Kegg pathway data and keep only orthologs + entries_data = [] + for entry in pathway.entries.values(): + if 
entry.type == "ortholog": + entries_data.append({"name": entry.name}) + entries_df = pd.DataFrame(entries_data) + + # Some orthologs have multiple ko codes in the same row + # The following two lines move all ko codes to individual rows + orthology_ids = entries_df["name"].str.split(" ").explode() + orthology_ids = orthology_ids.apply(lambda x: x.split(":")[1]).tolist() + + # Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes + k = KEGG() + ko_hsa_map = k.link("hsa", "+".join(orthology_ids)) + ko_hsa_dict = {x.split("\t")[0].split(":")[1]: x.split("\t")[1] for x in ko_hsa_map.split("\n")[:-1]} + ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(), columns=["KEGG_Orthology", "HSA"]) + + # Kegg .get is limited to 10 entries per call + # The following code chunks the hsa list into sets of 10 + # then calls the .get function on each which returns kegg api data in string format + hsa_chunked = list(chunked(ko_hsa_df["HSA"].tolist(), 10)) + raw_uniprot = [] + for entry in hsa_chunked: + raw_uniprot.append(k.get("+".join(entry)).split("\n///\n\n")) + + # Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein + # Note: Although bioservices .link and .conv return cleaner outputs, they do not support + # one to many relationships at this time. + # Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. 
+ processed_uniprot = [] + for chunk in raw_uniprot: + for item in chunk: + item = item.split("\n") + processed_uniprot.append([(x.strip().split(" ")[1:], "hsa:" + (item[0].split(" " * 7)[1])) for x in item if "UniProt" in x][0]) + + # Creates a dictionary where uniprot ids are keys and hsa ids are values + hsa_uniprot_dict = {} + for item in processed_uniprot: + for entry in item[0]: + hsa_uniprot_dict.update({"up:" + entry: item[1]}) + + # Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa + hsa_uniprot_map = pd.DataFrame.from_dict(hsa_uniprot_dict.items()) + hsa_uniprot_map.columns = ["Uniprot", "HSA"] + final_df = ko_hsa_df.merge(hsa_uniprot_map, on="HSA") + uniprotIDs = final_df["Uniprot"].apply(lambda x: x.split(":")[1]).tolist() + + # Filters the combined dataframe to include only rows where the uniprot code is in swissprot + u = UniProt() + tst = u.mapping(fr="UniProtKB", to="UniProtKB-Swiss-Prot", query=",".join(uniprotIDs)) + failed_uniprot = pd.Series(list(set(tst["failedIds"]))).apply(lambda x: "up:" + x) + + final_df = final_df[~final_df["Uniprot"].isin(failed_uniprot)] + +if __name__ == '__main__': + main() diff --git a/datasets/hiv/scripts/name_mapping.py b/datasets/hiv/scripts/name_mapping.py index 6fb6e6a..55f7369 100644 --- a/datasets/hiv/scripts/name_mapping.py +++ b/datasets/hiv/scripts/name_mapping.py @@ -1,3 +1,8 @@ +""" +This code is almost fully copied from https://www.uniprot.org/help/id_mapping_prog, +with the only exception being at the top of `main`. +""" + import re import time import json @@ -17,6 +22,9 @@ def main(): + # This is the only major exception to this being example code from UniProt. + # See prepare.py for the NodeIDs generation: this is the deduplicated list of node IDs + # from the two prize files in `raw`. 
with open("Pickles/NodeIDs.pkl", "rb") as file: NodeIDs = pickle.load(file)["NodeIDs"] diff --git a/datasets/hiv/scripts/prepare.py b/datasets/hiv/scripts/prepare.py index de57b39..d715dd5 100644 --- a/datasets/hiv/scripts/prepare.py +++ b/datasets/hiv/scripts/prepare.py @@ -7,20 +7,25 @@ hiv_path = Path(os.path.dirname(os.path.realpath(__file__)), '..') def main(): - prize_05 = pandas.read_csv(hiv_path / "raw" / "prize_05.csv", sep="\t", lineterminator="\n") - prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_060.csv", sep="\t", lineterminator="\n") + # See `fetch.py` for information about these two files. + prize_05 = pandas.read_csv(hiv_path / "raw" / "prize_05.tsv", sep="\t", lineterminator="\n") + prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_060.tsv", sep="\t", lineterminator="\n") + # Some proteins in the original prize files have the syntax `majorIdentifier-N` where N denotes isoforms. + # We don't particurarly care about any particular isoform when doing pathway reconstruction, + # so we treat -N isoforms as duplicates and remove them. prize_05["Uniprot"] = prize_05["Uniprot"].str.split("-", expand=False).str[0] - prize_05 = prize_05.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() - prize_060["Uniprot"] = prize_060["Uniprot"].str.split("-", expand=False).str[0] + + # We want to preserve the highest Prize column out of all of the isoform (and non-isoform) variants. + prize_05 = prize_05.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() prize_060 = prize_060.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() prize_060_nodes = prize_060["Uniprot"].tolist() prize_05_nodes = prize_05["Uniprot"].tolist() - nodeset = list(set(prize_05_nodes + prize_060_nodes)) + # See `name_mapping.py`` for the NodeIDs storage motivation. 
df = {"NodeIDs": nodeset, "prize_05": prize_05, "prize_060": prize_060} (hiv_path / "Pickles").mkdir(exist_ok=True) diff --git a/datasets/hiv/scripts/spras_formatting.py b/datasets/hiv/scripts/spras_formatting.py index 82c0d0d..a9aa84f 100644 --- a/datasets/hiv/scripts/spras_formatting.py +++ b/datasets/hiv/scripts/spras_formatting.py @@ -6,21 +6,20 @@ processed_directory = current_directory.parent / "processed" def main(): + # See name_mapping.py for the origin of this file with open("Pickles/UniprotIDs.pkl", "rb") as file: UniprotIDs = pickle.load(file) - - UIDs = UniprotIDs["UniprotIDs"] UMap = UniprotIDs["UniprotMap"] with open("Pickles/NodeIDs.pkl", "rb") as file2: prizes = pickle.load(file2) - prize_05 = prizes["prize_05"] prize_060 = prizes["prize_060"] prize_05["Uniprot"] = prize_05["Uniprot"].apply(lambda x: UMap.get(x)) prize_060["Uniprot"] = prize_060["Uniprot"].apply(lambda x: UMap.get(x)) + # Format with SPRAS column names prize_05.columns = ["NODEID", "prize"] prize_060.columns = ["NODEID", "prize"] From a38177bb02fbbc17518c50c7c61b3549029a2e29 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 30 Jul 2025 12:47:02 -0700 Subject: [PATCH 3/9] chore: remove other readme --- datasets/hiv/scripts/README.md | 89 ---------------------------------- 1 file changed, 89 deletions(-) delete mode 100644 datasets/hiv/scripts/README.md diff --git a/datasets/hiv/scripts/README.md b/datasets/hiv/scripts/README.md deleted file mode 100644 index cfd34b2..0000000 --- a/datasets/hiv/scripts/README.md +++ /dev/null @@ -1,89 +0,0 @@ -## HIV benchmarking - -This folder contains raw data, processed data, and SPRAS results pertaining to the dataset taken from the following research article: **[HIV-1 virological synapse formation enhances infection spread by dysregulating Aurora Kinase B](https://doi.org/10.1371/journal.ppat.1011492)** - Bruce JW, Park E, Magnano C, Horswill M, Richards A, Potts G, et al. 
(2023) - -The study examines human immune cells responding to viral infection as well as the changes that take place inside the already infected cells, which is the focus here. -The data is from protein abundance and phosphorylation experiments, which will be the input to pathway reconstruction. - -**Overarching goal:** Recreate published biological case study on HIV data using SPRAS. This will help in identifying nodes i.e. proteins that are relevant to the disease. - -#### requirements.txt: -- Pins the versions of the important packages for data analysis but assumes the Jupyter notebook environment packages are already available. - -#### 1. compare_prizes_network.ipynb - -This notebook performs data analysis that compares the corrected prize files (5 min and 60 min) from the research article to the original prize files: -- Some proteins in the original prize files have the syntax `majorIdentifier-N ` where N denotes isoforms. -- Data analysis involves checking how many proteins in the original prize files are repeats of the same majorIdentifier -- Additionally, it involves checking if the network file used (`phosphosite-irefindex13.0-uniprot.txt`) contains secondary identifiers with the -N syntax. -- If it does not, we will want to strip that syntax from the prize file as part of preprocessing. - - -#### 2. generate_protein_mapping_input.ipynb -This notebook creates the list of proteins to upload to UniProt as a .txt file. -- The proteins from the the `prize05.csv` and `prize060.csv` files are combined together and this is saved as a csv. The file is saved as `prize_list_proteins.txt`. - - -#### 3. preprocess_prize_file.ipynb - -This notebook preprocesses the original prize files into a SPRAS compatible format. The following preprocessing steps are done: -- Node Identifier Simplification: - - Original Format: Columns - UniprotID (i.e. protein IDs) and prize. Some proteins had the syntax majorIdentifier-N (N denotes isoforms). 
- - Modification: Remove the -N suffix to retain only the majorIdentifier. - - Prize Selection: Retain the maximum prize number associated with each major protein identifier. -- Protein Mapping: - - Network file used: `hiv_raw_data/phosphosite-irefindex13.0-uniprot.txt` - - Issue: Node file and network file use different protein codes. - - Solution: Map and replace protein identifiers in the node file to match the network file using UniProt database. The mapping file used was `hiv_raw_data/idmapping_2024_06_26.tsv`, which was downloaded from UniProt. To get the most recent UniProt mapping, run `generate_protein_mapping_input.ipynb`, upload that to Uniprot, and then use the resulting `idmapping`. -- Column Header Update: - - Original Headers: UniProtID and prize - - New Headers: NODEID and prize -- These modified files are saved as `hiv_processed_data/modified_prize_xx.csv` - - -#### 4. filter_empty_pathways.ipynb -- Removes any empty pathways that were created from the SPRAS output in place -- Recommended: make a copy of the directory and then execute code on duplicate directory - - -#### 5. plot_num_nodes_summary_table.ipynb -- using the `XXX-pathway-summary.txt` file, this notebook produces histograms for the number of nodes present in all the pathways combined - - i.e. Number of Nodes vs. Count, where count represents the number of total pathways that have a particular number of nodes in their pathway - - -#### 6. build_kegg-orthology_to_swissprot_map.ipynb -This notebook creates an output of KEGG Orthology mapped to Uniprot and KEGG Orthology mapped to Swissprot, both saved as CSVs. This is done through: -- Using the `biopython` KGMLParser module, the downloaded KGML pathway is parsed to produce a dataframe of proteins (KEGG Orthology IDs) -- Using API calls to the [genome.jp](https://www.genome.jp/) database, the KEGG orthology IDs are mapped to the KEGG Human protein IDs i.e. 
the HSA IDs -- Using the `hsa_uniprot.list` file from [LinkDB](https://www.genome.jp/linkdb/) (this was done by downloading the link information between HSA and Uniprot), the HSA IDs are mapped to all the UniProt IDs corresponding to the HSA IDs. Note that this produced a 1-to-many mapping for some proteins -- To remove the 1-to-many mapping and produce a 1-to-1 mapping instead, API calls to [genome.jp](https://www.genome.jp/) are done again to filter out UniProt IDs that *don't* have SwissProt IDs i.e. protein IDs that haven't been 'manually reviewed'. -- All these mappings are saved locally as csvs. - - -#### 7. hiv05_comparison_ratios -This notebook contains code to create comparison ratios for SPRAS ensemble pathways vs. the original publication pathway *and* SPRAS ensemble pathways vs. the KEGG pathway -- This is done by intersecting the nodes found in both pathways, and then comparing the number of nodes found in both to the number of nodes found in the individual original pathways - - -#### 8. ensemble_node_maxfreq.ipynb -This notebook contains code that loads the ensemble pathway and assigns the highest frequency associated with each node to the respective node. -- The original SPRAS output ensemble pathway file assigns frequencies to edges. -- The result produced by this notebook assigns the maximum edge frequency associated with each node to the node, creating a max node frequency list that is saved as a CSV. - - -#### 9. build_prc_kegg_hiv05.ipynb -This notebook produces a precision recall curve using `scikit-learn`. This is done through the following steps: -- Building the PRC uses the max node frequency produced from the ensemble pathway, and the KEGG to Swissprot/Uniprot mapping. -- A vector of 0s and 1s is built that indicates whether the particular protein in the ensemble pathway is found in the KEGG pathway or not. This vector is called `y_kegg` and is attached to the ensemble pathway dataframe. 
- - The `max_freq` column in the dataframe is considered as the probabilities/scores and the `y_kegg` column in the dataframe is considered as the true label -- Then, a precision recall curve is built. - - -#### 10. build_prc_gene-ontology_hiv05_hiv060 -This notebook also produces several precision recall curves using `scikit-learn`. These are done from selecting 3 biological processes from the publication's list of observed biological processes and then taking the Uniprot list of all Gene Ontology proteins (for the human taxonomy) for that particular biological process. -- A vector of 0s and 1s is built that indicates whether the particular protein in the ensemble pathway is found in the Gene Ontology list or not. This vector is called `y_go` and is attached to the ensemble pathway dataframe. - - The `max_freq` column in the dataframe is considered as the probabilities/scores and the `y_go` column in the dataframe is considered as the true label -- Then, a precision recall curve is built. - - - From e29e737f3cc723887d71ce8a469997cdcd52fba4 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Wed, 30 Jul 2025 13:29:25 -0700 Subject: [PATCH 4/9] fix: use correct file names --- datasets/hiv/Snakefile | 8 ++++---- datasets/hiv/scripts/fetch.py | 2 +- datasets/hiv/scripts/prepare.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datasets/hiv/Snakefile b/datasets/hiv/Snakefile index 7fd43aa..ebab2b5 100644 --- a/datasets/hiv/Snakefile +++ b/datasets/hiv/Snakefile @@ -6,16 +6,16 @@ rule all: rule fetch: output: - "raw/prize_05.tsv" - "raw/prize_060.tsv" + "raw/prizes_05.tsv", + "raw/prizes_060.tsv", "raw/ko03250.xml" shell: "uv run scripts/fetch.py" rule data_prep: input: - "raw/prize_05.tsv", - "raw/prize_060.tsv" + "raw/prizes_05.tsv", + "raw/prizes_060.tsv" output: "Pickles/NodeIDs.pkl" shell: diff --git a/datasets/hiv/scripts/fetch.py b/datasets/hiv/scripts/fetch.py index 3283487..2ef3273 100644 --- a/datasets/hiv/scripts/fetch.py +++ b/datasets/hiv/scripts/fetch.py @@ -18,7 +18,7 @@ prizes_060_url = base_url + "prize_060.csv" def main(): - # Note: These files are .tsv, but have the wrong file extension .csv. + # Note: These files are .tsv, but have the wrong file extension .csv in the original data source. urllib.request.urlretrieve(prizes_05_url, hiv_path / "raw" / "prizes_05.tsv") urllib.request.urlretrieve(prizes_060_url, hiv_path / "raw" / "prizes_060.tsv") diff --git a/datasets/hiv/scripts/prepare.py b/datasets/hiv/scripts/prepare.py index d715dd5..4b048a6 100644 --- a/datasets/hiv/scripts/prepare.py +++ b/datasets/hiv/scripts/prepare.py @@ -8,8 +8,8 @@ def main(): # See `fetch.py` for information about these two files. 
- prize_05 = pandas.read_csv(hiv_path / "raw" / "prize_05.tsv", sep="\t", lineterminator="\n") - prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_060.tsv", sep="\t", lineterminator="\n") + prize_05 = pandas.read_csv(hiv_path / "raw" / "prizes_05.tsv", sep="\t", lineterminator="\n") + prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_s060.tsv", sep="\t", lineterminator="\n") # Some proteins in the original prize files have the syntax `majorIdentifier-N` where N denotes isoforms. # We don't particurarly care about any particular isoform when doing pathway reconstruction, From 70500b3cf0fe35e1201577171d9b524fb13187d3 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 30 Jul 2025 13:55:10 -0700 Subject: [PATCH 5/9] fix: more file path changes --- configs/dmmm.yaml | 4 ++-- datasets/hiv/Snakefile | 8 ++++---- datasets/hiv/scripts/fetch.py | 2 +- datasets/hiv/scripts/spras_formatting.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/dmmm.yaml b/configs/dmmm.yaml index 4ff108e..3c36336 100644 --- a/configs/dmmm.yaml +++ b/configs/dmmm.yaml @@ -46,12 +46,12 @@ datasets: # TODO: use old paramaters for datasets # HIV: https://github.com/Reed-CompBio/spras-benchmarking/blob/0293ae4dc0be59502fac06b42cfd9796a4b4413e/hiv-benchmarking/spras-config/config.yaml - label: dmmmhiv060 - node_files: ["processed_prize_060.txt"] + node_files: ["processed_prizes_060.txt"] edge_files: ["phosphosite-irefindex13.0-uniprot.txt"] other_files: [] data_dir: "datasets/hiv/processed" - label: dmmmhiv05 - node_files: ["processed_prize_05.txt"] + node_files: ["processed_prizes_05.txt"] edge_files: ["phosphosite-irefindex13.0-uniprot.txt"] other_files: [] data_dir: "datasets/hiv/processed" diff --git a/datasets/hiv/Snakefile b/datasets/hiv/Snakefile index ebab2b5..3cb0e62 100644 --- a/datasets/hiv/Snakefile +++ b/datasets/hiv/Snakefile @@ -1,7 +1,7 @@ rule all: input: - "processed/processed_prize_05.txt", - "processed/processed_prize_060.txt", + 
"processed/processed_prizes_05.txt", + "processed/processed_prizes_060.txt", "processed/phosphosite-irefindex13.0-uniprot.txt" rule fetch: @@ -34,8 +34,8 @@ rule spras_formatting: "Pickles/NodeIDs.pkl", "Pickles/UniprotIDs.pkl" output: - "processed/processed_prize_05.txt", - "processed/processed_prize_060.txt" + "processed/processed_prizes_05.txt", + "processed/processed_prizes_060.txt" shell: "uv run scripts/spras_formatting.py" diff --git a/datasets/hiv/scripts/fetch.py b/datasets/hiv/scripts/fetch.py index 2ef3273..894a3a8 100644 --- a/datasets/hiv/scripts/fetch.py +++ b/datasets/hiv/scripts/fetch.py @@ -1,5 +1,5 @@ """ -Fetches the `prize_05.tsv` and `prize_060.tsv` files from +Fetches the `prizes_05.tsv` and `prizes_060.tsv` files from https://github.com/gitter-lab/hiv1-aurkb, as well as the KGML file for `kegg_orthology.py` diff --git a/datasets/hiv/scripts/spras_formatting.py b/datasets/hiv/scripts/spras_formatting.py index a9aa84f..3efb3e0 100644 --- a/datasets/hiv/scripts/spras_formatting.py +++ b/datasets/hiv/scripts/spras_formatting.py @@ -23,8 +23,8 @@ def main(): prize_05.columns = ["NODEID", "prize"] prize_060.columns = ["NODEID", "prize"] - prize_05.to_csv(processed_directory / "processed_prize_05.txt", sep="\t", header=True, index=False) - prize_060.to_csv(processed_directory / "processed_prize_060.txt", sep="\t", header=True, index=False) + prize_05.to_csv(processed_directory / "processed_prizes_05.txt", sep="\t", header=True, index=False) + prize_060.to_csv(processed_directory / "processed_prizes_060.txt", sep="\t", header=True, index=False) if __name__ == '__main__': main() From d1d3cf77970adfcd83dde58f11933f7e8ec53f37 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Wed, 30 Jul 2025 14:23:39 -0700 Subject: [PATCH 6/9] fix: typo in prize 060 --- datasets/hiv/scripts/prepare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/hiv/scripts/prepare.py b/datasets/hiv/scripts/prepare.py index 4b048a6..a907bc1 100644 --- a/datasets/hiv/scripts/prepare.py +++ b/datasets/hiv/scripts/prepare.py @@ -9,7 +9,7 @@ def main(): # See `fetch.py` for information about these two files. prize_05 = pandas.read_csv(hiv_path / "raw" / "prizes_05.tsv", sep="\t", lineterminator="\n") - prize_060 = pandas.read_csv(hiv_path / "raw" / "prize_s060.tsv", sep="\t", lineterminator="\n") + prize_060 = pandas.read_csv(hiv_path / "raw" / "prizes_060.tsv", sep="\t", lineterminator="\n") # Some proteins in the original prize files have the syntax `majorIdentifier-N` where N denotes isoforms. # We don't particurarly care about any particular isoform when doing pathway reconstruction, From d72db21735d7e6b42192d5b29d9e42fe67515512 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Thu, 31 Jul 2025 12:03:48 -0700 Subject: [PATCH 7/9] docs: correct exceptions to copied code --- datasets/hiv/scripts/name_mapping.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datasets/hiv/scripts/name_mapping.py b/datasets/hiv/scripts/name_mapping.py index 55f7369..2674a13 100644 --- a/datasets/hiv/scripts/name_mapping.py +++ b/datasets/hiv/scripts/name_mapping.py @@ -1,6 +1,6 @@ """ This code is almost fully copied from https://www.uniprot.org/help/id_mapping_prog, -with the only exception being at the top of `main`. +with the only exception being at the top and bottom of `main`. """ import re @@ -22,7 +22,7 @@ def main(): - # This is the only major exception to this being example code from UniProt. + # This is 1 of two major exceptions to this being example code from UniProt. # See prepare.py for the NodeIDs generation: this is the deduplicated list of node IDs # from the two prize files in `raw`. 
with open("Pickles/NodeIDs.pkl", "rb") as file: @@ -41,12 +41,10 @@ def main(): df = {"UniprotIDs": uniprot_IDs, "UniprotMap": uniprot_map} + # Second major exception: we save the Uniprot IDs. with open("Pickles/UniprotIDs.pkl", "wb") as file: pickle.dump(df, file) - return - - def check_response(response): try: response.raise_for_status() From e07d6d476f3b5dd7c2bd1686f4320c6e1925463c Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 8 Aug 2025 14:17:57 -0700 Subject: [PATCH 8/9] docs: be clearer about paper --- datasets/hiv/scripts/fetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/hiv/scripts/fetch.py b/datasets/hiv/scripts/fetch.py index 894a3a8..eaf303a 100644 --- a/datasets/hiv/scripts/fetch.py +++ b/datasets/hiv/scripts/fetch.py @@ -3,7 +3,7 @@ https://github.com/gitter-lab/hiv1-aurkb, as well as the KGML file for `kegg_orthology.py` -Associated paper: https://doi.org/10.1371/journal.ppat.1011492 +Associated paper for the prizes file: https://doi.org/10.1371/journal.ppat.1011492 """ import urllib.request From 97d3cfaae3ef970019169893f7dd695488e316a8 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 8 Aug 2025 14:20:48 -0700 Subject: [PATCH 9/9] remove redundant docs cmt Co-authored-by: Anthony Gitter --- datasets/hiv/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/hiv/README.md b/datasets/hiv/README.md index 7c9ffc5..a0bb7f3 100644 --- a/datasets/hiv/README.md +++ b/datasets/hiv/README.md @@ -8,7 +8,7 @@ See `raw/README.md`. See `Snakefile` for the way that all of the IO files are connected. -1. `fetch.py` - This grabs the score files from https://doi.org/10.1371/journal.ppat.1011492 - see `fetch.py` for more info. +1. `fetch.py` - This grabs the score files from https://doi.org/10.1371/journal.ppat.1011492 1. `prepare.py` - This cleans up the prize files in `raw`; specifically to remove duplicates. 1. 
`name_mapping.py` - Converts from UniProt KB-ACID to UniProt KB to meet in the middle with `kegg_ortholog.py`. We chose UniProt KB for its generality. 1. `spras_formatting.py` - Formats the input files into a SPRAS-ready format.