diff --git a/.github/workflows/bar-api.yml b/.github/workflows/bar-api.yml index d858b37..3ab318e 100644 --- a/.github/workflows/bar-api.yml +++ b/.github/workflows/bar-api.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: [3.9, 3.10.15, 3.11, 3.12, 3.13] + python-version: [3.10.17, 3.11, 3.12, 3.13] services: redis: diff --git a/Dockerfile b/Dockerfile index 5aea506..48efcf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12-bookworm +FROM python:3.13-bookworm WORKDIR /usr/src/app diff --git a/api/models/canola_nssnp.py b/api/models/canola_nssnp.py new file mode 100644 index 0000000..187ed42 --- /dev/null +++ b/api/models/canola_nssnp.py @@ -0,0 +1,36 @@ +from typing import Optional +from api import db + + +# CanolaBase = declarative_base() +class CanolaProteinReference(db.Model): + __bind_key__ = "canola_nssnp" + __tablename__ = "protein_reference" + + protein_reference_id: db.Mapped[int] = db.mapped_column(db.Integer(), primary_key=True, autoincrement=True) + gene_identifier: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + gene_name: db.Mapped[Optional[str]] = db.mapped_column(db.String(45), nullable=True) + + proteinsJoin = db.relationship("CanolaSnpsToProtein", backref="protein", cascade="all, delete-orphan") + + +class CanolaSnpsToProtein(db.Model): + __bind_key__ = "canola_nssnp" + __tablename__ = "snps_to_protein" + + snps_reference_id: db.Mapped[int] = db.mapped_column(db.Integer(), primary_key=True, autoincrement=True) + protein_reference_id: db.Mapped[int] = db.mapped_column( + db.Integer(), db.ForeignKey("protein_reference.protein_reference_id", ondelete="CASCADE"), primary_key=True + ) + transcript_pos: db.Mapped[int] = db.mapped_column(db.Integer(), nullable=False) + chromosome: db.Mapped[str] = db.mapped_column(db.String(25), nullable=False) + chromosomal_loci: db.Mapped[int] = db.mapped_column(db.Integer(), nullable=False) + ref_DNA: db.Mapped[str] = db.mapped_column(db.String(1), nullable=False) + alt_DNA: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + aa_pos: db.Mapped[int] = db.mapped_column(db.Integer(), nullable=False) + ref_aa: db.Mapped[str] = db.mapped_column(db.String(3), nullable=False) + alt_aa: db.Mapped[str] = db.mapped_column(db.String(3), nullable=False) + type: db.Mapped[str] = db.mapped_column(db.String(50), nullable=False) + effect_impact: db.Mapped[str] = db.mapped_column(db.String(50), nullable=False) + transcript_biotype: db.Mapped[Optional[str]] = db.mapped_column(db.String(45), nullable=True) + alt_freq: db.Mapped[float] = db.mapped_column(db.Numeric(10, 5), nullable=False) diff --git a/api/models/homologs_db.py b/api/models/homologs_db.py new file mode 100644 index 0000000..426d9b1 --- /dev/null +++ b/api/models/homologs_db.py @@ -0,0 +1,14 @@ +from api import db + + +class homologs(db.Model): + __bind_key__ = "homologs_db" + __tablename__ = "homologs" + + homologs_id: db.Mapped[int] = db.mapped_column(db.Integer(), primary_key=True, autoincrement=True) + search_protein_name: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + result_protein_name: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + search_species_name: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + result_species_name: db.Mapped[str] = db.mapped_column(db.String(45), nullable=False) + Percent_id: db.Mapped[float] = db.mapped_column(db.Numeric(10, 5), nullable=False) + e_score: db.Mapped[str] = db.mapped_column(db.String(10), nullable=False) diff --git a/api/resources/snps.py b/api/resources/snps.py old mode 100755 new mode 100644 index 112c11d..b026b73 --- a/api/resources/snps.py +++ b/api/resources/snps.py @@ -17,6 +17,11 @@ SnpsReference as SoybeanSnpsReference, SamplesLookup as SoybeanSampleNames, ) +from api.models.canola_nssnp import ( + CanolaProteinReference as CanolaProteinReference, + CanolaSnpsToProtein as CanolaSnpsToProtein, +) +from api.models.homologs_db import homologs as HomologsDB from api.utils.bar_utils import BARUtils from flask import request import re @@ -141,7 +146,7 @@ class GeneNameAlias(Resource): def get(self, species="", gene_id=""): """Endpoint returns annotated SNP poplar data in order of (to match A th API format): AA pos (zero-indexed), sample id, 'missense_variant','MODERATE', 'MISSENSE', codon/DNA base change, - AA change (DH), pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype + AA change (DH), pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype (allele frequency for canola) values with single quotes are fixed""" results_json = [] @@ -160,43 +165,76 @@ def get(self, species="", gene_id=""): protein_reference = SoybeanProteinReference snps_to_protein = SoybeanSnpsToProtein snps_reference = SoybeanSnpsReference + elif species == "canola" and BARUtils.is_canola_gene_valid(gene_id): + protein_reference = CanolaProteinReference + snps_to_protein = CanolaSnpsToProtein else: return BARUtils.error_exit("Invalid gene id"), 400 - rows = ( - db.session.execute( - db.select(protein_reference, snps_to_protein, snps_reference) - .select_from(protein_reference) - .join(snps_to_protein) - .join(snps_reference) - .where(protein_reference.gene_identifier == gene_id) + if species == "canola" and BARUtils.is_canola_gene_valid(gene_id): + rows = ( + db.session.execute( + db.select(protein_reference, snps_to_protein) + .select_from(protein_reference) + .join(snps_to_protein) + .where(protein_reference.gene_identifier == gene_id) + ) + .tuples() + .all() + ) + for protein, snptoprotein in rows: + itm_lst = [ + snptoprotein.chromosome, + snptoprotein.aa_pos - 1, # zero index-ed + None, + "missense_variant", + "MODERATE", + "MISSENSE", + str(snptoprotein.transcript_pos) + snptoprotein.ref_DNA + ">" + snptoprotein.alt_DNA, + snptoprotein.ref_aa + snptoprotein.alt_aa, + None, + gene_id, + "protein_coding", + "CODING", + protein.gene_name, + float(snptoprotein.alt_freq), + ] + results_json.append(itm_lst) + else: + rows = ( + db.session.execute( + db.select(protein_reference, snps_to_protein, snps_reference) + .select_from(protein_reference) + .join(snps_to_protein) + .join(snps_reference) + .where(protein_reference.gene_identifier == gene_id) + ) + .tuples() + .all() ) - .tuples() - .all() - ) - # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant', - # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH), - # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype - for protein, snpsjoin, snpstbl in rows: - itm_lst = [ - snpstbl.chromosome, - # snpstbl.chromosomal_loci, - snpsjoin.aa_pos - 1, # zero index-ed - snpstbl.sample_id, - "missense_variant", - "MODERATE", - "MISSENSE", - str(snpsjoin.transcript_pos) + snpsjoin.ref_DNA + ">" + snpsjoin.alt_DNA, - snpsjoin.ref_aa + snpsjoin.alt_aa, - None, - re.sub(r".\d$", "", protein.gene_identifier), - "protein_coding", - "CODING", - protein.gene_identifier, - None, - ] - results_json.append(itm_lst) + # BAR A Th API format is chr, AA pos (zero-indexed), sample id, 'missense_variant', + # 'MODERATE', 'MISSENSE', codon/DNA base change, AA change (DH), + # pro length, gene ID, 'protein_coding', 'CODING', transcript id, biotype + for protein, snpsjoin, snpstbl in rows: + itm_lst = [ + snpstbl.chromosome, + # snpstbl.chromosomal_loci, + snpsjoin.aa_pos - 1, # zero index-ed + snpstbl.sample_id, + "missense_variant", + "MODERATE", + "MISSENSE", + str(snpsjoin.transcript_pos) + snpsjoin.ref_DNA + ">" + snpsjoin.alt_DNA, + snpsjoin.ref_aa + snpsjoin.alt_aa, + None, + re.sub(r".\d$", "", protein.gene_identifier), + "protein_coding", + "CODING", + protein.gene_identifier, + None, + ] + results_json.append(itm_lst) # Return results if there are data if len(results_json) > 0: @@ -502,3 +540,50 @@ def get(self, pval="", araid="", popid=""): pop_both_sig_idx = HotspotUtils.get_sig_index(pop_both_sig) output = {"ara_id": araid, "pop_id": popid, "ara_hotspots": ara_both_sig_idx, "pop_hotspots": pop_both_sig_idx} return BARUtils.success_exit(output) + + +@snps.route("/homologs///") +class Homologs(Resource): + @snps.param("search_species", _in="path", default="canola") + @snps.param("search_gene", _in="path", default="BnaA07g31480D") + @snps.param("target_species", _in="path", default="arabidopsis") + def get(self, search_species="", search_gene="", target_species=""): + """This endpoint shows the homologs proteins of search_gene in target_species. + The endpoint returns a list of homologous pairs of proteins in following format: + Percent_id(percent identity get by blast); e score + """ + # Escape input + search_species = escape(search_species) + gene_id = escape(search_gene) + target_species = escape(target_species) + supported = ["arabidopsis", "canola"] + + if (search_species not in supported) or (target_species not in supported): + return BARUtils.error_exit("Species not supported"), 400 + elif (search_species == "arabidopsis" and BARUtils.is_arabidopsis_gene_valid(gene_id)) or ( + search_species == "canola" and BARUtils.is_canola_gene_valid(gene_id) + ): + results = HomologsDB.query.filter_by( + search_protein_name=gene_id, search_species_name=search_species, result_species_name=target_species + ).all() + if not results: + return BARUtils.error_exit("No homologs found for the given query"), 400 + + homologs_list = [ + { + "search_species_name": search_species, + "search_protein_name": gene_id, + "result_species_name": target_species, + "result_protein_name": homolog.result_protein_name, + "Percent_id": float(homolog.Percent_id), + "e_score": float(homolog.e_score), + } + for homolog in results + ] + homologs_list.sort(key=lambda x: x["e_score"]) + if len(homologs_list) >= 5: + homologs_list = homologs_list[:5] + response = {"homologs": homologs_list} + return BARUtils.success_exit(response), 200 + else: + return BARUtils.error_exit("Invalid gene id"), 400 diff --git a/api/utils/bar_utils.py b/api/utils/bar_utils.py index 3dd6cfa..7fe0739 100644 --- a/api/utils/bar_utils.py +++ b/api/utils/bar_utils.py @@ -83,6 +83,17 @@ def is_cannabis_gene_valid(gene): else: return False + @staticmethod + def is_canola_gene_valid(gene): + """This function verifies if canola gene (BnaC07g42830D) is valid + :param gene: + :return: + """ + if re.search(r"^Bna[AC]\d{2}g\d{5}[A-D]?$", gene, re.I): + return True + else: + return False + @staticmethod def is_arachis_gene_valid(gene): """This function verifies if arachis gene is valid: Adur10000_comp0_c0_seq1 diff --git a/api/utils/docking_utils.py b/api/utils/docking_utils.py old mode 100755 new mode 100644 diff --git a/config/BAR_API.cfg b/config/BAR_API.cfg old mode 100644 new mode 100755 index f6e0ab2..f05a5fe --- a/config/BAR_API.cfg +++ b/config/BAR_API.cfg @@ -15,6 +15,7 @@ SQLALCHEMY_BINDS = { 'arachis': 'mysql://root:root@localhost/arachis', 'brassica_rapa': 'mysql://root:root@localhost/brassica_rapa', 'cannabis': 'mysql://root:root@localhost/cannabis', + 'canola_nssnp' : 'mysql://root:root@localhost/canola_nssnp', 'dna_damage': 'mysql://root:root@localhost/dna_damage', 'embryo': 'mysql://root:root@localhost/embryo', 'eplant2': 'mysql://root:root@localhost/eplant2', @@ -24,6 +25,7 @@ SQLALCHEMY_BINDS = { 'eplant_tomato' : 'mysql://root:root@localhost/eplant_tomato', 'fastpheno' : 'mysql://root:root@localhost/fastpheno', 'germination': 'mysql://root:root@localhost/germination', + 'homologs_db' : 'mysql://root:root@localhost/homologs_db', 'kalanchoe': 'mysql://root:root@localhost/kalanchoe', 'klepikova': 'mysql://root:root@localhost/klepikova', 'llama3': 'mysql://root:root@localhost/llama3', diff --git a/config/databases/canola_nssnp.sql b/config/databases/canola_nssnp.sql new file mode 100644 index 0000000..46f1cb8 --- /dev/null +++ b/config/databases/canola_nssnp.sql @@ -0,0 +1,98 @@ +-- MySQL dump 10.13 Distrib 8.4.4, for Linux (x86_64) +-- +-- Host: localhost Database: canola_nssnp +-- ------------------------------------------------------ +-- Server version 8.4.4 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!50503 SET NAMES utf8mb4 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Current Database: `canola_nssnp` +-- + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `canola_nssnp` /*!40100 DEFAULT CHARACTER SET utf8mb3 */ /*!80016 DEFAULT ENCRYPTION='N' */; + +USE `canola_nssnp`; + +-- +-- Table structure for table `protein_reference` +-- + +DROP TABLE IF EXISTS `protein_reference`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8mb4 */; +CREATE TABLE `protein_reference` ( + `protein_reference_id` int NOT NULL AUTO_INCREMENT, + `gene_identifier` varchar(45) NOT NULL, + `gene_name` varchar(45) DEFAULT NULL, + PRIMARY KEY (`protein_reference_id`) +) ENGINE=InnoDB AUTO_INCREMENT=63266 DEFAULT CHARSET=utf8mb3; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `protein_reference` +-- + +LOCK TABLES `protein_reference` WRITE; +/*!40000 ALTER TABLE `protein_reference` DISABLE KEYS */; +INSERT INTO `protein_reference` VALUES (1,'BnaC09g12820D','GSBRNA2T00000001001'),(2,'BnaC09g12810D','GSBRNA2T00000003001'),(3,'BnaC09g12800D','GSBRNA2T00000005001'),(4,'BnaC09g12790D','GSBRNA2T00000007001'),(5,'BnaC09g12780D','GSBRNA2T00000008001'),(6,'BnaC09g12770D','GSBRNA2T00000009001'),(7,'BnaC09g12760D','GSBRNA2T00000011001'),(8,'BnaC09g12750D','GSBRNA2T00000012001'),(9,'BnaC09g12740D','GSBRNA2T00000015001'),(10,'BnaC09g12730D','GSBRNA2T00000016001'),(63265,'BnaA07g31480D','GSBRNA2T00102721001'); +/*!40000 ALTER TABLE `protein_reference` ENABLE KEYS */; +UNLOCK TABLES; + +-- +-- Table structure for table `snps_to_protein` +-- + +DROP TABLE IF EXISTS `snps_to_protein`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8mb4 */; +CREATE TABLE `snps_to_protein` ( + `snps_reference_id` int NOT NULL AUTO_INCREMENT, + `protein_reference_id` int NOT NULL, + `transcript_pos` int NOT NULL, + `chromosome` varchar(25) NOT NULL, + `chromosomal_loci` int NOT NULL, + `ref_DNA` varchar(1) NOT NULL, + `alt_DNA` varchar(45) NOT NULL, + `aa_pos` int NOT NULL, + `ref_aa` varchar(3) NOT NULL, + `alt_aa` varchar(3) NOT NULL, + `type` varchar(50) NOT NULL, + `effect_impact` varchar(50) NOT NULL, + `transcript_biotype` varchar(45) DEFAULT NULL, + `alt_freq` decimal(10,5) NOT NULL, + PRIMARY KEY (`snps_reference_id`,`protein_reference_id`), + KEY `protein_fk_idx` (`protein_reference_id`), + CONSTRAINT `protein_fk` FOREIGN KEY (`protein_reference_id`) REFERENCES `protein_reference` (`protein_reference_id`) +) ENGINE=InnoDB AUTO_INCREMENT=327048 DEFAULT CHARSET=utf8mb3; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `snps_to_protein` +-- + +LOCK TABLES `snps_to_protein` WRITE; +/*!40000 ALTER TABLE `snps_to_protein` DISABLE KEYS */; +INSERT INTO `snps_to_protein` VALUES (142004,63265,52,'chrA07',21985117,'A','C',18,'Met','Leu','missense_variant','MODERATE','protein_coding',0.00990),(142005,63265,130,'chrA07',21985679,'G','T',44,'Gly','Cys','missense_variant','MODERATE','protein_coding',0.04930),(142006,63265,163,'chrA07',21985712,'C','G',55,'Leu','Val','missense_variant','MODERATE','protein_coding',0.09210),(142007,63265,268,'chrA07',21985817,'G','A',90,'Asp','Asn','missense_variant','MODERATE','protein_coding',0.00660),(142008,63265,284,'chrA07',21985833,'G','C',95,'Arg','Thr','missense_variant','MODERATE','protein_coding',0.01070),(142009,63265,433,'chrA07',21985982,'C','T',145,'Pro','Ser','missense_variant','MODERATE','protein_coding',0.05260),(142010,63265,442,'chrA07',21985991,'G','A',148,'Glu','Lys','missense_variant','MODERATE','protein_coding',0.05260),(142011,63265,445,'chrA07',21985994,'A','G',149,'Thr','Ala','missense_variant','MODERATE','protein_coding',0.05260),(142012,63265,486,'chrA07',21986035,'C','G',162,'His','Gln','missense_variant','MODERATE','protein_coding',0.08310),(142013,63265,500,'chrA07',21986049,'T','G',167,'Ile','Ser','missense_variant','MODERATE','protein_coding',0.08140),(142014,63265,526,'chrA07',21986075,'G','A',176,'Gly','Ser','missense_variant','MODERATE','protein_coding',0.08390),(142015,63265,541,'chrA07',21986090,'T','C',181,'Trp','Arg','missense_variant','MODERATE','protein_coding',0.01070),(142016,63265,634,'chrA07',21986183,'C','A',212,'Leu','Ile','missense_variant','MODERATE','protein_coding',0.08630),(142017,63265,640,'chrA07',21986189,'C','T',214,'Arg','Trp','missense_variant','MODERATE','protein_coding',0.08630),(142018,63265,694,'chrA07',21986243,'G','T',232,'Asp','Tyr','missense_variant','MODERATE','protein_coding',0.01070),(142019,63265,769,'chrA07',21986318,'T','C',257,'Tyr','His','missense_variant','MODERATE','protein_coding',0.05180),(142020,63265,848,'chrA07',21986397,'G','A',283,'Ser','Asn','missense_variant','MODERATE','protein_coding',0.05100),(142021,63265,859,'chrA07',21986408,'T','C',287,'Ser','Pro','missense_variant','MODERATE','protein_coding',0.05920),(142022,63265,889,'chrA07',21986438,'A','G',297,'Lys','Glu','missense_variant','MODERATE','protein_coding',0.06330),(142023,63265,979,'chrA07',21986528,'C','G',327,'His','Asp','missense_variant','MODERATE','protein_coding',0.01150),(142024,63265,995,'chrA07',21986544,'C','T',332,'Thr','Ile','missense_variant','MODERATE','protein_coding',0.01150),(142025,63265,1039,'chrA07',21986588,'C','T',347,'Leu','Phe','missense_variant','MODERATE','protein_coding',0.07070),(142026,63265,1042,'chrA07',21986591,'T','C',348,'Trp','Arg','missense_variant','MODERATE','protein_coding',0.01150),(142027,63265,1060,'chrA07',21986609,'A','G',354,'Asn','Asp','missense_variant','MODERATE','protein_coding',0.01150),(142028,63265,1075,'chrA07',21986624,'T','C',359,'Tyr','His','missense_variant','MODERATE','protein_coding',0.01150),(142029,63265,1267,'chrA07',21986816,'C','G',423,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.07240),(142030,63265,1336,'chrA07',21986885,'C','G',446,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.00330),(142031,63265,1363,'chrA07',21986912,'G','A',455,'Ala','Thr','missense_variant','MODERATE','protein_coding',0.32150),(142032,63265,1420,'chrA07',21986969,'T','A',474,'Ser','Thr','missense_variant','MODERATE','protein_coding',0.01810),(142033,63265,1462,'chrA07',21987011,'C','G',488,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.04610),(142034,63265,1595,'chrA07',21987144,'A','C',532,'Glu','Ala','missense_variant','MODERATE','protein_coding',0.04440),(142035,63265,1735,'chrA07',21987284,'C','G',579,'His','Asp','missense_variant','MODERATE','protein_coding',0.29930),(142036,63265,1744,'chrA07',21987293,'T','C',582,'Tyr','His','missense_variant','MODERATE','protein_coding',0.02380),(142037,63265,1865,'chrA07',21987414,'C','T',622,'Ser','Phe','missense_variant','MODERATE','protein_coding',0.27380),(142038,63265,2077,'chrA07',21987626,'C','G',693,'His','Asp','missense_variant','MODERATE','protein_coding',0.00160),(142039,63265,2086,'chrA07',21987635,'C','A',696,'His','Asn','missense_variant','MODERATE','protein_coding',0.00160),(142040,63265,2089,'chrA07',21987638,'A','G',697,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.00160),(142041,63265,2215,'chrA07',21987764,'A','T',739,'Arg','Trp','missense_variant','MODERATE','protein_coding',0.00160),(142042,63265,2227,'chrA07',21987776,'G','C',743,'Gly','Arg','missense_variant','MODERATE','protein_coding',0.00160),(142043,63265,2233,'chrA07',21987782,'T','C',745,'Tyr','His','missense_variant','MODERATE','protein_coding',0.00160),(142044,63265,2254,'chrA07',21987803,'C','T',752,'Leu','Phe','missense_variant','MODERATE','protein_coding',0.00160),(142045,63265,2323,'chrA07',21987872,'T','C',775,'Phe','Leu','missense_variant','MODERATE','protein_coding',0.28040),(142046,63265,2350,'chrA07',21987899,'G','A',784,'Gly','Arg','missense_variant','MODERATE','protein_coding',0.00160),(142047,63265,2395,'chrA07',21987944,'C','G',799,'Leu','Val','missense_variant','MODERATE','protein_coding',0.00330),(142048,63265,2425,'chrA07',21987974,'G','C',809,'Val','Leu','missense_variant','MODERATE','protein_coding',0.27300),(142049,63265,2434,'chrA07',21987983,'A','G',812,'Ile','Val','missense_variant','MODERATE','protein_coding',0.27300),(142050,63265,2446,'chrA07',21987995,'A','G',816,'Ile','Val','missense_variant','MODERATE','protein_coding',0.27380),(142051,63265,2464,'chrA07',21988013,'A','C',822,'Ser','Arg','missense_variant','MODERATE','protein_coding',0.27140),(142052,63265,2518,'chrA07',21988067,'C','G',840,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.01150),(142053,63265,2521,'chrA07',21988070,'T','G',841,'Leu','Val','missense_variant','MODERATE','protein_coding',0.24180),(142054,63265,2545,'chrA07',21988094,'G','A',849,'Glu','Lys','missense_variant','MODERATE','protein_coding',0.31830),(142055,63265,2596,'chrA07',21988145,'A','G',866,'Arg','Gly','missense_variant','MODERATE','protein_coding',0.26320),(142056,63265,2632,'chrA07',21988181,'T','C',878,'Cys','Arg','missense_variant','MODERATE','protein_coding',0.26730),(142057,63265,2737,'chrA07',21988286,'G','A',913,'Gly','Arg','missense_variant','MODERATE','protein_coding',0.26640),(142058,63265,2770,'chrA07',21988319,'G','A',924,'Val','Met','missense_variant','MODERATE','protein_coding',0.27220),(142059,63265,2800,'chrA07',21988349,'C','T',934,'Arg','Cys','missense_variant','MODERATE','protein_coding',0.05260),(142060,63265,2848,'chrA07',21988397,'T','C',950,'Cys','Arg','missense_variant','MODERATE','protein_coding',0.27380),(142061,63265,2878,'chrA07',21988427,'T','C',960,'Ser','Pro','missense_variant','MODERATE','protein_coding',0.27300),(142062,63265,2881,'chrA07',21988430,'C','T',961,'Leu','Phe','missense_variant','MODERATE','protein_coding',0.02800),(142063,63265,2890,'chrA07',21988439,'G','A',964,'Ala','Thr','missense_variant','MODERATE','protein_coding',0.02800),(142064,63265,2908,'chrA07',21988457,'G','C',970,'Asp','His','missense_variant','MODERATE','protein_coding',0.01320),(142065,63265,2941,'chrA07',21988490,'A','T',981,'Arg','Trp','missense_variant','MODERATE','protein_coding',0.31170),(142066,63265,2968,'chrA07',21988517,'C','T',990,'Arg','Cys','missense_variant','MODERATE','protein_coding',0.02880),(142067,63265,3163,'chrA07',21988712,'G','A',1055,'Ala','Thr','missense_variant','MODERATE','protein_coding',0.27470),(142068,63265,3188,'chrA07',21988737,'C','G',1063,'Ala','Gly','missense_variant','MODERATE','protein_coding',0.27550),(142069,63265,3199,'chrA07',21988748,'A','T',1067,'Ser','Cys','missense_variant','MODERATE','protein_coding',0.32070),(142070,63265,3283,'chrA07',21988832,'G','T',1095,'Val','Phe','missense_variant','MODERATE','protein_coding',0.04360),(142071,63265,3289,'chrA07',21988838,'G','T',1097,'Asp','Tyr','missense_variant','MODERATE','protein_coding',0.04360),(327014,10,191,'chrC09',9191243,'G','A',64,'Ser','Phe','missense_variant','MODERATE','protein_coding',0.44000),(327015,10,188,'chrC09',9191246,'G','T',63,'Pro','Gln','missense_variant','MODERATE','protein_coding',0.00000),(327016,10,164,'chrC09',9191270,'C','T',55,'Arg','Lys','missense_variant','MODERATE','protein_coding',0.44160),(327017,10,43,'chrC09',9191391,'C','T',15,'Gly','Arg','missense_variant','MODERATE','protein_coding',0.43260),(327018,9,169,'chrC09',9195157,'T','C',57,'Phe','Leu','missense_variant','MODERATE','protein_coding',0.00130),(327019,9,457,'chrC09',9195518,'A','G',153,'Thr','Ala','missense_variant','MODERATE','protein_coding',0.00160),(327020,8,67,'chrC09',9230756,'C','T',23,'Arg','Cys','missense_variant','MODERATE','protein_coding',0.00350),(327021,8,81,'chrC09',9230770,'G','C',27,'Glu','Asp','missense_variant','MODERATE','protein_coding',0.43170),(327022,7,2450,'chrC09',9234630,'G','T',817,'Ala','Asp','missense_variant','MODERATE','protein_coding',0.00010),(327023,7,1718,'chrC09',9237814,'A','C',573,'Ile','Ser','missense_variant','MODERATE','protein_coding',0.00900),(327024,7,1408,'chrC09',9238193,'T','C',470,'Ile','Val','missense_variant','MODERATE','protein_coding',0.00740),(327025,7,865,'chrC09',9238736,'C','T',289,'Asp','Asn','missense_variant','MODERATE','protein_coding',0.00200),(327026,7,825,'chrC09',9238776,'G','T',275,'Asn','Lys','missense_variant','MODERATE','protein_coding',0.01230),(327027,7,763,'chrC09',9238838,'A','G',255,'Phe','Leu','missense_variant','MODERATE','protein_coding',0.00740),(327028,7,673,'chrC09',9238928,'C','T',225,'Glu','Lys','missense_variant','MODERATE','protein_coding',0.00660),(327029,7,612,'chrC09',9238989,'A','T',204,'Asn','Lys','missense_variant','MODERATE','protein_coding',0.00660),(327030,6,17,'chrC09',9267233,'A','T',6,'Lys','Ile','missense_variant','MODERATE','protein_coding',0.00000),(327031,6,305,'chrC09',9267521,'G','C',102,'Gly','Ala','missense_variant','MODERATE','protein_coding',0.00000),(327032,6,2093,'chrC09',9270136,'G','T',698,'Gly','Val','missense_variant','MODERATE','protein_coding',0.00490),(327033,5,661,'chrC09',9273716,'G','C',221,'Pro','Ala','missense_variant','MODERATE','protein_coding',0.00990),(327034,5,481,'chrC09',9274017,'A','T',161,'Leu','Met','missense_variant','MODERATE','protein_coding',0.00990),(327035,4,67,'chrC09',9282013,'C','A',23,'Val','Phe','missense_variant','MODERATE','protein_coding',0.00660),(327036,3,385,'chrC09',9301658,'T','A',129,'Phe','Ile','missense_variant','MODERATE','protein_coding',0.00660),(327037,3,419,'chrC09',9301692,'A','G',140,'Glu','Gly','missense_variant','MODERATE','protein_coding',0.00580),(327038,3,726,'chrC09',9301999,'C','G',242,'Phe','Leu','missense_variant','MODERATE','protein_coding',0.00580),(327039,3,896,'chrC09',9302169,'T','A',299,'Val','Asp','missense_variant','MODERATE','protein_coding',0.00250),(327040,3,1106,'chrC09',9302379,'G','T',369,'Arg','Ile','missense_variant','MODERATE','protein_coding',0.00510),(327041,3,1220,'chrC09',9302493,'A','T',407,'Glu','Val','missense_variant','MODERATE','protein_coding',0.00490),(327042,3,1307,'chrC09',9302580,'A','T',436,'His','Leu','missense_variant','MODERATE','protein_coding',0.00660),(327043,3,1576,'chrC09',9302849,'T','A',526,'Cys','Ser','missense_variant','MODERATE','protein_coding',0.00820),(327044,3,1697,'chrC09',9302970,'A','C',566,'Asn','Thr','missense_variant','MODERATE','protein_coding',0.00660),(327045,2,200,'chrC09',9311197,'A','C',67,'His','Pro','missense_variant','MODERATE','protein_coding',0.00820),(327046,1,1319,'chrC09',9319194,'G','A',440,'Ser','Leu','missense_variant','MODERATE','protein_coding',0.00660),(327047,1,1085,'chrC09',9319428,'C','T',362,'Arg','Gln','missense_variant','MODERATE','protein_coding',0.00660); +/*!40000 ALTER TABLE `snps_to_protein` ENABLE KEYS */; +UNLOCK TABLES; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2025-03-29 19:28:13 diff --git a/config/databases/homologs_db.sql b/config/databases/homologs_db.sql new file mode 100644 index 0000000..29a273a --- /dev/null +++ b/config/databases/homologs_db.sql @@ -0,0 +1,66 @@ +-- MySQL dump 10.13 Distrib 8.4.4, for Linux (x86_64) +-- +-- Host: localhost Database: homologs_db +-- ------------------------------------------------------ +-- Server version 8.4.4 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!50503 SET NAMES utf8mb4 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Current Database: `homologs_db` +-- + +CREATE DATABASE /*!32312 IF NOT EXISTS*/ `homologs_db` /*!40100 DEFAULT CHARACTER SET utf8mb3 */ /*!80016 DEFAULT ENCRYPTION='N' */; + +USE `homologs_db`; + +-- +-- Table structure for table `homologs` +-- + +DROP TABLE IF EXISTS `homologs`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!50503 SET character_set_client = utf8mb4 */; +CREATE TABLE `homologs` ( + `homologs_id` int NOT NULL AUTO_INCREMENT, + `search_protein_name` varchar(45) NOT NULL, + `result_protein_name` varchar(45) NOT NULL, + `search_species_name` varchar(45) NOT NULL, + `result_species_name` varchar(45) NOT NULL, + `Percent_id` decimal(10,5) NOT NULL, + `e_score` varchar(10) NOT NULL, + `is_search_structure` int NOT NULL, + `is_result_structure` int NOT NULL, + PRIMARY KEY (`homologs_id`) +) ENGINE=InnoDB AUTO_INCREMENT=1356306 DEFAULT CHARSET=utf8mb3; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `homologs` +-- + +LOCK TABLES `homologs` WRITE; +/*!40000 ALTER TABLE `homologs` DISABLE KEYS */; +INSERT INTO `homologs` VALUES (1,'AT5G16970.1','BnaC09g40930D','arabidopsis','canola',86.04700,'0.0',0,0),(2,'AT5G16970.1','BnaA10g17570D','arabidopsis','canola',86.04700,'0.0',0,0),(3,'AT5G16970.1','BnaC03g08130D','arabidopsis','canola',85.08800,'0.0',0,0),(4,'AT5G16970.1','BnaA05g32330D','arabidopsis','canola',81.63300,'0.0',0,0),(5,'AT5G16970.1','BnaCnng06210D','arabidopsis','canola',80.87000,'0.0',0,0),(6,'AT5G16970.1','BnaA03g06330D','arabidopsis','canola',85.37300,'0.0',0,0),(7,'AT5G16970.1','BnaA09g28840D','arabidopsis','canola',77.90700,'0.0',0,0),(8,'AT5G16970.1','BnaC05g20410D','arabidopsis','canola',77.32600,'0.0',0,0),(9,'AT5G16970.1','BnaC02g06960D','arabidopsis','canola',76.23200,'0.0',0,0),(10,'AT5G16970.1','BnaA02g03340D','arabidopsis','canola',75.94200,'0.0',0,0),(11,'AT4G32100.1','BnaC01g06360D','arabidopsis','canola',55.28500,'2.18e-39',0,0),(12,'AT4G32100.1','BnaA01g04810D','arabidopsis','canola',55.28500,'1.46e-38',0,0),(13,'AT4G32100.1','BnaA01g04820D','arabidopsis','canola',52.84600,'6.47e-38',0,0),(14,'AT4G32100.1','BnaAnng05770D','arabidopsis','canola',53.33300,'1.23e-37',0,0),(15,'AT4G32100.1','BnaC01g06960D','arabidopsis','canola',51.61300,'1.19e-36',0,0),(16,'AT4G32100.1','BnaC07g18640D','arabidopsis','canola',51.66700,'6.37e-36',0,0),(17,'AT4G32100.1','BnaA01g34870D','arabidopsis','canola',49.19400,'4.15e-35',0,0),(18,'AT4G32100.1','BnaA02g24080D','arabidopsis','canola',49.13800,'3.63e-27',0,0),(19,'AT4G32100.1','BnaA02g24090D','arabidopsis','canola',48.27600,'6.69e-27',0,0),(20,'AT4G32100.1','BnaC02g31830D','arabidopsis','canola',47.41400,'2.72e-25',0,0),(21,'AT2G43120.2','BnaC04g02730D','arabidopsis','canola',90.96600,'0.0',0,0),(22,'AT2G43120.2','BnaC03g24040D','arabidopsis','canola',91.27700,'0.0',0,0),(23,'AT2G43120.2','BnaA03g20050D','arabidopsis','canola',90.65400,'0.0',0,0),(86750,'AT1G74360.1','BnaC06g35270D','arabidopsis','canola',84.27700,'0.0',0,0),(86751,'AT1G74360.1','BnaA07g31480D','arabidopsis','canola',84.02200,'0.0',0,0),(86752,'AT1G74360.1','BnaA02g11590D','arabidopsis','canola',77.97100,'0.0',0,0),(86753,'AT1G74360.1','BnaCnng50590D','arabidopsis','canola',77.97100,'0.0',0,0),(86754,'AT1G74360.1','BnaA08g00410D','arabidopsis','canola',31.84900,'5.39e-154',0,0),(86755,'AT1G74360.1','BnaA06g11580D','arabidopsis','canola',33.86600,'3.20e-153',0,0),(86756,'AT1G74360.1','BnaA02g16780D','arabidopsis','canola',67.03000,'2.11e-152',0,0),(86757,'AT1G74360.1','BnaA06g34400D','arabidopsis','canola',32.93200,'1.50e-151',0,0),(86758,'AT1G74360.1','BnaA08g16520D','arabidopsis','canola',33.89500,'1.68e-150',0,0),(86759,'AT1G74360.1','BnaC07g47240D','arabidopsis','canola',33.88700,'3.05e-150',0,0),(690828,'BnaA07g31480D','AT1G74360.1','canola','arabidopsis',84.02200,'0.0',0,0),(690829,'BnaA07g31480D','AT2G01950.1','canola','arabidopsis',32.34200,'3.57e-149',0,0),(690830,'BnaA07g31480D','AT1G55610.1','canola','arabidopsis',31.71500,'9.47e-148',0,0),(690831,'BnaA07g31480D','AT1G55610.2','canola','arabidopsis',31.71500,'9.47e-148',0,0),(690832,'BnaA07g31480D','AT1G17230.1','canola','arabidopsis',32.44000,'7.99e-146',0,0),(690833,'BnaA07g31480D','AT1G17230.2','canola','arabidopsis',32.44000,'1.34e-145',0,0),(690834,'BnaA07g31480D','AT4G39400.1','canola','arabidopsis',33.15200,'5.71e-145',0,0),(690835,'BnaA07g31480D','AT3G13380.1','canola','arabidopsis',32.44600,'3.05e-143',0,0),(690836,'BnaA07g31480D','AT5G63930.1','canola','arabidopsis',33.05600,'1.84e-142',0,0),(690837,'BnaA07g31480D','AT5G63930.2','canola','arabidopsis',33.23900,'3.94e-141',0,0),(824264,'BnaC07g23540D','AT5G65470.1','canola','arabidopsis',39.72900,'9.91e-109',0,0),(824265,'BnaC07g23540D','AT4G24530.1','canola','arabidopsis',36.96500,'1.03e-108',0,0),(824266,'BnaC07g23540D','AT2G01480.1','canola','arabidopsis',41.78400,'2.97e-107',0,0),(824267,'BnaC07g23540D','AT2G01480.2','canola','arabidopsis',43.07700,'6.19e-104',0,0),(824268,'BnaC07g23540D','AT1G14970.2','canola','arabidopsis',40.00000,'1.03e-103',0,0),(824269,'BnaC07g23540D','AT1G14970.1','canola','arabidopsis',40.00000,'2.33e-102',0,0),(824270,'BnaC07g23540D','AT1G14970.3','canola','arabidopsis',40.24700,'2.02e-101',0,0),(824271,'BnaC07g23540D','AT1G38065.2','canola','arabidopsis',39.86200,'2.00e-95',0,0),(824272,'BnaC07g23550D','AT3G26400.1','canola','arabidopsis',77.19600,'0.0',0,0),(824273,'BnaC07g23550D','AT1G13020.1','canola','arabidopsis',67.31400,'1.71e-180',0,0),(824274,'BnaC07g23560D','AT3G26410.1','canola','arabidopsis',93.21100,'0.0',0,0),(824275,'BnaC07g23570D','AT3G26420.1','canola','arabidopsis',79.75700,'1.69e-126',0,0),(824276,'BnaC07g23570D','AT2G21660.1','canola','arabidopsis',60.49400,'1.99e-30',0,0),(824277,'BnaC07g23570D','AT4G39260.3','canola','arabidopsis',57.31700,'6.84e-30',0,0),(824278,'BnaC07g23570D','AT5G04280.1','canola','arabidopsis',41.95100,'2.88e-29',0,0),(824279,'BnaC07g23570D','AT4G39260.2','canola','arabidopsis',58.02500,'4.78e-29',0,0),(824280,'BnaC07g23570D','AT1G60650.2','canola','arabidopsis',40.44100,'3.40e-28',0,0),(824281,'BnaC07g23570D','AT1G60650.1','canola','arabidopsis',40.44100,'3.40e-28',0,0),(824282,'BnaC07g23570D','AT4G39260.4','canola','arabidopsis',56.06100,'2.46e-21',0,0),(824283,'BnaC07g23580D','AT3G26430.1','canola','arabidopsis',84.21100,'0.0',0,0),(824284,'BnaC07g23580D','AT1G67830.1','canola','arabidopsis',60.58800,'2.97e-155',0,0),(824285,'BnaC07g23580D','AT5G14450.1','canola','arabidopsis',50.66000,'4.50e-131',0,0),(1320148,'BnaA10g09850D','BnaA10g09850D','canola','canola',100.00000,'5.68e-110',0,0),(1320149,'BnaA10g09850D','BnaC09g32290D','canola','canola',93.59000,'5.28e-93',0,0),(1320150,'BnaA10g09850D','BnaC02g12890D','canola','canola',72.85700,'3.68e-26',0,0),(1320151,'BnaA10g09860D','BnaA10g09860D','canola','canola',100.00000,'0.0',0,0),(1320152,'BnaA10g09860D','BnaC09g32300D','canola','canola',86.68300,'0.0',0,0),(1320153,'BnaA10g09860D','BnaA02g08940D','canola','canola',69.50900,'0.0',0,0),(1320154,'BnaA10g09860D','BnaC02g12870D','canola','canola',83.58500,'0.0',0,0),(1320155,'BnaA10g09860D','BnaC08g12230D','canola','canola',58.34400,'1.15e-177',0,0),(1320156,'BnaA10g09860D','BnaA01g15750D','canola','canola',55.24900,'1.77e-162',0,0),(1320157,'BnaA10g09860D','BnaC01g18800D','canola','canola',62.35600,'5.48e-116',0,0),(1320158,'BnaA10g09860D','BnaA06g19020D','canola','canola',34.15200,'4.23e-50',0,0),(1320159,'BnaA10g09860D','BnaC09g05960D','canola','canola',44.25500,'2.79e-46',0,0),(1320160,'BnaA10g09860D','BnaA06g22700D','canola','canola',42.16900,'2.31e-43',0,0),(1320161,'BnaA10g09870D','BnaA10g09870D','canola','canola',100.00000,'0.0',0,0),(1320162,'BnaA10g09870D','BnaC09g32310D','canola','canola',90.82800,'0.0',0,0),(1320163,'BnaA10g09870D','BnaA02g08930D','canola','canola',72.72700,'0.0',0,0),(1320164,'BnaA10g09870D','BnaC02g12860D','canola','canola',70.51700,'0.0',0,0),(1320165,'BnaA10g09870D','BnaA03g11280D','canola','canola',70.32600,'0.0',0,0),(1320166,'BnaA10g09870D','BnaC03g71710D','canola','canola',87.03700,'1.49e-113',0,0),(1320167,'BnaA10g09880D','BnaA10g09880D','canola','canola',100.00000,'0.0',0,0),(1320168,'BnaA10g09880D','BnaC09g32320D','canola','canola',98.12600,'0.0',0,0),(1320169,'BnaA10g09880D','BnaA02g08870D','canola','canola',85.38300,'0.0',0,0),(1320170,'BnaA10g09880D','BnaA01g20560D','canola','canola',51.22000,'1.02e-130',0,0),(1320171,'BnaA10g09880D','BnaC01g25850D','canola','canola',51.81100,'9.19e-130',0,0),(1320172,'BnaA10g09880D','BnaA06g19130D','canola','canola',51.24000,'4.00e-128',0,0),(1356296,'BnaA07g31480D','BnaA07g31480D','canola','canola',100.00000,'0.0',0,0),(1356297,'BnaA07g31480D','BnaC06g35270D','canola','canola',96.79400,'0.0',0,0),(1356298,'BnaA07g31480D','BnaCnng50590D','canola','canola',75.29100,'5.45e-180',0,0),(1356299,'BnaA07g31480D','BnaA02g11590D','canola','canola',74.41900,'3.01e-178',0,0),(1356300,'BnaA07g31480D','BnaC05g13410D','canola','canola',32.51300,'7.21e-153',0,0),(1356301,'BnaA07g31480D','BnaA06g11580D','canola','canola',33.08800,'1.36e-152',0,0),(1356302,'BnaA07g31480D','BnaC07g47240D','canola','canola',33.42600,'6.47e-151',0,0),(1356303,'BnaA07g31480D','BnaA01g05490D','canola','canola',33.58100,'3.69e-148',0,0),(1356304,'BnaA07g31480D','BnaC07g21390D','canola','canola',32.23900,'2.77e-147',0,0),(1356305,'BnaA07g31480D','BnaA06g34400D','canola','canola',31.89600,'3.39e-147',0,0); +/*!40000 ALTER TABLE `homologs` ENABLE KEYS */; +UNLOCK TABLES; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2025-03-29 19:28:31 diff --git a/config/init.sh b/config/init.sh index 23bd482..68aff95 100755 --- a/config/init.sh +++ b/config/init.sh @@ -14,6 +14,7 @@ mysql -u $DB_USER -p$DB_PASS < ./config/databases/arabidopsis_ecotypes.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/arachis.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/brassica_rapa.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/cannabis.sql +mysql -u $DB_USER -p$DB_PASS < ./config/databases/canola_nssnp.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/dna_damage.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/embryo.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/eplant2.sql @@ -23,6 +24,7 @@ mysql -u $DB_USER -p$DB_PASS < ./config/databases/eplant_soybean.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/eplant_tomato.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/fastpheno.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/germination.sql +mysql -u $DB_USER -p$DB_PASS < ./config/databases/homologs_db.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/kalanchoe.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/klepikova.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/llama3.sql @@ -41,7 +43,6 @@ mysql -u $DB_USER -p$DB_PASS < ./config/databases/tomato_nssnp.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/tomato_sequence.sql mysql -u $DB_USER -p$DB_PASS < ./config/databases/triphysaria.sql - echo "Data are now loaded. Preparing API config" echo "Please manually edit config file!" diff --git a/docker-compose.yml b/docker-compose.yml index 2e690b5..5559c36 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,14 @@ services: mysqldb: - image: mysql:9.0.1 + image: mysql:9.3.0 container_name: BAR_mysqldb restart: always environment: - MYSQL_ROOT_PASSWORD=root redis: - image: redis:7.2.5 + image: redis:7.4.2 container_name: BAR_redis restart: always ports: diff --git a/docs/requirements.txt b/docs/requirements.txt index 5bcfd2f..f73d28e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,27 +1,31 @@ -alabaster==0.7.16 -Babel==2.14.0 -beautifulsoup4==4.12.3 -certifi==2024.2.2 -charset-normalizer==3.3.2 +alabaster==1.0.0 +babel==2.17.0 +beautifulsoup4==4.13.4 +certifi==2025.1.31 +charset-normalizer==3.4.1 docutils==0.21.2 -furo==2024.4.27 -idna==3.7 +furo==2024.8.6 +idna==3.10 imagesize==1.4.1 -Jinja2==3.1.3 -MarkupSafe==2.1.5 -packaging==24.0 -Pygments==2.17.2 -pytz==2024.1 -requests==2.31.0 +Jinja2==3.1.6 +MarkupSafe==3.0.2 +packaging==25.0 +Pygments==2.19.1 +pytz==2025.2 +requests==2.32.3 +roman-numerals-py==3.1.0 +setuptools==78.1.1 snowballstemmer==2.2.0 -soupsieve==2.5 -Sphinx==7.3.7 -sphinx-basic-ng==1.0.0b1 +soupsieve==2.6 +Sphinx==8.2.3 +sphinx-basic-ng==1.0.0b2 sphinx-copybutton==0.5.2 -sphinxcontrib-applehelp==1.0.8 -sphinxcontrib-devhelp==1.0.6 -sphinxcontrib-htmlhelp==2.0.5 +sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.7 -sphinxcontrib-serializinghtml==1.1.10 -urllib3==2.2.1 +sphinxcontrib-qthelp==2.0.0 +sphinxcontrib-serializinghtml==2.0.0 +typing_extensions==4.13.2 +urllib3==2.4.0 +wheel==0.45.1 diff --git a/requirements.txt b/requirements.txt index 2657891..41ef7a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,34 +1,34 @@ -aniso8601==10.0.0 +aniso8601==10.0.1 async-timeout==5.0.1 -attrs==25.1.0 +attrs==25.3.0 black==25.1.0 blinker==1.9.0 cachelib==0.9.0 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 -coverage==7.6.10 +coverage==7.8.0 Deprecated==1.2.18 -flake8==7.1.1 +flake8==7.2.0 Flask==3.1.0 -Flask-Caching==2.3.0 -Flask-Cors==5.0.0 -Flask-Limiter==3.10.1 +Flask-Caching==2.3.1 +flask-cors==5.0.1 +Flask-Limiter==3.12 flask-marshmallow==1.3.0 flask-restx==1.3.0 Flask-SQLAlchemy==3.1.1 -greenlet==3.1.1 +greenlet==3.2.0 idna==3.10 importlib_resources==6.5.2 -iniconfig==2.0.0 +iniconfig==2.1.0 itsdangerous==2.2.0 -Jinja2==3.1.5 +Jinja2==3.1.6 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -limits==4.0.1 +limits==5.0.0 markdown-it-py==3.0.0 MarkupSafe==3.0.2 -marshmallow==3.26.1 +marshmallow==4.0.0 mccabe==0.7.0 mdurl==0.1.2 mypy-extensions==1.0.0 @@ -36,25 +36,25 @@ mysqlclient==2.2.7 ordered-set==4.1.0 packaging==24.2 pathspec==0.12.1 -platformdirs==4.3.6 +platformdirs==4.3.7 pluggy==1.5.0 -pycodestyle==2.12.1 -pyflakes==3.2.0 +pycodestyle==2.13.0 +pyflakes==3.3.2 Pygments==2.19.1 pyrsistent==0.20.0 -pytest==8.3.4 +pytest==8.3.5 python-dateutil==2.9.0.post0 -pytz==2025.1 +pytz==2025.2 redis==5.2.1 referencing==0.36.2 requests==2.32.3 rich==13.9.4 -rpds-py==0.22.3 -setuptools==75.8.0 +rpds-py==0.24.0 +setuptools==78.1.1 six==1.17.0 -SQLAlchemy==2.0.38 -typing_extensions==4.12.2 -urllib3==2.3.0 +SQLAlchemy==2.0.40 +typing_extensions==4.13.2 +urllib3==2.4.0 Werkzeug==3.1.3 wheel==0.45.1 wrapt==1.17.2 diff --git a/tests/data/get_canola_homolog_information.json b/tests/data/get_canola_homolog_information.json new file mode 100644 index 0000000..e11763c --- /dev/null +++ b/tests/data/get_canola_homolog_information.json @@ -0,0 +1,47 @@ +{ + "wasSuccessful": true, + "data": { + "homologs": [ + { + "search_species_name": "arabidopsis", + "search_protein_name": "AT5G16970.1", + "result_species_name": "canola", + "result_protein_name": "BnaC09g40930D", + "Percent_id": 86.047, + "e_score": 0.0 + }, + { + "search_species_name": "arabidopsis", + "search_protein_name": "AT5G16970.1", + "result_species_name": "canola", + "result_protein_name": "BnaA10g17570D", + "Percent_id": 86.047, + "e_score": 0.0 + }, + { + "search_species_name": "arabidopsis", + "search_protein_name": "AT5G16970.1", + "result_species_name": "canola", + "result_protein_name": "BnaC03g08130D", + "Percent_id": 85.088, + "e_score": 0.0 + }, + { + "search_species_name": "arabidopsis", + "search_protein_name": "AT5G16970.1", + "result_species_name": "canola", + "result_protein_name": "BnaA05g32330D", + "Percent_id": 81.633, + "e_score": 0.0 + }, + { + "search_species_name": "arabidopsis", + "search_protein_name": "AT5G16970.1", + "result_species_name": "canola", + "result_protein_name": "BnaCnng06210D", + "Percent_id": 80.87, + "e_score": 0.0 + } + ] + } +} diff --git a/tests/resources/test_proxy.py b/tests/resources/test_proxy.py index f7467b8..aca80e5 100644 --- a/tests/resources/test_proxy.py +++ b/tests/resources/test_proxy.py @@ -25,7 +25,6 @@ def test_get_atted_api5(self): # If no data, the service should return this response response = self.app_client.get("/proxy/atted_api5/At1g01011/5") - expected = {"error": "No gene ID specified.", "status_code": 400} self.assertEqual(response.json, expected) diff --git a/tests/resources/test_snps.py b/tests/resources/test_snps.py index 61e6d4b..b634a88 100644 --- a/tests/resources/test_snps.py +++ b/tests/resources/test_snps.py @@ -1,6 +1,7 @@ from api import app from unittest import TestCase import pytest +from json import load class TestIntegrations(TestCase): @@ -95,6 +96,31 @@ def test_get_snps(self): } self.assertEqual(response.json, expected) + # Valid request canola + response = self.app_client.get("/snps/canola/BnaC09g12790D") + expected = { + "wasSuccessful": True, + "data": [ + [ + "chrC09", + 22, + None, + "missense_variant", + "MODERATE", + "MISSENSE", + "67C>A", + "ValPhe", + None, + "BnaC09g12790D", + "protein_coding", + "CODING", + "GSBRNA2T00000007001", + 0.0066, + ] + ], + } + self.assertEqual(response.json, expected) + # Invalid gene id response = self.app_client.get("/snps/poplar/abc") expected = {"wasSuccessful": False, "error": "Invalid gene id"} @@ -108,6 +134,19 @@ def test_get_snps(self): } self.assertEqual(response.json, expected) + # Invalid gene id for canola + response = self.app_client.get("/snps/canola/abc") + expected = {"wasSuccessful": False, "error": "Invalid gene id"} + self.assertEqual(response.json, expected) + + # Gene does not exist for canola + response = self.app_client.get("/snps/canola/BnaC07g99930D") + expected = { + "wasSuccessful": False, + "error": "There are no data found for the given gene", + } + self.assertEqual(response.json, expected) + def test_get_sample_definitions(self): """ Test cases for sample definition @@ -218,3 +257,41 @@ def test_pymol_snps_pymol_unneeded(self): response = self.app_client.get("/snps/pymol/Potri.016G107900.1?snps=25l&chain=None") expected = {"wasSuccessful": False, "error": "Invalid SNP string format"} self.assertEqual(response.json, expected) + + def test_homologs(self): + + # test for get homologs + response = self.app_client.get("/snps/homologs/arabidopsis/AT5G16970.1/canola") + with open("tests/data/get_canola_homolog_information.json") as file: + expected = load(file) + self.assertEqual(response.json, expected) + + # test for invalid input + response = self.app_client.get("/snps/homologs/rice/AT3G18710.1/canola") + expected = { + "wasSuccessful": False, + "error": "Species not supported", + } + self.assertEqual(response.json, expected) + + response = self.app_client.get("/snps/homologs/arabidopsis/AT3G18710.1/rice") + expected = { + "wasSuccessful": False, + "error": "Species not supported", + } + self.assertEqual(response.json, expected) + + response = self.app_client.get("/snps/homologs/arabidopsis/abc/canola") + expected = { + "wasSuccessful": False, + "error": "Invalid gene id", + } + self.assertEqual(response.json, expected) + + # test for no homologs data + response = self.app_client.get("/snps/homologs/arabidopsis/AT3G18710.1/canola") + expected = { + "wasSuccessful": False, + "error": "No homologs found for the given query", + } + self.assertEqual(response.json, expected)