Skip to content

Commit 8e3fd35

Browse files
committed
Deleted some unused files, renamed efp proxy to gene_expression
1 parent a404be1 commit 8e3fd35

File tree

13 files changed

+4269
-579
lines changed

13 files changed

+4269
-579
lines changed

api/Archive/embryo_efp_feb_6_2025_dump.sql

Lines changed: 79 additions & 0 deletions
Large diffs are not rendered by default.

api/Archive/sample_data_results.csv

Lines changed: 1729 additions & 0 deletions
Large diffs are not rendered by default.

api/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def create_app():
146146
from api.resources.efp_image import efp_image
147147
from api.resources.fastpheno import fastpheno
148148
from api.resources.llama3 import llama3
149-
from api.resources.efp_proxy import efp_proxy_ns
149+
from api.resources.efp_gene_expression import gene_expression
150150

151151
bar_api.add_namespace(gene_information)
152152
bar_api.add_namespace(rnaseq_gene_expression)
@@ -161,7 +161,7 @@ def create_app():
161161
bar_api.add_namespace(efp_image)
162162
bar_api.add_namespace(fastpheno)
163163
bar_api.add_namespace(llama3)
164-
bar_api.add_namespace(efp_proxy_ns)
164+
bar_api.add_namespace(gene_expression)
165165
bar_api.init_app(bar_app)
166166
return bar_app
167167

api/models/efp_schemas.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,44 @@ def _schema(species: str, charset: str = "latin1") -> DatabaseSpec:
234234
("willow", "willow"),
235235
]
236236

237+
# Databases whose expression tables are keyed by Affymetrix/microarray probeset
# IDs rather than gene identifiers.  For the Arabidopsis members, the API
# auto-converts AGI → probeset via the at_agi_lookup service before querying
# expression data.
_PROBESET_DBS = {
    # Arabidopsis microarray databases (Affymetrix ATH1 chip, need AGI→probeset lookup)
    "affydb",
    "arabidopsis_ecotypes",
    "atgenexp",
    "atgenexp_hormone",
    "atgenexp_pathogen",
    "atgenexp_plus",
    "atgenexp_stress",
    "guard_cell",
    "hnahal",
    "lateral_root_initiation",
    "light_series",
    "meristem_db",
    "meristem_db_new",
    "root",
    "rohan",
    "rpatel",
    "seed_db",
} | {
    # Non-Arabidopsis microarray databases (probeset IDs, no AGI lookup needed)
    "barley_mas",
    "barley_rma",
    "human_developmental",
    "human_developmental_SpongeLab",
    "human_diseased",
    "maize_gdowns",
    "medicago_mas",
    "medicago_rma",
    "poplar",
    "rice_mas",
    "rice_rma",
    "triticale",
    "triticale_mas",
}
274+
237275
# databases that use utf8mb4 charset (all others default to latin1)
238276
_UTF8MB4 = {
239277
"actinidia_bud_development", "actinidia_flower_fruit_development",
@@ -281,7 +319,10 @@ def _schema(species: str, charset: str = "latin1") -> DatabaseSpec:
281319
# fmt: on
282320

283321
# One schema per (database name, species) pair in _SPECS.  The charset is
# utf8mb4 for names listed in _UTF8MB4 and latin1 otherwise; databases listed
# in _PROBESET_DBS additionally carry identifier_type="probeset".
SIMPLE_EFP_DATABASE_SCHEMAS: Dict[str, DatabaseSpec] = {
    db_name: (
        {**_schema(species, charset), "identifier_type": "probeset"}
        if db_name in _PROBESET_DBS
        else {**_schema(species, charset)}
    )
    for db_name, species, charset in (
        (n, s, "utf8mb4" if n in _UTF8MB4 else "latin1") for n, s in _SPECS
    )
}
287328

api/resources/efp_proxy.py

Lines changed: 0 additions & 46 deletions
This file was deleted.

api/resources/gene_expression.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from flask_restx import Namespace, Resource
2+
from markupsafe import escape
3+
4+
from api import db
5+
from api.models.annotations_lookup import AtAgiLookup
6+
from api.services.efp_data import query_efp_database_dynamic, DYNAMIC_DATABASE_SCHEMAS
7+
8+
# Flask-RESTX namespace for the eFP gene-expression endpoints, served under
# the /gene_expression path.
gene_expression = Namespace(
    name='Gene Expression',
    path='/gene_expression',
    description='Gene expression data from BAR eFP databases',
)
13+
14+
15+
@gene_expression.route("/expression/<string:database>/<string:gene_id>")
@gene_expression.doc(
    description="Retrieve gene expression values from a specified eFP database."
)
@gene_expression.param(
    "gene_id",
    "Gene ID (AGI format like AT1G01010 or probeset like 261585_at)",
    _in="path",
    default="AT1G01010",
)
@gene_expression.param(
    "database",
    "Database name (e.g., sample_data, klepikova, single_cell)",
    _in="path",
    default="klepikova",
)
class GeneExpression(Resource):
    # Resource returning expression values for one gene from one eFP database.
    # (Kept as comments rather than docstrings so the generated API docs,
    # which are driven by the decorators above, stay unchanged.)

    def get(self, database, gene_id):
        # Parameters (both taken from the URL path):
        #   database -- name of the eFP database to query
        #   gene_id  -- gene identifier; an AGI-looking ID is translated to a
        #               probeset when the database schema is probeset-keyed
        # Returns the result dict from query_efp_database_dynamic on success,
        # otherwise a (dict, status) pair: 404 when no probeset maps to the
        # AGI, or the result's "error_code" (default 500) when the query fails.
        database = escape(database)
        gene_id = escape(gene_id)

        upper_id = gene_id.upper()
        # Loose AGI heuristic: starts with "AT" and contains a "G".
        is_agi = upper_id.startswith("AT") and "G" in upper_id

        # for databases that store probeset IDs, convert AGI to probeset via at_agi_lookup
        schema = DYNAMIC_DATABASE_SCHEMAS.get(str(database))
        if schema and is_agi and schema.get("identifier_type") == "probeset":
            newest_probeset = (
                db.select(AtAgiLookup.probeset)
                .where(AtAgiLookup.agi == upper_id)
                .order_by(AtAgiLookup.date.desc())
                .limit(1)
            )
            # Fetch the row in a single round-trip instead of COUNT + SELECT;
            # this also removes the window between the check and the read.
            row = db.session.execute(newest_probeset).first()
            if row is None:
                return {"success": False, "error": f"No probeset found for {gene_id}", "error_code": 404}, 404
            gene_id = row[0]

        result = query_efp_database_dynamic(database, gene_id, sample_ids=None)

        if result["success"]:
            return result
        return result, result.get("error_code", 500)
62+
63+
64+
# NOTE(review): GeneExpression is already bound to this same URL by its
# @gene_expression.route(...) decorator, so this explicit registration looks
# redundant — confirm and consider removing one of the two.
gene_expression.add_resource(GeneExpression, '/expression/<string:database>/<string:gene_id>')

0 commit comments

Comments
 (0)