|
1 | 1 | from flask_restx import Namespace, Resource |
2 | 2 | from markupsafe import escape |
3 | 3 |
|
4 | | -from api import db |
5 | | -from api.models.annotations_lookup import AtAgiLookup |
6 | | -from api.services.efp_data import query_efp_database_dynamic, DYNAMIC_DATABASE_SCHEMAS |
| 4 | +from api.services.efp_data import query_efp_database_dynamic |
| 5 | +from api.utils.bar_utils import BARUtils |
| 6 | +from api.utils.gene_id_utils import ( |
| 7 | + CROSS_SPECIES_DATABASES, |
| 8 | + DATABASE_SPECIES, |
| 9 | + PROBESET_DATABASES, |
| 10 | + convert_gene_to_probeset, |
| 11 | + is_probeset_id, |
| 12 | + normalize_gene_id, |
| 13 | + validate_gene_id, |
| 14 | +) |
7 | 15 |
|
8 | 16 | gene_expression = Namespace( |
9 | 17 | 'Gene Expression', |
|
18 | 26 | ) |
@gene_expression.param(
    "gene_id",
    "Gene ID (e.g. AT1G01010 for Arabidopsis, or a probeset like 261585_at)",
    _in="path",
    default="AT1G01010",
)
@gene_expression.param(
    "database",
    "Database name (e.g. klepikova, atgenexp, sample_data)",
    _in="path",
    default="klepikova",
)
class GeneExpression(Resource):
    """Resource that queries one database for one gene or probeset ID."""

    def get(self, database, gene_id):
        """Resolve the identifier to query with, then run the database query.

        Both path parameters are escaped and converted to plain strings
        before use.

        Returns:
            ``BARUtils.success_exit(result)`` when the query reports success.
            Otherwise a ``(BARUtils.error_exit(message), status)`` tuple:
            400 for a database missing from ``DATABASE_SPECIES`` or a gene ID
            that fails validation, 404 when gene-to-probeset conversion
            reports an error, and the query's own ``error_code`` (500 if
            absent) when the query itself fails.
        """
        database = str(escape(database))
        gene_id = str(escape(gene_id))

        # The database's own species; an unmapped database is rejected.
        db_species = DATABASE_SPECIES.get(database)
        if db_species is None:
            return BARUtils.error_exit(f"Unknown database '{database}'"), 400

        # Cross-species databases (e.g. phelipanche) accept an Arabidopsis AGI
        # even though the database belongs to a different species, so the
        # species used for validation may differ from the database species.
        expected_species = CROSS_SPECIES_DATABASES.get(database, db_species)

        # A probeset ID supplied by the caller is used as-is; anything else
        # is validated, normalised and, where needed, converted.
        lookup_id = gene_id
        if not is_probeset_id(gene_id):
            if not validate_gene_id(gene_id, expected_species):
                return BARUtils.error_exit(f"Invalid {expected_species} gene ID: '{gene_id}'"), 400

            # Normalise (e.g. strip maize transcript suffix _T##).
            lookup_id = normalize_gene_id(gene_id, db_species)

            # Microarray / non-direct databases are keyed by probeset, so the
            # gene ID has to be converted before querying.
            if database in PROBESET_DATABASES:
                lookup_id, conversion_error = convert_gene_to_probeset(lookup_id, db_species, database)
                if conversion_error:
                    return BARUtils.error_exit(conversion_error), 404

        outcome = query_efp_database_dynamic(database, lookup_id)

        if not outcome["success"]:
            return BARUtils.error_exit(outcome["error"]), outcome.get("error_code", 500)
        return BARUtils.success_exit(outcome)
62 | 77 |
|
63 | 78 |
|
# Register the resource on the namespace; both path segments are strings.
gene_expression.add_resource(
    GeneExpression,
    '/expression/<string:database>/<string:gene_id>',
)
0 commit comments