Skip to content

Commit fe06b4a

Browse files
authored
Merge pull request #508 from VariantEffect/release-2025.3.1
Release 2025.3.1
2 parents 9f0e5d7 + e3ab0e4 commit fe06b4a

29 files changed

+560
-187
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# python-base
33
# Set up shared environment variables
44
################################
5-
FROM python:3.11 AS python-base
5+
FROM --platform=amd64 python:3.11 AS python-base
66

77
# Poetry
88
# https://python-poetry.org/docs/configuration/#using-environment-variables
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""rename_tax_id_to_code
2+
3+
Revision ID: 019eb75ad9ae
4+
Revises: 0b29eefbe1dd
5+
Create Date: 2025-08-20 16:21:15.872816
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '019eb75ad9ae'
14+
down_revision = '0b29eefbe1dd'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Rename the ``tax_id`` column of the ``taxonomies`` table to ``code``."""
    # The column keeps its existing Integer type and NOT NULL setting; only
    # the name changes.
    op.alter_column(
        "taxonomies",
        "tax_id",
        new_column_name="code",
        existing_type=sa.Integer(),
        existing_nullable=False,
    )
23+
24+
25+
def downgrade():
    """Rename the ``code`` column of the ``taxonomies`` table back to ``tax_id``."""
    # Exact inverse of upgrade(): same table, same Integer / NOT NULL column,
    # with the old and new names swapped.
    op.alter_column(
        "taxonomies",
        "code",
        new_column_name="tax_id",
        existing_type=sa.Integer(),
        existing_nullable=False,
    )
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""controlled keyword modification
2+
3+
Revision ID: 0b29eefbe1dd
4+
Revises: 2b7a977e7e98
5+
Create Date: 2025-08-07 15:45:43.241567
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '0b29eefbe1dd'
14+
down_revision = '2b7a977e7e98'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Reshape ``controlled_keywords`` and seed an 'Other' keyword row.

    Steps, in order:

    1. Rename ``value`` -> ``label`` and ``vocabulary`` -> ``system``.
    2. Add the nullable string columns ``code`` and ``version``.
    3. Replace the unique constraint ``ix_controlled_keywords_key_value`` with
       ``ix_controlled_keywords_key_label`` on ``(key, label)``.
    4. Insert an 'Other' row for the 'Phenotypic Assay Mechanism' key.
    """
    table = "controlled_keywords"

    # 1. Column renames; types and nullability are unchanged.
    op.alter_column(table, "value", new_column_name="label", existing_type=sa.String(), existing_nullable=False)
    op.alter_column(table, "vocabulary", new_column_name="system", existing_type=sa.String(), existing_nullable=True)

    # 2. New optional columns.
    for column_name in ("code", "version"):
        op.add_column(table, sa.Column(column_name, sa.String(), nullable=True))

    # 3. The old constraint is dropped by name and recreated against the
    #    renamed column.
    op.drop_constraint("ix_controlled_keywords_key_value", table, type_="unique")
    op.create_unique_constraint("ix_controlled_keywords_key_label", table, ["key", "label"])

    # 4. Data seed.
    # TODO: Will modify this part when we get the final GO terms.
    seed_other_keyword = """INSERT INTO controlled_keywords (key, label, system, code, version, special, description, creation_date, modification_date) VALUES ('Phenotypic Assay Mechanism', 'Other', NULL, NULL, NULL, False, 'The Gene Ontology (GO) is a structured, standardized representation of biological knowledge.', NOW(), NOW())"""
    op.execute(seed_other_keyword)
36+
37+
38+
def downgrade():
    """Revert the ``controlled_keywords`` changes made by ``upgrade``.

    Removes the seeded 'Other' row for 'Phenotypic Assay Mechanism', renames
    ``label`` -> ``value`` and ``system`` -> ``vocabulary``, restores the
    ``ix_controlled_keywords_key_value`` unique constraint, and drops the
    ``version`` and ``code`` columns.
    """
    # upgrade() inserts this row. If it is left in place, a later re-upgrade
    # fails on the (key, label) unique constraint when the same INSERT runs
    # again. Delete it while the column is still named `label`.
    # NOTE(review): if rows in other tables reference this keyword, the DELETE
    # will fail on the foreign key -- confirm that failing loudly is the
    # desired behaviour for a downgrade.
    op.execute(
        """DELETE FROM controlled_keywords WHERE key = 'Phenotypic Assay Mechanism' AND label = 'Other'"""
    )

    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column('controlled_keywords', 'label', new_column_name='value', existing_type=sa.String(), existing_nullable=False)
    op.alter_column('controlled_keywords', 'system', new_column_name='vocabulary', existing_type=sa.String(), existing_nullable=True)
    # The constraint is dropped by name and recreated against the restored
    # `value` column.
    op.drop_constraint('ix_controlled_keywords_key_label', 'controlled_keywords', type_='unique')
    op.create_unique_constraint('ix_controlled_keywords_key_value', 'controlled_keywords', ['key', 'value'])
    op.drop_column('controlled_keywords', 'version')
    op.drop_column('controlled_keywords', 'code')
    # ### end Alembic commands ###

src/mavedb/lib/experiments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def search_experiments(
100100
query = query.filter(
101101
Experiment.keyword_objs.any(
102102
ExperimentControlledKeywordAssociation.controlled_keyword.has(
103-
ControlledKeyword.value.in_(search.keywords)
103+
ControlledKeyword.label.in_(search.keywords)
104104
)
105105
)
106106
)

src/mavedb/lib/keywords.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
from mavedb.models.controlled_keyword import ControlledKeyword
77

88

9-
def search_keyword(db: Session, key: str, value: Optional[str]):
9+
def search_keyword(db: Session, key: str, label: Optional[str]):
1010
lower_key = key.lower().strip()
11-
lower_value = value.lower().strip() if value is not None else None
11+
lower_label = label.lower().strip() if label is not None else None
1212
query = db.query(ControlledKeyword)
1313
if lower_key:
1414
query = query.filter(func.lower(ControlledKeyword.key) == lower_key)
15-
if lower_value:
16-
query = query.filter(func.lower(ControlledKeyword.value) == lower_value)
15+
if lower_label:
16+
query = query.filter(func.lower(ControlledKeyword.label) == lower_label)
1717

1818
controlled_keyword = query.one_or_none()
1919
if controlled_keyword is None:
20-
raise ValueError(f"Invalid keyword {key} or {value}")
20+
raise ValueError(f"Invalid keyword {key} or {label}")
2121
return controlled_keyword

src/mavedb/lib/score_sets.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
114114
ScoreSet.target_genes.any(
115115
TargetGene.target_accession.has(func.lower(TargetAccession.assembly).icontains(lower_search_text))
116116
),
117-
# TODO(#94): add LICENSE, plus TAX_ID if numeric
117+
# TODO(#94): add LICENSE, plus TAXONOMY CODE if numeric
118118
ScoreSet.publication_identifiers.any(
119119
func.lower(PublicationIdentifier.identifier).icontains(lower_search_text)
120120
),
@@ -208,7 +208,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
208208
ScoreSet.experiment.has(
209209
Experiment.keyword_objs.any(
210210
ExperimentControlledKeywordAssociation.controlled_keyword.has(
211-
ControlledKeyword.value.in_(search.keywords)
211+
ControlledKeyword.label.in_(search.keywords)
212212
)
213213
)
214214
)

src/mavedb/lib/taxonomies.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@
1010

1111
async def find_or_create_taxonomy(db: Session, taxonomy: TaxonomyCreate):
1212
"""
13-
Find an existing taxonomy ID record with the specified tax_id int, or create a new one.
13+
Find an existing taxonomy ID record with the specified code, or create a new one.
1414
1515
:param db: An active database session
1616
:param taxonomy: A TaxonomyCreate object containing the taxonomy details to search for or create.
1717
:return: An existing Taxonomy containing the specified taxonomy ID, or a new, unsaved Taxonomy
18-
tax_id: A valid taxonomy ID from NCBI
18+
code: A valid taxonomy ID from NCBI
1919
"""
20-
taxonomy_record = db.query(Taxonomy).filter(Taxonomy.tax_id == taxonomy.tax_id).one_or_none()
20+
taxonomy_record = db.query(Taxonomy).filter(Taxonomy.code == taxonomy.code).one_or_none()
2121
if not taxonomy_record:
22-
taxonomy_record = await search_NCBI_taxonomy(db, str(taxonomy.tax_id))
22+
taxonomy_record = await search_NCBI_taxonomy(db, str(taxonomy.code))
2323
return taxonomy_record
2424

2525

@@ -51,14 +51,14 @@ async def search_NCBI_taxonomy(db: Session, search: str) -> Any:
5151
ncbi_taxonomy.setdefault("rank", "NULL")
5252
ncbi_taxonomy.setdefault("has_described_species_name", False)
5353
taxonomy_record = Taxonomy(
54-
tax_id=ncbi_taxonomy["tax_id"],
54+
code=ncbi_taxonomy["code"],
5555
organism_name=ncbi_taxonomy["organism_name"],
5656
common_name=ncbi_taxonomy["common_name"],
5757
rank=ncbi_taxonomy["rank"],
5858
has_described_species_name=ncbi_taxonomy["has_described_species_name"],
5959
url="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=info&id="
60-
+ str(ncbi_taxonomy["tax_id"]),
61-
article_reference="NCBI:txid" + str(ncbi_taxonomy["tax_id"]),
60+
+ str(ncbi_taxonomy["code"]),
61+
article_reference="NCBI:txid" + str(ncbi_taxonomy["code"]),
6262
)
6363
db.add(taxonomy_record)
6464
db.commit()

src/mavedb/lib/validation/keywords.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,41 @@
1+
import re
12
from typing import Optional
23

34
from mavedb.lib.validation.exceptions import ValidationError
45
from mavedb.lib.validation.utilities import is_null
56

67

7-
# TODO: value will not be Optional when we confirm the final controlled keyword list.
8-
def validate_description(value: str, key: str, description: Optional[str]):
9-
if value.lower() == "other" and (description is None or description.strip() == ""):
8+
def validate_code(key: str, label: str, code: Optional[str]):
    """
    Validate the accession code supplied with a controlled keyword.

    Only keywords whose key is "phenotypic assay mechanism" (compared
    case-insensitively) and whose label is not "other" are checked. Any other
    keyword is accepted without inspecting ``code``.

    :param key: The controlled keyword key.
    :param label: The controlled keyword label.
    :param code: The accession to validate; may be None.
    :raises ValidationError: If a checked keyword has no code, or its code is
        not of the form ``GO:`` followed by exactly seven digits.
    """
    if key.lower() == "phenotypic assay mechanism" and label.lower() != "other":
        # The Gene Ontology accession is a unique seven digit identifier prefixed by GO:.
        # e.g. GO:0005739, GO:1904659, or GO:0016597.
        # fullmatch is used instead of match with "^...$" because "$" also
        # matches just before a trailing newline, which let "GO:0005739\n"
        # through. [0-9] is used instead of \d because \d also accepts
        # non-ASCII Unicode digits.
        if code is None or not re.fullmatch(r"GO:[0-9]{7}", code):
            raise ValidationError("Invalid Gene Ontology accession.")
14+
15+
16+
# TODO: label will not be Optional when we confirm the final controlled keyword list.
17+
def validate_description(label: str, key: str, description: Optional[str]):
18+
if label.lower() == "other" and (description is None or description.strip() == ""):
1019
raise ValidationError(
1120
"Other option does not allow empty description.", custom_loc=["body", "keywordDescriptions", key]
1221
)
1322

1423

1524
def validate_duplicates(keywords: list):
1625
keys = []
17-
values = []
26+
labels = []
1827
for k in keywords:
1928
keys.append(k.keyword.key.lower()) # k: ExperimentControlledKeywordCreate object
20-
if k.keyword.value.lower() != "other":
21-
values.append(k.keyword.value.lower())
29+
if k.keyword.label.lower() != "other":
30+
labels.append(k.keyword.label.lower())
2231

2332
keys_set = set(keys)
24-
values_set = set(values)
33+
labels_set = set(labels)
2534

2635
if len(keys) != len(keys_set):
2736
raise ValidationError("Duplicate keys found in keywords.")
28-
if len(values) != len(values_set):
29-
raise ValidationError("Duplicate values found in keywords.")
37+
if len(labels) != len(labels_set):
38+
raise ValidationError("Duplicate labels found in keywords.")
3039

3140

3241
def validate_keyword(keyword: str):
@@ -47,7 +56,7 @@ def validate_keyword(keyword: str):
4756

4857

4958
def validate_keyword_keys(keywords: list):
50-
keyword_dict = {k.keyword.key.lower(): k.keyword.value.lower() for k in keywords}
59+
keyword_dict = {k.keyword.key.lower(): k.keyword.label.lower() for k in keywords}
5160
variant_library_method = keyword_dict.get("variant library creation method", "")
5261

5362
if variant_library_method == "endogenous locus library method":

src/mavedb/models/controlled_keyword.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ class ControlledKeyword(Base):
1010

1111
id = Column(Integer, primary_key=True, index=True)
1212
key = Column(String, nullable=False)
13-
value = Column(String, nullable=False)
14-
vocabulary = Column(String, nullable=True)
13+
label = Column(String, nullable=False)
14+
system = Column(String, nullable=True)
15+
code = Column(String, nullable=True)
16+
version = Column(String, nullable=True)
1517
special = Column(Boolean, nullable=True)
1618
description = Column(String, nullable=True)
1719
creation_date = Column(Date, nullable=False, default=date.today)
1820
modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today)
19-
__table_args__ = (UniqueConstraint("key", "value", name="ix_controlled_keywords_key_value"),)
21+
__table_args__ = (UniqueConstraint("key", "label", name="ix_controlled_keywords_key_label"),)

src/mavedb/models/experiment.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,10 @@ def keywords(self) -> list[dict]:
161161
{
162162
"keyword": {
163163
"key": controlled_keyword.key,
164-
"value": controlled_keyword.value,
165-
"vocabulary": controlled_keyword.vocabulary,
164+
"label": controlled_keyword.label,
165+
"system": controlled_keyword.system,
166+
"code": controlled_keyword.code,
167+
"version": controlled_keyword.version,
166168
"special": controlled_keyword.special,
167169
"description": controlled_keyword.description,
168170
},
@@ -190,8 +192,8 @@ async def set_keywords(self, db, keywords: list):
190192
controlled_keyword=await self._find_keyword(
191193
db,
192194
keyword_obj.keyword.key,
193-
keyword_obj.keyword.value,
194-
keyword_obj.keyword.vocabulary,
195+
keyword_obj.keyword.label,
196+
keyword_obj.keyword.code,
195197
),
196198
description=keyword_obj.description,
197199
)
@@ -211,15 +213,15 @@ async def _find_or_create_legacy_keyword(self, db, keyword_text):
211213
keyword_obj = LegacyKeyword(text=keyword_text)
212214
return keyword_obj
213215

214-
async def _find_keyword(self, db, key: str, value: str, vocabulary: Optional[str]):
216+
async def _find_keyword(self, db, key: str, label: str, code: Optional[str]):
215217
query = (
216-
db.query(ControlledKeyword).filter(ControlledKeyword.key == key).filter(ControlledKeyword.value == value)
218+
db.query(ControlledKeyword).filter(ControlledKeyword.key == key).filter(ControlledKeyword.label == label)
217219
)
218-
if vocabulary:
219-
query = query.filter(ControlledKeyword.vocabulary == vocabulary)
220+
if code:
221+
query = query.filter(ControlledKeyword.code == code)
220222
controlled_keyword_obj = query.one_or_none()
221223
if controlled_keyword_obj is None:
222-
raise ValueError(f"Unknown keyword {key}:{value}")
224+
raise ValueError(f"Unknown keyword {key}:{label}")
223225
return controlled_keyword_obj
224226

225227

0 commit comments

Comments
 (0)