Skip to content

Commit cce3ffd

Browse files
authored
Merge pull request #407 from VariantEffect/estelle/geneOntologyControlledKeywords
Controlled keywords - gene ontology
2 parents f6443e7 + a28ea72 commit cce3ffd

File tree

17 files changed

+288
-135
lines changed

17 files changed

+288
-135
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""controlled keyword modification
2+
3+
Revision ID: 0b29eefbe1dd
4+
Revises: 2b7a977e7e98
5+
Create Date: 2025-08-07 15:45:43.241567
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '0b29eefbe1dd'
14+
down_revision = '2b7a977e7e98'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Apply revision 0b29eefbe1dd to the ``controlled_keywords`` table.

    Renames ``value`` -> ``label`` and ``vocabulary`` -> ``system``, adds the
    nullable string columns ``code`` and ``version``, swaps the
    ``(key, value)`` unique constraint for one on ``(key, label)``, and
    inserts the 'Other' row for the 'Phenotypic Assay Mechanism' key.
    """
    table = 'controlled_keywords'

    # Rename existing columns: (old name, new name, existing nullability).
    renames = (
        ('value', 'label', False),
        ('vocabulary', 'system', True),
    )
    for old_name, new_name, nullable in renames:
        op.alter_column(
            table,
            old_name,
            new_column_name=new_name,
            existing_type=sa.String(),
            existing_nullable=nullable,
        )

    # Add new columns.
    for column_name in ('code', 'version'):
        op.add_column(table, sa.Column(column_name, sa.String(), nullable=True))

    # Drop and recreate the unique constraint under the new column name.
    op.drop_constraint('ix_controlled_keywords_key_value', table, type_='unique')
    op.create_unique_constraint('ix_controlled_keywords_key_label', table, ['key', 'label'])

    # TODO: Will modify this part when we get the final GO terms.
    seed_other_keyword = """INSERT INTO controlled_keywords (key, label, system, code, version, special, description, creation_date, modification_date) VALUES ('Phenotypic Assay Mechanism', 'Other', NULL, NULL, NULL, False, 'The Gene Ontology (GO) is a structured, standardized representation of biological knowledge.', NOW(), NOW())"""
    op.execute(seed_other_keyword)
36+
37+
38+
def downgrade():
    """Revert revision 0b29eefbe1dd on the ``controlled_keywords`` table.

    Removes the row seeded by ``upgrade()``, renames ``label`` -> ``value`` and
    ``system`` -> ``vocabulary``, restores the ``(key, value)`` unique
    constraint, and drops the ``version`` and ``code`` columns.
    """
    # upgrade() inserts this row right after creating the (key, label) unique
    # constraint, so a successful upgrade implies the row came from this
    # revision. Without removing it here, a later re-upgrade would fail on
    # that constraint when it tries to insert the same row again.
    # NOTE(review): if association rows already reference this keyword the
    # delete may be rejected by a foreign key -- confirm the desired handling.
    op.execute(
        """DELETE FROM controlled_keywords WHERE key = 'Phenotypic Assay Mechanism' AND label = 'Other'"""
    )

    # Restore the original column names.
    op.alter_column('controlled_keywords', 'label', new_column_name='value', existing_type=sa.String(), existing_nullable=False)
    op.alter_column('controlled_keywords', 'system', new_column_name='vocabulary', existing_type=sa.String(), existing_nullable=True)

    # Swap the unique constraint back to its original name and columns.
    op.drop_constraint('ix_controlled_keywords_key_label', 'controlled_keywords', type_='unique')
    op.create_unique_constraint('ix_controlled_keywords_key_value', 'controlled_keywords', ['key', 'value'])

    # Drop the columns added by upgrade().
    op.drop_column('controlled_keywords', 'version')
    op.drop_column('controlled_keywords', 'code')

src/mavedb/lib/experiments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def search_experiments(
100100
query = query.filter(
101101
Experiment.keyword_objs.any(
102102
ExperimentControlledKeywordAssociation.controlled_keyword.has(
103-
ControlledKeyword.value.in_(search.keywords)
103+
ControlledKeyword.label.in_(search.keywords)
104104
)
105105
)
106106
)

src/mavedb/lib/keywords.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
from mavedb.models.controlled_keyword import ControlledKeyword
77

88

9-
def search_keyword(db: Session, key: str, label: Optional[str]):
    """Return the single controlled keyword matching ``key`` and ``label``.

    Both values are lower-cased and stripped before comparison. A filter is
    only applied for a value that is non-empty after normalisation, so a
    ``None`` or blank ``label`` matches on ``key`` alone.

    Raises:
        ValueError: if no controlled keyword matches.
    """
    normalized_key = key.lower().strip()
    normalized_label = None if label is None else label.lower().strip()

    # Collect the case-insensitive criteria that actually apply.
    criteria = []
    if normalized_key:
        criteria.append(func.lower(ControlledKeyword.key) == normalized_key)
    if normalized_label:
        criteria.append(func.lower(ControlledKeyword.label) == normalized_label)

    query = db.query(ControlledKeyword)
    for criterion in criteria:
        query = query.filter(criterion)

    match = query.one_or_none()
    if match is not None:
        return match
    raise ValueError(f"Invalid keyword {key} or {label}")

src/mavedb/lib/score_sets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
208208
ScoreSet.experiment.has(
209209
Experiment.keyword_objs.any(
210210
ExperimentControlledKeywordAssociation.controlled_keyword.has(
211-
ControlledKeyword.value.in_(search.keywords)
211+
ControlledKeyword.label.in_(search.keywords)
212212
)
213213
)
214214
)

src/mavedb/lib/validation/keywords.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,41 @@
1+
import re
12
from typing import Optional
23

34
from mavedb.lib.validation.exceptions import ValidationError
45
from mavedb.lib.validation.utilities import is_null
56

67

8+
def validate_code(key: str, label: str, code: Optional[str]):
    """Validate the Gene Ontology accession attached to a controlled keyword.

    The check only applies when ``key`` is "phenotypic assay mechanism"
    (case-insensitive) and ``label`` is anything other than "other"
    (case-insensitive); every other combination is accepted as-is.

    Raises:
        ValidationError: if the check applies and ``code`` is ``None`` or is
            not exactly ``GO:`` followed by seven ASCII digits.
    """
    if key.lower() == "phenotypic assay mechanism" and label.lower() != "other":
        # The Gene Ontology accession is a unique seven digit identifier prefixed by GO:.
        # e.g. GO:0005739, GO:1904659, or GO:0016597.
        # fullmatch (rather than match with a trailing `$`) rejects a trailing
        # newline, and [0-9] (rather than \d) rejects non-ASCII Unicode digits.
        if code is None or not re.fullmatch(r"GO:[0-9]{7}", code):
            raise ValidationError("Invalid Gene Ontology accession.")
14+
15+
716
# TODO: label will not be Optional when we confirm the final controlled keyword list.
8-
def validate_description(label: str, key: str, description: Optional[str]):
    """Require a non-blank description when the keyword label is "other".

    The label comparison is case-insensitive. Any other label is accepted
    regardless of ``description``.

    Raises:
        ValidationError: if ``label`` is "other" and ``description`` is
            ``None`` or contains only whitespace. The error location points
            at ``key`` under ``keywordDescriptions``.
    """
    if label.lower() != "other":
        return

    has_description = description is not None and description.strip() != ""
    if has_description:
        return

    raise ValidationError(
        "Other option does not allow empty description.", custom_loc=["body", "keywordDescriptions", key]
    )
1322

1423

1524
def validate_duplicates(keywords: list):
    """Reject keyword lists that repeat a key or a non-"other" label.

    Keys and labels are compared case-insensitively. Labels equal to "other"
    are excluded from the label check, so several keywords may use it.

    Args:
        keywords: items exposing ``.keyword.key`` and ``.keyword.label``
            (ExperimentControlledKeywordCreate objects).

    Raises:
        ValidationError: if a key is repeated, or if a label other than
            "other" is repeated. Keys are checked first.
    """
    lowered_keys = [entry.keyword.key.lower() for entry in keywords]
    lowered_labels = [
        lowered
        for lowered in (entry.keyword.label.lower() for entry in keywords)
        if lowered != "other"
    ]

    # A set drops repeats, so a shorter set means at least one duplicate.
    if len(set(lowered_keys)) != len(lowered_keys):
        raise ValidationError("Duplicate keys found in keywords.")
    if len(set(lowered_labels)) != len(lowered_labels):
        raise ValidationError("Duplicate labels found in keywords.")
3039

3140

3241
def validate_keyword(keyword: str):
@@ -47,7 +56,7 @@ def validate_keyword(keyword: str):
4756

4857

4958
def validate_keyword_keys(keywords: list):
50-
keyword_dict = {k.keyword.key.lower(): k.keyword.value.lower() for k in keywords}
59+
keyword_dict = {k.keyword.key.lower(): k.keyword.label.lower() for k in keywords}
5160
variant_library_method = keyword_dict.get("variant library creation method", "")
5261

5362
if variant_library_method == "endogenous locus library method":

src/mavedb/models/controlled_keyword.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ class ControlledKeyword(Base):
1010

1111
id = Column(Integer, primary_key=True, index=True)
1212
key = Column(String, nullable=False)
13-
value = Column(String, nullable=False)
14-
vocabulary = Column(String, nullable=True)
13+
label = Column(String, nullable=False)
14+
system = Column(String, nullable=True)
15+
code = Column(String, nullable=True)
16+
version = Column(String, nullable=True)
1517
special = Column(Boolean, nullable=True)
1618
description = Column(String, nullable=True)
1719
creation_date = Column(Date, nullable=False, default=date.today)
1820
modification_date = Column(Date, nullable=False, default=date.today, onupdate=date.today)
19-
__table_args__ = (UniqueConstraint("key", "value", name="ix_controlled_keywords_key_value"),)
21+
__table_args__ = (UniqueConstraint("key", "label", name="ix_controlled_keywords_key_label"),)

src/mavedb/models/experiment.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,10 @@ def keywords(self) -> list[dict]:
161161
{
162162
"keyword": {
163163
"key": controlled_keyword.key,
164-
"value": controlled_keyword.value,
165-
"vocabulary": controlled_keyword.vocabulary,
164+
"label": controlled_keyword.label,
165+
"system": controlled_keyword.system,
166+
"code": controlled_keyword.code,
167+
"version": controlled_keyword.version,
166168
"special": controlled_keyword.special,
167169
"description": controlled_keyword.description,
168170
},
@@ -190,8 +192,8 @@ async def set_keywords(self, db, keywords: list):
190192
controlled_keyword=await self._find_keyword(
191193
db,
192194
keyword_obj.keyword.key,
193-
keyword_obj.keyword.value,
194-
keyword_obj.keyword.vocabulary,
195+
keyword_obj.keyword.label,
196+
keyword_obj.keyword.code,
195197
),
196198
description=keyword_obj.description,
197199
)
@@ -211,15 +213,15 @@ async def _find_or_create_legacy_keyword(self, db, keyword_text):
211213
keyword_obj = LegacyKeyword(text=keyword_text)
212214
return keyword_obj
213215

214-
async def _find_keyword(self, db, key: str, label: str, code: Optional[str]):
    """Look up the controlled keyword with the given ``key`` and ``label``.

    The lookup is additionally narrowed by ``code`` when a truthy ``code`` is
    supplied; a ``None`` or empty code leaves the query unrestricted on that
    column.

    Raises:
        ValueError: if no controlled keyword matches.
    """
    by_key = db.query(ControlledKeyword).filter(ControlledKeyword.key == key)
    query = by_key.filter(ControlledKeyword.label == label)
    if code:
        query = query.filter(ControlledKeyword.code == code)

    found = query.one_or_none()
    if found is not None:
        return found
    raise ValueError(f"Unknown keyword {key}:{label}")
224226

225227

src/mavedb/routers/controlled_keywords.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def fetch_keywords_by_key(
3232
items = (
3333
db.query(ControlledKeyword)
3434
.filter(func.lower(ControlledKeyword.key) == lower_key)
35-
.order_by(ControlledKeyword.value)
35+
.order_by(ControlledKeyword.label)
3636
.all()
3737
)
3838
if not items:
@@ -41,8 +41,8 @@ def fetch_keywords_by_key(
4141

4242

4343
# The placeholder must be named after the function parameter (`label`) so the
# second path segment is bound to it. With a `{value}` placeholder, `label`
# would be treated as a required query parameter and the path segment ignored.
# The URL shape (/search/<key>/<label>) is unchanged for callers.
@router.post("/search/{key}/{label}", status_code=200, response_model=keyword.Keyword)
def search_keyword_by_key_and_value(key: str, label: str, db: Session = Depends(deps.get_db)) -> ControlledKeyword:
    """
    Search keywords.

    Returns the controlled keyword found by `_search_keyword` for the `key`
    and `label` path segments.
    """
    return _search_keyword(db, key, label)

src/mavedb/routers/experiments.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -279,22 +279,22 @@ async def create_experiment(
279279
for publication in publication_identifiers:
280280
setattr(publication, "primary", publication.identifier in primary_identifiers)
281281

282-
# TODO: Controlled keywords currently is allowed none value.
282+
# TODO: Controlled keywords are currently allowed to have a None label.
283283
# Will be changed in the future when we get the final list.
284284
keywords: list[ExperimentControlledKeywordAssociation] = []
285285
if item_create.keywords:
286-
all_values_none = all(k.keyword.value is None for k in item_create.keywords)
287-
if all_values_none is False:
286+
all_labels_none = all(k.keyword.label is None for k in item_create.keywords)
287+
if all_labels_none is False:
288288
# Users may choose part of keywords from dropdown menu. Remove not chosen keywords from the list.
289-
filtered_keywords = list(filter(lambda k: k.keyword.value is not None, item_create.keywords))
289+
filtered_keywords = list(filter(lambda k: k.keyword.label is not None, item_create.keywords))
290290
try:
291291
validate_keyword_list(filtered_keywords)
292292
except ValidationError as e:
293293
raise HTTPException(status_code=422, detail=str(e))
294294
for upload_keyword in filtered_keywords:
295295
try:
296296
description = upload_keyword.description
297-
controlled_keyword = search_keyword(db, upload_keyword.keyword.key, upload_keyword.keyword.value)
297+
controlled_keyword = search_keyword(db, upload_keyword.keyword.key, upload_keyword.keyword.label)
298298
experiment_controlled_keyword = ExperimentControlledKeywordAssociation(
299299
controlled_keyword=controlled_keyword,
300300
description=description,
@@ -417,10 +417,10 @@ async def update_experiment(
417417
item.raw_read_identifiers = raw_read_identifiers
418418

419419
if item_update.keywords:
420-
all_values_none = all(k.keyword.value is None for k in item_update.keywords)
421-
if all_values_none is False:
420+
all_labels_none = all(k.keyword.label is None for k in item_update.keywords)
421+
if all_labels_none is False:
422422
# Users may choose part of keywords from dropdown menu. Remove not chosen keywords from the list.
423-
filtered_keywords = list(filter(lambda k: k.keyword.value is not None, item_update.keywords))
423+
filtered_keywords = list(filter(lambda k: k.keyword.label is not None, item_update.keywords))
424424
try:
425425
validate_keyword_list(filtered_keywords)
426426
except ValidationError as e:

src/mavedb/routers/statistics.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ def experiment_keyword_statistics(
150150
raise HTTPException(500, "No association table associated with the keywords field when one was expected.")
151151

152152
query = _join_model_and_filter_unpublished(
153-
select(ControlledKeyword.value, func.count(ControlledKeyword.value)).join(queried_assc), queried_model
154-
).group_by(ControlledKeyword.value)
153+
select(ControlledKeyword.label, func.count(ControlledKeyword.label)).join(queried_assc), queried_model
154+
).group_by(ControlledKeyword.label)
155155

156156
return _count_for_identifier_in_query(db, query)
157157

0 commit comments

Comments
 (0)