
Commit 693ec58

Merge branch 'release-2025.0.1' into estelle/debugShowTmpSupersedingScoreSet
2 parents: 8a4b426 + c267ede

File tree: 18 files changed (+560 −39 lines)
Lines changed: 29 additions & 0 deletions (new Alembic migration, revision aa73d39b3705)
@@ -0,0 +1,29 @@
+"""score set level score thresholds
+
+Revision ID: aa73d39b3705
+Revises: 68a0ec57694e
+Create Date: 2024-11-13 11:23:57.917725
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "aa73d39b3705"
+down_revision = "68a0ec57694e"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("scoresets", sa.Column("score_calibrations", postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("scoresets", "score_calibrations")
+    # ### end Alembic commands ###
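For reference, this migration only adds the nullable JSONB score_calibrations column that the new calibration endpoint below depends on. A minimal sketch of applying it through Alembic's Python command API follows; the alembic.ini path is an assumption about the project layout, and `alembic upgrade head` from the repository root is the more usual equivalent.

from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # assumed location of the Alembic config
command.upgrade(alembic_cfg, "aa73d39b3705")  # apply this revision (or simply "head")
# command.downgrade(alembic_cfg, "68a0ec57694e")  # roll back to the previous revision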

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "mavedb"
-version = "2024.4.2"
+version = "2025.0.0"
 description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect."
 license = "AGPL-3.0-only"
 readme = "README.md"

src/mavedb/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -6,6 +6,6 @@
 logger = module_logging.getLogger(__name__)
 
 __project__ = "mavedb-api"
-__version__ = "2024.4.2"
+__version__ = "2025.0.0"
 
 logger.info(f"MaveDB {__version__}")

src/mavedb/lib/experiments.py

Lines changed: 14 additions & 1 deletion
@@ -1,7 +1,7 @@
 import logging
 from typing import Optional
 
-from sqlalchemy import func, or_
+from sqlalchemy import func, or_, not_
 from sqlalchemy.orm import Session
 
 from mavedb.lib.logging.context import logging_context, save_to_logging_context
@@ -99,6 +99,19 @@ def search_experiments(
             )
         )
 
+    if search.meta_analysis is not None:
+        if not search.meta_analysis:
+            query = query.filter(
+                or_(
+                    # Keep experiments without any score sets
+                    not_(Experiment.score_sets.any()),
+                    # Keep experiments where score sets exist but have no meta_analyzes_score_sets
+                    Experiment.score_sets.any(not_(ScoreSet.meta_analyzes_score_sets.any()))
+                )
+            )
+        else:
+            query = query.filter(Experiment.score_sets.any(ScoreSet.meta_analyzes_score_sets.any()))
+
     items: list[Experiment] = query.order_by(Experiment.urn, Experiment.title).all()
     if not items:
         items = []
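In effect, an experiment counts as a meta-analysis experiment when at least one of its score sets analyzes other score sets, and experiments with no score sets at all remain in the non-meta-analysis results. A minimal standalone sketch of the same relationship filters, assuming an open Session named db and the Experiment / ScoreSet models imported as in this module:

from sqlalchemy import not_, or_

# Equivalent of meta_analysis=False: experiments with no score sets, or whose
# score sets never reference meta_analyzes_score_sets.
non_meta_analysis = (
    db.query(Experiment)
    .filter(
        or_(
            not_(Experiment.score_sets.any()),
            Experiment.score_sets.any(not_(ScoreSet.meta_analyzes_score_sets.any())),
        )
    )
    .all()
)

# Equivalent of meta_analysis=True: experiments with at least one meta-analysis score set.
meta_analysis = (
    db.query(Experiment)
    .filter(Experiment.score_sets.any(ScoreSet.meta_analyzes_score_sets.any()))
    .all()
)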

src/mavedb/lib/score_sets.py

Lines changed: 31 additions & 0 deletions
@@ -22,6 +22,7 @@
 )
 from mavedb.lib.mave.utils import is_csv_null
 from mavedb.lib.validation.constants.general import null_values_list
+from mavedb.lib.validation.utilities import is_null as validate_is_null
 from mavedb.models.contributor import Contributor
 from mavedb.models.controlled_keyword import ControlledKeyword
 from mavedb.models.doi_identifier import DoiIdentifier
@@ -402,6 +403,7 @@ def get_score_set_counts_as_csv(
     score_set: ScoreSet,
     start: Optional[int] = None,
     limit: Optional[int] = None,
+    drop_na_columns: Optional[bool] = None,
 ) -> str:
     assert type(score_set.dataset_columns) is dict
     count_columns = [str(x) for x in list(score_set.dataset_columns.get("count_columns", []))]
@@ -420,6 +422,9 @@ def get_score_set_counts_as_csv(
     variants = db.scalars(variants_query).all()
 
     rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column)
+    if drop_na_columns:
+        rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns)
+
     stream = io.StringIO()
     writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL)
     writer.writeheader()
@@ -432,6 +437,7 @@ def get_score_set_scores_as_csv(
     score_set: ScoreSet,
     start: Optional[int] = None,
     limit: Optional[int] = None,
+    drop_na_columns: Optional[bool] = None,
 ) -> str:
     assert type(score_set.dataset_columns) is dict
     score_columns = [str(x) for x in list(score_set.dataset_columns.get("score_columns", []))]
@@ -450,13 +456,38 @@ def get_score_set_scores_as_csv(
     variants = db.scalars(variants_query).all()
 
     rows_data = variants_to_csv_rows(variants, columns=columns, dtype=type_column)
+    if drop_na_columns:
+        rows_data, columns = drop_na_columns_from_csv_file_rows(rows_data, columns)
+
     stream = io.StringIO()
     writer = csv.DictWriter(stream, fieldnames=columns, quoting=csv.QUOTE_MINIMAL)
     writer.writeheader()
     writer.writerows(rows_data)
     return stream.getvalue()
 
 
+def drop_na_columns_from_csv_file_rows(
+    rows_data: Iterable[dict[str, Any]],
+    columns: list[str]
+) -> tuple[list[dict[str, Any]], list[str]]:
+    """Process rows_data for a downloadable CSV by removing HGVS columns that are entirely empty."""
+    # Convert the map object to a list so it can be iterated more than once.
+    rows_data = list(rows_data)
+    columns_to_check = ["hgvs_nt", "hgvs_splice", "hgvs_pro"]
+    columns_to_remove = []
+
+    # Check whether all values in a column are None or "NA".
+    for col in columns_to_check:
+        if all(validate_is_null(row[col]) for row in rows_data):
+            columns_to_remove.append(col)
+            for row in rows_data:
                row.pop(col, None)  # Remove the column from each row.
+
+    # Remove these columns from the header list.
+    columns = [col for col in columns if col not in columns_to_remove]
+    return rows_data, columns
+
+
 null_values_re = re.compile(r"\s+|none|nan|na|undefined|n/a|null|nil", flags=re.IGNORECASE)
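A hedged usage sketch of the new helper. It assumes the import path below and that mavedb.lib.validation.utilities.is_null treats "NA" and empty values as null, which is what the surrounding null-value handling suggests:

from mavedb.lib.score_sets import drop_na_columns_from_csv_file_rows

rows = [
    {"accession": "urn:x-1", "hgvs_nt": "NA", "hgvs_splice": "NA", "hgvs_pro": "p.Ala1Val", "score": "0.12"},
    {"accession": "urn:x-2", "hgvs_nt": "NA", "hgvs_splice": "NA", "hgvs_pro": "p.Ala2Val", "score": "0.87"},
]
columns = ["accession", "hgvs_nt", "hgvs_splice", "hgvs_pro", "score"]

rows, columns = drop_na_columns_from_csv_file_rows(rows, columns)
# hgvs_nt and hgvs_splice are entirely "NA", so they are removed from both the
# header list and every row; hgvs_pro and score are left untouched.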

src/mavedb/models/score_set.py

Lines changed: 1 addition & 0 deletions
@@ -157,6 +157,7 @@ class ScoreSet(Base):
 
     target_genes: Mapped[List["TargetGene"]] = relationship(back_populates="score_set", cascade="all, delete-orphan")
     score_ranges = Column(JSONB, nullable=True)
+    score_calibrations = Column(JSONB, nullable=True)
 
     # Unfortunately, we can't use association_proxy here, because in spite of what the documentation seems to imply, it
     # doesn't check for a pre-existing keyword with the same text.

src/mavedb/routers/experiments.py

Lines changed: 10 additions & 13 deletions
@@ -7,6 +7,7 @@
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.encoders import jsonable_encoder
 from sqlalchemy.orm import Session
+from sqlalchemy import or_
 
 from mavedb import deps
 from mavedb.lib.authentication import UserData, get_current_user
@@ -44,7 +45,7 @@
 )
 
 
-# TODO: Rewrite this function.
+# Nothing currently calls this function. Feel free to modify it if we need it in the future.
 @router.get(
     "/experiments/",
     status_code=200,
@@ -54,30 +55,26 @@
 def list_experiments(
     *,
     editable: Optional[bool] = None,
-    q: Optional[str] = None,
     db: Session = Depends(deps.get_db),
     user_data: Optional[UserData] = Depends(get_current_user),
 ) -> list[Experiment]:
     """
     List experiments.
     """
     query = db.query(Experiment)
-    if q is not None:
-        save_to_logging_context({"query_string": q})
 
+    if editable:
         if user_data is None or user_data.user is None:
             logger.debug(msg="User is anonymous; Cannot list their experiments.", extra=logging_context())
             return []
 
-        if len(q) > 0:
-            logger.debug(msg="Listing experiments for the current user.", extra=logging_context())
-            query = query.filter(
-                Experiment.created_by_id == user_data.user.id
-            )  # .filter(Experiment.published_date is None)
-        # else:
-        #     query = query.filter(Experiment.created_by_id == user.id).filter(Experiment.published_date is None)
-        else:
-            logger.debug(msg="No query string was provided; Listing all experiments.", extra=logging_context())
+        logger.debug(msg="Listing experiments for the current user.", extra=logging_context())
+        query = query.filter(
+            or_(
+                Experiment.created_by_id == user_data.user.id,
+                Experiment.contributors.any(Contributor.orcid_id == user_data.user.username)
+            )
+        )
 
     items = query.order_by(Experiment.urn).all()
     return items
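A hedged client-side sketch of what the reworked editable filter now returns; the host, the /api/v1 mount point, and bearer-token auth are assumptions not shown in this diff:

import requests

resp = requests.get(
    "https://api.mavedb.org/api/v1/experiments/",  # assumed base URL and prefix
    params={"editable": True},
    headers={"Authorization": "Bearer <access-token>"},  # placeholder credentials
)
resp.raise_for_status()
# With the new or_() filter, the response contains experiments the caller
# created plus experiments listing the caller's ORCID iD as a contributor.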

src/mavedb/routers/hgvs.py

Lines changed: 4 additions & 4 deletions
@@ -11,7 +11,7 @@
 
 router = APIRouter(
     prefix="/api/v1/hgvs",
-    tags=["hgvs"],
+    tags=["transcripts"],
     responses={404: {"description": "Not found"}},
 )
 
@@ -85,7 +85,7 @@ def gene_info(gene: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) ->
     return gene_info
 
 
-@router.get("/transcripts/gene/{gene}", status_code=200, response_model=list[str])
+@router.get("/gene/{gene}", status_code=200, response_model=list[str])
 def list_transcripts_for_gene(gene: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> list[str]:
     """
     List transcripts associated with a particular gene
@@ -98,7 +98,7 @@ def list_transcripts_for_gene(gene: str, hdp: RESTDataProvider = Depends(hgvs_da
     return list(transcripts)
 
 
-@router.get("/transcripts/{transcript}", status_code=200, response_model=dict[str, Any])
+@router.get("/{transcript}", status_code=200, response_model=dict[str, Any])
 def transcript_info(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> dict[str, Any]:
     """
     List transcript information for a particular transcript
@@ -111,7 +111,7 @@ def transcript_info(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_p
     return transcript_info
 
 
-@router.get("/transcripts/protein/{transcript}", status_code=200, response_model=str)
+@router.get("/protein/{transcript}", status_code=200, response_model=str)
 def convert_to_protein(transcript: str, hdp: RESTDataProvider = Depends(hgvs_data_provider)) -> str:
     """
     Convert a provided transcript from its nucleotide accession identifier to its protein accession identifier
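A hedged sketch of the re-pathed transcript endpoints, assuming the router keeps the /api/v1/hgvs prefix shown above and the public host below; the accession is only an example:

import requests

base = "https://api.mavedb.org/api/v1/hgvs"  # assumed host + prefix

transcripts = requests.get(f"{base}/gene/BRCA1").json()       # was /transcripts/gene/{gene}
info = requests.get(f"{base}/NM_007294.4").json()             # was /transcripts/{transcript}
protein = requests.get(f"{base}/protein/NM_007294.4").json()  # was /transcripts/protein/{transcript}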

src/mavedb/routers/score_sets.py

Lines changed: 55 additions & 8 deletions
@@ -9,13 +9,18 @@
 from fastapi.encoders import jsonable_encoder
 from fastapi.exceptions import HTTPException
 from fastapi.responses import StreamingResponse
-from sqlalchemy import or_
-from sqlalchemy.exc import MultipleResultsFound
+from sqlalchemy import or_, select
+from sqlalchemy.exc import MultipleResultsFound, NoResultFound
 from sqlalchemy.orm import Session
 
 from mavedb import deps
 from mavedb.lib.authentication import UserData
-from mavedb.lib.authorization import get_current_user, require_current_user, require_current_user_with_email
+from mavedb.lib.authorization import (
+    get_current_user,
+    require_current_user,
+    require_current_user_with_email,
+    RoleRequirer,
+)
 from mavedb.lib.contributors import find_or_create_contributor
 from mavedb.lib.exceptions import MixedTargetError, NonexistentOrcidUserError, ValidationError
 from mavedb.lib.identifiers import (
@@ -50,6 +55,7 @@
 )
 from mavedb.models.contributor import Contributor
 from mavedb.models.enums.processing_state import ProcessingState
+from mavedb.models.enums.user_role import UserRole
 from mavedb.models.experiment import Experiment
 from mavedb.models.license import License
 from mavedb.models.mapped_variant import MappedVariant
@@ -58,7 +64,7 @@
 from mavedb.models.target_gene import TargetGene
 from mavedb.models.target_sequence import TargetSequence
 from mavedb.models.variant import Variant
-from mavedb.view_models import mapped_variant, score_set
+from mavedb.view_models import mapped_variant, score_set, calibration
 from mavedb.view_models.search import ScoreSetsSearch
 
 logger = logging.getLogger(__name__)
@@ -185,6 +191,7 @@ def get_score_set_scores_csv(
     urn: str,
     start: int = Query(default=None, description="Start index for pagination"),
     limit: int = Query(default=None, description="Number of variants to return"),
+    drop_na_columns: Optional[bool] = None,
     db: Session = Depends(deps.get_db),
     user_data: Optional[UserData] = Depends(get_current_user),
 ) -> Any:
@@ -219,7 +226,7 @@ def get_score_set_scores_csv(
 
     assert_permission(user_data, score_set, Action.READ)
 
-    csv_str = get_score_set_scores_as_csv(db, score_set, start, limit)
+    csv_str = get_score_set_scores_as_csv(db, score_set, start, limit, drop_na_columns)
     return StreamingResponse(iter([csv_str]), media_type="text/csv")
 
 
@@ -239,6 +246,7 @@ async def get_score_set_counts_csv(
     urn: str,
     start: int = Query(default=None, description="Start index for pagination"),
     limit: int = Query(default=None, description="Number of variants to return"),
+    drop_na_columns: Optional[bool] = None,
     db: Session = Depends(deps.get_db),
     user_data: Optional[UserData] = Depends(get_current_user),
 ) -> Any:
@@ -273,7 +281,7 @@ async def get_score_set_counts_csv(
 
     assert_permission(user_data, score_set, Action.READ)
 
-    csv_str = get_score_set_counts_as_csv(db, score_set, start, limit)
+    csv_str = get_score_set_counts_as_csv(db, score_set, start, limit, drop_na_columns)
     return StreamingResponse(iter([csv_str]), media_type="text/csv")
 
 
@@ -347,8 +355,10 @@ async def create_score_set(
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Unknown experiment")
     # Do not allow adding score sets to meta-analysis experiments.
     if any(s.meta_analyzes_score_sets for s in experiment.score_sets):
-        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN,
-                            detail="Score sets may not be added to a meta-analysis experiment.")
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Score sets may not be added to a meta-analysis experiment.",
+        )
 
     save_to_logging_context({"experiment": experiment.urn})
     assert_permission(user_data, experiment, Action.ADD_SCORE_SET)
@@ -668,6 +678,43 @@ async def upload_score_set_variant_data(
     return item
 
 
+@router.post(
+    "/score-sets/{urn}/calibration/data",
+    response_model=score_set.ScoreSet,
+    responses={422: {}},
+    response_model_exclude_none=True,
+)
+async def update_score_set_calibration_data(
+    *,
+    urn: str,
+    calibration_update: dict[str, calibration.Calibration],
+    db: Session = Depends(deps.get_db),
+    user_data: UserData = Depends(RoleRequirer([UserRole.admin])),
+):
+    """
+    Update thresholds / score calibrations for a score set.
+    """
+    save_to_logging_context({"requested_resource": urn, "resource_property": "score_thresholds"})
+
+    try:
+        item = db.scalars(select(ScoreSet).where(ScoreSet.urn == urn)).one()
+    except NoResultFound:
+        logger.info(
+            msg="Failed to add score thresholds; The requested score set does not exist.", extra=logging_context()
+        )
+        raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found")
+
+    assert_permission(user_data, item, Action.UPDATE)
+
+    item.score_calibrations = {k: v.dict() for k, v in calibration_update.items()}
+    db.add(item)
+    db.commit()
+    db.refresh(item)
+
+    save_to_logging_context({"updated_resource": item.urn})
+    return item
+
+
 @router.put(
     "/score-sets/{urn}", response_model=score_set.ScoreSet, responses={422: {}}, response_model_exclude_none=True
 )
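A hedged client sketch covering the two additions in this router: the drop_na_columns query parameter on the CSV downloads and the admin-only calibration upload. The host, the /score-sets/{urn}/scores path, and the calibration payload shape are assumptions; the real payload fields are defined by the new calibration view model, which is not shown in this diff.

import requests

base = "https://api.mavedb.org/api/v1"  # assumed host and prefix
urn = "urn:mavedb:00000001-a-1"         # example score set URN

# Scores CSV with all-NA HGVS columns removed server-side.
csv_text = requests.get(
    f"{base}/score-sets/{urn}/scores",  # assumed path for get_score_set_scores_csv
    params={"drop_na_columns": True},
).text

# Admin-only: replace the score set's calibrations. Keys name a calibration and
# values must match calibration.Calibration; the empty dict is only a placeholder.
calibrations: dict = {}
resp = requests.post(
    f"{base}/score-sets/{urn}/calibration/data",
    json=calibrations,
    headers={"Authorization": "Bearer <admin-token>"},  # placeholder admin credentials
)
resp.raise_for_status()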

src/mavedb/scripts/populate_mapped_variants.py

Lines changed: 5 additions & 1 deletion
@@ -66,7 +66,11 @@ def populate_mapped_variant_data(db: Session, urns: Sequence[Optional[str]], all
 
     try:
         existing_mapped_variants = (
-            db.query(MappedVariant).join(Variant).join(ScoreSet).filter(MappedVariant.current.is_(True)).all()
+            db.query(MappedVariant)
+            .join(Variant)
+            .join(ScoreSet)
+            .filter(ScoreSet.id == ss_id, MappedVariant.current.is_(True))
+            .all()
         )
 
         for variant in existing_mapped_variants:
