Skip to content

Commit fc9079c

Browse files
committed
feat: allow creation and modification of class-based calibrations

- Added router functionality for validation and standardization of class-based calibration files.
- Added lib functionality for creation/modification of class-based calibrations.
- Invoked lib functionality from routers to allow client creation/modification of class-based calibrations.
- Introduced a new CSV file `calibration_classes.csv` containing variant URNs and their corresponding class names.
- Implemented tests for creating and updating score calibrations using class-based classifications.
- Enhanced existing test suite with parameterized tests to validate score calibration creation and modification.
- Ensured that the response includes correct functional classifications and variant counts.
1 parent 1d51d67 commit fc9079c

File tree

6 files changed

+1270
-190
lines changed

6 files changed

+1270
-190
lines changed

src/mavedb/lib/score_calibrations.py

Lines changed: 196 additions & 111 deletions
Large diffs are not rendered by default.

src/mavedb/lib/score_sets.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,7 @@ def bulk_create_urns(n, score_set, reset_counter=False) -> list[str]:
11001100
return child_urns
11011101

11021102

1103-
def csv_data_to_df(file_data: BinaryIO) -> pd.DataFrame:
1103+
def csv_data_to_df(file_data: BinaryIO, induce_hgvs_cols: bool = True) -> pd.DataFrame:
11041104
extra_na_values = list(
11051105
set(
11061106
list(null_values_list)
@@ -1121,9 +1121,10 @@ def csv_data_to_df(file_data: BinaryIO) -> pd.DataFrame:
11211121
dtype={**{col: str for col in HGVSColumns.options()}, "scores": float},
11221122
)
11231123

1124-
for c in HGVSColumns.options():
1125-
if c not in ingested_df.columns:
1126-
ingested_df[c] = np.NaN
1124+
if induce_hgvs_cols:
1125+
for c in HGVSColumns.options():
1126+
if c not in ingested_df.columns:
1127+
ingested_df[c] = np.NaN
11271128

11281129
return ingested_df
11291130

src/mavedb/routers/score_calibrations.py

Lines changed: 225 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,55 @@
11
import logging
2-
3-
from fastapi import APIRouter, Depends, HTTPException, Query
42
from typing import Optional
3+
4+
from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
55
from sqlalchemy.orm import Session
66

77
from mavedb import deps
8+
from mavedb.lib.authentication import UserData, get_current_user
9+
from mavedb.lib.authorization import require_current_user
10+
from mavedb.lib.flexible_model_loader import json_or_form_loader
811
from mavedb.lib.logging import LoggedRoute
912
from mavedb.lib.logging.context import (
1013
logging_context,
1114
save_to_logging_context,
1215
)
13-
from mavedb.lib.authentication import get_current_user, UserData
14-
from mavedb.lib.authorization import require_current_user
1516
from mavedb.lib.permissions import Action, assert_permission, has_permission
1617
from mavedb.lib.score_calibrations import (
1718
create_score_calibration_in_score_set,
18-
modify_score_calibration,
1919
delete_score_calibration,
2020
demote_score_calibration_from_primary,
21+
modify_score_calibration,
2122
promote_score_calibration_to_primary,
2223
publish_score_calibration,
24+
variant_classification_df_to_dict,
2325
)
26+
from mavedb.lib.score_sets import csv_data_to_df
27+
from mavedb.lib.validation.constants.general import calibration_class_column_name, calibration_variant_column_name
28+
from mavedb.lib.validation.dataframe.calibration import validate_and_standardize_calibration_classes_dataframe
2429
from mavedb.models.score_calibration import ScoreCalibration
2530
from mavedb.routers.score_sets import fetch_score_set_by_urn
2631
from mavedb.view_models import score_calibration
2732

28-
2933
logger = logging.getLogger(__name__)
3034

3135
router = APIRouter(
3236
prefix="/api/v1/score-calibrations",
33-
tags=["score-calibrations"],
37+
tags=["Score Calibrations"],
3438
responses={404: {"description": "Not found"}},
3539
route_class=LoggedRoute,
3640
)
3741

42+
# Create dependency loaders for flexible JSON/form parsing
43+
calibration_create_loader = json_or_form_loader(
44+
score_calibration.ScoreCalibrationCreate,
45+
field_name="calibration_json",
46+
)
47+
48+
calibration_modify_loader = json_or_form_loader(
49+
score_calibration.ScoreCalibrationModify,
50+
field_name="calibration_json",
51+
)
52+
3853

3954
@router.get(
4055
"/{urn}",
@@ -136,31 +151,126 @@ async def get_primary_score_calibrations_for_score_set(
136151
@router.post(
137152
"/",
138153
response_model=score_calibration.ScoreCalibrationWithScoreSetUrn,
139-
responses={404: {}},
154+
responses={404: {}, 422: {"description": "Validation Error"}},
155+
openapi_extra={
156+
"requestBody": {
157+
"content": {
158+
"application/json": {
159+
"schema": {"$ref": "#/components/schemas/ScoreCalibrationCreate"},
160+
},
161+
"multipart/form-data": {
162+
"schema": {
163+
"type": "object",
164+
"properties": {
165+
"calibration_json": {
166+
"type": "string",
167+
"description": "JSON string containing the calibration data",
168+
"example": '{"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":1.0}',
169+
},
170+
"classes_file": {
171+
"type": "string",
172+
"format": "binary",
173+
"description": "CSV file containing variant classifications",
174+
},
175+
},
176+
}
177+
},
178+
},
179+
"description": "Score calibration data. Can be sent as JSON body or multipart form data",
180+
}
181+
},
140182
)
141183
async def create_score_calibration_route(
142184
*,
143-
calibration: score_calibration.ScoreCalibrationCreate,
185+
calibration: score_calibration.ScoreCalibrationCreate = Depends(calibration_create_loader),
186+
classes_file: Optional[UploadFile] = File(
187+
None,
188+
description=f"CSV file containing variant classifications. This file must contain two columns: '{calibration_variant_column_name}' and '{calibration_class_column_name}'.",
189+
),
144190
db: Session = Depends(deps.get_db),
145191
user_data: UserData = Depends(require_current_user),
146192
) -> ScoreCalibration:
147193
"""
148194
Create a new score calibration.
149195
150-
The score set URN must be provided to associate the calibration with an existing score set.
151-
The user must have write permission on the associated score set.
196+
This endpoint supports two different request formats to accommodate various client needs:
197+
198+
## Method 1: JSON Request Body (application/json)
199+
Send calibration data as a standard JSON request body. This method is ideal for
200+
creating calibrations without file uploads.
201+
202+
**Content-Type**: `application/json`
203+
204+
**Example**:
205+
```json
206+
{
207+
"score_set_urn": "urn:mavedb:0000000X-X-X",
208+
"title": "My Calibration",
209+
"description": "Functional score calibration",
210+
"baseline_score": 1.0
211+
}
212+
```
213+
214+
## Method 2: Multipart Form Data (multipart/form-data)
215+
Send calibration data as JSON in a form field, optionally with file uploads.
216+
This method is required when uploading classification files.
217+
218+
**Content-Type**: `multipart/form-data`
219+
220+
**Form Fields**:
221+
- `calibration_json` (string, required): JSON string containing the calibration data
222+
- `classes_file` (file, optional): CSV file containing variant classifications
223+
224+
**Example**:
225+
```bash
226+
curl -X POST "/api/v1/score-calibrations/" \\
227+
-H "Authorization: Bearer your-token" \\
228+
-F 'calibration_json={"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":"1.0"}' \\
229+
-F 'classes_file=@variant_classes.csv'
230+
```
231+
232+
## Requirements
233+
- The score set URN must be provided to associate the calibration with an existing score set
234+
- User must have write permission on the associated score set
235+
- If uploading a classes_file, it must be a valid CSV with variant classification data
236+
237+
## File Upload Details
238+
The `classes_file` parameter accepts CSV files containing variant classification data.
239+
The file should have appropriate headers and contain columns for variant urns and class names.
240+
241+
## Response
242+
Returns the created score calibration with its generated URN and associated score set information.
152243
"""
153244
if not calibration.score_set_urn:
154245
raise HTTPException(status_code=422, detail="score_set_urn must be provided to create a score calibration.")
155246

156247
save_to_logging_context({"requested_resource": calibration.score_set_urn, "resource_property": "calibrations"})
157248

158249
score_set = await fetch_score_set_by_urn(db, calibration.score_set_urn, user_data, None, False)
250+
if not score_set:
251+
logger.debug("The requested score set does not exist", extra=logging_context())
252+
raise HTTPException(status_code=404, detail="The requested score set does not exist")
253+
159254
# TODO#539: Allow any authenticated user to upload a score calibration for a score set, not just those with
160255
# permission to update the score set itself.
161256
assert_permission(user_data, score_set, Action.UPDATE)
162257

163-
created_calibration = await create_score_calibration_in_score_set(db, calibration, user_data.user)
258+
if classes_file:
259+
try:
260+
classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False)
261+
except UnicodeDecodeError as e:
262+
raise HTTPException(
263+
status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values."
264+
)
265+
266+
standardized_classes_df = validate_and_standardize_calibration_classes_dataframe(
267+
db, score_set, calibration, classes_df
268+
)
269+
variant_classes = variant_classification_df_to_dict(standardized_classes_df)
270+
271+
created_calibration = await create_score_calibration_in_score_set(
272+
db, calibration, user_data.user, variant_classes if classes_file else None
273+
)
164274

165275
db.commit()
166276
db.refresh(created_calibration)
@@ -171,23 +281,108 @@ async def create_score_calibration_route(
171281
@router.put(
172282
"/{urn}",
173283
response_model=score_calibration.ScoreCalibrationWithScoreSetUrn,
174-
responses={404: {}},
284+
responses={404: {}, 422: {"description": "Validation Error"}},
285+
openapi_extra={
286+
"requestBody": {
287+
"content": {
288+
"application/json": {
289+
"schema": {"$ref": "#/components/schemas/ScoreCalibrationModify"},
290+
},
291+
"multipart/form-data": {
292+
"schema": {
293+
"type": "object",
294+
"properties": {
295+
"calibration_json": {
296+
"type": "string",
297+
"description": "JSON string containing the calibration update data",
298+
"example": '{"title":"Updated Calibration","description":"Updated description","baseline_score":2.0}',
299+
},
300+
"classes_file": {
301+
"type": "string",
302+
"format": "binary",
303+
"description": "CSV file containing updated variant classifications",
304+
},
305+
},
306+
}
307+
},
308+
},
309+
"description": "Score calibration update data. Can be sent as JSON body or multipart form data",
310+
}
311+
},
175312
)
176313
async def modify_score_calibration_route(
177314
*,
178315
urn: str,
179-
calibration_update: score_calibration.ScoreCalibrationModify,
316+
calibration_update: score_calibration.ScoreCalibrationModify = Depends(calibration_modify_loader),
317+
classes_file: Optional[UploadFile] = File(
318+
None,
319+
description=f"CSV file containing variant classifications. This file must contain two columns: '{calibration_variant_column_name}' and '{calibration_class_column_name}'.",
320+
),
180321
db: Session = Depends(deps.get_db),
181322
user_data: UserData = Depends(require_current_user),
182323
) -> ScoreCalibration:
183324
"""
184325
Modify an existing score calibration by its URN.
326+
327+
This endpoint supports two different request formats to accommodate various client needs:
328+
329+
## Method 1: JSON Request Body (application/json)
330+
Send calibration update data as a standard JSON request body. This method is ideal for
331+
modifying calibrations without file uploads.
332+
333+
**Content-Type**: `application/json`
334+
335+
**Example**:
336+
```json
337+
{
338+
"score_set_urn": "urn:mavedb:0000000X-X-X",
339+
"title": "Updated Calibration Title",
340+
"description": "Updated functional score calibration",
341+
"baseline_score": 1.0
342+
}
343+
```
344+
345+
## Method 2: Multipart Form Data (multipart/form-data)
346+
Send calibration update data as JSON in a form field, optionally with file uploads.
347+
This method is required when uploading new classification files.
348+
349+
**Content-Type**: `multipart/form-data`
350+
351+
**Form Fields**:
352+
- `calibration_json` (string, required): JSON string containing the calibration update data
353+
- `classes_file` (file, optional): CSV file containing updated variant classifications
354+
355+
**Example**:
356+
```bash
357+
curl -X PUT "/api/v1/score-calibrations/{urn}" \\
358+
-H "Authorization: Bearer your-token" \\
359+
-F 'calibration_json={"score_set_urn":"urn:mavedb:0000000X-X-X","title":"My Calibration","description":"Functional score calibration","baseline_score":"1.0"}' \\
360+
-F 'classes_file=@updated_variant_classes.csv'
361+
```
362+
363+
## Requirements
364+
- User must have update permission on the calibration
365+
- If changing the score_set_urn, user must have permission on the new score set
366+
- All fields in the update are optional - only provided fields will be modified
367+
368+
## File Upload Details
369+
The `classes_file` parameter accepts CSV files containing updated variant classification data.
370+
If provided, this will replace the existing classification data for the calibration.
371+
The file should have appropriate headers and follow the expected format for variant
372+
classifications within the associated score set.
373+
374+
## Response
375+
Returns the updated score calibration with all modifications applied and any new
376+
classification data from the uploaded file.
185377
"""
186378
save_to_logging_context({"requested_resource": urn})
187379

188380
# If the user supplies a new score_set_urn, validate it exists and the user has permission to use it.
189381
if calibration_update.score_set_urn is not None:
190382
score_set = await fetch_score_set_by_urn(db, calibration_update.score_set_urn, user_data, None, False)
383+
if not score_set:
384+
logger.debug("The requested score set does not exist", extra=logging_context())
385+
raise HTTPException(status_code=404, detail="The requested score set does not exist")
191386

192387
# TODO#539: Allow any authenticated user to upload a score calibration for a score set, not just those with
193388
# permission to update the score set itself.
@@ -200,7 +395,22 @@ async def modify_score_calibration_route(
200395

201396
assert_permission(user_data, item, Action.UPDATE)
202397

203-
updated_calibration = await modify_score_calibration(db, item, calibration_update, user_data.user)
398+
if classes_file:
399+
try:
400+
classes_df = csv_data_to_df(classes_file.file, induce_hgvs_cols=False)
401+
except UnicodeDecodeError as e:
402+
raise HTTPException(
403+
status_code=400, detail=f"Error decoding file: {e}. Ensure the file has correct values."
404+
)
405+
406+
standardized_classes_df = validate_and_standardize_calibration_classes_dataframe(
407+
db, score_set, calibration_update, classes_df
408+
)
409+
variant_classes = variant_classification_df_to_dict(standardized_classes_df)
410+
411+
updated_calibration = await modify_score_calibration(
412+
db, item, calibration_update, user_data.user, variant_classes if classes_file else None
413+
)
204414

205415
db.commit()
206416
db.refresh(updated_calibration)

0 commit comments

Comments
 (0)