Commit cab9fe5
Updates to score set endpoints to receive score and count column metadata as JSON

Updates the score set router endpoints to receive score and count column metadata as JSON data in the request payload rather than as file uploads. This is more consistent with how the extra_metadata field is handled, and allows more flexibility to design an interactive UI for creating column metadata in the future (either in addition to, or as a replacement for, the current JSON file form inputs). This commit also fixes an existing bug where None values were excluded from score set updates, which made it impossible to clear existing values from fields.
1 parent 2950b2e commit cab9fe5

File tree

1 file changed (+66, -56 lines)

src/mavedb/routers/score_sets.py

Lines changed: 66 additions & 56 deletions
```diff
@@ -7,7 +7,7 @@
 from arq import ArqRedis
 from fastapi import APIRouter, Depends, File, Query, Request, UploadFile, status
 from fastapi.encoders import jsonable_encoder
-from fastapi.exceptions import HTTPException
+from fastapi.exceptions import HTTPException, RequestValidationError
 from fastapi.responses import StreamingResponse
 from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine
 from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement
```
```diff
@@ -181,7 +181,7 @@ async def score_set_update(
         "target_genes",
         "dataset_columns",
     ]:
-        setattr(item, var, value) if value is not None else None
+        setattr(item, var, value)

     item_update_license_id = item_update_dict.get("license_id")
     if item_update_license_id is not None:
```
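The removed line used a conditional expression purely for its side effect: when `value` was `None`, `setattr` never ran, so an update that explicitly set a field to `None` was silently dropped. A minimal sketch of the before/after behavior, using a hypothetical `Item` class:

```python
class Item:
    """Hypothetical stand-in for the ScoreSet ORM model."""
    abstract_text = "old abstract"

item = Item()
value = None  # client explicitly clears the field

# Old behavior: the assignment is skipped whenever value is None,
# so there is no way to clear an existing value.
setattr(item, "abstract_text", value) if value is not None else None
assert item.abstract_text == "old abstract"

# New behavior: assign unconditionally; None now clears the field.
setattr(item, "abstract_text", value)
assert item.abstract_text is None
```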
```diff
@@ -393,15 +393,11 @@ async def score_set_update(
 class ParseScoreSetUpdate(TypedDict):
     scores_df: Optional[pd.DataFrame]
     counts_df: Optional[pd.DataFrame]
-    score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]]
-    count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]]


 async def parse_score_set_variants_uploads(
     scores_file: Optional[UploadFile] = File(None),
     counts_file: Optional[UploadFile] = File(None),
-    score_columns_metadata_file: Optional[UploadFile] = File(None),
-    count_columns_metadata_file: Optional[UploadFile] = File(None),
 ) -> ParseScoreSetUpdate:
     if scores_file and scores_file.file:
         try:
```
```diff
@@ -425,31 +421,9 @@ async def parse_score_set_variants_uploads(
     else:
         counts_df = None

-    if score_columns_metadata_file and score_columns_metadata_file.file:
-        try:
-            score_columns_metadata = json.load(score_columns_metadata_file.file)
-        except json.JSONDecodeError as e:
-            raise HTTPException(
-                status_code=400, detail=f"Error decoding scores metadata file: {e}. Ensure the file is valid JSON."
-            )
-    else:
-        score_columns_metadata = None
-
-    if count_columns_metadata_file and count_columns_metadata_file.file:
-        try:
-            count_columns_metadata = json.load(count_columns_metadata_file.file)
-        except json.JSONDecodeError as e:
-            raise HTTPException(
-                status_code=400, detail=f"Error decoding counts metadata file: {e}. Ensure the file is valid JSON."
-            )
-    else:
-        count_columns_metadata = None
-
     return {
         "scores_df": scores_df,
         "counts_df": counts_df,
-        "score_columns_metadata": score_columns_metadata,
-        "count_columns_metadata": count_columns_metadata,
     }
```
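With the file-based decoding removed, the helper now only parses the scores and counts CSVs; metadata decoding moves into the endpoints as plain `json.loads` calls on form-field strings. A minimal sketch of that mechanical difference, with made-up payload values:

```python
import io
import json

payload = {"score": {"description": "functional score"}}

# Before: metadata arrived as an UploadFile and was decoded from a file handle.
metadata_from_file = json.load(io.BytesIO(json.dumps(payload).encode()))

# After: metadata arrives as a JSON string form field.
metadata_from_form = json.loads(json.dumps(payload))

assert metadata_from_file == metadata_from_form == payload
```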

```diff
@@ -1320,10 +1294,11 @@ async def create_score_set(
 async def upload_score_set_variant_data(
     *,
     urn: str,
+    data: Request,
     counts_file: Optional[UploadFile] = File(None),
     scores_file: Optional[UploadFile] = File(None),
-    count_columns_metadata_file: Optional[UploadFile] = File(None),
-    score_columns_metadata_file: Optional[UploadFile] = File(None),
+    # count_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None,
+    # score_columns_metadata: Optional[dict[str, DatasetColumnMetadata]] = None,
     db: Session = Depends(deps.get_db),
     user_data: UserData = Depends(require_current_user_with_email),
     worker: ArqRedis = Depends(deps.get_worker),
```
```diff
@@ -1334,6 +1309,19 @@ async def upload_score_set_variant_data(
     """
     save_to_logging_context({"requested_resource": urn, "resource_property": "variants"})

+    try:
+        score_set_variants_data = await parse_score_set_variants_uploads(scores_file, counts_file)
+
+        form_data = await data.form()
+        # Parse variants dataset column metadata JSON strings
+        dataset_column_metadata = {
+            key: json.loads(str(value))
+            for key, value in form_data.items()
+            if key in ["count_columns_metadata", "score_columns_metadata"]
+        }
+    except Exception as e:
+        raise HTTPException(status_code=422, detail=str(e))
+
     # item = db.query(ScoreSet).filter(ScoreSet.urn == urn).filter(ScoreSet.private.is_(False)).one_or_none()
     item = db.query(ScoreSet).filter(ScoreSet.urn == urn).one_or_none()
     if not item or not item.urn:
```
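From the client's perspective, the endpoint now takes the column metadata as ordinary form fields alongside the CSV uploads. An illustrative call with the `requests` library; the URL, URN, and file names below are assumptions for the example, since the route path is not shown in this diff:

```python
import json
import requests

score_columns_metadata = {
    "score": {"description": "functional score", "units": "log ratio"}
}

# Hypothetical endpoint URL and URN.
response = requests.post(
    "http://localhost:8000/api/v1/score-sets/urn:mavedb:00000001-a-1/variants/data",
    files={"scores_file": ("scores.csv", open("scores.csv", "rb"), "text/csv")},
    # Metadata now travels as a JSON string form field, not an uploaded file.
    data={"score_columns_metadata": json.dumps(score_columns_metadata)},
)
response.raise_for_status()
```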
```diff
@@ -1343,13 +1331,6 @@ async def upload_score_set_variant_data(
     assert_permission(user_data, item, Action.UPDATE)
     assert_permission(user_data, item, Action.SET_SCORES)

-    score_set_variants_data = await parse_score_set_variants_uploads(
-        scores_file,
-        counts_file,
-        score_columns_metadata_file,
-        count_columns_metadata_file,
-    )
-
     # Although this is also updated within the variant creation job, update it here
     # as well so that we can display the proper UI components (queue invocation delay
     # races the score set GET request).
```
```diff
@@ -1362,8 +1343,8 @@ async def upload_score_set_variant_data(
         user_data=user_data,
         new_scores_df=score_set_variants_data["scores_df"],
         new_counts_df=score_set_variants_data["counts_df"],
-        new_score_columns_metadata=score_set_variants_data["score_columns_metadata"],
-        new_count_columns_metadata=score_set_variants_data["count_columns_metadata"],
+        new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata", {}),
+        new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata", {}),
         worker=worker,
     )

```
```diff
@@ -1421,44 +1402,60 @@ async def update_score_set_with_variants(
     *,
     urn: str,
     request: Request,
-    # Variants data and metadata files
+    # Variants data files
     counts_file: Optional[UploadFile] = File(None),
     scores_file: Optional[UploadFile] = File(None),
-    count_columns_metadata_file: Optional[UploadFile] = File(None),
-    score_columns_metadata_file: Optional[UploadFile] = File(None),
     db: Session = Depends(deps.get_db),
     user_data: UserData = Depends(require_current_user_with_email),
     worker: ArqRedis = Depends(deps.get_worker),
 ) -> Any:
     """
     Update a score set and variants.
     """
-    logger.debug(msg="Began score set with variants update.", extra=logging_context())
+    logger.info(msg="Began score set with variants update.", extra=logging_context())

     try:
         # Get all form data from the request
         form_data = await request.form()

-        # Convert form data to dictionary, excluding file fields
+        # Convert form data to dictionary, excluding file and associated column metadata fields
         form_dict = {
             key: value
             for key, value in form_data.items()
-            if key not in ["counts_file", "scores_file", "count_columns_metadata_file", "score_columns_metadata_file"]
+            if key not in ["counts_file", "scores_file", "count_columns_metadata", "score_columns_metadata"]
         }
-
         # Create the update object using **kwargs in as_form
         item_update_partial = score_set.ScoreSetUpdateAllOptional.as_form(**form_dict)
+
+        # parse uploaded CSV files
+        score_set_variants_data = await parse_score_set_variants_uploads(
+            scores_file,
+            counts_file,
+        )
+
+        # Parse variants dataset column metadata JSON strings
+        dataset_column_metadata = {
+            key: json.loads(str(value))
+            for key, value in form_data.items()
+            if key in ["count_columns_metadata", "score_columns_metadata"]
+        }
     except Exception as e:
         raise HTTPException(status_code=422, detail=str(e))

     # get existing item from db
     existing_item = db.query(ScoreSet).filter(ScoreSet.urn == urn).one_or_none()

     # merge existing item data with item_update data to validate against ScoreSetUpdate
+
     if existing_item:
         existing_item_data = score_set.ScoreSet.model_validate(existing_item).model_dump()
         updated_data = {**existing_item_data, **item_update_partial.model_dump(exclude_unset=True)}
-        score_set.ScoreSetUpdate.model_validate(updated_data)
+        try:
+            score_set.ScoreSetUpdate.model_validate(updated_data)
+        except Exception as e:
+            # format as fastapi validation error
+            raise RequestValidationError(errors=e.errors())
+            # raise HTTPException(status_code=422, detail=e.errors())
     else:
         logger.info(msg="Failed to update score set; The requested score set does not exist.", extra=logging_context())
         raise HTTPException(status_code=404, detail=f"score set with URN '{urn}' not found")
```
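Re-raising the pydantic error as `RequestValidationError` lets FastAPI's default exception handler render it as the framework's standard 422 response body rather than a bare `HTTPException` string. A self-contained sketch of the pattern, using a stand-in model rather than the real `score_set.ScoreSetUpdate`:

```python
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel, ValidationError

class ScoreSetUpdateSketch(BaseModel):
    """Hypothetical stand-in for score_set.ScoreSetUpdate."""
    title: str

def validate_or_422(data: dict) -> None:
    try:
        ScoreSetUpdateSketch.model_validate(data)
    except ValidationError as e:
        # Inside a FastAPI app, the default handler turns this into the same
        # 422 body produced for validation failures on declared parameters.
        raise RequestValidationError(errors=e.errors())

validate_or_422({"title": "ok"})  # valid input passes through silently
```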
```diff
@@ -1474,15 +1471,24 @@ async def update_score_set_with_variants(
     updatedItem = itemUpdateResult["item"]
     should_create_variants = itemUpdateResult.get("should_create_variants", False)

-    # process uploaded files
-    score_set_variants_data = await parse_score_set_variants_uploads(
-        scores_file,
-        counts_file,
-        score_columns_metadata_file,
-        count_columns_metadata_file,
+    existing_score_columns_metadata = (existing_item.dataset_columns or {}).get("score_columns_metadata", {})
+    existing_count_columns_metadata = (existing_item.dataset_columns or {}).get("count_columns_metadata", {})
+
+    did_score_columns_metadata_change = (
+        dataset_column_metadata.get("score_columns_metadata", {}) != existing_score_columns_metadata
+    )
+    did_count_columns_metadata_change = (
+        dataset_column_metadata.get("count_columns_metadata", {}) != existing_count_columns_metadata
     )

-    if should_create_variants or any([val is not None for val in score_set_variants_data.values()]):
+    # run variant creation job only if targets have changed (indicated by "should_create_variants"), new score
+    # or count files were uploaded, or dataset column metadata has changed
+    if (
+        should_create_variants
+        or did_score_columns_metadata_change
+        or did_count_columns_metadata_change
+        or any([val is not None for val in score_set_variants_data.values()])
+    ):
         assert_permission(user_data, updatedItem, Action.SET_SCORES)

         updatedItem.processing_state = ProcessingState.processing
```
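The change detection is plain dict equality against whatever is already stored on `dataset_columns`, with `{}` standing in both for "nothing sent" and "nothing stored". A worked illustration with made-up values:

```python
existing = {"score": {"description": "functional score"}}

# Identical incoming metadata: no change, so metadata alone does not
# trigger the variant creation job.
incoming = {"score": {"description": "functional score"}}
print(incoming != existing)  # False

# A request that omits the field falls back to {}, which registers as a
# change whenever metadata is already stored on the score set.
print({} != existing)  # True
```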
```diff
@@ -1494,8 +1500,12 @@ async def update_score_set_with_variants(
             worker=worker,
             new_scores_df=score_set_variants_data["scores_df"],
             new_counts_df=score_set_variants_data["counts_df"],
-            new_score_columns_metadata=score_set_variants_data["score_columns_metadata"],
-            new_count_columns_metadata=score_set_variants_data["count_columns_metadata"],
+            new_score_columns_metadata=dataset_column_metadata.get("score_columns_metadata")
+            if did_score_columns_metadata_change
+            else existing_score_columns_metadata,
+            new_count_columns_metadata=dataset_column_metadata.get("count_columns_metadata")
+            if did_count_columns_metadata_change
+            else existing_count_columns_metadata,
         )

     db.add(updatedItem)
```
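For context, `ScoreSetUpdateAllOptional.as_form(**form_dict)` follows the common pattern of building a pydantic model from raw form fields. The sketch below is an assumption about its general shape, not the actual mavedb implementation:

```python
from typing import Optional
from pydantic import BaseModel

class ScoreSetUpdateAllOptionalSketch(BaseModel):
    """Hypothetical stand-in: every field optional so partial updates validate."""
    title: Optional[str] = None
    short_description: Optional[str] = None

    @classmethod
    def as_form(cls, **kwargs) -> "ScoreSetUpdateAllOptionalSketch":
        # Form values arrive as strings; pydantic coerces and validates them.
        return cls(**kwargs)

partial = ScoreSetUpdateAllOptionalSketch.as_form(title="New title")
# exclude_unset=True mirrors the merge step above: only supplied fields
# override the existing item's data.
print(partial.model_dump(exclude_unset=True))  # {'title': 'New title'}
```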
