1111from fastapi .responses import StreamingResponse
1212from ga4gh .va_spec .acmg_2015 import VariantPathogenicityEvidenceLine
1313from ga4gh .va_spec .base .core import Statement , ExperimentalVariantFunctionalImpactStudyResult
14+ from mavedb .lib .target_genes import find_or_create_target_gene_by_accession , find_or_create_target_gene_by_sequence
1415from mavedb .view_models .contributor import ContributorCreate
1516from mavedb .view_models .doi_identifier import DoiIdentifierCreate
1617from mavedb .view_models .publication_identifier import PublicationIdentifierCreate
@@ -123,11 +124,6 @@ async def enqueue_variant_creation(
123124 scores_column_metadata = item .dataset_columns .get ("scores_column_metadata" )
124125 counts_column_metadata = item .dataset_columns .get ("counts_column_metadata" )
125126
126- # Although this is also updated within the variant creation job, update it here
127- # as well so that we can display the proper UI components (queue invocation delay
128- # races the score set GET request).
129- item .processing_state = ProcessingState .processing
130-
131127 # await the insertion of this job into the worker queue, not the job itself.
132128 job = await worker .enqueue_job (
133129 "create_variants_for_score_set" ,
@@ -161,7 +157,7 @@ async def score_set_update(
161157 item_update_dict : dict [str , Any ] = item_update .model_dump (exclude_unset = exclude_unset )
162158
163159 item = db .query (ScoreSet ).filter (ScoreSet .urn == urn ).one_or_none ()
164- if not item :
160+ if not item or item . id is None :
165161 logger .info (msg = "Failed to update score set; The requested score set does not exist." , extra = logging_context ())
166162 raise HTTPException (status_code = 404 , detail = f"score set with URN '{ urn } ' not found" )
167163
@@ -255,17 +251,9 @@ async def score_set_update(
255251 if "score_ranges" in item_update_dict :
256252 item .score_ranges = item_update_dict .get ("score_ranges" , null ())
257253
258- # If item_update_dict includes target_genes, delete the old target gene, WT sequence, and reference map. These will be deleted when we set the score set's
259- # target_gene to None, because we have set cascade='all,delete-orphan' on ScoreSet.target_gene. (Since the
260- # relationship is defined with the target gene as owner, this is actually set up in the backref attribute of
261- # TargetGene.score_set.)
262- #
263- # We must flush our database queries now so that the old target gene will be deleted before inserting a new one
264- # with the same score_set_id.
265-
266254 if "target_genes" in item_update_dict :
267- item . target_genes = []
268- db . flush ()
255+ assert all ( tg . id is not None for tg in item . target_genes )
256+ existing_target_ids : list [ int ] = [ tg . id for tg in item . target_genes if tg . id is not None ]
269257
270258 targets : List [TargetGene ] = []
271259 accessions = False
@@ -301,17 +289,11 @@ async def score_set_update(
301289 # View model validation rules enforce that sequences must have a label defined if there is more than one
302290 # target defined on a score set.
303291 seq_label = gene .target_sequence .label if gene .target_sequence .label is not None else gene .name
304- target_sequence = TargetSequence (
305- ** jsonable_encoder (
306- gene .target_sequence ,
307- by_alias = False ,
308- exclude = {"taxonomy" , "label" },
309- ),
310- taxonomy = taxonomy ,
311- label = seq_label ,
312- )
313- target_gene = TargetGene (
314- ** jsonable_encoder (
292+
293+ target_gene = target_gene = find_or_create_target_gene_by_sequence (
294+ db ,
295+ score_set_id = item .id ,
296+ tg = jsonable_encoder (
315297 gene ,
316298 by_alias = False ,
317299 exclude = {
@@ -320,7 +302,11 @@ async def score_set_update(
320302 "target_accession" ,
321303 },
322304 ),
323- target_sequence = target_sequence ,
305+ tg_sequence = {
306+ ** jsonable_encoder (gene .target_sequence , by_alias = False , exclude = {"taxonomy" , "label" }),
307+ "taxonomy" : taxonomy ,
308+ "label" : seq_label ,
309+ }
324310 )
325311
326312 elif gene .target_accession :
@@ -333,9 +319,11 @@ async def score_set_update(
333319 "MaveDB does not support score-sets with both sequence and accession based targets. Please re-submit this scoreset using only one type of target."
334320 )
335321 accessions = True
336- target_accession = TargetAccession (** jsonable_encoder (gene .target_accession , by_alias = False ))
337- target_gene = TargetGene (
338- ** jsonable_encoder (
322+
323+ target_gene = find_or_create_target_gene_by_accession (
324+ db ,
325+ score_set_id = item .id ,
326+ tg = jsonable_encoder (
339327 gene ,
340328 by_alias = False ,
341329 exclude = {
@@ -344,7 +332,7 @@ async def score_set_update(
344332 "target_accession" ,
345333 },
346334 ),
347- target_accession = target_accession ,
335+ tg_accession = jsonable_encoder ( gene . target_accession , by_alias = False ) ,
348336 )
349337 else :
350338 save_to_logging_context ({"failing_target" : gene })
@@ -365,7 +353,13 @@ async def score_set_update(
365353 targets .append (target_gene )
366354
367355 item .target_genes = targets
368- should_create_variants = True if item .variants else False
356+
357+ assert all (tg .id is not None for tg in item .target_genes )
358+ current_target_ids : list [int ] = [tg .id for tg in item .target_genes if tg .id is not None ]
359+
360+ if sorted (existing_target_ids ) != sorted (current_target_ids ):
361+ logger .info (msg = f"Target genes have changed for score set { item .id } " , extra = logging_context ())
362+ should_create_variants = True if item .variants else False
369363
370364 else :
371365 logger .debug (msg = "Skipped score range and target gene update. Score set is published." , extra = logging_context ())
@@ -1131,6 +1125,7 @@ async def create_score_set(
11311125 # View model validation rules enforce that sequences must have a label defined if there is more than one
11321126 # target defined on a score set.
11331127 seq_label = gene .target_sequence .label if gene .target_sequence .label is not None else gene .name
1128+
11341129 target_sequence = TargetSequence (
11351130 ** jsonable_encoder (gene .target_sequence , by_alias = False , exclude = {"taxonomy" , "label" }),
11361131 taxonomy = taxonomy ,
@@ -1454,8 +1449,17 @@ async def update_score_set(
14541449 should_create_variants = itemUpdateResult ["should_create_variants" ]
14551450
14561451 if should_create_variants :
1452+ # Although this is also updated within the variant creation job, update it here
1453+ # as well so that we can display the proper UI components (queue invocation delay
1454+ # races the score set GET request).
1455+ updatedItem .processing_state = ProcessingState .processing
1456+
14571457 await enqueue_variant_creation (item = updatedItem , user_data = user_data , worker = worker )
14581458
1459+ db .add (updatedItem )
1460+ db .commit ()
1461+ db .refresh (updatedItem )
1462+
14591463 enriched_experiment = enrich_experiment_with_num_score_sets (updatedItem .experiment , user_data )
14601464 return score_set .ScoreSet .model_validate (updatedItem ).copy (update = {"experiment" : enriched_experiment })
14611465
0 commit comments