33from asyncio .unix_events import _UnixSelectorEventLoop
44from copy import deepcopy
55from datetime import date
6+ import json
67from unittest .mock import patch
78from uuid import uuid4
89
@@ -118,15 +119,19 @@ async def setup_records_and_files(async_client, data_files, input_score_set):
118119 with (
119120 open (data_files / scores_fp , "rb" ) as score_file ,
120121 open (data_files / counts_fp , "rb" ) as count_file ,
122+ open (data_files / "score_columns_metadata.json" , "rb" ) as score_columns_file ,
123+ open (data_files / "count_columns_metadata.json" , "rb" ) as count_columns_file ,
121124 ):
122125 scores = csv_data_to_df (score_file )
123126 counts = csv_data_to_df (count_file )
127+ score_columns_metadata = json .load (score_columns_file )
128+ count_columns_metadata = json .load (count_columns_file )
124129
125- return score_set ["urn" ], scores , counts
130+ return score_set ["urn" ], scores , counts , score_columns_metadata , count_columns_metadata
126131
127132
128133async def setup_records_files_and_variants (session , async_client , data_files , input_score_set , worker_ctx ):
129- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
134+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
130135 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
131136
132137 # Patch CDOT `_get_transcript`, in the event this function is called on an accesssion based scoreset.
@@ -135,7 +140,7 @@ async def setup_records_files_and_variants(session, async_client, data_files, in
135140 "_get_transcript" ,
136141 return_value = TEST_NT_CDOT_TRANSCRIPT ,
137142 ):
138- result = await create_variants_for_score_set (worker_ctx , uuid4 ().hex , score_set .id , 1 , scores , counts )
143+ result = await create_variants_for_score_set (worker_ctx , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata )
139144
140145 score_set_with_variants = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
141146
@@ -248,7 +253,7 @@ async def test_create_variants_for_score_set_with_validation_error(
248253 session ,
249254 data_files ,
250255):
251- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
256+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
252257 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
253258
254259 if input_score_set == TEST_MINIMAL_SEQ_SCORESET :
@@ -266,7 +271,7 @@ async def test_create_variants_for_score_set_with_validation_error(
266271 ) as hdp ,
267272 ):
268273 result = await create_variants_for_score_set (
269- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
274+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
270275 )
271276
272277 # Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -298,7 +303,7 @@ async def test_create_variants_for_score_set_with_caught_exception(
298303 session ,
299304 data_files ,
300305):
301- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
306+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
302307 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
303308
304309 # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -307,7 +312,7 @@ async def test_create_variants_for_score_set_with_caught_exception(
307312 patch .object (pd .DataFrame , "isnull" , side_effect = Exception ) as mocked_exc ,
308313 ):
309314 result = await create_variants_for_score_set (
310- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
315+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
311316 )
312317 mocked_exc .assert_called ()
313318
@@ -334,7 +339,7 @@ async def test_create_variants_for_score_set_with_caught_base_exception(
334339 session ,
335340 data_files ,
336341):
337- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
342+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
338343 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
339344
340345 # This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -343,7 +348,7 @@ async def test_create_variants_for_score_set_with_caught_base_exception(
343348 patch .object (pd .DataFrame , "isnull" , side_effect = BaseException ),
344349 ):
345350 result = await create_variants_for_score_set (
346- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
351+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
347352 )
348353
349354 db_variants = session .scalars (select (Variant )).all ()
@@ -369,7 +374,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
369374 session ,
370375 data_files ,
371376):
372- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
377+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
373378 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
374379
375380 with patch .object (
@@ -378,7 +383,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
378383 return_value = TEST_NT_CDOT_TRANSCRIPT ,
379384 ) as hdp :
380385 result = await create_variants_for_score_set (
381- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
386+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
382387 )
383388
384389 # Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -401,7 +406,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
401406 return_value = TEST_NT_CDOT_TRANSCRIPT ,
402407 ) as hdp :
403408 result = await create_variants_for_score_set (
404- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
409+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
405410 )
406411
407412 db_variants = session .scalars (select (Variant )).all ()
@@ -427,7 +432,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
427432 session ,
428433 data_files ,
429434):
430- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
435+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
431436 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
432437
433438 # This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -440,7 +445,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
440445 ) as mocked_exc ,
441446 ):
442447 result = await create_variants_for_score_set (
443- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
448+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
444449 )
445450 mocked_exc .assert_called ()
446451
@@ -461,7 +466,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
461466 return_value = TEST_NT_CDOT_TRANSCRIPT ,
462467 ) as hdp :
463468 result = await create_variants_for_score_set (
464- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
469+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
465470 )
466471
467472 # Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -493,7 +498,7 @@ async def test_create_variants_for_score_set(
493498 session ,
494499 data_files ,
495500):
496- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
501+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
497502 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
498503
499504 with patch .object (
@@ -502,7 +507,7 @@ async def test_create_variants_for_score_set(
502507 return_value = TEST_NT_CDOT_TRANSCRIPT ,
503508 ) as hdp :
504509 result = await create_variants_for_score_set (
505- standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts
510+ standalone_worker_context , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata
506511 )
507512
508513 # Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -536,7 +541,7 @@ async def test_create_variants_for_score_set_enqueues_manager_and_successful_map
536541):
537542 score_set_is_seq = all (["targetSequence" in target for target in input_score_set ["targetGenes" ]])
538543 score_set_is_multi_target = len (input_score_set ["targetGenes" ]) > 1
539- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
544+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
540545 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
541546
542547 async def dummy_mapping_job ():
@@ -573,7 +578,7 @@ async def dummy_linking_job():
573578 patch ("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS" , 0 ),
574579 patch ("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED" , True ),
575580 ):
576- await arq_redis .enqueue_job ("create_variants_for_score_set" , uuid4 ().hex , score_set .id , 1 , scores , counts )
581+ await arq_redis .enqueue_job ("create_variants_for_score_set" , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata )
577582 await arq_worker .async_run ()
578583 await arq_worker .run_check ()
579584
@@ -612,11 +617,11 @@ async def test_create_variants_for_score_set_exception_skips_mapping(
612617 arq_worker ,
613618 arq_redis ,
614619):
615- score_set_urn , scores , counts = await setup_records_and_files (async_client , data_files , input_score_set )
620+ score_set_urn , scores , counts , score_columns_metadata , count_columns_metadata = await setup_records_and_files (async_client , data_files , input_score_set )
616621 score_set = session .scalars (select (ScoreSetDbModel ).where (ScoreSetDbModel .urn == score_set_urn )).one ()
617622
618623 with patch .object (pd .DataFrame , "isnull" , side_effect = Exception ) as mocked_exc :
619- await arq_redis .enqueue_job ("create_variants_for_score_set" , uuid4 ().hex , score_set .id , 1 , scores , counts )
624+ await arq_redis .enqueue_job ("create_variants_for_score_set" , uuid4 ().hex , score_set .id , 1 , scores , counts , score_columns_metadata , count_columns_metadata )
620625 await arq_worker .async_run ()
621626 await arq_worker .run_check ()
622627
0 commit comments