Skip to content

Commit a225d06

Browse files
committed
Update worker unit tests to include score and count columns metadata files
1 parent c4b9819 commit a225d06

File tree

6 files changed

+58
-33
lines changed

6 files changed

+58
-33
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"c_0": {
3+
"description": "c_0 description",
4+
"details": "c_0 details"
5+
},
6+
"c_1": {
7+
"description": "c_1 description",
8+
"details": "c_1 details"
9+
}
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"s_0": {
3+
"description": "s_0 description",
4+
"details": "s_0 details"
5+
},
6+
"s_1": {
7+
"description": "s_1 description",
8+
"details": "s_1 details"
9+
}
10+
}

tests/worker/data/scores.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
hgvs_nt,hgvs_pro,score
2-
c.1A>T,p.Thr1Ser,0.3
3-
c.2C>T,p.Thr1Met,0.0
4-
c.6T>A,p.Phe2Leu,-1.65
1+
hgvs_nt,hgvs_pro,score,s_0,s_1
2+
c.1A>T,p.Thr1Ser,0.3,val1,val1
3+
c.2C>T,p.Thr1Met,0.0,val2,val2
4+
c.6T>A,p.Phe2Leu,-1.65,val3,val3

tests/worker/data/scores_acc.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
hgvs_nt,score
2-
NM_001637.3:c.1G>C,0.3
3-
NM_001637.3:c.2A>G,0.0
4-
NM_001637.3:c.6C>A,-1.65
1+
hgvs_nt,score,s_0,s_1
2+
NM_001637.3:c.1G>C,0.3,val1,val1
3+
NM_001637.3:c.2A>G,0.0,val2,val2
4+
NM_001637.3:c.6C>A,-1.65,val3,val3
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
hgvs_nt,score
2-
TEST3:n.1A>T,0.3
3-
TEST3:n.6T>A,-1.65
4-
TEST4:n.2A>T,0.1
1+
hgvs_nt,score,s_0,s_1
2+
TEST3:n.1A>T,0.3,val1,val1
3+
TEST3:n.6T>A,-1.65,val2,val2
4+
TEST4:n.2A>T,0.1,val3,val3

tests/worker/test_jobs.py

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from asyncio.unix_events import _UnixSelectorEventLoop
44
from copy import deepcopy
55
from datetime import date
6+
import json
67
from unittest.mock import patch
78
from uuid import uuid4
89

@@ -118,15 +119,19 @@ async def setup_records_and_files(async_client, data_files, input_score_set):
118119
with (
119120
open(data_files / scores_fp, "rb") as score_file,
120121
open(data_files / counts_fp, "rb") as count_file,
122+
open(data_files / "score_columns_metadata.json", "rb") as score_columns_file,
123+
open(data_files / "count_columns_metadata.json", "rb") as count_columns_file,
121124
):
122125
scores = csv_data_to_df(score_file)
123126
counts = csv_data_to_df(count_file)
127+
score_columns_metadata = json.load(score_columns_file)
128+
count_columns_metadata = json.load(count_columns_file)
124129

125-
return score_set["urn"], scores, counts
130+
return score_set["urn"], scores, counts, score_columns_metadata, count_columns_metadata
126131

127132

128133
async def setup_records_files_and_variants(session, async_client, data_files, input_score_set, worker_ctx):
129-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
134+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
130135
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
131136

132137
# Patch CDOT `_get_transcript`, in the event this function is called on an accession based scoreset.
@@ -135,7 +140,7 @@ async def setup_records_files_and_variants(session, async_client, data_files, in
135140
"_get_transcript",
136141
return_value=TEST_NT_CDOT_TRANSCRIPT,
137142
):
138-
result = await create_variants_for_score_set(worker_ctx, uuid4().hex, score_set.id, 1, scores, counts)
143+
result = await create_variants_for_score_set(worker_ctx, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata)
139144

140145
score_set_with_variants = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
141146

@@ -248,7 +253,7 @@ async def test_create_variants_for_score_set_with_validation_error(
248253
session,
249254
data_files,
250255
):
251-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
256+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
252257
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
253258

254259
if input_score_set == TEST_MINIMAL_SEQ_SCORESET:
@@ -266,7 +271,7 @@ async def test_create_variants_for_score_set_with_validation_error(
266271
) as hdp,
267272
):
268273
result = await create_variants_for_score_set(
269-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
274+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
270275
)
271276

272277
# Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -298,7 +303,7 @@ async def test_create_variants_for_score_set_with_caught_exception(
298303
session,
299304
data_files,
300305
):
301-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
306+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
302307
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
303308

304309
# This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -307,7 +312,7 @@ async def test_create_variants_for_score_set_with_caught_exception(
307312
patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc,
308313
):
309314
result = await create_variants_for_score_set(
310-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
315+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
311316
)
312317
mocked_exc.assert_called()
313318

@@ -334,7 +339,7 @@ async def test_create_variants_for_score_set_with_caught_base_exception(
334339
session,
335340
data_files,
336341
):
337-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
342+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
338343
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
339344

340345
# This is somewhat (extra) dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -343,7 +348,7 @@ async def test_create_variants_for_score_set_with_caught_base_exception(
343348
patch.object(pd.DataFrame, "isnull", side_effect=BaseException),
344349
):
345350
result = await create_variants_for_score_set(
346-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
351+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
347352
)
348353

349354
db_variants = session.scalars(select(Variant)).all()
@@ -369,7 +374,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
369374
session,
370375
data_files,
371376
):
372-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
377+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
373378
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
374379

375380
with patch.object(
@@ -378,7 +383,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
378383
return_value=TEST_NT_CDOT_TRANSCRIPT,
379384
) as hdp:
380385
result = await create_variants_for_score_set(
381-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
386+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
382387
)
383388

384389
# Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -401,7 +406,7 @@ async def test_create_variants_for_score_set_with_existing_variants(
401406
return_value=TEST_NT_CDOT_TRANSCRIPT,
402407
) as hdp:
403408
result = await create_variants_for_score_set(
404-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
409+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
405410
)
406411

407412
db_variants = session.scalars(select(Variant)).all()
@@ -427,7 +432,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
427432
session,
428433
data_files,
429434
):
430-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
435+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
431436
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
432437

433438
# This is somewhat dumb and wouldn't actually happen like this, but it serves as an effective way to guarantee
@@ -440,7 +445,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
440445
) as mocked_exc,
441446
):
442447
result = await create_variants_for_score_set(
443-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
448+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
444449
)
445450
mocked_exc.assert_called()
446451

@@ -461,7 +466,7 @@ async def test_create_variants_for_score_set_with_existing_exceptions(
461466
return_value=TEST_NT_CDOT_TRANSCRIPT,
462467
) as hdp:
463468
result = await create_variants_for_score_set(
464-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
469+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
465470
)
466471

467472
# Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -493,7 +498,7 @@ async def test_create_variants_for_score_set(
493498
session,
494499
data_files,
495500
):
496-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
501+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
497502
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
498503

499504
with patch.object(
@@ -502,7 +507,7 @@ async def test_create_variants_for_score_set(
502507
return_value=TEST_NT_CDOT_TRANSCRIPT,
503508
) as hdp:
504509
result = await create_variants_for_score_set(
505-
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts
510+
standalone_worker_context, uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata
506511
)
507512

508513
# Call data provider _get_transcript method if this is an accession based score set, otherwise do not.
@@ -536,7 +541,7 @@ async def test_create_variants_for_score_set_enqueues_manager_and_successful_map
536541
):
537542
score_set_is_seq = all(["targetSequence" in target for target in input_score_set["targetGenes"]])
538543
score_set_is_multi_target = len(input_score_set["targetGenes"]) > 1
539-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
544+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
540545
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
541546

542547
async def dummy_mapping_job():
@@ -573,7 +578,7 @@ async def dummy_linking_job():
573578
patch("mavedb.worker.jobs.LINKING_BACKOFF_IN_SECONDS", 0),
574579
patch("mavedb.worker.jobs.CLIN_GEN_SUBMISSION_ENABLED", True),
575580
):
576-
await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts)
581+
await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata)
577582
await arq_worker.async_run()
578583
await arq_worker.run_check()
579584

@@ -612,11 +617,11 @@ async def test_create_variants_for_score_set_exception_skips_mapping(
612617
arq_worker,
613618
arq_redis,
614619
):
615-
score_set_urn, scores, counts = await setup_records_and_files(async_client, data_files, input_score_set)
620+
score_set_urn, scores, counts, score_columns_metadata, count_columns_metadata = await setup_records_and_files(async_client, data_files, input_score_set)
616621
score_set = session.scalars(select(ScoreSetDbModel).where(ScoreSetDbModel.urn == score_set_urn)).one()
617622

618623
with patch.object(pd.DataFrame, "isnull", side_effect=Exception) as mocked_exc:
619-
await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts)
624+
await arq_redis.enqueue_job("create_variants_for_score_set", uuid4().hex, score_set.id, 1, scores, counts, score_columns_metadata, count_columns_metadata)
620625
await arq_worker.async_run()
621626
await arq_worker.run_check()
622627

0 commit comments

Comments
 (0)