Skip to content

Commit 1a609d1

Browse files
authored
Merge pull request #70 from ao508/update-igo-fields
Update IGO accepted fields and add microbiome enums
2 parents da764df + ff4c16e commit 1a609d1

11 files changed

+658
-11
lines changed

src/main/java/org/mskcc/smile/commons/enums/CmoSampleClass.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ public enum CmoSampleClass {
1515
PRIMARY("Primary"),
1616
RECURRENCE("Recurrence"),
1717
TUMOR("Tumor"),
18-
UNKNOWN_TUMOR("Unknown Tumor");
19-
// MICROBIOME("Microbiome");
18+
UNKNOWN_TUMOR("Unknown Tumor"),
19+
MICROBIOME("Microbiome");
2020

2121
private static final Map<String, CmoSampleClass> valueToCmoSampleClass = new HashMap<>();
2222

src/main/java/org/mskcc/smile/commons/enums/SampleOrigin.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ public enum SampleOrigin {
3030
TISSUE("Tissue"),
3131
URINE("Urine"),
3232
VIABLY_FROZEN_CELLS("Viably Frozen Cells"),
33-
WHOLE_BLOOD("Whole Blood");
34-
// STOOL("Stool");
33+
WHOLE_BLOOD("Whole Blood"),
34+
STOOL("Stool");
3535

3636
private static final Map<String, SampleOrigin> valueToSampleOrigin = new HashMap<>();
3737

src/main/java/org/mskcc/smile/commons/enums/SampleType.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ public enum SampleType {
3131
SLIDES("Slides"),
3232
TISSUE("Tissue"),
3333
UHMWDNA("uhmwDNA"),
34-
WHOLE_BLOOD("Whole Blood");
35-
//STOOL("Stool");
34+
WHOLE_BLOOD("Whole Blood"),
35+
STOOL("Stool");
3636

3737
private static final Map<String, SampleType> valueToSampleType = new HashMap<>();
3838

src/main/java/org/mskcc/smile/commons/impl/JsonComparatorImpl.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
public class JsonComparatorImpl implements JsonComparator {
2929
private final ObjectMapper mapper = new ObjectMapper();
3030

31+
public final List<String> LONG_TYPE_STD_FIELDS = Arrays.asList("igoDeliveryDate");
32+
3133
public final String[] DEFAULT_IGNORED_FIELDS = new String[]{
3234
"smileRequestId",
3335
"smileSampleId",
@@ -43,11 +45,14 @@ public class JsonComparatorImpl implements JsonComparator {
4345
"genePanel",
4446
"additionalProperties",
4547
"cmoInfoIgoId",
46-
"date"};
48+
"date",
49+
"altid",
50+
"sampleStatus"};
4751

4852
public final String[] IGO_ACCEPTED_FIELDS = new String[]{
4953
//RequestMetadata fields
5054
"deliveryDate",
55+
"igoDeliveryDate",
5156
"isCmoRequest",
5257
"libraryType",
5358
"pooledNormals",
@@ -56,6 +61,7 @@ public class JsonComparatorImpl implements JsonComparator {
5661
"igoComplete",
5762
"igoSampleId",
5863
"strand",
64+
"ilabRequestId",
5965
// SampleMetadata fields
6066
"baitSet",
6167
"cfDNA2dBarcode",
@@ -87,7 +93,9 @@ public class JsonComparatorImpl implements JsonComparator {
8793
"qcReports",
8894
"status",
8995
"cmoSampleIdFields",
90-
"runs"
96+
"runs",
97+
"sampleStatus",
98+
"igoSampleStatus"
9199
};
92100

93101
public final String[] GENERIC_IGNORED_FIELDS = new String[]{
@@ -105,6 +113,7 @@ private Map<String, String> initStandardizedIgoRequestJsonPropsMap() {
105113
map.put("projectId", "igoProjectId");
106114
map.put("requestId", "igoRequestId");
107115
map.put("recipe", "genePanel");
116+
map.put("deliveryDate", "igoDeliveryDate");
108117
return map;
109118
}
110119

@@ -403,7 +412,8 @@ private String standardizeAndFilterRequestSamplesJson(String jsonString, String[
403412
private Boolean jsonHasQcAndOrLibrariesAndOrStatusFields(String jsonString)
404413
throws JsonProcessingException {
405414
JsonNode jsonNode = mapper.readTree(jsonString);
406-
return jsonNode.has("libraries") || jsonNode.has("qcReports") || jsonNode.has("status");
415+
return jsonNode.has("libraries") || jsonNode.has("qcReports")
416+
|| (jsonNode.has("status") && !jsonNode.has("samples"));
407417
}
408418

409419
/**
@@ -599,9 +609,12 @@ private JsonNode standardizeJsonProperties(ObjectNode node, Map<String, String>
599609
// updating and removal needs to be done separately from iteration above
600610
// to avoid a java.util.ConcurrentModificationException
601611
for (String field : fieldsToUpdate) {
602-
String value = node.get(field).asText();
603612
String stdJsonProp = jsonPropsMap.get(field);
604-
node.put(stdJsonProp, value);
613+
if (LONG_TYPE_STD_FIELDS.contains(stdJsonProp)) {
614+
node.put(stdJsonProp, node.get(field).asLong());
615+
} else {
616+
node.put(stdJsonProp, node.get(field).asText());
617+
}
605618
node.remove(field);
606619
}
607620
return node;

src/test/java/org/mskcc/smile/commons/JsonComparatorTest.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,48 @@ public void testUpdateCohortCompleteData() throws Exception {
308308
Assertions.assertFalse(isConsistent);
309309
}
310310

311+
/**
312+
* Tests that IGO-specific updates are recognized (ilabRequestId, deliveryDate) at the request level
313+
* as well as the sample level (sampleStatus).
314+
* @throws Exception if either mocked JSON payload cannot be parsed or compared
315+
*/
316+
@Test public void testNewIgoSupportedFieldsComparison() throws Exception {
317+
MockJsonTestData igoSchema = mockedJsonDataMap.get(
318+
"mockIgoSchemaUpdatedRequest");
319+
MockJsonTestData universalSchema = mockedJsonDataMap.get(
320+
"mockPublishedIgoSchemaUpdatedRequest");
321+
322+
// IGO-schema request must match its SMILE universal-schema form (deliveryDate vs. igoDeliveryDate)
323+
Assertions.assertTrue(jsonComparator.isConsistent(igoSchema.getJsonString(),
324+
universalSchema.getJsonString()));
325+
Assertions.assertTrue(jsonComparator.isConsistentByIgoProperties(igoSchema.getJsonString(),
326+
universalSchema.getJsonString()));
327+
328+
// test that an update to igoSampleStatus is detected by both the general and IGO-specific checks
329+
MockJsonTestData universalSchemaIgoUpdates = mockedJsonDataMap.get(
330+
"mockPublishedIgoSchemaUpdatedRequestWithIgoDataChanges");
331+
Assertions.assertFalse(jsonComparator.isConsistent(universalSchema.getJsonString(),
332+
universalSchemaIgoUpdates.getJsonString()));
333+
Assertions.assertFalse(jsonComparator.isConsistentByIgoProperties(universalSchema.getJsonString(),
334+
universalSchemaIgoUpdates.getJsonString()));
335+
336+
// test that a non-IGO property update fails the general check but passes the IGO-specific check
337+
MockJsonTestData universalSchemaNonIgoUpdates = mockedJsonDataMap.get(
338+
"mockPublishedIgoSchemaUpdatedRequestWithNonIgoDataChanges");
339+
Assertions.assertFalse(jsonComparator.isConsistent(universalSchema.getJsonString(),
340+
universalSchemaNonIgoUpdates.getJsonString()));
341+
Assertions.assertTrue(jsonComparator.isConsistentByIgoProperties(universalSchema.getJsonString(),
342+
universalSchemaNonIgoUpdates.getJsonString()));
343+
344+
// test that updates to both IGO and non-IGO props are recognized by both types of consistency checks
345+
MockJsonTestData universalSchemaMixedUpdates = mockedJsonDataMap.get(
346+
"mockPublishedIgoSchemaUpdatedRequestWithMixedDataChanges");
347+
Assertions.assertFalse(jsonComparator.isConsistent(universalSchema.getJsonString(),
348+
universalSchemaMixedUpdates.getJsonString()));
349+
Assertions.assertFalse(jsonComparator.isConsistentByIgoProperties(universalSchema.getJsonString(),
350+
universalSchemaMixedUpdates.getJsonString()));
351+
}
352+
311353
private String getErrorMessage(Map<String, String> errorsMap) {
312354
StringBuilder builder = new StringBuilder();
313355
builder.append("\nConsistencyCheckerUtil failures summary:\n");
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
{
2+
"smileRequestId": "86b34225-5ad1-4893-a371-c726f3fafc9d",
3+
"igoProjectId": "MOCKREQUEST1",
4+
"igoRequestId": "MOCKREQUEST1_B",
5+
"igoDeliveryDate": 1757388036044,
6+
"ilabRequestId": "IGO-658741",
7+
"genePanel": "GENESET101",
8+
"projectManagerName": "Bar, Foo",
9+
"piEmail": "request1pi@mskcc.org",
10+
"labHeadName": "Foo Bar",
11+
"labHeadEmail": "request1pi@mskcc.org",
12+
"investigatorName": "John Smith",
13+
"investigatorEmail": "NEWINVESTIGATOREMAIL@mskcc.org",
14+
"dataAnalystName": "Poin Dexter",
15+
"dataAnalystEmail": "dexterp@mskcc.org",
16+
"otherContactEmails": "dexterp@mskcc.org",
17+
"dataAccessEmails": "",
18+
"qcAccessEmails": "",
19+
"strand": null,
20+
"libraryType": null,
21+
"isCmoRequest": true,
22+
"bicAnalysis": false,
23+
"status": {
24+
"validationStatus": true,
25+
"validationReport": "{}"
26+
},
27+
"requestJson": "{\"ilabRequestId\":\"IGO-658741\",\"requestId\":\"MOCKREQUEST1_B\",\"projectId\":\"MOCKREQUEST1\",\"dataAccessEmails\":\"\",\"dataAnalystEmail\":\"dexterp@mskcc.org\",\"dataAnalystName\":\"Poin Dexter\",\"investigatorEmail\":\"NEWINVESTIGATOREMAIL@mskcc.org\",\"investigatorName\":\"John Smith\",\"labHeadEmail\":\"request1pi@mskcc.org\",\"labHeadName\":\"Foo Bar\",\"libraryType\":null,\"otherContactEmails\":\"dexterp@mskcc.org\",\"piEmail\":\"request1pi@mskcc.org\",\"projectManagerName\":\"Bar, Foo\",\"qcAccessEmails\":\"\",\"recipe\":\"GENESET101\",\"strand\":null,\"deliveryDate\":1757388036044,\"bicAnalysis\":false,\"isCmoRequest\":true,\"samples\":[{\"igoId\":\"MOCKREQUEST1_B_1\",\"cmoPatientId\":\"C-MP789JR\",\"cmoSampleName\":\"C-MP789JR-X001-d\",\"sampleName\":\"XXX002_P3_12345_L1\",\"altid\":null,\"baitSet\":\"GENESET101_BAITS\",\"cfDNA2dBarcode\":null,\"cmoInfoIgoId\":null,\"cmoSampleClass\":\"Primary\",\"collectionYear\":\"\",\"investigatorSampleId\":\"XXX002_P3_12345_L1\",\"oncoTreeCode\":\"CLL\",\"preservation\":\"Frozen\",\"sampleOrigin\":\"Tissue\",\"sex\":\"F\",\"species\":\"Human\",\"specimenType\":\"PDX\",\"tissueLocation\":\"\",\"tubeId\":\"\",\"tumorOrNormal\":\"UpdatedTumor\",\"igoComplete\":true,\"qcReports\":[],\"libraries\":[{\"barcodeId\":\"IDT29\",\"barcodeIndex\":\"ATTGAGGA\",\"libraryIgoId\":\"MOCKREQUEST1_B_1_1_1_1\",\"libraryVolume\":35.0,\"libraryConcentrationNgul\":34.8,\"dnaInputNg\":null,\"captureConcentrationNm\":\"11.49425287356322\",\"captureInputNg\":\"400.0\",\"captureName\":\"Pool-MOCKREQUEST1_B-Tube7_1\",\"runs\":[{\"runMode\":\"HiSeq High 
Output\",\"runId\":\"RUNID_0123\",\"flowCellId\":\"X5KL2KKAY\",\"readLength\":\"\",\"runDate\":\"2018-06-05\",\"flowCellLanes\":[5],\"fastqs\":[\"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R1_001.fastq.gz\",\"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R2_001.fastq.gz\"]}]}],\"cmoSampleIdFields\":{\"naToExtract\":\"\",\"sampleType\":\"Tissue\",\"normalizedPatientId\":\"MRN_REDACTED\",\"recipe\":\"GENESET101_BAITS\"},\"sampleStatus\":\"IGO sample status PASSED\"}],\"pooledNormals\":[]}",
28+
"pooledNormals": [],
29+
"samples": [
30+
{
31+
"smileSampleId": "c47ffd66-e724-4b7a-a665-c7e3d26d963e",
32+
"smilePatientId": "8594c785-56d9-4f35-b866-100c70f75223",
33+
"primaryId": "MOCKREQUEST1_B_1",
34+
"cmoPatientId": "C-MP789JR",
35+
"cmoSampleName": "C-MP789JR-X001-d",
36+
"sampleName": "XXX002_P3_12345_L1",
37+
"cmoInfoIgoId": null,
38+
"investigatorSampleId": "XXX002_P3_12345_L1",
39+
"importDate": "2026-01-08",
40+
"sampleType": "Primary",
41+
"oncotreeCode": "CLL",
42+
"collectionYear": "",
43+
"tubeId": "",
44+
"cfDNA2dBarcode": null,
45+
"species": "Human",
46+
"sex": "Female",
47+
"tumorOrNormal": "UpdatedTumor",
48+
"preservation": "Frozen",
49+
"sampleClass": "PDX",
50+
"sampleOrigin": "Tissue",
51+
"tissueLocation": "",
52+
"genePanel": "GENESET101_BAITS",
53+
"baitSet": "GENESET101_BAITS",
54+
"datasource": "igo",
55+
"igoComplete": true,
56+
"status": {
57+
"validationStatus": true,
58+
"validationReport": "{}"
59+
},
60+
"cmoSampleIdFields": {
61+
"naToExtract": "",
62+
"sampleType": "Tissue",
63+
"normalizedPatientId": "MRN_REDACTED",
64+
"recipe": "GENESET101_BAITS"
65+
},
66+
"qcReports": [],
67+
"libraries": [
68+
{
69+
"barcodeId": "IDT29",
70+
"barcodeIndex": "ATTGAGGA",
71+
"libraryIgoId": "MOCKREQUEST1_B_1_1_1_1",
72+
"libraryVolume": 35,
73+
"libraryConcentrationNgul": 34.8,
74+
"dnaInputNg": null,
75+
"captureConcentrationNm": "11.49425287356322",
76+
"captureInputNg": "400.0",
77+
"captureName": "Pool-MOCKREQUEST1_B-Tube7_1",
78+
"runs": [
79+
{
80+
"runMode": "HiSeq High Output",
81+
"runId": "RUNID_0123",
82+
"flowCellId": "X5KL2KKAY",
83+
"readLength": "",
84+
"runDate": "2018-06-05",
85+
"flowCellLanes": [
86+
5
87+
],
88+
"fastqs": [
89+
"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R1_001.fastq.gz",
90+
"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R2_001.fastq.gz"
91+
]
92+
}
93+
]
94+
}
95+
],
96+
"sampleAliases": [
97+
{
98+
"value": "MOCKREQUEST1_B_1",
99+
"namespace": "igoId"
100+
},
101+
{
102+
"value": "XXX002_P3_12345_L1",
103+
"namespace": "investigatorId"
104+
}
105+
],
106+
"patientAliases": [
107+
{
108+
"value": "C-MP789JR",
109+
"namespace": "cmoId"
110+
}
111+
],
112+
"additionalProperties": {
113+
"isCmoSample": "true",
114+
"igoSampleStatus": "IGO sample status PASSED",
115+
"altId": "ALT-908",
116+
"igoRequestId": "MOCKREQUEST1_B"
117+
}
118+
}
119+
]
120+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
{
2+
"projectId": "MOCKREQUEST1",
3+
"requestId": "MOCKREQUEST1_B",
4+
"ilabRequestId": "IGO-658741",
5+
"deliveryDate": 1757388036044,
6+
"recipe": "GENESET101",
7+
"projectManagerName": "Bar, Foo",
8+
"piEmail": "request1pi@mskcc.org",
9+
"labHeadName": "Foo Bar",
10+
"labHeadEmail": "request1pi@mskcc.org",
11+
"investigatorName": "John Smith",
12+
"investigatorEmail": "NEWINVESTIGATOREMAIL@mskcc.org",
13+
"dataAnalystName": "Poin Dexter",
14+
"dataAnalystEmail": "dexterp@mskcc.org",
15+
"otherContactEmails": "dexterp@mskcc.org",
16+
"dataAccessEmails": "",
17+
"qcAccessEmails": "",
18+
"isCmoRequest": true,
19+
"bicAnalysis": false,
20+
"samples": [
21+
{
22+
"cmoSampleName": "C-MP789JR-X001-d",
23+
"sampleName": "XXX002_P3_12345_L1",
24+
"altid": "ALT-908",
25+
"cmoSampleClass": "Primary",
26+
"oncoTreeCode": "CLL",
27+
"collectionYear": "",
28+
"tubeId": "",
29+
"qcReports": [],
30+
"libraries": [
31+
{
32+
"barcodeId": "IDT29",
33+
"barcodeIndex": "ATTGAGGA",
34+
"libraryIgoId": "MOCKREQUEST1_B_1_1_1_1",
35+
"libraryVolume": 35.0,
36+
"libraryConcentrationNgul": 34.8,
37+
"captureConcentrationNm": "11.49425287356322",
38+
"captureInputNg": "400.0",
39+
"captureName": "Pool-MOCKREQUEST1_B-Tube7_1",
40+
"runs": [
41+
{
42+
"runMode": "HiSeq High Output",
43+
"runId": "RUNID_0123",
44+
"flowCellId": "X5KL2KKAY",
45+
"readLength": "",
46+
"runDate": "2018-06-05",
47+
"flowCellLanes": [
48+
5
49+
],
50+
"fastqs": [
51+
"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R1_001.fastq.gz",
52+
"/FASTQ/Project_MOCKREQUEST1_B/Sample_XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1/XXX002_P3_12345_L1_IGO_MOCKREQUEST1_B_1_S82_R2_001.fastq.gz"
53+
]
54+
}
55+
]
56+
}
57+
],
58+
"cmoPatientId": "C-MP789JR",
59+
"igoId": "MOCKREQUEST1_B_1",
60+
"investigatorSampleId": "XXX002_P3_12345_L1",
61+
"species": "Human",
62+
"sex": "Female",
63+
"tumorOrNormal": "UpdatedTumor",
64+
"preservation": "Frozen",
65+
"specimenType": "PDX",
66+
"sampleOrigin": "Tissue",
67+
"tissueLocation": "",
68+
"baitSet": "GENESET101_BAITS",
69+
"igoComplete": true,
70+
"cmoSampleIdFields": {
71+
"naToExtract": "",
72+
"sampleType": "Tissue",
73+
"normalizedPatientId": "MRN_REDACTED",
74+
"recipe": "GENESET101_BAITS"
75+
},
76+
"status": {
77+
"validationStatus": true,
78+
"validationReport": "{}"
79+
},
80+
"sampleStatus": "IGO sample status PASSED"
81+
}],
82+
"pooledNormals": [
83+
"/FASTQ/Project_POOLEDNORMALS/Sample_FFPEPOOLEDNORMAL_IGO_GENESET101_TAGCTTGA/FFPEPOOLEDNORMAL_IGO_GENESET101_TAGCTTGA_S23_R1_001.fastq.gz",
84+
"/FASTQ/Project_POOLEDNORMALS/Sample_FFPEPOOLEDNORMAL_IGO_GENESET101_TAGCTTGA/FFPEPOOLEDNORMAL_IGO_GENESET101_TAGCTTGA_S23_R2_001.fastq.gz",
85+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_AMBIGUOUS_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_AMBIGUOUS_TTAGGCTG_S86_R1_001.fastq.gz",
86+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_AMBIGUOUS_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_AMBIGUOUS_TTAGGCTG_S86_R2_001.fastq.gz",
87+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_HEMESET_v1_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_HEMESET_v1_TTAGGCTG_S147_R1_001.fastq.gz",
88+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_HEMESET_v1_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_HEMESET_v1_TTAGGCTG_S147_R2_001.fastq.gz",
89+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_GENESET101_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_GENESET101_TTAGGCTG_S196_R1_001.fastq.gz",
90+
"/FASTQ/Project_POOLEDNORMALS/Sample_FROZENPOOLEDNORMAL_IGO_GENESET101_TTAGGCTG/FROZENPOOLEDNORMAL_IGO_GENESET101_TTAGGCTG_S196_R2_001.fastq.gz",
91+
"/FASTQ/Project_POOLEDNORMALS/Sample_MOUSEPOOLEDNORMAL_IGO_AMBIGUOUS_GGCGTCAT/MOUSEPOOLEDNORMAL_IGO_AMBIGUOUS_GGCGTCAT_S61_R1_001.fastq.gz",
92+
"/FASTQ/Project_POOLEDNORMALS/Sample_MOUSEPOOLEDNORMAL_IGO_AMBIGUOUS_GGCGTCAT/MOUSEPOOLEDNORMAL_IGO_AMBIGUOUS_GGCGTCAT_S61_R2_001.fastq.gz"
93+
],
94+
"projectId": "MOCKREQUEST1"
95+
}

0 commit comments

Comments
 (0)