Skip to content

Commit 8935776

Browse files
authored
Merge pull request #3 from HealthNLPorg/v0.0.1
V0.0.1
2 parents 8410952 + 29473fb commit 8935776

File tree

3 files changed

+59
-14
lines changed

3 files changed

+59
-14
lines changed

README.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,7 @@ mybroker/bin/artemis stop
125125
## Input and output structure
126126

127127
Given the structure of the summarized gold timelines and the shared task data, the Docker assumes that the input in the `input`
128-
folder will take the form of a collection of notes comprising all the patients of a given cancer type cohort (for the shared task one of melanoma or ovarian or breast cancers), the base filenames of which will correspond to the scheme:
129-
```
130-
<patient identifier>_<four digit year>_<two digit month>_<two digit date>
131-
```
132-
Where the year month and date correspond to the creation time of the file. All the files in the shared task dataset follow this schema so for our data there is nothing you need to do.
133-
134-
Assuming successful processing, the output file will be a tab separated value (`tsv`) file in the `output` folder.
128+
folder will take the form of a collection of notes comprising all the patients of a given cancer type cohort. Assuming successful processing, the output file will be a tab separated value (`tsv`) file in the `output` folder.
135129
The file will have the columns:
136130
```
137131
DCT patient_id chemo_text chemo_annotation_id normed_timex timex_annotation_id tlink note_name tlink_inst

timelines/instance-generator/src/main/java/org/apache/ctakes/temporal/ae/TimeMentionNormalizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ public void process( JCas jCas ) throws AnalysisEngineProcessException {
111111
.anyMatch( tui -> this.tuiSet.contains( tui ) );
112112

113113
if ( !hasRelevantTUIs ){
114-
LOGGER.info(fileName + " : no events with the provided TUIs " + this.tuis + "skipping to save time");
114+
LOGGER.info(fileName + " : no events with the provided TUIs " + this.tuis + " skipping to save time");
115115
return;
116116
}
117117
}

timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/timelines_py/src/timelines/timeline_delegator.py

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ def process(self, cas: Cas):
377377
if len(proc_mentions) > 0:
378378
self._write_raw_timelines(cas, proc_mentions)
379379
else:
380+
self._add_empty_discovery(cas)
380381
patient_id, note_name = pt_and_note(cas)
381382
print(
382383
f"No chemotherapy mentions ( using TUI: {CHEMO_TUI} ) found in patient {patient_id} note {note_name} - skipping"
@@ -424,6 +425,7 @@ def _write_raw_timelines(self, cas: Cas, proc_mentions: List[FeatureStructure]):
424425
)
425426
self._write_actual_proc_mentions(cas, actual_proc_mentions)
426427
else:
428+
self._add_empty_discovery(cas)
427429
print(
428430
f"No concrete chemotherapy mentions found in patient {patient_id} note {note_name} - skipping"
429431
)
@@ -432,14 +434,18 @@ def _write_actual_proc_mentions(
432434
self, cas: Cas, positive_chemo_mentions: List[FeatureStructure]
433435
):
434436
timex_type = cas.typesystem.get_type(ctakes_types.TimeMention)
435-
event_type = cas.typesystem.get_type(ctakes_types.EventMention)
436437
cas_source_data = cas.select(ctakes_types.Metadata)[0].sourceData
437438
document_creation_time = cas_source_data.sourceOriginalDate
438439
relevant_timexes = timexes_with_normalization(cas.select(timex_type))
439440

440441
base_tokens, token_map = tokens_and_map(cas, mode="dtr")
441442
begin2token, end2token = invert_map(token_map)
442443

444+
def local_window_mentions(chemo):
445+
return get_tlink_window_mentions(
446+
chemo, relevant_timexes, begin2token, end2token, token_map
447+
)
448+
443449
def dtr_result(chemo):
444450
inst = get_dtr_instance(chemo, base_tokens, begin2token, end2token)
445451
result = list(self.dtr_classifier(inst))[0]
@@ -455,9 +461,7 @@ def tlink_result(chemo, timex):
455461
return label, inst
456462

457463
def tlink_result_dict(chemo):
458-
window_mentions = get_tlink_window_mentions(
459-
chemo, relevant_timexes, begin2token, end2token, token_map
460-
)
464+
window_mentions = local_window_mentions(chemo)
461465
return {
462466
window_mention: tlink_result(chemo, window_mention)
463467
for window_mention in window_mentions
@@ -475,17 +479,32 @@ def tlink_result_dict(chemo):
475479
)
476480
)
477481
}
478-
if len(list(relevant_timexes)) == 0:
482+
483+
if (
484+
len(list(relevant_timexes)) == 0
485+
or len(
486+
list(
487+
chain.from_iterable(
488+
map(local_window_mentions, positive_chemo_mentions)
489+
)
490+
)
491+
)
492+
== 0
493+
):
479494
print(
480-
f"WARNING: No normalized timexes discovered in {patient_id} file {note_name}"
495+
f"WARNING: No timexes suitable for TLINK pairing discovered in {patient_id} file {note_name}"
481496
)
497+
self._add_empty_discovery(cas)
498+
return
482499
for chemo in positive_chemo_mentions:
500+
chemo_dtr, dtr_inst = "", "" # purely so pyright stops complaining
483501
if self.use_dtr:
484502
chemo_dtr, dtr_inst = dtr_result(chemo)
485503
tlink_dict = tlink_result_dict(chemo)
486504
for timex, tlink_inst_pair in tlink_dict.items():
487505
tlink, tlink_inst = tlink_inst_pair
488506
chemo_text = normalize_mention(chemo)
507+
# if we made it this far these attributes are populated
489508
timex_text = timex.time.normalizedForm
490509
if self.use_dtr:
491510
instance = [
@@ -514,3 +533,35 @@ def tlink_result_dict(chemo):
514533
tlink_inst,
515534
]
516535
self.raw_events.append(instance)
536+
537+
def _add_empty_discovery(self, cas: Cas):
538+
cas_source_data = cas.select(ctakes_types.Metadata)[0].sourceData
539+
document_creation_time = cas_source_data.sourceOriginalDate
540+
patient_id, note_name = pt_and_note(cas)
541+
if self.use_dtr:
542+
instance = [
543+
document_creation_time,
544+
patient_id,
545+
"none",
546+
"none",
547+
"none",
548+
"none",
549+
"none",
550+
"none",
551+
note_name,
552+
"none",
553+
"none",
554+
]
555+
else:
556+
instance = [
557+
document_creation_time,
558+
patient_id,
559+
"none",
560+
"none",
561+
"none",
562+
"none",
563+
"none",
564+
note_name,
565+
"none",
566+
]
567+
self.raw_events.append(instance)

0 commit comments

Comments
 (0)