Skip to content

Commit 8b05497

Browse files
authored
Merge pull request #5 from HealthNLPorg/v0.1.0
V0.1.0
2 parents 8935776 + fda17a2 commit 8b05497

File tree

6 files changed

+611
-606
lines changed

6 files changed

+611
-606
lines changed

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,24 @@ load PbjStarter
177177
178178
add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/timelines_py" Wait=yes
179179
180-
set TimelinesSecondStep=timelines.timelines_pipeline
180+
set TimelinesSecondStep=timelines.timelines_python_pipeline
181181
182182
add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
183183
184184
set minimumSpan=2
185185
set exclusionTags=""
186186
187187
// Just the components we need from DefaultFastPipeline
188+
189+
// Write nice big banners when ctakes starts and finishes.
188190
set WriteBanner=yes
189191
190192
// Load a simple token processing pipeline from another pipeline file
191193
load DefaultTokenizerPipeline
192194
193195
// Add non-core annotators
194196
add ContextDependentTokenizerAnnotator
197+
// Dictionary module requires tokens so needs to be loaded after the tokenization stack
195198
load DictionarySubPipe
196199
197200
add BackwardsTimeAnnotator classifierJarPath=/org/apache/ctakes/temporal/models/timeannotator/model.jar
@@ -214,7 +217,7 @@ add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/t
214217
```
215218
This sets up the necessary environment variables and installs the relevant Python code, along with its dependencies, into the Python environment.
216219
```
217-
set TimelinesSecondStep=timelines.timelines_pipeline
220+
set TimelinesSecondStep=timelines.timelines_python_pipeline
218221
219222
add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
220223
```
@@ -251,7 +254,7 @@ Sends the CASes which have been processed by the Java annotators to the Python a
251254

252255
The core Python logic is in the file:
253256
```
254-
timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/timelines_py/src/timelines/timelines_delegator.py
257+
timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/timelines_py/src/timelines/timelines_annotator.py
255258
```
256259
Like the Java annotators, the Python annotator implements a `process` method, which is the core driver of the annotator for processing each note's contents. The raw output for the whole cancer type cohort is collected and written to a TSV file on disk in the `collection_process_complete` method.
257260

timelines/instance-generator/src/user/resources/org/apache/ctakes/timelines/pipeline/Timelines.piper

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ load PbjStarter
77

88
add PythonRunner Command="-m pip install resources/org/apache/ctakes/timelines/timelines_py" Wait=yes
99

10-
set TimelinesSecondStep=timelines.timelines_pipeline
10+
set TimelinesSecondStep=timelines.timelines_python_pipeline
1111

1212
add PythonRunner Command="-m $TimelinesSecondStep -rq JavaToPy -o $OutputDirectory"
1313

@@ -29,10 +29,6 @@ load DictionarySubPipe
2929

3030
add BackwardsTimeAnnotator classifierJarPath=/org/apache/ctakes/temporal/models/timeannotator/model.jar
3131
add DCTAnnotator
32-
// loading this after the dictionaries so
33-
// we no longer waste time normalizing timexes
34-
// for files with no chemos
35-
// add TimeMentionNormalizer tuis=T061 timeout=25
3632
add TimeMentionNormalizer timeout=10
3733

3834
add PbjJmsSender SendQueue=JavaToPy SendStop=yes

0 commit comments

Comments
 (0)