Skip to content

Commit 6e2fa10

Browse files
author
Youcef Sebiat
authored
Merge pull request #226 from X-DataInitiative/CNAM-433-LimitedPeriodAdder-Strategies
Cnam 433 limited period adder strategies
2 parents 61d4170 + ba682af commit 6e2fa10

File tree

17 files changed

+405
-115
lines changed

17 files changed

+405
-115
lines changed

src/main/resources/config/fall/default.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ root {
88
end_delay = 15 days
99
end_threshold_gc = 90 days
1010
end_threshold_ngc = 30 days
11+
to_exposure_strategy = "purchase_count_based"
1112
}
1213
}
1314
interaction {
@@ -29,7 +30,6 @@ root {
2930
run_parameters {
3031
outcome: ["Acts", "Diagnoses", "Outcomes"] // pipeline of calculation of outcome, possible values : Acts, Diagnoses, and Outcomes
3132
exposure: ["Patients", "StartGapPatients", "DrugPurchases", "Exposures"] // pipeline of the calculation of exposure, possible values : Patients, StartGapPatients, DrugPurchases, Exposures
32-
hospital_stay: ["HospitalStay"] //pipeline for hospital stay, possible values : HospitalStay
3333
}
3434
}
3535

src/main/resources/config/fall/paths/cmap.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ env_name = "cmap"
22

33
input = {
44
dcir = "/shared/Observapur/staging/Flattening/flat_table/DCIR"
5-
mco_ce = "/shared/Observapur/staging/Flattening/flat_table/MCO_CE"
65
mco = "/shared/Observapur/staging/Flattening/flat_table/MCO"
6+
mco_ce = "/user/ds/CNAM360/flattening/flat_table/MCO_CE"
77
ir_ben = "/shared/Observapur/staging/Flattening/single_table/IR_BEN_R"
88
ir_imb = "/shared/Observapur/staging/Flattening/single_table/IR_IMB_R"
99
ir_pha = "/shared/Observapur/staging/Flattening/single_table/IR_PHA_R_MOL"

src/main/resources/config/fall/template.conf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
# output.root = "target/test/output"
1212
# output.save_mode = "errorIfExists" // Possible values = [overwrite, append, errorIfExists, withTimestamp] Strategy of saving output data. errorIfExists by default
1313

14-
# exposures.min_purchases: 1 // 1+ (Usually 1 or 2)
1514
# exposures.start_delay: 0 months // 0+ (Usually between 0 and 3). Represents the delay in months between a dispensation and its exposure start date.
1615
# exposures.purchases_window: 0 months // 0+ (Usually 0 or 6) Represents the window size in months. Ignored when min_purchases=1.
1716
# exposures.end_threshold_gc: 90 days // If periodStrategy="limited", represents the period without purchases for an exposure to be considered "finished".

src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Addable.scala

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,53 @@
22

33
package fr.polytechnique.cmap.cnam.etl.datatypes
44

5+
import scala.annotation.tailrec
6+
import scala.reflect.ClassTag
7+
import scala.reflect.runtime.universe._
8+
59
trait Addable [A] {
610
def + (other: A): RemainingPeriod[A]
711
}
12+
13+
object Addable {
14+
/***
15+
* Transforms a list of `Addable` of type A into a list of A where every element does not intersect with any other
16+
* element of the list. The passed list of elements must be ordered from the first to the last.
17+
* To achieve the non intersection property, the algorithm proceeds as follows:
18+
* 1. Takes the element of `RightRemainingPeriod` and adds it to the head of the List of `LeftRemainingPeriod`.
19+
* 1. If the result is `RightRemainingPeriod`, recurse with the new element as the `RightRemainingPeriod`.
20+
* 2. If the result is `LeftRemainingPeriod`, add it to the Accumulator, and recurse with the head of the List of
21+
* `LeftRemainingPeriod` as the new `RightRemainingPeriod`.
22+
* 3. If the result is `DisjointedRemainingPeriod`, add the Left part to the Accumulator,
23+
* and recurse with the right part.
24+
* 4. If the result is `NullRemainingPeriod`, add nothing to the Accumulator,
25+
* and recurse with the head of the List of `LeftRemainingPeriod` as the new `RightRemainingPeriod`.
26+
* 2. Recurse until lrs is empty.
27+
* @param rr element to start the add operation with
28+
* @param lrs list of LeftRemainingPeriod to combine. Must be timely ordered with respect to definition of `+` of type A.
29+
* @param acc list to accumulate the results in.
30+
* @tparam A type parameter. A must extend the Addable trait.
31+
* @return List of `LeftRemainingPeriod` of type A.
32+
*/
33+
@tailrec
34+
def combineAddables[A <: Addable[A] : ClassTag : TypeTag](
35+
rr: RightRemainingPeriod[A],
36+
lrs: List[LeftRemainingPeriod[A]],
37+
acc: List[LeftRemainingPeriod[A]]): List[LeftRemainingPeriod[A]] = {
38+
lrs match {
39+
case Nil => rr.toLeft :: acc
40+
case lr :: Nil => rr.e + lr.e match {
41+
case NullRemainingPeriod => acc
42+
case l: LeftRemainingPeriod[A] => l :: acc
43+
case r: RightRemainingPeriod[A] => combineAddables[A](r, List.empty, acc)
44+
case d: DisjointedRemainingPeriod[A] => combineAddables[A](d.r, List.empty, d.l :: acc)
45+
}
46+
case lr :: lr2 :: rest => rr.e + lr.e match {
47+
case NullRemainingPeriod => combineAddables(lr2.toRight, rest, acc)
48+
case l: LeftRemainingPeriod[A] => combineAddables(lr2.toRight, rest, l :: acc)
49+
case r: RightRemainingPeriod[A] => combineAddables[A](r, lr2 :: rest, acc)
50+
case d: DisjointedRemainingPeriod[A] => combineAddables[A](d.r, lr2 :: rest, d.l :: acc)
51+
}
52+
}
53+
}
54+
}

src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Period.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import java.sql.Timestamp
66
import fr.polytechnique.cmap.cnam.etl.transformers.interaction._
77
import fr.polytechnique.cmap.cnam.util.functions._
88

9-
case class Period(start: Timestamp, end: Timestamp) extends Remainable[Period] with Addable[Period]{
9+
case class Period(start: Timestamp, end: Timestamp) extends Subtractable[Period] with Addable[Period]{
1010
self =>
1111

1212
def & (other: Period): Boolean = {

src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/Remainable.scala

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/main/scala/fr/polytechnique/cmap/cnam/etl/datatypes/RemainingPeriod.scala

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
package fr.polytechnique.cmap.cnam.etl.datatypes
44

5-
import scala.annotation.tailrec
6-
import scala.reflect.ClassTag
7-
import scala.reflect.runtime.universe._
85
import cats.Functor
96

107

@@ -37,22 +34,5 @@ object RemainingPeriod {
3734
}
3835
}
3936
}
40-
41-
@tailrec
42-
def delimitPeriods[A <: Remainable[A] : ClassTag : TypeTag](
43-
rr: RightRemainingPeriod[A],
44-
lrs: List[LeftRemainingPeriod[A]],
45-
acc: List[LeftRemainingPeriod[A]]): List[LeftRemainingPeriod[A]] = {
46-
lrs match {
47-
case Nil => rr.toLeft :: acc
48-
case _@LeftRemainingPeriod(null) :: Nil => rr.toLeft :: acc // This happens because of the Left Join
49-
case lr :: rest => rr.e - lr.e match {
50-
case NullRemainingPeriod => acc
51-
case l: LeftRemainingPeriod[A] => l :: acc
52-
case r: RightRemainingPeriod[A] => delimitPeriods[A](r, rest, acc)
53-
case d: DisjointedRemainingPeriod[A] => delimitPeriods[A](d.r, rest, d.l :: acc)
54-
}
55-
}
56-
}
5737
}
5838

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// License: BSD 3 clause
2+
3+
package fr.polytechnique.cmap.cnam.etl.datatypes
4+
5+
import scala.annotation.tailrec
6+
import scala.reflect.ClassTag
7+
import scala.reflect.runtime.universe.TypeTag
8+
9+
trait Subtractable[A] {
10+
def - (other: A): RemainingPeriod[A]
11+
}
12+
13+
object Subtractable {
14+
/***
15+
* Subtracts a list of `LeftRemainingPeriod` of type A from a `RightRemainingPeriod` of type A. It proceeds until it is not
16+
* possible to subtract.
17+
* To perform the subtraction, the algorithm proceeds as follows:
18+
* 1. Takes the element of `RightRemainingPeriod` and subtract the head of List of `LeftRemainingPeriod` from it.
19+
* 1. If the result is `RightRemainingPeriod`, recurse with the remaining element as the `RightRemainingPeriod`.
20+
* 2. If the result is `LeftRemainingPeriod`, add it to the Accumulator, stop.
21+
* 3. If the result is `DisjointedRemainingPeriod`, add the Left part to the Accumulator,
22+
* and recurse with the right part.
23+
* 4. If the result is `NullRemainingPeriod`, return the Accumulator and stop.
24+
* 2. Recurse until lrs is empty.
25+
* @param rr element to start the subtract operation with
26+
* @param lrs list of LeftRemainingPeriod to subtract. Must be timely ordered with respect to definition of `-` of type A.
27+
* @param acc list to accumulate the results in.
28+
* @tparam A type parameter. A must extend the Subtractable trait.
29+
* @return List of `LeftRemainingPeriod` of type A.
30+
*/
31+
@tailrec
32+
def combineSubtracables[A <: Subtractable[A] : ClassTag : TypeTag](
33+
rr: RightRemainingPeriod[A],
34+
lrs: List[LeftRemainingPeriod[A]],
35+
acc: List[LeftRemainingPeriod[A]]): List[LeftRemainingPeriod[A]] = {
36+
lrs match {
37+
case Nil => rr.toLeft :: acc
38+
case _@LeftRemainingPeriod(null) :: Nil => rr.toLeft :: acc // This happens because of the Left Join
39+
case lr :: rest => rr.e - lr.e match {
40+
case NullRemainingPeriod => acc
41+
case l: LeftRemainingPeriod[A] => l :: acc
42+
case r: RightRemainingPeriod[A] => combineSubtracables[A](r, rest, acc)
43+
case d: DisjointedRemainingPeriod[A] => combineSubtracables[A](d.r, rest, d.l :: acc)
44+
}
45+
}
46+
}
47+
}

src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposureDuration.scala

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,26 @@ import fr.polytechnique.cmap.cnam.etl.datatypes._
88
import fr.polytechnique.cmap.cnam.etl.events.{Event, Exposure}
99

1010

11+
/***
12+
* Internal Data representation that allows LimitedPeriodAdder to transform DrugPurchase into an Exposure.
13+
*
14+
* There are only two ways of creating an `ExposureDuration`:
15+
* 1. By transforming an `Event[Drug]`. This transformation is the responsibility of the caller.
16+
* 2. By calling `+` of this class.
17+
* @param patientID string representation of patientID
18+
* @param value equivalent of the value of `Event[Drug]`
19+
* @param period represents the `Timestamps` of the beginning and the end of the Exposure.
20+
* @param span long that represents the duration of the Exposure.
21+
*/
1122
case class ExposureDuration(patientID: String, value: String, period: Period, span: Long)
1223
extends Addable[ExposureDuration] {
1324
self =>
25+
/***
26+
* Add two `ExposureDuration` to form a `RemainingPeriod[ExposureDuration]`. For the exact rules, look at the
27+
* test of `+` for `Period` & `ExposureDuration`
28+
* @param other other `ExposureDuration` to be added
29+
* @return result of the addition as `RemainingPeriod[ExposureDuration]`
30+
*/
1431
override def +(other: ExposureDuration): RemainingPeriod[ExposureDuration] =
1532
if ((self.patientID != other.patientID) | (self.value != other.value)) {
1633
RightRemainingPeriod(self)
@@ -33,7 +50,27 @@ case class ExposureDuration(patientID: String, value: String, period: Period, sp
3350
}
3451
}
3552
}
53+
}
54+
55+
/***
56+
* Defines the strategies to be used to add different ExposureDuration and make them one.
57+
*/
58+
sealed trait ExposureDurationStrategy extends Function1[ExposureDuration, Event[Exposure]] with Serializable
3659

37-
def toExposure: Event[Exposure] =
38-
Exposure(self.patientID, self.value, 1D, self.period.start, self.period.start + Duration(milliseconds = span) get)
60+
/***
61+
* Sets the Exposure end as start + purchase_1*purchase_1_duration + purchase_2*purchase_2_duration ...
62+
*/
63+
object PurchaseCountBased extends ExposureDurationStrategy {
64+
override def apply(v1: ExposureDuration): Event[Exposure] = {
65+
Exposure(v1.patientID, v1.value, 1D, v1.period.start, v1.period.start + Duration(milliseconds = v1.span) get)
66+
}
3967
}
68+
69+
/***
70+
* Sets the Exposure end to the end of the period of the combined Exposure Duration.
71+
*/
72+
object LatestPurchaseBased extends ExposureDurationStrategy {
73+
override def apply(v1: ExposureDuration): Event[Exposure] = {
74+
Exposure(v1.patientID, "NA", v1.value, 1D, v1.period.start, Some(v1.period.end))
75+
}
76+
}

src/main/scala/fr/polytechnique/cmap/cnam/etl/transformers/exposures/ExposurePeriodAdder.scala

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22

33
package fr.polytechnique.cmap.cnam.etl.transformers.exposures
44

5-
import scala.annotation.tailrec
6-
import scala.reflect.ClassTag
7-
import scala.reflect.runtime.universe._
85
import me.danielpes.spark.datetime.implicits._
96
import me.danielpes.spark.datetime.{Period => Duration}
107
import org.apache.spark.sql.Dataset
@@ -40,7 +37,8 @@ final case class LimitedExposureAdder(
4037
override val startDelay: Duration,
4138
endDelay: Duration,
4239
endThresholdGc: Duration,
43-
endThresholdNgc: Duration) extends ExposurePeriodAdder(startDelay) {
40+
endThresholdNgc: Duration,
41+
toExposureStrategy: ExposureDurationStrategy) extends ExposurePeriodAdder(startDelay) {
4442

4543
override def toExposure(followUps: Dataset[Event[FollowUp]])
4644
(drugs: Dataset[Event[Drug]]): Dataset[Event[Exposure]] = {
@@ -54,12 +52,13 @@ final case class LimitedExposureAdder(
5452
.map(fromDrugToExposureDuration)
5553
.groupByKey(ep => (ep.patientID, ep.value))
5654
.flatMapGroups((_, eds) => combineExposureDurations(eds))
57-
.map(e => e.toExposure)
55+
.map(ed => ed.copy(period = ed.period.copy(end = ed.period.end - endDelay get)))
56+
.map(e => toExposureStrategy(e))
5857
}
5958

6059
def combineExposureDurations(exposureDurations: Iterator[ExposureDuration]): List[ExposureDuration] = {
6160
val sortedExposureDurations = exposureDurations.toList.sortBy(_.period.start).map(LeftRemainingPeriod(_))
62-
combineExposureDurationsRec(sortedExposureDurations.head.toRight, sortedExposureDurations.drop(1), List.empty)
61+
Addable.combineAddables(sortedExposureDurations.head.toRight, sortedExposureDurations.drop(1), List.empty)
6362
.map(_.e)
6463
}
6564

@@ -73,29 +72,6 @@ final case class LimitedExposureAdder(
7372
)
7473
}
7574

76-
77-
@tailrec
78-
def combineExposureDurationsRec[A <: Addable[A] : ClassTag : TypeTag](
79-
rr: RightRemainingPeriod[A],
80-
lrs: List[LeftRemainingPeriod[A]],
81-
acc: List[LeftRemainingPeriod[A]]): List[LeftRemainingPeriod[A]] = {
82-
lrs match {
83-
case Nil => rr.toLeft :: acc
84-
case lr :: Nil => rr.e + lr.e match {
85-
case NullRemainingPeriod => acc
86-
case l: LeftRemainingPeriod[A] => l :: acc
87-
case r: RightRemainingPeriod[A] => combineExposureDurationsRec[A](r, List.empty, acc)
88-
case d: DisjointedRemainingPeriod[A] => combineExposureDurationsRec[A](d.r, List.empty, d.l :: acc)
89-
}
90-
case lr :: lr2 :: rest => rr.e + lr.e match {
91-
case NullRemainingPeriod => combineExposureDurationsRec(lr2.toRight, rest, acc)
92-
case l: LeftRemainingPeriod[A] => combineExposureDurationsRec(lr2.toRight, rest, l :: acc)
93-
case r: RightRemainingPeriod[A] => combineExposureDurationsRec[A](r, lr2 :: rest, acc)
94-
case d: DisjointedRemainingPeriod[A] => combineExposureDurationsRec[A](d.r, lr2 :: rest, d.l :: acc)
95-
}
96-
}
97-
}
98-
9975
def fromConditioningToDuration(weight: Double): Long = weight match {
10076
case 1 => endThresholdGc.totalMilliseconds
10177
case _ => endThresholdNgc.totalMilliseconds

0 commit comments

Comments
 (0)