Skip to content

Commit 2dcae42

Browse files
authored
Merge pull request #149 from X-DataInitiative/CNAM-327-refactor-mlpp-config
Cnam 327 refactor mlpp config
2 parents 7bab987 + cac3222 commit 2dcae42

35 files changed

+614
-321
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// When changing this file, it is important to change the template.conf file accordingly
2+
3+
root{
4+
base{
5+
bucket_size: 1 //the duration of a bucket
6+
lag_count: 1 //the number of lags that can be reached within a bucket
7+
keep_first_only: true //true to keep only the first date of an outcome, false to keep all the outcome dates related to a patient
8+
features_as_list: true //true or false to show the bucket in the outcomes
9+
}
10+
}
11+
12+
cnam = ${root} {
13+
extra = {
14+
min_timestamp = "2015-01-01" //date in yyyy-MM-dd
15+
max_timestamp = "2016-01-01" //date in yyyy-MM-dd
16+
include_censored_bucket = false //true or false to include censored bucket when calculating the end bucket
17+
}
18+
19+
include "paths/cnam.conf" // Default paths at CNAM for the MLPP
20+
}
21+
22+
cmap = ${root} {
23+
extra = {
24+
min_timestamp = "2010-01-01" //date in yyyy-MM-dd
25+
max_timestamp = "2011-01-01" //date in yyyy-MM-dd
26+
include_censored_bucket = true //true or false to include censored bucket when calculating the end bucket
27+
}
28+
29+
include "paths/cmap.conf" // Default paths at CMAP for the MLPP
30+
}
31+
32+
test = ${root} {
33+
extra = {
34+
min_timestamp = "2006-01-01" //date in yyyy-MM-dd
35+
max_timestamp = "2006-08-01" //date in yyyy-MM-dd
36+
include_censored_bucket = false //true or false to include censored bucket when calculating the end bucket
37+
}
38+
39+
include "paths/test.conf" // Testing paths for the MLPP
40+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
env_name = "cmap"
2+
3+
input = {
4+
patients = "/shared/Observapur/featuring/patients-filtered"
5+
outcomes = "/shared/Observapur/featuring/fractures/*"
6+
exposures = "/shared/Observapur/featuring/exposures"
7+
}
8+
9+
output = {
10+
root = "/shared/fall/featuring"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
env_name = "cnam"
2+
3+
input = {
4+
patients = "/shared/fall/staging/2018-02-20/featuringPharamacoClasses/filter_patients/data"
5+
outcomes = "/shared/fall/staging/2018-02-20/featuringPharamacoClasses/fractures/*_fractures/data"
6+
exposures = "/shared/fall/staging/2018-02-20/featuringPharamacoClasses/exposures/data"
7+
}
8+
9+
output = {
10+
root = "/shared/fall/staging/All/featuring"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
env_name = "test"
2+
3+
input = {
4+
patients = "src/test/resources/MLPP/patient"
5+
outcomes = "src/test/resources/MLPP/outcome"
6+
exposures = "src/test/resources/MLPP/exposure"
7+
}
8+
9+
output = {
10+
root = "target/test/output/featuring"
11+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Template configuration file for the MLPP study. To override the defaults, copy this file to your working
2+
# directory, then uncomment the desired lines and pass the file path to spark-submit
3+
4+
#input.patients = "path/to/source/dir"
5+
#input.outcomes = "path/to/source/dir"
6+
#input.exposures = "path/to/source/dir"
7+
8+
#output.root = "path/to/output/dir"
9+
10+
#base.bucket_size = 1 //the duration of a bucket
11+
#base.lag_count = 1 //the number of lags that can be reached within a bucket
12+
#base.keep_first_only = true //true to keep only the first date of an outcome, false to keep all the outcome dates related to a patient
13+
#base.features_as_list = true //true or false to show the bucket in the outcomes
14+
15+
#extra.min_timestamp = "2010-01-01" //date in yyyy-MM-dd
16+
#extra.max_timestamp = "2011-01-01" //date in yyyy-MM-dd
17+
#extra.include_censored_bucket = true //true or false to include censored bucket when calculating the end bucket
18+
19+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
package fr.polytechnique.cmap.cnam.etl.config
2+
3+
/** Common base trait for configuration types. It declares no members; it is used as the
  * upper bound of the type parameter accepted by `ConfigLoader.loadConfigWithDefaults`.
  */
trait Config

src/main/scala/fr/polytechnique/cmap/cnam/etl/config/ConfigLoader.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@ trait ConfigLoader {
4444
* It could be added to the trait itself, but the type is only needed by this method, so for
4545
* now I think we can leave it here.
4646
*/
47-
protected[etl] def loadConfigWithDefaults[C <: StudyConfig : ClassTag : ConfigReader](
48-
configPath: String,
49-
defaultsPath: String,
50-
env: String): C = {
47+
protected[etl] def loadConfigWithDefaults[C <: Config : ClassTag : ConfigReader](
48+
configPath: String,
49+
defaultsPath: String,
50+
env: String): C = {
5151

5252
val defaultConfig = ConfigFactory.parseResources(defaultsPath).resolve.getConfig(env)
5353
val config = ConfigFactory.parseFile(new java.io.File(configPath)).resolve.withFallback(defaultConfig).resolve
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package fr.polytechnique.cmap.cnam.etl.config.mlpp
2+
3+
import fr.polytechnique.cmap.cnam.etl.config.Config
4+
import fr.polytechnique.cmap.cnam.util.Path
5+
6+
/** Base trait for MLPP loader configuration types. It extends [[Config]] and adds no members. */
trait MLPPLoaderConfig extends Config
7+
8+
/** Companion object holding the abstract base classes for the input and output paths of an
  * MLPP loader configuration.
  */
object MLPPLoaderConfig {
9+
10+
// Base class for input locations: patients, outcomes and exposures are each an optional
// string that defaults to None.
// NOTE(review): these are plain constructor parameters (no `val`), so subclasses presumably
// re-declare them to expose the values; confirm against the concrete configs.
abstract class InputPaths(
11+
patients: Option[String] = None,
12+
outcomes: Option[String] = None,
13+
exposures: Option[String] = None)
14+
15+
// Base class for output locations; it takes a single root Path.
abstract class OutputPaths(root: Path) // the root may differ from one case to another
16+
17+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package fr.polytechnique.cmap.cnam.etl.config.mlpp
2+
3+
import java.sql.Timestamp
4+
import pureconfig.ConfigReader
5+
import fr.polytechnique.cmap.cnam.etl.config.ConfigLoader
6+
import fr.polytechnique.cmap.cnam.util.Path
7+
import fr.polytechnique.cmap.cnam.util.functions.{makeTS, parseTimestamp}
8+
9+
/** ConfigLoader extension that supplies the implicit pureconfig readers for the Timestamp and
  * Path values declared below.
  */
trait MLPPLoaderConfigLoader extends ConfigLoader {
10+
// Reads a string and parses it into a Timestamp using the "yyyy-MM-dd" pattern.
// When parseTimestamp yields no value, makeTS(2006, 1, 1) is used instead.
// NOTE(review): this fallback presumably hides malformed dates in the configuration file;
// confirm that silently defaulting (rather than failing) is intended.
implicit val timeStampReader: ConfigReader[Timestamp] = ConfigReader[String].map(
12+
str => parseTimestamp(str, "yyyy-MM-dd").getOrElse(makeTS(2006, 1, 1)))
13+
14+
// Reads a string and wraps it in a Path.
implicit val pathReader: ConfigReader[Path] = ConfigReader[String].map(Path(_))
16+
}

src/main/scala/fr/polytechnique/cmap/cnam/etl/config/StudyConfig.scala renamed to src/main/scala/fr/polytechnique/cmap/cnam/etl/config/study/StudyConfig.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
package fr.polytechnique.cmap.cnam.etl.config
1+
package fr.polytechnique.cmap.cnam.etl.config.study
2+
3+
import fr.polytechnique.cmap.cnam.etl.config.Config
24

35
object StudyConfig {
46
case class InputPaths(
@@ -23,4 +25,4 @@ object StudyConfig {
2325
exposures: String)
2426
}
2527

26-
trait StudyConfig
28+
/** Base trait for study configuration types. It extends [[Config]], which is the bound required
  * by `ConfigLoader.loadConfigWithDefaults`, and adds no members.
  */
trait StudyConfig extends Config

0 commit comments

Comments
 (0)