Skip to content

Commit e5d43c2

Browse files
committed
Updated stage 2 findings in the ICU sepsis example.
Signed-off-by: Marvin Hansen <[email protected]>
1 parent 94cc8bd commit e5d43c2

File tree

5 files changed

+169
-41
lines changed

5 files changed

+169
-41
lines changed

Cargo.lock

Lines changed: 0 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/case_study_icu_sepsis/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,3 @@ deep_causality = { path = "../../deep_causality" }
1818
# External dependencies
1919
arrow-array = {version = "56", default-features = false}
2020
parquet = { version = "56" }
21-
parquet_derive = {version = "56", default-features = true}

examples/case_study_icu_sepsis/notes/stage_two_findings.md

Lines changed: 161 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ real 3m15.109s
1717

1818
Results:
1919

20+
## Full Data (Sepsis and Non-Sepsis cases)
21+
2022
Run second stage!
2123
Explicitly excluded column Patient_ID (index 42).
2224
Original number of columns: 43
@@ -65,6 +67,107 @@ Selected features and their normalized scores (CDL):
6567
- Feature: MAP (index: 5), Importance Score: 0.0325
6668
- Feature: HR (index: 1), Importance Score: 0.0313
6769

70+
## Sepsis Only
71+
72+
Run second stage!
73+
Explicitly excluded column Patient_ID (index 42).
74+
Original number of columns: 43
75+
New number of columns after filtering: 42
76+
Target column index: 41
77+
Selected features and their normalized scores (CDL):
78+
- Feature: ICULOS (Index: 40), Importance Score: 1.0000
79+
- Feature: HospAdmTime (Index: 39), Importance Score: 0.3398
80+
- Feature: Unit2 (Index: 38), Importance Score: 0.1632
81+
- Feature: Age (Index: 35), Importance Score: 0.0886
82+
- Feature: Unit1 (Index: 37), Importance Score: 0.0782
83+
- Feature: Gender (Index: 36), Importance Score: 0.0723
84+
85+
- Feature: Platelets (Index: 34), Importance Score: 0.0309
86+
- Feature: Phosphate (Index: 25), Importance Score: 0.0254
87+
- Feature: Resp (Index: 7), Importance Score: 0.0248
88+
- Feature: HR (Index: 1), Importance Score: 0.0232
89+
- Feature: Bilirubin_direct (Index: 21), Importance Score: 0.0221
90+
- Feature: WBC (Index: 32), Importance Score: 0.0215
91+
- Feature: PaCO2 (Index: 13), Importance Score: 0.0210
92+
93+
- Feature: EtCO2 (Index: 8), Importance Score: 0.0208
94+
- Feature: Potassium (Index: 26), Importance Score: 0.0200
95+
- Feature: Fibrinogen (Index: 33), Importance Score: 0.0202
96+
- Feature: O2Sat (Index: 2), Importance Score: 0.0198
97+
- Feature: Alkalinephos (Index: 17), Importance Score: 0.0195
98+
- Feature: Bilirubin_total (Index: 27), Importance Score: 0.0190
99+
- Feature: Hour (Index: 0), Importance Score: 0.0186
100+
- Feature: BaseExcess (Index: 9), Importance Score: 0.0186
101+
- Feature: Lactate (Index: 23), Importance Score: 0.0187
102+
- Feature: Hgb (Index: 30), Importance Score: 0.0184
103+
- Feature: pH (Index: 12), Importance Score: 0.0183
104+
- Feature: Creatinine (Index: 20), Importance Score: 0.0181
105+
- Feature: DBP (Index: 6), Importance Score: 0.0181
106+
- Feature: Hct (Index: 29), Importance Score: 0.0176
107+
- Feature: HCO3 (Index: 10), Importance Score: 0.0175
108+
- Feature: TroponinI (Index: 28), Importance Score: 0.0173
109+
- Feature: Chloride (Index: 19), Importance Score: 0.0171
110+
- Feature: SBP (Index: 4), Importance Score: 0.0172
111+
- Feature: FiO2 (Index: 11), Importance Score: 0.0170
112+
- Feature: Glucose (Index: 22), Importance Score: 0.0170
113+
- Feature: BUN (Index: 16), Importance Score: 0.0165
114+
- Feature: Temp (Index: 3), Importance Score: 0.0164
115+
- Feature: SaO2 (Index: 14), Importance Score: 0.0160
116+
- Feature: MAP (Index: 5), Importance Score: 0.0160
117+
- Feature: AST (Index: 15), Importance Score: 0.0157
118+
- Feature: Calcium (Index: 18), Importance Score: 0.0152
119+
120+
121+
## Non-Sepsis Only
122+
123+
Run second stage!
124+
Explicitly excluded column Patient_ID (index 42).
125+
Original number of columns: 43
126+
New number of columns after filtering: 42
127+
Target column index: 41
128+
Selected features and their normalized scores (CDL):
129+
- Feature: ICULOS (Index: 40), Importance Score: 1.0000
130+
- Feature: HospAdmTime (Index: 39), Importance Score: 0.5360
131+
- Feature: Unit2 (Index: 38), Importance Score: 0.4064
132+
- Feature: Unit1 (Index: 37), Importance Score: 0.3303
133+
- Feature: Gender (Index: 36), Importance Score: 0.2379
134+
- Feature: Age (Index: 35), Importance Score: 0.2131
135+
136+
- Feature: Platelets (Index: 34), Importance Score: 0.1622
137+
- Feature: Fibrinogen (Index: 33), Importance Score: 0.1507
138+
- Feature: WBC (Index: 32), Importance Score: 0.1351
139+
- Feature: PTT (Index: 31), Importance Score: 0.1212
140+
- Feature: Hgb (Index: 30), Importance Score: 0.1057
141+
- Feature: Hct (Index: 29), Importance Score: 0.0957
142+
143+
- Feature: TroponinI (Index: 28), Importance Score: 0.0874
144+
- Feature: Bilirubin_total (Index: 27), Importance Score: 0.0782
145+
- Feature: Potassium (Index: 26), Importance Score: 0.0739
146+
- Feature: Phosphate (Index: 25), Importance Score: 0.0697
147+
- Feature: Magnesium (Index: 24), Importance Score: 0.0670
148+
- Feature: Lactate (Index: 23), Importance Score: 0.0636
149+
- Feature: Glucose (Index: 22), Importance Score: 0.0587
150+
- Feature: Bilirubin_direct (Index: 21), Importance Score: 0.0561
151+
- Feature: Creatinine (Index: 20), Importance Score: 0.0533
152+
- Feature: Calcium (Index: 18), Importance Score: 0.0491
153+
- Feature: Chloride (Index: 19), Importance Score: 0.0468
154+
- Feature: Alkalinephos (Index: 17), Importance Score: 0.0456
155+
- Feature: BUN (Index: 16), Importance Score: 0.0418
156+
- Feature: AST (Index: 15), Importance Score: 0.0400
157+
- Feature: SaO2 (Index: 14), Importance Score: 0.0383
158+
- Feature: PaCO2 (Index: 13), Importance Score: 0.0371
159+
- Feature: pH (Index: 12), Importance Score: 0.0353
160+
- Feature: FiO2 (Index: 11), Importance Score: 0.0324
161+
- Feature: EtCO2 (Index: 8), Importance Score: 0.0314
162+
- Feature: Resp (Index: 7), Importance Score: 0.0306
163+
- Feature: HCO3 (Index: 10), Importance Score: 0.0300
164+
- Feature: BaseExcess (Index: 9), Importance Score: 0.0298
165+
- Feature: DBP (Index: 6), Importance Score: 0.0290
166+
- Feature: MAP (Index: 5), Importance Score: 0.0285
167+
- Feature: SBP (Index: 4), Importance Score: 0.0267
168+
- Feature: HR (Index: 1), Importance Score: 0.0248
169+
- Feature: Temp (Index: 3), Importance Score: 0.0245
170+
68171
Findings:
69172

70173
The selected features can be grouped into several categories, all of which are
@@ -101,4 +204,61 @@ known to be associated with sepsis:
101204
of sepsis. These markers reflect the patient's respiratory status.
102205
* Electrolytes: Phosphate, Potassium, Magnesium, Chloride, Calcium.
103206
* Analysis: Electrolyte imbalances are common in critically ill patients and
104-
can be exacerbated by sepsis.
207+
can be exacerbated by sepsis.
208+
209+
Analysis of the Results
210+
211+
1. Combined Dataset: The top 6 features are all demographic or administrative
212+
data (ICULOS, HospAdmTime, Unit1/2, Gender, Age). The first clinical
213+
measurements (Platelets, Fibrinogen, etc.) appear further down the list with
214+
significantly lower importance scores.
215+
216+
2. Non-Sepsis Only Dataset: The results are remarkably similar to the combined
217+
dataset. The same top 6 demographic/administrative features dominate, and the
218+
clinical variables have comparable, relatively low scores.
219+
220+
3. Sepsis-Only Dataset: This is where things get interesting.
221+
* The same demographic/administrative features are still at the top, but
222+
their importance scores are drastically lower (e.g., HospAdmTime drops
223+
from ~0.55 to ~0.34, and Age from ~0.22 to ~0.09).
224+
* More importantly, a much wider range of clinical variables now appear in
225+
the list with non-trivial importance scores (e.g., Platelets, Phosphate,
226+
Resp, HR, etc.). In the combined and non-sepsis datasets, many of these
227+
clinical features had scores so low they didn't even make the list.
228+
229+
The Problem
230+
231+
The strong predictive power of the demographic and administrative features in the combined dataset is masking the importance of the clinical variables.
232+
233+
Here's a breakdown of why this is happening:
234+
235+
* Confounding Variables: The demographic and administrative data (ICULOS,
236+
HospAdmTime, etc.) are likely strong confounders. They are correlated with
237+
both the clinical measurements and the outcome (sepsis). For example, a
238+
patient who has been in the ICU for a long time (ICULOS) is more likely to
239+
have both more clinical measurements taken and a higher chance of developing
240+
sepsis.
241+
* Data Imbalance: With a 93% to 7% ratio of non-sepsis to sepsis cases, the model is heavily biased towards the
242+
non-sepsis cases. The features that are good at predicting "not sepsis" will
243+
dominate the feature selection process. Since the non-sepsis group is so large,
244+
the model learns that the demographic data is a very good predictor for the
245+
majority of the data.
246+
* Masking Effect: Because the MRMR (Maximum Relevance, Minimum Redundancy) algorithm is trying to find a balance between
247+
relevance to the target and redundancy with other features, the strong,
248+
universally present demographic features get picked first. Once they are in the
249+
model, they "explain away" a lot of the variance, leaving less for the
250+
clinical variables to contribute, thus lowering their apparent importance.
251+
252+
What Happens When You Separate the Datasets?
253+
254+
* Non-Sepsis: When you run MRMR on only the non-sepsis data, the situation is
255+
largely the same as the combined set. The demographic features are still the
256+
best predictors for this large, relatively homogeneous group.
257+
*
258+
* Sepsis: When you isolate the sepsis cases, you remove the overwhelming
259+
influence of the non-sepsis group. In this context, the model is forced to look
260+
for the subtle patterns within the sepsis patients. This is where the clinical
261+
variables (HR, Resp, WBC, etc.) become much more important, as they are the
262+
indicators that change as the condition progresses. The demographic data is
263+
still relevant, but its predictive power is diminished relative to the
264+
now-crucial clinical measurements.

examples/case_study_icu_sepsis/src/main.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ mod stage_one;
1010
mod stage_two;
1111
// mod stage_zero;
1212

13-
const DATA_PATH: &str = "examples/case_study_icu_sepsis/data/all/dataset.parquet";
13+
#[allow(dead_code)]
14+
const FULL_DATA_PATH: &str = "examples/case_study_icu_sepsis/data/all/dataset.parquet";
15+
#[allow(dead_code)]
16+
const SEPS_TRUE_PATH: &str = "examples/case_study_icu_sepsis/data/seperated/seps_true.parquet";
17+
#[allow(dead_code)]
18+
const SEPS_FALSE_PATH: &str = "examples/case_study_icu_sepsis/data/seperated/seps_false.parquet";
1419

1520
fn main() {
16-
run::run(StageTwo, DATA_PATH)
21+
run::run(StageTwo, SEPS_FALSE_PATH)
1722
}

examples/case_study_icu_sepsis/src/run_enum.rs

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55
use std::fmt::Display;
66

7+
#[allow(dead_code)]
78
#[derive(Debug, Copy, Clone)]
89
pub enum StageEnum {
910
StageOne,
@@ -20,27 +21,3 @@ impl Display for StageEnum {
2021
}
2122
}
2223
}
23-
24-
#[cfg(test)]
25-
mod tests {
26-
use super::*;
27-
28-
#[test]
29-
fn test_display_for_stage_enum() {
30-
assert_eq!(format!("{}", StageEnum::StageOne), "StageOne");
31-
assert_eq!(format!("{}", StageEnum::StageTwo), "StageTwo");
32-
assert_eq!(format!("{}", StageEnum::All), "All");
33-
}
34-
35-
#[test]
36-
fn test_stage_enum_variants() {
37-
let stage_one = StageEnum::StageOne;
38-
assert!(matches!(stage_one, StageEnum::StageOne));
39-
40-
let stage_two = StageEnum::StageTwo;
41-
assert!(matches!(stage_two, StageEnum::StageTwo));
42-
43-
let all = StageEnum::All;
44-
assert!(matches!(all, StageEnum::All));
45-
}
46-
}

0 commit comments

Comments
 (0)