Skip to content

Commit 44094b6

Browse files
committed
fix(dataset):info
1 parent a729a34 commit 44094b6

File tree

5 files changed

+102
-101
lines changed

5 files changed

+102
-101
lines changed

data/data_provider/datasets/HumanActivity.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,24 @@
1212
from data.dependencies.HumanActivity.HumanActivity import HumanActivity, Activity_time_chunk
1313

1414
class Data(Dataset):
15+
'''
16+
wrapper for Human Activity dataset
17+
18+
- tasks: forecasting
19+
- sampling rate (rounded): 1 millisecond
20+
- max time length (padded): 131 (4000 milliseconds)
21+
- seq_len -> pred_len:
22+
- 3000 -> 300
23+
- 3000 -> 1000
24+
- number of variables: 12
25+
- number of samples: 1360 (949 + 193 + 218)
26+
'''
1527
def __init__(
1628
self,
1729
configs: ExpConfigs,
1830
flag: str = 'train',
1931
**kwargs
2032
):
21-
'''
22-
wrapper for Human Activity dataset
23-
24-
- number of variables: 12
25-
- number of samples: 949 + 192 + 218
26-
'''
27-
logger.debug(f"getting {flag} set of Human Activity")
2833
self.configs = configs
2934
assert flag in ['train', 'test', 'val', 'test_all']
3035
self.flag = flag

data/data_provider/datasets/MIMIC_III.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,26 @@
1515
warnings.filterwarnings('ignore')
1616

1717
class Data(Dataset):
18+
'''
19+
wrapper for MIMIC III DeBrouwer2019 dataset implemented in tsdm
20+
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
21+
22+
- tasks: forecasting
23+
- sampling rate (rounded): 30 minutes
24+
- max time length (padded): 96 (48 hours)
25+
- seq_len -> pred_len:
26+
- 72 -> 3
27+
- 72 -> 24
28+
- 48 -> 48
29+
- number of variables: 96
30+
- number of samples: 21250 (17212 + 1913 + 2125)
31+
'''
1832
def __init__(
1933
self,
2034
configs: ExpConfigs,
2135
flag: str = 'train',
2236
**kwargs
2337
):
24-
'''
25-
wrapper for MIMIC III DeBrouwer2019 dataset implemented in tsdm
26-
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
27-
28-
this version of MIMIC III does not align the timesteps among samples (but do align within sample), which means:
29-
- It use custom collate_fn to pad trailing 0s in each batch
30-
- Tensor length along time dimension is not fixed in different batches, which depends on the max number of timesteps in each batch
31-
- time steps does not spread evenly, and the start and end time is also not fixed
32-
33-
- max time length: 96
34-
- number of variables: 96
35-
- number of samples: 21250
36-
'''
37-
logger.debug(f"getting {flag} set of MIMIC_III in tsdm format")
3838
self.configs = configs
3939
assert flag in ['train', 'test', 'val', 'test_all']
4040
self.flag = flag

data/data_provider/datasets/MIMIC_IV.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,25 @@
1515
warnings.filterwarnings('ignore')
1616

1717
class Data(Dataset):
18+
'''
19+
wrapper for MIMIC IV Bilos2021 dataset implemented in tsdm
20+
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
21+
22+
- tasks: forecasting
23+
- sampling rate (rounded): 1 minute
24+
- max time length (padded): 971 (48 hours)
25+
- seq_len -> pred_len:
26+
- 2160 -> 3
27+
- 2160 -> 720
28+
- number of variables: 100
29+
- number of samples: 17874 (14477 + 1609 + 1788)
30+
'''
1831
def __init__(
1932
self,
2033
configs: ExpConfigs,
2134
flag: str = 'train',
2235
**kwargs
2336
):
24-
'''
25-
wrapper for MIMIC IV Bilos2021 dataset implemented in tsdm
26-
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
27-
28-
this version of MIMIC IV does not align the timesteps among samples (but do align within sample), which means:
29-
- It use custom collate_fn to pad trailing 0s in each batch
30-
- Tensor length along time dimension is not fixed in different batches, which depends on the max number of timesteps in each batch
31-
- time steps does not spread evenly, and the start and end time is also not fixed
32-
33-
- max time length: 2880
34-
- number of variables: 100
35-
- number of samples: 17874
36-
'''
37-
logger.debug(f"getting {flag} set of MIMIC_IV in tsdm format")
3837
self.configs = configs
3938
assert flag in ['train', 'test', 'val', 'test_all']
4039
self.flag = flag

data/data_provider/datasets/P12.py

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,63 +15,63 @@
1515
warnings.filterwarnings('ignore')
1616

1717
class Data(Dataset):
18+
'''
19+
wrapper for PhysioNet 2012 dataset implemented in tsdm
20+
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
21+
22+
- tasks: forecasting
23+
- sampling rate (rounded): 1 hour
24+
- max time length (padded): 48 (48 hours)
25+
- seq_len -> pred_len:
26+
- 36 -> 3
27+
- 36 -> 12
28+
- 24 -> 24
29+
- number of variables: 36
30+
31+
- 0: Albumin (g/dL)
32+
- 1: ALP [Alkaline phosphatase (IU/L)]
33+
- 2: ALT [Alanine transaminase (IU/L)]
34+
- 3: AST [Aspartate transaminase (IU/L)]
35+
- 4: Bilirubin (mg/dL)
36+
- 5: BUN [Blood urea nitrogen (mg/dL)]
37+
- 6: Cholesterol (mg/dL)
38+
- 7: Creatinine [Serum creatinine (mg/dL)]
39+
- 8: DiasABP [Invasive diastolic arterial blood pressure (mmHg)]
40+
- 9: FiO2 [Fractional inspired O2 (0-1)]
41+
- 10: GCS [Glasgow Coma Score (3-15)]
42+
- 11: Glucose [Serum glucose (mg/dL)]
43+
- 12: HCO3 [Serum bicarbonate (mmol/L)]
44+
- 13: HCT [Hematocrit (%)]
45+
- 14: HR [Heart rate (bpm)]
46+
- 15: K [Serum potassium (mEq/L)]
47+
- 16: Lactate (mmol/L)
48+
- 17: Mg [Serum magnesium (mmol/L)]
49+
- 18: MAP [Invasive mean arterial blood pressure (mmHg)]
50+
- 19: MechVent [Mechanical ventilation respiration (0:false, or 1:true)]
51+
- 20: Na [Serum sodium (mEq/L)]
52+
- 21: NIDiasABP [Non-invasive diastolic arterial blood pressure (mmHg)]
53+
- 22: NIMAP [Non-invasive mean arterial blood pressure (mmHg)]
54+
- 23: NISysABP [Non-invasive systolic arterial blood pressure (mmHg)]
55+
- 24: PaCO2 [partial pressure of arterial CO2 (mmHg)]
56+
- 25: PaO2 [Partial pressure of arterial O2 (mmHg)]
57+
- 26: pH [Arterial pH (0-14)]
58+
- 27: Platelets (cells/nL)
59+
- 28: RespRate [Respiration rate (bpm)]
60+
- 29: SaO2 [O2 saturation in hemoglobin (%)]
61+
- 30: SysABP [Invasive systolic arterial blood pressure (mmHg)]
62+
- 31: Temp [Temperature (°C)]
63+
- 32: TropI [Troponin-I (μg/L)]
64+
- 33: TropT [Troponin-T (μg/L)]
65+
- 34: Urine [Urine output (mL)]
66+
- 35: WBC [White blood cell count (cells/nL)]
67+
- number of samples: 11981 (9704 + 1078 + 1199)
68+
'''
1869
def __init__(
1970
self,
2071
configs: ExpConfigs,
2172
flag: str = 'train',
2273
**kwargs
2374
):
24-
'''
25-
wrapper for Physionet 2012 dataset implemented in tsdm
26-
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
27-
28-
this version of P12 does not align the timesteps among samples (but do align within sample), which means:
29-
- It use custom collate_fn to pad trailing 0s in each batch
30-
- Tensor length along time dimension is not fixed in different batches, which depends on the max number of timesteps in each batch
31-
- time steps does not spread evenly, and the start and end time is also not fixed
32-
33-
- max time length: 48
34-
- number of variables: 36
35-
36-
- 0: Albumin (g/dL)
37-
- 1: ALP [Alkaline phosphatase (IU/L)]
38-
- 2: ALT [Alanine transaminase (IU/L)]
39-
- 3: AST [Aspartate transaminase (IU/L)]
40-
- 4: Bilirubin (mg/dL)
41-
- 5: BUN [Blood urea nitrogen (mg/dL)]
42-
- 6: Cholesterol (mg/dL)
43-
- 7: Creatinine [Serum creatinine (mg/dL)]
44-
- 8: DiasABP [Invasive diastolic arterial blood pressure (mmHg)]
45-
- 9: FiO2 [Fractional inspired O2 (0-1)]
46-
- 10: GCS [Glasgow Coma Score (3-15)]
47-
- 11: Glucose [Serum glucose (mg/dL)]
48-
- 12: HCO3 [Serum bicarbonate (mmol/L)]
49-
- 13: HCT [Hematocrit (%)]
50-
- 14: HR [Heart rate (bpm)]
51-
- 15: K [Serum potassium (mEq/L)]
52-
- 16: Lactate (mmol/L)
53-
- 17: Mg [Serum magnesium (mmol/L)]
54-
- 18: MAP [Invasive mean arterial blood pressure (mmHg)]
55-
- 19: MechVent [Mechanical ventilation respiration (0:false, or 1:true)]
56-
- 20: Na [Serum sodium (mEq/L)]
57-
- 21: NIDiasABP [Non-invasive diastolic arterial blood pressure (mmHg)]
58-
- 22: NIMAP [Non-invasive mean arterial blood pressure (mmHg)]
59-
- 23: NISysABP [Non-invasive systolic arterial blood pressure (mmHg)]
60-
- 24: PaCO2 [partial pressure of arterial CO2 (mmHg)]
61-
- 25: PaO2 [Partial pressure of arterial O2 (mmHg)]
62-
- 26: pH [Arterial pH (0-14)]
63-
- 27: Platelets (cells/nL)
64-
- 28: RespRate [Respiration rate (bpm)]
65-
- 29: SaO2 [O2 saturation in hemoglobin (%)]
66-
- 30: SysABP [Invasive systolic arterial blood pressure (mmHg)]
67-
- 31: Temp [Temperature (°C)]
68-
- 32: TropI [Troponin-I (μg/L)]
69-
- 33: TropT [Troponin-T (μg/L)]
70-
- 34: Urine [Urine output (mL)]
71-
- 35: WBC [White blood cell count (cells/nL)]
72-
- number of samples: 11981
73-
'''
74-
logger.debug(f"getting {flag} set of PhysioNet'12 in tsdm format")
7575
self.configs = configs
7676
assert flag in ['train', 'test', 'val', 'test_all']
7777
self.flag = flag

data/data_provider/datasets/USHCN.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,24 @@
1515
warnings.filterwarnings('ignore')
1616

1717
class Data(Dataset):
18+
'''
19+
wrapper for USHCN DeBrouwer2019 dataset implemented in tsdm
20+
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
21+
22+
- tasks: forecasting
23+
- max time length: 337 (4 years)
24+
- seq_len -> pred_len:
25+
- 150 -> 3
26+
- 150 -> 50
27+
- number of variables: 5
28+
- number of samples: 1114 (902 + 100 + 112)
29+
'''
1830
def __init__(
1931
self,
2032
configs: ExpConfigs,
2133
flag: str = 'train',
2234
**kwargs
2335
):
24-
'''
25-
wrapper for USHCN DeBrouwer2019 dataset implemented in tsdm
26-
tsdm: https://openreview.net/forum?id=a-bD9-0ycs0
27-
28-
this version of USHCN does not align the timesteps among samples (but do align within sample), which means:
29-
- It use custom collate_fn to pad trailing 0s in each batch
30-
- Tensor length along time dimension is not fixed in different batches, which depends on the max number of timesteps in each batch
31-
- time steps does not spread evenly, and the start and end time is also not fixed
32-
33-
- max time length: 200
34-
- number of variables: 5
35-
- number of samples: 1114
36-
- actual time length: 4 year
37-
'''
38-
logger.debug(f"getting {flag} set of USHCN in tsdm format")
3936
self.configs = configs
4037
assert flag in ['train', 'test', 'val', 'test_all']
4138
self.flag = flag

0 commit comments

Comments
 (0)