Skip to content

Commit 8828be2

Browse files
author
Kelly
committed
adds flake8 and fixes linting errors
1 parent e88a641 commit 8828be2

File tree

11 files changed

+734
-319
lines changed

11 files changed

+734
-319
lines changed

code/.DS_Store

0 Bytes
Binary file not shown.

code/requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,6 @@ lightgbm==3.3.2
44
scikit-learn==1.0.2
55
tqdm==4.62.3
66
scipy==1.7.3
7-
optuna==2.10.0
7+
optuna==2.10.0
8+
flake8==4.0.1
9+
black==22.1.0

code/setup.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
name="topcoder_cognitive_state",
1313
version="0.0.1",
1414
packages=find_packages(),
15-
py_modules=[splitext(basename(path))[0] for path in glob("topcoder_cognitive_state/*.py")],
15+
py_modules=[
16+
splitext(basename(path))[0] for path in glob("topcoder_cognitive_state/*.py")
17+
],
1618
install_requires=requirements,
17-
)
19+
)
Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
TARGET2LABEL = {
2-
"low": 0,
3-
"medium": 1,
4-
"high": 2,
5-
"baseline": 3,
6-
"channelized": 4,
7-
"surprise": 5
2+
"low": 0,
3+
"medium": 1,
4+
"high": 2,
5+
"baseline": 3,
6+
"channelized": 4,
7+
"surprise": 5,
88
}
99
LABEL2TARGET = dict(zip(TARGET2LABEL.values(), TARGET2LABEL.keys()))
10-
11-
1210
METADATA_COLUMNS = ["test_suite"]
13-
NAN_VALUES = [-9999.9]
11+
NAN_VALUES = [-9999.9]
Lines changed: 169 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,171 @@
1-
import os
2-
from multiprocessing import Pool
1+
from typing import Tuple
32
import time
43

5-
import pandas as pd
4+
import pandas as pd
65
from tqdm import tqdm
76

87
from topcoder_cognitive_state.CONSTANTS import METADATA_COLUMNS, NAN_VALUES
98

109

11-
def _test_missing_features(df):
10+
def _test_missing_features(df: pd.DataFrame) -> pd.DataFrame:
11+
"""
12+
This helper covers three missing-data scenarios, which are switched manually:
13+
14+
1. Check if some columns are missing
15+
2. Check if some columns have None values
16+
3. Check if some columns have -9999.9 (missing) values
17+
"""
1218
# cols = ['ViveEye_gazeOrigin_L_X', 'ViveEye_gazeOrigin_L_Y', 'ViveEye_gazeOrigin_L_Z']
1319
# cols = ['Myo_EMG_0', 'Myo_EMG_1', 'Myo_EMG_2', 'Myo_EMG_3', 'Myo_EMG_4', 'Myo_EMG_5', 'Myo_EMG_6']
1420
# cols = ['Polar_bpm', 'Polar_hrv', 'tlx_score']
1521
cols = [
1622
# features
17-
'tlx_score', 'E4_BVP', 'E4_GSR', 'LooxidLink_EEG_A3', 'LooxidLink_EEG_A4',
18-
'LooxidLink_EEG_FP1', 'LooxidLink_EEG_FP2', 'LooxidLink_EEG_A7', 'LooxidLink_EEG_A8',
19-
20-
'Muse_EEG_TP9', 'Muse_EEG_AF7', 'Muse_EEG_AF8', 'Muse_EEG_TP10',
21-
'Muse_PPG_0', 'Muse_PPG_1', 'Muse_PPG_2',
22-
23-
'Myo_GYR_X', 'Myo_GYR_Y', 'Myo_GYR_Z',
24-
'Myo_EMG_0', 'Myo_EMG_1', 'Myo_EMG_2', 'Myo_EMG_3', 'Myo_EMG_4', 'Myo_EMG_5', 'Myo_EMG_6', 'Myo_EMG_7',
25-
26-
'PICARD_fnirs_0', 'PICARD_fnirs_1',
27-
28-
'Polar_bpm', 'Polar_hrv',
29-
30-
'ViveEye_pupilPos_L_X', 'ViveEye_pupilPos_L_Y',
31-
'ViveEye_pupilPos_R_X', 'ViveEye_pupilPos_R_Y',
32-
33-
'ViveEye_gazeOrigin_L_X', 'ViveEye_gazeOrigin_L_Y', 'ViveEye_gazeOrigin_L_Z',
34-
'ViveEye_gazeOrigin_R_X', 'ViveEye_gazeOrigin_R_Y', 'ViveEye_gazeOrigin_R_Z',
35-
'ViveEye_gazeDirection_L_X', 'ViveEye_gazeDirection_L_Y', 'ViveEye_gazeDirection_L_Z',
36-
'ViveEye_gazeDirection_R_X', 'ViveEye_gazeDirection_R_Y', 'ViveEye_gazeDirection_R_Z',
37-
38-
'ViveEye_eyeOpenness_L', 'ViveEye_pupilDiameter_L',
39-
'ViveEye_eyeOpenness_R', 'ViveEye_pupilDiameter_R',
40-
41-
'Zephyr_HR', 'Zephyr_HRV',
23+
"tlx_score",
24+
"E4_BVP",
25+
"E4_GSR",
26+
"LooxidLink_EEG_A3",
27+
"LooxidLink_EEG_A4",
28+
"LooxidLink_EEG_FP1",
29+
"LooxidLink_EEG_FP2",
30+
"LooxidLink_EEG_A7",
31+
"LooxidLink_EEG_A8",
32+
"Muse_EEG_TP9",
33+
"Muse_EEG_AF7",
34+
"Muse_EEG_AF8",
35+
"Muse_EEG_TP10",
36+
"Muse_PPG_0",
37+
"Muse_PPG_1",
38+
"Muse_PPG_2",
39+
"Myo_GYR_X",
40+
"Myo_GYR_Y",
41+
"Myo_GYR_Z",
42+
"Myo_EMG_0",
43+
"Myo_EMG_1",
44+
"Myo_EMG_2",
45+
"Myo_EMG_3",
46+
"Myo_EMG_4",
47+
"Myo_EMG_5",
48+
"Myo_EMG_6",
49+
"Myo_EMG_7",
50+
"PICARD_fnirs_0",
51+
"PICARD_fnirs_1",
52+
"Polar_bpm",
53+
"Polar_hrv",
54+
"ViveEye_pupilPos_L_X",
55+
"ViveEye_pupilPos_L_Y",
56+
"ViveEye_pupilPos_R_X",
57+
"ViveEye_pupilPos_R_Y",
58+
"ViveEye_gazeOrigin_L_X",
59+
"ViveEye_gazeOrigin_L_Y",
60+
"ViveEye_gazeOrigin_L_Z",
61+
"ViveEye_gazeOrigin_R_X",
62+
"ViveEye_gazeOrigin_R_Y",
63+
"ViveEye_gazeOrigin_R_Z",
64+
"ViveEye_gazeDirection_L_X",
65+
"ViveEye_gazeDirection_L_Y",
66+
"ViveEye_gazeDirection_L_Z",
67+
"ViveEye_gazeDirection_R_X",
68+
"ViveEye_gazeDirection_R_Y",
69+
"ViveEye_gazeDirection_R_Z",
70+
"ViveEye_eyeOpenness_L",
71+
"ViveEye_pupilDiameter_L",
72+
"ViveEye_eyeOpenness_R",
73+
"ViveEye_pupilDiameter_R",
74+
"Zephyr_HR",
75+
"Zephyr_HRV",
4276
]
4377

4478
# case 1 - no column
45-
# df = df.drop(cols, axis=1)
79+
# df = df.drop(cols, axis=1)
4680

4781
# case 2 - None values
48-
#for col in cols:
82+
# for col in cols:
4983
# df[col] = None
5084

51-
# case 3 - missing values
85+
# case 3 - missing values
5286
for col in cols:
5387
df[col] = -9999.9
5488
return df
5589

5690

57-
def read_and_prepare_data_chunk(df):
91+
def read_and_prepare_data_chunk(df: pd.DataFrame) -> pd.DataFrame:
92+
"""
93+
Read raw data and prepare it for processing.
94+
I.e., create columns if they are missing,
95+
replace missing values with None,
96+
etc.
97+
98+
Args:
99+
df (pd.DataFrame): input raw data
100+
101+
Returns:
102+
pd.DataFrame: processed data
103+
"""
58104
EXPECTED_COLUMNS = [
59105
# features
60-
'tlx_score', 'E4_BVP', 'E4_GSR', 'LooxidLink_EEG_A3', 'LooxidLink_EEG_A4',
61-
'LooxidLink_EEG_FP1', 'LooxidLink_EEG_FP2', 'LooxidLink_EEG_A7', 'LooxidLink_EEG_A8',
62-
63-
'Muse_EEG_TP9', 'Muse_EEG_AF7', 'Muse_EEG_AF8', 'Muse_EEG_TP10',
64-
'Muse_PPG_0', 'Muse_PPG_1', 'Muse_PPG_2',
65-
66-
'Myo_GYR_X', 'Myo_GYR_Y', 'Myo_GYR_Z',
67-
'Myo_EMG_0', 'Myo_EMG_1', 'Myo_EMG_2', 'Myo_EMG_3', 'Myo_EMG_4', 'Myo_EMG_5', 'Myo_EMG_6', 'Myo_EMG_7',
68-
69-
'PICARD_fnirs_0', 'PICARD_fnirs_1',
70-
71-
'Polar_bpm', 'Polar_hrv',
72-
73-
'ViveEye_pupilPos_L_X', 'ViveEye_pupilPos_L_Y',
74-
'ViveEye_pupilPos_R_X', 'ViveEye_pupilPos_R_Y',
75-
76-
'ViveEye_gazeOrigin_L_X', 'ViveEye_gazeOrigin_L_Y', 'ViveEye_gazeOrigin_L_Z',
77-
'ViveEye_gazeOrigin_R_X', 'ViveEye_gazeOrigin_R_Y', 'ViveEye_gazeOrigin_R_Z',
78-
'ViveEye_gazeDirection_L_X', 'ViveEye_gazeDirection_L_Y', 'ViveEye_gazeDirection_L_Z',
79-
'ViveEye_gazeDirection_R_X', 'ViveEye_gazeDirection_R_Y', 'ViveEye_gazeDirection_R_Z',
80-
81-
'ViveEye_eyeOpenness_L', 'ViveEye_pupilDiameter_L',
82-
'ViveEye_eyeOpenness_R', 'ViveEye_pupilDiameter_R',
83-
84-
'Zephyr_HR', 'Zephyr_HRV',
85-
106+
"tlx_score",
107+
"E4_BVP",
108+
"E4_GSR",
109+
"LooxidLink_EEG_A3",
110+
"LooxidLink_EEG_A4",
111+
"LooxidLink_EEG_FP1",
112+
"LooxidLink_EEG_FP2",
113+
"LooxidLink_EEG_A7",
114+
"LooxidLink_EEG_A8",
115+
"Muse_EEG_TP9",
116+
"Muse_EEG_AF7",
117+
"Muse_EEG_AF8",
118+
"Muse_EEG_TP10",
119+
"Muse_PPG_0",
120+
"Muse_PPG_1",
121+
"Muse_PPG_2",
122+
"Myo_GYR_X",
123+
"Myo_GYR_Y",
124+
"Myo_GYR_Z",
125+
"Myo_EMG_0",
126+
"Myo_EMG_1",
127+
"Myo_EMG_2",
128+
"Myo_EMG_3",
129+
"Myo_EMG_4",
130+
"Myo_EMG_5",
131+
"Myo_EMG_6",
132+
"Myo_EMG_7",
133+
"PICARD_fnirs_0",
134+
"PICARD_fnirs_1",
135+
"Polar_bpm",
136+
"Polar_hrv",
137+
"ViveEye_pupilPos_L_X",
138+
"ViveEye_pupilPos_L_Y",
139+
"ViveEye_pupilPos_R_X",
140+
"ViveEye_pupilPos_R_Y",
141+
"ViveEye_gazeOrigin_L_X",
142+
"ViveEye_gazeOrigin_L_Y",
143+
"ViveEye_gazeOrigin_L_Z",
144+
"ViveEye_gazeOrigin_R_X",
145+
"ViveEye_gazeOrigin_R_Y",
146+
"ViveEye_gazeOrigin_R_Z",
147+
"ViveEye_gazeDirection_L_X",
148+
"ViveEye_gazeDirection_L_Y",
149+
"ViveEye_gazeDirection_L_Z",
150+
"ViveEye_gazeDirection_R_X",
151+
"ViveEye_gazeDirection_R_Y",
152+
"ViveEye_gazeDirection_R_Z",
153+
"ViveEye_eyeOpenness_L",
154+
"ViveEye_pupilDiameter_L",
155+
"ViveEye_eyeOpenness_R",
156+
"ViveEye_pupilDiameter_R",
157+
"Zephyr_HR",
158+
"Zephyr_HRV",
86159
# target
87-
"induced_state"
160+
"induced_state",
88161
]
89162

163+
# uncomment to enable test
90164
# df = _test_missing_features(df)
91165

92166
# test_suite
93-
if 'test_suite' not in df.columns:
94-
df['test_suite'] = "test"
167+
if "test_suite" not in df.columns:
168+
df["test_suite"] = "test"
95169

96170
df["time"] = pd.to_datetime(df["time"], unit="us")
97171
df["timestamp"] = df["time"].dt.round("1s")
@@ -118,30 +192,48 @@ def read_and_prepare_data_chunk(df):
118192
return ags
119193

120194

121-
def get_dummy_template(df):
195+
def get_dummy_template(df: pd.DataFrame) -> pd.DataFrame:
196+
"""
197+
The template is needed to match the expected sample submission format.
198+
"""
122199
df["time"] = pd.to_datetime(df["time"], unit="us")
123200
df["timestamp"] = df["time"].dt.round("1s")
124201
df = df.drop("time", axis=1)
125202
dummy_template = df.drop_duplicates(
126-
subset=METADATA_COLUMNS + ["timestamp"],
127-
keep="first"
203+
subset=METADATA_COLUMNS + ["timestamp"], keep="first"
128204
).reset_index(drop=True)
129205
dummy_template = dummy_template[METADATA_COLUMNS + ["timestamp"]]
130206
return dummy_template
131207

132208

133-
def get_needed_data(df):
209+
def get_needed_data(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
210+
"""
211+
Read data for training/testing and prepare template format for submission
212+
213+
Returns:
214+
res1 - pd.DataFrame - read data
215+
res2 - pd.DataFrame - template for submission
216+
"""
134217
res1 = read_and_prepare_data_chunk(df)
135218
res2 = get_dummy_template(df)
136-
return [res1, res2]
219+
return res1, res2
137220

138221

139222
def read_data(
140-
path_to_data: str,
141-
debug: bool = False
142-
) -> pd.DataFrame:
223+
path_to_data: str, debug: bool = False
224+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
225+
"""
226+
Read data. The data is read in chunks to reduce memory consumption.
227+
228+
Args:
229+
path_to_data (str): path to data
230+
debug (bool, optional): run data loading on a sample of data. Defaults to False.
231+
232+
Returns:
233+
Tuple[pd.DataFrame, pd.DataFrame]: Read data and prepared template for submission
234+
"""
143235
t_start = time.time()
144-
chunksize = 10 ** 6
236+
chunksize = 10**6
145237

146238
if path_to_data is None:
147239
path_to_data = "./data/training-data.zip"
@@ -152,11 +244,9 @@ def read_data(
152244
else:
153245
nrows = None
154246

247+
# create chunks iterator to read data
155248
chunks = pd.read_csv(
156-
path_to_data,
157-
na_values=NAN_VALUES,
158-
chunksize=chunksize,
159-
nrows=nrows
249+
path_to_data, na_values=NAN_VALUES, chunksize=chunksize, nrows=nrows
160250
)
161251

162252
# get data for processing
@@ -167,12 +257,14 @@ def read_data(
167257
res = [i[0] for i in full_result]
168258
res = pd.concat(res, axis=0)
169259
res = res.sort_index()
170-
res = res[~res.index.duplicated(keep='first')]
260+
res = res[~res.index.duplicated(keep="first")]
171261

172262
# collect dummies for sub
173263
res2 = [i[1] for i in full_result]
174264
res2 = pd.concat(res2, axis=0)
175-
res2 = res2.drop_duplicates(subset=METADATA_COLUMNS + ["timestamp"], keep="first").reset_index(drop=True)
265+
res2 = res2.drop_duplicates(
266+
subset=METADATA_COLUMNS + ["timestamp"], keep="first"
267+
).reset_index(drop=True)
176268
t_end = time.time()
177269
print(f"Data is read. Time per reading: {(t_end-t_start)/60:.2f} minutes")
178270
return res, res2

0 commit comments

Comments
 (0)