-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathReconstructPhaseTablesFromParticipantLogs.py
More file actions
158 lines (142 loc) · 7.2 KB
/
ReconstructPhaseTablesFromParticipantLogs.py
File metadata and controls
158 lines (142 loc) · 7.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# This script reconstructs phase tables with missing data from the participant logs.
# Prerequisites:
# - The headers of the phase tables need to be valid (apart from the header line, the files can be empty)
# - IndependentVariables.csv needs to be complete (we simply assume this since it was the case for us; otherwise restore that file first!)
# - No underscore (_) in the phase names or multi-trial variable names
import os
from contextlib import chdir
# Folder with the participant log files; RecoverDataTable changes into it and reads every file found there.
FolderToLogs = "StudyLogs/ParticipantLogs"
# Folder holding IndependentVariables.csv and the Phase*.csv tables; recovered tables are written to its "Recovered" subfolder.
FolderToPhaseTables = "StudyLogs"
# Filled by LoadIndependentVars: maps the first column of each IndependentVariables.csv row to a dict of column name -> value.
IndependentVars = {}
def LoadIndependentVars():
    """Load IndependentVariables.csv into the module-level IndependentVars dict.

    The file is first cleaned with ConvertCoding. Its first line is used as the
    header; every following non-blank line becomes a dict mapping column name to
    value, stored in IndependentVars under the value of the row's first column.
    Fields are split on plain commas, so quoted commas are not supported.
    """
    csv_path = os.path.join(FolderToPhaseTables, "IndependentVariables.csv")
    ConvertCoding(csv_path)
    with open(csv_path, 'r') as file:
        header_entries = []
        for line in file:
            stripped = line.strip()
            if len(header_entries) == 0:
                # the first line of the file is the header
                header_entries = stripped.split(",")
                continue
            if not stripped:
                # a blank (typically trailing) line would otherwise be stored as participant ""
                continue
            entries = stripped.split(",")
            if len(entries) > len(header_entries):
                # more fields than columns: warn instead of aborting the whole run
                print("WARNING: Ignoring extra columns in IndependentVariables.csv for "+entries[0])
            # zip pairs each value with its column name and stops at the shorter list
            IndependentVars[entries[0]] = dict(zip(header_entries, entries))
def CreateEntryLine(header_entries, data):
    """Build one CSV row (terminated by a newline) for the given header.

    header_entries: list of column names, in output order.
    data: dict mapping column name to its string value.

    Columns missing from data are written as empty fields and reported with a
    warning, except for "lowerPrio" and "higerPrio", which are skipped silently.
    Returns the row as a string; an empty header yields an empty string.
    """
    if not header_entries:
        return ""
    values = []
    for entry in header_entries:
        if entry in data:
            values.append(data[entry])
            continue
        values.append("")
        if entry not in ("lowerPrio", "higerPrio"):
            #some vars are not required (which we don't know here, so they were added manually for our case!)
            print("WARNING: Missing data for "+entry)
    # join by position so a column that shares its name with the last one still gets a comma
    return ",".join(values) + "\n"
def CheckForSplitCommaInSentence(entries):
    """Re-join pieces of a quoted phrase that was split at its commas.

    entries: list of strings obtained by splitting a value at ",".

    A piece that starts with a double quote but does not end with one is merged
    with the following pieces up to and including the first piece that ends with
    a double quote; the removed commas are replaced by the marker "[Komma]".
    If no closing quote is found, all remaining pieces are merged.
    Returns a new list; the input list is not modified.
    """
    entries_cleaned = []
    i = 0
    while i < len(entries):
        entry = entries[i]
        if entry.startswith("\"") and not entry.endswith("\""):
            parts = [entry]
            i += 1
            # bounded scan: an unterminated quote must not run past the end of the list
            while i < len(entries):
                parts.append(entries[i])
                if entries[i].endswith("\""):
                    break
                i += 1
            entries_cleaned.append("[Komma]".join(parts))
        else:
            entries_cleaned.append(entry)
        i += 1
    return entries_cleaned
def _ParseLogTimestamp(line):
    """Return the timestamp at the start of a participant-log line as a float.

    "#" characters are removed and the text before the first ":" is parsed.
    """
    return float(line.strip().replace("#", "").split(":")[0])


def RecoverDataTable(phase_filename, multi_trial):
    """Rebuild one phase table from the participant logs.

    Reads the header line of phase_filename in FolderToPhaseTables, scans every
    file in FolderToLogs (ordered by modification time) and writes the header
    plus the reconstructed rows to FolderToPhaseTables/Recovered/phase_filename.

    phase_filename: file name of the table. The phase name is the file name
        without "Phase_" and ".csv"; for multi-trial tables the second and third
        "_"-separated parts are the phase name and the multi-trial variable name.
    multi_trial: if True, one row is emitted for every "Recorded" entry of the
        multi-trial variable inside a matching condition; otherwise one row is
        emitted per matching condition when its "EndCondition" line is reached.
    """
    phase_name = phase_filename.replace("Phase_", "").replace(".csv", "")
    multi_trial_var_name = ""
    if multi_trial:
        name_parts = phase_filename.replace(".csv", "").split("_")
        phase_name = name_parts[1]
        multi_trial_var_name = name_parts[2]

    #read in header of this table
    with open(os.path.join(FolderToPhaseTables, phase_filename), 'r') as f:
        header_line = f.readline()
    header_entries = header_line.strip().split(",")
    if header_line and not header_line.endswith("\n"):
        # header-only files may lack the final newline; without it the first row would be glued to the header
        header_line += "\n"
    out_lines = [header_line]

    #now read through all participant logs and gather relevant information
    with chdir(FolderToLogs):
        log_files = sorted(filter(os.path.isfile, os.listdir(".")), key=os.path.getmtime)
        for filename in log_files:
            try:
                # the participant id is the text between the first "-" and the next "_" of the file name
                participant_id = filename.split("-")[1].split("_")[0]
            except IndexError:
                # a stray file in the log folder must not abort the whole recovery
                print("WARNING: Skipping file with unexpected name "+filename)
                continue
            with open(filename, 'r') as file:
                reading_relevant_condition = False
                trial_nr = 0
                start_time = 0.0
                data = {}
                for line in file:
                    if "Start Condition:" in line and "Phase: "+phase_name+";" in line:
                        reading_relevant_condition = True
                        trial_nr = 0 #simply not used if not multi-trial var
                        start_time = _ParseLogTimestamp(line)
                        data = {"Phase": phase_name}
                        data.update(IndependentVars[participant_id]) # adds ParticipantId and IVs to dict
                        # factor/level pairs are taken from the text between the first "(" and ")", split at ";"
                        for factor_levels in line.split("(")[1].split(")")[0].split(";"):
                            if "Phase:" in factor_levels:
                                continue
                            # [1:] drops the first character (presumably the space after ";")
                            factor, level = factor_levels[1:].split(": ")
                            data[factor] = level
                    if reading_relevant_condition and "EndCondition" in line:
                        reading_relevant_condition = False
                        if not multi_trial:
                            #in multi_trial case we store data not at the end but when it is recorded
                            data["Time"] = "{:.2f}".format(_ParseLogTimestamp(line) - start_time)
                            out_lines.append(CreateEntryLine(header_entries, data))
                    if "Recorded" in line:
                        var_name = line.split(" ")[2][:-1] #last part removes ":" from the end
                        var_value = line.strip().split(" ", 3)[3]
                        if multi_trial and var_name == multi_trial_var_name and reading_relevant_condition:
                            #this is a multi trial var we are looking for
                            var_entries = var_value.replace("{", "").replace("}", "").split(",")
                            #maybe we split at , in phrases which are escaped in ", so check that!
                            var_entries = CheckForSplitCommaInSentence(var_entries)
                            # the recorded values fill the last len(var_entries) columns of the table
                            first_column = len(header_entries) - len(var_entries)
                            for offset, value in enumerate(var_entries):
                                data[header_entries[first_column + offset]] = value
                            data["Trial"] = str(trial_nr)
                            trial_nr += 1
                            out_lines.append(CreateEntryLine(header_entries, data))
                        if (not multi_trial) and var_name in header_entries:
                            data[var_name] = var_value

    #now write this
    recovered_dir = os.path.join(FolderToPhaseTables, "Recovered")
    os.makedirs(recovered_dir, exist_ok=True)
    with open(os.path.join(recovered_dir, phase_filename), 'w') as f:
        f.writelines(out_lines)
def ConvertCoding(full_filename):
    """Rewrite a file in place as UTF-8 without byte order marks.

    The file is read as 'utf-8-sig', any byte order mark characters still
    present at the start of a line are stripped, and the result is written
    back to the same path as plain 'utf-8'.
    """
    #Unreal puts byte order marks in there, so collect every line without them
    cleaned = []
    with open(full_filename, mode='r', encoding='utf-8-sig') as source:
        for raw_line in source:
            cleaned.append(raw_line.lstrip('\ufeff'))
    with open(full_filename, mode='w', encoding='utf-8') as target:
        target.writelines(cleaned)
def Main():
    """Recover every phase table found in FolderToPhaseTables.

    Loads the independent variables, then for each file whose name starts with
    "Phase" and ends with ".csv": cleans it with ConvertCoding, prints its path
    and calls RecoverDataTable, treating it as a multi-trial table when its
    header line contains ",Trial,".
    """
    LoadIndependentVars()
    #go through all files in the phasetable folder
    for filename in os.listdir(FolderToPhaseTables):
        is_phase_table = filename.startswith("Phase") and filename.endswith(".csv")
        if not is_phase_table:
            continue
        full_name = os.path.join(FolderToPhaseTables, filename)
        ConvertCoding(full_name)
        print(full_name)
        with open(full_name, 'r') as f:
            header = f.readline()
            #a "Trial" column marks a multiple trial data table, otherwise it is a normal phase
            has_trial_column = ",Trial," in header
            RecoverDataTable(filename, has_trial_column)
# Script entry point: the recovery runs as soon as this file is executed (it also runs if the file is imported).
Main()