Skip to content

Commit 7384e35

Browse files
committed
Fix code to conform to numbers in sign language
1 parent ad4cb83 commit 7384e35

File tree

2 files changed

+56
-48
lines changed

2 files changed

+56
-48
lines changed

augmented_data/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def read_dataset_text():
2323
# 0099 is just completely broken.
2424
if folder_name in ("0099"):
2525
continue
26-
if folder_name in ("0101", "0102"):
26+
if folder_name in ("0090", "0101", "0102"):
2727
encoding = 'utf-8'
2828
else:
2929
encoding = 'iso-8859-1'

augmented_data/time.py

Lines changed: 55 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,27 @@
66
from collections import defaultdict
77
from .utils import has_numbers, replace_multiple
88

9+
# Minute values accepted as a "single sign" value; 0 is deliberately excluded.
# Contents: 1-19, plus for each tens digit 2-5 the round ten (20, 30, 40, 50)
# and the repdigit (22, 33, 44, 55).
single_sign_minutes = set(range(1, 20))

for i in range(2, 6):
    single_sign_minutes.add(i * 10)  # Tens
    single_sign_minutes.add(i * 11)  # Repdigits

# Import-time debug output, kept so the module's console output is unchanged.
print(f'single_sign_minutes: {single_sign_minutes}')
919

1020
class TimeData:
1121
def __init__(self):
1222
self.hour_str = None
13-
self.minute_str = None
14-
self.minute_tens_str = None
23+
self.minute1_str = None
24+
self.minute2_str = None
1525
self.hour_int = None
1626
self.minute_int = None
1727
self.hour_line = None
18-
self.minute_line = None
19-
self.minute_tens_line = None
28+
self.minute1_line = None
29+
self.minute2_line = None
2030
self.hour_is_separate = None
2131

2232
def is_valid(self):
@@ -47,7 +57,7 @@ def set_hour(self, hour_str, line_num=None):
4757
self.hour_line = line_num
4858

4959
def set_minute_mms(self, minute_str, line_num):
50-
if self.minute_str is not None and self.minute_tens_str is not None:
60+
if self.minute2_str is not None:
5161
raise Exception('ERROR: minute was already set')
5262
if minute_str == '':
5363
raise Exception('ERROR: minute is empty')
@@ -58,26 +68,29 @@ def set_minute_mms(self, minute_str, line_num):
5868
if minute_int < 0:
5969
raise Exception(f'ERROR: minute is negative: {minute_int}')
6070

61-
# If self.minute_int is None then this function was called for the first time
62-
if self.minute_int is None and minute_int <= 12:
63-
self.minute_str = minute_str
64-
self.minute_line = line_num
71+
if self.minute1_str is None:
72+
if minute_int not in single_sign_minutes:
73+
print(f'WARNING: minute1 is not a valid single sign value: {minute_str}')
74+
self.minute1_str = minute_str
75+
self.minute1_line = line_num
6576
else:
66-
if minute_str not in ['10', '20', '30', '40', '50']:
67-
print(f'WARNING: minute_tens is not a valid tens value: {minute_str}')
68-
self.minute_tens_str = minute_str
69-
self.minute_tens_line = line_num
77+
if minute_str not in ['20', '30', '40', '50']:
78+
print(f'WARNING: minute2 is not a valid value: {minute_str}')
79+
if self.minute_int not in range(1, 10):
80+
print(f'WARNING: minute1 is not a valid value: {self.minute1_str}')
81+
self.minute2_str = minute_str
82+
self.minute2_line = line_num
7083

7184
if self.minute_int is None:
7285
self.minute_int = 0
7386
self.minute_int += minute_int
7487

7588
def set_minute_text(self, minute_str):
76-
if self.minute_str is not None:
89+
if self.minute1_str is not None:
7790
raise Exception('ERROR: minute was already set')
7891
if minute_str == '':
7992
raise Exception('ERROR: minute is empty')
80-
self.minute_str = minute_str
93+
self.minute1_str = minute_str
8194
try:
8295
minute_int = int(minute_str)
8396
except ValueError:
@@ -93,7 +106,7 @@ def set_hour_is_separate(self, hour_is_separate):
93106
def get_line_numbers(self):
    """Return the line numbers recorded for the hour and both minute parts.

    Returns:
        A tuple ``(hour_line, minute1_line, minute2_line)``. A minute entry
        is still None if no line number was recorded for that part.

    Raises:
        Exception: if the hour line has not been set yet.
    """
    if self.hour_line is None:
        raise Exception('ERROR: hour has not been set yet')
    return (self.hour_line, self.minute1_line, self.minute2_line)
97110

98111
def get_time_int(self):
99112
if self.hour_int is None:
@@ -136,11 +149,11 @@ def process_time(state, time_data, time_positions):
136149
if not time_data.is_valid():
137150
return (state, time_positions)
138151
# Here we know that the time is valid
139-
(hour_line, minute_line, minute_tens_line) = time_data.get_line_numbers()
152+
(hour_line, minute1_line, minute2_line) = time_data.get_line_numbers()
140153
(hour_int, minute_int) = time_data.get_time_int()
141154
hour_is_separate = time_data.get_hour_is_separate()
142-
print(f'Found time: {hour_int}:{minute_int}, lines: {hour_line}, {minute_line}, {minute_tens_line} (file_number: {file_number})')
143-
time_position = (hour_line, minute_line, minute_tens_line, hour_int, minute_int, hour_is_separate)
155+
print(f'Found time: {hour_int}:{minute_int}, lines: {hour_line}, {minute1_line}, {minute2_line} (file_number: {file_number})')
156+
time_position = (hour_line, minute1_line, minute2_line, hour_int, minute_int, hour_is_separate)
144157
time_positions_list = time_positions.get(file_number, [])
145158
time_positions_list.append(time_position)
146159
time_positions[file_number] = time_positions_list
@@ -209,35 +222,35 @@ def process_time(state, time_data, time_positions):
209222

210223
for file_number, time_infos in time_positions.items():
211224
for time_info in time_infos:
212-
(hour_line, minute_line, minute_tens_line, old_hour, old_minute, hour_is_separate) = time_info
225+
(hour_line, minute1_line, minute2_line, old_hour, old_minute, hour_is_separate) = time_info
213226
old_time = (old_hour, old_minute)
214227
if old_time not in time_mappings[file_number]:
215228
new_hour = random.randrange(24)
216-
new_minute = None
217-
new_minute_tens = None
218-
if minute_line is not None and minute_tens_line is None:
219-
new_minute = random.randrange(13)
220-
elif minute_line is not None and minute_tens_line is not None:
221-
new_minute = random.randrange(10)
222-
if minute_tens_line is not None:
223-
new_minute_tens = (random.randrange(5)+1)*10 # 10, 20, 30, 40, 50
224-
new_time = (new_hour, new_minute, new_minute_tens)
229+
new_minute1 = None
230+
new_minute2 = None
231+
if minute1_line is not None and minute2_line is None:
232+
new_minute1 = random.choice(tuple(single_sign_minutes))
233+
elif minute1_line is not None and minute2_line is not None:
234+
new_minute1 = random.randrange(1, 10)
235+
if minute2_line is not None:
236+
new_minute2 = random.choice([20, 30, 40, 50])
237+
new_time = (new_hour, new_minute1, new_minute2)
225238
else:
226239
new_time = time_mappings[file_number][old_time]
227240

228-
(new_hour, new_minute, new_minute_tens) = new_time
241+
(new_hour, new_minute1, new_minute2) = new_time
229242
if hour_is_separate:
230243
new_hour_str = f'num:{new_hour}'
231244
else:
232245
new_hour_str = f'uhr:{new_hour}'
233246
new_mms_data[hour_line]['maingloss'] = new_hour_str
234-
if new_minute is not None:
235-
new_mms_data[minute_line]['maingloss'] = f'num:{new_minute}'
236-
if new_minute_tens is not None:
237-
if minute_tens_line < len(new_mms_data):
238-
new_mms_data[minute_tens_line]['maingloss'] = f'num:{new_minute_tens}'
247+
if new_minute1 is not None:
248+
new_mms_data[minute1_line]['maingloss'] = f'num:{new_minute1}'
249+
if new_minute2 is not None:
250+
if minute2_line < len(new_mms_data):
251+
new_mms_data[minute2_line]['maingloss'] = f'num:{new_minute2}'
239252
else:
240-
print(f'WARNING: minute_tens_line {minute_tens_line} not found in file {file_number}')
253+
print(f'WARNING: minute2_line {minute2_line} not found in file {file_number}')
241254
for line_num, row in enumerate(new_mms_data):
242255
print(f'line {line_num}: {row["maingloss"]}')
243256

@@ -287,21 +300,16 @@ def process_time(state, time_data, time_positions):
287300
if old_time not in time_mapping:
288301
print(f'WARNING: old_time {old_time} not found in time_mapping (file: {folder_name})')
289302
continue
290-
(new_hour, new_minute, new_minute_tens) = time_mapping[old_time]
291-
if new_minute is None:
292-
new_minute = 0
293-
if new_minute_tens is not None:
294-
new_minute += new_minute_tens
295-
if new_minute is None and new_minute_tens is None and time_format != TimeFormat.JUST_HOUR:
296-
print(f'WARNING: new_minute and new_minute_tens are None but time_format is {time_format} (file: {folder_name})')
303+
(new_hour, new_minute1, new_minute2) = time_mapping[old_time]
304+
if new_minute2 is not None:
305+
new_minute1 += new_minute2
306+
if new_minute1 is None and new_minute2 is None and time_format != TimeFormat.JUST_HOUR:
307+
print(f'WARNING: new_minute1 and new_minute2 are None but time_format is {time_format} (file: {folder_name})')
297308
continue
298-
new_whole_time = assemble_time(time_format, new_hour, new_minute)
309+
new_whole_time = assemble_time(time_format, new_hour, new_minute1)
299310
whole_time_mapping[old_whole_time] = new_whole_time
300311
time_mapping_str_to_tuple[old_whole_time] = old_time
301312

302-
#if folder_name == '0003':
303-
# print(f'whole_time_mapping: {whole_time_mapping}, all_distinct_times_in_file: {all_distinct_times_in_file}')
304-
305313

306314
new_text_data = []
307315
replaced_counts = defaultdict(int)

0 commit comments

Comments
 (0)