Skip to content

Commit 800bd2f

Browse files
authored
Merge branch 'remsky:master' into master
2 parents c5086c7 + 6f1450c commit 800bd2f

File tree

5 files changed

+311
-53
lines changed

5 files changed

+311
-53
lines changed

api/src/routers/development.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ async def generate_chunks():
104104

105105
if chunk_audio is not None:
106106
# Normalize audio before writing
107-
normalized_audio = await normalizer.normalize(chunk_audio)
107+
normalized_audio = normalizer.normalize(chunk_audio)
108108
# Write chunk and yield bytes
109109
chunk_bytes = writer.write_chunk(normalized_audio)
110110
if chunk_bytes:
@@ -114,6 +114,7 @@ async def generate_chunks():
114114
final_bytes = writer.write_chunk(finalize=True)
115115
if final_bytes:
116116
yield final_bytes
117+
writer.close()
117118
else:
118119
raise ValueError("Failed to generate audio data")
119120

@@ -223,10 +224,13 @@ async def dual_output():
223224
).decode("utf-8")
224225

225226
# Add any chunks that may be in the accumulator to the returned word_timestamps
226-
chunk_data.word_timestamps = (
227-
timestamp_acumulator + chunk_data.word_timestamps
228-
)
229-
timestamp_acumulator = []
227+
if chunk_data.word_timestamps is not None:
228+
chunk_data.word_timestamps = (
229+
timestamp_acumulator + chunk_data.word_timestamps
230+
)
231+
timestamp_acumulator = []
232+
else:
233+
chunk_data.word_timestamps = []
230234

231235
yield CaptionedSpeechResponse(
232236
audio=base64_chunk,
@@ -271,7 +275,7 @@ async def single_output():
271275
)
272276

273277
# Add any chunks that may be in the accumulator to the returned word_timestamps
274-
if chunk_data.word_timestamps != None:
278+
if chunk_data.word_timestamps is not None:
275279
chunk_data.word_timestamps = (
276280
timestamp_acumulator + chunk_data.word_timestamps
277281
)

api/src/services/text_processing/normalizer.py

Lines changed: 96 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
Converts them into a format suitable for text-to-speech processing.
55
"""
66

7+
import math
78
import re
89
from functools import lru_cache
10+
from typing import List, Optional, Union
911

1012
import inflect
1113
from numpy import number
@@ -132,6 +134,7 @@
132134
"px": "pixel", # CSS units
133135
}
134136

137+
MONEY_UNITS = {"$": ("dollar", "cent"), "£": ("pound", "pence"), "€": ("euro", "cent")}
135138

136139
# Pre-compiled regex patterns for performance
137140
EMAIL_PATTERN = re.compile(
@@ -152,35 +155,22 @@
152155
)
153156

154157
TIME_PATTERN = re.compile(
155-
r"([0-9]{2} ?: ?[0-9]{2}( ?: ?[0-9]{2})?)( ?(pm|am)\b)?", re.IGNORECASE
158+
r"([0-9]{1,2} ?: ?[0-9]{2}( ?: ?[0-9]{2})?)( ?(pm|am)\b)?", re.IGNORECASE
156159
)
157160

158-
INFLECT_ENGINE = inflect.engine()
161+
MONEY_PATTERN = re.compile(
162+
r"(-?)(["
163+
+ "".join(MONEY_UNITS.keys())
164+
+ r"])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion|k|m|b|t)*)\b",
165+
re.IGNORECASE,
166+
)
159167

168+
NUMBER_PATTERN = re.compile(
169+
r"(-?)(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion|k|m|b)*)\b",
170+
re.IGNORECASE,
171+
)
160172

161-
def split_num(num: re.Match[str]) -> str:
162-
"""Handle number splitting for various formats"""
163-
num = num.group()
164-
if "." in num:
165-
return num
166-
elif ":" in num:
167-
h, m = [int(n) for n in num.split(":")]
168-
if m == 0:
169-
return f"{h} o'clock"
170-
elif m < 10:
171-
return f"{h} oh {m}"
172-
return f"{h} {m}"
173-
year = int(num[:4])
174-
if year < 1100 or year % 1000 < 10:
175-
return num
176-
left, right = num[:2], int(num[2:4])
177-
s = "s" if num.endswith("s") else ""
178-
if 100 <= year % 1000 <= 999:
179-
if right == 0:
180-
return f"{left} hundred{s}"
181-
elif right < 10:
182-
return f"{left} oh {right}{s}"
183-
return f"{left} {right}{s}"
173+
INFLECT_ENGINE = inflect.engine()
184174

185175

186176
def handle_units(u: re.Match[str]) -> str:
@@ -208,14 +198,61 @@ def conditional_int(number: float, threshold: float = 0.00001):
208198
return number
209199

210200

201+
def translate_multiplier(multiplier: str) -> str:
202+
"""Translate multiplier abbreviations to words"""
203+
204+
multiplier_translation = {
205+
"k": "thousand",
206+
"m": "million",
207+
"b": "billion",
208+
"t": "trillion",
209+
}
210+
if multiplier.lower() in multiplier_translation:
211+
return multiplier_translation[multiplier.lower()]
212+
return multiplier.strip()
213+
214+
215+
def split_four_digit(number: float):
216+
part1 = str(conditional_int(number))[:2]
217+
part2 = str(conditional_int(number))[2:]
218+
return f"{INFLECT_ENGINE.number_to_words(part1)} {INFLECT_ENGINE.number_to_words(part2)}"
219+
220+
221+
def handle_numbers(n: re.Match[str]) -> str:
222+
number = n.group(2)
223+
224+
try:
225+
number = float(number)
226+
except:
227+
return n.group()
228+
229+
if n.group(1) == "-":
230+
number *= -1
231+
232+
multiplier = translate_multiplier(n.group(3))
233+
234+
number = conditional_int(number)
235+
if multiplier != "":
236+
multiplier = f" {multiplier}"
237+
else:
238+
if (
239+
number % 1 == 0
240+
and len(str(number)) == 4
241+
and number > 1500
242+
and number % 1000 > 9
243+
):
244+
return split_four_digit(number)
245+
246+
return f"{INFLECT_ENGINE.number_to_words(number)}{multiplier}"
247+
248+
211249
def handle_money(m: re.Match[str]) -> str:
212250
"""Convert money expressions to spoken form"""
213251

214-
bill = "dollar" if m.group(2) == "$" else "pound"
215-
coin = "cent" if m.group(2) == "$" else "pence"
252+
bill, coin = MONEY_UNITS[m.group(2)]
253+
216254
number = m.group(3)
217255

218-
multiplier = m.group(4)
219256
try:
220257
number = float(number)
221258
except:
@@ -224,12 +261,17 @@ def handle_money(m: re.Match[str]) -> str:
224261
if m.group(1) == "-":
225262
number *= -1
226263

264+
multiplier = translate_multiplier(m.group(4))
265+
266+
if multiplier != "":
267+
multiplier = f" {multiplier}"
268+
227269
if number % 1 == 0 or multiplier != "":
228270
text_number = f"{INFLECT_ENGINE.number_to_words(conditional_int(number))}{multiplier} {INFLECT_ENGINE.plural(bill, count=number)}"
229271
else:
230272
sub_number = int(str(number).split(".")[-1].ljust(2, "0"))
231273

232-
text_number = f"{INFLECT_ENGINE.number_to_words(int(round(number)))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}"
274+
text_number = f"{INFLECT_ENGINE.number_to_words(int(math.floor(number)))} {INFLECT_ENGINE.plural(bill, count=number)} and {INFLECT_ENGINE.number_to_words(sub_number)} {INFLECT_ENGINE.plural(coin, count=sub_number)}"
233275

234276
return text_number
235277

@@ -320,15 +362,31 @@ def handle_phone_number(p: re.Match[str]) -> str:
320362
def handle_time(t: re.Match[str]) -> str:
321363
t = t.groups()
322364

323-
numbers = " ".join(
324-
[INFLECT_ENGINE.number_to_words(X.strip()) for X in t[0].split(":")]
325-
)
365+
time_parts = t[0].split(":")
366+
367+
numbers = []
368+
numbers.append(INFLECT_ENGINE.number_to_words(time_parts[0].strip()))
369+
370+
minute_number = INFLECT_ENGINE.number_to_words(time_parts[1].strip())
371+
if int(time_parts[1]) < 10:
372+
if int(time_parts[1]) != 0:
373+
numbers.append(f"oh {minute_number}")
374+
else:
375+
numbers.append(minute_number)
326376

327377
half = ""
328-
if t[2] is not None:
329-
half = t[2].strip()
378+
if len(time_parts) > 2:
379+
seconds_number = INFLECT_ENGINE.number_to_words(time_parts[2].strip())
380+
second_word = INFLECT_ENGINE.plural("second", int(time_parts[2].strip()))
381+
numbers.append(f"and {seconds_number} {second_word}")
382+
else:
383+
if t[2] is not None:
384+
half = " " + t[2].strip()
385+
else:
386+
if int(time_parts[1]) == 0:
387+
numbers.append("o'clock")
330388

331-
return numbers + half
389+
return " ".join(numbers) + half
332390

333391

334392
def normalize_text(text: str, normalization_options: NormalizationOptions) -> str:
@@ -366,7 +424,7 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
366424
for a, b in zip("、。!,:;?–", ",.!,:;?-"):
367425
text = text.replace(a, b + " ")
368426

369-
# Handle simple time in the format of HH:MM:SS
427+
# Handle simple times in the format H:MM or HH:MM, with optional :SS seconds and an optional am/pm suffix
370428
text = TIME_PATTERN.sub(
371429
handle_time,
372430
text,
@@ -390,15 +448,12 @@ def normalize_text(text: str, normalization_options: NormalizationOptions) -> st
390448
# Handle numbers and money
391449
text = re.sub(r"(?<=\d),(?=\d)", "", text)
392450

393-
text = re.sub(
394-
r"(?i)(-?)([$£])(\d+(?:\.\d+)?)((?: hundred| thousand| (?:[bm]|tr|quadr)illion)*)\b",
451+
text = MONEY_PATTERN.sub(
395452
handle_money,
396453
text,
397454
)
398455

399-
text = re.sub(
400-
r"\d*\.\d+|\b\d{4}s?\b|(?<!:)\b(?:[1-9]|1[0-2]):[0-5]\d\b(?!:)", split_num, text
401-
)
456+
text = NUMBER_PATTERN.sub(handle_numbers, text)
402457

403458
text = re.sub(r"\d*\.\d+", handle_decimal, text)
404459

0 commit comments

Comments
 (0)