-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
53 lines (46 loc) · 1.65 KB
/
main.py
File metadata and controls
53 lines (46 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os, json
from pathlib import Path
from google.cloud import texttospeech
from epitran import Epitran
# Base directory under which the word-list blocks, the generated audio files
# and the lexicon JSON are located.
# NOTE(review): the literal starts with "cp mkdir ", which looks like shell
# commands pasted into the string. As written this is a *relative* path whose
# first component is a directory literally named "cp mkdir storage" -- confirm
# the intended location (presumably an Android shared-storage path; verify).
ROOT = Path("cp mkdir storage/emulated/0/Dokumente")
# Input directory: process_block() reads files named block_NNN.txt from here.
BLOCKS_DIR = ROOT / "blocks"
# Output directory for the synthesized .wav files (created by run()).
AUDIO_DIR = ROOT / "audio"
# Output file: run() writes the complete lexicon here as JSON.
LEXICON_FILE = ROOT / "lexicon.json"
# Shared Text-to-Speech client; instantiated once, at import time.
tts_client = texttospeech.TextToSpeechClient()
# Shared Epitran transliterator for the "deu-Latn" code
# (presumably German in Latin script -- confirm against the Epitran docs).
epi = Epitran("deu-Latn")
def ipa(word):
    """Return the transliteration of ``word`` from the module-level Epitran instance.

    The result is whatever ``epi.transliterate`` yields for the given string;
    no additional normalisation is applied here.
    """
    transcription = epi.transliterate(word)
    return transcription
def synthesize(text: str) -> bytes:
    """Synthesize ``text`` with the shared TTS client and return the audio bytes.

    The request asks for a ``de-DE`` voice with neutral SSML gender and
    ``LINEAR16`` audio encoding.

    Args:
        text: Plain text to be spoken.

    Returns:
        The ``audio_content`` field of the synthesis response.
    """
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="de-DE",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    )
    # The 2.x client (the one exposing texttospeech.SynthesisInput at package
    # level, as used above) accepts only ``request`` positionally; the
    # flattened fields must be passed by keyword, otherwise a TypeError is raised.
    response = tts_client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config,
    )
    return response.audio_content
def process_block(n: int, lex: dict) -> None:
    """Add every new word of block file ``n`` to ``lex`` and synthesize its audio.

    Reads ``BLOCKS_DIR / "block_NNN.txt"`` (``n`` zero-padded to three digits),
    one word per line. For each word the *reversed* spelling is used as the
    lexicon key, as the text sent to ``synthesize`` and as the ``.wav`` file
    stem inside ``AUDIO_DIR``.

    Args:
        n: Block number selecting the input file.
        lex: Lexicon mapping reversed word -> entry dict; mutated in place.
            Tokens already present are skipped, so no audio is regenerated
            for them.

    Returns:
        None. Does nothing when the block file does not exist.
    """
    path = BLOCKS_DIR / f"block_{n:03d}.txt"
    if not path.exists():
        return

    for line in path.read_text(encoding="utf8").splitlines():
        word = line.strip()
        if not word:
            # A blank line would otherwise become the empty token: a TTS
            # request with no text, a file named ".wav" and a "" lexicon key.
            continue

        token = word[::-1]
        if token in lex:
            continue

        # Build the entry first (ipa() runs before any audio is written),
        # keeping the key order stable for the JSON output.
        data = {
            "deutsch": word,
            "ipa": ipa(word),
            "bedeutung": f"Bedeutung von {word}",
            "bedeutung_rev": f"{token} als Symbol",
            "pos": "Substantiv",
            "audio": "",
        }
        wav_path = AUDIO_DIR / f"{token}.wav"
        wav_path.write_bytes(synthesize(token))
        data["audio"] = f"file://{wav_path}"
        lex[token] = data
def run():
    """Process block files 1 through 999 and write the resulting lexicon as JSON.

    Ensures ``AUDIO_DIR`` exists, starts from an empty lexicon, feeds every
    block number to ``process_block`` and finally serialises the lexicon to
    ``LEXICON_FILE`` (indented, non-ASCII characters kept as-is).
    """
    AUDIO_DIR.mkdir(exist_ok=True)

    lexicon = {}
    for block_number in range(1, 1000):
        process_block(block_number, lexicon)

    serialized = json.dumps(lexicon, indent=2, ensure_ascii=False)
    LEXICON_FILE.write_text(serialized, encoding="utf8")
# Script entry point: build the lexicon only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run()