2323from gentle .ffmpeg import to_wav
2424from gentle import diff_align
2525from gentle import language_model
26- from gentle import language_model_transcribe
2726from gentle import metasentence
2827from gentle import standard_kaldi
2928import gentle
@@ -83,8 +82,6 @@ def transcribe(self, uid, transcript, audio, async):
8382 wavfile .write (audio )
8483
8584 status ['status' ] = 'ENCODING'
86- # with open(os.path.join(outdir, 'align.json'), 'w') as alignfile:
87- # json.dump(output, alignfile, indent=2)
8885
8986 wavfile = os .path .join (outdir , 'a.wav' )
9087 if to_wav (os .path .join (outdir , 'upload' ), wavfile ) != 0 :
@@ -224,8 +221,8 @@ def realign(chunk):
224221 for ret in realignments :
225222 st_idx = o_words .index (ret ["chunk" ]["words" ][0 ])
226223 end_idx = o_words .index (ret ["chunk" ]["words" ][- 1 ])+ 1
227- logging .info ('splice in: "%s' % (str (ret ["words" ])))
228- logging .info ('splice out: "%s' % (str (o_words [st_idx :end_idx ])))
224+ logging .debug ('splice in: "%s' % (str (ret ["words" ])))
225+ logging .debug ('splice out: "%s' % (str (o_words [st_idx :end_idx ])))
229226 o_words = o_words [:st_idx ] + ret ["words" ] + o_words [end_idx :]
230227
231228 output ['words' ] = o_words
@@ -234,7 +231,7 @@ def realign(chunk):
234231
235232 else :
236233 # Match format
237- output = language_model_transcribe . make_transcription_alignment ({"words" : words })
234+ output = make_transcription_alignment ({"words" : words })
238235
239236 # ...remove the original upload
240237 os .unlink (os .path .join (outdir , 'upload' ))
@@ -342,6 +339,28 @@ def getChild(self, path, req):
342339 else :
343340 return Resource .getChild (self , path , req )
344341
def make_transcription_alignment(trans):
    """Reshape a raw transcription so it mimics the `diff_align` output format.

    Args:
        trans: dict with a "words" list. Every entry must provide the keys
            "word", "phones", "start" and "duration".

    Returns:
        The same `trans` dict, updated in place. "transcript" is every word
        followed by a single space (so a non-empty transcript ends with a
        space). "words" is replaced by a list of new dicts, each marked
        "case": "success", with "startOffset"/"endOffset" indexing into
        that transcript and "end" computed as start + duration.

    Raises:
        KeyError: if `trans` has no "words" key or an entry lacks one of
            the required keys. `trans` is left untouched in that case.
    """
    words = []
    # Track the running character offset instead of re-measuring (and
    # re-concatenating) the transcript for every word.
    offset = 0
    for t_wd in trans["words"]:
        text = t_wd["word"]
        words.append({
            "case": "success",
            "startOffset": offset,
            "endOffset": offset + len(text),
            "word": text,
            "alignedWord": text,
            "phones": t_wd["phones"],
            "start": t_wd["start"],
            "end": t_wd["start"] + t_wd["duration"],
        })
        # +1 accounts for the space that follows every word in the transcript.
        offset += len(text) + 1

    # Single join; each word keeps its trailing space so offsets line up.
    trans["transcript"] = "".join(w["word"] + " " for w in words)
    trans["words"] = words
    return trans
363+
345364def serve (port = 8765 , interface = '0.0.0.0' , installSignalHandlers = 0 , nthreads = 4 , data_dir = get_datadir ('webdata' )):
346365 logging .info ("SERVE %d, %s, %d" , port , interface , installSignalHandlers )
347366
0 commit comments