@@ -3,8 +3,8 @@ package xyz.bluspring.unitytranslate.transcriber.google
33import kotlinx.coroutines.CoroutineStart
44import kotlinx.coroutines.Dispatchers
55import kotlinx.coroutines.async
6+ import kotlinx.coroutines.launch
67import kotlinx.coroutines.runBlocking
7- import net.sourceforge.javaflacencoder.AudioStreamEncoder
88import net.sourceforge.javaflacencoder.FLACEncoder
99import net.sourceforge.javaflacencoder.FLACStreamOutputStream
1010import net.sourceforge.javaflacencoder.StreamConfiguration
@@ -17,6 +17,7 @@ import java.nio.ByteOrder
1717import javax.sound.sampled.AudioFormat
1818import javax.sound.sampled.AudioInputStream
1919import javax.sound.sampled.AudioSystem
20+ import kotlin.concurrent.thread
2021import kotlin.random.Random
2122import kotlin.random.nextULong
2223
@@ -41,6 +42,7 @@ object Main {
4142
4243 // https://giulianopz.github.io/full-duplex-http-streaming-in-go
4344 // https://gist.github.com/offlinehacker/5780124
45+ // https://blog.travispayton.com/wp-content/uploads/2014/03/Google-Speech-API.pdf
4446
4547 // https://github.com/StainlessStlRat/FullDuplexNettyExample
4648
@@ -54,22 +56,27 @@ object Main {
5456 // network_speech_recognition_engine_impl.cc
5557
// Session setup: everything below runs inside this runBlocking block.
5658 runBlocking {
57- var outputStream: OutputStream ? = null
// Microphone bytes are written to outputStream, a BufferedOutputStream layered over byteStream.
// NOTE(review): QueuedByteArrayOutputStream is declared outside this view; how it queues/flushes is not shown here.
59+ val byteStream = QueuedByteArrayOutputStream (FRAME_SIZE )
60+ val outputStream = BufferedOutputStream (byteStream)
5861 val encoder = FLACEncoder ()
62+
5963 encoder.threadCount = 1
// NOTE(review): presumably (channels, min block size, max block size, sample rate, bits per sample) - confirm against javaFlacEncoder's StreamConfiguration.
6064 encoder.setStreamConfiguration(StreamConfiguration (1 , 16 , FRAME_SIZE , SAMPLE_RATE , 16 ))
6165
// Capture format: 16-bit, single-channel, signed, little-endian PCM at SAMPLE_RATE.
66+ val audioFormat = AudioFormat (SAMPLE_RATE .toFloat(), 16 , 1 , true , false )
67+ val mic = AudioSystem .getTargetDataLine(audioFormat)
// The line is opened and started here, before any of the worker blocks below are created.
68+ mic.open(audioFormat)
69+ val audioStream = AudioInputStream (mic)
70+ mic.start()
71+
72+ println (" Loaded mic" )
73+
6274 // Mic thread
63- async(Dispatchers .Main ) {
64- val audioFormat = AudioFormat (AudioFormat .Encoding .PCM_SIGNED , SAMPLE_RATE .toFloat(), 16 , 1 , 2 , SAMPLE_RATE .toFloat(), false )
65- val mic = AudioSystem .getTargetDataLine(audioFormat)
66- mic.open(audioFormat)
67- val audioStream = AudioInputStream (mic)
68- mic.start()
69-
70- println (" Loaded mic" )
71- while (true ) {
72- AudioStreamEncoder .encodeAudioInputStream(audioStream, FRAME_SIZE , encoder, false )
// Replacement pump: while the mic line stays open, read one FRAME_SIZE-byte buffer from the
// mic stream and forward it to outputStream.
// NOTE(review): launch is given no dispatcher and the loop body has no suspension point, so it
// presumably occupies the runBlocking thread for as long as the mic is open - confirm this is intended.
// NOTE(review): the trailing .start() on the value returned by launch looks redundant, since
// launch with the default start mode already schedules the coroutine - confirm and drop if so.
75+ launch {
76+ while (mic.isOpen) {
77+ val bytesRead = ByteArray (FRAME_SIZE )
// NOTE(review): the count returned by read() is discarded, so the whole buffer is written even
// after a short read or end-of-stream (-1); trailing bytes would then be zeros.
78+ audioStream.read(bytesRead)
79+ outputStream.write(bytesRead)
7380 }
7481
7582 /* while (mic.isOpen) {
@@ -88,51 +95,64 @@ object Main {
8895 encoder.addSamples(intArray, 1)
8996 }*/
9097 }
98+ .start()
9199
92100 // Upstream thread - sends the data directly to the API.
93- async( Dispatchers . Main , start = CoroutineStart . UNDISPATCHED ) {
// Runs on a plain thread: opens a chunked POST to the full-duplex "up" endpoint (paired with the
// downstream request through requestKey), points the FLAC encoder at the connection's output
// stream, then installs the callback that encodes queued audio.
101+ thread {
94102 println (" Started upstream" )
95- val url = URI .create(" https://www.google.com/speech-api/full-duplex/v1/up?key=${GoogleApiKeys .GOOGLE_API_KEY } &pair=${requestKey} &output=pb &lang=en-US&pFilter=0&app=chromium&continuous" ).toURL()
103+ val url = URI .create(" https://www.google.com/speech-api/full-duplex/v1/up?key=${GoogleApiKeys .GOOGLE_API_KEY } &pair=${requestKey} &output=json &lang=en-US&pFilter=0&app=chromium&continuous&interim " ).toURL()
96104 val connection = url.openConnection() as HttpURLConnection
105+ connection.connectTimeout = 30_000
97106 connection.requestMethod = " POST"
98107 connection.doOutput = true
// NOTE(review): the rate is hard-coded to 16000 here while the encoder is configured from
// SAMPLE_RATE - confirm the two always agree.
99108 connection.setRequestProperty(" Content-Type" , " audio/x-flac; rate=16000" )
100109 connection.setRequestProperty(" User-Agent" , USER_AGENT )
// Chunked streaming lets the request body be sent without knowing its total length up front.
101110 connection.setChunkedStreamingMode(FRAME_SIZE * 2 )
102111 connection.connect()
103112
104- outputStream = BufferedOutputStream ( connection.getOutputStream(), FRAME_SIZE * 2 )
105- encoder.setOutputStream( FLACStreamOutputStream (outputStream) )
113+ val netStream = connection.getOutputStream()
114+ // outputStream = connection.getOutputStream()//BufferedOutputStream(, FRAME_SIZE * 2 )
// The encoder is cleared first, then bound to the network stream, then the FLAC stream is opened.
106115 encoder.clear()
116+ encoder.setOutputStream(FLACStreamOutputStream (netStream))
107117 encoder.openFLACStream()
108118 println (" Loaded upstream" )
119+
// Callback handed to byteStream: each array it receives is added to the encoder and encoded.
// NOTE(review): `stream` below is never used inside the lambda.
// NOTE(review): array.size / 2 suggests two bytes per 16-bit sample, but the element type of
// `array` is not visible here - confirm it matches what addSamples expects as sample data and count.
120+ byteStream.whatToWrite = { array ->
121+ val stream = netStream
122+
123+ encoder.addSamples(array, array.size / 2 )
124+ encoder.encodeSamples(array.size / 2 , false )
125+ }
// NOTE(review): the thread body ends here; nothing in this block closes netStream, reads the
// response, or disconnects the connection.
109126 }
110127
111128 // Downstream thread - receives the data from the API.
112- async( Dispatchers . Main , start = CoroutineStart . UNDISPATCHED ) {
// Runs on a plain thread: opens a GET to the full-duplex "down" endpoint for the same requestKey,
// prints whatever the response reader yields, then dumps the connection's error stream if one exists.
129+ thread {
113130 println (" Started downstream" )
114- val url = URI .create(" https://www.google.com/speech-api/full-duplex/v1/down?key=${GoogleApiKeys .GOOGLE_API_KEY } &pair=${requestKey} &output=pb " ).toURL()
131+ val url = URI .create(" https://www.google.com/speech-api/full-duplex/v1/down?key=${GoogleApiKeys .GOOGLE_API_KEY } &pair=${requestKey} &output=json " ).toURL()
115132 val connection = url.openConnection() as HttpURLConnection
133+ connection.connectTimeout = 30_000
116134 connection.requestMethod = " GET"
117135 connection.setRequestProperty(" User-Agent" , USER_AGENT )
118136 connection.doInput = true
119137 connection.connect()
120138
121139 try {
122- val reader = connection.getInputStream().bufferedReader ()
140+ val reader = connection.getInputStream().reader ()
123141 println (" Loaded downstream" )
// NOTE(review): responseCode is re-evaluated on every iteration, and the loop spins without
// pausing whenever reader.ready() is false.
124- while (true ) {
142+ while (connection.responseCode == 200 ) {
143+ // println(reader.read())
125144 if (reader.ready()) {
// NOTE(review): readText() consumes the reader through end of stream, so it presumably does not
// return until the server closes the response - confirm interim results are actually printed.
126- val line = reader.readLine ()
145+ val line = reader.readText ()
127146 println (line)
128147 }
129148 }
130149 } catch (e: Throwable ) {
131150 e.printStackTrace()
132- val stream = connection.errorStream
133- for (line in stream.reader().readLines()) {
134- println (" Error downstream: $line " )
135- }
136+ }
137+
// The error-stream dump now sits after the try/catch, so it runs whether or not an exception
// was thrown; the thread returns early when there is no error stream.
138+ val stream = connection.errorStream ? : return @thread
139+ for (line in stream.reader().readLines()) {
140+ println (" Error downstream: $line " )
136156 }
137157 }
138158 }
0 commit comments