Skip to content

Commit 6b1f0f2

Browse files
committed
Still haven't gotten anywhere but we're here right now
1 parent a18f996 commit 6b1f0f2

File tree

2 files changed

+73
-25
lines changed

2 files changed

+73
-25
lines changed

transcribers/google/src/main/kotlin/xyz/bluspring/unitytranslate/transcriber/google/Main.kt

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ package xyz.bluspring.unitytranslate.transcriber.google
33
import kotlinx.coroutines.CoroutineStart
44
import kotlinx.coroutines.Dispatchers
55
import kotlinx.coroutines.async
6+
import kotlinx.coroutines.launch
67
import kotlinx.coroutines.runBlocking
7-
import net.sourceforge.javaflacencoder.AudioStreamEncoder
88
import net.sourceforge.javaflacencoder.FLACEncoder
99
import net.sourceforge.javaflacencoder.FLACStreamOutputStream
1010
import net.sourceforge.javaflacencoder.StreamConfiguration
@@ -17,6 +17,7 @@ import java.nio.ByteOrder
1717
import javax.sound.sampled.AudioFormat
1818
import javax.sound.sampled.AudioInputStream
1919
import javax.sound.sampled.AudioSystem
20+
import kotlin.concurrent.thread
2021
import kotlin.random.Random
2122
import kotlin.random.nextULong
2223

@@ -41,6 +42,7 @@ object Main {
4142

4243
// https://giulianopz.github.io/full-duplex-http-streaming-in-go
4344
// https://gist.github.com/offlinehacker/5780124
45+
// https://blog.travispayton.com/wp-content/uploads/2014/03/Google-Speech-API.pdf
4446

4547
// https://github.com/StainlessStlRat/FullDuplexNettyExample
4648

@@ -54,22 +56,27 @@ object Main {
5456
// network_speech_recognition_engine_impl.cc
5557

5658
runBlocking {
57-
var outputStream: OutputStream? = null
59+
val byteStream = QueuedByteArrayOutputStream(FRAME_SIZE)
60+
val outputStream = BufferedOutputStream(byteStream)
5861
val encoder = FLACEncoder()
62+
5963
encoder.threadCount = 1
6064
encoder.setStreamConfiguration(StreamConfiguration(1, 16, FRAME_SIZE, SAMPLE_RATE, 16))
6165

66+
val audioFormat = AudioFormat(SAMPLE_RATE.toFloat(), 16, 1, true, false)
67+
val mic = AudioSystem.getTargetDataLine(audioFormat)
68+
mic.open(audioFormat)
69+
val audioStream = AudioInputStream(mic)
70+
mic.start()
71+
72+
println("Loaded mic")
73+
6274
// Mic thread
63-
async(Dispatchers.Main) {
64-
val audioFormat = AudioFormat(AudioFormat.Encoding.PCM_SIGNED, SAMPLE_RATE.toFloat(), 16, 1, 2, SAMPLE_RATE.toFloat(), false)
65-
val mic = AudioSystem.getTargetDataLine(audioFormat)
66-
mic.open(audioFormat)
67-
val audioStream = AudioInputStream(mic)
68-
mic.start()
69-
70-
println("Loaded mic")
71-
while (true) {
72-
AudioStreamEncoder.encodeAudioInputStream(audioStream, FRAME_SIZE, encoder, false)
75+
launch {
76+
while (mic.isOpen) {
77+
val bytesRead = ByteArray(FRAME_SIZE)
78+
audioStream.read(bytesRead)
79+
outputStream.write(bytesRead)
7380
}
7481

7582
/*while (mic.isOpen) {
@@ -88,51 +95,64 @@ object Main {
8895
encoder.addSamples(intArray, 1)
8996
}*/
9097
}
98+
.start()
9199

92100
// Upstream thread - sends the data directly to the API.
93-
async(Dispatchers.Main, start = CoroutineStart.UNDISPATCHED) {
101+
thread {
94102
println("Started upstream")
95-
val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/up?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=pb&lang=en-US&pFilter=0&app=chromium&continuous").toURL()
103+
val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/up?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=json&lang=en-US&pFilter=0&app=chromium&continuous&interim").toURL()
96104
val connection = url.openConnection() as HttpURLConnection
105+
connection.connectTimeout = 30_000
97106
connection.requestMethod = "POST"
98107
connection.doOutput = true
99108
connection.setRequestProperty("Content-Type", "audio/x-flac; rate=16000")
100109
connection.setRequestProperty("User-Agent", USER_AGENT)
101110
connection.setChunkedStreamingMode(FRAME_SIZE * 2)
102111
connection.connect()
103112

104-
outputStream = BufferedOutputStream(connection.getOutputStream(), FRAME_SIZE * 2)
105-
encoder.setOutputStream(FLACStreamOutputStream(outputStream))
113+
val netStream = connection.getOutputStream()
114+
//outputStream = connection.getOutputStream()//BufferedOutputStream(, FRAME_SIZE * 2)
106115
encoder.clear()
116+
encoder.setOutputStream(FLACStreamOutputStream(netStream))
107117
encoder.openFLACStream()
108118
println("Loaded upstream")
119+
120+
byteStream.whatToWrite = { array ->
121+
val stream = netStream
122+
123+
encoder.addSamples(array, array.size / 2)
124+
encoder.encodeSamples(array.size / 2, false)
125+
}
109126
}
110127

111128
// Downstream thread - receives the data from the API.
112-
async(Dispatchers.Main, start = CoroutineStart.UNDISPATCHED) {
129+
thread {
113130
println("Started downstream")
114-
val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/down?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=pb").toURL()
131+
val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/down?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=json").toURL()
115132
val connection = url.openConnection() as HttpURLConnection
133+
connection.connectTimeout = 30_000
116134
connection.requestMethod = "GET"
117135
connection.setRequestProperty("User-Agent", USER_AGENT)
118136
connection.doInput = true
119137
connection.connect()
120138

121139
try {
122-
val reader = connection.getInputStream().bufferedReader()
140+
val reader = connection.getInputStream().reader()
123141
println("Loaded downstream")
124-
while (true) {
142+
while (connection.responseCode == 200) {
143+
// println(reader.read())
125144
if (reader.ready()) {
126-
val line = reader.readLine()
145+
val line = reader.readText()
127146
println(line)
128147
}
129148
}
130149
} catch (e: Throwable) {
131150
e.printStackTrace()
132-
val stream = connection.errorStream
133-
for (line in stream.reader().readLines()) {
134-
println("Error downstream: $line")
135-
}
151+
}
152+
153+
val stream = connection.errorStream ?: return@thread
154+
for (line in stream.reader().readLines()) {
155+
println("Error downstream: $line")
136156
}
137157
}
138158
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package xyz.bluspring.unitytranslate.transcriber.google
2+
3+
import java.io.OutputStream
4+
import java.util.LinkedList
5+
6+
/**
 * An [OutputStream] that collects written values and hands them to [whatToWrite]
 * in frames of [sizeToWrite] values each.
 *
 * Each value is stored exactly as it was passed to [write]; it is not masked to its
 * low eight bits. As soon as [sizeToWrite] values have accumulated they are delivered
 * to [whatToWrite] as one [IntArray] and a new frame is started.
 *
 * The callback runs on the writing thread while the internal lock is held, so frames
 * are delivered one at a time and in the order they were completed. Frames completed
 * while [whatToWrite] is still the default no-op are dropped.
 *
 * @property sizeToWrite number of values per frame. A value of zero or less yields
 *   one value per frame, because every single write then completes a frame.
 */
class QueuedByteArrayOutputStream(val sizeToWrite: Int) : OutputStream() {
    // Effective frame length; never below one so the backing array can always take a value.
    private val frameSize = sizeToWrite.coerceAtLeast(1)

    private val lock = Any()

    // Primitive buffer for the frame currently being filled (avoids boxing every value).
    private var pending = IntArray(frameSize)
    private var pendingCount = 0

    /**
     * Receives every completed frame. The array is not reused by this stream afterwards,
     * so the callback may keep or modify it.
     *
     * Volatile because the callback may be installed from a different thread than the
     * one that writes to the stream.
     */
    @Volatile
    var whatToWrite: (IntArray) -> Unit = {}

    /**
     * Appends [b] to the current frame and, once the frame is full, passes it to [whatToWrite].
     *
     * The internal buffer is reset before the callback runs, so an exception thrown by
     * the callback does not cause the same frame to be delivered twice.
     */
    override fun write(b: Int) {
        synchronized(lock) {
            pending[pendingCount++] = b
            if (pendingCount < frameSize) return

            val frame = pending
            pending = IntArray(frameSize)
            pendingCount = 0

            whatToWrite(frame)
        }
    }

    /**
     * Does nothing: a partially filled frame stays buffered until it is complete.
     *
     * Discarding pending values here would silently lose data whenever a wrapping
     * stream is flushed or closed, and emitting them would produce a frame shorter
     * than [sizeToWrite].
     */
    override fun flush() {
        // Intentionally empty; see KDoc.
    }
}

0 commit comments

Comments
 (0)