Skip to content

Commit a18f996

Browse files
committed
Initial work towards getting rid of browser transcription
1 parent aa62fbc commit a18f996

File tree

4 files changed

+167
-0
lines changed

4 files changed

+167
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Additional Maven repository consulted when resolving this module's dependencies.
repositories {
    maven(url = "https://maven.maxhenkel.de/repository/public")
}

// Each library is registered on the `api` configuration and the resulting dependency is then
// handed to `shade`. Versions are looked up through `mod.dep(...)`.
// NOTE(review): `shade` and `mod` are defined elsewhere in the build; presumably `shade` bundles
// the library into the built jar — confirm against the build logic.
dependencies {
    val gson = api("com.google.code.gson:gson:${mod.dep("gson")}")!!
    shade(gson)

    val flacEncoder = api("net.sourceforge.javaflacencoder:java-flac-encoder:${mod.dep("java_flac_encoder")}")!!
    shade(flacEncoder)
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package xyz.bluspring.unitytranslate.transcriber.google
2+
3+
/**
 * Google API credentials used when talking to Google's speech endpoints.
 *
 * These seem to be generic keys: the values here are taken from Thorium, but the same ones
 * appear to be used all over the internet.
 *
 * Source: https://github.com/Alex313031/thorium/blob/main/src/google_apis/google_api_keys-inc.cc
 */
object GoogleApiKeys {
    /** API key sent as the `key` query parameter of the speech API requests. */
    const val GOOGLE_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"

    /** OAuth client id that accompanies [GOOGLE_CLIENT_SECRET]. */
    const val GOOGLE_CLIENT_ID = "77185425430.apps.googleusercontent.com"

    /** OAuth client secret that accompanies [GOOGLE_CLIENT_ID]. */
    const val GOOGLE_CLIENT_SECRET = "OTJgUOQcT7lO7GsGZq2G4IlT"
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
package xyz.bluspring.unitytranslate.transcriber.google
2+
3+
/**
 * Placeholder for the Google-backed speech transcriber.
 *
 * The class currently declares no members.
 */
class GoogleSpeechTranscriber
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
package xyz.bluspring.unitytranslate.transcriber.google
2+
3+
import kotlinx.coroutines.CoroutineStart
4+
import kotlinx.coroutines.Dispatchers
5+
import kotlinx.coroutines.async
6+
import kotlinx.coroutines.runBlocking
7+
import net.sourceforge.javaflacencoder.AudioStreamEncoder
8+
import net.sourceforge.javaflacencoder.FLACEncoder
9+
import net.sourceforge.javaflacencoder.FLACStreamOutputStream
10+
import net.sourceforge.javaflacencoder.StreamConfiguration
11+
import java.io.BufferedOutputStream
12+
import java.io.OutputStream
13+
import java.net.HttpURLConnection
14+
import java.net.URI
15+
import java.nio.ByteBuffer
16+
import java.nio.ByteOrder
17+
import javax.sound.sampled.AudioFormat
18+
import javax.sound.sampled.AudioInputStream
19+
import javax.sound.sampled.AudioSystem
20+
import kotlin.random.Random
21+
import kotlin.random.nextULong
22+
23+
/**
 * Stand-alone prototype that captures microphone audio, FLAC-encodes it, streams it to Google's
 * full-duplex speech endpoint and prints whatever the paired downstream connection returns.
 *
 * The upstream (`/up`, POST) and downstream (`/down`, GET) requests are tied together by a
 * shared `pair` key produced by [generateRequestKey].
 *
 * References:
 * - https://giulianopz.github.io/full-duplex-http-streaming-in-go
 * - https://gist.github.com/offlinehacker/5780124
 * - https://github.com/StainlessStlRat/FullDuplexNettyExample
 * - Chromium's network_speech_recognition_engine_impl.cc
 */
object Main {
    private const val LOW_BITS = 0x00000000_FFFFFFFFuL
    private const val HIGH_BITS = 0xFFFFFFFF_00000000uL

    /** User-Agent header sent on both the upstream and the downstream request. */
    const val USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"

    /** Capture and encoding sample rate, in Hz. */
    const val SAMPLE_RATE = 16_000

    /** Number of samples in 20 ms of audio at [SAMPLE_RATE] (320). */
    const val FRAME_SIZE = (SAMPLE_RATE / 1000) * 20

    /**
     * Builds the key that pairs the upstream and downstream requests.
     *
     * The low 32 bits come from the current time in milliseconds and the high 32 bits from a
     * random value.
     *
     * @return the 64-bit key as 16 lowercase, zero-padded hexadecimal digits.
     */
    private fun generateRequestKey(): String {
        val timeLow = System.currentTimeMillis().toULong() and LOW_BITS
        val randomHigh = Random.nextULong() and HIGH_BITS

        // Stable equivalent of ULong.toHexString(): lowercase hex, padded to the full 64-bit width.
        return (timeLow or randomHigh).toString(16).padStart(16, '0')
    }

    /**
     * Opens the upstream (audio upload) connection and points [encoder] at it.
     *
     * On return the FLAC stream has been opened on the connection's output stream, so samples
     * may be fed to [encoder].
     *
     * @param requestKey pairing key shared with the downstream request.
     * @param encoder encoder whose output is redirected to the connection.
     * @return the open connection; the caller is responsible for disconnecting it.
     */
    private fun openUpstream(requestKey: String, encoder: FLACEncoder): HttpURLConnection {
        println("Started upstream")
        val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/up?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=pb&lang=en-US&pFilter=0&app=chromium&continuous").toURL()
        val connection = url.openConnection() as HttpURLConnection
        connection.requestMethod = "POST"
        connection.doOutput = true
        connection.setRequestProperty("Content-Type", "audio/x-flac; rate=16000")
        connection.setRequestProperty("User-Agent", USER_AGENT)
        // Stream the body in chunks instead of buffering the whole (endless) request in memory.
        connection.setChunkedStreamingMode(FRAME_SIZE * 2)
        connection.connect()

        val outputStream = BufferedOutputStream(connection.getOutputStream(), FRAME_SIZE * 2)
        encoder.setOutputStream(FLACStreamOutputStream(outputStream))
        encoder.clear()
        encoder.openFLACStream()
        println("Loaded upstream")
        return connection
    }

    /**
     * Captures 16-bit mono little-endian PCM from the default microphone and feeds it to
     * [encoder] for as long as the line stays open. The line is always closed on exit.
     *
     * This call blocks its thread; run it on a dispatcher meant for blocking work.
     *
     * @param encoder encoder that already has its output stream configured.
     */
    private fun captureMicrophone(encoder: FLACEncoder) {
        val audioFormat = AudioFormat(AudioFormat.Encoding.PCM_SIGNED, SAMPLE_RATE.toFloat(), 16, 1, 2, SAMPLE_RATE.toFloat(), false)
        val mic = AudioSystem.getTargetDataLine(audioFormat)
        try {
            mic.open(audioFormat)
            val audioStream = AudioInputStream(mic)
            mic.start()

            println("Loaded mic")
            // Stop once the line is closed instead of spinning forever on a dead stream.
            while (mic.isOpen) {
                AudioStreamEncoder.encodeAudioInputStream(audioStream, FRAME_SIZE, encoder, false)
            }
        } finally {
            mic.close()
        }
    }

    /**
     * Opens the downstream (results) connection and prints every line it delivers until the
     * server ends the stream.
     *
     * I/O failures are reported on stderr/stdout (including the HTTP error body, when there is
     * one) rather than propagated. The connection is always disconnected on exit.
     *
     * This call blocks its thread; run it on a dispatcher meant for blocking work.
     *
     * @param requestKey pairing key shared with the upstream request.
     */
    private fun readDownstream(requestKey: String) {
        println("Started downstream")
        val url = URI.create("https://www.google.com/speech-api/full-duplex/v1/down?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=${requestKey}&output=pb").toURL()
        val connection = url.openConnection() as HttpURLConnection
        connection.requestMethod = "GET"
        connection.setRequestProperty("User-Agent", USER_AGENT)
        connection.doInput = true

        try {
            connection.connect()
            connection.getInputStream().bufferedReader().use { reader ->
                println("Loaded downstream")
                // Blocking reads: no busy-waiting on ready(), and the loop ends at end-of-stream.
                for (line in reader.lineSequence()) {
                    println(line)
                }
            }
        } catch (e: java.io.IOException) {
            e.printStackTrace()
            // errorStream is null when the server sent no error body or the connection never opened.
            connection.errorStream?.bufferedReader()?.use { errorReader ->
                for (line in errorReader.lineSequence()) {
                    println("Error downstream: $line")
                }
            }
        } finally {
            connection.disconnect()
        }
    }

    /**
     * Entry point: opens the upstream, then runs microphone capture and downstream reading
     * concurrently until both finish or one of them fails.
     *
     * @param args command-line arguments (unused).
     */
    @JvmStatic
    fun main(args: Array<out String>) {
        val requestKey = generateRequestKey()

        runBlocking {
            val encoder = FLACEncoder()
            encoder.threadCount = 1
            encoder.setStreamConfiguration(StreamConfiguration(1, 16, FRAME_SIZE, SAMPLE_RATE, 16))

            // The encoder must have its output stream attached before any samples are encoded,
            // so the upstream is opened synchronously before the mic coroutine starts.
            val upstream = openUpstream(requestKey, encoder)
            try {
                // Dispatchers.IO: both jobs are blocking I/O, and a plain JVM process has no
                // Dispatchers.Main implementation available.
                val micJob = async(Dispatchers.IO) { captureMicrophone(encoder) }
                val downstreamJob = async(Dispatchers.IO) { readDownstream(requestKey) }

                micJob.await()
                downstreamJob.await()
            } finally {
                upstream.disconnect()
            }
        }
        // Earlier experiment, kept as a note: the same /down and /up URLs were POSTed through
        // HttpUtil with an empty JsonObject; the /up variant added "&audioFormat=audio/wav" and
        // needs an octet-stream of the wav.
    }
}

0 commit comments

Comments
 (0)