1+ package xyz.bluspring.unitytranslate.transcriber.google
2+
import kotlinx.coroutines.CoroutineStart
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.isActive
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
import net.sourceforge.javaflacencoder.AudioStreamEncoder
import net.sourceforge.javaflacencoder.FLACEncoder
import net.sourceforge.javaflacencoder.FLACStreamOutputStream
import net.sourceforge.javaflacencoder.StreamConfiguration
import java.io.BufferedOutputStream
import java.io.IOException
import java.io.OutputStream
import java.net.HttpURLConnection
import java.net.URI
import java.nio.ByteBuffer
import java.nio.ByteOrder
import javax.sound.sampled.AudioFormat
import javax.sound.sampled.AudioInputStream
import javax.sound.sampled.AudioSystem
import kotlin.random.Random
import kotlin.random.nextULong
22+
/**
 * Command-line experiment that streams microphone audio, FLAC-encoded, to the
 * `speech-api/full-duplex/v1` endpoints and prints whatever the paired
 * downstream connection returns.
 *
 * Two HTTP connections share one request key (the `pair` query parameter):
 * an "up" POST that carries the audio and a "down" GET that carries the results.
 *
 * References left by the original author:
 * - https://giulianopz.github.io/full-duplex-http-streaming-in-go
 * - https://gist.github.com/offlinehacker/5780124
 * - https://github.com/StainlessStlRat/FullDuplexNettyExample
 * - network_speech_recognition_engine_impl.cc (presumably the Chromium client of this API - confirm)
 */
object Main {
    private const val LOW_BITS = 0x00000000_FFFFFFFFuL
    private const val HIGH_BITS = 0xFFFFFFFF_00000000uL

    const val USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"

    /** Capture and encode sample rate, in Hz. */
    const val SAMPLE_RATE = 16_000

    /** Number of samples in 20 ms of audio at [SAMPLE_RATE]. */
    const val FRAME_SIZE = (SAMPLE_RATE / 1000) * 20

    private const val API_BASE = "https://www.google.com/speech-api/full-duplex/v1"

    /**
     * Owns the upload connection together with the stream the encoder writes into,
     * so both can be released in one place.
     */
    private class Upstream(
        private val connection: HttpURLConnection,
        private val output: OutputStream,
    ) : AutoCloseable {
        override fun close() {
            try {
                output.close()
            } finally {
                connection.disconnect()
            }
        }
    }

    /**
     * Builds the key that pairs the upstream and downstream requests.
     *
     * @return hexadecimal form of a 64-bit value whose low 32 bits come from the
     * current time in milliseconds and whose high 32 bits are random.
     */
    private fun generateRequestKey(): String {
        val timeLow = System.currentTimeMillis().toULong() and LOW_BITS
        val randomHigh = Random.nextULong() and HIGH_BITS
        return (timeLow or randomHigh).toHexString()
    }

    /**
     * Opens the chunked "up" POST and attaches [encoder] to it, so every frame
     * encoded afterwards is written to the request body.
     *
     * @param requestKey pairing key shared with the downstream request.
     * @param encoder encoder whose output is redirected to the connection.
     * @return handle that must be closed once streaming is finished.
     * @throws IOException if the connection or the FLAC stream cannot be opened.
     */
    private fun openUpstream(requestKey: String, encoder: FLACEncoder): Upstream {
        println("Started upstream")
        val url = URI.create(
            "$API_BASE/up?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=$requestKey" +
                "&output=pb&lang=en-US&pFilter=0&app=chromium&continuous"
        ).toURL()
        val connection = (url.openConnection() as HttpURLConnection).apply {
            requestMethod = "POST"
            doOutput = true
            setRequestProperty("Content-Type", "audio/x-flac; rate=$SAMPLE_RATE")
            setRequestProperty("User-Agent", USER_AGENT)
            // FRAME_SIZE samples * 2 bytes per 16-bit sample.
            setChunkedStreamingMode(FRAME_SIZE * 2)
            connect()
        }

        try {
            val output = BufferedOutputStream(connection.outputStream, FRAME_SIZE * 2)
            encoder.setOutputStream(FLACStreamOutputStream(output))
            encoder.clear()
            encoder.openFLACStream()
            println("Loaded upstream")
            return Upstream(connection, output)
        } catch (e: IOException) {
            // Do not leak the socket when the stream could not be set up.
            connection.disconnect()
            throw e
        }
    }

    /**
     * Captures 16-bit signed little-endian mono PCM from the default microphone and
     * feeds it to [encoder] until [keepRunning] returns false. The line is always
     * closed on exit.
     *
     * NOTE(review): the encode call blocks on microphone reads, so [keepRunning] is
     * only consulted between calls.
     *
     * @param encoder encoder that already has its output stream attached.
     * @param keepRunning polled before each encode call.
     */
    private fun streamMicrophone(encoder: FLACEncoder, keepRunning: () -> Boolean) {
        val format = AudioFormat(
            AudioFormat.Encoding.PCM_SIGNED,
            SAMPLE_RATE.toFloat(),
            16,
            1,
            2,
            SAMPLE_RATE.toFloat(),
            false,
        )
        val mic = AudioSystem.getTargetDataLine(format)
        try {
            mic.open(format)
            val audio = AudioInputStream(mic)
            mic.start()

            println("Loaded mic")
            while (keepRunning()) {
                AudioStreamEncoder.encodeAudioInputStream(audio, FRAME_SIZE, encoder, false)
            }
        } finally {
            mic.close()
        }
    }

    /**
     * Opens the "down" GET paired with [requestKey] and prints each line of the
     * response until the server closes the stream.
     *
     * If reading fails, the stack trace and the HTTP error body (when there is
     * one) are printed instead of propagating the failure.
     */
    private fun printDownstream(requestKey: String) {
        println("Started downstream")
        val url = URI.create(
            "$API_BASE/down?key=${GoogleApiKeys.GOOGLE_API_KEY}&pair=$requestKey&output=pb"
        ).toURL()
        val connection = url.openConnection() as HttpURLConnection
        connection.requestMethod = "GET"
        connection.setRequestProperty("User-Agent", USER_AGENT)
        connection.doInput = true
        connection.connect()

        try {
            connection.inputStream.bufferedReader().use { reader ->
                println("Loaded downstream")
                // Blocking read: waits for data without spinning and stops at end of stream.
                reader.lineSequence().forEach { line -> println(line) }
            }
        } catch (e: IOException) {
            e.printStackTrace()
            // errorStream is null when the server sent no error body.
            connection.errorStream?.bufferedReader()?.useLines { lines ->
                lines.forEach { line -> println("Error downstream: $line") }
            }
        } finally {
            connection.disconnect()
        }
    }

    /**
     * Entry point: opens the upstream connection, then streams the microphone and
     * prints downstream results concurrently until both finish.
     *
     * @param args unused.
     */
    @JvmStatic
    fun main(args: Array<out String>) {
        val requestKey = generateRequestKey()

        val encoder = FLACEncoder().apply {
            threadCount = 1
            // presumably (channels, minBlockSize, maxBlockSize, sampleRate, bitsPerSample) - confirm against javaFlacEncoder
            setStreamConfiguration(StreamConfiguration(1, 16, FRAME_SIZE, SAMPLE_RATE, 16))
        }

        runBlocking {
            // Opened before capture starts so the encoder always has an output stream.
            val upstream = openUpstream(requestKey, encoder)

            // Blocking microphone and network work runs on the IO dispatcher; a plain
            // JVM process has no Main dispatcher to run it on.
            launch(Dispatchers.IO) {
                try {
                    streamMicrophone(encoder) { isActive }
                } finally {
                    upstream.close()
                }
            }

            launch(Dispatchers.IO) {
                printDownstream(requestKey)
            }
        }
    }
}
0 commit comments