Skip to content

Commit 54e32d4

Browse files
committed
new aiholo speech impl
1 parent 534d0df commit 54e32d4

30 files changed

+6682
-23
lines changed

java-ai/auth_and_run.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
#
# Authenticates with Google Cloud, builds the project, and launches the application.
# Run it from the directory that contains pom.xml (paths below are relative).
#
# `gcloud auth application-default login` provides long-lived authentication (~1 week)
# via Application Default Credentials (ADC). The access token itself expires after
# 1 hour, but it is automatically refreshed as long as the stored credentials remain valid.

# Stop at the first failing step so the jar is not launched after a failed login or build.
set -e

gcloud auth application-default login
mvn clean package
# Use forward slashes: in bash a backslash escapes the following character, so the
# Windows-style `.\target\...` would collapse to `.targetoracleai-...` and never resolve.
java -jar ./target/oracleai-0.0.1-SNAPSHOT.jar

java-ai/pom.xml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,29 @@
1616

1717
<properties>
1818
<oci.sdk.version>3.52.1</oci.sdk.version>
19+
<oracle.jdbc.version>21.7.0.0</oracle.jdbc.version>
1920
</properties>
2021

22+
23+
<!-- Imports the Google Cloud libraries BOM so that the com.google.cloud dependencies declared below without a <version> (google-cloud-texttospeech, google-cloud-speech) take their versions from it. -->
<dependencyManagement>
24+
<dependencies>
25+
<dependency>
26+
<groupId>com.google.cloud</groupId>
27+
<artifactId>libraries-bom</artifactId>
28+
<version>26.32.0</version>
29+
<type>pom</type>
30+
<scope>import</scope>
31+
</dependency>
32+
</dependencies>
33+
</dependencyManagement>
34+
35+
36+
37+
38+
39+
40+
41+
2142
<dependencies>
2243
<dependency>
2344
<groupId>org.springframework.boot</groupId>
@@ -99,6 +120,106 @@
99120
<artifactId>service</artifactId>
100121
<version>0.12.0</version>
101122
</dependency>
123+
124+
<!-- <dependency>
125+
<groupId>com.oracle.database.spring</groupId>
126+
<artifactId>oracle-spring-boot-starter-ucp</artifactId>
127+
<version>23.4.0</version>
128+
</dependency> -->
129+
<!-- <dependency>
130+
<groupId>com.oracle.database.spring</groupId>
131+
<artifactId>oracle-spring-boot-starter-wallet</artifactId>
132+
<version>23.4.0</version>
133+
</dependency> -->
134+
135+
<dependency>
136+
<groupId>com.oracle.database.jdbc</groupId>
137+
<artifactId>ojdbc8</artifactId>
138+
<version>${oracle.jdbc.version}</version>
139+
</dependency>
140+
<dependency>
141+
<groupId>com.oracle.database.jdbc</groupId>
142+
<artifactId>ucp</artifactId>
143+
<version>${oracle.jdbc.version}</version>
144+
</dependency>
145+
<dependency>
146+
<groupId>com.oracle.database.security</groupId>
147+
<artifactId>oraclepki</artifactId>
148+
<version>${oracle.jdbc.version}</version>
149+
</dependency>
150+
<dependency>
151+
<groupId>com.oracle.database.security</groupId>
152+
<artifactId>osdt_core</artifactId>
153+
<version>${oracle.jdbc.version}</version>
154+
</dependency>
155+
<dependency>
156+
<groupId>com.oracle.database.security</groupId>
157+
<artifactId>osdt_cert</artifactId>
158+
<version>${oracle.jdbc.version}</version>
159+
</dependency>
160+
161+
<dependency>
162+
<groupId>com.google.cloud</groupId>
163+
<artifactId>google-cloud-texttospeech</artifactId>
164+
</dependency>
165+
<dependency>
166+
<groupId>com.google.cloud</groupId>
167+
<artifactId>google-cloud-speech</artifactId>
168+
</dependency>
169+
<dependency>
170+
<groupId>net.sourceforge.argparse4j</groupId>
171+
<artifactId>argparse4j</artifactId>
172+
<version>0.9.0</version>
173+
</dependency>
174+
175+
176+
<dependency>
177+
<groupId>org.springframework.boot</groupId>
178+
<artifactId>spring-boot-starter-websocket</artifactId>
179+
</dependency>
180+
181+
182+
<dependency>
183+
<groupId>commons-cli</groupId>
184+
<artifactId>commons-cli</artifactId>
185+
<version>1.6.0</version>
186+
</dependency>
187+
188+
<dependency>
189+
<groupId>com.google.auth</groupId>
190+
<artifactId>google-auth-library-oauth2-http</artifactId>
191+
<version>1.18.0</version>
192+
</dependency>
193+
194+
<dependency>
195+
<groupId>jakarta.websocket</groupId>
196+
<artifactId>jakarta.websocket-api</artifactId>
197+
<version>2.2.0</version>
198+
<!-- <version>2.1.1</version> -->
199+
</dependency>
200+
201+
<dependency>
202+
<groupId>org.apache.tomcat.embed</groupId>
203+
<artifactId>tomcat-embed-websocket</artifactId>
204+
</dependency>
205+
206+
<!-- <dependency>
207+
<groupId>org.apache.tomcat</groupId>
208+
<artifactId>tomcat-websocket</artifactId>
209+
<version>10.1.14</version>
210+
</dependency> -->
211+
<dependency>
212+
<groupId>org.glassfish.tyrus</groupId>
213+
<artifactId>tyrus-server</artifactId>
214+
<version>2.1.3</version>
215+
</dependency>
216+
217+
<dependency>
218+
<groupId>org.glassfish.tyrus</groupId>
219+
<artifactId>tyrus-container-servlet</artifactId>
220+
<version>2.1.3</version>
221+
</dependency>
222+
102223
</dependencies>
103224
<build>
104225
<plugins>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package oracleai;
2+
3+
import jakarta.websocket.server.ServerEndpointConfig;
4+
5+
public class BinaryServerConfigurator extends ServerEndpointConfig.Configurator {
6+
@Override
7+
public boolean checkOrigin(String originHeaderValue) {
8+
System.out.println("✅ WebSocket checkOrigin originHeaderValue: " + originHeaderValue);
9+
return true; // Allow all origins for WebSocket
10+
}
11+
}
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
package oracleai;
2+
3+
import com.google.api.gax.rpc.ApiStreamObserver;
4+
import com.google.api.gax.rpc.BidiStreamingCallable;
5+
import com.google.cloud.speech.v1.*;
6+
import com.google.protobuf.ByteString;
7+
import org.springframework.stereotype.Component;
8+
import org.springframework.web.socket.BinaryMessage;
9+
import org.springframework.web.socket.CloseStatus;
10+
import org.springframework.web.socket.WebSocketSession;
11+
import org.springframework.web.socket.handler.BinaryWebSocketHandler;
12+
import org.springframework.web.socket.TextMessage;
13+
14+
import java.io.FileOutputStream;
15+
import java.io.IOException;
16+
import java.nio.ByteBuffer;
17+
import java.nio.file.Files;
18+
import java.nio.file.Paths;
19+
import java.util.Arrays;
20+
import java.util.concurrent.ConcurrentHashMap;
21+
22+
@Component
23+
public class CustomWebSocketHandler extends BinaryWebSocketHandler {
24+
private static final ConcurrentHashMap<String, ApiStreamObserver<StreamingRecognizeRequest>> activeSessions = new ConcurrentHashMap<>();
25+
private static SpeechClient speechClient;
26+
private static final int MIN_AUDIO_BUFFER_SIZE = 48000; // 🔥 Buffer at least 3 seconds before sending
27+
private static final double SILENCE_THRESHOLD = 0.01; // 🔥 Adjust silence detection (RMS method)
28+
29+
static {
30+
try {
31+
speechClient = SpeechClient.create();
32+
} catch (IOException e) {
33+
throw new RuntimeException("Failed to initialize SpeechClient", e);
34+
}
35+
}
36+
37+
@Override
38+
public void afterConnectionEstablished(WebSocketSession session) {
39+
System.out.println("✅ WebSocket Connected: " + session.getId());
40+
41+
ApiStreamObserver<StreamingRecognizeResponse> responseObserver = new ApiStreamObserver<>() {
42+
@Override
43+
public void onNext(StreamingRecognizeResponse response) {
44+
for (StreamingRecognitionResult result : response.getResultsList()) {
45+
if (result.getAlternativesCount() > 0) {
46+
String transcript = result.getAlternatives(0).getTranscript().trim();
47+
if (!transcript.isEmpty()) {
48+
System.out.println("📝 Full API Response: " + response.toString());
49+
System.out.println("📝 Transcription: " + transcript);
50+
51+
try {
52+
session.sendMessage(new TextMessage(transcript));
53+
} catch (IOException e) {
54+
e.printStackTrace();
55+
}
56+
}
57+
}
58+
}
59+
}
60+
61+
@Override
62+
public void onError(Throwable t) {
63+
System.err.println("❌ Google API Error: " + t.getMessage());
64+
}
65+
66+
@Override
67+
public void onCompleted() {
68+
System.out.println("✅ Streaming completed.");
69+
}
70+
};
71+
72+
// ✅ Configure Google Speech API for better accuracy
73+
BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
74+
speechClient.streamingRecognizeCallable();
75+
ApiStreamObserver<StreamingRecognizeRequest> requestObserver = callable.bidiStreamingCall(responseObserver);
76+
activeSessions.put(session.getId(), requestObserver);
77+
78+
RecognitionConfig config = RecognitionConfig.newBuilder()
79+
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
80+
.setSampleRateHertz(16000)
81+
.setLanguageCode("en-US")
82+
.setEnableAutomaticPunctuation(true)
83+
.setModel("latest_long") // ✅ Best for longer speech
84+
.setAudioChannelCount(1)
85+
.setEnableWordTimeOffsets(true)
86+
.build();
87+
88+
StreamingRecognitionConfig streamingConfig = StreamingRecognitionConfig.newBuilder()
89+
.setConfig(config)
90+
.setInterimResults(true)
91+
.setSingleUtterance(false) // ✅ Allows continuous speech
92+
.build();
93+
94+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
95+
.setStreamingConfig(streamingConfig)
96+
.build());
97+
}
98+
99+
@Override
100+
protected void handleBinaryMessage(WebSocketSession session, BinaryMessage message) {
101+
ByteBuffer payload = message.getPayload();
102+
byte[] audioBytes = new byte[payload.remaining()];
103+
payload.get(audioBytes);
104+
105+
// ✅ Verify PCM Format
106+
if (!isValidPCMFormat(audioBytes)) {
107+
System.out.println("⚠️ Invalid PCM format. Skipping...");
108+
return;
109+
}
110+
111+
// ✅ Check silence with RMS (Root Mean Square)
112+
if (isSilent(audioBytes)) {
113+
System.out.println("🔇 Skipping silent audio.");
114+
return;
115+
}
116+
117+
// ✅ Save to WAV file
118+
try {
119+
saveAudioToWAV(audioBytes, "audio_" + System.currentTimeMillis() + ".wav");
120+
} catch (IOException e) {
121+
e.printStackTrace();
122+
}
123+
124+
// ✅ Buffering audio before sending
125+
if (audioBytes.length < MIN_AUDIO_BUFFER_SIZE) {
126+
System.out.println("⏳ Accumulating audio, not sending yet...");
127+
return; // Don't send yet, wait for more audio
128+
}
129+
130+
// ✅ Send to Google API
131+
if (activeSessions.containsKey(session.getId())) {
132+
ApiStreamObserver<StreamingRecognizeRequest> requestObserver = activeSessions.get(session.getId());
133+
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
134+
.setAudioContent(ByteString.copyFrom(audioBytes))
135+
.build());
136+
}
137+
}
138+
139+
// ✅ Validate PCM Format
140+
private boolean isValidPCMFormat(byte[] audioData) {
141+
if (audioData.length < 2) return false;
142+
143+
for (int i = 0; i < audioData.length; i += 2) {
144+
short sample = (short) ((audioData[i + 1] << 8) | (audioData[i] & 0xFF));
145+
if (sample < -32768 || sample > 32767) {
146+
return false; // Not valid 16-bit PCM range
147+
}
148+
}
149+
return true;
150+
}
151+
152+
// ✅ Improved Silence Detection with RMS
153+
private boolean isSilent(byte[] audioData) {
154+
double sum = 0.0;
155+
for (int i = 0; i < audioData.length; i += 2) {
156+
short sample = (short) ((audioData[i + 1] << 8) | (audioData[i] & 0xFF));
157+
sum += sample * sample;
158+
}
159+
double rms = Math.sqrt(sum / (audioData.length / 2));
160+
161+
System.out.println("📊 RMS Value: " + rms); // Debugging
162+
163+
return rms < SILENCE_THRESHOLD;
164+
}
165+
166+
// ✅ Save Audio to WAV File
167+
private void saveAudioToWAV(byte[] audioData, String filename) throws IOException {
168+
String filePath = "C:/Users/opc/Downloads/audio_logs/" + filename;
169+
Files.createDirectories(Paths.get("C:/Users/opc/Downloads/audio_logs/"));
170+
171+
try (FileOutputStream fos = new FileOutputStream(filePath)) {
172+
fos.write(generateWAVHeader(audioData.length, 16000, 1)); // ✅ Proper WAV header
173+
fos.write(audioData);
174+
}
175+
176+
System.out.println("💾 Saved WAV: " + filePath);
177+
}
178+
179+
// ✅ Generate Correct WAV Header
180+
private byte[] generateWAVHeader(int dataSize, int sampleRate, int channels) {
181+
int totalDataLen = dataSize + 36;
182+
int byteRate = sampleRate * channels * 2;
183+
184+
return new byte[]{
185+
'R', 'I', 'F', 'F', (byte) (totalDataLen & 0xff), (byte) ((totalDataLen >> 8) & 0xff),
186+
(byte) ((totalDataLen >> 16) & 0xff), (byte) ((totalDataLen >> 24) & 0xff),
187+
'W', 'A', 'V', 'E', 'f', 'm', 't', ' ',
188+
16, 0, 0, 0, 1, 0, (byte) channels, 0,
189+
(byte) (sampleRate & 0xff), (byte) ((sampleRate >> 8) & 0xff),
190+
(byte) ((sampleRate >> 16) & 0xff), (byte) ((sampleRate >> 24) & 0xff),
191+
(byte) (byteRate & 0xff), (byte) ((byteRate >> 8) & 0xff),
192+
(byte) ((byteRate >> 16) & 0xff), (byte) ((byteRate >> 24) & 0xff),
193+
(byte) (channels * 2), 0, 16, 0,
194+
'd', 'a', 't', 'a', (byte) (dataSize & 0xff), (byte) ((dataSize >> 8) & 0xff),
195+
(byte) ((dataSize >> 16) & 0xff), (byte) ((dataSize >> 24) & 0xff)
196+
};
197+
}
198+
}

0 commit comments

Comments
 (0)