Skip to content

Commit 4ee2c8f

Browse files
Fix voice transcription request path
1 parent 851e5fd commit 4ee2c8f

File tree

4 files changed: +131 -48 lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
[![Minecraft 1.21.1](https://img.shields.io/badge/Minecraft-1.21.1-blue?style=for-the-badge&logo=minecraft)](https://www.minecraft.net/)
99
[![Fabric](https://img.shields.io/badge/Fabric-Loader-9c8a7b?style=for-the-badge&logo=fabric)](https://fabricmc.net/)
1010
[![Gemini 3.1 Pro Preview](https://img.shields.io/badge/Power-Gemini_3.1_Pro_Preview-orange?style=for-the-badge&logo=google-gemini)](https://deepmind.google/technologies/gemini/)
11-
[![Official Release](https://img.shields.io/badge/Release-v1.3.0_MCP_Bridge_Update-purple?style=for-the-badge)](https://github.com/aaronaalmendarez/gemini-minecraft/releases/tag/v1.3.0)
11+
[![Official Release](https://img.shields.io/badge/Release-v1.3.1_Voice_Fix_Update-purple?style=for-the-badge)](https://github.com/aaronaalmendarez/gemini-minecraft/releases/tag/v1.3.1)
1212
[![MIT License](https://img.shields.io/badge/License-MIT-green?style=for-the-badge)](LICENSE)
1313

1414
[**Quick Start**](#-getting-started) • [**Features**](#-pillars-of-intelligence) • [**Roadmap**](#-roadmap) • [**Commands**](#-command-terminal) • [**Technical Specs**](#-the-nerd-stack)
@@ -17,15 +17,15 @@
1717

1818
### *“The first AI that doesn't just talk to you—it lives in your world.”*
1919

20-
### *Now featuring a structured voxel architect, copy-paste MCP setup, live screenshot tools, build previews, delayed command batches, and self-repairing execution.*
20+
### *Now featuring a fixed push-to-talk voice path, a structured voxel architect, copy-paste MCP setup, live screenshot tools, build previews, delayed command batches, and self-repairing execution.*
2121

2222
![Demo](readme_resources/demo.gif)
2323

2424
</div>
2525

2626
## ⚡ Quick Try
2727

28-
1. **Download** the latest release: [**v1.3.0 JARs**](https://github.com/aaronaalmendarez/gemini-minecraft/releases/tag/v1.3.0)
28+
1. **Download** the latest release: [**v1.3.1 JARs**](https://github.com/aaronaalmendarez/gemini-minecraft/releases/tag/v1.3.1)
2929
2. Drop the `.jar` into your **`mods`** folder.
3030
3. Launch with **Fabric 1.21.1**.
3131
4. **Experience the Power**:
@@ -290,8 +290,8 @@ That is it. No manual token pasting is required.
290290

291291
#### Release Assets
292292

293-
The `v1.3.0` release ships:
294-
- `gemini-ai-companion-1.3.0.jar` for the Fabric mod
293+
The `v1.3.1` release ships:
294+
- `gemini-ai-companion-1.3.1.jar` for the Fabric mod
295295
- `gemini-minecraft-mcp-sidecar.jar` for the standalone Java MCP sidecar
296296

297297
The recommended client path is still the generated Node sidecar config, because it includes the richest MCP guidance and the best host compatibility.

gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ loader_version=0.18.4
1414
loom_version=1.15-SNAPSHOT
1515

1616
# Mod Properties
17-
mod_version=1.3.0
17+
mod_version=1.3.1
1818
maven_group=com.aaron.gemini
1919
archives_base_name=gemini-ai-companion
2020

release_notes_v131.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# v1.3.1 - Voice Fix Update
2+
3+
This release fixes the push-to-talk voice transcription path.
4+
5+
## Highlights
6+
7+
- Fixed voice transcription requests that were returning `HTTP 500`
8+
- Switched voice speech-to-text (STT) to a plain audio transcription request instead of the structured chat schema path
9+
- Improved transcription error reporting so failures now include Gemini's real error message
10+
- Kept the MCP bridge, build preview, delayed command batching, and voxel planner updates from `v1.3.0`
11+
12+
## Included Assets
13+
14+
- `gemini-ai-companion-1.3.1.jar`
15+
- `gemini-minecraft-mcp-sidecar.jar`

src/main/java/com/aaron/gemini/GeminiCompanion.java

Lines changed: 110 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2657,6 +2657,53 @@ private static boolean hasActionPayload(ModeMessage message, ServerPlayerEntity
26572657
return !filterExecutableCommands(message.commands, player).isEmpty();
26582658
}
26592659

2660+
private static String buildPreviewRetryContext(VoxelBuildPlanner.CompiledBuild compiledBuild, List<String> previewCommands, String prefix) {
2661+
List<String> parts = new ArrayList<>();
2662+
if (prefix != null && !prefix.isBlank()) {
2663+
parts.add(prefix.trim());
2664+
}
2665+
if (compiledBuild != null) {
2666+
if (compiledBuild.summary() != null && !compiledBuild.summary().isBlank()) {
2667+
parts.add("Preview summary: " + compiledBuild.summary());
2668+
}
2669+
if (compiledBuild.resolvedOrigin() != null) {
2670+
VoxelBuildPlanner.GridPoint origin = compiledBuild.resolvedOrigin();
2671+
parts.add("Resolved origin: " + origin.x() + ", " + origin.y() + ", " + origin.z());
2672+
}
2673+
if (compiledBuild.appliedRotation() != 0) {
2674+
parts.add("Applied rotation: " + compiledBuild.appliedRotation());
2675+
}
2676+
if (compiledBuild.phases() > 0) {
2677+
parts.add("Phase count: " + compiledBuild.phases());
2678+
}
2679+
if (compiledBuild.autoFixAvailable()) {
2680+
parts.add("Auto-fix is available for this preview.");
2681+
}
2682+
if (compiledBuild.issues() != null && !compiledBuild.issues().isEmpty()) {
2683+
List<String> issueParts = new ArrayList<>();
2684+
for (int i = 0; i < Math.min(4, compiledBuild.issues().size()); i++) {
2685+
VoxelBuildPlanner.SupportIssue issue = compiledBuild.issues().get(i);
2686+
issueParts.add(issue.cuboid() + ": " + issue.issue() + ", gap=" + issue.gapBelow() + ", suggestedY=" + issue.suggestedY());
2687+
}
2688+
parts.add("Preview issues: " + String.join(" | ", issueParts));
2689+
}
2690+
if (compiledBuild.repairs() != null && !compiledBuild.repairs().isEmpty()) {
2691+
parts.add("Preview repairs: " + String.join(" | ", compiledBuild.repairs()));
2692+
}
2693+
if (compiledBuild.error() != null && !compiledBuild.error().isBlank()) {
2694+
parts.add("Preview error: " + compiledBuild.error());
2695+
}
2696+
}
2697+
if (previewCommands != null && !previewCommands.isEmpty()) {
2698+
int previewCount = Math.min(6, previewCommands.size());
2699+
parts.add("Validated preview commands: " + String.join(" | ", previewCommands.subList(0, previewCount)));
2700+
if (previewCommands.size() > previewCount) {
2701+
parts.add("Preview command count: " + previewCommands.size());
2702+
}
2703+
}
2704+
return String.join(". ", parts);
2705+
}
2706+
26602707
private static ModeMessage handleCommandMode(
26612708
ServerCommandSource source,
26622709
ServerPlayerEntity player,
@@ -2674,6 +2721,7 @@ private static ModeMessage handleCommandMode(
26742721
setRetryStats(player, attempt - 1);
26752722
List<String> executableCommands = new ArrayList<>();
26762723
VoxelBuildPlanner.CompiledBuild compiledBuild = null;
2724+
PreparedCommands prepared = null;
26772725
if (current.buildPlan() != null) {
26782726
compiledBuild = VoxelBuildPlanner.compile(player, current.buildPlan());
26792727
if (!compiledBuild.valid()) {
@@ -2683,14 +2731,13 @@ private static ModeMessage handleCommandMode(
26832731
}
26842732
LOGGER.info("AI build retry {}/{} for player {}. Errors: {}", attempt, retryLimit, player.getName().getString(), compiledBuild.error());
26852733
setStatus(player, "AI encountered a build-plan error, retrying (" + attempt + "/" + retryLimit + ")...", Formatting.RED);
2686-
String repairContext = compiledBuild.repairs().isEmpty() ? "" : " Repairs: " + String.join(" | ", compiledBuild.repairs());
2687-
String schemaHint =
2688-
" Supported build_plan schema: " +
2689-
"cuboids:[{block:\"oak_planks\",from:{x:0,y:0,z:0},to:{x:4,y:2,z:4}}] or " +
2690-
"cuboids:[{block:\"oak_planks\",location:{x:0,y:0,z:0},size:{x:5,y:3,z:5},fill:\"hollow\"}] or " +
2691-
"blocks:[{block:\"oak_door\",pos:{x:2,y:1,z:0},properties:{facing:\"south\"}}] or " +
2692-
"steps:[{phase:\"foundation\",plan:{cuboids:[{block:\"stone_bricks\",from:{x:0,y:0,z:0},to:{x:4,y:0,z:4}}]}},{phase:\"walls\",plan:{cuboids:[{block:\"oak_planks\",start:{x:0,y:1,z:0},size:{x:5,y:3,z:5},hollow:true}]}}].";
2693-
String errorContext = "Build plan errors: " + compiledBuild.error() + repairContext + schemaHint;
2734+
String schemaHint =
2735+
" Supported build_plan schema: " +
2736+
"cuboids:[{block:\"oak_planks\",from:{x:0,y:0,z:0},to:{x:4,y:2,z:4}}] or " +
2737+
"cuboids:[{block:\"oak_planks\",location:{x:0,y:0,z:0},size:{x:5,y:3,z:5},fill:\"hollow\"}] or " +
2738+
"blocks:[{block:\"oak_door\",pos:{x:2,y:1,z:0},properties:{facing:\"south\"}}] or " +
2739+
"steps:[{phase:\"foundation\",plan:{cuboids:[{block:\"stone_bricks\",from:{x:0,y:0,z:0},to:{x:4,y:0,z:4}}]}},{phase:\"walls\",plan:{cuboids:[{block:\"oak_planks\",start:{x:0,y:1,z:0},size:{x:5,y:3,z:5},hollow:true}]}}].";
2740+
String errorContext = buildPreviewRetryContext(compiledBuild, List.of(), "Build preview failed.") + ". " + schemaHint;
26942741
current = callGeminiSafely(apiKey, prompt, context, history, errorContext, modelChoice);
26952742
if (!"COMMAND".equals(current.mode)) {
26962743
return current;
@@ -2703,21 +2750,23 @@ private static ModeMessage handleCommandMode(
27032750
if (executableCommands.isEmpty()) {
27042751
return new ModeMessage("ASK", current.message, List.of(), current.searchUsed, current.sources, current.highlights);
27052752
}
2706-
PreparedCommands prepared = prepareCommandsForExecution(player, executableCommands);
2753+
prepared = prepareCommandsForExecution(player, executableCommands);
27072754
CommandResult validation = validateCommands(player, prepared.executeCommands);
27082755
if (!validation.success) {
27092756
if (attempt == retryLimit) {
2710-
LOGGER.info("AI retry exhausted for player {}. Validation errors: {}", player.getName().getString(), validation.errorSummary);
2757+
LOGGER.info("AI retry exhausted for player {}. Validation errors: {}", player.getName().getString(), validation.errorSummary);
27112758
return new ModeMessage("COMMAND", "AI could not produce valid commands after several tries.", List.of(), false, List.of(), List.of());
27122759
}
2713-
2714-
LOGGER.info("AI command retry {}/{} for player {}. Validation errors: {}", attempt, retryLimit, player.getName().getString(), validation.errorSummary);
2715-
setStatus(player, "AI encountered an error, retrying (" + attempt + "/" + retryLimit + ")...", Formatting.RED);
2716-
String errorContext = "Command errors: " + validation.errorSummary;
2717-
current = callGeminiSafely(apiKey, prompt, context, history, errorContext, modelChoice);
2718-
if (!"COMMAND".equals(current.mode)) {
2719-
return current;
2720-
}
2760+
2761+
LOGGER.info("AI command retry {}/{} for player {}. Validation errors: {}", attempt, retryLimit, player.getName().getString(), validation.errorSummary);
2762+
setStatus(player, "AI encountered an error, retrying (" + attempt + "/" + retryLimit + ")...", Formatting.RED);
2763+
String errorContext = compiledBuild != null
2764+
? buildPreviewRetryContext(compiledBuild, prepared.executeCommands, "Build preview validation failed. Command errors: " + validation.errorSummary)
2765+
: "Command errors: " + validation.errorSummary;
2766+
current = callGeminiSafely(apiKey, prompt, context, history, errorContext, modelChoice);
2767+
if (!"COMMAND".equals(current.mode)) {
2768+
return current;
2769+
}
27212770
continue;
27222771
}
27232772

@@ -5652,37 +5701,23 @@ private static String callGeminiTranscribe(String apiKey, byte[] audioBytes, Str
56525701
JsonObject content = new JsonObject();
56535702
content.addProperty("role", "user");
56545703
JsonArray parts = new JsonArray();
5704+
JsonObject promptPart = new JsonObject();
5705+
promptPart.addProperty("text", "Generate a transcript of the speech. Return only the transcript text.");
5706+
parts.add(promptPart);
56555707
JsonObject audioPart = new JsonObject();
56565708
JsonObject inlineData = new JsonObject();
56575709
inlineData.addProperty("mime_type", mimeType == null || mimeType.isBlank() ? "audio/wav" : mimeType);
56585710
inlineData.addProperty("data", Base64.getEncoder().encodeToString(audioBytes));
56595711
audioPart.add("inline_data", inlineData);
56605712
parts.add(audioPart);
5661-
JsonObject promptPart = new JsonObject();
5662-
promptPart.addProperty("text", "Transcribe the speech to plain text. Return only the transcript.");
5663-
parts.add(promptPart);
56645713
content.add("parts", parts);
56655714
contents.add(content);
56665715
request.add("contents", contents);
56675716

5668-
JsonObject generationConfig = new JsonObject();
5669-
JsonObject thinkingConfig = new JsonObject();
5670-
thinkingConfig.addProperty("thinkingLevel", "minimal");
5671-
generationConfig.add("thinkingConfig", thinkingConfig);
5672-
applyGeminiResponseSchema(generationConfig);
5673-
request.add("generationConfig", generationConfig);
5674-
5675-
String body = GSON.toJson(request);
5676-
String modelId = ModelChoice.FLASH.modelId;
5677-
HttpRequest httpRequest = HttpRequest.newBuilder()
5678-
.uri(URI.create(GEMINI_ENDPOINT_BASE + modelId + ":generateContent?key=" + apiKey))
5679-
.header("Content-Type", "application/json; charset=utf-8")
5680-
.POST(HttpRequest.BodyPublishers.ofString(body, StandardCharsets.UTF_8))
5681-
.build();
5682-
5683-
HttpResponse<String> response = HTTP_CLIENT.send(httpRequest, HttpResponse.BodyHandlers.ofString(StandardCharsets.UTF_8));
5717+
String modelId = "gemini-2.5-flash";
5718+
HttpResponse<String> response = sendWithRetries(apiKey, modelId, request);
56845719
if (response.statusCode() < 200 || response.statusCode() >= 300) {
5685-
throw new IllegalStateException("HTTP " + response.statusCode());
5720+
throw new IllegalStateException("HTTP " + response.statusCode() + " - " + summarizeGeminiErrorBody(response.body()));
56865721
}
56875722

56885723
JsonObject json = GSON.fromJson(response.body(), JsonObject.class);
@@ -5699,11 +5734,44 @@ private static String callGeminiTranscribe(String apiKey, byte[] audioBytes, Str
56995734
if (responseParts == null || responseParts.isEmpty()) {
57005735
return "";
57015736
}
5702-
JsonObject firstPart = responseParts.get(0).getAsJsonObject();
5703-
if (!firstPart.has("text")) {
5704-
return "";
5737+
StringBuilder transcript = new StringBuilder();
5738+
for (JsonElement element : responseParts) {
5739+
if (element == null || !element.isJsonObject()) {
5740+
continue;
5741+
}
5742+
JsonObject responsePart = element.getAsJsonObject();
5743+
if (responsePart.has("text")) {
5744+
String text = responsePart.get("text").getAsString();
5745+
if (!text.isBlank()) {
5746+
if (transcript.length() > 0) {
5747+
transcript.append('\n');
5748+
}
5749+
transcript.append(text.trim());
5750+
}
5751+
}
5752+
}
5753+
return transcript.toString();
5754+
}
5755+
5756+
private static String summarizeGeminiErrorBody(String body) {
5757+
if (body == null || body.isBlank()) {
5758+
return "Empty response body";
5759+
}
5760+
try {
5761+
JsonObject json = GSON.fromJson(body, JsonObject.class);
5762+
if (json != null && json.has("error") && json.get("error").isJsonObject()) {
5763+
JsonObject error = json.getAsJsonObject("error");
5764+
String message = error.has("message") ? error.get("message").getAsString() : body;
5765+
String status = error.has("status") ? error.get("status").getAsString() : "";
5766+
if (!status.isBlank() && !message.contains(status)) {
5767+
return status + ": " + message;
5768+
}
5769+
return message;
5770+
}
5771+
} catch (Exception ignored) {
57055772
}
5706-
return firstPart.get("text").getAsString();
5773+
String flattened = body.replace('\n', ' ').replace('\r', ' ').trim();
5774+
return flattened.length() > 240 ? flattened.substring(0, 240) + "..." : flattened;
57075775
}
57085776

57095777
private static String sanitizeTranscript(String transcript) {

0 commit comments

Comments
 (0)