Skip to content

Commit 3da51e4

Browse files
committed
init 3
1 parent e55f47b commit 3da51e4

File tree

3 files changed

+77
-29
lines changed

3 files changed

+77
-29
lines changed

src/main/java/oracleai/GenerateAPictureStoryUsingOnlySpeech.java

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package oracleai;
22

3+
import com.fasterxml.jackson.core.JsonProcessingException;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
35
import oracleai.services.ImageGeneration;
46
import oracleai.services.OracleObjectStore;
57
import oracleai.services.OracleSpeechAI;
8+
import org.jetbrains.annotations.NotNull;
69
import org.springframework.stereotype.Controller;
710
import org.springframework.ui.Model;
811
import org.springframework.web.bind.annotation.*;
@@ -11,6 +14,7 @@
1114
import javax.sound.sampled.*;
1215
import java.io.*;
1316
import java.util.*;
17+
import java.util.stream.Collectors;
1418

1519
@Controller
1620
@RequestMapping("/picturestory")
@@ -26,25 +30,32 @@ public String reset(Model model) {
2630
}
2731

2832
@PostMapping("/picturestory")
29-
public String picturestory(@RequestParam("genopts") String genopts,
30-
@RequestParam("file") MultipartFile multipartFile, Model model) throws Exception {
33+
public String picturestory(@RequestParam("opts") String opts,
34+
@RequestParam("genopts") String genopts,
35+
@RequestParam("file") MultipartFile multipartFile,
36+
Model model) throws Exception {
37+
if (opts.equals("fileaudio") ) return fileaudio(genopts, multipartFile, model);
38+
else return liveaudio(genopts, model);
39+
}
40+
41+
@NotNull
42+
private String fileaudio(String genopts, MultipartFile multipartFile, Model model) throws Exception {
3143
OracleObjectStore.sendToObjectStorage(multipartFile.getOriginalFilename(), multipartFile.getInputStream());
3244
String transcriptionJobId = OracleSpeechAI.getTranscriptFromOCISpeech(multipartFile.getOriginalFilename());
3345
System.out.println("transcriptionJobId: " + transcriptionJobId);
3446
String jsonTranscriptFromObjectStorage =
3547
OracleObjectStore.getFromObjectStorage(transcriptionJobId,
36-
AIApplication.OBJECTSTORAGE_NAMESPACE + "_" + AIApplication.OBJECTSTORAGE_BUCKETNAME + "_" + multipartFile.getOriginalFilename() + ".json");
48+
AIApplication.OBJECTSTORAGE_NAMESPACE + "_" +
49+
AIApplication.OBJECTSTORAGE_BUCKETNAME + "_" +
50+
multipartFile.getOriginalFilename() + ".json");
3751
System.out.println("jsonTranscriptFromObjectStorage: " + jsonTranscriptFromObjectStorage);
38-
// System.out.println("getFromObjectStorage: " + getFromObjectStorage("leia.m4a"));
39-
// String pictureDescription = parse(jsonTranscriptFromObjectStorage);
40-
String pictureDescription = "man rowing a boat through the forest";
52+
String pictureDescription = getConcatenatedTokens(jsonTranscriptFromObjectStorage);
4153
imageLocations.add(ImageGeneration.imagegeneration(pictureDescription + " " + genopts));
4254
model.addAttribute("imageLocations", imageLocations.toArray(new String[0]));
4355
return "resultswithimages";
4456
}
4557

46-
@GetMapping("/picturestoryrecordlocally")
47-
public String picturestoryrecordlocally(@RequestParam("genopts") String genopts, Model model) throws Exception {
58+
public String liveaudio(String genopts, Model model) throws Exception {
4859
AudioFormat format =
4960
new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 44100.0f, 16, 1,
5061
(16 / 8) * 1, 44100.0f, true);
@@ -65,24 +76,33 @@ public String picturestoryrecordlocally(@RequestParam("genopts") String genopts,
6576
AudioSystem.write(audioInputStream, fileType, file);
6677
System.out.println("Saved " + file.getAbsolutePath());
6778
OracleObjectStore.sendToObjectStorage(file.getName(), new FileInputStream(file));
68-
69-
String transcriptionJobId = OracleSpeechAI.getTranscriptFromOCISpeech(file.getName()); //json file
70-
// String transcriptionJobId = getTranscriptFromOCISpeech("testing123.wav");
79+
String transcriptionJobId = OracleSpeechAI.getTranscriptFromOCISpeech(file.getName());
7180
System.out.println("transcriptionJobId: " + transcriptionJobId);
7281
String jsonTranscriptFromObjectStorage =
7382
OracleObjectStore.getFromObjectStorage(transcriptionJobId,
74-
// AIApplication.OBJECTSTORAGE_NAMESPACE + "_" + AIApplication.OBJECTSTORAGE_BUCKETNAME + "_" + "testing123.wav" + ".json"));
75-
AIApplication.OBJECTSTORAGE_NAMESPACE + "_" + AIApplication.OBJECTSTORAGE_BUCKETNAME + "_" + file.getName() + ".json");
83+
AIApplication.OBJECTSTORAGE_NAMESPACE + "_" +
84+
AIApplication.OBJECTSTORAGE_BUCKETNAME + "_" + file.getName() + ".json");
7685
System.out.println("jsonTranscriptFromObjectStorage: " + jsonTranscriptFromObjectStorage);
77-
// System.out.println("getFromObjectStorage: " + getFromObjectStorage("leia.m4a"));
78-
// String pictureDescription = parse(jsonTranscriptFromObjectStorage);
79-
String pictureDescription = "man rowing a boat through the forest";
86+
String pictureDescription = getConcatenatedTokens(jsonTranscriptFromObjectStorage);
8087
imageLocations.add(ImageGeneration.imagegeneration(pictureDescription + " " + genopts));
8188
model.addAttribute("imageLocations", imageLocations.toArray(new String[0]));
8289
return "resultswithimages";
8390
}
8491

85-
92+
public String getConcatenatedTokens(String json) {
93+
ObjectMapper objectMapper = new ObjectMapper();
94+
try {
95+
OracleSpeechAI.TranscriptionResponse response =
96+
objectMapper.readValue(json, OracleSpeechAI.TranscriptionResponse.class);
97+
return response.getTranscriptions().stream()
98+
.flatMap(transcription -> transcription.getTokens().stream())
99+
.map(OracleSpeechAI.TranscriptionResponse.Transcription.Token::getToken)
100+
.collect(Collectors.joining(" "));
101+
} catch (JsonProcessingException e) {
102+
e.printStackTrace();
103+
return null;
104+
}
105+
}
86106

87107
public class SoundRecorder implements Runnable {
88108
AudioInputStream audioInputStream;
@@ -106,7 +126,8 @@ public void stop() {
106126

107127
@Override
108128
public void run() {
109-
try (final ByteArrayOutputStream out = new ByteArrayOutputStream(); final TargetDataLine line = getTargetDataLineForRecord();) {
129+
try (final ByteArrayOutputStream out = new ByteArrayOutputStream();
130+
final TargetDataLine line = getTargetDataLineForRecord();) {
110131
int frameSizeInBytes = format.getFrameSize();
111132
int bufferLengthInFrames = line.getBufferSize() / 8;
112133
final int bufferLengthInBytes = bufferLengthInFrames * frameSizeInBytes;
@@ -121,7 +142,9 @@ public void run() {
121142
}
122143
}
123144

124-
public void buildByteOutputStream(final ByteArrayOutputStream out, final TargetDataLine line, int frameSizeInBytes, final int bufferLengthInBytes) throws IOException {
145+
public void buildByteOutputStream(final ByteArrayOutputStream out,
146+
final TargetDataLine line, int frameSizeInBytes,
147+
final int bufferLengthInBytes) throws IOException {
125148
final byte[] data = new byte[bufferLengthInBytes];
126149
int numBytesRead;
127150

src/main/java/oracleai/services/OracleSpeechAI.java

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
package oracleai.services;
22

3+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
34
import com.oracle.bmc.aispeech.AIServiceSpeechClient;
45
import com.oracle.bmc.aispeech.model.*;
56
import com.oracle.bmc.aispeech.requests.CreateTranscriptionJobRequest;
67
import com.oracle.bmc.aispeech.requests.GetTranscriptionJobRequest;
78
import com.oracle.bmc.aispeech.responses.CreateTranscriptionJobResponse;
89
import com.oracle.bmc.aispeech.responses.GetTranscriptionJobResponse;
910
import com.oracle.bmc.auth.AuthenticationDetailsProvider;
11+
import lombok.Getter;
12+
import lombok.Setter;
1013
import oracleai.AIApplication;
1114

1215
import java.io.IOException;
1316
import java.util.ArrayList;
1417
import java.util.Arrays;
18+
import java.util.List;
1519

1620
public class OracleSpeechAI {
1721

@@ -84,19 +88,38 @@ public static String getTranscriptFromOCISpeech(String fileName) throws IOExcept
8488
getTranscriptionJobResponseresponse =
8589
client.getTranscriptionJob(getTranscriptionJobRequest);
8690
transcriptJobState = getTranscriptionJobResponseresponse.getTranscriptionJob().getLifecycleState();
87-
if(lastState != null && lastState.equals(transcriptJobState)) System.out.print(".");
88-
System.out.println("transcriptJobState:" + transcriptJobState);
91+
if (lastState != null && lastState.equals(transcriptJobState)) System.out.print(".");
92+
else System.out.println("transcriptJobState:" + transcriptJobState);
8993
}
9094
System.out.println("getInputLocation:" +
9195
getTranscriptionJobResponseresponse.getTranscriptionJob().getInputLocation());
9296
String fullString = getTranscriptionJobResponseresponse.getTranscriptionJob().getId();
9397
int lastIndex = fullString.lastIndexOf(".");
9498
String extractedString = "";
95-
if (lastIndex != -1) extractedString = fullString.substring(lastIndex + 1);
99+
if (lastIndex != -1) extractedString = fullString.substring(lastIndex + 1);
96100
return "job-" + extractedString;
97101
}
98102

103+
@JsonIgnoreProperties(ignoreUnknown = true)
104+
@Getter
105+
@Setter
106+
public static class TranscriptionResponse {
107+
private List<Transcription> transcriptions;
99108

109+
@JsonIgnoreProperties(ignoreUnknown = true)
110+
@Getter
111+
@Setter
112+
public static class Transcription {
113+
private List<Token> tokens;
114+
115+
@JsonIgnoreProperties(ignoreUnknown = true)
116+
@Getter
117+
@Setter
118+
public static class Token {
119+
private String token;
120+
}
121+
}
122+
}
100123

101124

102125
}

src/main/resources/static/SpeechTranscriptionAndImageGeneration.html

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,26 @@ <h2>Develop with Oracle AI Services and Oracle Database</h2>
2828
<h4>Generate a picture story board from voice recordings</h4>
2929
<br>
3030
<h5>Provide the audio file of a verbal description of a scene.</h5>
31-
<h5>Uses Oracle Vision AI, Oracle Gen AI, and Oracle Language AI</h5>
31+
<h5>Uses Oracle Speech AI and Image Generation AI (eg DALL-E, Stable Diffusion, DeepFloyd IF, ...)</h5>
3232
<br>
3333

3434
<form method="post" action="/picturestory/picturestory" enctype="multipart/form-data">
35-
<br>
36-
<input type="file" name="file" accept="audio/*" />
35+
<b>Audio Recording Options...</b>
36+
<br><input type="radio" name="opts" value="fileaudio" checked>Audio from file (select this to use audio file)
37+
<br><input type="file" name="file" accept="audio/*" />
3738
<br>You can download and use the sample audio here:
3839
<br><a href="audio/shipwrecked.m4a">shipwrecked</a>
3940
<br><a href="audio/paradise.m4a">paradise</a>
40-
<br><a href="audio/shipwrecked.m4a">shipwrecked</a>
41-
<br><br>
42-
<!-- <input type="submit" value="Click here and record (up to 10 seconds of audio) describing next scene.">-->
43-
<br><br> <b>Options...</b>
41+
<br><a href="audio/invaded.m4a">invaded</a>
42+
<br>
43+
<br><input type="radio" name="opts" value="recordedaudio">Audio from live recording (select this to record 8 seconds of audio from mic - process must be running on local machine)
44+
<br><br> <b>Image Options...</b>
4445
<br><input type="radio" name="genopts" value=", photo taken on a Pentax k1000" checked>photo taken on a Pentax k1000
4546
<br><input type="radio" name="genopts" value=", pixel art">pixel art
4647
<br><input type="radio" name="genopts" value=", digital art">digital art
4748
<br><input type="radio" name="genopts" value=", 3d render">3d render
4849
<br><br>
50+
<!-- <input type="submit" value="Click here and record (up to 10 seconds of audio) describing next scene.">-->
4951
<input type="submit" value="Submit to add to story">
5052
</form>
5153
<br><br>

0 commit comments

Comments
 (0)