|
| 1 | +import java.io.BufferedInputStream; |
| 2 | +import java.io.File; |
| 3 | +import java.io.FileOutputStream; |
| 4 | +import java.io.IOException; |
| 5 | +import java.net.MalformedURLException; |
| 6 | +import java.net.URI; |
| 7 | +import java.net.URL; |
| 8 | +import java.util.ArrayList; |
| 9 | +import java.util.List; |
| 10 | +import java.util.Scanner; |
| 11 | +import java.util.UUID; |
| 12 | + |
| 13 | +import org.json.JSONException; |
| 14 | +import org.json.JSONObject; |
| 15 | + |
| 16 | +import io.swagger.client.ApiException; |
| 17 | +import io.swagger.client.ApiResponse; |
| 18 | +import io.swagger.client.api.VoiceSynthesisApi; |
| 19 | +import io.swagger.client.model.Voice; |
| 20 | +import io.swagger.client.model.VoiceSynthesis; |
| 21 | +import io.swagger.client.model.VoiceSynthesis.StatusEnum; |
| 22 | +import com.squareup.okhttp.Response; |
| 23 | + |
| 24 | +public class VoiceSynthesisMain { |
| 25 | + |
| 26 | + //Cognitive service link |
| 27 | + //https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-apis#authentication |
| 28 | + |
| 29 | + public static void main(String[] args) throws ApiException, IOException, InterruptedException, JSONException { |
| 30 | + if(args.length<7) { |
| 31 | + System.out.println("Not enough arguments. Expected number of arguments is 7.\n" |
| 32 | + + "endpoint: i.e. https://centralindia.cris.ai \n" |
| 33 | + + "ibizaStsUrl: i.e. https://centralindia.api.cognitive.microsoft.com/sts/v1.0/issueToken \n" |
| 34 | + + "subscriptionKey: a standard one acquired from Azure \n" |
| 35 | + + "localInputTextFile: i.e. \\Java project\\VoiceSynthsisAPIToJAVA\\en-US.txt \n" |
| 36 | + + "locale: i.e. it-IT \n" |
| 37 | + + "voiceName: i.e. ElsaNeural \n" |
| 38 | + + "concatenateResult: true or false. Whether you want the output file to be one part or multipart. \n" |
| 39 | + + "Program exited."); |
| 40 | + return; |
| 41 | + } |
| 42 | + String endpoint = args[0]; |
| 43 | + String ibizaStsUrl = args[1]; |
| 44 | + // the Subscription key should be a standard one, not the free one. |
| 45 | + String subscriptionKey = args[2]; |
| 46 | + // The input text file could contains only plain text or only SSML or mixed together(as shown in blow script) |
| 47 | + // The input text file encoding format should be UTF-8-BOM |
| 48 | + // The input text file should contains at least 50 lines of text |
| 49 | + String localInputTextFile = args[3]; |
| 50 | + |
| 51 | + String locale = args[4]; |
| 52 | + String voiceName = args[5]; |
| 53 | + if(!new File(localInputTextFile).exists()) { |
| 54 | + System.out.println("Input input text file does not exist. Program exited."); |
| 55 | + return; |
| 56 | + } |
| 57 | + |
| 58 | + // indicate if want concatenate the output waves with a single file or not. True or false |
| 59 | + String concatenateResult = args[6]; |
| 60 | + VoiceSynthsisAPIs(endpoint, ibizaStsUrl, subscriptionKey, localInputTextFile, locale, voiceName, concatenateResult); |
| 61 | + |
| 62 | + } |
| 63 | + |
| 64 | + |
| 65 | + private static void VoiceSynthsisAPIs(String endpoint, String ibizaStsUrl, String subscriptionKey, String |
| 66 | + localInputTextFile, String locale, String voiceName, String concatenateResult) |
| 67 | + throws ApiException, IOException, JSONException, InterruptedException |
| 68 | + { |
| 69 | + |
| 70 | + |
| 71 | + |
| 72 | + final String name = "Simple neural TTS batch synthesis"; |
| 73 | + final String description = "Simple neural TTS batch synthesis description"; |
| 74 | + |
| 75 | + // public voice means the voice could be used by all Subscriptions, if the voice is private(for your Subscription only), this should be set to false |
| 76 | + boolean isPublicVoice = true; |
| 77 | + |
| 78 | + // you can directly set the voiceId or query the voice information by name/locale/ispublic properties from server. |
| 79 | + //var voiceId = new Guid("Your voice model Guid"); |
| 80 | + |
| 81 | + VoiceSynthesisApi voiceApi=new VoiceSynthesisApi(subscriptionKey, endpoint); |
| 82 | + |
| 83 | + UUID voiceId = GetVoiceId( voiceApi, locale, voiceName, isPublicVoice); |
| 84 | + |
| 85 | + if (voiceId.getLeastSignificantBits()==0L && voiceId.getMostSignificantBits()==0L) |
| 86 | + { |
| 87 | + System.out.println("Does not have a available voice for locale :"+locale+ ", name : "+voiceName+", public : "+isPublicVoice); |
| 88 | + return; |
| 89 | + } |
| 90 | + |
| 91 | + File file=new File(localInputTextFile); |
| 92 | + // Submit a voice synthesis request and get a ID |
| 93 | + //URI synthesisLocation = customVoiceAPI.CreateVoiceSynthesis(name, description, locale, localInputTextFile, voiceId, concatenateResult); |
| 94 | + |
| 95 | + JSONObject properties = new JSONObject(); |
| 96 | + |
| 97 | + properties.put("ConcatenateResult", concatenateResult); |
| 98 | + |
| 99 | + Response res=voiceApi.NewcreateVoiceSynthesisWithHttpInfo(name, description, locale, voiceId.toString(), properties.toString(),file); |
| 100 | + List<String> synthesisLocation=res.headers("Location"); |
| 101 | + if(synthesisLocation==null ||synthesisLocation.size()==0) { |
| 102 | + System.out.println("No synthesis location returned from server. Program exited."); |
| 103 | + } |
| 104 | + String[] seq=synthesisLocation.get(0).toString().split("/"); |
| 105 | + UUID synthesisId = UUID.fromString(seq.length>0?seq[seq.length-1]:""); |
| 106 | + |
| 107 | + System.out.println("Checking status."); |
| 108 | + // check for the status of the submitted synthesis every 10 sec. (can also be 1, 2, 5 min depending on usage) |
| 109 | + boolean completed = false; |
| 110 | + while (!completed) |
| 111 | + { |
| 112 | + //var synthesis = customVoiceAPI.s(synthesisId); |
| 113 | + VoiceSynthesis synthesis=voiceApi.getVoiceSynthesis(synthesisId); |
| 114 | + |
| 115 | + switch (synthesis.getStatus().toString()) |
| 116 | + { |
| 117 | + case "Failed": |
| 118 | + case "Succeeded": |
| 119 | + completed = true; |
| 120 | + // if the synthesis was successfull, download the results to local |
| 121 | + if (synthesis.getStatus().toString() == "Succeeded") |
| 122 | + { |
| 123 | + String resultsUri = synthesis.getResultsUrl(); |
| 124 | + System.out.println(resultsUri); |
| 125 | + URL url=new URL(resultsUri); |
| 126 | + //WebClient webClient = new WebClient(); |
| 127 | + //String filename = Path.GetTempFileName()+"_"+synthesis.Id+"_.zip"; |
| 128 | + //webClient.DownloadFile(resultsUri, filename); |
| 129 | + |
| 130 | + File filename=File.createTempFile("_"+synthesis.getId()+"_", ".zip"); |
| 131 | + try (BufferedInputStream inputStream = new BufferedInputStream(url.openStream()); |
| 132 | + FileOutputStream fileOS = new FileOutputStream(filename)) { |
| 133 | + byte data[] = new byte[1024]; |
| 134 | + int byteContent; |
| 135 | + while ((byteContent = inputStream.read(data, 0, 1024)) != -1) { |
| 136 | + fileOS.write(data, 0, byteContent); |
| 137 | + } |
| 138 | + } catch (IOException e) { |
| 139 | + // handles IO exceptions |
| 140 | + } |
| 141 | + System.out.println("Synthesis succeeded. Results: "+filename); |
| 142 | + } |
| 143 | + break; |
| 144 | + |
| 145 | + case "Running": |
| 146 | + break; |
| 147 | + |
| 148 | + case "NotStarted": |
| 149 | + break; |
| 150 | + } |
| 151 | + |
| 152 | + System.out.println("Syntheses status: "+ synthesis.getStatus()); |
| 153 | + //await Task.Delay(TimeSpan.FromSeconds(10)).ConfigureAwait(false); |
| 154 | + Thread.sleep(10*1000L); |
| 155 | + } |
| 156 | + |
| 157 | + System.out.println("Press any key..."); |
| 158 | + Scanner input=new Scanner(System.in); |
| 159 | + input.nextLine(); |
| 160 | + } |
| 161 | + |
| 162 | + private static UUID GetVoiceId( VoiceSynthesisApi voiceApi, String locale, String voiceName, boolean publicVoice) throws ApiException |
| 163 | + { |
| 164 | + // Get available voices list |
| 165 | + //VoiceSynthesisApi voiceApi=new VoiceSynthesisApi(); |
| 166 | + Iterable<Voice> voices = voiceApi.getSupportedVoicesForVoiceSynthesis(); |
| 167 | + Voice voice = null; |
| 168 | + List<Voice> list=new ArrayList<>(); |
| 169 | + |
| 170 | + |
| 171 | + if (publicVoice) |
| 172 | + { |
| 173 | + for(Voice v:voices) |
| 174 | + { |
| 175 | + if( v.getName().contains(voiceName) &&v.getLocale().equals(locale) && v.isIsPublicVoice()) |
| 176 | + list.add(v); |
| 177 | + } |
| 178 | + voice=(list.size()>0)?list.get(0):null; |
| 179 | + |
| 180 | + } |
| 181 | + else |
| 182 | + { |
| 183 | + for(Voice v:voices) |
| 184 | + { |
| 185 | + if(v.getLocale().equals(locale) && v.getName().contains(voiceName)) |
| 186 | + list.add(v); |
| 187 | + } |
| 188 | + voice=(list.size()>0)?list.get(0):null; |
| 189 | + } |
| 190 | + if (voice == null) |
| 191 | + { |
| 192 | + System.out.println("Does not have a available voice for locale : "+locale+", name : "+ voiceName+", public : "+publicVoice); |
| 193 | + return new UUID( 0L , 0L ); |
| 194 | + } |
| 195 | + return voice.getId(); |
| 196 | + } |
| 197 | + |
| 198 | + |
| 199 | + |
| 200 | + |
| 201 | +} |
0 commit comments