Skip to content

Commit 5425754

Browse files
feat(texttospeech)!: update the API
BREAKING CHANGE: This release has breaking changes.

#### texttospeech:v1beta1

The following keys were deleted:

- resources.voices.methods.generateVoiceCloningKey.description
- resources.voices.methods.generateVoiceCloningKey.flatPath
- resources.voices.methods.generateVoiceCloningKey.httpMethod
- resources.voices.methods.generateVoiceCloningKey.id
- resources.voices.methods.generateVoiceCloningKey.parameterOrder
- resources.voices.methods.generateVoiceCloningKey.path
- resources.voices.methods.generateVoiceCloningKey.request.$ref
- resources.voices.methods.generateVoiceCloningKey.response.$ref
- resources.voices.methods.generateVoiceCloningKey.scopes
- schemas.GenerateVoiceCloningKeyRequest.description
- schemas.GenerateVoiceCloningKeyRequest.id
- schemas.GenerateVoiceCloningKeyRequest.properties.consentScript.description
- schemas.GenerateVoiceCloningKeyRequest.properties.consentScript.type
- schemas.GenerateVoiceCloningKeyRequest.properties.languageCode.description
- schemas.GenerateVoiceCloningKeyRequest.properties.languageCode.type
- schemas.GenerateVoiceCloningKeyRequest.properties.referenceAudio.$ref
- schemas.GenerateVoiceCloningKeyRequest.properties.referenceAudio.description
- schemas.GenerateVoiceCloningKeyRequest.properties.voiceTalentConsent.$ref
- schemas.GenerateVoiceCloningKeyRequest.properties.voiceTalentConsent.description
- schemas.GenerateVoiceCloningKeyRequest.type
- schemas.GenerateVoiceCloningKeyResponse.description
- schemas.GenerateVoiceCloningKeyResponse.id
- schemas.GenerateVoiceCloningKeyResponse.properties.voiceCloningKey.description
- schemas.GenerateVoiceCloningKeyResponse.properties.voiceCloningKey.type
- schemas.GenerateVoiceCloningKeyResponse.type
- schemas.InputAudio.description
- schemas.InputAudio.id
- schemas.InputAudio.properties.audioConfig.$ref
- schemas.InputAudio.properties.audioConfig.description
- schemas.InputAudio.properties.content.description
- schemas.InputAudio.properties.content.format
- schemas.InputAudio.properties.content.type
- schemas.InputAudio.type
- schemas.InputAudioConfig.description
- schemas.InputAudioConfig.id
- schemas.InputAudioConfig.properties.audioEncoding.description
- schemas.InputAudioConfig.properties.audioEncoding.enum
- schemas.InputAudioConfig.properties.audioEncoding.enumDescriptions
- schemas.InputAudioConfig.properties.audioEncoding.type
- schemas.InputAudioConfig.properties.sampleRateHertz.description
- schemas.InputAudioConfig.properties.sampleRateHertz.format
- schemas.InputAudioConfig.properties.sampleRateHertz.type
- schemas.InputAudioConfig.type
- schemas.VoiceCloneParams.description
- schemas.VoiceCloneParams.id
- schemas.VoiceCloneParams.properties.voiceCloningKey.description
- schemas.VoiceCloneParams.properties.voiceCloningKey.type
- schemas.VoiceCloneParams.type
- schemas.VoiceSelectionParams.properties.voiceClone.$ref
- schemas.VoiceSelectionParams.properties.voiceClone.description

#### texttospeech:v1

The following keys were deleted:

- resources.voices.methods.generateVoiceCloningKey.description
- resources.voices.methods.generateVoiceCloningKey.flatPath
- resources.voices.methods.generateVoiceCloningKey.httpMethod
- resources.voices.methods.generateVoiceCloningKey.id
- resources.voices.methods.generateVoiceCloningKey.parameterOrder
- resources.voices.methods.generateVoiceCloningKey.path
- resources.voices.methods.generateVoiceCloningKey.request.$ref
- resources.voices.methods.generateVoiceCloningKey.response.$ref
- resources.voices.methods.generateVoiceCloningKey.scopes
- schemas.GenerateVoiceCloningKeyRequest.description
- schemas.GenerateVoiceCloningKeyRequest.id
- schemas.GenerateVoiceCloningKeyRequest.properties.consentScript.description
- schemas.GenerateVoiceCloningKeyRequest.properties.consentScript.type
- schemas.GenerateVoiceCloningKeyRequest.properties.languageCode.description
- schemas.GenerateVoiceCloningKeyRequest.properties.languageCode.type
- schemas.GenerateVoiceCloningKeyRequest.properties.referenceAudio.$ref
- schemas.GenerateVoiceCloningKeyRequest.properties.referenceAudio.description
- schemas.GenerateVoiceCloningKeyRequest.properties.voiceTalentConsent.$ref
- schemas.GenerateVoiceCloningKeyRequest.properties.voiceTalentConsent.description
- schemas.GenerateVoiceCloningKeyRequest.type
- schemas.GenerateVoiceCloningKeyResponse.description
- schemas.GenerateVoiceCloningKeyResponse.id
- schemas.GenerateVoiceCloningKeyResponse.properties.voiceCloningKey.description
- schemas.GenerateVoiceCloningKeyResponse.properties.voiceCloningKey.type
- schemas.GenerateVoiceCloningKeyResponse.type
- schemas.InputAudio.description
- schemas.InputAudio.id
- schemas.InputAudio.properties.audioConfig.$ref
- schemas.InputAudio.properties.audioConfig.description
- schemas.InputAudio.properties.content.description
- schemas.InputAudio.properties.content.format
- schemas.InputAudio.properties.content.type
- schemas.InputAudio.type
- schemas.InputAudioConfig.description
- schemas.InputAudioConfig.id
- schemas.InputAudioConfig.properties.audioEncoding.description
- schemas.InputAudioConfig.properties.audioEncoding.enum
- schemas.InputAudioConfig.properties.audioEncoding.enumDescriptions
- schemas.InputAudioConfig.properties.audioEncoding.type
- schemas.InputAudioConfig.properties.sampleRateHertz.description
- schemas.InputAudioConfig.properties.sampleRateHertz.format
- schemas.InputAudioConfig.properties.sampleRateHertz.type
- schemas.InputAudioConfig.type
- schemas.VoiceCloneParams.description
- schemas.VoiceCloneParams.id
- schemas.VoiceCloneParams.properties.voiceCloningKey.description
- schemas.VoiceCloneParams.properties.voiceCloningKey.type
- schemas.VoiceCloneParams.type
- schemas.VoiceSelectionParams.properties.voiceClone.$ref
- schemas.VoiceSelectionParams.properties.voiceClone.description
1 parent b1a0c35 commit 5425754

File tree

4 files changed

+2
-580
lines changed

4 files changed

+2
-580
lines changed

discovery/texttospeech-v1.json

Lines changed: 1 addition & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -294,24 +294,6 @@
294294
},
295295
"voices": {
296296
"methods": {
297-
"generateVoiceCloningKey": {
298-
"description": "Generates voice clone key given a short voice prompt. This method validates the voice prompts with a series of checks against the voice talent statement to verify the voice clone is safe to generate.",
299-
"flatPath": "v1/voices:generateVoiceCloningKey",
300-
"httpMethod": "POST",
301-
"id": "texttospeech.voices.generateVoiceCloningKey",
302-
"parameterOrder": [],
303-
"parameters": {},
304-
"path": "v1/voices:generateVoiceCloningKey",
305-
"request": {
306-
"$ref": "GenerateVoiceCloningKeyRequest"
307-
},
308-
"response": {
309-
"$ref": "GenerateVoiceCloningKeyResponse"
310-
},
311-
"scopes": [
312-
"https://www.googleapis.com/auth/cloud-platform"
313-
]
314-
},
315297
"list": {
316298
"description": "Returns a list of Voice supported for synthesis.",
317299
"flatPath": "v1/voices",
@@ -336,7 +318,7 @@
336318
}
337319
}
338320
},
339-
"revision": "20241001",
321+
"revision": "20241008",
340322
"rootUrl": "https://texttospeech.googleapis.com/",
341323
"schemas": {
342324
"AdvancedVoiceOptions": {
@@ -485,40 +467,6 @@
485467
"properties": {},
486468
"type": "object"
487469
},
488-
"GenerateVoiceCloningKeyRequest": {
489-
"description": "Request message for the `GenerateVoiceCloningKey` method.",
490-
"id": "GenerateVoiceCloningKeyRequest",
491-
"properties": {
492-
"consentScript": {
493-
"description": "Required. The script used for the voice talent statement. The script will be provided to the caller through other channels. It must be returned unchanged in this field.",
494-
"type": "string"
495-
},
496-
"languageCode": {
497-
"description": "Required. The language of the supplied audio as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example: \"en-US\". See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes.",
498-
"type": "string"
499-
},
500-
"referenceAudio": {
501-
"$ref": "InputAudio",
502-
"description": "Required. The training audio used to create voice clone. This is currently limited to LINEAR16 PCM WAV files mono audio with 24khz sample rate. This needs to be specified in [InputAudio.audio_config], other values will be explicitly rejected."
503-
},
504-
"voiceTalentConsent": {
505-
"$ref": "InputAudio",
506-
"description": "Required. The voice talent audio used to verify consent to voice clone."
507-
}
508-
},
509-
"type": "object"
510-
},
511-
"GenerateVoiceCloningKeyResponse": {
512-
"description": "Response message for the `GenerateVoiceCloningKey` method.",
513-
"id": "GenerateVoiceCloningKeyResponse",
514-
"properties": {
515-
"voiceCloningKey": {
516-
"description": "The voice clone key. Use it in the SynthesizeSpeechRequest by setting [voice.voice_clone.voice_cloning_key].",
517-
"type": "string"
518-
}
519-
},
520-
"type": "object"
521-
},
522470
"GoogleCloudTexttospeechV1SynthesizeLongAudioMetadata": {
523471
"description": "Metadata for response returned by the `SynthesizeLongAudio` method.",
524472
"id": "GoogleCloudTexttospeechV1SynthesizeLongAudioMetadata",
@@ -542,54 +490,6 @@
542490
},
543491
"type": "object"
544492
},
545-
"InputAudio": {
546-
"description": "Holds audio content and config.",
547-
"id": "InputAudio",
548-
"properties": {
549-
"audioConfig": {
550-
"$ref": "InputAudioConfig",
551-
"description": "Required. Provides information that specifies how to process content."
552-
},
553-
"content": {
554-
"description": "Required. The audio data bytes encoded as specified in `InputAudioConfig`. Note: as with all bytes fields, proto buffers use a pure binary representation, whereas JSON representations use base64. Audio samples should be between 5-25 seconds in length.",
555-
"format": "byte",
556-
"type": "string"
557-
}
558-
},
559-
"type": "object"
560-
},
561-
"InputAudioConfig": {
562-
"description": "Description of inputted audio data.",
563-
"id": "InputAudioConfig",
564-
"properties": {
565-
"audioEncoding": {
566-
"description": "Required. The format of the audio byte stream.",
567-
"enum": [
568-
"AUDIO_ENCODING_UNSPECIFIED",
569-
"LINEAR16",
570-
"MP3",
571-
"OGG_OPUS",
572-
"MULAW",
573-
"ALAW"
574-
],
575-
"enumDescriptions": [
576-
"Not specified. Will return result google.rpc.Code.INVALID_ARGUMENT.",
577-
"Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16 also contains a WAV header.",
578-
"MP3 audio at 32kbps.",
579-
"Opus encoded audio wrapped in an ogg container. The result will be a file which can be played natively on Android, and in browsers (at least Chrome and Firefox). The quality of the encoding is considerably higher than MP3 while using approximately the same bitrate.",
580-
"8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. Audio content returned as MULAW also contains a WAV header.",
581-
"8-bit samples that compand 14-bit audio samples using G.711 PCMU/A-law. Audio content returned as ALAW also contains a WAV header."
582-
],
583-
"type": "string"
584-
},
585-
"sampleRateHertz": {
586-
"description": "Required. The sample rate (in hertz) for this audio.",
587-
"format": "int32",
588-
"type": "integer"
589-
}
590-
},
591-
"type": "object"
592-
},
593493
"ListOperationsResponse": {
594494
"description": "The response message for Operations.ListOperations.",
595495
"id": "ListOperationsResponse",
@@ -823,17 +723,6 @@
823723
},
824724
"type": "object"
825725
},
826-
"VoiceCloneParams": {
827-
"description": "The configuration of Voice Clone feature.",
828-
"id": "VoiceCloneParams",
829-
"properties": {
830-
"voiceCloningKey": {
831-
"description": "Required. Created by GenerateVoiceCloningKey.",
832-
"type": "string"
833-
}
834-
},
835-
"type": "object"
836-
},
837726
"VoiceSelectionParams": {
838727
"description": "Description of which voice to use for a synthesis request.",
839728
"id": "VoiceSelectionParams",
@@ -865,10 +754,6 @@
865754
"A gender-neutral voice. This voice is not yet supported."
866755
],
867756
"type": "string"
868-
},
869-
"voiceClone": {
870-
"$ref": "VoiceCloneParams",
871-
"description": "Optional. The configuration for a voice clone. If [VoiceCloneParams.voice_clone_key] is set, the service will choose the voice clone matching the specified configuration."
872757
}
873758
},
874759
"type": "object"

discovery/texttospeech-v1beta1.json

Lines changed: 1 addition & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -237,24 +237,6 @@
237237
},
238238
"voices": {
239239
"methods": {
240-
"generateVoiceCloningKey": {
241-
"description": "Generates voice clone key given a short voice prompt. This method validates the voice prompts with a series of checks against the voice talent statement to verify the voice clone is safe to generate.",
242-
"flatPath": "v1beta1/voices:generateVoiceCloningKey",
243-
"httpMethod": "POST",
244-
"id": "texttospeech.voices.generateVoiceCloningKey",
245-
"parameterOrder": [],
246-
"parameters": {},
247-
"path": "v1beta1/voices:generateVoiceCloningKey",
248-
"request": {
249-
"$ref": "GenerateVoiceCloningKeyRequest"
250-
},
251-
"response": {
252-
"$ref": "GenerateVoiceCloningKeyResponse"
253-
},
254-
"scopes": [
255-
"https://www.googleapis.com/auth/cloud-platform"
256-
]
257-
},
258240
"list": {
259241
"description": "Returns a list of Voice supported for synthesis.",
260242
"flatPath": "v1beta1/voices",
@@ -279,7 +261,7 @@
279261
}
280262
}
281263
},
282-
"revision": "20241001",
264+
"revision": "20241008",
283265
"rootUrl": "https://texttospeech.googleapis.com/",
284266
"schemas": {
285267
"AdvancedVoiceOptions": {
@@ -418,40 +400,6 @@
418400
},
419401
"type": "object"
420402
},
421-
"GenerateVoiceCloningKeyRequest": {
422-
"description": "Request message for the `GenerateVoiceCloningKey` method.",
423-
"id": "GenerateVoiceCloningKeyRequest",
424-
"properties": {
425-
"consentScript": {
426-
"description": "Required. The script used for the voice talent statement. The script will be provided to the caller through other channels. It must be returned unchanged in this field.",
427-
"type": "string"
428-
},
429-
"languageCode": {
430-
"description": "Required. The language of the supplied audio as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. Example: \"en-US\". See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes.",
431-
"type": "string"
432-
},
433-
"referenceAudio": {
434-
"$ref": "InputAudio",
435-
"description": "Required. The training audio used to create voice clone. This is currently limited to LINEAR16 PCM WAV files mono audio with 24khz sample rate. This needs to be specified in [InputAudio.audio_config], other values will be explicitly rejected."
436-
},
437-
"voiceTalentConsent": {
438-
"$ref": "InputAudio",
439-
"description": "Required. The voice talent audio used to verify consent to voice clone."
440-
}
441-
},
442-
"type": "object"
443-
},
444-
"GenerateVoiceCloningKeyResponse": {
445-
"description": "Response message for the `GenerateVoiceCloningKey` method.",
446-
"id": "GenerateVoiceCloningKeyResponse",
447-
"properties": {
448-
"voiceCloningKey": {
449-
"description": "The voice clone key. Use it in the SynthesizeSpeechRequest by setting [voice.voice_clone.voice_cloning_key].",
450-
"type": "string"
451-
}
452-
},
453-
"type": "object"
454-
},
455403
"GoogleCloudTexttospeechV1beta1SynthesizeLongAudioMetadata": {
456404
"description": "Metadata for response returned by the `SynthesizeLongAudio` method.",
457405
"id": "GoogleCloudTexttospeechV1beta1SynthesizeLongAudioMetadata",
@@ -475,56 +423,6 @@
475423
},
476424
"type": "object"
477425
},
478-
"InputAudio": {
479-
"description": "Holds audio content and config.",
480-
"id": "InputAudio",
481-
"properties": {
482-
"audioConfig": {
483-
"$ref": "InputAudioConfig",
484-
"description": "Required. Provides information that specifies how to process content."
485-
},
486-
"content": {
487-
"description": "Required. The audio data bytes encoded as specified in `InputAudioConfig`. Note: as with all bytes fields, proto buffers use a pure binary representation, whereas JSON representations use base64. Audio samples should be between 5-25 seconds in length.",
488-
"format": "byte",
489-
"type": "string"
490-
}
491-
},
492-
"type": "object"
493-
},
494-
"InputAudioConfig": {
495-
"description": "Description of inputted audio data.",
496-
"id": "InputAudioConfig",
497-
"properties": {
498-
"audioEncoding": {
499-
"description": "Required. The format of the audio byte stream.",
500-
"enum": [
501-
"AUDIO_ENCODING_UNSPECIFIED",
502-
"LINEAR16",
503-
"MP3",
504-
"MP3_64_KBPS",
505-
"OGG_OPUS",
506-
"MULAW",
507-
"ALAW"
508-
],
509-
"enumDescriptions": [
510-
"Not specified. Will return result google.rpc.Code.INVALID_ARGUMENT.",
511-
"Uncompressed 16-bit signed little-endian samples (Linear PCM). Audio content returned as LINEAR16 also contains a WAV header.",
512-
"MP3 audio at 32kbps.",
513-
"MP3 at 64kbps.",
514-
"Opus encoded audio wrapped in an ogg container. The result will be a file which can be played natively on Android, and in browsers (at least Chrome and Firefox). The quality of the encoding is considerably higher than MP3 while using approximately the same bitrate.",
515-
"8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. Audio content returned as MULAW also contains a WAV header.",
516-
"8-bit samples that compand 14-bit audio samples using G.711 PCMU/A-law. Audio content returned as ALAW also contains a WAV header."
517-
],
518-
"type": "string"
519-
},
520-
"sampleRateHertz": {
521-
"description": "Required. The sample rate (in hertz) for this audio.",
522-
"format": "int32",
523-
"type": "integer"
524-
}
525-
},
526-
"type": "object"
527-
},
528426
"ListOperationsResponse": {
529427
"description": "The response message for Operations.ListOperations.",
530428
"id": "ListOperationsResponse",
@@ -800,17 +698,6 @@
800698
},
801699
"type": "object"
802700
},
803-
"VoiceCloneParams": {
804-
"description": "The configuration of Voice Clone feature.",
805-
"id": "VoiceCloneParams",
806-
"properties": {
807-
"voiceCloningKey": {
808-
"description": "Required. Created by GenerateVoiceCloningKey.",
809-
"type": "string"
810-
}
811-
},
812-
"type": "object"
813-
},
814701
"VoiceSelectionParams": {
815702
"description": "Description of which voice to use for a synthesis request.",
816703
"id": "VoiceSelectionParams",
@@ -842,10 +729,6 @@
842729
"A gender-neutral voice. This voice is not yet supported."
843730
],
844731
"type": "string"
845-
},
846-
"voiceClone": {
847-
"$ref": "VoiceCloneParams",
848-
"description": "Optional. The configuration for a voice clone. If [VoiceCloneParams.voice_clone_key] is set, the service will choose the voice clone matching the specified configuration."
849732
}
850733
},
851734
"type": "object"

0 commit comments

Comments
 (0)