diff --git a/generation_config.yaml b/generation_config.yaml
index 0f47c81c37d6..3f5ce705fa25 100644
--- a/generation_config.yaml
+++ b/generation_config.yaml
@@ -1,5 +1,5 @@
 gapic_generator_version: 2.62.0
-googleapis_commitish: 525c95a7a122ec2869ae06cd02fa5013819463f6
+googleapis_commitish: b738e78ed63effb7d199ed2d61c9e03291b6077f
 libraries_bom_version: 26.66.0
 
 # the libraries are ordered with respect to library name, which is
diff --git a/java-texttospeech/README.md b/java-texttospeech/README.md
index 2c4af4ed5729..272308a7a603 100644
--- a/java-texttospeech/README.md
+++ b/java-texttospeech/README.md
@@ -42,20 +42,20 @@ If you are using Maven without the BOM, add this to your dependencies:
 <dependency>
   <groupId>com.google.cloud</groupId>
   <artifactId>google-cloud-texttospeech</artifactId>
-  <version>2.73.0</version>
+  <version>2.74.0</version>
 </dependency>
 ```
 
 If you are using Gradle without BOM, add this to your dependencies:
 
 ```Groovy
-implementation 'com.google.cloud:google-cloud-texttospeech:2.73.0'
+implementation 'com.google.cloud:google-cloud-texttospeech:2.74.0'
 ```
 
 If you are using SBT, add this to your dependencies:
 
 ```Scala
-libraryDependencies += "com.google.cloud" % "google-cloud-texttospeech" % "2.73.0"
+libraryDependencies += "com.google.cloud" % "google-cloud-texttospeech" % "2.74.0"
 ```
 
 ## Authentication
@@ -194,7 +194,7 @@ Java is a registered trademark of Oracle and/or its affiliates.
 [kokoro-badge-link-5]: http://storage.googleapis.com/cloud-devrel-public/java/badges/google-cloud-java/java11.html
 [stability-image]: https://img.shields.io/badge/stability-stable-green
 [maven-version-image]: https://img.shields.io/maven-central/v/com.google.cloud/google-cloud-texttospeech.svg
-[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-texttospeech/2.73.0
+[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-texttospeech/2.74.0
 [authentication]: https://github.com/googleapis/google-cloud-java#authentication
 [auth-scopes]: https://developers.google.com/identity/protocols/oauth2/scopes
 [predefined-iam-roles]: https://cloud.google.com/iam/docs/understanding-roles#predefined_roles
diff --git a/java-texttospeech/google-cloud-texttospeech/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechClient.java b/java-texttospeech/google-cloud-texttospeech/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechClient.java
index 06ba226a6f77..edfdad323b53 100644
--- a/java-texttospeech/google-cloud-texttospeech/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechClient.java
+++ b/java-texttospeech/google-cloud-texttospeech/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechClient.java
@@ -93,7 +93,7 @@
  *    </tr>
  *    <tr>
  *      <td><p> StreamingSynthesize</td>
- *      <td><p> Performs bidirectional streaming speech synthesis: receive audio while sending text.</td>
+ *      <td><p> Performs bidirectional streaming speech synthesis: receives audio while sending text.</td>
  *      <td>
  *      <p>Callable method variants take no parameters and return an immutable API callable object, which can be used to initiate calls to the service.</p>
  *      <ul>
@@ -393,7 +393,7 @@ public final SynthesizeSpeechResponse synthesizeSpeech(SynthesizeSpeechRequest r
 
   // AUTO-GENERATED DOCUMENTATION AND METHOD.
   /**
-   * Performs bidirectional streaming speech synthesis: receive audio while sending text.
+   * Performs bidirectional streaming speech synthesis: receives audio while sending text.
    *
    * <p>Sample code:
    *
diff --git a/java-texttospeech/grpc-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechGrpc.java b/java-texttospeech/grpc-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechGrpc.java
index 21afc920a1ac..050b402d6827 100644
--- a/java-texttospeech/grpc-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechGrpc.java
+++ b/java-texttospeech/grpc-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechGrpc.java
@@ -275,7 +275,7 @@ default void synthesizeSpeech(
      *
      *
      * <pre>
-     * Performs bidirectional streaming speech synthesis: receive audio while
+     * Performs bidirectional streaming speech synthesis: receives audio while
      * sending text.
      * </pre>
      */
@@ -361,7 +361,7 @@ public void synthesizeSpeech(
      *
      *
      * <pre>
-     * Performs bidirectional streaming speech synthesis: receive audio while
+     * Performs bidirectional streaming speech synthesis: receives audio while
      * sending text.
      * </pre>
      */
@@ -426,7 +426,7 @@ public com.google.cloud.texttospeech.v1beta1.SynthesizeSpeechResponse synthesize
      *
      *
      * <pre>
-     * Performs bidirectional streaming speech synthesis: receive audio while
+     * Performs bidirectional streaming speech synthesis: receives audio while
      * sending text.
      * </pre>
      */
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptions.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptions.java
index e76337192eaa..2b34045bf96f 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptions.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptions.java
@@ -70,8 +70,8 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
    *
    *
    * <pre>
-   * Only for Journey voices. If false, the synthesis will be context aware
-   * and have higher latency.
+   * Only for Journey voices. If false, the synthesis is context aware
+   * and has a higher latency.
    * </pre>
    *
    * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -87,8 +87,8 @@ public boolean hasLowLatencyJourneySynthesis() {
    *
    *
    * <pre>
-   * Only for Journey voices. If false, the synthesis will be context aware
-   * and have higher latency.
+   * Only for Journey voices. If false, the synthesis is context aware
+   * and has a higher latency.
    * </pre>
    *
    * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -457,8 +457,8 @@ public Builder mergeFrom(
      *
      *
      * <pre>
-     * Only for Journey voices. If false, the synthesis will be context aware
-     * and have higher latency.
+     * Only for Journey voices. If false, the synthesis is context aware
+     * and has a higher latency.
      * </pre>
      *
      * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -474,8 +474,8 @@ public boolean hasLowLatencyJourneySynthesis() {
      *
      *
      * <pre>
-     * Only for Journey voices. If false, the synthesis will be context aware
-     * and have higher latency.
+     * Only for Journey voices. If false, the synthesis is context aware
+     * and has a higher latency.
      * </pre>
      *
      * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -491,8 +491,8 @@ public boolean getLowLatencyJourneySynthesis() {
      *
      *
      * <pre>
-     * Only for Journey voices. If false, the synthesis will be context aware
-     * and have higher latency.
+     * Only for Journey voices. If false, the synthesis is context aware
+     * and has a higher latency.
      * </pre>
      *
      * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -512,8 +512,8 @@ public Builder setLowLatencyJourneySynthesis(boolean value) {
      *
      *
      * <pre>
-     * Only for Journey voices. If false, the synthesis will be context aware
-     * and have higher latency.
+     * Only for Journey voices. If false, the synthesis is context aware
+     * and has a higher latency.
      * </pre>
      *
      * <code>optional bool low_latency_journey_synthesis = 1;</code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptionsOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptionsOrBuilder.java
index 8c2e56aa16de..e8b6f3b17f5b 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptionsOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AdvancedVoiceOptionsOrBuilder.java
@@ -28,8 +28,8 @@ public interface AdvancedVoiceOptionsOrBuilder
    *
    *
    * <pre>
-   * Only for Journey voices. If false, the synthesis will be context aware
-   * and have higher latency.
+   * Only for Journey voices. If false, the synthesis is context aware
+   * and has a higher latency.
    * </pre>
    *
    * <code>optional bool low_latency_journey_synthesis = 1;</code>
@@ -42,8 +42,8 @@ public interface AdvancedVoiceOptionsOrBuilder
    *
    *
    * <pre>
-   * Only for Journey voices. If false, the synthesis will be context aware
-   * and have higher latency.
+   * Only for Journey voices. If false, the synthesis is context aware
+   * and has a higher latency.
    * </pre>
    *
    * <code>optional bool low_latency_journey_synthesis = 1;</code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfig.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfig.java
index 58fb220994af..e1e15655027e 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfig.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfig.java
@@ -115,10 +115,10 @@ public com.google.cloud.texttospeech.v1beta1.AudioEncoding getAudioEncoding() {
    *
    *
    * <pre>
-   * Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+   * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
    * the normal native speed supported by the specific voice. 2.0 is twice as
    * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-   * speed. Any other values &lt; 0.25 or &gt; 4.0 will return an error.
+   * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
    * </pre>
    *
    * <code>
@@ -899,10 +899,10 @@ public Builder clearAudioEncoding() {
      *
      *
      * <pre>
-     * Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
      * the normal native speed supported by the specific voice. 2.0 is twice as
      * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-     * speed. Any other values &lt; 0.25 or &gt; 4.0 will return an error.
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
      * </pre>
      *
      * <code>
@@ -920,10 +920,10 @@ public double getSpeakingRate() {
      *
      *
      * <pre>
-     * Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
      * the normal native speed supported by the specific voice. 2.0 is twice as
      * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-     * speed. Any other values &lt; 0.25 or &gt; 4.0 will return an error.
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
      * </pre>
      *
      * <code>
@@ -945,10 +945,10 @@ public Builder setSpeakingRate(double value) {
      *
      *
      * <pre>
-     * Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
      * the normal native speed supported by the specific voice. 2.0 is twice as
      * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-     * speed. Any other values &lt; 0.25 or &gt; 4.0 will return an error.
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
      * </pre>
      *
      * <code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfigOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfigOrBuilder.java
index 3ccd85faeaa1..118047e27e80 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfigOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioConfigOrBuilder.java
@@ -58,10 +58,10 @@ public interface AudioConfigOrBuilder
    *
    *
    * <pre>
-   * Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+   * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
    * the normal native speed supported by the specific voice. 2.0 is twice as
    * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-   * speed. Any other values &lt; 0.25 or &gt; 4.0 will return an error.
+   * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
    * </pre>
    *
    * <code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioEncoding.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioEncoding.java
index 1b04d3f7c617..b00f48d07ebe 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioEncoding.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/AudioEncoding.java
@@ -34,7 +34,8 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    *
    * <pre>
-   * Not specified. Will return result
+   * Not specified. Only used by GenerateVoiceCloningKey. Otherwise, returns
+   * result
    * [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
    * </pre>
    *
@@ -76,7 +77,7 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    *
    * <pre>
-   * Opus encoded audio wrapped in an ogg container. The result will be a
+   * Opus encoded audio wrapped in an ogg container. The result is a
    * file which can be played natively on Android, and in browsers (at least
    * Chrome and Firefox). The quality of the encoding is considerably higher
    * than MP3 while using approximately the same bitrate.
@@ -112,13 +113,23 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    * <pre>
    * Uncompressed 16-bit signed little-endian samples (Linear PCM).
-   * Note that as opposed to LINEAR16, audio will not be wrapped in a WAV (or
+   * Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
    * any other) header.
    * </pre>
    *
    * <code>PCM = 7;</code>
    */
   PCM(7),
+  /**
+   *
+   *
+   * <pre>
+   * M4A audio.
+   * </pre>
+   *
+   * <code>M4A = 8;</code>
+   */
+  M4A(8),
   UNRECOGNIZED(-1),
   ;
 
@@ -126,7 +137,8 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    *
    * <pre>
-   * Not specified. Will return result
+   * Not specified. Only used by GenerateVoiceCloningKey. Otherwise, returns
+   * result
    * [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
    * </pre>
    *
@@ -172,7 +184,7 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    *
    * <pre>
-   * Opus encoded audio wrapped in an ogg container. The result will be a
+   * Opus encoded audio wrapped in an ogg container. The result is a
    * file which can be played natively on Android, and in browsers (at least
    * Chrome and Firefox). The quality of the encoding is considerably higher
    * than MP3 while using approximately the same bitrate.
@@ -211,7 +223,7 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    *
    * <pre>
    * Uncompressed 16-bit signed little-endian samples (Linear PCM).
-   * Note that as opposed to LINEAR16, audio will not be wrapped in a WAV (or
+   * Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
    * any other) header.
    * </pre>
    *
@@ -219,6 +231,17 @@ public enum AudioEncoding implements com.google.protobuf.ProtocolMessageEnum {
    */
   public static final int PCM_VALUE = 7;
 
+  /**
+   *
+   *
+   * <pre>
+   * M4A audio.
+   * </pre>
+   *
+   * <code>M4A = 8;</code>
+   */
+  public static final int M4A_VALUE = 8;
+
   public final int getNumber() {
     if (this == UNRECOGNIZED) {
       throw new java.lang.IllegalArgumentException(
@@ -259,6 +282,8 @@ public static AudioEncoding forNumber(int value) {
         return ALAW;
       case 7:
         return PCM;
+      case 8:
+        return M4A;
       default:
         return null;
     }
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParams.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParams.java
index 938ab454bfbd..60af22ffda3c 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParams.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParams.java
@@ -91,7 +91,7 @@ public enum PhoneticEncoding implements com.google.protobuf.ProtocolMessageEnum
      *
      *
      * <pre>
-     * IPA. (e.g. apple -&gt; ˈæpəl )
+     * IPA, such as apple -&gt; ˈæpəl.
      * https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
      * </pre>
      *
@@ -102,13 +102,50 @@ public enum PhoneticEncoding implements com.google.protobuf.ProtocolMessageEnum
      *
      *
      * <pre>
-     * X-SAMPA (e.g. apple -&gt; "{p&#64;l" )
+     * X-SAMPA, such as apple -&gt; "{p&#64;l".
      * https://en.wikipedia.org/wiki/X-SAMPA
      * </pre>
      *
      * <code>PHONETIC_ENCODING_X_SAMPA = 2;</code>
      */
     PHONETIC_ENCODING_X_SAMPA(2),
+    /**
+     *
+     *
+     * <pre>
+     * For reading-to-pronunciation conversion to work well, the `pronunciation`
+     * field should only contain Kanji, Hiragana, and Katakana.
+     *
+     * The pronunciation can also contain pitch accents.
+     * The start of a pitch phrase is specified with `^` and the down-pitch
+     * position is specified with `!`, for example:
+     *
+     *     phrase:端  pronunciation:^はし
+     *     phrase:箸  pronunciation:^は!し
+     *     phrase:橋  pronunciation:^はし!
+     *
+     * We currently only support the Tokyo dialect, which allows at most one
+     * down-pitch per phrase (that is, at most one `!` between consecutive `^` markers).
+     * </pre>
+     *
+     * <code>PHONETIC_ENCODING_JAPANESE_YOMIGANA = 3;</code>
+     */
+    PHONETIC_ENCODING_JAPANESE_YOMIGANA(3),
+    /**
+     *
+     *
+     * <pre>
+     * Used to specify pronunciations for Mandarin words. See
+     * https://en.wikipedia.org/wiki/Pinyin.
+     *
+     * For example: 朝阳, the pronunciation is "chao2 yang2". The number
+     * represents the tone, and there is a space between syllables. Neutral
+     * tones are represented by 5, for example 孩子 "hai2 zi5".
+     * </pre>
+     *
+     * <code>PHONETIC_ENCODING_PINYIN = 4;</code>
+     */
+    PHONETIC_ENCODING_PINYIN(4),
     UNRECOGNIZED(-1),
     ;
 
@@ -127,7 +164,7 @@ public enum PhoneticEncoding implements com.google.protobuf.ProtocolMessageEnum
      *
      *
      * <pre>
-     * IPA. (e.g. apple -&gt; ˈæpəl )
+     * IPA, such as apple -&gt; ˈæpəl.
      * https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
      * </pre>
      *
@@ -139,7 +176,7 @@ public enum PhoneticEncoding implements com.google.protobuf.ProtocolMessageEnum
      *
      *
      * <pre>
-     * X-SAMPA (e.g. apple -&gt; "{p&#64;l" )
+     * X-SAMPA, such as apple -&gt; "{p&#64;l".
      * https://en.wikipedia.org/wiki/X-SAMPA
      * </pre>
      *
@@ -147,6 +184,45 @@ public enum PhoneticEncoding implements com.google.protobuf.ProtocolMessageEnum
      */
     public static final int PHONETIC_ENCODING_X_SAMPA_VALUE = 2;
 
+    /**
+     *
+     *
+     * <pre>
+     * For reading-to-pronunciation conversion to work well, the `pronunciation`
+     * field should only contain Kanji, Hiragana, and Katakana.
+     *
+     * The pronunciation can also contain pitch accents.
+     * The start of a pitch phrase is specified with `^` and the down-pitch
+     * position is specified with `!`, for example:
+     *
+     *     phrase:端  pronunciation:^はし
+     *     phrase:箸  pronunciation:^は!し
+     *     phrase:橋  pronunciation:^はし!
+     *
+     * We currently only support the Tokyo dialect, which allows at most one
+     * down-pitch per phrase (that is, at most one `!` between consecutive `^` markers).
+     * </pre>
+     *
+     * <code>PHONETIC_ENCODING_JAPANESE_YOMIGANA = 3;</code>
+     */
+    public static final int PHONETIC_ENCODING_JAPANESE_YOMIGANA_VALUE = 3;
+
+    /**
+     *
+     *
+     * <pre>
+     * Used to specify pronunciations for Mandarin words. See
+     * https://en.wikipedia.org/wiki/Pinyin.
+     *
+     * For example: 朝阳, the pronunciation is "chao2 yang2". The number
+     * represents the tone, and there is a space between syllables. Neutral
+     * tones are represented by 5, for example 孩子 "hai2 zi5".
+     * </pre>
+     *
+     * <code>PHONETIC_ENCODING_PINYIN = 4;</code>
+     */
+    public static final int PHONETIC_ENCODING_PINYIN_VALUE = 4;
+
     public final int getNumber() {
       if (this == UNRECOGNIZED) {
         throw new java.lang.IllegalArgumentException(
@@ -177,6 +253,10 @@ public static PhoneticEncoding forNumber(int value) {
           return PHONETIC_ENCODING_IPA;
         case 2:
           return PHONETIC_ENCODING_X_SAMPA;
+        case 3:
+          return PHONETIC_ENCODING_JAPANESE_YOMIGANA;
+        case 4:
+          return PHONETIC_ENCODING_PINYIN;
         default:
           return null;
       }
@@ -244,9 +324,9 @@ private PhoneticEncoding(int value) {
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
@@ -262,9 +342,9 @@ public boolean hasPhrase() {
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
@@ -288,9 +368,9 @@ public java.lang.String getPhrase() {
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
@@ -866,9 +946,9 @@ public Builder mergeFrom(
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
@@ -883,9 +963,9 @@ public boolean hasPhrase() {
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
@@ -908,9 +988,9 @@ public java.lang.String getPhrase() {
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
@@ -933,9 +1013,9 @@ public com.google.protobuf.ByteString getPhraseBytes() {
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
@@ -957,9 +1037,9 @@ public Builder setPhrase(java.lang.String value) {
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
@@ -977,9 +1057,9 @@ public Builder clearPhrase() {
      *
      *
      * <pre>
-     * The phrase to which the customization will be applied.
-     * The phrase can be multiple words (in the case of proper nouns etc), but
-     * should not span to a whole sentence.
+     * The phrase to which the customization is applied.
+     * The phrase can be multiple words, such as proper nouns, but shouldn't span
+     * the length of the sentence.
      * </pre>
      *
      * <code>optional string phrase = 1;</code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParamsOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParamsOrBuilder.java
index a053c3639e23..7c0b9e416fac 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParamsOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationParamsOrBuilder.java
@@ -28,9 +28,9 @@ public interface CustomPronunciationParamsOrBuilder
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
@@ -43,9 +43,9 @@ public interface CustomPronunciationParamsOrBuilder
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
@@ -58,9 +58,9 @@ public interface CustomPronunciationParamsOrBuilder
    *
    *
    * <pre>
-   * The phrase to which the customization will be applied.
-   * The phrase can be multiple words (in the case of proper nouns etc), but
-   * should not span to a whole sentence.
+   * The phrase to which the customization is applied.
+   * The phrase can be multiple words, such as proper nouns, but shouldn't span
+   * the length of the sentence.
    * </pre>
    *
    * <code>optional string phrase = 1;</code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciations.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciations.java
index e3fe2d57db13..3cfa00dbeae6 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciations.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciations.java
@@ -74,7 +74,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations to apply.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -90,7 +90,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations to apply.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -107,7 +107,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations to apply.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -122,7 +122,7 @@ public int getPronunciationsCount() {
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations to apply.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -138,7 +138,7 @@ public com.google.cloud.texttospeech.v1beta1.CustomPronunciationParams getPronun
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations to apply.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -565,7 +565,7 @@ private void ensurePronunciationsIsMutable() {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -585,7 +585,7 @@ private void ensurePronunciationsIsMutable() {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -604,7 +604,7 @@ public int getPronunciationsCount() {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -624,7 +624,7 @@ public com.google.cloud.texttospeech.v1beta1.CustomPronunciationParams getPronun
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -650,7 +650,7 @@ public Builder setPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -674,7 +674,7 @@ public Builder setPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -700,7 +700,7 @@ public Builder addPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -726,7 +726,7 @@ public Builder addPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -749,7 +749,7 @@ public Builder addPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -773,7 +773,7 @@ public Builder addPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -798,7 +798,7 @@ public Builder addAllPronunciations(
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -820,7 +820,7 @@ public Builder clearPronunciations() {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -842,7 +842,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -858,7 +858,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -878,7 +878,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -899,7 +899,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -917,7 +917,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
@@ -936,7 +936,7 @@ public Builder removePronunciations(int index) {
      *
      *
      * <pre>
-     * The pronunciation customizations to be applied.
+     * The pronunciation customizations are applied.
      * </pre>
      *
      * <code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationsOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationsOrBuilder.java
index 6be9b64d7dde..71a5c81640dd 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationsOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomPronunciationsOrBuilder.java
@@ -28,7 +28,7 @@ public interface CustomPronunciationsOrBuilder
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations are applied.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -41,7 +41,7 @@ public interface CustomPronunciationsOrBuilder
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations are applied.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -53,7 +53,7 @@ public interface CustomPronunciationsOrBuilder
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations are applied.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -65,7 +65,7 @@ public interface CustomPronunciationsOrBuilder
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations are applied.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
@@ -78,7 +78,7 @@ public interface CustomPronunciationsOrBuilder
    *
    *
    * <pre>
-   * The pronunciation customizations to be applied.
+   * The pronunciation customizations are applied.
    * </pre>
    *
    * <code>repeated .google.cloud.texttospeech.v1beta1.CustomPronunciationParams pronunciations = 1;
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParams.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParams.java
index c66403384689..54ee242cb3c3 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParams.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParams.java
@@ -305,7 +305,7 @@ public com.google.protobuf.ByteString getModelBytes() {
    * </code>
    *
    * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
    * @return The enum numeric value on the wire for reportedUsage.
    */
   @java.lang.Override
@@ -326,7 +326,7 @@ public int getReportedUsageValue() {
    * </code>
    *
    * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
    * @return The reportedUsage.
    */
   @java.lang.Override
@@ -847,7 +847,7 @@ public Builder setModelBytes(com.google.protobuf.ByteString value) {
      * </code>
      *
      * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
      * @return The enum numeric value on the wire for reportedUsage.
      */
     @java.lang.Override
@@ -868,7 +868,7 @@ public int getReportedUsageValue() {
      * </code>
      *
      * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
      * @param value The enum numeric value on the wire for reportedUsage to set.
      * @return This builder for chaining.
      */
@@ -892,7 +892,7 @@ public Builder setReportedUsageValue(int value) {
      * </code>
      *
      * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
      * @return The reportedUsage.
      */
     @java.lang.Override
@@ -919,7 +919,7 @@ public Builder setReportedUsageValue(int value) {
      * </code>
      *
      * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
      * @param value The reportedUsage to set.
      * @return This builder for chaining.
      */
@@ -947,7 +947,7 @@ public Builder setReportedUsage(
      * </code>
      *
      * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+     *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
      * @return This builder for chaining.
      */
     @java.lang.Deprecated
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParamsOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParamsOrBuilder.java
index d770e563d83c..37c84e2e78fb 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParamsOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/CustomVoiceParamsOrBuilder.java
@@ -66,7 +66,7 @@ public interface CustomVoiceParamsOrBuilder
    * </code>
    *
    * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
    * @return The enum numeric value on the wire for reportedUsage.
    */
   @java.lang.Deprecated
@@ -84,7 +84,7 @@ public interface CustomVoiceParamsOrBuilder
    * </code>
    *
    * @deprecated google.cloud.texttospeech.v1beta1.CustomVoiceParams.reported_usage is deprecated.
-   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=402
+   *     See google/cloud/texttospeech/v1beta1/cloud_tts.proto;l=435
    * @return The reportedUsage.
    */
   @java.lang.Deprecated
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/MultiSpeakerMarkup.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/MultiSpeakerMarkup.java
index 7a9150f04736..39bb3c016c71 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/MultiSpeakerMarkup.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/MultiSpeakerMarkup.java
@@ -128,7 +128,7 @@ public interface TurnOrBuilder
    *
    *
    * <pre>
-   * A Multi-speaker turn.
+   * A multi-speaker turn.
    * </pre>
    *
    * Protobuf type {@code google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup.Turn}
@@ -453,7 +453,7 @@ protected Builder newBuilderForType(
      *
      *
      * <pre>
-     * A Multi-speaker turn.
+     * A multi-speaker turn.
      * </pre>
      *
      * Protobuf type {@code google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup.Turn}
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfig.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfig.java
index 494faedc0b48..1e116b757595 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfig.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfig.java
@@ -72,8 +72,8 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
    *
    * <pre>
    * Required. The format of the audio byte stream.
-   * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-   * will return an error.
+   * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+   * return an error.
    * </pre>
    *
    * <code>
@@ -92,8 +92,8 @@ public int getAudioEncodingValue() {
    *
    * <pre>
    * Required. The format of the audio byte stream.
-   * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-   * will return an error.
+   * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+   * return an error.
    * </pre>
    *
    * <code>
@@ -130,6 +130,30 @@ public int getSampleRateHertz() {
     return sampleRateHertz_;
   }
 
+  public static final int SPEAKING_RATE_FIELD_NUMBER = 3;
+  private double speakingRate_ = 0D;
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+   * the normal native speed supported by the specific voice. 2.0 is twice as
+   * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+   * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
+   * </pre>
+   *
+   * <code>
+   * double speaking_rate = 3 [(.google.api.field_behavior) = INPUT_ONLY, (.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return The speakingRate.
+   */
+  @java.lang.Override
+  public double getSpeakingRate() {
+    return speakingRate_;
+  }
+
   private byte memoizedIsInitialized = -1;
 
   @java.lang.Override
@@ -152,6 +176,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io
     if (sampleRateHertz_ != 0) {
       output.writeInt32(2, sampleRateHertz_);
     }
+    if (java.lang.Double.doubleToRawLongBits(speakingRate_) != 0) {
+      output.writeDouble(3, speakingRate_);
+    }
     getUnknownFields().writeTo(output);
   }
 
@@ -169,6 +196,9 @@ public int getSerializedSize() {
     if (sampleRateHertz_ != 0) {
       size += com.google.protobuf.CodedOutputStream.computeInt32Size(2, sampleRateHertz_);
     }
+    if (java.lang.Double.doubleToRawLongBits(speakingRate_) != 0) {
+      size += com.google.protobuf.CodedOutputStream.computeDoubleSize(3, speakingRate_);
+    }
     size += getUnknownFields().getSerializedSize();
     memoizedSize = size;
     return size;
@@ -187,6 +217,8 @@ public boolean equals(final java.lang.Object obj) {
 
     if (audioEncoding_ != other.audioEncoding_) return false;
     if (getSampleRateHertz() != other.getSampleRateHertz()) return false;
+    if (java.lang.Double.doubleToLongBits(getSpeakingRate())
+        != java.lang.Double.doubleToLongBits(other.getSpeakingRate())) return false;
     if (!getUnknownFields().equals(other.getUnknownFields())) return false;
     return true;
   }
@@ -202,6 +234,11 @@ public int hashCode() {
     hash = (53 * hash) + audioEncoding_;
     hash = (37 * hash) + SAMPLE_RATE_HERTZ_FIELD_NUMBER;
     hash = (53 * hash) + getSampleRateHertz();
+    hash = (37 * hash) + SPEAKING_RATE_FIELD_NUMBER;
+    hash =
+        (53 * hash)
+            + com.google.protobuf.Internal.hashLong(
+                java.lang.Double.doubleToLongBits(getSpeakingRate()));
     hash = (29 * hash) + getUnknownFields().hashCode();
     memoizedHashCode = hash;
     return hash;
@@ -345,6 +382,7 @@ public Builder clear() {
       bitField0_ = 0;
       audioEncoding_ = 0;
       sampleRateHertz_ = 0;
+      speakingRate_ = 0D;
       return this;
     }
 
@@ -387,6 +425,9 @@ private void buildPartial0(com.google.cloud.texttospeech.v1beta1.StreamingAudioC
       if (((from_bitField0_ & 0x00000002) != 0)) {
         result.sampleRateHertz_ = sampleRateHertz_;
       }
+      if (((from_bitField0_ & 0x00000004) != 0)) {
+        result.speakingRate_ = speakingRate_;
+      }
     }
 
     @java.lang.Override
@@ -441,6 +482,9 @@ public Builder mergeFrom(com.google.cloud.texttospeech.v1beta1.StreamingAudioCon
       if (other.getSampleRateHertz() != 0) {
         setSampleRateHertz(other.getSampleRateHertz());
       }
+      if (other.getSpeakingRate() != 0D) {
+        setSpeakingRate(other.getSpeakingRate());
+      }
       this.mergeUnknownFields(other.getUnknownFields());
       onChanged();
       return this;
@@ -479,6 +523,12 @@ public Builder mergeFrom(
                 bitField0_ |= 0x00000002;
                 break;
               } // case 16
+            case 25:
+              {
+                speakingRate_ = input.readDouble();
+                bitField0_ |= 0x00000004;
+                break;
+              } // case 25
             default:
               {
                 if (!super.parseUnknownField(input, extensionRegistry, tag)) {
@@ -505,8 +555,8 @@ public Builder mergeFrom(
      *
      * <pre>
      * Required. The format of the audio byte stream.
-     * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-     * will return an error.
+     * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+     * return an error.
      * </pre>
      *
      * <code>
@@ -525,8 +575,8 @@ public int getAudioEncodingValue() {
      *
      * <pre>
      * Required. The format of the audio byte stream.
-     * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-     * will return an error.
+     * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+     * return an error.
      * </pre>
      *
      * <code>
@@ -548,8 +598,8 @@ public Builder setAudioEncodingValue(int value) {
      *
      * <pre>
      * Required. The format of the audio byte stream.
-     * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-     * will return an error.
+     * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+     * return an error.
      * </pre>
      *
      * <code>
@@ -572,8 +622,8 @@ public com.google.cloud.texttospeech.v1beta1.AudioEncoding getAudioEncoding() {
      *
      * <pre>
      * Required. The format of the audio byte stream.
-     * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-     * will return an error.
+     * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+     * return an error.
      * </pre>
      *
      * <code>
@@ -598,8 +648,8 @@ public Builder setAudioEncoding(com.google.cloud.texttospeech.v1beta1.AudioEncod
      *
      * <pre>
      * Required. The format of the audio byte stream.
-     * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-     * will return an error.
+     * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+     * return an error.
      * </pre>
      *
      * <code>
@@ -671,6 +721,77 @@ public Builder clearSampleRateHertz() {
       return this;
     }
 
+    private double speakingRate_;
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+     * the normal native speed supported by the specific voice. 2.0 is twice as
+     * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
+     * </pre>
+     *
+     * <code>
+     * double speaking_rate = 3 [(.google.api.field_behavior) = INPUT_ONLY, (.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     *
+     * @return The speakingRate.
+     */
+    @java.lang.Override
+    public double getSpeakingRate() {
+      return speakingRate_;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+     * the normal native speed supported by the specific voice. 2.0 is twice as
+     * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
+     * </pre>
+     *
+     * <code>
+     * double speaking_rate = 3 [(.google.api.field_behavior) = INPUT_ONLY, (.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     *
+     * @param value The speakingRate to set.
+     * @return This builder for chaining.
+     */
+    public Builder setSpeakingRate(double value) {
+
+      speakingRate_ = value;
+      bitField0_ |= 0x00000004;
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+     * the normal native speed supported by the specific voice. 2.0 is twice as
+     * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+     * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
+     * </pre>
+     *
+     * <code>
+     * double speaking_rate = 3 [(.google.api.field_behavior) = INPUT_ONLY, (.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     *
+     * @return This builder for chaining.
+     */
+    public Builder clearSpeakingRate() {
+      bitField0_ = (bitField0_ & ~0x00000004);
+      speakingRate_ = 0D;
+      onChanged();
+      return this;
+    }
+
     @java.lang.Override
     public final Builder setUnknownFields(final com.google.protobuf.UnknownFieldSet unknownFields) {
       return super.setUnknownFields(unknownFields);
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfigOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfigOrBuilder.java
index 5fc35ab1c5aa..554858757e18 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfigOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingAudioConfigOrBuilder.java
@@ -29,8 +29,8 @@ public interface StreamingAudioConfigOrBuilder
    *
    * <pre>
    * Required. The format of the audio byte stream.
-   * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-   * will return an error.
+   * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+   * return an error.
    * </pre>
    *
    * <code>
@@ -46,8 +46,8 @@ public interface StreamingAudioConfigOrBuilder
    *
    * <pre>
    * Required. The format of the audio byte stream.
-   * For now, streaming only supports PCM and OGG_OPUS. All other encodings
-   * will return an error.
+   * Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+   * return an error.
    * </pre>
    *
    * <code>
@@ -70,4 +70,22 @@ public interface StreamingAudioConfigOrBuilder
    * @return The sampleRateHertz.
    */
   int getSampleRateHertz();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+   * the normal native speed supported by the specific voice. 2.0 is twice as
+   * fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+   * speed. Any other values &lt; 0.25 or &gt; 2.0 will return an error.
+   * </pre>
+   *
+   * <code>
+   * double speaking_rate = 3 [(.google.api.field_behavior) = INPUT_ONLY, (.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return The speakingRate.
+   */
+  double getSpeakingRate();
 }
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInput.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInput.java
index 213e8170aa7b..1de63ec338c2 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInput.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInput.java
@@ -39,7 +39,9 @@ private StreamingSynthesisInput(com.google.protobuf.GeneratedMessageV3.Builder<?
     super(builder);
   }
 
-  private StreamingSynthesisInput() {}
+  private StreamingSynthesisInput() {
+    prompt_ = "";
+  }
 
   @java.lang.Override
   @SuppressWarnings({"unused"})
@@ -62,6 +64,7 @@ public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
             com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput.Builder.class);
   }
 
+  private int bitField0_;
   private int inputSourceCase_ = 0;
 
   @SuppressWarnings("serial")
@@ -72,6 +75,7 @@ public enum InputSourceCase
           com.google.protobuf.Internal.EnumLite,
           com.google.protobuf.AbstractMessage.InternalOneOfEnum {
     TEXT(1),
+    MARKUP(5),
     INPUTSOURCE_NOT_SET(0);
     private final int value;
 
@@ -93,6 +97,8 @@ public static InputSourceCase forNumber(int value) {
       switch (value) {
         case 1:
           return TEXT;
+        case 5:
+          return MARKUP;
         case 0:
           return INPUTSOURCE_NOT_SET;
         default:
@@ -116,9 +122,8 @@ public InputSourceCase getInputSourceCase() {
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -134,9 +139,8 @@ public boolean hasText() {
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -165,9 +169,8 @@ public java.lang.String getText() {
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -191,6 +194,151 @@ public com.google.protobuf.ByteString getTextBytes() {
     }
   }
 
+  public static final int MARKUP_FIELD_NUMBER = 5;
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return Whether the markup field is set.
+   */
+  public boolean hasMarkup() {
+    return inputSourceCase_ == 5;
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The markup.
+   */
+  public java.lang.String getMarkup() {
+    java.lang.Object ref = "";
+    if (inputSourceCase_ == 5) {
+      ref = inputSource_;
+    }
+    if (ref instanceof java.lang.String) {
+      return (java.lang.String) ref;
+    } else {
+      com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+      java.lang.String s = bs.toStringUtf8();
+      if (inputSourceCase_ == 5) {
+        inputSource_ = s;
+      }
+      return s;
+    }
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The bytes for markup.
+   */
+  public com.google.protobuf.ByteString getMarkupBytes() {
+    java.lang.Object ref = "";
+    if (inputSourceCase_ == 5) {
+      ref = inputSource_;
+    }
+    if (ref instanceof java.lang.String) {
+      com.google.protobuf.ByteString b =
+          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+      if (inputSourceCase_ == 5) {
+        inputSource_ = b;
+      }
+      return b;
+    } else {
+      return (com.google.protobuf.ByteString) ref;
+    }
+  }
+
+  public static final int PROMPT_FIELD_NUMBER = 6;
+
+  @SuppressWarnings("serial")
+  private volatile java.lang.Object prompt_ = "";
+
+  /**
+   *
+   *
+   * <pre>
+   * This is system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return Whether the prompt field is set.
+   */
+  @java.lang.Override
+  public boolean hasPrompt() {
+    return ((bitField0_ & 0x00000001) != 0);
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * This is system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return The prompt.
+   */
+  @java.lang.Override
+  public java.lang.String getPrompt() {
+    java.lang.Object ref = prompt_;
+    if (ref instanceof java.lang.String) {
+      return (java.lang.String) ref;
+    } else {
+      com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+      java.lang.String s = bs.toStringUtf8();
+      prompt_ = s;
+      return s;
+    }
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * This is system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return The bytes for prompt.
+   */
+  @java.lang.Override
+  public com.google.protobuf.ByteString getPromptBytes() {
+    java.lang.Object ref = prompt_;
+    if (ref instanceof java.lang.String) {
+      com.google.protobuf.ByteString b =
+          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+      prompt_ = b;
+      return b;
+    } else {
+      return (com.google.protobuf.ByteString) ref;
+    }
+  }
+
   private byte memoizedIsInitialized = -1;
 
   @java.lang.Override
@@ -208,6 +356,12 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io
     if (inputSourceCase_ == 1) {
       com.google.protobuf.GeneratedMessageV3.writeString(output, 1, inputSource_);
     }
+    if (inputSourceCase_ == 5) {
+      com.google.protobuf.GeneratedMessageV3.writeString(output, 5, inputSource_);
+    }
+    if (((bitField0_ & 0x00000001) != 0)) {
+      com.google.protobuf.GeneratedMessageV3.writeString(output, 6, prompt_);
+    }
     getUnknownFields().writeTo(output);
   }
 
@@ -220,6 +374,12 @@ public int getSerializedSize() {
     if (inputSourceCase_ == 1) {
       size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, inputSource_);
     }
+    if (inputSourceCase_ == 5) {
+      size += com.google.protobuf.GeneratedMessageV3.computeStringSize(5, inputSource_);
+    }
+    if (((bitField0_ & 0x00000001) != 0)) {
+      size += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, prompt_);
+    }
     size += getUnknownFields().getSerializedSize();
     memoizedSize = size;
     return size;
@@ -236,11 +396,18 @@ public boolean equals(final java.lang.Object obj) {
     com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput other =
         (com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput) obj;
 
+    if (hasPrompt() != other.hasPrompt()) return false;
+    if (hasPrompt()) {
+      if (!getPrompt().equals(other.getPrompt())) return false;
+    }
     if (!getInputSourceCase().equals(other.getInputSourceCase())) return false;
     switch (inputSourceCase_) {
       case 1:
         if (!getText().equals(other.getText())) return false;
         break;
+      case 5:
+        if (!getMarkup().equals(other.getMarkup())) return false;
+        break;
       case 0:
       default:
     }
@@ -255,11 +422,19 @@ public int hashCode() {
     }
     int hash = 41;
     hash = (19 * hash) + getDescriptor().hashCode();
+    if (hasPrompt()) {
+      hash = (37 * hash) + PROMPT_FIELD_NUMBER;
+      hash = (53 * hash) + getPrompt().hashCode();
+    }
     switch (inputSourceCase_) {
       case 1:
         hash = (37 * hash) + TEXT_FIELD_NUMBER;
         hash = (53 * hash) + getText().hashCode();
         break;
+      case 5:
+        hash = (37 * hash) + MARKUP_FIELD_NUMBER;
+        hash = (53 * hash) + getMarkup().hashCode();
+        break;
       case 0:
       default:
     }
@@ -404,6 +579,7 @@ private Builder(com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
     public Builder clear() {
       super.clear();
       bitField0_ = 0;
+      prompt_ = "";
       inputSourceCase_ = 0;
       inputSource_ = null;
       return this;
@@ -445,6 +621,12 @@ public com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput buildPartia
     private void buildPartial0(
         com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput result) {
       int from_bitField0_ = bitField0_;
+      int to_bitField0_ = 0;
+      if (((from_bitField0_ & 0x00000004) != 0)) { // builder tracks prompt with bit 0x4
+        result.prompt_ = prompt_;
+        to_bitField0_ |= 0x00000001; // the built message tracks prompt with bit 0x1
+      }
+      result.bitField0_ |= to_bitField0_;
     }
 
     private void buildPartialOneofs(
@@ -500,6 +682,11 @@ public Builder mergeFrom(com.google.cloud.texttospeech.v1beta1.StreamingSynthesi
       if (other
           == com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput.getDefaultInstance())
         return this;
+      if (other.hasPrompt()) {
+        prompt_ = other.prompt_;
+        bitField0_ |= 0x00000004;
+        onChanged();
+      }
       switch (other.getInputSourceCase()) {
         case TEXT:
           {
@@ -508,6 +695,13 @@ public Builder mergeFrom(com.google.cloud.texttospeech.v1beta1.StreamingSynthesi
             onChanged();
             break;
           }
+        case MARKUP:
+          {
+            inputSourceCase_ = 5;
+            inputSource_ = other.inputSource_;
+            onChanged();
+            break;
+          }
         case INPUTSOURCE_NOT_SET:
           {
             break;
@@ -546,6 +740,19 @@ public Builder mergeFrom(
                 inputSource_ = s;
                 break;
               } // case 10
+            case 42:
+              {
+                java.lang.String s = input.readStringRequireUtf8();
+                inputSourceCase_ = 5;
+                inputSource_ = s;
+                break;
+              } // case 42
+            case 50:
+              {
+                prompt_ = input.readStringRequireUtf8();
+                bitField0_ |= 0x00000004;
+                break;
+              } // case 50
             default:
               {
                 if (!super.parseUnknownField(input, extensionRegistry, tag)) {
@@ -584,9 +791,8 @@ public Builder clearInputSource() {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -603,9 +809,8 @@ public boolean hasText() {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -635,9 +840,8 @@ public java.lang.String getText() {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -667,9 +871,8 @@ public com.google.protobuf.ByteString getTextBytes() {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -692,9 +895,8 @@ public Builder setText(java.lang.String value) {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -715,9 +917,8 @@ public Builder clearText() {
      *
      * <pre>
      * The raw text to be synthesized. It is recommended that each input
-     * contains complete, terminating sentences, as this will likely result in
-     * better prosody in the output audio. That being said, users are free to
-     * input text however they please.
+     * contains complete, terminating sentences, which results in better prosody
+     * in the output audio.
      * </pre>
      *
      * <code>string text = 1;</code>
@@ -736,6 +937,277 @@ public Builder setTextBytes(com.google.protobuf.ByteString value) {
       return this;
     }
 
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return Whether the markup field is set.
+     */
+    @java.lang.Override
+    public boolean hasMarkup() {
+      return inputSourceCase_ == 5; // 5 = markup's field number, used as the inputSource case
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return The markup.
+     */
+    @java.lang.Override
+    public java.lang.String getMarkup() {
+      java.lang.Object ref = ""; // value returned when markup is not the active case
+      if (inputSourceCase_ == 5) {
+        ref = inputSource_;
+      }
+      if (!(ref instanceof java.lang.String)) {
+        com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (inputSourceCase_ == 5) {
+          inputSource_ = s; // cache the decoded String while markup is still selected
+        }
+        return s;
+      } else {
+        return (java.lang.String) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return The bytes for markup.
+     */
+    @java.lang.Override
+    public com.google.protobuf.ByteString getMarkupBytes() {
+      java.lang.Object ref = ""; // encoded to empty bytes when markup is not the active case
+      if (inputSourceCase_ == 5) {
+        ref = inputSource_;
+      }
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+        if (inputSourceCase_ == 5) {
+          inputSource_ = b; // cache the UTF-8 bytes while markup is still selected
+        }
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @param value The markup to set.
+     * @return This builder for chaining.
+     */
+    public Builder setMarkup(java.lang.String value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      inputSourceCase_ = 5; // select markup as the active inputSource case
+      inputSource_ = value; // shared slot: overwrites whatever inputSource value was held
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return This builder for chaining.
+     */
+    public Builder clearMarkup() {
+      if (inputSourceCase_ == 5) { // no-op unless markup is the active case
+        inputSourceCase_ = 0;
+        inputSource_ = null;
+        onChanged();
+      }
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @param value The bytes for markup to set.
+     * @return This builder for chaining.
+     */
+    public Builder setMarkupBytes(com.google.protobuf.ByteString value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      checkByteStringIsUtf8(value); // presumably rejects invalid UTF-8 (defined elsewhere)
+      inputSourceCase_ = 5; // select markup as the active inputSource case
+      inputSource_ = value; // stored as bytes; getMarkup() decodes on demand
+      onChanged();
+      return this;
+    }
+
+    private java.lang.Object prompt_ = ""; // String or ByteString; getters convert lazily
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @return Whether the prompt field is set.
+     */
+    public boolean hasPrompt() {
+      return ((bitField0_ & 0x00000004) != 0); // builder-side presence bit for prompt
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @return The prompt.
+     */
+    public java.lang.String getPrompt() {
+      java.lang.Object ref = prompt_;
+      if (!(ref instanceof java.lang.String)) { // held as ByteString: decode it now
+        com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        prompt_ = s; // cache the decoded String for later calls
+        return s;
+      } else {
+        return (java.lang.String) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @return The bytes for prompt.
+     */
+    public com.google.protobuf.ByteString getPromptBytes() {
+      java.lang.Object ref = prompt_;
+      if (ref instanceof String) { // held as String: encode it now
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+        prompt_ = b; // cache the UTF-8 bytes in place of the String
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @param value The prompt to set.
+     * @return This builder for chaining.
+     */
+    public Builder setPrompt(java.lang.String value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      prompt_ = value;
+      bitField0_ |= 0x00000004; // record that prompt was explicitly set
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @return This builder for chaining.
+     */
+    public Builder clearPrompt() {
+      prompt_ = getDefaultInstance().getPrompt(); // reset to the default instance's value
+      bitField0_ = (bitField0_ & ~0x00000004); // drop the presence bit for prompt
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * This is a system instruction supported only for controllable voice models.
+     * </pre>
+     *
+     * <code>optional string prompt = 6;</code>
+     *
+     * @param value The bytes for prompt to set.
+     * @return This builder for chaining.
+     */
+    public Builder setPromptBytes(com.google.protobuf.ByteString value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      checkByteStringIsUtf8(value); // presumably rejects invalid UTF-8 (defined elsewhere)
+      prompt_ = value; // stored as bytes; getPrompt() decodes on demand
+      bitField0_ |= 0x00000004; // record that prompt was explicitly set
+      onChanged();
+      return this;
+    }
+
     @java.lang.Override
     public final Builder setUnknownFields(final com.google.protobuf.UnknownFieldSet unknownFields) {
       return super.setUnknownFields(unknownFields);
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInputOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInputOrBuilder.java
index 9aa5f2b75cd5..793efa2ed3b8 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInputOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesisInputOrBuilder.java
@@ -29,9 +29,8 @@ public interface StreamingSynthesisInputOrBuilder
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -45,9 +44,8 @@ public interface StreamingSynthesisInputOrBuilder
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -61,9 +59,8 @@ public interface StreamingSynthesisInputOrBuilder
    *
    * <pre>
    * The raw text to be synthesized. It is recommended that each input
-   * contains complete, terminating sentences, as this will likely result in
-   * better prosody in the output audio. That being said, users are free to
-   * input text however they please.
+   * contains complete, terminating sentences, which results in better prosody
+   * in the output audio.
    * </pre>
    *
    * <code>string text = 1;</code>
@@ -72,6 +69,87 @@ public interface StreamingSynthesisInputOrBuilder
    */
   com.google.protobuf.ByteString getTextBytes();
 
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return Whether the markup field is set.
+   */
+  boolean hasMarkup();
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The markup.
+   */
+  java.lang.String getMarkup();
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The bytes for markup.
+   */
+  com.google.protobuf.ByteString getMarkupBytes();
+
+  /**
+   *
+   *
+   * <pre>
+   * This is a system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return Whether the prompt field is set.
+   */
+  boolean hasPrompt();
+
+  /**
+   *
+   *
+   * <pre>
+   * This is a system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return The prompt.
+   */
+  java.lang.String getPrompt();
+
+  /**
+   *
+   *
+   * <pre>
+   * This is a system instruction supported only for controllable voice models.
+   * </pre>
+   *
+   * <code>optional string prompt = 6;</code>
+   *
+   * @return The bytes for prompt.
+   */
+  com.google.protobuf.ByteString getPromptBytes();
+
   com.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput.InputSourceCase
       getInputSourceCase();
 }
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfig.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfig.java
index 206782e70a8c..c523ade03557 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfig.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfig.java
@@ -182,6 +182,93 @@ public com.google.cloud.texttospeech.v1beta1.StreamingAudioConfig getStreamingAu
         : streamingAudioConfig_;
   }
 
+  public static final int CUSTOM_PRONUNCIATIONS_FIELD_NUMBER = 5; // custom_pronunciations tag
+  private com.google.cloud.texttospeech.v1beta1.CustomPronunciations customPronunciations_;
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return Whether the customPronunciations field is set.
+   */
+  @java.lang.Override
+  public boolean hasCustomPronunciations() {
+    return ((bitField0_ & 0x00000004) != 0); // presence bit for custom_pronunciations
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return The customPronunciations.
+   */
+  @java.lang.Override
+  public com.google.cloud.texttospeech.v1beta1.CustomPronunciations getCustomPronunciations() {
+    return customPronunciations_ == null // unset: fall back to the shared default instance
+        ? com.google.cloud.texttospeech.v1beta1.CustomPronunciations.getDefaultInstance()
+        : customPronunciations_;
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   */
+  @java.lang.Override
+  public com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder
+      getCustomPronunciationsOrBuilder() {
+    return customPronunciations_ == null // unset: fall back to the shared default instance
+        ? com.google.cloud.texttospeech.v1beta1.CustomPronunciations.getDefaultInstance()
+        : customPronunciations_;
+  }
+
   private byte memoizedIsInitialized = -1;
 
   @java.lang.Override
@@ -202,6 +289,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io
     if (((bitField0_ & 0x00000002) != 0)) {
       output.writeMessage(4, getStreamingAudioConfig());
     }
+    if (((bitField0_ & 0x00000004) != 0)) {
+      output.writeMessage(5, getCustomPronunciations());
+    }
     getUnknownFields().writeTo(output);
   }
 
@@ -218,6 +308,10 @@ public int getSerializedSize() {
       size +=
           com.google.protobuf.CodedOutputStream.computeMessageSize(4, getStreamingAudioConfig());
     }
+    if (((bitField0_ & 0x00000004) != 0)) {
+      size +=
+          com.google.protobuf.CodedOutputStream.computeMessageSize(5, getCustomPronunciations());
+    }
     size += getUnknownFields().getSerializedSize();
     memoizedSize = size;
     return size;
@@ -242,6 +336,10 @@ public boolean equals(final java.lang.Object obj) {
     if (hasStreamingAudioConfig()) {
       if (!getStreamingAudioConfig().equals(other.getStreamingAudioConfig())) return false;
     }
+    if (hasCustomPronunciations() != other.hasCustomPronunciations()) return false;
+    if (hasCustomPronunciations()) {
+      if (!getCustomPronunciations().equals(other.getCustomPronunciations())) return false;
+    }
     if (!getUnknownFields().equals(other.getUnknownFields())) return false;
     return true;
   }
@@ -261,6 +359,10 @@ public int hashCode() {
       hash = (37 * hash) + STREAMING_AUDIO_CONFIG_FIELD_NUMBER;
       hash = (53 * hash) + getStreamingAudioConfig().hashCode();
     }
+    if (hasCustomPronunciations()) {
+      hash = (37 * hash) + CUSTOM_PRONUNCIATIONS_FIELD_NUMBER;
+      hash = (53 * hash) + getCustomPronunciations().hashCode();
+    }
     hash = (29 * hash) + getUnknownFields().hashCode();
     memoizedHashCode = hash;
     return hash;
@@ -405,6 +507,7 @@ private void maybeForceBuilderInitialization() {
       if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {
         getVoiceFieldBuilder();
         getStreamingAudioConfigFieldBuilder();
+        getCustomPronunciationsFieldBuilder();
       }
     }
 
@@ -422,6 +525,11 @@ public Builder clear() {
         streamingAudioConfigBuilder_.dispose();
         streamingAudioConfigBuilder_ = null;
       }
+      customPronunciations_ = null;
+      if (customPronunciationsBuilder_ != null) {
+        customPronunciationsBuilder_.dispose();
+        customPronunciationsBuilder_ = null;
+      }
       return this;
     }
 
@@ -472,6 +580,13 @@ private void buildPartial0(
                 : streamingAudioConfigBuilder_.build();
         to_bitField0_ |= 0x00000002;
       }
+      if (((from_bitField0_ & 0x00000004) != 0)) {
+        result.customPronunciations_ =
+            customPronunciationsBuilder_ == null
+                ? customPronunciations_
+                : customPronunciationsBuilder_.build();
+        to_bitField0_ |= 0x00000004;
+      }
       result.bitField0_ |= to_bitField0_;
     }
 
@@ -529,6 +644,9 @@ public Builder mergeFrom(
       if (other.hasStreamingAudioConfig()) {
         mergeStreamingAudioConfig(other.getStreamingAudioConfig());
       }
+      if (other.hasCustomPronunciations()) {
+        mergeCustomPronunciations(other.getCustomPronunciations());
+      }
       this.mergeUnknownFields(other.getUnknownFields());
       onChanged();
       return this;
@@ -568,6 +686,13 @@ public Builder mergeFrom(
                 bitField0_ |= 0x00000002;
                 break;
               } // case 34
+            case 42:
+              {
+                input.readMessage(
+                    getCustomPronunciationsFieldBuilder().getBuilder(), extensionRegistry);
+                bitField0_ |= 0x00000004;
+                break;
+              } // case 42
             default:
               {
                 if (!super.parseUnknownField(input, extensionRegistry, tag)) {
@@ -1021,6 +1146,306 @@ public Builder clearStreamingAudioConfig() {
       return streamingAudioConfigBuilder_;
     }
 
+    private com.google.cloud.texttospeech.v1beta1.CustomPronunciations customPronunciations_;
+    private com.google.protobuf.SingleFieldBuilderV3<
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciations,
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder,
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder>
+        customPronunciationsBuilder_;
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     *
+     * @return Whether the customPronunciations field is set.
+     */
+    public boolean hasCustomPronunciations() {
+      return ((bitField0_ & 0x00000004) != 0);
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     *
+     * @return The customPronunciations.
+     */
+    public com.google.cloud.texttospeech.v1beta1.CustomPronunciations getCustomPronunciations() {
+      if (customPronunciationsBuilder_ == null) {
+        return customPronunciations_ == null
+            ? com.google.cloud.texttospeech.v1beta1.CustomPronunciations.getDefaultInstance()
+            : customPronunciations_;
+      } else {
+        return customPronunciationsBuilder_.getMessage();
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public Builder setCustomPronunciations(
+        com.google.cloud.texttospeech.v1beta1.CustomPronunciations value) {
+      if (customPronunciationsBuilder_ == null) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        customPronunciations_ = value;
+      } else {
+        customPronunciationsBuilder_.setMessage(value);
+      }
+      bitField0_ |= 0x00000004;
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public Builder setCustomPronunciations(
+        com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder builderForValue) {
+      if (customPronunciationsBuilder_ == null) {
+        customPronunciations_ = builderForValue.build();
+      } else {
+        customPronunciationsBuilder_.setMessage(builderForValue.build());
+      }
+      bitField0_ |= 0x00000004;
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public Builder mergeCustomPronunciations(
+        com.google.cloud.texttospeech.v1beta1.CustomPronunciations value) {
+      if (customPronunciationsBuilder_ == null) {
+        if (((bitField0_ & 0x00000004) != 0)
+            && customPronunciations_ != null
+            && customPronunciations_
+                != com.google.cloud.texttospeech.v1beta1.CustomPronunciations
+                    .getDefaultInstance()) {
+          getCustomPronunciationsBuilder().mergeFrom(value);
+        } else {
+          customPronunciations_ = value;
+        }
+      } else {
+        customPronunciationsBuilder_.mergeFrom(value);
+      }
+      if (customPronunciations_ != null) {
+        bitField0_ |= 0x00000004;
+        onChanged();
+      }
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public Builder clearCustomPronunciations() {
+      bitField0_ = (bitField0_ & ~0x00000004);
+      customPronunciations_ = null;
+      if (customPronunciationsBuilder_ != null) {
+        customPronunciationsBuilder_.dispose();
+        customPronunciationsBuilder_ = null;
+      }
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder
+        getCustomPronunciationsBuilder() {
+      bitField0_ |= 0x00000004;
+      onChanged();
+      return getCustomPronunciationsFieldBuilder().getBuilder();
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    public com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder
+        getCustomPronunciationsOrBuilder() {
+      if (customPronunciationsBuilder_ != null) {
+        return customPronunciationsBuilder_.getMessageOrBuilder();
+      } else {
+        return customPronunciations_ == null
+            ? com.google.cloud.texttospeech.v1beta1.CustomPronunciations.getDefaultInstance()
+            : customPronunciations_;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
+     * customizations.
+     *
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
+     *
+     * In order to customize the pronunciation of a phrase, there must be an exact
+     * match of the phrase in the input types. If using SSML, the phrase must not
+     * be inside a phoneme tag.
+     * </pre>
+     *
+     * <code>
+     * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+     * </code>
+     */
+    private com.google.protobuf.SingleFieldBuilderV3<
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciations,
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder,
+            com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder>
+        getCustomPronunciationsFieldBuilder() {
+      if (customPronunciationsBuilder_ == null) {
+        customPronunciationsBuilder_ =
+            new com.google.protobuf.SingleFieldBuilderV3<
+                com.google.cloud.texttospeech.v1beta1.CustomPronunciations,
+                com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder,
+                com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder>(
+                getCustomPronunciations(), getParentForChildren(), isClean());
+        customPronunciations_ = null;
+      }
+      return customPronunciationsBuilder_;
+    }
+
     @java.lang.Override
     public final Builder setUnknownFields(final com.google.protobuf.UnknownFieldSet unknownFields) {
       return super.setUnknownFields(unknownFields);
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfigOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfigOrBuilder.java
index cc325e3c7b25..e3c6ea93c016 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfigOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/StreamingSynthesizeConfigOrBuilder.java
@@ -110,4 +110,75 @@ public interface StreamingSynthesizeConfigOrBuilder
    */
   com.google.cloud.texttospeech.v1beta1.StreamingAudioConfigOrBuilder
       getStreamingAudioConfigOrBuilder();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return Whether the customPronunciations field is set.
+   */
+  boolean hasCustomPronunciations();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   *
+   * @return The customPronunciations.
+   */
+  com.google.cloud.texttospeech.v1beta1.CustomPronunciations getCustomPronunciations();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
+   * customizations.
+   *
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
+   *
+   * In order to customize the pronunciation of a phrase, there must be an exact
+   * match of the phrase in the input types. If using SSML, the phrase must not
+   * be inside a phoneme tag.
+   * </pre>
+   *
+   * <code>
+   * .google.cloud.texttospeech.v1beta1.CustomPronunciations custom_pronunciations = 5 [(.google.api.field_behavior) = OPTIONAL];
+   * </code>
+   */
+  com.google.cloud.texttospeech.v1beta1.CustomPronunciationsOrBuilder
+      getCustomPronunciationsOrBuilder();
 }
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInput.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInput.java
index df509343e9d0..372e7933134f 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInput.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInput.java
@@ -76,6 +76,7 @@ public enum InputSourceCase
           com.google.protobuf.Internal.EnumLite,
           com.google.protobuf.AbstractMessage.InternalOneOfEnum {
     TEXT(1),
+    MARKUP(5),
     SSML(2),
     MULTI_SPEAKER_MARKUP(4),
     INPUTSOURCE_NOT_SET(0);
@@ -99,6 +100,8 @@ public static InputSourceCase forNumber(int value) {
       switch (value) {
         case 1:
           return TEXT;
+        case 5:
+          return MARKUP;
         case 2:
           return SSML;
         case 4:
@@ -192,6 +195,82 @@ public com.google.protobuf.ByteString getTextBytes() {
     }
   }
 
+  public static final int MARKUP_FIELD_NUMBER = 5;
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return Whether the markup field is set.
+   */
+  public boolean hasMarkup() {
+    return inputSourceCase_ == 5;
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The markup.
+   */
+  public java.lang.String getMarkup() {
+    java.lang.Object ref = "";
+    if (inputSourceCase_ == 5) {
+      ref = inputSource_;
+    }
+    if (ref instanceof java.lang.String) {
+      return (java.lang.String) ref;
+    } else {
+      com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+      java.lang.String s = bs.toStringUtf8();
+      if (inputSourceCase_ == 5) {
+        inputSource_ = s;
+      }
+      return s;
+    }
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The bytes for markup.
+   */
+  public com.google.protobuf.ByteString getMarkupBytes() {
+    java.lang.Object ref = "";
+    if (inputSourceCase_ == 5) {
+      ref = inputSource_;
+    }
+    if (ref instanceof java.lang.String) {
+      com.google.protobuf.ByteString b =
+          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+      if (inputSourceCase_ == 5) {
+        inputSource_ = b;
+      }
+      return b;
+    } else {
+      return (com.google.protobuf.ByteString) ref;
+    }
+  }
+
   public static final int SSML_FIELD_NUMBER = 2;
 
   /**
@@ -342,18 +421,16 @@ public com.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup getMultiSpeakerM
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
@@ -371,18 +448,16 @@ public boolean hasCustomPronunciations() {
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
@@ -402,18 +477,16 @@ public com.google.cloud.texttospeech.v1beta1.CustomPronunciations getCustomPronu
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
@@ -455,6 +528,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io
       output.writeMessage(
           4, (com.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup) inputSource_);
     }
+    if (inputSourceCase_ == 5) {
+      com.google.protobuf.GeneratedMessageV3.writeString(output, 5, inputSource_);
+    }
     getUnknownFields().writeTo(output);
   }
 
@@ -479,6 +555,9 @@ public int getSerializedSize() {
           com.google.protobuf.CodedOutputStream.computeMessageSize(
               4, (com.google.cloud.texttospeech.v1beta1.MultiSpeakerMarkup) inputSource_);
     }
+    if (inputSourceCase_ == 5) {
+      size += com.google.protobuf.GeneratedMessageV3.computeStringSize(5, inputSource_);
+    }
     size += getUnknownFields().getSerializedSize();
     memoizedSize = size;
     return size;
@@ -504,6 +583,9 @@ public boolean equals(final java.lang.Object obj) {
       case 1:
         if (!getText().equals(other.getText())) return false;
         break;
+      case 5:
+        if (!getMarkup().equals(other.getMarkup())) return false;
+        break;
       case 2:
         if (!getSsml().equals(other.getSsml())) return false;
         break;
@@ -533,6 +615,10 @@ public int hashCode() {
         hash = (37 * hash) + TEXT_FIELD_NUMBER;
         hash = (53 * hash) + getText().hashCode();
         break;
+      case 5:
+        hash = (37 * hash) + MARKUP_FIELD_NUMBER;
+        hash = (53 * hash) + getMarkup().hashCode();
+        break;
       case 2:
         hash = (37 * hash) + SSML_FIELD_NUMBER;
         hash = (53 * hash) + getSsml().hashCode();
@@ -744,7 +830,7 @@ public com.google.cloud.texttospeech.v1beta1.SynthesisInput buildPartial() {
     private void buildPartial0(com.google.cloud.texttospeech.v1beta1.SynthesisInput result) {
       int from_bitField0_ = bitField0_;
       int to_bitField0_ = 0;
-      if (((from_bitField0_ & 0x00000008) != 0)) {
+      if (((from_bitField0_ & 0x00000010) != 0)) {
         result.customPronunciations_ =
             customPronunciationsBuilder_ == null
                 ? customPronunciations_
@@ -819,6 +905,13 @@ public Builder mergeFrom(com.google.cloud.texttospeech.v1beta1.SynthesisInput ot
             onChanged();
             break;
           }
+        case MARKUP:
+          {
+            inputSourceCase_ = 5;
+            inputSource_ = other.inputSource_;
+            onChanged();
+            break;
+          }
         case SSML:
           {
             inputSourceCase_ = 2;
@@ -880,7 +973,7 @@ public Builder mergeFrom(
               {
                 input.readMessage(
                     getCustomPronunciationsFieldBuilder().getBuilder(), extensionRegistry);
-                bitField0_ |= 0x00000008;
+                bitField0_ |= 0x00000010;
                 break;
               } // case 26
             case 34:
@@ -890,6 +983,13 @@ public Builder mergeFrom(
                 inputSourceCase_ = 4;
                 break;
               } // case 34
+            case 42:
+              {
+                java.lang.String s = input.readStringRequireUtf8();
+                inputSourceCase_ = 5;
+                inputSource_ = s;
+                break;
+              } // case 42
             default:
               {
                 if (!super.parseUnknownField(input, extensionRegistry, tag)) {
@@ -1062,6 +1162,151 @@ public Builder setTextBytes(com.google.protobuf.ByteString value) {
       return this;
     }
 
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return Whether the markup field is set.
+     */
+    @java.lang.Override
+    public boolean hasMarkup() {
+      return inputSourceCase_ == 5;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return The markup.
+     */
+    @java.lang.Override
+    public java.lang.String getMarkup() {
+      java.lang.Object ref = "";
+      if (inputSourceCase_ == 5) {
+        ref = inputSource_;
+      }
+      if (!(ref instanceof java.lang.String)) {
+        com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        if (inputSourceCase_ == 5) {
+          inputSource_ = s;
+        }
+        return s;
+      } else {
+        return (java.lang.String) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return The bytes for markup.
+     */
+    @java.lang.Override
+    public com.google.protobuf.ByteString getMarkupBytes() {
+      java.lang.Object ref = "";
+      if (inputSourceCase_ == 5) {
+        ref = inputSource_;
+      }
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+        if (inputSourceCase_ == 5) {
+          inputSource_ = b;
+        }
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @param value The markup to set.
+     * @return This builder for chaining.
+     */
+    public Builder setMarkup(java.lang.String value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      inputSourceCase_ = 5;
+      inputSource_ = value;
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @return This builder for chaining.
+     */
+    public Builder clearMarkup() {
+      if (inputSourceCase_ == 5) {
+        inputSourceCase_ = 0;
+        inputSource_ = null;
+        onChanged();
+      }
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Markup for HD voices specifically. This field may not be used with any
+     * other voices.
+     * </pre>
+     *
+     * <code>string markup = 5;</code>
+     *
+     * @param value The bytes for markup to set.
+     * @return This builder for chaining.
+     */
+    public Builder setMarkupBytes(com.google.protobuf.ByteString value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      checkByteStringIsUtf8(value);
+      inputSourceCase_ = 5;
+      inputSource_ = value;
+      onChanged();
+      return this;
+    }
+
     /**
      *
      *
@@ -1469,18 +1714,16 @@ public Builder clearMultiSpeakerMarkup() {
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1490,25 +1733,23 @@ public Builder clearMultiSpeakerMarkup() {
      * @return Whether the customPronunciations field is set.
      */
     public boolean hasCustomPronunciations() {
-      return ((bitField0_ & 0x00000008) != 0);
+      return ((bitField0_ & 0x00000010) != 0);
     }
 
     /**
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1531,18 +1772,16 @@ public com.google.cloud.texttospeech.v1beta1.CustomPronunciations getCustomPronu
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1559,7 +1798,7 @@ public Builder setCustomPronunciations(
       } else {
         customPronunciationsBuilder_.setMessage(value);
       }
-      bitField0_ |= 0x00000008;
+      bitField0_ |= 0x00000010;
       onChanged();
       return this;
     }
@@ -1568,18 +1807,16 @@ public Builder setCustomPronunciations(
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1593,7 +1830,7 @@ public Builder setCustomPronunciations(
       } else {
         customPronunciationsBuilder_.setMessage(builderForValue.build());
       }
-      bitField0_ |= 0x00000008;
+      bitField0_ |= 0x00000010;
       onChanged();
       return this;
     }
@@ -1602,18 +1839,16 @@ public Builder setCustomPronunciations(
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1623,7 +1858,7 @@ public Builder setCustomPronunciations(
     public Builder mergeCustomPronunciations(
         com.google.cloud.texttospeech.v1beta1.CustomPronunciations value) {
       if (customPronunciationsBuilder_ == null) {
-        if (((bitField0_ & 0x00000008) != 0)
+        if (((bitField0_ & 0x00000010) != 0)
             && customPronunciations_ != null
             && customPronunciations_
                 != com.google.cloud.texttospeech.v1beta1.CustomPronunciations
@@ -1636,7 +1871,7 @@ public Builder mergeCustomPronunciations(
         customPronunciationsBuilder_.mergeFrom(value);
       }
       if (customPronunciations_ != null) {
-        bitField0_ |= 0x00000008;
+        bitField0_ |= 0x00000010;
         onChanged();
       }
       return this;
@@ -1646,18 +1881,16 @@ public Builder mergeCustomPronunciations(
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1665,7 +1898,7 @@ public Builder mergeCustomPronunciations(
      * </code>
      */
     public Builder clearCustomPronunciations() {
-      bitField0_ = (bitField0_ & ~0x00000008);
+      bitField0_ = (bitField0_ & ~0x00000010);
       customPronunciations_ = null;
       if (customPronunciationsBuilder_ != null) {
         customPronunciationsBuilder_.dispose();
@@ -1679,18 +1912,16 @@ public Builder clearCustomPronunciations() {
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1699,7 +1930,7 @@ public Builder clearCustomPronunciations() {
      */
     public com.google.cloud.texttospeech.v1beta1.CustomPronunciations.Builder
         getCustomPronunciationsBuilder() {
-      bitField0_ |= 0x00000008;
+      bitField0_ |= 0x00000010;
       onChanged();
       return getCustomPronunciationsFieldBuilder().getBuilder();
     }
@@ -1708,18 +1939,16 @@ public Builder clearCustomPronunciations() {
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
@@ -1741,18 +1970,16 @@ public Builder clearCustomPronunciations() {
      *
      *
      * <pre>
-     * Optional. The pronunciation customizations to be applied to the input. If
-     * this is set, the input will be synthesized using the given pronunciation
+     * Optional. The pronunciation customizations are applied to the input. If
+     * this is set, the input is synthesized using the given pronunciation
      * customizations.
      *
-     * The initial support will be for EFIGS (English, French,
-     * Italian, German, Spanish) languages, as provided in
-     * VoiceSelectionParams. Journey and Instant Clone voices are
-     * not supported yet.
+     * The initial support is for en-us, with plans to expand to other locales in
+     * the future. Instant Clone voices aren't supported.
      *
      * In order to customize the pronunciation of a phrase, there must be an exact
      * match of the phrase in the input types. If using SSML, the phrase must not
-     * be inside a phoneme tag (entirely or partially).
+     * be inside a phoneme tag.
      * </pre>
      *
      * <code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInputOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInputOrBuilder.java
index 8f27b3476b7f..8e6a92c92b3e 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInputOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/SynthesisInputOrBuilder.java
@@ -63,6 +63,48 @@ public interface SynthesisInputOrBuilder
    */
   com.google.protobuf.ByteString getTextBytes();
 
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return Whether the markup field is set.
+   */
+  boolean hasMarkup();
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The markup.
+   */
+  java.lang.String getMarkup();
+
+  /**
+   *
+   *
+   * <pre>
+   * Markup for HD voices specifically. This field may not be used with any
+   * other voices.
+   * </pre>
+   *
+   * <code>string markup = 5;</code>
+   *
+   * @return The bytes for markup.
+   */
+  com.google.protobuf.ByteString getMarkupBytes();
+
   /**
    *
    *
@@ -159,18 +201,16 @@ public interface SynthesisInputOrBuilder
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
@@ -185,18 +225,16 @@ public interface SynthesisInputOrBuilder
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
@@ -211,18 +249,16 @@ public interface SynthesisInputOrBuilder
    *
    *
    * <pre>
-   * Optional. The pronunciation customizations to be applied to the input. If
-   * this is set, the input will be synthesized using the given pronunciation
+   * Optional. The pronunciation customizations are applied to the input. If
+   * this is set, the input is synthesized using the given pronunciation
    * customizations.
    *
-   * The initial support will be for EFIGS (English, French,
-   * Italian, German, Spanish) languages, as provided in
-   * VoiceSelectionParams. Journey and Instant Clone voices are
-   * not supported yet.
+   * The initial support is for en-us, with plans to expand to other locales in
+   * the future. Instant Clone voices aren't supported.
    *
    * In order to customize the pronunciation of a phrase, there must be an exact
    * match of the phrase in the input types. If using SSML, the phrase must not
-   * be inside a phoneme tag (entirely or partially).
+   * be inside a phoneme tag.
    * </pre>
    *
    * <code>
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechProto.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechProto.java
index cebb0973ca76..212f06d36ed1 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechProto.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/TextToSpeechProto.java
@@ -154,48 +154,52 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
           + "TimepointType\022\036\n"
           + "\032TIMEPOINT_TYPE_UNSPECIFIED\020\000\022\r\n"
           + "\tSSML_MARK\020\001B\031\n"
-          + "\027_advanced_voice_options\"\337\002\n"
+          + "\027_advanced_voice_options\"\247\003\n"
           + "\031CustomPronunciationParams\022\023\n"
           + "\006phrase\030\001 \001(\tH\000\210\001\001\022m\n"
           + "\021phonetic_encoding\030\002 \001(\0162M.google.cloud.texttospeech.v1bet"
           + "a1.CustomPronunciationParams.PhoneticEncodingH\001\210\001\001\022\032\n\r"
-          + "pronunciation\030\003 \001(\tH\002\210\001\001\"o\n"
+          + "pronunciation\030\003 \001(\tH\002\210\001\001\"\266\001\n"
           + "\020PhoneticEncoding\022!\n"
           + "\035PHONETIC_ENCODING_UNSPECIFIED\020\000\022\031\n"
           + "\025PHONETIC_ENCODING_IPA\020\001\022\035\n"
-          + "\031PHONETIC_ENCODING_X_SAMPA\020\002B\t\n"
+          + "\031PHONETIC_ENCODING_X_SAMPA\020\002\022\'\n"
+          + "#PHONETIC_ENCODING_JAPANESE_YOMIGANA\020\003\022\034\n"
+          + "\030PHONETIC_ENCODING_PINYIN\020\004B\t\n"
           + "\007_phraseB\024\n"
           + "\022_phonetic_encodingB\020\n"
           + "\016_pronunciation\"l\n"
           + "\024CustomPronunciations\022T\n"
-          + "\016pronunciations\030\001"
-          + " \003(\0132<.google.cloud.texttospeech.v1beta1.CustomPronunciationParams\"\225\001\n"
+          + "\016pronunciations\030\001 \003"
+          + "(\0132<.google.cloud.texttospeech.v1beta1.CustomPronunciationParams\"\225\001\n"
           + "\022MultiSpeakerMarkup\022N\n"
-          + "\005turns\030\001 \003(\0132:.google."
-          + "cloud.texttospeech.v1beta1.MultiSpeakerMarkup.TurnB\003\340A\002\032/\n"
+          + "\005turns\030\001 \003(\0132:.google.cloud.te"
+          + "xttospeech.v1beta1.MultiSpeakerMarkup.TurnB\003\340A\002\032/\n"
           + "\004Turn\022\024\n"
           + "\007speaker\030\001 \001(\tB\003\340A\002\022\021\n"
-          + "\004text\030\002 \001(\tB\003\340A\002\"\364\001\n"
+          + "\004text\030\002 \001(\tB\003\340A\002\"\206\002\n"
           + "\016SynthesisInput\022\016\n"
-          + "\004text\030\001 \001(\tH\000\022\016\n"
+          + "\004text\030\001 \001(\tH\000\022\020\n"
+          + "\006markup\030\005 \001(\tH\000\022\016\n"
           + "\004ssml\030\002 \001(\tH\000\022U\n"
-          + "\024multi_speaker_markup\030\004 \001(\01325.google.cloud"
-          + ".texttospeech.v1beta1.MultiSpeakerMarkupH\000\022[\n"
-          + "\025custom_pronunciations\030\003 \001(\01327.goog"
-          + "le.cloud.texttospeech.v1beta1.CustomPronunciationsB\003\340A\001B\016\n"
-          + "\014input_source\"\244\002\n"
+          + "\024multi_speaker_markup\030\004 \001(\01325.go"
+          + "ogle.cloud.texttospeech.v1beta1.MultiSpeakerMarkupH\000\022[\n"
+          + "\025custom_pronunciations\030\003 "
+          + "\001(\01327.google.cloud.texttospeech.v1beta1.CustomPronunciationsB\003\340A\001B\016\n"
+          + "\014input_source\"\275\002\n"
           + "\024VoiceSelectionParams\022\032\n\r"
           + "language_code\030\001 \001(\tB\003\340A\002\022\014\n"
           + "\004name\030\002 \001(\t\022G\n"
-          + "\013ssml_gender\030\003 \001(\0162"
-          + "2.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\022J\n"
-          + "\014custom_voice\030\004 \001(\01324.goog"
-          + "le.cloud.texttospeech.v1beta1.CustomVoiceParams\022M\n"
-          + "\013voice_clone\030\005 \001(\01323.google.cl"
-          + "oud.texttospeech.v1beta1.VoiceCloneParamsB\003\340A\001\"\366\001\n"
+          + "\013ssml_gender\030\003"
+          + " \001(\01622.google.cloud.texttospeech.v1beta1.SsmlVoiceGender\022J\n"
+          + "\014custom_voice\030\004 "
+          + "\001(\01324.google.cloud.texttospeech.v1beta1.CustomVoiceParams\022M\n"
+          + "\013voice_clone\030\005 \001(\01323"
+          + ".google.cloud.texttospeech.v1beta1.VoiceCloneParamsB\003\340A\001\022\027\n\n"
+          + "model_name\030\006 \001(\tB\003\340A\001\"\366\001\n"
           + "\013AudioConfig\022M\n"
-          + "\016audio_encoding\030\001"
-          + " \001(\01620.google.cloud.texttospeech.v1beta1.AudioEncodingB\003\340A\002\022\035\n\r"
+          + "\016audio_encoding\030\001 \001("
+          + "\01620.google.cloud.texttospeech.v1beta1.AudioEncodingB\003\340A\002\022\035\n\r"
           + "speaking_rate\030\002 \001(\001B\006\340A\004\340A\001\022\025\n"
           + "\005pitch\030\003 \001(\001B\006\340A\004\340A\001\022\036\n"
           + "\016volume_gain_db\030\004 \001(\001B\006\340A\004\340A\001\022\036\n"
@@ -204,8 +208,8 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
           + "\021CustomVoiceParams\0222\n"
           + "\005model\030\001 \001(\tB#\340A\002\372A\035\n"
           + "\033automl.googleapis.com/Model\022a\n"
-          + "\016reported_usage\030\003 \001(\0162B.goog"
-          + "le.cloud.texttospeech.v1beta1.CustomVoiceParams.ReportedUsageB\005\030\001\340A\001\"J\n\r"
+          + "\016reported_usage\030\003 \001(\0162B.google.cl"
+          + "oud.texttospeech.v1beta1.CustomVoiceParams.ReportedUsageB\005\030\001\340A\001\"J\n\r"
           + "ReportedUsage\022\036\n"
           + "\032REPORTED_USAGE_UNSPECIFIED\020\000\022\014\n"
           + "\010REALTIME\020\001\022\013\n"
@@ -219,24 +223,30 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
           + " \001(\0132..google.cloud.texttospeech.v1beta1.AudioConfig\"4\n"
           + "\tTimepoint\022\021\n"
           + "\tmark_name\030\004 \001(\t\022\024\n"
-          + "\014time_seconds\030\003 \001(\001\"\205\001\n"
+          + "\014time_seconds\030\003 \001(\001\"\244\001\n"
           + "\024StreamingAudioConfig\022M\n"
-          + "\016audio_encoding\030\001"
-          + " \001(\01620.google.cloud.texttospeech.v1beta1.AudioEncodingB\003\340A\002\022\036\n"
-          + "\021sample_rate_hertz\030\002 \001(\005B\003\340A\001\"\306\001\n"
+          + "\016audio_encoding\030\001 \001"
+          + "(\01620.google.cloud.texttospeech.v1beta1.AudioEncodingB\003\340A\002\022\036\n"
+          + "\021sample_rate_hertz\030\002 \001(\005B\003\340A\001\022\035\n\r"
+          + "speaking_rate\030\003 \001(\001B\006\340A\004\340A\001\"\243\002\n"
           + "\031StreamingSynthesizeConfig\022K\n"
-          + "\005voice\030\001 \001(\01327.google.cloud.textt"
-          + "ospeech.v1beta1.VoiceSelectionParamsB\003\340A\002\022\\\n"
-          + "\026streaming_audio_config\030\004 \001(\01327.goog"
-          + "le.cloud.texttospeech.v1beta1.StreamingAudioConfigB\003\340A\001\"9\n"
+          + "\005voice\030\001"
+          + " \001(\01327.google.cloud.texttospeech.v1beta1.VoiceSelectionParamsB\003\340A\002\022\\\n"
+          + "\026streaming_audio_config\030\004 \001(\01327.google.cloud.textt"
+          + "ospeech.v1beta1.StreamingAudioConfigB\003\340A\001\022[\n"
+          + "\025custom_pronunciations\030\005 \001(\01327.googl"
+          + "e.cloud.texttospeech.v1beta1.CustomPronunciationsB\003\340A\001\"k\n"
           + "\027StreamingSynthesisInput\022\016\n"
-          + "\004text\030\001 \001(\tH\000B\016\n"
-          + "\014input_source\"\330\001\n"
+          + "\004text\030\001 \001(\tH\000\022\020\n"
+          + "\006markup\030\005 \001(\tH\000\022\023\n"
+          + "\006prompt\030\006 \001(\tH\001\210\001\001B\016\n"
+          + "\014input_sourceB\t\n"
+          + "\007_prompt\"\330\001\n"
           + "\032StreamingSynthesizeRequest\022X\n"
-          + "\020streaming_config\030\001"
-          + " \001(\0132<.google.cloud.texttospeech.v1beta1.StreamingSynthesizeConfigH\000\022K\n"
-          + "\005input\030\002"
-          + " \001(\0132:.google.cloud.texttospeech.v1beta1.StreamingSynthesisInputH\000B\023\n"
+          + "\020streaming_config\030\001 \001(\0132<.google.cloud.text"
+          + "tospeech.v1beta1.StreamingSynthesizeConfigH\000\022K\n"
+          + "\005input\030\002 \001(\0132:.google.cloud.textt"
+          + "ospeech.v1beta1.StreamingSynthesisInputH\000B\023\n"
           + "\021streaming_request\"4\n"
           + "\033StreamingSynthesizeResponse\022\025\n\r"
           + "audio_content\030\001 \001(\014*W\n"
@@ -244,8 +254,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
           + "\035SSML_VOICE_GENDER_UNSPECIFIED\020\000\022\010\n"
           + "\004MALE\020\001\022\n\n"
           + "\006FEMALE\020\002\022\013\n"
-          + "\007NEUTRAL\020\003*\203\001\n"
-          + "\r"
+          + "\007NEUTRAL\020\003*\214\001\n\r"
           + "AudioEncoding\022\036\n"
           + "\032AUDIO_ENCODING_UNSPECIFIED\020\000\022\014\n"
           + "\010LINEAR16\020\001\022\007\n"
@@ -254,26 +263,27 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
           + "\010OGG_OPUS\020\003\022\t\n"
           + "\005MULAW\020\005\022\010\n"
           + "\004ALAW\020\006\022\007\n"
-          + "\003PCM\020\0072\357\004\n"
+          + "\003PCM\020\007\022\007\n"
+          + "\003M4A\020\0102\357\004\n"
           + "\014TextToSpeech\022\242\001\n\n"
-          + "ListVoices\0224.google.cloud.texttospeech.v1beta1.L"
-          + "istVoicesRequest\0325.google.cloud.texttospeech.v1beta1.ListVoicesResponse\"\'\332A\r"
+          + "ListVoices\0224.google.cloud.texttospeech.v1beta1.ListVoicesRequest\0325.goo"
+          + "gle.cloud.texttospeech.v1beta1.ListVoicesResponse\"\'\332A\r"
           + "language_code\202\323\344\223\002\021\022\017/v1beta1/voices\022\313\001\n"
-          + "\020SynthesizeSpeech\022:.google.cloud.texttospeec"
-          + "h.v1beta1.SynthesizeSpeechRequest\032;.google.cloud.texttospeech.v1beta1.Synthesize"
-          + "SpeechResponse\">\332A\030input,voice,audio_con"
-          + "fig\202\323\344\223\002\035\"\030/v1beta1/text:synthesize:\001*\022\232\001\n"
-          + "\023StreamingSynthesize\022=.google.cloud.texttospeech.v1beta1.StreamingSynthesizeRe"
-          + "quest\032>.google.cloud.texttospeech.v1beta"
-          + "1.StreamingSynthesizeResponse\"\000(\0010\001\032O\312A\033"
-          + "texttospeech.googleapis.com\322A.https://www.googleapis.com/auth/cloud-platformB\325\002\n"
-          + "%com.google.cloud.texttospeech.v1beta1B\021TextToSpeechProtoP\001ZIcloud.google.com/go"
-          + "/texttospeech/apiv1beta1/texttospeechpb;"
-          + "texttospeechpb\242\002\004CTTS\252\002!Google.Cloud.Tex"
-          + "tToSpeech.V1Beta1\312\002!Google\\Cloud\\TextToS"
-          + "peech\\V1beta1\352\002$Google::Cloud::TextToSpeech::V1beta1\352AU\n"
-          + "\033automl.googleapis.com/Model\0226projects/{project}/locations/{loca"
-          + "tion}/models/{model}b\006proto3"
+          + "\020SynthesizeSpeech\022:.google.cloud.texttospeech.v1beta1.SynthesizeSp"
+          + "eechRequest\032;.google.cloud.texttospeech."
+          + "v1beta1.SynthesizeSpeechResponse\">\332A\030inp"
+          + "ut,voice,audio_config\202\323\344\223\002\035\"\030/v1beta1/text:synthesize:\001*\022\232\001\n"
+          + "\023StreamingSynthesize\022=.google.cloud.texttospeech.v1beta1.Str"
+          + "eamingSynthesizeRequest\032>.google.cloud.texttospeech.v1beta1.StreamingSynthesizeR"
+          + "esponse\"\000(\0010\001\032O\312A\033texttospeech.googleapi"
+          + "s.com\322A.https://www.googleapis.com/auth/cloud-platformB\325\002\n"
+          + "%com.google.cloud.texttospeech.v1beta1B\021TextToSpeechProtoP\001ZIc"
+          + "loud.google.com/go/texttospeech/apiv1bet"
+          + "a1/texttospeechpb;texttospeechpb\242\002\004CTTS\252"
+          + "\002!Google.Cloud.TextToSpeech.V1Beta1\312\002!Go"
+          + "ogle\\Cloud\\TextToSpeech\\V1beta1\352\002$Google::Cloud::TextToSpeech::V1beta1\352AU\n"
+          + "\033automl.googleapis.com/Model\0226projects/{projec"
+          + "t}/locations/{location}/models/{model}b\006proto3"
     };
     descriptor =
         com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(
@@ -364,7 +374,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
         new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
             internal_static_google_cloud_texttospeech_v1beta1_SynthesisInput_descriptor,
             new java.lang.String[] {
-              "Text", "Ssml", "MultiSpeakerMarkup", "CustomPronunciations", "InputSource",
+              "Text", "Markup", "Ssml", "MultiSpeakerMarkup", "CustomPronunciations", "InputSource",
             });
     internal_static_google_cloud_texttospeech_v1beta1_VoiceSelectionParams_descriptor =
         getDescriptor().getMessageTypes().get(9);
@@ -372,7 +382,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
         new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
             internal_static_google_cloud_texttospeech_v1beta1_VoiceSelectionParams_descriptor,
             new java.lang.String[] {
-              "LanguageCode", "Name", "SsmlGender", "CustomVoice", "VoiceClone",
+              "LanguageCode", "Name", "SsmlGender", "CustomVoice", "VoiceClone", "ModelName",
             });
     internal_static_google_cloud_texttospeech_v1beta1_AudioConfig_descriptor =
         getDescriptor().getMessageTypes().get(10);
@@ -425,7 +435,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
         new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
             internal_static_google_cloud_texttospeech_v1beta1_StreamingAudioConfig_descriptor,
             new java.lang.String[] {
-              "AudioEncoding", "SampleRateHertz",
+              "AudioEncoding", "SampleRateHertz", "SpeakingRate",
             });
     internal_static_google_cloud_texttospeech_v1beta1_StreamingSynthesizeConfig_descriptor =
         getDescriptor().getMessageTypes().get(16);
@@ -433,7 +443,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
         new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
             internal_static_google_cloud_texttospeech_v1beta1_StreamingSynthesizeConfig_descriptor,
             new java.lang.String[] {
-              "Voice", "StreamingAudioConfig",
+              "Voice", "StreamingAudioConfig", "CustomPronunciations",
             });
     internal_static_google_cloud_texttospeech_v1beta1_StreamingSynthesisInput_descriptor =
         getDescriptor().getMessageTypes().get(17);
@@ -441,7 +451,7 @@ public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
         new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
             internal_static_google_cloud_texttospeech_v1beta1_StreamingSynthesisInput_descriptor,
             new java.lang.String[] {
-              "Text", "InputSource",
+              "Text", "Markup", "Prompt", "InputSource",
             });
     internal_static_google_cloud_texttospeech_v1beta1_StreamingSynthesizeRequest_descriptor =
         getDescriptor().getMessageTypes().get(18);
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParams.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParams.java
index a03d28c85186..076cc65b234a 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParams.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParams.java
@@ -43,6 +43,7 @@ private VoiceSelectionParams() {
     languageCode_ = "";
     name_ = "";
     ssmlGender_ = 0;
+    modelName_ = "";
   }
 
   @java.lang.Override
@@ -312,8 +313,8 @@ public com.google.cloud.texttospeech.v1beta1.CustomVoiceParams getCustomVoice()
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -332,8 +333,8 @@ public boolean hasVoiceClone() {
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -354,8 +355,8 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParams getVoiceClone() {
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -369,6 +370,61 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParamsOrBuilder getVoiceC
         : voiceClone_;
   }
 
+  public static final int MODEL_NAME_FIELD_NUMBER = 6;
+
+  @SuppressWarnings("serial")
+  private volatile java.lang.Object modelName_ = "";
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The name of the model. If set, the service will choose the model
+   * matching the specified configuration.
+   * </pre>
+   *
+   * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+   *
+   * @return The modelName.
+   */
+  @java.lang.Override
+  public java.lang.String getModelName() {
+    java.lang.Object ref = modelName_;
+    if (ref instanceof java.lang.String) {
+      return (java.lang.String) ref;
+    } else {
+      com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+      java.lang.String s = bs.toStringUtf8();
+      modelName_ = s;
+      return s;
+    }
+  }
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The name of the model. If set, the service will choose the model
+   * matching the specified configuration.
+   * </pre>
+   *
+   * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+   *
+   * @return The bytes for modelName.
+   */
+  @java.lang.Override
+  public com.google.protobuf.ByteString getModelNameBytes() {
+    java.lang.Object ref = modelName_;
+    if (ref instanceof java.lang.String) {
+      com.google.protobuf.ByteString b =
+          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+      modelName_ = b;
+      return b;
+    } else {
+      return (com.google.protobuf.ByteString) ref;
+    }
+  }
+
   private byte memoizedIsInitialized = -1;
 
   @java.lang.Override
@@ -400,6 +456,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io
     if (((bitField0_ & 0x00000002) != 0)) {
       output.writeMessage(5, getVoiceClone());
     }
+    if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(modelName_)) {
+      com.google.protobuf.GeneratedMessageV3.writeString(output, 6, modelName_);
+    }
     getUnknownFields().writeTo(output);
   }
 
@@ -426,6 +485,9 @@ public int getSerializedSize() {
     if (((bitField0_ & 0x00000002) != 0)) {
       size += com.google.protobuf.CodedOutputStream.computeMessageSize(5, getVoiceClone());
     }
+    if (!com.google.protobuf.GeneratedMessageV3.isStringEmpty(modelName_)) {
+      size += com.google.protobuf.GeneratedMessageV3.computeStringSize(6, modelName_);
+    }
     size += getUnknownFields().getSerializedSize();
     memoizedSize = size;
     return size;
@@ -453,6 +515,7 @@ public boolean equals(final java.lang.Object obj) {
     if (hasVoiceClone()) {
       if (!getVoiceClone().equals(other.getVoiceClone())) return false;
     }
+    if (!getModelName().equals(other.getModelName())) return false;
     if (!getUnknownFields().equals(other.getUnknownFields())) return false;
     return true;
   }
@@ -478,6 +541,8 @@ public int hashCode() {
       hash = (37 * hash) + VOICE_CLONE_FIELD_NUMBER;
       hash = (53 * hash) + getVoiceClone().hashCode();
     }
+    hash = (37 * hash) + MODEL_NAME_FIELD_NUMBER;
+    hash = (53 * hash) + getModelName().hashCode();
     hash = (29 * hash) + getUnknownFields().hashCode();
     memoizedHashCode = hash;
     return hash;
@@ -642,6 +707,7 @@ public Builder clear() {
         voiceCloneBuilder_.dispose();
         voiceCloneBuilder_ = null;
       }
+      modelName_ = "";
       return this;
     }
 
@@ -697,6 +763,9 @@ private void buildPartial0(com.google.cloud.texttospeech.v1beta1.VoiceSelectionP
         result.voiceClone_ = voiceCloneBuilder_ == null ? voiceClone_ : voiceCloneBuilder_.build();
         to_bitField0_ |= 0x00000002;
       }
+      if (((from_bitField0_ & 0x00000020) != 0)) {
+        result.modelName_ = modelName_;
+      }
       result.bitField0_ |= to_bitField0_;
     }
 
@@ -765,6 +834,11 @@ public Builder mergeFrom(com.google.cloud.texttospeech.v1beta1.VoiceSelectionPar
       if (other.hasVoiceClone()) {
         mergeVoiceClone(other.getVoiceClone());
       }
+      if (!other.getModelName().isEmpty()) {
+        modelName_ = other.modelName_;
+        bitField0_ |= 0x00000020;
+        onChanged();
+      }
       this.mergeUnknownFields(other.getUnknownFields());
       onChanged();
       return this;
@@ -821,6 +895,12 @@ public Builder mergeFrom(
                 bitField0_ |= 0x00000010;
                 break;
               } // case 42
+            case 50:
+              {
+                modelName_ = input.readStringRequireUtf8();
+                bitField0_ |= 0x00000020;
+                break;
+              } // case 50
             default:
               {
                 if (!super.parseUnknownField(input, extensionRegistry, tag)) {
@@ -1466,8 +1546,8 @@ public com.google.cloud.texttospeech.v1beta1.CustomVoiceParams.Builder getCustom
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1485,8 +1565,8 @@ public boolean hasVoiceClone() {
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1510,8 +1590,8 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParams getVoiceClone() {
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1537,8 +1617,8 @@ public Builder setVoiceClone(com.google.cloud.texttospeech.v1beta1.VoiceClonePar
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1562,8 +1642,8 @@ public Builder setVoiceClone(
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1595,8 +1675,8 @@ public Builder mergeVoiceClone(com.google.cloud.texttospeech.v1beta1.VoiceCloneP
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1619,8 +1699,8 @@ public Builder clearVoiceClone() {
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1638,8 +1718,8 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParams.Builder getVoiceCl
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1662,8 +1742,8 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParams.Builder getVoiceCl
      *
      * <pre>
      * Optional. The configuration for a voice clone. If
-     * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-     * voice clone matching the specified configuration.
+     * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+     * clone matching the specified configuration.
      * </pre>
      *
      * <code>
@@ -1687,6 +1767,122 @@ public com.google.cloud.texttospeech.v1beta1.VoiceCloneParams.Builder getVoiceCl
       return voiceCloneBuilder_;
     }
 
+    private java.lang.Object modelName_ = "";
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The name of the model. If set, the service will choose the model
+     * matching the specified configuration.
+     * </pre>
+     *
+     * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+     *
+     * @return The modelName.
+     */
+    public java.lang.String getModelName() {
+      java.lang.Object ref = modelName_;
+      if (!(ref instanceof java.lang.String)) {
+        com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref;
+        java.lang.String s = bs.toStringUtf8();
+        modelName_ = s;
+        return s;
+      } else {
+        return (java.lang.String) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The name of the model. If set, the service will choose the model
+     * matching the specified configuration.
+     * </pre>
+     *
+     * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+     *
+     * @return The bytes for modelName.
+     */
+    public com.google.protobuf.ByteString getModelNameBytes() {
+      java.lang.Object ref = modelName_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b =
+            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) ref);
+        modelName_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The name of the model. If set, the service will choose the model
+     * matching the specified configuration.
+     * </pre>
+     *
+     * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+     *
+     * @param value The modelName to set.
+     * @return This builder for chaining.
+     */
+    public Builder setModelName(java.lang.String value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      modelName_ = value;
+      bitField0_ |= 0x00000020;
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The name of the model. If set, the service will choose the model
+     * matching the specified configuration.
+     * </pre>
+     *
+     * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+     *
+     * @return This builder for chaining.
+     */
+    public Builder clearModelName() {
+      modelName_ = getDefaultInstance().getModelName();
+      bitField0_ = (bitField0_ & ~0x00000020);
+      onChanged();
+      return this;
+    }
+
+    /**
+     *
+     *
+     * <pre>
+     * Optional. The name of the model. If set, the service will choose the model
+     * matching the specified configuration.
+     * </pre>
+     *
+     * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+     *
+     * @param value The bytes for modelName to set.
+     * @return This builder for chaining.
+     */
+    public Builder setModelNameBytes(com.google.protobuf.ByteString value) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+      checkByteStringIsUtf8(value);
+      modelName_ = value;
+      bitField0_ |= 0x00000020;
+      onChanged();
+      return this;
+    }
+
     @java.lang.Override
     public final Builder setUnknownFields(final com.google.protobuf.UnknownFieldSet unknownFields) {
       return super.setUnknownFields(unknownFields);
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParamsOrBuilder.java b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParamsOrBuilder.java
index f0f99b202cfc..52b22d249088 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParamsOrBuilder.java
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/java/com/google/cloud/texttospeech/v1beta1/VoiceSelectionParamsOrBuilder.java
@@ -182,8 +182,8 @@ public interface VoiceSelectionParamsOrBuilder
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -199,8 +199,8 @@ public interface VoiceSelectionParamsOrBuilder
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -216,8 +216,8 @@ public interface VoiceSelectionParamsOrBuilder
    *
    * <pre>
    * Optional. The configuration for a voice clone. If
-   * [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-   * voice clone matching the specified configuration.
+   * [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+   * clone matching the specified configuration.
    * </pre>
    *
    * <code>
@@ -225,4 +225,32 @@ public interface VoiceSelectionParamsOrBuilder
    * </code>
    */
   com.google.cloud.texttospeech.v1beta1.VoiceCloneParamsOrBuilder getVoiceCloneOrBuilder();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The name of the model. If set, the service will choose the model
+   * matching the specified configuration.
+   * </pre>
+   *
+   * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+   *
+   * @return The modelName.
+   */
+  java.lang.String getModelName();
+
+  /**
+   *
+   *
+   * <pre>
+   * Optional. The name of the model. If set, the service will choose the model
+   * matching the specified configuration.
+   * </pre>
+   *
+   * <code>string model_name = 6 [(.google.api.field_behavior) = OPTIONAL];</code>
+   *
+   * @return The bytes for modelName.
+   */
+  com.google.protobuf.ByteString getModelNameBytes();
 }
diff --git a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/proto/google/cloud/texttospeech/v1beta1/cloud_tts.proto b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/proto/google/cloud/texttospeech/v1beta1/cloud_tts.proto
index eeec64655f1a..204a159d94ac 100644
--- a/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/proto/google/cloud/texttospeech/v1beta1/cloud_tts.proto
+++ b/java-texttospeech/proto-google-cloud-texttospeech-v1beta1/src/main/proto/google/cloud/texttospeech/v1beta1/cloud_tts.proto
@@ -59,7 +59,7 @@ service TextToSpeech {
     option (google.api.method_signature) = "input,voice,audio_config";
   }
 
-  // Performs bidirectional streaming speech synthesis: receive audio while
+  // Performs bidirectional streaming speech synthesis: receives audio while
   // sending text.
   rpc StreamingSynthesize(stream StreamingSynthesizeRequest)
       returns (stream StreamingSynthesizeResponse) {}
@@ -88,7 +88,8 @@ enum SsmlVoiceGender {
 // Configuration to set up audio encoder. The encoding determines the output
 // audio format that we'd like.
 enum AudioEncoding {
-  // Not specified. Will return result
+  // Not specified. Only used by GenerateVoiceCloningKey. Otherwise, returns the
+  // result
   // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
   AUDIO_ENCODING_UNSPECIFIED = 0;
 
@@ -102,7 +103,7 @@ enum AudioEncoding {
   // MP3 at 64kbps.
   MP3_64_KBPS = 4;
 
-  // Opus encoded audio wrapped in an ogg container. The result will be a
+  // Opus encoded audio wrapped in an ogg container. The result is a
   // file which can be played natively on Android, and in browsers (at least
   // Chrome and Firefox). The quality of the encoding is considerably higher
   // than MP3 while using approximately the same bitrate.
@@ -117,9 +118,12 @@ enum AudioEncoding {
   ALAW = 6;
 
   // Uncompressed 16-bit signed little-endian samples (Linear PCM).
-  // Note that as opposed to LINEAR16, audio will not be wrapped in a WAV (or
+  // Note that as opposed to LINEAR16, audio won't be wrapped in a WAV (or
   // any other) header.
   PCM = 7;
+
+  // M4A audio.
+  M4A = 8;
 }
 
 // The top-level message sent by the client for the `ListVoices` method.
@@ -160,8 +164,8 @@ message Voice {
 
 // Used for advanced voice options.
 message AdvancedVoiceOptions {
-  // Only for Journey voices. If false, the synthesis will be context aware
-  // and have higher latency.
+  // Only for Journey voices. If false, the synthesis is context aware
+  // and has a higher latency.
   optional bool low_latency_journey_synthesis = 1;
 }
 
@@ -199,18 +203,41 @@ message CustomPronunciationParams {
     // Not specified.
     PHONETIC_ENCODING_UNSPECIFIED = 0;
 
-    // IPA. (e.g. apple -> ˈæpəl )
+    // IPA, such as apple -> ˈæpəl.
     // https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
     PHONETIC_ENCODING_IPA = 1;
 
-    // X-SAMPA (e.g. apple -> "{p@l" )
+    // X-SAMPA, such as apple -> "{p@l".
     // https://en.wikipedia.org/wiki/X-SAMPA
     PHONETIC_ENCODING_X_SAMPA = 2;
+
+    // For reading-to-pronunciation conversion to work well, the
+    // `pronunciation` field should only contain Kanji, Hiragana, and Katakana.
+    //
+    // The pronunciation can also contain pitch accents.
+    // The start of a pitch phrase is specified with `^` and the down-pitch
+    // position is specified with `!`, for example:
+    //
+    //     phrase:端  pronunciation:^はし
+    //     phrase:箸  pronunciation:^は!し
+    //     phrase:橋  pronunciation:^はし!
+    //
+    // We currently only support the Tokyo dialect, which allows at most one
+    // down-pitch per phrase (i.e. at most one `!` between `^`).
+    PHONETIC_ENCODING_JAPANESE_YOMIGANA = 3;
+
+    // Used to specify pronunciations for Mandarin words. See
+    // https://en.wikipedia.org/wiki/Pinyin.
+    //
+    // For example: 朝阳, the pronunciation is "chao2 yang2". The number
+    // represents the tone, and there is a space between syllables. Neutral
+    // tones are represented by 5, for example 孩子 "hai2 zi5".
+    PHONETIC_ENCODING_PINYIN = 4;
   }
 
-  // The phrase to which the customization will be applied.
-  // The phrase can be multiple words (in the case of proper nouns etc), but
-  // should not span to a whole sentence.
+  // The phrase to which the customization is applied.
+  // The phrase can be multiple words, such as proper nouns, but shouldn't span
+  // the length of the sentence.
   optional string phrase = 1;
 
   // The phonetic encoding of the phrase.
@@ -223,13 +250,13 @@ message CustomPronunciationParams {
 
 // A collection of pronunciation customizations.
 message CustomPronunciations {
-  // The pronunciation customizations to be applied.
+  // The pronunciation customizations to apply.
   repeated CustomPronunciationParams pronunciations = 1;
 }
 
 // A collection of turns for multi-speaker synthesis.
 message MultiSpeakerMarkup {
-  // A Multi-speaker turn.
+  // A multi-speaker turn.
   message Turn {
     // Required. The speaker of the turn, for example, 'O' or 'Q'. Please refer
     // to documentation for available speakers.
@@ -253,6 +280,10 @@ message SynthesisInput {
     // The raw text to be synthesized.
     string text = 1;
 
+    // Markup for HD voices specifically. This field may not be used with any
+    // other voices.
+    string markup = 5;
+
     // The SSML document to be synthesized. The SSML document must be valid
     // and well-formed. Otherwise the RPC will fail and return
     // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. For
@@ -265,18 +296,16 @@ message SynthesisInput {
     MultiSpeakerMarkup multi_speaker_markup = 4;
   }
 
-  // Optional. The pronunciation customizations to be applied to the input. If
-  // this is set, the input will be synthesized using the given pronunciation
+  // Optional. The pronunciation customizations are applied to the input. If
+  // this is set, the input is synthesized using the given pronunciation
   // customizations.
   //
-  // The initial support will be for EFIGS (English, French,
-  // Italian, German, Spanish) languages, as provided in
-  // VoiceSelectionParams. Journey and Instant Clone voices are
-  // not supported yet.
+  // The initial support is for en-us, with plans to expand to other locales in
+  // the future. Instant Clone voices aren't supported.
   //
   // In order to customize the pronunciation of a phrase, there must be an exact
   // match of the phrase in the input types. If using SSML, the phrase must not
-  // be inside a phoneme tag (entirely or partially).
+  // be inside a phoneme tag.
   CustomPronunciations custom_pronunciations = 3
       [(google.api.field_behavior) = OPTIONAL];
 }
@@ -314,9 +343,13 @@ message VoiceSelectionParams {
   CustomVoiceParams custom_voice = 4;
 
   // Optional. The configuration for a voice clone. If
-  // [VoiceCloneParams.voice_clone_key] is set, the service will choose the
-  // voice clone matching the specified configuration.
+  // [VoiceCloneParams.voice_clone_key] is set, the service chooses the voice
+  // clone matching the specified configuration.
   VoiceCloneParams voice_clone = 5 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The name of the model. If set, the service will choose the model
+  // matching the specified configuration.
+  string model_name = 6 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Description of audio data to be synthesized.
@@ -324,10 +357,10 @@ message AudioConfig {
   // Required. The format of the audio byte stream.
   AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
 
-  // Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is
+  // Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
   // the normal native speed supported by the specific voice. 2.0 is twice as
   // fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
-  // speed. Any other values < 0.25 or > 4.0 will return an error.
+  // speed. Any other values < 0.25 or > 2.0 will return an error.
   double speaking_rate = 2 [
     (google.api.field_behavior) = INPUT_ONLY,
     (google.api.field_behavior) = OPTIONAL
@@ -440,12 +473,21 @@ message Timepoint {
 // Description of the desired output audio data.
 message StreamingAudioConfig {
   // Required. The format of the audio byte stream.
-  // For now, streaming only supports PCM and OGG_OPUS. All other encodings
-  // will return an error.
+  // Streaming supports PCM, ALAW, MULAW and OGG_OPUS. All other encodings
+  // return an error.
   AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
 
   // Optional. The synthesis sample rate (in hertz) for this audio.
   int32 sample_rate_hertz = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Input only. Speaking rate/speed, in the range [0.25, 2.0]. 1.0 is
+  // the normal native speed supported by the specific voice. 2.0 is twice as
+  // fast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0
+  // speed. Any other values < 0.25 or > 2.0 will return an error.
+  double speaking_rate = 3 [
+    (google.api.field_behavior) = INPUT_ONLY,
+    (google.api.field_behavior) = OPTIONAL
+  ];
 }
 
 // Provides configuration information for the StreamingSynthesize request.
@@ -456,17 +498,36 @@ message StreamingSynthesizeConfig {
   // Optional. The configuration of the synthesized audio.
   StreamingAudioConfig streaming_audio_config = 4
       [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The pronunciation customizations are applied to the input. If
+  // this is set, the input is synthesized using the given pronunciation
+  // customizations.
+  //
+  // The initial support is for en-us, with plans to expand to other locales in
+  // the future. Instant Clone voices aren't supported.
+  //
+  // In order to customize the pronunciation of a phrase, there must be an exact
+  // match of the phrase in the input types. If using SSML, the phrase must not
+  // be inside a phoneme tag.
+  CustomPronunciations custom_pronunciations = 5
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Input to be synthesized.
 message StreamingSynthesisInput {
   oneof input_source {
     // The raw text to be synthesized. It is recommended that each input
-    // contains complete, terminating sentences, as this will likely result in
-    // better prosody in the output audio. That being said, users are free to
-    // input text however they please.
+    // contains complete, terminating sentences, which results in better prosody
+    // in the output audio.
     string text = 1;
+
+    // Markup for HD voices specifically. This field may not be used with any
+    // other voices.
+    string markup = 5;
   }
+
+  // This is a system instruction supported only for controllable voice models.
+  optional string prompt = 6;
 }
 
 // Request message for the `StreamingSynthesize` method. Multiple