Skip to content

Commit 612eae4

Browse files
Simplified next set of Speech Samples (set-3) (#12158)
* Simplified set of Speech Samples * Update comments and imports
1 parent 8dbae7e commit 612eae4

11 files changed

+174
-214
lines changed

speech/snippets/transcribe_feature_in_recognizer.py

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,51 +11,73 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import os
1415

15-
16-
import argparse
17-
18-
# [START speech_transcribe_feature_in_recognizer]
19-
from google.cloud.speech_v2 import SpeechClient
2016
from google.cloud.speech_v2.types import cloud_speech
2117

18+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
19+
2220

2321
def transcribe_feature_in_recognizer(
24-
project_id: str,
25-
recognizer_id: str,
2622
audio_file: str,
23+
recognizer_id: str,
2724
) -> cloud_speech.RecognizeResponse:
28-
"""Transcribe an audio file using an existing recognizer."""
25+
"""Use an existing recognizer or create a new one to transcribe an audio file.
26+
Args:
27+
audio_file (str): The path to the audio file to be transcribed.
28+
Example: "resources/audio.wav"
29+
recognizer_id (str): The ID of the recognizer to be used or created. ID should be unique
30+
within the project and location.
31+
Returns:
32+
cloud_speech.RecognizeResponse: The response containing the transcription results.
33+
"""
34+
# [START speech_transcribe_feature_in_recognizer]
35+
36+
from google.cloud.speech_v2 import SpeechClient
37+
from google.cloud.speech_v2.types import cloud_speech
38+
39+
from google.api_core.exceptions import NotFound
40+
2941
# Instantiates a client
3042
client = SpeechClient()
3143

32-
request = cloud_speech.CreateRecognizerRequest(
33-
parent=f"projects/{project_id}/locations/global",
34-
recognizer_id=recognizer_id,
35-
recognizer=cloud_speech.Recognizer(
36-
default_recognition_config=cloud_speech.RecognitionConfig(
37-
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
38-
language_codes=["en-US"],
39-
model="latest_long",
40-
features=cloud_speech.RecognitionFeatures(
41-
enable_automatic_punctuation=True,
44+
# TODO(developer): Update and un-comment the lines below
45+
# PROJECT_ID = "your-project-id"
46+
# recognizer_id = "id-recognizer"
47+
recognizer_name = (
48+
f"projects/{PROJECT_ID}/locations/global/recognizers/{recognizer_id}"
49+
)
50+
try:
51+
# Use an existing recognizer
52+
recognizer = client.get_recognizer(name=recognizer_name)
53+
print("Using existing Recognizer:", recognizer.name)
54+
except NotFound:
55+
# Create a new recognizer
56+
request = cloud_speech.CreateRecognizerRequest(
57+
parent=f"projects/{PROJECT_ID}/locations/global",
58+
recognizer_id=recognizer_id,
59+
recognizer=cloud_speech.Recognizer(
60+
default_recognition_config=cloud_speech.RecognitionConfig(
61+
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
62+
language_codes=["en-US"],
63+
model="latest_long",
64+
features=cloud_speech.RecognitionFeatures(
65+
enable_automatic_punctuation=True,
66+
),
4267
),
4368
),
44-
),
45-
)
46-
47-
operation = client.create_recognizer(request=request)
48-
recognizer = operation.result()
49-
50-
print("Created Recognizer:", recognizer.name)
69+
)
70+
operation = client.create_recognizer(request=request)
71+
recognizer = operation.result()
72+
print("Created Recognizer:", recognizer.name)
5173

5274
# Reads a file as bytes
5375
with open(audio_file, "rb") as f:
54-
content = f.read()
76+
audio_content = f.read()
5577

5678
request = cloud_speech.RecognizeRequest(
57-
recognizer=f"projects/{project_id}/locations/global/recognizers/{recognizer_id}",
58-
content=content,
79+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/{recognizer_id}",
80+
content=audio_content,
5981
)
6082

6183
# Transcribes the audio into text
@@ -64,18 +86,12 @@ def transcribe_feature_in_recognizer(
6486
for result in response.results:
6587
print(f"Transcript: {result.alternatives[0].transcript}")
6688

67-
return response
68-
89+
# [END speech_transcribe_feature_in_recognizer]
6990

70-
# [END speech_transcribe_feature_in_recognizer]
91+
return response
7192

7293

7394
if __name__ == "__main__":
74-
parser = argparse.ArgumentParser(
75-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
95+
transcribe_feature_in_recognizer(
96+
audio_file="resources/audio.wav", recognizer_id="id-recognizer"
7697
)
77-
parser.add_argument("project_id", help="GCP Project ID")
78-
parser.add_argument("recognizer_id", help="Recognizer ID to use for recogniition")
79-
parser.add_argument("audio_file", help="Audio file to stream")
80-
args = parser.parse_args()
81-
transcribe_feature_in_recognizer(args.project_id, args.audio_file)

speech/snippets/transcribe_feature_in_recognizer_test.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,46 @@
2424
import transcribe_feature_in_recognizer
2525

2626
_RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
27+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
2728

2829

29-
def delete_recognizer(project_id: str, recognizer_id: str) -> None:
30+
def delete_recognizer(recognizer_id: str) -> None:
3031
client = SpeechClient()
3132
request = cloud_speech.DeleteRecognizerRequest(
32-
name=f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
33+
name=f"projects/{PROJECT_ID}/locations/global/recognizers/{recognizer_id}"
3334
)
3435
client.delete_recognizer(request=request)
3536

3637

3738
def test_transcribe_feature_in_recognizer(
3839
capsys: pytest.CaptureFixture, request: pytest.FixtureRequest
3940
) -> None:
40-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
4141
recognizer_id = "recognizer-" + str(uuid4())
4242

4343
def cleanup() -> None:
44-
delete_recognizer(project_id, recognizer_id)
44+
delete_recognizer(recognizer_id)
4545

4646
request.addfinalizer(cleanup)
4747

48-
response = transcribe_feature_in_recognizer.transcribe_feature_in_recognizer(
49-
project_id, recognizer_id, os.path.join(_RESOURCES, "audio.wav")
48+
response_with_new_recognizer = (
49+
transcribe_feature_in_recognizer.transcribe_feature_in_recognizer(
50+
os.path.join(_RESOURCES, "audio.wav"), recognizer_id
51+
)
5052
)
51-
53+
# Call the function a second time to verify that it works with the existing recognizer
54+
response_with_existing_recognizer = (
55+
transcribe_feature_in_recognizer.transcribe_feature_in_recognizer(
56+
os.path.join(_RESOURCES, "fair.wav"), recognizer_id
57+
)
58+
)
59+
out, _ = capsys.readouterr()
5260
assert re.search(
5361
r"How old is the Brooklyn Bridge?",
54-
response.results[0].alternatives[0].transcript,
62+
response_with_new_recognizer.results[0].alternatives[0].transcript,
63+
re.DOTALL | re.I,
64+
)
65+
assert re.search(
66+
r"is fair",
67+
response_with_existing_recognizer.results[0].alternatives[0].transcript,
5568
re.DOTALL | re.I,
5669
)

speech/snippets/transcribe_file_v2.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,32 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import os
1516

16-
import argparse
17-
18-
# [START speech_transcribe_file_v2]
19-
from google.cloud.speech_v2 import SpeechClient
2017
from google.cloud.speech_v2.types import cloud_speech
2118

19+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
20+
21+
22+
def transcribe_file_v2() -> cloud_speech.RecognizeResponse:
23+
"""Transcribe an audio file using Google Cloud Speech-to-Text API v2.
24+
Returns:
25+
cloud_speech.RecognizeResponse: The response containing the transcription results.
26+
"""
27+
# [START speech_transcribe_file_v2]
28+
29+
from google.cloud.speech_v2 import SpeechClient
30+
from google.cloud.speech_v2.types import cloud_speech
31+
32+
# TODO(developer): Update and un-comment the line below
33+
# PROJECT_ID = "your-project-id"
2234

23-
def transcribe_file_v2(
24-
project_id: str,
25-
audio_file: str,
26-
) -> cloud_speech.RecognizeResponse:
2735
# Instantiates a client
2836
client = SpeechClient()
2937

3038
# Reads a file as bytes
31-
with open(audio_file, "rb") as f:
32-
content = f.read()
39+
with open("resources/audio.wav", "rb") as f:
40+
audio_content = f.read()
3341

3442
config = cloud_speech.RecognitionConfig(
3543
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
@@ -38,9 +46,9 @@ def transcribe_file_v2(
3846
)
3947

4048
request = cloud_speech.RecognizeRequest(
41-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
49+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
4250
config=config,
43-
content=content,
51+
content=audio_content,
4452
)
4553

4654
# Transcribes the audio into text
@@ -49,17 +57,6 @@ def transcribe_file_v2(
4957
for result in response.results:
5058
print(f"Transcript: {result.alternatives[0].transcript}")
5159

52-
return response
60+
# [END speech_transcribe_file_v2]
5361

54-
55-
# [END speech_transcribe_file_v2]
56-
57-
58-
if __name__ == "__main__":
59-
parser = argparse.ArgumentParser(
60-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
61-
)
62-
parser.add_argument("project_id", help="GCP Project ID")
63-
parser.add_argument("audio_file", help="Audio file to stream")
64-
args = parser.parse_args()
65-
transcribe_file_v2(args.project_id, args.audio_file)
62+
return response

speech/snippets/transcribe_file_v2_test.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,15 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import os
1615
import re
1716

1817
import pytest
1918

2019
import transcribe_file_v2
2120

22-
_RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
23-
2421

2522
def test_transcribe_file_v2(capsys: pytest.CaptureFixture) -> None:
26-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
27-
28-
response = transcribe_file_v2.transcribe_file_v2(
29-
project_id, os.path.join(_RESOURCES, "audio.wav")
30-
)
31-
23+
response = transcribe_file_v2.transcribe_file_v2()
3224
assert re.search(
3325
r"how old is the Brooklyn Bridge",
3426
response.results[0].alternatives[0].transcript,

speech/snippets/transcribe_gcs_v2.py

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,41 +11,39 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import os
1415

15-
16-
import argparse
17-
18-
# [START speech_transcribe_gcs_v2]
19-
from google.cloud.speech_v2 import SpeechClient
2016
from google.cloud.speech_v2.types import cloud_speech
2117

18+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
2219

23-
def transcribe_gcs_v2(
24-
project_id: str,
25-
gcs_uri: str,
26-
) -> cloud_speech.RecognizeResponse:
27-
"""Transcribes audio from a Google Cloud Storage URI.
28-
29-
Args:
30-
project_id: The GCP project ID.
31-
gcs_uri: The Google Cloud Storage URI.
3220

21+
def transcribe_gcs_v2() -> cloud_speech.RecognizeResponse:
22+
"""Transcribe an audio file using Google Cloud Speech-to-Text API v2.
3323
Returns:
34-
The RecognizeResponse.
24+
cloud_speech.RecognizeResponse: The response containing the transcription results.
3525
"""
26+
# [START speech_transcribe_gcs_v2]
27+
28+
from google.cloud.speech_v2 import SpeechClient
29+
from google.cloud.speech_v2.types import cloud_speech
30+
3631
# Instantiates a client
3732
client = SpeechClient()
3833

34+
# TODO(developer): Update and un-comment the line below
35+
# PROJECT_ID = "your-project-id"
36+
3937
config = cloud_speech.RecognitionConfig(
4038
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
4139
language_codes=["en-US"],
4240
model="long",
4341
)
4442

4543
request = cloud_speech.RecognizeRequest(
46-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
44+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
4745
config=config,
48-
uri=gcs_uri,
46+
uri="gs://cloud-samples-data/speech/audio.flac", # URI of the audio file in GCS
4947
)
5048

5149
# Transcribes the audio into text
@@ -54,17 +52,6 @@ def transcribe_gcs_v2(
5452
for result in response.results:
5553
print(f"Transcript: {result.alternatives[0].transcript}")
5654

57-
return response
58-
59-
60-
# [END speech_transcribe_gcs_v2]
55+
# [END speech_transcribe_gcs_v2]
6156

62-
63-
if __name__ == "__main__":
64-
parser = argparse.ArgumentParser(
65-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
66-
)
67-
parser.add_argument("project_id", help="GCP Project ID")
68-
parser.add_argument("gcs_uri", help="URI to GCS file")
69-
args = parser.parse_args()
70-
transcribe_gcs_v2(args.project_id, args.gcs_uri)
57+
return response

speech/snippets/transcribe_gcs_v2_test.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import os
1615
import re
1716

1817
from google.api_core.retry import Retry
@@ -21,14 +20,9 @@
2120
import transcribe_gcs_v2
2221

2322

24-
_TEST_AUDIO_FILE_PATH = "gs://cloud-samples-data/speech/audio.flac"
25-
26-
2723
@Retry()
2824
def test_transcribe_gcs_v2(capsys: pytest.CaptureFixture) -> None:
29-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
30-
31-
response = transcribe_gcs_v2.transcribe_gcs_v2(project_id, _TEST_AUDIO_FILE_PATH)
25+
response = transcribe_gcs_v2.transcribe_gcs_v2()
3226

3327
assert re.search(
3428
r"how old is the Brooklyn Bridge",

0 commit comments

Comments
 (0)