11
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
+ import os
14
15
15
-
16
- import argparse
17
-
18
- # [START speech_transcribe_feature_in_recognizer]
19
- from google .cloud .speech_v2 import SpeechClient
20
16
from google .cloud .speech_v2 .types import cloud_speech
21
17
18
+ PROJECT_ID = os .getenv ("GOOGLE_CLOUD_PROJECT" )
19
+
22
20
23
21
def transcribe_feature_in_recognizer (
24
- project_id : str ,
25
- recognizer_id : str ,
26
22
audio_file : str ,
23
+ recognizer_id : str ,
27
24
) -> cloud_speech .RecognizeResponse :
28
- """Transcribe an audio file using an existing recognizer."""
25
+ """Use an existing recognizer or create a new one to transcribe an audio file.
26
+ Args:
27
+ audio_file (str): The path to the audio file to be transcribed.
28
+ Example: "resources/audio.wav"
29
+ recognizer_id (str): The ID of the recognizer to be used or created. ID should be unique
30
+ within the project and location.
31
+ Returns:
32
+ cloud_speech.RecognizeResponse: The response containing the transcription results.
33
+ """
34
+ # [START speech_transcribe_feature_in_recognizer]
35
+
36
+ from google .cloud .speech_v2 import SpeechClient
37
+ from google .cloud .speech_v2 .types import cloud_speech
38
+
39
+ from google .api_core .exceptions import NotFound
40
+
29
41
# Instantiates a client
30
42
client = SpeechClient ()
31
43
32
- request = cloud_speech .CreateRecognizerRequest (
33
- parent = f"projects/{ project_id } /locations/global" ,
34
- recognizer_id = recognizer_id ,
35
- recognizer = cloud_speech .Recognizer (
36
- default_recognition_config = cloud_speech .RecognitionConfig (
37
- auto_decoding_config = cloud_speech .AutoDetectDecodingConfig (),
38
- language_codes = ["en-US" ],
39
- model = "latest_long" ,
40
- features = cloud_speech .RecognitionFeatures (
41
- enable_automatic_punctuation = True ,
44
+ # TODO(developer): Update and un-comment below line
45
+ # PROJECT_ID = "your-project-id"
46
+ # recognizer_id = "id-recognizer"
47
+ recognizer_name = (
48
+ f"projects/{ PROJECT_ID } /locations/global/recognizers/{ recognizer_id } "
49
+ )
50
+ try :
51
+ # Use an existing recognizer
52
+ recognizer = client .get_recognizer (name = recognizer_name )
53
+ print ("Using existing Recognizer:" , recognizer .name )
54
+ except NotFound :
55
+ # Create a new recognizer
56
+ request = cloud_speech .CreateRecognizerRequest (
57
+ parent = f"projects/{ PROJECT_ID } /locations/global" ,
58
+ recognizer_id = recognizer_id ,
59
+ recognizer = cloud_speech .Recognizer (
60
+ default_recognition_config = cloud_speech .RecognitionConfig (
61
+ auto_decoding_config = cloud_speech .AutoDetectDecodingConfig (),
62
+ language_codes = ["en-US" ],
63
+ model = "latest_long" ,
64
+ features = cloud_speech .RecognitionFeatures (
65
+ enable_automatic_punctuation = True ,
66
+ ),
42
67
),
43
68
),
44
- ),
45
- )
46
-
47
- operation = client .create_recognizer (request = request )
48
- recognizer = operation .result ()
49
-
50
- print ("Created Recognizer:" , recognizer .name )
69
+ )
70
+ operation = client .create_recognizer (request = request )
71
+ recognizer = operation .result ()
72
+ print ("Created Recognizer:" , recognizer .name )
51
73
52
74
# Reads a file as bytes
53
75
with open (audio_file , "rb" ) as f :
54
- content = f .read ()
76
+ audio_content = f .read ()
55
77
56
78
request = cloud_speech .RecognizeRequest (
57
- recognizer = f"projects/{ project_id } /locations/global/recognizers/{ recognizer_id } " ,
58
- content = content ,
79
+ recognizer = f"projects/{ PROJECT_ID } /locations/global/recognizers/{ recognizer_id } " ,
80
+ content = audio_content ,
59
81
)
60
82
61
83
# Transcribes the audio into text
@@ -64,18 +86,12 @@ def transcribe_feature_in_recognizer(
64
86
for result in response .results :
65
87
print (f"Transcript: { result .alternatives [0 ].transcript } " )
66
88
67
- return response
68
-
89
+ # [END speech_transcribe_feature_in_recognizer]
69
90
70
- # [END speech_transcribe_feature_in_recognizer]
91
+ return response
71
92
72
93
73
94
if __name__ == "__main__" :
74
- parser = argparse . ArgumentParser (
75
- description = __doc__ , formatter_class = argparse . RawDescriptionHelpFormatter
95
+ transcribe_feature_in_recognizer (
96
+ audio_file = "resources/audio.wav" , recognizer_id = "id-recognizer"
76
97
)
77
- parser .add_argument ("project_id" , help = "GCP Project ID" )
78
- parser .add_argument ("recognizer_id" , help = "Recognizer ID to use for recogniition" )
79
- parser .add_argument ("audio_file" , help = "Audio file to stream" )
80
- args = parser .parse_args ()
81
- transcribe_feature_in_recognizer (args .project_id , args .audio_file )
0 commit comments