|
18 | 18 | Google Cloud API. |
19 | 19 |
|
20 | 20 | Usage Examples: |
21 | | - python beta_snippets.py transcription \ |
22 | | - gs://python-docs-samples-tests/video/googlework_tiny.mp4 |
23 | | - |
|
24 | | - python beta_snippets.py video-text-gcs \ |
25 | | - gs://python-docs-samples-tests/video/googlework_tiny.mp4 |
26 | | - |
|
27 | 21 | python beta_snippets.py streaming-labels resources/cat.mp4 |
28 | 22 |
|
29 | 23 | python beta_snippets.py streaming-shot-change resources/cat.mp4 |
|
49 | 43 | import io |
50 | 44 |
|
51 | 45 |
|
52 | | -def speech_transcription(input_uri, timeout=180): |
53 | | - # [START video_speech_transcription_gcs_beta] |
54 | | - """Transcribe speech from a video stored on GCS.""" |
55 | | - from google.cloud import videointelligence_v1p1beta1 as videointelligence |
56 | | - |
57 | | - video_client = videointelligence.VideoIntelligenceServiceClient() |
58 | | - |
59 | | - features = [videointelligence.Feature.SPEECH_TRANSCRIPTION] |
60 | | - |
61 | | - config = videointelligence.SpeechTranscriptionConfig( |
62 | | - language_code="en-US", enable_automatic_punctuation=True |
63 | | - ) |
64 | | - video_context = videointelligence.VideoContext(speech_transcription_config=config) |
65 | | - |
66 | | - operation = video_client.annotate_video( |
67 | | - request={ |
68 | | - "features": features, |
69 | | - "input_uri": input_uri, |
70 | | - "video_context": video_context, |
71 | | - } |
72 | | - ) |
73 | | - |
74 | | - print("\nProcessing video for speech transcription.") |
75 | | - |
76 | | - result = operation.result(timeout) |
77 | | - |
78 | | - # There is only one annotation_result since only |
79 | | - # one video is processed. |
80 | | - annotation_results = result.annotation_results[0] |
81 | | - for speech_transcription in annotation_results.speech_transcriptions: |
82 | | - # The number of alternatives for each transcription is limited by |
83 | | - # SpeechTranscriptionConfig.max_alternatives. |
84 | | - # Each alternative is a different possible transcription |
85 | | - # and has its own confidence score. |
86 | | - for alternative in speech_transcription.alternatives: |
87 | | - print("Alternative level information:") |
88 | | - |
89 | | - print("Transcript: {}".format(alternative.transcript)) |
90 | | - print("Confidence: {}\n".format(alternative.confidence)) |
91 | | - |
92 | | - print("Word level information:") |
93 | | - for word_info in alternative.words: |
94 | | - word = word_info.word |
95 | | - start_time = word_info.start_time |
96 | | - end_time = word_info.end_time |
97 | | - print( |
98 | | - "\t{}s - {}s: {}".format( |
99 | | - start_time.seconds + start_time.microseconds * 1e-6, |
100 | | - end_time.seconds + end_time.microseconds * 1e-6, |
101 | | - word, |
102 | | - ) |
103 | | - ) |
104 | | - # [END video_speech_transcription_gcs_beta] |
105 | | - |
106 | | - |
107 | | -def video_detect_text_gcs(input_uri): |
108 | | - # [START video_detect_text_gcs_beta] |
109 | | - """Detect text in a video stored on GCS.""" |
110 | | - from google.cloud import videointelligence_v1p2beta1 as videointelligence |
111 | | - |
112 | | - video_client = videointelligence.VideoIntelligenceServiceClient() |
113 | | - features = [videointelligence.Feature.TEXT_DETECTION] |
114 | | - |
115 | | - operation = video_client.annotate_video( |
116 | | - request={"features": features, "input_uri": input_uri} |
117 | | - ) |
118 | | - |
119 | | - print("\nProcessing video for text detection.") |
120 | | - result = operation.result(timeout=300) |
121 | | - |
122 | | - # The first result is retrieved because a single video was processed. |
123 | | - annotation_result = result.annotation_results[0] |
124 | | - |
125 | | - # Get only the first result |
126 | | - text_annotation = annotation_result.text_annotations[0] |
127 | | - print("\nText: {}".format(text_annotation.text)) |
128 | | - |
129 | | - # Get the first text segment |
130 | | - text_segment = text_annotation.segments[0] |
131 | | - start_time = text_segment.segment.start_time_offset |
132 | | - end_time = text_segment.segment.end_time_offset |
133 | | - print( |
134 | | - "start_time: {}, end_time: {}".format( |
135 | | - start_time.seconds + start_time.microseconds * 1e-6, |
136 | | - end_time.seconds + end_time.microseconds * 1e-6, |
137 | | - ) |
138 | | - ) |
139 | | - |
140 | | - print("Confidence: {}".format(text_segment.confidence)) |
141 | | - |
142 | | - # Show the result for the first frame in this segment. |
143 | | - frame = text_segment.frames[0] |
144 | | - time_offset = frame.time_offset |
145 | | - print( |
146 | | - "Time offset for the first frame: {}".format( |
147 | | - time_offset.seconds + time_offset.microseconds * 1e-6 |
148 | | - ) |
149 | | - ) |
150 | | - print("Rotated Bounding Box Vertices:") |
151 | | - for vertex in frame.rotated_bounding_box.vertices: |
152 | | - print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) |
153 | | - # [END video_detect_text_gcs_beta] |
154 | | - return annotation_result.text_annotations |
155 | | - |
156 | | - |
157 | | -def video_detect_text(path): |
158 | | - # [START video_detect_text_beta] |
159 | | - """Detect text in a local video.""" |
160 | | - from google.cloud import videointelligence_v1p2beta1 as videointelligence |
161 | | - |
162 | | - video_client = videointelligence.VideoIntelligenceServiceClient() |
163 | | - features = [videointelligence.Feature.TEXT_DETECTION] |
164 | | - video_context = videointelligence.VideoContext() |
165 | | - |
166 | | - with io.open(path, "rb") as file: |
167 | | - input_content = file.read() |
168 | | - |
169 | | - operation = video_client.annotate_video( |
170 | | - request={ |
171 | | - "features": features, |
172 | | - "input_content": input_content, |
173 | | - "video_context": video_context, |
174 | | - } |
175 | | - ) |
176 | | - |
177 | | - print("\nProcessing video for text detection.") |
178 | | - result = operation.result(timeout=300) |
179 | | - |
180 | | - # The first result is retrieved because a single video was processed. |
181 | | - annotation_result = result.annotation_results[0] |
182 | | - |
183 | | - # Get only the first result |
184 | | - text_annotation = annotation_result.text_annotations[0] |
185 | | - print("\nText: {}".format(text_annotation.text)) |
186 | | - |
187 | | - # Get the first text segment |
188 | | - text_segment = text_annotation.segments[0] |
189 | | - start_time = text_segment.segment.start_time_offset |
190 | | - end_time = text_segment.segment.end_time_offset |
191 | | - print( |
192 | | - "start_time: {}, end_time: {}".format( |
193 | | - start_time.seconds + start_time.microseconds * 1e-6, |
194 | | - end_time.seconds + end_time.microseconds * 1e-6, |
195 | | - ) |
196 | | - ) |
197 | | - |
198 | | - print("Confidence: {}".format(text_segment.confidence)) |
199 | | - |
200 | | - # Show the result for the first frame in this segment. |
201 | | - frame = text_segment.frames[0] |
202 | | - time_offset = frame.time_offset |
203 | | - print( |
204 | | - "Time offset for the first frame: {}".format( |
205 | | - time_offset.seconds + time_offset.microseconds * 1e-6 |
206 | | - ) |
207 | | - ) |
208 | | - print("Rotated Bounding Box Vertices:") |
209 | | - for vertex in frame.rotated_bounding_box.vertices: |
210 | | - print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y)) |
211 | | - # [END video_detect_text_beta] |
212 | | - return annotation_result.text_annotations |
213 | | - |
214 | | - |
215 | 46 | def detect_labels_streaming(path): |
216 | 47 | # [START video_streaming_label_detection_beta] |
217 | 48 | from google.cloud import videointelligence_v1p3beta1 as videointelligence |
@@ -826,21 +657,6 @@ def stream_generator(): |
826 | 657 | ) |
827 | 658 | subparsers = parser.add_subparsers(dest="command") |
828 | 659 |
|
829 | | - speech_transcription_parser = subparsers.add_parser( |
830 | | - "transcription", help=speech_transcription.__doc__ |
831 | | - ) |
832 | | - speech_transcription_parser.add_argument("gcs_uri") |
833 | | - |
834 | | - video_text_gcs_parser = subparsers.add_parser( |
835 | | - "video-text-gcs", help=video_detect_text_gcs.__doc__ |
836 | | - ) |
837 | | - video_text_gcs_parser.add_argument("gcs_uri") |
838 | | - |
839 | | - video_text_parser = subparsers.add_parser( |
840 | | - "video-text", help=video_detect_text.__doc__ |
841 | | - ) |
842 | | - video_text_parser.add_argument("path") |
843 | | - |
844 | 660 | video_streaming_labels_parser = subparsers.add_parser( |
845 | 661 | "streaming-labels", help=detect_labels_streaming.__doc__ |
846 | 662 | ) |
@@ -892,13 +708,7 @@ def stream_generator(): |
892 | 708 |
|
893 | 709 | args = parser.parse_args() |
894 | 710 |
|
895 | | - if args.command == "transcription": |
896 | | - speech_transcription(args.gcs_uri) |
897 | | - elif args.command == "video-text-gcs": |
898 | | - video_detect_text_gcs(args.gcs_uri) |
899 | | - elif args.command == "video-text": |
900 | | - video_detect_text(args.path) |
901 | | - elif args.command == "streaming-labels": |
| 711 | + if args.command == "streaming-labels": |
902 | 712 | detect_labels_streaming(args.path) |
903 | 713 | elif args.command == "streaming-shot-change": |
904 | 714 | detect_shot_change_streaming(args.path) |