@@ -187,60 +187,72 @@ async fn video_worker(
187187 }
188188 } ;
189189
190+ log:: info!( "Getting transcripts for video_id {}" , video. id. video_id) ;
190191 let transcripts = get_transcript ( & video. id . video_id ) . await . map_err ( |e| {
191- log:: error!(
192- "Failed to get transcript for video {}: {}" ,
193- video. id. video_id,
194- e
195- ) ;
196- BroccoliError :: Job ( "Failed to get transcript" . to_string ( ) )
197- } ) ?;
198-
199- for transcript in transcripts {
200- let create_chunk_data = ChunkReqPayload {
201- chunk_html : Some ( transcript. text ) ,
202- semantic_content : None ,
203- link : Some ( format ! (
204- "https://www.youtube.com/watch?v={}&t={}" ,
205- video. id. video_id,
206- transcript. start. as_secs( )
207- ) ) ,
208- tag_set : None ,
209- metadata : Some ( json ! ( {
210- "heading" : video. snippet. title. clone( ) ,
211- "title" : video. snippet. title. clone( ) ,
212- "url" : format!( "https://www.youtube.com/watch?v={}" , video. id. video_id) ,
213- "hierarchy" : video. snippet. title. clone( ) ,
214- "description" : video. snippet. description. clone( ) ,
215- "yt_preview_src" : video. snippet. thumbnails. high. url. clone( ) ,
216- } ) ) ,
217- group_ids : Some ( vec ! [ chunk_group. id] ) ,
218- group_tracking_ids : None ,
219- location : None ,
220- tracking_id : None ,
221- upsert_by_tracking_id : None ,
222- time_stamp : Some ( video. snippet . publish_time . clone ( ) ) ,
223- weight : None ,
224- split_avg : None ,
225- convert_html_to_text : None ,
226- image_urls : Some ( vec ! [ video. snippet. thumbnails. high. url. clone( ) ] ) ,
227- num_value : None ,
228- fulltext_boost : None ,
229- semantic_boost : None ,
230- } ;
231-
232- chunks. push ( create_chunk_data) ;
192+ BroccoliError :: Job ( format ! (
193+ "Failed to get transcript for video_id {}: {}" ,
194+ video. id. video_id, e
195+ ) )
196+ } ) ;
197+
198+ match transcripts {
199+ Ok ( transcripts) => {
200+ for transcript in transcripts {
201+ let create_chunk_data = ChunkReqPayload {
202+ chunk_html : Some ( transcript. text ) ,
203+ semantic_content : None ,
204+ link : Some ( format ! (
205+ "https://www.youtube.com/watch?v={}&t={}" ,
206+ video. id. video_id,
207+ transcript. start. as_secs( )
208+ ) ) ,
209+ tag_set : None ,
210+ metadata : Some ( json ! ( {
211+ "heading" : video. snippet. title. clone( ) ,
212+ "title" : video. snippet. title. clone( ) ,
213+ "url" : format!( "https://www.youtube.com/watch?v={}" , video. id. video_id) ,
214+ "hierarchy" : video. snippet. title. clone( ) ,
215+ "description" : video. snippet. description. clone( ) ,
216+ "yt_preview_src" : video. snippet. thumbnails. high. url. clone( ) ,
217+ } ) ) ,
218+ group_ids : Some ( vec ! [ chunk_group. id] ) ,
219+ group_tracking_ids : None ,
220+ location : None ,
221+ tracking_id : None ,
222+ upsert_by_tracking_id : None ,
223+ time_stamp : Some ( video. snippet . publish_time . clone ( ) ) ,
224+ weight : None ,
225+ split_avg : None ,
226+ convert_html_to_text : None ,
227+ image_urls : Some ( vec ! [ video. snippet. thumbnails. high. url. clone( ) ] ) ,
228+ num_value : None ,
229+ fulltext_boost : None ,
230+ semantic_boost : None ,
231+ } ;
232+
233+ chunks. push ( create_chunk_data) ;
234+ }
235+
236+ log:: info!(
237+ "Sending {} chunks from transcript of video {}" ,
238+ chunks. len( ) ,
239+ video. id. video_id
240+ ) ;
241+
242+ send_chunks (
243+ dataset_org_plan_sub. clone ( ) ,
244+ chunks,
245+ video. id . video_id . clone ( ) ,
246+ pool. clone ( ) ,
247+ redis_conn. clone ( ) ,
248+ event_queue. clone ( ) ,
249+ )
250+ . await ?;
251+ }
252+ Err ( e) => {
253+ log:: error!( "Failed to get transcript for video {}" , e) ;
254+ }
233255 }
234-
235- send_chunks (
236- dataset_org_plan_sub. clone ( ) ,
237- chunks,
238- video. id . video_id . clone ( ) ,
239- pool. clone ( ) ,
240- redis_conn. clone ( ) ,
241- event_queue. clone ( ) ,
242- )
243- . await ?;
244256 }
245257
246258 Ok ( ( ) )
0 commit comments