Skip to content

Commit 28a93b0

Browse files
cdxker and skeptrunedev
authored and committed
feature: don't quit video worker job if a single video fails
1 parent 7beea8d commit 28a93b0

File tree

1 file changed

+64
-52
lines changed

1 file changed

+64
-52
lines changed

server/src/bin/video-worker.rs

Lines changed: 64 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -187,60 +187,72 @@ async fn video_worker(
187187
}
188188
};
189189

190+
log::info!("Getting transcripts for video_id {}", video.id.video_id);
190191
let transcripts = get_transcript(&video.id.video_id).await.map_err(|e| {
191-
log::error!(
192-
"Failed to get transcript for video {}: {}",
193-
video.id.video_id,
194-
e
195-
);
196-
BroccoliError::Job("Failed to get transcript".to_string())
197-
})?;
198-
199-
for transcript in transcripts {
200-
let create_chunk_data = ChunkReqPayload {
201-
chunk_html: Some(transcript.text),
202-
semantic_content: None,
203-
link: Some(format!(
204-
"https://www.youtube.com/watch?v={}&t={}",
205-
video.id.video_id,
206-
transcript.start.as_secs()
207-
)),
208-
tag_set: None,
209-
metadata: Some(json!({
210-
"heading": video.snippet.title.clone(),
211-
"title": video.snippet.title.clone(),
212-
"url": format!("https://www.youtube.com/watch?v={}", video.id.video_id),
213-
"hierarchy": video.snippet.title.clone(),
214-
"description": video.snippet.description.clone(),
215-
"yt_preview_src": video.snippet.thumbnails.high.url.clone(),
216-
})),
217-
group_ids: Some(vec![chunk_group.id]),
218-
group_tracking_ids: None,
219-
location: None,
220-
tracking_id: None,
221-
upsert_by_tracking_id: None,
222-
time_stamp: Some(video.snippet.publish_time.clone()),
223-
weight: None,
224-
split_avg: None,
225-
convert_html_to_text: None,
226-
image_urls: Some(vec![video.snippet.thumbnails.high.url.clone()]),
227-
num_value: None,
228-
fulltext_boost: None,
229-
semantic_boost: None,
230-
};
231-
232-
chunks.push(create_chunk_data);
192+
BroccoliError::Job(format!(
193+
"Failed to get transcript for video_id {}: {}",
194+
video.id.video_id, e
195+
))
196+
});
197+
198+
match transcripts {
199+
Ok(transcripts) => {
200+
for transcript in transcripts {
201+
let create_chunk_data = ChunkReqPayload {
202+
chunk_html: Some(transcript.text),
203+
semantic_content: None,
204+
link: Some(format!(
205+
"https://www.youtube.com/watch?v={}&t={}",
206+
video.id.video_id,
207+
transcript.start.as_secs()
208+
)),
209+
tag_set: None,
210+
metadata: Some(json!({
211+
"heading": video.snippet.title.clone(),
212+
"title": video.snippet.title.clone(),
213+
"url": format!("https://www.youtube.com/watch?v={}", video.id.video_id),
214+
"hierarchy": video.snippet.title.clone(),
215+
"description": video.snippet.description.clone(),
216+
"yt_preview_src": video.snippet.thumbnails.high.url.clone(),
217+
})),
218+
group_ids: Some(vec![chunk_group.id]),
219+
group_tracking_ids: None,
220+
location: None,
221+
tracking_id: None,
222+
upsert_by_tracking_id: None,
223+
time_stamp: Some(video.snippet.publish_time.clone()),
224+
weight: None,
225+
split_avg: None,
226+
convert_html_to_text: None,
227+
image_urls: Some(vec![video.snippet.thumbnails.high.url.clone()]),
228+
num_value: None,
229+
fulltext_boost: None,
230+
semantic_boost: None,
231+
};
232+
233+
chunks.push(create_chunk_data);
234+
}
235+
236+
log::info!(
237+
"Sending {} chunks from transcript of video {}",
238+
chunks.len(),
239+
video.id.video_id
240+
);
241+
242+
send_chunks(
243+
dataset_org_plan_sub.clone(),
244+
chunks,
245+
video.id.video_id.clone(),
246+
pool.clone(),
247+
redis_conn.clone(),
248+
event_queue.clone(),
249+
)
250+
.await?;
251+
}
252+
Err(e) => {
253+
log::error!("Failed to get transcript for video {}", e);
254+
}
233255
}
234-
235-
send_chunks(
236-
dataset_org_plan_sub.clone(),
237-
chunks,
238-
video.id.video_id.clone(),
239-
pool.clone(),
240-
redis_conn.clone(),
241-
event_queue.clone(),
242-
)
243-
.await?;
244256
}
245257

246258
Ok(())

0 commit comments

Comments
 (0)