Skip to content

Commit 54a1ea1

Browse files
committed
Improve
1 parent 0d76d17 commit 54a1ea1

File tree

3 files changed

+44
-18
lines changed

3 files changed

+44
-18
lines changed

src/main.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ fn tts_config(config: &Config, provider: &Provider) -> TTSConfig {
200200
}
201201
}
202202

203+
pub(crate) fn audio_format(config: &Config) -> String {
204+
config.audio_format.clone().unwrap_or("mp3".to_string())
205+
}
206+
203207
pub(crate) async fn build(
204208
input: PathBuf,
205209
config: &Config,
@@ -218,10 +222,7 @@ pub(crate) async fn build(
218222
panic!("No slides found in input file: {}", input.display());
219223
}
220224
image::generate_images(&input, out_dir);
221-
let audio_ext = tts_config
222-
.output_format
223-
.clone()
224-
.unwrap_or("mp3".to_string());
225+
let audio_ext = audio_format(config);
225226
let cache = args.cache.unwrap();
226227
audio::generate_audio_files(
227228
&provider,
@@ -236,7 +237,7 @@ pub(crate) async fn build(
236237
let output = "out.mp4";
237238
if release {
238239
let audio_codec = audio_codec.unwrap();
239-
video::combine_video(out_dir, &slides, output, &audio_codec, &audio_ext);
240+
video::combine_video(out_dir, &slides, &config, &provider, output, &audio_codec);
240241
}
241242
slides
242243
}

src/video.rs

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
use crate::audio_format;
12
use crate::path::audio_path;
23
use crate::path::image_path;
34
use crate::slide::Slide;
5+
use crate::Config;
6+
use crate::Provider;
47
use chrono::NaiveTime;
58
use chrono::SubsecRound;
69
use chrono::Timelike;
@@ -143,29 +146,54 @@ fn stream_index(slide: &Slide, stream: Stream) -> usize {
143146
}
144147
}
145148

149+
/// Pause duration for transitions.
150+
///
151+
/// Sentences normally have a pause between them. Without this pause,
152+
/// sentences around slide transitions will be too close to each other.
153+
/// According to Goldman-Eisler (1968), articulatory pauses are typically
154+
/// below 250 ms while hesitation pauses are typically above that.
155+
fn transition_pause(config: &Config, provider: &Provider) -> chrono::Duration {
156+
// Google does not automatically have a pause between audio clips.
157+
if provider == &Provider::Google {
158+
return chrono::Duration::milliseconds(200);
159+
}
160+
if let Some(model) = &config.model {
161+
// Nor does the Zyphra Zonos model.
162+
if model.to_lowercase().contains("zonos") {
163+
return chrono::Duration::milliseconds(200);
164+
}
165+
}
166+
chrono::Duration::milliseconds(0)
167+
}
168+
146169
pub(crate) fn combine_video(
147170
dir: &str,
148171
slides: &Vec<Slide>,
172+
config: &Config,
173+
provider: &Provider,
149174
output: &str,
150175
audio_codec: &str,
151-
audio_ext: &str,
152176
) {
177+
let audio_ext = audio_format(config);
153178
tracing::info!("Combining images and audio into one video...");
154179
let output = Path::new(dir).join(output);
155180
let output_path = output.to_str().unwrap();
156181

157182
let mut cmd = std::process::Command::new("ffmpeg");
158183
cmd.arg("-y");
159-
for slide in slides {
160-
let audio_path = audio_path(dir, slide, audio_ext);
184+
let n = slides.len();
185+
for (i, slide) in slides.iter().enumerate() {
186+
let audio_path = audio_path(dir, slide, &audio_ext);
161187
cmd.arg("-i").arg(&audio_path);
162188
let image_path = image_path(dir, slide);
163-
// Sentences normally have a pause between them. Without this pause,
164-
// sentences around slide transitions will be too close to each other.
165-
// According to Goldman-Eisler (1968), articulatory pauses are typically
166-
// below 250 ms while hesitation pauses are typically above that.
167-
let transition_pause = chrono::Duration::milliseconds(200);
168-
let duration = probe_duration(&audio_path).unwrap() + transition_pause;
189+
let pause = if i < n - 1 {
190+
transition_pause(config, provider)
191+
} else {
192+
// Sometimes the audio is trimmed at the end. Adding a small pause
193+
// to avoid this.
194+
chrono::Duration::milliseconds(500)
195+
};
196+
let duration = probe_duration(&audio_path).unwrap() + pause;
169197
cmd.arg("-loop")
170198
.arg("1")
171199
.arg("-framerate")

src/watch.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::audio_format;
12
use crate::build;
23
use crate::slide::Slide;
34
use crate::Arguments;
@@ -132,10 +133,6 @@ fn timestamp() -> u64 {
132133
.as_secs()
133134
}
134135

135-
fn audio_format(config: &Config) -> String {
136-
config.audio_format.clone().unwrap_or("mp3".to_string())
137-
}
138-
139136
fn move_files_into_public(args: &Arguments, config: &Config, slides: &[Slide]) -> u64 {
140137
let public_path = public_dir(args);
141138
let out_dir = &args.out_dir;

0 commit comments

Comments
 (0)