Skip to content

Commit 54392f2

Browse files
committed
exclude rodio from apps/ai build
1 parent d0b4a45 commit 54392f2

File tree

8 files changed

+37
-59
lines changed

8 files changed

+37
-59
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/ai/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ RUN mkdir -p apps/ai/src && echo "fn main() {}" > apps/ai/src/main.rs
1313
RUN cargo chef prepare --recipe-path recipe.json
1414

1515
FROM rust:${RUST_VERSION}-bookworm AS build
16-
RUN apt-get update && apt-get install -y pkg-config libasound2-dev && rm -rf /var/lib/apt/lists/*
1716
RUN cargo install cargo-chef sccache --locked
1817
ENV RUSTC_WRAPPER=sccache \
1918
SCCACHE_DIR=/sccache

crates/owhisper-client/Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ name = "owhisper-client"
33
version = "0.0.1"
44
edition = "2024"
55

6+
[features]
7+
default = []
8+
argmax = ["hypr-audio-utils"]
9+
610
[dependencies]
7-
hypr-audio = { workspace = true }
8-
hypr-audio-utils = { workspace = true }
11+
hypr-audio-utils = { workspace = true, optional = true }
912
hypr-language = { workspace = true }
1013
hypr-ws-client = { workspace = true }
1114

@@ -28,6 +31,7 @@ tracing = { workspace = true }
2831
url = { workspace = true }
2932

3033
[dev-dependencies]
34+
hypr-audio-utils = { workspace = true }
3135
hypr-data = { workspace = true }
3236

3337
deepgram = { workspace = true, features = ["listen"] }

crates/owhisper-client/src/adapter/argmax/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
#[cfg(feature = "argmax")]
12
mod batch;
23
mod keywords;
34
mod language;
45
mod live;
56

7+
#[cfg(feature = "argmax")]
8+
pub use batch::{StreamingBatchConfig, StreamingBatchEvent, StreamingBatchStream};
9+
610
#[derive(Clone, Default)]
711
pub struct ArgmaxAdapter;
812

crates/owhisper-client/src/adapter/deepgram/batch.rs

Lines changed: 23 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use std::path::{Path, PathBuf};
22

3-
use hypr_audio_utils::{Source, f32_to_i16_bytes, resample_audio, source_from_path};
43
use owhisper_interface::ListenParams;
54
use owhisper_interface::batch::Response as BatchResponse;
65

@@ -26,28 +25,38 @@ impl BatchSttAdapter for DeepgramAdapter {
2625
}
2726
}
2827

28+
/// Picks the MIME type to use for an audio file based on its file extension.
///
/// The extension is compared ASCII case-insensitively, so `REC.MP3` and
/// `rec.mp3` both resolve to `audio/mpeg`.
///
/// Returns `application/octet-stream` when the path has no extension, the
/// extension is not valid UTF-8, or the extension is not one of the known
/// audio formats listed below.
fn mime_type_from_extension(path: &Path) -> &'static str {
    // Normalize case up front: file systems and recorders commonly produce
    // upper-case extensions, which a plain string match would not recognize.
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        Some("mp3") => "audio/mpeg",
        // Both containers are reported as audio/mp4.
        Some("mp4") | Some("m4a") => "audio/mp4",
        Some("wav") => "audio/wav",
        Some("webm") => "audio/webm",
        Some("ogg") => "audio/ogg",
        Some("flac") => "audio/flac",
        _ => "application/octet-stream",
    }
}
40+
2941
async fn do_transcribe_file(
3042
client: &ClientWithMiddleware,
3143
api_base: &str,
3244
api_key: &str,
3345
params: &ListenParams,
3446
file_path: PathBuf,
3547
) -> Result<BatchResponse, Error> {
36-
let (audio_data, sample_rate) = decode_audio_to_linear16(file_path).await?;
48+
let audio_data = tokio::fs::read(&file_path)
49+
.await
50+
.map_err(|e| Error::AudioProcessing(format!("failed to read file: {}", e)))?;
3751

38-
let url = {
39-
let mut url = build_batch_url(
40-
api_base,
41-
params,
42-
&DeepgramLanguageStrategy,
43-
&DeepgramKeywordStrategy,
44-
);
45-
url.query_pairs_mut()
46-
.append_pair("sample_rate", &sample_rate.to_string());
47-
url
48-
};
52+
let content_type = mime_type_from_extension(&file_path);
4953

50-
let content_type = format!("audio/raw;encoding=linear16;rate={}", sample_rate);
54+
let url = build_batch_url(
55+
api_base,
56+
params,
57+
&DeepgramLanguageStrategy,
58+
&DeepgramKeywordStrategy,
59+
);
5160

5261
let response = client
5362
.post(url)
@@ -69,45 +78,6 @@ async fn do_transcribe_file(
6978
}
7079
}
7180

72-
async fn decode_audio_to_linear16(path: PathBuf) -> Result<(bytes::Bytes, u32), Error> {
73-
tokio::task::spawn_blocking(move || -> Result<(bytes::Bytes, u32), Error> {
74-
let decoder =
75-
source_from_path(&path).map_err(|err| Error::AudioProcessing(err.to_string()))?;
76-
77-
let channels = decoder.channels().max(1);
78-
let sample_rate = decoder.sample_rate();
79-
80-
let samples = resample_audio(decoder, sample_rate)
81-
.map_err(|err| Error::AudioProcessing(err.to_string()))?;
82-
83-
let samples = if channels == 1 {
84-
samples
85-
} else {
86-
let channels_usize = channels as usize;
87-
let mut mono = Vec::with_capacity(samples.len() / channels_usize);
88-
for frame in samples.chunks(channels_usize) {
89-
if frame.is_empty() {
90-
continue;
91-
}
92-
let sum: f32 = frame.iter().copied().sum();
93-
mono.push(sum / frame.len() as f32);
94-
}
95-
mono
96-
};
97-
98-
if samples.is_empty() {
99-
return Err(Error::AudioProcessing(
100-
"audio file contains no samples".to_string(),
101-
));
102-
}
103-
104-
let bytes = f32_to_i16_bytes(samples.into_iter());
105-
106-
Ok((bytes, sample_rate))
107-
})
108-
.await?
109-
}
110-
11181
#[cfg(test)]
11282
mod tests {
11383
use super::*;

crates/owhisper-client/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ pub use adapter::{
1515
FireworksAdapter, GladiaAdapter, OpenAIAdapter, RealtimeSttAdapter, SonioxAdapter,
1616
append_provider_param, is_local_host,
1717
};
18+
#[cfg(feature = "argmax")]
19+
pub use adapter::{StreamingBatchConfig, StreamingBatchEvent, StreamingBatchStream};
20+
1821
pub use batch::{BatchClient, BatchClientBuilder};
1922
pub use error::Error;
2023
pub use hypr_ws_client;

crates/transcribe-proxy/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ url = { workspace = true }
3232
hypr-audio-utils = { workspace = true }
3333
hypr-data = { workspace = true }
3434
hypr-language = { workspace = true }
35-
owhisper-client = { workspace = true }
3635
owhisper-interface = { workspace = true }
3736

3837
rodio = { workspace = true }

plugins/listener2/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ specta-typescript = { workspace = true }
1717
hypr-audio-utils = { workspace = true }
1818
hypr-language = { workspace = true }
1919

20-
owhisper-client = { workspace = true }
20+
owhisper-client = { workspace = true, features = ["argmax"] }
2121
owhisper-interface = { workspace = true }
2222

2323
tauri = { workspace = true, features = ["specta", "test"] }

0 commit comments

Comments
 (0)