Skip to content

Commit a3029ff

Browse files
committed
add speaker to vtt thing
1 parent 7123e89 commit a3029ff

File tree

6 files changed

+199
-209
lines changed

6 files changed

+199
-209
lines changed

apps/desktop/src/components/main/body/sessions/outer-header/overflow/export-transcript.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export function ExportTranscript({ sessionId }: { sessionId: string }) {
4141
text: row.text as string,
4242
start_ms: row.start_ms as number,
4343
end_ms: row.end_ms as number,
44+
speaker: null,
4445
});
4546
}
4647
}
Lines changed: 103 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -1,196 +1,128 @@
11
// @ts-nocheck
2-
/** tauri-specta globals **/
3-
import { Channel as TAURI_CHANNEL, invoke as TAURI_INVOKE } from "@tauri-apps/api/core";
4-
import * as TAURI_API_EVENT from "@tauri-apps/api/event";
5-
import { type WebviewWindow as __WebviewWindow__ } from "@tauri-apps/api/webviewWindow";
62

73
// This file was generated by [tauri-specta](https://github.com/oscartbeaumont/tauri-specta). Do not edit this file manually.
84

95
/** user-defined commands **/
106

7+
118
export const commands = {
12-
async runBatch(params: BatchParams): Promise<Result<null, string>> {
9+
async runBatch(params: BatchParams) : Promise<Result<null, string>> {
1310
try {
14-
return {
15-
status: "ok",
16-
data: await TAURI_INVOKE("plugin:listener2|run_batch", { params }),
17-
};
18-
} catch (e) {
19-
if (e instanceof Error) throw e;
20-
else return { status: "error", error: e as any };
21-
}
22-
},
23-
async parseSubtitle(path: string): Promise<Result<Subtitle, string>> {
11+
return { status: "ok", data: await TAURI_INVOKE("plugin:listener2|run_batch", { params }) };
12+
} catch (e) {
13+
if(e instanceof Error) throw e;
14+
else return { status: "error", error: e as any };
15+
}
16+
},
17+
async parseSubtitle(path: string) : Promise<Result<Subtitle, string>> {
2418
try {
25-
return {
26-
status: "ok",
27-
data: await TAURI_INVOKE("plugin:listener2|parse_subtitle", { path }),
28-
};
29-
} catch (e) {
30-
if (e instanceof Error) throw e;
31-
else return { status: "error", error: e as any };
32-
}
33-
},
34-
async exportToVtt(sessionId: string, words: VttWord[]): Promise<Result<string, string>> {
19+
return { status: "ok", data: await TAURI_INVOKE("plugin:listener2|parse_subtitle", { path }) };
20+
} catch (e) {
21+
if(e instanceof Error) throw e;
22+
else return { status: "error", error: e as any };
23+
}
24+
},
25+
async exportToVtt(sessionId: string, words: VttWord[]) : Promise<Result<string, string>> {
3526
try {
36-
return {
37-
status: "ok",
38-
data: await TAURI_INVOKE("plugin:listener2|export_to_vtt", {
39-
sessionId,
40-
words,
41-
}),
42-
};
43-
} catch (e) {
44-
if (e instanceof Error) throw e;
45-
else return { status: "error", error: e as any };
46-
}
47-
},
48-
};
27+
return { status: "ok", data: await TAURI_INVOKE("plugin:listener2|export_to_vtt", { sessionId, words }) };
28+
} catch (e) {
29+
if(e instanceof Error) throw e;
30+
else return { status: "error", error: e as any };
31+
}
32+
}
33+
}
4934

5035
/** user-defined events **/
5136

37+
5238
export const events = __makeEvents__<{
53-
batchEvent: BatchEvent;
39+
batchEvent: BatchEvent
5440
}>({
55-
batchEvent: "plugin:listener2:batch-event",
56-
});
41+
batchEvent: "plugin:listener2:batch-event"
42+
})
5743

5844
/** user-defined constants **/
5945

46+
47+
6048
/** user-defined types **/
6149

62-
export type BatchAlternatives = {
63-
transcript: string;
64-
confidence: number;
65-
words?: BatchWord[];
66-
};
67-
export type BatchChannel = { alternatives: BatchAlternatives[] };
68-
export type BatchEvent =
69-
| { type: "batchStarted"; session_id: string }
70-
| { type: "batchResponse"; session_id: string; response: BatchResponse }
71-
| {
72-
type: "batchProgress";
73-
session_id: string;
74-
response: StreamResponse;
75-
percentage: number;
76-
}
77-
| { type: "batchFailed"; session_id: string; error: string };
78-
export type BatchParams = {
79-
session_id: string;
80-
provider: BatchProvider;
81-
file_path: string;
82-
model?: string | null;
83-
base_url: string;
84-
api_key: string;
85-
languages?: string[];
86-
keywords?: string[];
87-
};
88-
export type BatchProvider = "deepgram" | "soniox" | "assemblyai" | "am";
89-
export type BatchResponse = { metadata: JsonValue; results: BatchResults };
90-
export type BatchResults = { channels: BatchChannel[] };
91-
export type BatchWord = {
92-
word: string;
93-
start: number;
94-
end: number;
95-
confidence: number;
96-
speaker: number | null;
97-
punctuated_word: string | null;
98-
};
99-
export type JsonValue =
100-
| null
101-
| boolean
102-
| number
103-
| string
104-
| JsonValue[]
105-
| Partial<{ [key in string]: JsonValue }>;
106-
export type StreamAlternatives = {
107-
transcript: string;
108-
words: StreamWord[];
109-
confidence: number;
110-
languages?: string[];
111-
};
112-
export type StreamChannel = { alternatives: StreamAlternatives[] };
113-
export type StreamExtra = { started_unix_millis: number };
114-
export type StreamMetadata = {
115-
request_id: string;
116-
model_info: StreamModelInfo;
117-
model_uuid: string;
118-
extra?: StreamExtra;
119-
};
120-
export type StreamModelInfo = { name: string; version: string; arch: string };
121-
export type StreamResponse =
122-
| {
123-
type: "Results";
124-
start: number;
125-
duration: number;
126-
is_final: boolean;
127-
speech_final: boolean;
128-
from_finalize: boolean;
129-
channel: StreamChannel;
130-
metadata: StreamMetadata;
131-
channel_index: number[];
132-
}
133-
| {
134-
type: "Metadata";
135-
request_id: string;
136-
created: string;
137-
duration: number;
138-
channels: number;
139-
}
140-
| { type: "SpeechStarted"; channel: number[]; timestamp: number }
141-
| { type: "UtteranceEnd"; channel: number[]; last_word_end: number };
142-
export type StreamWord = {
143-
word: string;
144-
start: number;
145-
end: number;
146-
confidence: number;
147-
speaker: number | null;
148-
punctuated_word: string | null;
149-
language: string | null;
150-
};
151-
export type Subtitle = { tokens: Token[] };
152-
export type Token = { text: string; start_time: number; end_time: number };
153-
export type VttWord = { text: string; start_ms: number; end_ms: number };
50+
export type BatchAlternatives = { transcript: string; confidence: number; words?: BatchWord[] }
51+
export type BatchChannel = { alternatives: BatchAlternatives[] }
52+
export type BatchEvent = { type: "batchStarted"; session_id: string } | { type: "batchResponse"; session_id: string; response: BatchResponse } | { type: "batchProgress"; session_id: string; response: StreamResponse; percentage: number } | { type: "batchFailed"; session_id: string; error: string }
53+
export type BatchParams = { session_id: string; provider: BatchProvider; file_path: string; model?: string | null; base_url: string; api_key: string; languages?: string[]; keywords?: string[] }
54+
export type BatchProvider = "deepgram" | "soniox" | "assemblyai" | "am"
55+
export type BatchResponse = { metadata: JsonValue; results: BatchResults }
56+
export type BatchResults = { channels: BatchChannel[] }
57+
export type BatchWord = { word: string; start: number; end: number; confidence: number; speaker: number | null; punctuated_word: string | null }
58+
export type JsonValue = null | boolean | number | string | JsonValue[] | Partial<{ [key in string]: JsonValue }>
59+
export type StreamAlternatives = { transcript: string; words: StreamWord[]; confidence: number; languages?: string[] }
60+
export type StreamChannel = { alternatives: StreamAlternatives[] }
61+
export type StreamExtra = { started_unix_millis: number }
62+
export type StreamMetadata = { request_id: string; model_info: StreamModelInfo; model_uuid: string; extra?: StreamExtra }
63+
export type StreamModelInfo = { name: string; version: string; arch: string }
64+
export type StreamResponse = { type: "Results"; start: number; duration: number; is_final: boolean; speech_final: boolean; from_finalize: boolean; channel: StreamChannel; metadata: StreamMetadata; channel_index: number[] } | { type: "Metadata"; request_id: string; created: string; duration: number; channels: number } | { type: "SpeechStarted"; channel: number[]; timestamp: number } | { type: "UtteranceEnd"; channel: number[]; last_word_end: number }
65+
export type StreamWord = { word: string; start: number; end: number; confidence: number; speaker: number | null; punctuated_word: string | null; language: string | null }
66+
export type Subtitle = { tokens: Token[] }
67+
export type Token = { text: string; start_time: number; end_time: number; speaker: string | null }
68+
export type VttWord = { text: string; start_ms: number; end_ms: number; speaker: string | null }
69+
70+
/** tauri-specta globals **/
71+
72+
import {
73+
invoke as TAURI_INVOKE,
74+
Channel as TAURI_CHANNEL,
75+
} from "@tauri-apps/api/core";
76+
import * as TAURI_API_EVENT from "@tauri-apps/api/event";
77+
import { type WebviewWindow as __WebviewWindow__ } from "@tauri-apps/api/webviewWindow";
15478

15579
type __EventObj__<T> = {
156-
listen: (cb: TAURI_API_EVENT.EventCallback<T>) => ReturnType<typeof TAURI_API_EVENT.listen<T>>;
157-
once: (cb: TAURI_API_EVENT.EventCallback<T>) => ReturnType<typeof TAURI_API_EVENT.once<T>>;
158-
emit: null extends T
159-
? (payload?: T) => ReturnType<typeof TAURI_API_EVENT.emit>
160-
: (payload: T) => ReturnType<typeof TAURI_API_EVENT.emit>;
80+
listen: (
81+
cb: TAURI_API_EVENT.EventCallback<T>,
82+
) => ReturnType<typeof TAURI_API_EVENT.listen<T>>;
83+
once: (
84+
cb: TAURI_API_EVENT.EventCallback<T>,
85+
) => ReturnType<typeof TAURI_API_EVENT.once<T>>;
86+
emit: null extends T
87+
? (payload?: T) => ReturnType<typeof TAURI_API_EVENT.emit>
88+
: (payload: T) => ReturnType<typeof TAURI_API_EVENT.emit>;
16189
};
16290

163-
export type Result<T, E> = { status: "ok"; data: T } | { status: "error"; error: E };
164-
165-
function __makeEvents__<T extends Record<string, any>>(mappings: Record<keyof T, string>) {
166-
return new Proxy(
167-
{} as unknown as {
168-
[K in keyof T]: __EventObj__<T[K]> & {
169-
(handle: __WebviewWindow__): __EventObj__<T[K]>;
170-
};
171-
},
172-
{
173-
get: (_, event) => {
174-
const name = mappings[event as keyof T];
175-
176-
return new Proxy((() => {}) as any, {
177-
apply: (_, __, [window]: [__WebviewWindow__]) => ({
178-
listen: (arg: any) => window.listen(name, arg),
179-
once: (arg: any) => window.once(name, arg),
180-
emit: (arg: any) => window.emit(name, arg),
181-
}),
182-
get: (_, command: keyof __EventObj__<any>) => {
183-
switch (command) {
184-
case "listen":
185-
return (arg: any) => TAURI_API_EVENT.listen(name, arg);
186-
case "once":
187-
return (arg: any) => TAURI_API_EVENT.once(name, arg);
188-
case "emit":
189-
return (arg: any) => TAURI_API_EVENT.emit(name, arg);
190-
}
191-
},
192-
});
193-
},
194-
},
195-
);
91+
export type Result<T, E> =
92+
| { status: "ok"; data: T }
93+
| { status: "error"; error: E };
94+
95+
function __makeEvents__<T extends Record<string, any>>(
96+
mappings: Record<keyof T, string>,
97+
) {
98+
return new Proxy(
99+
{} as unknown as {
100+
[K in keyof T]: __EventObj__<T[K]> & {
101+
(handle: __WebviewWindow__): __EventObj__<T[K]>;
102+
};
103+
},
104+
{
105+
get: (_, event) => {
106+
const name = mappings[event as keyof T];
107+
108+
return new Proxy((() => {}) as any, {
109+
apply: (_, __, [window]: [__WebviewWindow__]) => ({
110+
listen: (arg: any) => window.listen(name, arg),
111+
once: (arg: any) => window.once(name, arg),
112+
emit: (arg: any) => window.emit(name, arg),
113+
}),
114+
get: (_, command: keyof __EventObj__<any>) => {
115+
switch (command) {
116+
case "listen":
117+
return (arg: any) => TAURI_API_EVENT.listen(name, arg);
118+
case "once":
119+
return (arg: any) => TAURI_API_EVENT.once(name, arg);
120+
case "emit":
121+
return (arg: any) => TAURI_API_EVENT.emit(name, arg);
122+
}
123+
},
124+
});
125+
},
126+
},
127+
);
196128
}

plugins/listener2/src/commands.rs

Lines changed: 3 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
use tauri::Manager;
2-
use tauri_plugin_path2::Path2PluginExt;
3-
41
use crate::{BatchParams, Listener2PluginExt, Subtitle, VttWord};
52

63
#[tauri::command]
@@ -18,12 +15,10 @@ pub async fn run_batch<R: tauri::Runtime>(
1815
#[tauri::command]
1916
#[specta::specta]
2017
pub async fn parse_subtitle<R: tauri::Runtime>(
21-
_app: tauri::AppHandle<R>,
18+
app: tauri::AppHandle<R>,
2219
path: String,
2320
) -> Result<Subtitle, String> {
24-
use aspasia::TimedSubtitleFile;
25-
let sub = TimedSubtitleFile::new(&path).unwrap();
26-
Ok(sub.into())
21+
app.listener2().parse_subtitle(path)
2722
}
2823

2924
#[tauri::command]
@@ -33,35 +28,5 @@ pub async fn export_to_vtt<R: tauri::Runtime>(
3328
session_id: String,
3429
words: Vec<VttWord>,
3530
) -> Result<String, String> {
36-
use aspasia::{Moment, Subtitle, WebVttSubtitle, webvtt::WebVttCue};
37-
38-
let base = app.path2().base().map_err(|e| e.to_string())?;
39-
let session_dir = base.join("sessions").join(&session_id);
40-
41-
std::fs::create_dir_all(&session_dir).map_err(|e| e.to_string())?;
42-
43-
let vtt_path = session_dir.join("transcript.vtt");
44-
45-
let cues: Vec<WebVttCue> = words
46-
.into_iter()
47-
.map(|word| {
48-
let start_i64 = i64::try_from(word.start_ms)
49-
.map_err(|_| format!("start_ms {} exceeds i64::MAX", word.start_ms))?;
50-
let end_i64 = i64::try_from(word.end_ms)
51-
.map_err(|_| format!("end_ms {} exceeds i64::MAX", word.end_ms))?;
52-
53-
Ok(WebVttCue {
54-
identifier: None,
55-
text: word.text,
56-
settings: None,
57-
start: Moment::from(start_i64),
58-
end: Moment::from(end_i64),
59-
})
60-
})
61-
.collect::<Result<_, String>>()?;
62-
63-
let vtt = WebVttSubtitle::builder().cues(cues).build();
64-
vtt.export(&vtt_path).map_err(|e| e.to_string())?;
65-
66-
Ok(vtt_path.to_string_lossy().to_string())
31+
app.listener2().export_to_vtt(session_id, words)
6732
}

0 commit comments

Comments
 (0)