Skip to content

Commit eebdc83

Browse files
committed
fix word-level timestamps not being received by the frontend
1 parent a361465 commit eebdc83

File tree

2 files changed

+120
-82
lines changed

2 files changed

+120
-82
lines changed

internal/transcription/unified_service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,7 @@ func (u *UnifiedTranscriptionService) convertTranscriptResultToJSON(result *inte
773773
Word string `json:"word"`
774774
Score float64 `json:"score"`
775775
Speaker *string `json:"speaker,omitempty"`
776-
} `json:"words,omitempty"`
776+
} `json:"word_segments,omitempty"`
777777
Language string `json:"language"`
778778
Text string `json:"text"`
779779
}{

web/frontend/src/components/AudioDetailView.tsx

Lines changed: 119 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { useState, useEffect, useRef, memo } from "react";
1+
import { useState, useEffect, useRef, memo, useCallback } from "react";
22
import { createPortal } from "react-dom";
33
import { ArrowLeft, Play, Pause, List, AlignLeft, MessageCircle, Download, FileText, FileJson, FileImage, Check, StickyNote, Plus, X, Sparkles, Pencil, ChevronUp, ChevronDown, Info, Clock, Settings, Users, Loader2 } from "lucide-react";
44
import WaveSurfer from "wavesurfer.js";
@@ -176,6 +176,15 @@ export const AudioDetailView = memo(function AudioDetailView({ audioId }: AudioD
176176
const { getAuthHeaders } = useAuth();
177177
const [audioFile, setAudioFile] = useState<AudioFile | null>(null);
178178
const [transcript, setTranscript] = useState<Transcript | null>(null);
179+
180+
// Debug transcript changes
181+
useEffect(() => {
182+
console.log("[DEBUG] *** TRANSCRIPT STATE CHANGED ***");
183+
console.log("[DEBUG] transcript:", transcript);
184+
console.log("[DEBUG] has word_segments:", !!transcript?.word_segments);
185+
console.log("[DEBUG] word_segments length:", transcript?.word_segments?.length);
186+
console.log("[DEBUG] transcript.text length:", transcript?.text?.length);
187+
}, [transcript]);
179188
const [loading, setLoading] = useState(true);
180189
const [isPlaying, setIsPlaying] = useState(false);
181190
const [transcriptMode, setTranscriptMode] = useState<"compact" | "expanded">(
@@ -367,6 +376,91 @@ useEffect(() => {
367376

368377
// Former floating-controls visibility logic removed: controls are always fixed.
369378

379+
const initializeWaveSurfer = useCallback(async () => {
380+
if (!waveformRef.current || !audioFile) return;
381+
382+
try {
383+
// First, try to load the audio file manually to check if it's accessible
384+
const audioUrl = `/api/v1/transcription/${audioId}/audio`;
385+
386+
const response = await fetch(audioUrl, {
387+
headers: {
388+
...getAuthHeaders(),
389+
},
390+
});
391+
392+
if (!response.ok) {
393+
console.error(
394+
"Audio file request failed:",
395+
response.status,
396+
response.statusText,
397+
);
398+
const errorText = await response.text();
399+
console.error("Error response body:", errorText);
400+
return;
401+
}
402+
403+
404+
// Theme-aware colors
405+
const isDark = theme === "dark";
406+
const waveColor = isDark ? "#4b5563" : "#d1d5db"; // dark: gray-600, light: gray-300
407+
const progressColor = "#3b82f6"; // blue-500 for both themes
408+
409+
// Create WaveSurfer instance
410+
wavesurferRef.current = WaveSurfer.create({
411+
container: waveformRef.current,
412+
waveColor,
413+
progressColor,
414+
barWidth: 2,
415+
barGap: 1,
416+
barRadius: 2,
417+
height: 80,
418+
normalize: true,
419+
backend: "WebAudio",
420+
});
421+
422+
// Load the audio blob
423+
const audioBlob = await response.blob();
424+
const audioObjectURL = URL.createObjectURL(audioBlob);
425+
426+
await wavesurferRef.current.load(audioObjectURL);
427+
428+
429+
wavesurferRef.current.on("error", (error) => {
430+
console.error("WaveSurfer error:", error);
431+
});
432+
433+
wavesurferRef.current.on("play", () => {
434+
setIsPlaying(true);
435+
});
436+
437+
wavesurferRef.current.on("pause", () => {
438+
setIsPlaying(false);
439+
});
440+
441+
wavesurferRef.current.on("finish", () => {
442+
setIsPlaying(false);
443+
setCurrentWordIndex(null);
444+
});
445+
446+
// Add time update listener for word highlighting
447+
wavesurferRef.current.on("audioprocess", (time) => {
448+
setCurrentTime(time);
449+
});
450+
451+
// Add ready event listener for immediate time updates when seeking
452+
wavesurferRef.current.on("ready", () => {
453+
// Set up additional event listeners after WaveSurfer is ready
454+
wavesurferRef.current?.on("interaction", () => {
455+
const currentTime = wavesurferRef.current?.getCurrentTime() || 0;
456+
setCurrentTime(currentTime);
457+
});
458+
});
459+
} catch (error) {
460+
console.error("Failed to initialize WaveSurfer:", error);
461+
}
462+
}, [audioId, audioFile, theme, getAuthHeaders]);
463+
370464
// Initialize WaveSurfer when audioFile is available - with proper DOM timing
371465
useEffect(() => {
372466
if (!audioFile) {
@@ -394,7 +488,7 @@ useEffect(() => {
394488
wavesurferRef.current = null;
395489
}
396490
};
397-
}, [audioFile?.id, audioFile?.audio_path]);
491+
}, [audioFile?.id, audioFile?.audio_path, initializeWaveSurfer]);
398492

399493
// Update current word index based on audio time
400494
useEffect(() => {
@@ -425,7 +519,7 @@ useEffect(() => {
425519
if (!isPlaying && currentTime === 0) {
426520
setCurrentWordIndex(null);
427521
}
428-
}, [currentTime, transcript?.word_segments, isPlaying, currentWordIndex]);
522+
}, [currentTime, transcript?.word_segments, isPlaying]);
429523

430524
// Auto-scroll to highlighted word
431525
useEffect(() => {
@@ -452,6 +546,7 @@ useEffect(() => {
452546
}, [currentWordIndex]);
453547

454548
const fetchTranscriptOnly = async () => {
549+
console.log("[DEBUG] *** fetchTranscriptOnly CALLED ***");
455550
try {
456551
const transcriptResponse = await fetch(
457552
`/api/v1/transcription/${audioId}/transcript`,
@@ -464,22 +559,29 @@ useEffect(() => {
464559

465560
if (transcriptResponse.ok) {
466561
const transcriptData = await transcriptResponse.json();
562+
console.log("[DEBUG] fetchTranscriptOnly - transcriptData:", transcriptData);
467563

468564
// The API returns transcript data in a nested structure
469565
if (transcriptData.transcript) {
566+
console.log("[DEBUG] transcript has word_segments:", !!transcriptData.transcript.word_segments);
567+
console.log("[DEBUG] word_segments length:", transcriptData.transcript.word_segments?.length);
470568
// Check if transcript has segments or text
471569
if (typeof transcriptData.transcript === "string") {
570+
console.log("[DEBUG] Setting transcript as STRING");
472571
setTranscript({ text: transcriptData.transcript });
473572
} else if (transcriptData.transcript.text) {
573+
console.log("[DEBUG] Setting transcript with TEXT and word_segments");
474574
setTranscript({
475575
text: transcriptData.transcript.text,
476576
segments: transcriptData.transcript.segments,
477577
word_segments: transcriptData.transcript.word_segments,
478578
});
479579
} else if (transcriptData.transcript.segments) {
580+
console.log("[DEBUG] Setting transcript with SEGMENTS and word_segments");
480581
setTranscript({
481582
text: "",
482583
segments: transcriptData.transcript.segments,
584+
word_segments: transcriptData.transcript.word_segments,
483585
});
484586
}
485587
}
@@ -543,19 +645,26 @@ useEffect(() => {
543645

544646
if (transcriptResponse.ok) {
545647
const transcriptData = await transcriptResponse.json();
648+
console.log("[DEBUG] *** fetchAudioDetails TRANSCRIPT LOADING ***");
649+
console.log("[DEBUG] initial transcriptData:", transcriptData);
546650

547651
// The API returns transcript data in a nested structure
548652
if (transcriptData.transcript) {
653+
console.log("[DEBUG] initial transcript has word_segments:", !!transcriptData.transcript.word_segments);
654+
console.log("[DEBUG] initial word_segments length:", transcriptData.transcript.word_segments?.length);
549655
// Check if transcript has segments or text
550656
if (typeof transcriptData.transcript === "string") {
657+
console.log("[DEBUG] INITIAL: Setting transcript as STRING");
551658
setTranscript({ text: transcriptData.transcript });
552659
} else if (transcriptData.transcript.text) {
660+
console.log("[DEBUG] INITIAL: Setting transcript with TEXT and word_segments");
553661
setTranscript({
554662
text: transcriptData.transcript.text,
555663
segments: transcriptData.transcript.segments,
556664
word_segments: transcriptData.transcript.word_segments,
557665
});
558666
} else if (transcriptData.transcript.segments) {
667+
console.log("[DEBUG] INITIAL: Setting transcript with SEGMENTS only");
559668
// If only segments, combine them into text
560669
const fullText = transcriptData.transcript.segments
561670
.map((segment: any) => segment.text)
@@ -618,82 +727,6 @@ useEffect(() => {
618727
fetchExecutionData();
619728
};
620729

621-
const initializeWaveSurfer = async () => {
622-
if (!waveformRef.current || !audioFile) return;
623-
624-
try {
625-
// First, try to load the audio file manually to check if it's accessible
626-
const audioUrl = `/api/v1/transcription/${audioId}/audio`;
627-
628-
const response = await fetch(audioUrl, {
629-
headers: {
630-
...getAuthHeaders(),
631-
},
632-
});
633-
634-
if (!response.ok) {
635-
console.error(
636-
"Audio file request failed:",
637-
response.status,
638-
response.statusText,
639-
);
640-
const errorText = await response.text();
641-
console.error("Error response body:", errorText);
642-
return;
643-
}
644-
645-
646-
// Theme-aware colors
647-
const isDark = theme === "dark";
648-
const waveColor = isDark ? "#4b5563" : "#d1d5db"; // dark: gray-600, light: gray-300
649-
const progressColor = "#3b82f6"; // blue-500 for both themes
650-
651-
// Create WaveSurfer instance
652-
wavesurferRef.current = WaveSurfer.create({
653-
container: waveformRef.current,
654-
waveColor,
655-
progressColor,
656-
barWidth: 2,
657-
barGap: 1,
658-
barRadius: 2,
659-
height: 80,
660-
normalize: true,
661-
backend: "WebAudio",
662-
});
663-
664-
// Load the audio blob
665-
const audioBlob = await response.blob();
666-
const audioObjectURL = URL.createObjectURL(audioBlob);
667-
668-
await wavesurferRef.current.load(audioObjectURL);
669-
670-
671-
wavesurferRef.current.on("error", (error) => {
672-
console.error("WaveSurfer error:", error);
673-
});
674-
675-
wavesurferRef.current.on("play", () => {
676-
setIsPlaying(true);
677-
});
678-
679-
wavesurferRef.current.on("pause", () => {
680-
setIsPlaying(false);
681-
});
682-
683-
wavesurferRef.current.on("finish", () => {
684-
setIsPlaying(false);
685-
setCurrentWordIndex(null);
686-
});
687-
688-
// Add time update listener for word highlighting
689-
wavesurferRef.current.on("audioprocess", (time) => {
690-
setCurrentTime(time);
691-
});
692-
} catch (error) {
693-
console.error("Failed to initialize WaveSurfer:", error);
694-
}
695-
};
696-
697730
const togglePlayPause = () => {
698731
if (wavesurferRef.current) {
699732
wavesurferRef.current.playPause();
@@ -840,7 +873,12 @@ useEffect(() => {
840873

841874
// Render transcript with word-level highlighting
842875
const renderHighlightedTranscript = () => {
876+
console.log("[DEBUG] renderHighlightedTranscript - transcript:", transcript);
877+
console.log("[DEBUG] has word_segments:", !!transcript?.word_segments);
878+
console.log("[DEBUG] word_segments length:", transcript?.word_segments?.length);
879+
843880
if (!transcript?.word_segments || transcript.word_segments.length === 0) {
881+
console.log("[DEBUG] No word_segments, returning plain text:", transcript?.text?.substring(0, 100) + "...");
844882
return transcript?.text || '';
845883
}
846884

@@ -1665,9 +1703,9 @@ useEffect(() => {
16651703
ref={transcriptRef}
16661704
className="prose prose-gray dark:prose-invert max-w-none relative select-text cursor-text"
16671705
>
1668-
<p className="text-gray-700 dark:text-gray-300 leading-relaxed break-words select-text">
1706+
<div className="text-gray-700 dark:text-gray-300 leading-relaxed break-words select-text">
16691707
{renderHighlightedTranscript()}
1670-
</p>
1708+
</div>
16711709

16721710
{/* Selection bubble and editor moved to portal */}
16731711
</div>

0 commit comments

Comments
 (0)