Skip to content

Commit 1d5e77f

Browse files
committed
improve right button handling
1 parent 50c9749 commit 1d5e77f

File tree

2 files changed

+207
-18
lines changed

2 files changed

+207
-18
lines changed

lib/services/ai_service.dart

Lines changed: 89 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -530,35 +530,106 @@ class AIService {
530530
try {
531531
await _showProcessingMessage();
532532

533-
// Transcribe the audio
534-
debugPrint('AIService: Transcribing conversation audio...');
535-
String? transcription;
536-
if (_whisperService != null) {
537-
transcription = await _whisperService!.transcribe(audioData);
538-
} else {
533+
// Ensure whisper service is available
534+
if (_whisperService == null) {
539535
debugPrint('AIService: WhisperService is null, initializing...');
540536
await _initWhisperService();
541-
if (_whisperService != null) {
542-
transcription = await _whisperService!.transcribe(audioData);
543-
}
544537
}
545538

539+
if (_whisperService == null) {
540+
await _showErrorMessage('Could not initialize transcription service');
541+
return;
542+
}
543+
544+
// Use diarization to identify speakers in the conversation
545+
debugPrint('AIService: Transcribing with speaker diarization...');
546+
Map<String, dynamic> result;
547+
try {
548+
result =
549+
await _whisperService!.transcribeWithDiarization(audioData);
550+
} catch (e) {
551+
debugPrint('AIService: Diarization failed, falling back to plain transcription: $e');
552+
final plainText = await _whisperService!.transcribe(audioData);
553+
result = {'text': plainText, 'segments': [], 'language': null};
554+
}
555+
556+
final transcription = result['text'] as String?;
546557
if (transcription == null || transcription.isEmpty) {
547558
debugPrint('AIService: Conversation transcription failed or empty');
548559
await _showErrorMessage('Could not transcribe conversation');
549560
return;
550561
}
551562

552-
debugPrint('AIService: Conversation transcription: $transcription');
563+
// Build speaker-attributed transcription from segments if available
564+
final segments = result['segments'] as List<dynamic>? ?? [];
565+
String formattedTranscription;
566+
if (segments.isNotEmpty &&
567+
segments.any((s) => s is Map && s.containsKey('speaker'))) {
568+
final buffer = StringBuffer();
569+
String? currentSpeaker;
570+
for (final seg in segments) {
571+
if (seg is Map) {
572+
final speaker = seg['speaker'] as String? ?? 'SPEAKER_00';
573+
final text = (seg['text'] as String? ?? '').trim();
574+
if (text.isEmpty) continue;
575+
if (speaker != currentSpeaker) {
576+
if (buffer.isNotEmpty) buffer.writeln();
577+
buffer.write('[$speaker]: ');
578+
currentSpeaker = speaker;
579+
}
580+
buffer.write('$text ');
581+
}
582+
}
583+
formattedTranscription = buffer.toString().trim();
584+
} else {
585+
formattedTranscription = transcription;
586+
}
587+
588+
debugPrint(
589+
'AIService: Diarized transcription: ${formattedTranscription.substring(0, formattedTranscription.length.clamp(0, 200))}');
590+
591+
// Build speaker-aware summary prompt
592+
final hasSpeakers = segments.isNotEmpty &&
593+
segments.any((s) => s is Map && s.containsKey('speaker'));
594+
final speakerSet = <String>{};
595+
if (hasSpeakers) {
596+
for (final seg in segments) {
597+
if (seg is Map && seg['speaker'] != null) {
598+
speakerSet.add(seg['speaker'] as String);
599+
}
600+
}
601+
}
553602

554-
// Wrap transcription in a conversation summary prompt
555-
final prompt = 'The following is a transcription of a recorded '
556-
'conversation. Please:\n'
557-
'1. Summarize the conversation\n'
558-
'2. Extract potentially important notes and highlights\n'
559-
'3. Identify specific goals if mentioned\n'
560-
'4. List any action items\n\n'
561-
'Transcription:\n$transcription';
603+
String prompt;
604+
if (hasSpeakers && speakerSet.length > 1) {
605+
prompt = 'The following is a speaker-diarized transcription of a '
606+
'recorded conversation with ${speakerSet.length} speakers '
607+
'(${speakerSet.join(", ")}). Please provide:\n\n'
608+
'## Summary\n'
609+
'A concise summary of the conversation.\n\n'
610+
'## Key Points\n'
611+
'Important information, decisions, or highlights.\n\n'
612+
'## Action Items\n'
613+
'List action items grouped by speaker. For each item, note:\n'
614+
'- Who is responsible (which speaker)\n'
615+
'- What they need to do\n'
616+
'- Any deadlines or priorities mentioned\n\n'
617+
'## Questions & Follow-ups\n'
618+
'Any unresolved questions or topics that need follow-up.\n\n'
619+
'Transcription:\n$formattedTranscription';
620+
} else {
621+
prompt = 'The following is a transcription of a recorded '
622+
'conversation. Please provide:\n\n'
623+
'## Summary\n'
624+
'A concise summary of what was discussed.\n\n'
625+
'## Key Points\n'
626+
'Important information, decisions, or highlights.\n\n'
627+
'## Action Items\n'
628+
'Any action items or tasks mentioned.\n\n'
629+
'## Questions & Follow-ups\n'
630+
'Any unresolved questions or topics that need follow-up.\n\n'
631+
'Transcription:\n$formattedTranscription';
632+
}
562633

563634
// Send to AGiXT
564635
await _sendMessageToAGiXT(prompt);

lib/services/whisper.dart

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import 'package:speech_to_text/speech_to_text.dart' as stt;
1111
import 'package:flutter_sound/flutter_sound.dart';
1212
import 'package:web_socket_channel/web_socket_channel.dart';
1313

14+
import 'package:http/http.dart' as http;
15+
1416
import 'package:agixt/services/secure_storage_service.dart';
1517
import 'package:agixt/utils/url_security.dart';
1618
import 'package:agixt/models/agixt/auth/auth.dart';
@@ -28,6 +30,18 @@ abstract class WhisperService {
2830

2931
Future<String> transcribe(Uint8List voiceData);
3032

/// Transcribes [voiceData] with speaker diarization.
///
/// The base implementation has no diarization support of its own: it
/// delegates to [transcribe] and wraps the result in a map with an empty
/// 'segments' list and a null 'language'. Concrete services override this
/// to return real speaker-labelled segments ('speaker', 'text', 'start',
/// 'end' per segment).
Future<Map<String, dynamic>> transcribeWithDiarization(
  Uint8List voiceData, {
  int? numSpeakers,
}) async =>
    {
      'text': await transcribe(voiceData),
      'segments': <dynamic>[],
      'language': null,
    };
44+
3145
// Method for AGiXT AI integration that returns a simulated transcription
3246
Future<String?> getTranscription() async {
3347
try {
@@ -297,6 +311,110 @@ class WhisperRemoteService implements WhisperService {
297311
}
298312
}
299313

/// Writes [voiceData] (raw 16 kHz, 16-bit, mono PCM — assumed to match the
/// recorder's output; TODO confirm against the capture pipeline) to a new
/// WAV file in the application documents directory and returns the file's
/// path.
///
/// The caller owns the returned file and is responsible for deleting it.
Future<String> _buildWavFile(Uint8List voiceData) async {
  final Directory documentDirectory =
      await getApplicationDocumentsDirectory();
  final String wavPath = '${documentDirectory.path}/${Uuid().v4()}.wav';

  // Fixed PCM format parameters; compile-time constants.
  const int sampleRate = 16000;
  const int numChannels = 1;
  const int bitsPerSample = 16;
  const int bytesPerSample = bitsPerSample ~/ 8;
  const int byteRate = sampleRate * numChannels * bytesPerSample;
  const int blockAlign = numChannels * bytesPerSample;

  final int dataSize = voiceData.length;
  // RIFF chunk size = remaining header bytes (36) + payload.
  final int chunkSize = 36 + dataSize;

  // Canonical 44-byte RIFF/WAVE header. Multi-byte fields are little-endian
  // per the WAV specification; ByteData makes the endianness explicit
  // instead of hand-rolled byte shifting.
  final ByteData header = ByteData(44);
  void writeTag(int offset, String tag) {
    for (int i = 0; i < tag.length; i++) {
      header.setUint8(offset + i, tag.codeUnitAt(i));
    }
  }

  writeTag(0, 'RIFF');
  header.setUint32(4, chunkSize, Endian.little);
  writeTag(8, 'WAVE');
  writeTag(12, 'fmt ');
  header.setUint32(16, 16, Endian.little); // fmt sub-chunk size (PCM = 16).
  header.setUint16(20, 1, Endian.little); // Audio format 1 = linear PCM.
  header.setUint16(22, numChannels, Endian.little);
  header.setUint32(24, sampleRate, Endian.little);
  header.setUint32(28, byteRate, Endian.little);
  header.setUint16(32, blockAlign, Endian.little);
  header.setUint16(34, bitsPerSample, Endian.little);
  writeTag(36, 'data');
  header.setUint32(40, dataSize, Endian.little);

  // BytesBuilder(copy: false) avoids copying the PCM payload into a
  // growable List<int> the way header.addAll(voiceData.toList()) did.
  final BytesBuilder bytes = BytesBuilder(copy: false)
    ..add(header.buffer.asUint8List())
    ..add(voiceData);

  final File audioFile = File(wavPath);
  await audioFile.writeAsBytes(bytes.takeBytes());
  return wavPath;
}
353+
354+
@override
Future<Map<String, dynamic>> transcribeWithDiarization(
  Uint8List voiceData, {
  int? numSpeakers,
}) async {
  debugPrint(
      'Transcribing with diarization: ${voiceData.length} bytes');
  await init();

  // Temp WAV file for the multipart upload; removed in `finally` below so a
  // failed delete can never discard a successful transcription result (the
  // old code deleted inside `try`, so a delete error fell through to the
  // fallback path and re-transcribed needlessly).
  final wavPath = await _buildWavFile(voiceData);

  try {
    final url = await getBaseURL();
    final sanitizedUrl = UrlSecurity.sanitizeBaseUrl(
      url!,
      allowHttpOnLocalhost: true,
    );
    final apiKey = await getApiKey();
    final model = await getModel() ?? 'whisper-1';

    // Use multipart request to pass enable_diarization param.
    final uri = Uri.parse('$sanitizedUrl/v1/audio/transcriptions');
    final request = http.MultipartRequest('POST', uri);
    request.headers['Authorization'] = 'Bearer ${apiKey ?? ""}';
    request.files.add(await http.MultipartFile.fromPath(
      'file',
      wavPath,
      filename: 'audio.wav',
    ));
    request.fields['model'] = model;
    request.fields['enable_diarization'] = 'true';
    request.fields['response_format'] = 'verbose_json';
    if (numSpeakers != null) {
      request.fields['num_speakers'] = numSpeakers.toString();
    }

    final streamedResponse = await request.send().timeout(
          const Duration(seconds: 120),
        );
    final responseBody = await streamedResponse.stream.bytesToString();

    if (streamedResponse.statusCode != 200) {
      throw Exception(
          'Diarization request failed (${streamedResponse.statusCode}): $responseBody');
    }

    final result = jsonDecode(responseBody) as Map<String, dynamic>;
    // int.clamp returns num, which substring rejects under null safety;
    // compute the preview length with plain int arithmetic instead.
    final resultText = result['text']?.toString() ?? '';
    final previewLen = resultText.length < 100 ? resultText.length : 100;
    debugPrint(
        'Diarization result: ${resultText.substring(0, previewLen)}...');

    return result;
  } catch (e) {
    debugPrint('Diarization transcription error: $e');
    // Fall back to plain transcription (deliberate best-effort: callers
    // always get a usable map, just without speaker segments).
    final text = await transcribe(voiceData);
    return {'text': text, 'segments': [], 'language': null};
  } finally {
    // Best-effort cleanup of the temp WAV regardless of outcome.
    try {
      await File(wavPath).delete();
    } catch (_) {}
  }
}
417+
300418
@override
301419
Future<String?> getTranscription() async {
302420
// Call the implementation from the abstract class

0 commit comments

Comments (0)