Skip to content

Commit c5daba5

Browse files
committed
feat: Add ElevenLabs TTS integration and update dependencies
1 parent 8e812b8 commit c5daba5

File tree

4 files changed

+93
-8
lines changed

4 files changed

+93
-8
lines changed

a.eyes/package-lock.json

Lines changed: 29 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

a.eyes/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
"@react-navigation/native": "^6.1.9",
1414
"@react-navigation/native-stack": "^6.9.17",
1515
"axios": "^1.6.2",
16+
"base-64": "^1.0.0",
1617
"dotenv": "^16.5.0",
1718
"expo": "~52.0.0",
19+
"expo-av": "~15.0.2",
1820
"expo-camera": "~16.0.18",
1921
"expo-dev-client": "~5.0.20",
20-
"expo-file-system": "~18.0.11",
22+
"expo-file-system": "~18.0.12",
2123
"expo-image-manipulator": "~13.0.6",
2224
"expo-image-picker": "~16.0.6",
2325
"expo-speech": "~13.0.1",

a.eyes/screens/CameraScreen.js

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,65 @@ import React, { useRef, useState, useEffect } from 'react';
22
import { StyleSheet, Text, View, TouchableOpacity, ActivityIndicator, Alert } from 'react-native';
33
import { CameraView, useCameraPermissions, useMicrophonePermissions } from 'expo-camera';
44
import * as Speech from 'expo-speech';
5+
import { Audio } from 'expo-av';
56
import { MaterialIcons } from '@expo/vector-icons';
67
import { analyzeImage } from '../services/imageRecognitionService';
78
import { saveHistoryEntry, loadHistory } from '../services/storageService';
9+
import * as FileSystem from 'expo-file-system';
10+
import { decode as atob, encode as btoa } from 'base-64';
11+
12+
const ELEVENLABS_API_KEY = '';
13+
const ELEVENLABS_VOICE_ID = '56AoDkrOh6qfVPDXZ7Pt';
14+
15+
/**
 * Encode raw bytes as a base64 string.
 *
 * Accepts either an ArrayBuffer or any ArrayBufferView (typed array or
 * DataView). For a view, exactly the bytes the view covers are encoded.
 *
 * @param {ArrayBuffer|ArrayBufferView} buffer - Bytes to encode.
 * @returns {string} Base64 text; an empty string for empty input.
 */
function arrayBufferToBase64(buffer) {
  const bytes = ArrayBuffer.isView(buffer)
    ? new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength)
    : new Uint8Array(buffer);

  // Convert in bounded chunks: appending one character at a time creates a
  // new intermediate string per byte, while spreading the whole payload into
  // a single fromCharCode call can exceed the engine's argument limit.
  const CHUNK_SIZE = 0x2000;
  const parts = [];
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    const chunk = bytes.subarray(offset, offset + CHUNK_SIZE);
    parts.push(String.fromCharCode.apply(null, chunk));
  }

  return btoa(parts.join(''));
}
24+
25+
/**
 * Speak `text` aloud using the ElevenLabs text-to-speech HTTP API.
 *
 * Flow: POST the text to the voice endpoint, write the returned audio bytes
 * to a temporary file in the cache directory, then load and play that file
 * with expo-av. The returned promise resolves once playback has started (or
 * once a failure has been logged); it does not wait for playback to finish.
 *
 * Errors are logged with console.error and never thrown to the caller.
 *
 * NOTE(review): ELEVENLABS_API_KEY is an empty string at module scope in this
 * revision; presumably the service rejects such requests. Confirm the key is
 * supplied from configuration rather than committed to source.
 *
 * @param {string} text - Text to synthesize. Empty, whitespace-only or
 *   non-string values are ignored without a network request.
 * @returns {Promise<void>}
 */
export async function speakWithElevenLabs(text) {
  // Nothing to say: avoid a pointless network round-trip.
  if (typeof text !== 'string' || text.trim() === '') {
    return;
  }

  let fileUri = null;
  let soundObject = null;

  // Releases the player and removes the temp file. Safe to call repeatedly:
  // the references are cleared first, so later calls are no-ops.
  const cleanUp = async () => {
    const sound = soundObject;
    const uri = fileUri;
    soundObject = null;
    fileUri = null;

    if (sound) {
      try {
        sound.setOnPlaybackStatusUpdate(null);
        await sound.unloadAsync();
      } catch (unloadError) {
        console.error('Error unloading TTS sound:', unloadError);
      }
    }
    if (uri) {
      try {
        await FileSystem.deleteAsync(uri, { idempotent: true });
      } catch (deleteError) {
        console.error('Error deleting TTS file:', deleteError);
      }
    }
  };

  try {
    const response = await fetch(
      `https://api.elevenlabs.io/v1/text-to-speech/${ELEVENLABS_VOICE_ID}`,
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': ELEVENLABS_API_KEY,
        },
        body: JSON.stringify({
          text,
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.75,
          },
        }),
      }
    );

    if (!response.ok) {
      console.error('Error from ElevenLabs:', await response.text());
      return;
    }

    // The audio is written as base64 because writeAsStringAsync takes a
    // string payload; the Base64 encoding option makes it store raw bytes.
    const arrayBuffer = await response.arrayBuffer();
    const base64Audio = arrayBufferToBase64(arrayBuffer);
    fileUri = `${FileSystem.cacheDirectory}tts-${Date.now()}.mp3`;
    await FileSystem.writeAsStringAsync(fileUri, base64Audio, {
      encoding: FileSystem.EncodingType.Base64,
    });

    soundObject = new Audio.Sound();
    await soundObject.loadAsync({ uri: fileUri });

    // Free the player and the temp file as soon as the clip finishes, so
    // repeated utterances do not accumulate loaded sounds or cache files.
    soundObject.setOnPlaybackStatusUpdate((status) => {
      if (status.isLoaded && status.didJustFinish) {
        void cleanUp();
      }
    });

    await soundObject.playAsync();
  } catch (error) {
    console.error('Error:', error);
    await cleanUp();
  }
}
864

965
export default function CameraScreen({ navigate }) {
1066
const cameraRef = useRef(null);
@@ -15,7 +71,7 @@ export default function CameraScreen({ navigate }) {
1571
const [ttsEnabled, setTtsEnabled] = useState(true);
1672
const [autoCapture, setAutoCapture] = useState(false);
1773
const intervalRef = useRef(null);
18-
74+
1975
// Load history count for badge
2076
useEffect(() => {
2177
refreshHistoryCount();
@@ -74,7 +130,7 @@ export default function CameraScreen({ navigate }) {
74130
// TTS wrapper
75131
const speakIfEnabled = (text) => {
76132
if (ttsEnabled) {
77-
Speech.speak(text, { rate: 0.9, pitch: 1.0 });
133+
speakWithElevenLabs(text);
78134
}
79135
};
80136

a.eyes/screens/ChatScreen.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@ import React, { useState, useEffect, useRef } from 'react';
22
import { View, Text, Image, FlatList, TouchableOpacity, TextInput, ActivityIndicator, StyleSheet, KeyboardAvoidingView, Platform } from 'react-native';
33
import { MaterialIcons } from '@expo/vector-icons';
44
import * as Speech from 'expo-speech';
5+
import { Audio } from 'expo-av';
56
import AsyncStorage from '@react-native-async-storage/async-storage';
67
import { chatWithImage } from '../services/chatService';
8+
import { speakWithElevenLabs } from './CameraScreen';
79

810
const CHAT_HISTORY_KEY = 'a.eyes.image_chats';
911

@@ -59,7 +61,7 @@ export default function ChatScreen({ navigate, route }) {
5961
const updatedLog = [...newLog, aiMsg];
6062
setChatLog(updatedLog);
6163
saveChat(updatedLog);
62-
if (ttsEnabled) Speech.speak(aiReply, { rate: 0.9, pitch: 1.0 });
64+
if (ttsEnabled) await speakWithElevenLabs(aiReply); // <-- Use ElevenLabs TTS here
6365
} catch (e) {
6466
const errMsg = { sender: 'ai', text: "Sorry, I couldn't reply due to a network error.", timestamp: new Date().toISOString() };
6567
const updatedLog = [...newLog, errMsg];

0 commit comments

Comments
 (0)