Commit 88e2355

voice mode

1 parent b0684b3 commit 88e2355

5 files changed: +337 -19 lines changed

apps/mobile/app.json

Lines changed: 12 additions & 2 deletions

@@ -16,7 +16,10 @@
     },
     "ios": {
       "supportsTablet": true,
-      "bundleIdentifier": "com.posthog.mobile"
+      "bundleIdentifier": "com.posthog.mobile",
+      "infoPlist": {
+        "NSMicrophoneUsageDescription": "Allow PostHog to use your microphone for voice-to-text input"
+      }
     },
     "android": {
       "adaptiveIcon": {
@@ -25,13 +28,20 @@
       },
       "edgeToEdgeEnabled": true,
       "predictiveBackGestureEnabled": false,
-      "package": "com.posthog.mobile"
+      "package": "com.posthog.mobile",
+      "permissions": ["android.permission.RECORD_AUDIO"]
     },
     "web": {
       "favicon": "./assets/favicon.png"
     },
     "plugins": [
       "expo-router",
+      [
+        "expo-av",
+        {
+          "microphonePermission": "Allow PostHog to use your microphone for voice-to-text input"
+        }
+      ],
       [
         "expo-font",
         {
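All three additions declare the same capability: NSMicrophoneUsageDescription is the static iOS grant, RECORD_AUDIO the Android one, and the expo-av config plugin's microphonePermission option appears to write that same Info.plist key at prebuild, so the explicit infoPlist entry looks like belt-and-braces. The static declarations only allow the prompt; expo-av must still request the permission at runtime, which the new hook does in startRecording. A minimal standalone sketch of that check:

import { Audio } from "expo-av";

// Minimal sketch: prompt for the microphone permission declared above.
// Mirrors the check the new useVoiceRecording hook performs before recording.
async function ensureMicPermission(): Promise<boolean> {
  const { granted } = await Audio.requestPermissionsAsync();
  return granted;
}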

apps/mobile/package.json

Lines changed: 3 additions & 0 deletions

@@ -15,6 +15,9 @@
   "dependencies": {
     "@expo/ui": "0.2.0-beta.9",
     "@react-native-async-storage/async-storage": "^2.2.0",
+    "expo-av": "~16.0.8",
+    "expo-file-system": "~19.0.20",
+    "openai": "^4.77.0",
     "@tanstack/react-query": "^5.90.12",
     "date-fns": "^4.1.0",
     "expo": "~54.0.27",

apps/mobile/src/components/ChatInput.tsx

Lines changed: 52 additions & 8 deletions

@@ -1,9 +1,16 @@
 import { Circle, Host, TextField, type TextFieldRef } from "@expo/ui/swift-ui";
 import { clipped, glassEffect, padding } from "@expo/ui/swift-ui/modifiers";
-import { ArrowUp, Microphone } from "phosphor-react-native";
+import { ArrowUp, Microphone, Stop } from "phosphor-react-native";
 import { useRef, useState } from "react";
-import { Platform, TextInput, TouchableOpacity, View } from "react-native";
+import {
+  ActivityIndicator,
+  Platform,
+  TextInput,
+  TouchableOpacity,
+  View,
+} from "react-native";
 import { useSafeAreaInsets } from "react-native-safe-area-context";
+import { useVoiceRecording } from "../hooks/useVoiceRecording";

 interface ChatInputProps {
   onSend: (message: string) => void;
@@ -19,6 +26,11 @@ export function ChatInput({
   const insets = useSafeAreaInsets();
   const [message, setMessage] = useState("");
   const textFieldRef = useRef<TextFieldRef>(null);
+  const { status, startRecording, stopRecording, cancelRecording } =
+    useVoiceRecording();
+
+  const isRecording = status === "recording";
+  const isTranscribing = status === "transcribing";

   const handleSend = () => {
     const trimmed = message.trim();
@@ -28,7 +40,27 @@ export function ChatInput({
     textFieldRef.current?.setText("");
   };

-  const canSend = message.trim().length > 0 && !disabled;
+  const handleMicPress = async () => {
+    if (isRecording) {
+      const transcript = await stopRecording();
+      if (transcript) {
+        setMessage((prev) => (prev ? `${prev} ${transcript}` : transcript));
+        textFieldRef.current?.setText(
+          message ? `${message} ${transcript}` : transcript,
+        );
+      }
+    } else if (!isTranscribing) {
+      await startRecording();
+    }
+  };
+
+  const handleMicLongPress = async () => {
+    if (isRecording) {
+      await cancelRecording();
+    }
+  };
+
+  const canSend = message.trim().length > 0 && !disabled && !isRecording;

   if (Platform.OS === "ios") {
     return (
@@ -64,8 +96,10 @@ export function ChatInput({

           {/* Mic / Send button */}
           <TouchableOpacity
-            onPress={canSend ? handleSend : undefined}
+            onPress={canSend ? handleSend : handleMicPress}
+            onLongPress={handleMicLongPress}
             activeOpacity={0.7}
+            disabled={isTranscribing || disabled}
             className="h-[34px] w-[34px] items-center justify-center"
           >
             {/* Glass Background */}
@@ -83,8 +117,12 @@ export function ChatInput({
             </View>

             {/* Icon */}
-            {canSend ? (
+            {isTranscribing ? (
+              <ActivityIndicator size="small" color="#FFFFFF" />
+            ) : canSend ? (
               <ArrowUp size={20} color="#FFFFFF" weight="bold" />
+            ) : isRecording ? (
+              <Stop size={20} color="#EF4444" weight="fill" />
             ) : (
               <Microphone size={20} color="#FFFFFF" />
             )}
@@ -121,12 +159,18 @@ export function ChatInput({

         {/* Mic / Send button */}
         <TouchableOpacity
-          onPress={canSend ? handleSend : undefined}
-          className="h-[34px] w-[34px] items-center justify-center rounded-full bg-white/10"
+          onPress={canSend ? handleSend : handleMicPress}
+          onLongPress={handleMicLongPress}
+          disabled={isTranscribing || disabled}
+          className={`h-[34px] w-[34px] items-center justify-center rounded-full ${isRecording ? "bg-red-500/20" : "bg-white/10"}`}
           activeOpacity={0.7}
         >
-          {canSend ? (
+          {isTranscribing ? (
+            <ActivityIndicator size="small" color="#FFFFFF" />
+          ) : canSend ? (
             <ArrowUp size={20} color="#FFFFFF" weight="bold" />
+          ) : isRecording ? (
+            <Stop size={20} color="#EF4444" weight="fill" />
           ) : (
             <Microphone size={20} color="#FFFFFF" />
           )}
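The resulting interaction model is the same on both platforms: tap the mic to start recording, tap the stop icon to stop and transcribe, long-press to discard the recording. One subtlety in handleMicPress: setMessage appends via a functional update, but the setText call that follows reads message from the render closure, so the native field and React state can briefly disagree if another update is still queued. A hypothetical variant of the stop branch that keeps the two in sync:

// Hypothetical rewrite (not in the commit): compute the new text once and
// pass the same value to both the state setter and the native text field.
const transcript = await stopRecording();
if (transcript) {
  const next = message ? `${message} ${transcript}` : transcript;
  setMessage(next);
  textFieldRef.current?.setText(next);
}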
apps/mobile/src/hooks/useVoiceRecording.ts (new file)

Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
+import { Audio } from "expo-av";
+import { File } from "expo-file-system";
+import { useCallback, useRef, useState } from "react";
+
+type RecordingStatus = "idle" | "recording" | "transcribing" | "error";
+
+interface UseVoiceRecordingReturn {
+  status: RecordingStatus;
+  error: string | null;
+  startRecording: () => Promise<void>;
+  stopRecording: () => Promise<string | null>;
+  cancelRecording: () => Promise<void>;
+}
+
+export function useVoiceRecording(): UseVoiceRecordingReturn {
+  const [status, setStatus] = useState<RecordingStatus>("idle");
+  const [error, setError] = useState<string | null>(null);
+  const recordingRef = useRef<Audio.Recording | null>(null);
+
+  const startRecording = useCallback(async () => {
+    try {
+      setError(null);
+
+      // Request permissions
+      const { granted } = await Audio.requestPermissionsAsync();
+      if (!granted) {
+        setError("Microphone permission is required");
+        setStatus("error");
+        return;
+      }
+
+      // Configure audio mode for recording
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: true,
+        playsInSilentModeIOS: true,
+      });
+
+      // Create and start recording
+      const recording = new Audio.Recording();
+      await recording.prepareToRecordAsync(
+        Audio.RecordingOptionsPresets.HIGH_QUALITY,
+      );
+      await recording.startAsync();
+      recordingRef.current = recording;
+      setStatus("recording");
+    } catch (err) {
+      console.error("Failed to start recording:", err);
+      setError("Failed to start recording");
+      setStatus("error");
+    }
+  }, []);
+
+  const stopRecording = useCallback(async (): Promise<string | null> => {
+    if (!recordingRef.current) {
+      return null;
+    }
+
+    try {
+      setStatus("transcribing");
+
+      // Stop recording and get URI
+      await recordingRef.current.stopAndUnloadAsync();
+      const uri = recordingRef.current.getURI();
+      recordingRef.current = null;
+
+      // Reset audio mode
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: false,
+      });
+
+      if (!uri) {
+        setError("No recording found");
+        setStatus("error");
+        return null;
+      }
+
+      const openaiApiKey = process.env.EXPO_PUBLIC_OPENAI_API_KEY;
+      if (!openaiApiKey) {
+        setError("EXPO_PUBLIC_OPENAI_API_KEY not set");
+        setStatus("error");
+        return null;
+      }
+
+      // Create form data with the recording file
+      const formData = new FormData();
+      formData.append("file", {
+        uri,
+        type: "audio/m4a",
+        name: "recording.m4a",
+      } as unknown as Blob);
+      formData.append("model", "gpt-4o-transcribe");
+
+      // Call OpenAI transcription API
+      const response = await fetch(
+        "https://api.openai.com/v1/audio/transcriptions",
+        {
+          method: "POST",
+          headers: {
+            Authorization: `Bearer ${openaiApiKey}`,
+          },
+          body: formData,
+        },
+      );
+
+      // Clean up the temp file
+      const recordingFile = new File(uri);
+      if (recordingFile.exists) {
+        await recordingFile.delete();
+      }
+
+      if (!response.ok) {
+        const errorData = await response.text();
+        throw new Error(`Transcription failed: ${errorData}`);
+      }
+
+      const data = await response.json();
+      setStatus("idle");
+      return data.text;
+    } catch (err) {
+      console.error("Failed to transcribe:", err);
+      const errorMessage =
+        err instanceof Error ? err.message : "Transcription failed";
+      setError(errorMessage);
+      setStatus("error");
+      return null;
+    }
+  }, []);
+
+  const cancelRecording = useCallback(async () => {
+    if (recordingRef.current) {
+      try {
+        await recordingRef.current.stopAndUnloadAsync();
+        const uri = recordingRef.current.getURI();
+        if (uri) {
+          const file = new File(uri);
+          if (file.exists) {
+            await file.delete();
+          }
+        }
+      } catch {
+        // Ignore cleanup errors
+      }
+      recordingRef.current = null;
+    }
+
+    await Audio.setAudioModeAsync({
+      allowsRecordingIOS: false,
+    });
+
+    setStatus("idle");
+    setError(null);
+  }, []);
+
+  return {
+    status,
+    error,
+    startRecording,
+    stopRecording,
+    cancelRecording,
+  };
+}
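Two caveats worth flagging in this hook. First, EXPO_PUBLIC_-prefixed variables are inlined into the JavaScript bundle at build time, so the OpenAI key ships inside the app; routing the transcription call through a backend would keep it server-side. Second, the hook exposes error, but ChatInput never renders it; a hypothetical fragment to surface it beneath the input (Text from react-native assumed):

{/* Hypothetical, not in the commit: surface the hook's error state. */}
{error ? <Text className="text-xs text-red-400">{error}</Text> : null}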
