Commit 88e2355

voice mode

1 parent b0684b3 commit 88e2355

5 files changed: +337 -19 lines changed

apps/mobile/app.json

Lines changed: 12 additions & 2 deletions

@@ -16,7 +16,10 @@
     },
     "ios": {
       "supportsTablet": true,
-      "bundleIdentifier": "com.posthog.mobile"
+      "bundleIdentifier": "com.posthog.mobile",
+      "infoPlist": {
+        "NSMicrophoneUsageDescription": "Allow PostHog to use your microphone for voice-to-text input"
+      }
     },
     "android": {
       "adaptiveIcon": {
@@ -25,13 +28,20 @@
       },
       "edgeToEdgeEnabled": true,
       "predictiveBackGestureEnabled": false,
-      "package": "com.posthog.mobile"
+      "package": "com.posthog.mobile",
+      "permissions": ["android.permission.RECORD_AUDIO"]
     },
     "web": {
       "favicon": "./assets/favicon.png"
     },
     "plugins": [
       "expo-router",
+      [
+        "expo-av",
+        {
+          "microphonePermission": "Allow PostHog to use your microphone for voice-to-text input"
+        }
+      ],
       [
         "expo-font",
         {
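All three additions declare the same capability: NSMicrophoneUsageDescription is the static iOS grant, RECORD_AUDIO the Android one, and the expo-av config plugin's microphonePermission option appears to write that same Info.plist key at prebuild, so the explicit infoPlist entry looks like belt-and-braces. The static declarations only allow the prompt; expo-av must still request the permission at runtime, which the new hook does in startRecording. A minimal standalone sketch of that check:

import { Audio } from "expo-av";

// Minimal sketch: prompt for the microphone permission declared above.
// Mirrors the check the new useVoiceRecording hook performs before recording.
async function ensureMicPermission(): Promise<boolean> {
  const { granted } = await Audio.requestPermissionsAsync();
  return granted;
}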

apps/mobile/package.json

Lines changed: 3 additions & 0 deletions

@@ -15,6 +15,9 @@
   "dependencies": {
     "@expo/ui": "0.2.0-beta.9",
     "@react-native-async-storage/async-storage": "^2.2.0",
+    "expo-av": "~16.0.8",
+    "expo-file-system": "~19.0.20",
+    "openai": "^4.77.0",
     "@tanstack/react-query": "^5.90.12",
     "date-fns": "^4.1.0",
     "expo": "~54.0.27",

apps/mobile/src/components/ChatInput.tsx

Lines changed: 52 additions & 8 deletions

@@ -1,9 +1,16 @@
 import { Circle, Host, TextField, type TextFieldRef } from "@expo/ui/swift-ui";
 import { clipped, glassEffect, padding } from "@expo/ui/swift-ui/modifiers";
-import { ArrowUp, Microphone } from "phosphor-react-native";
+import { ArrowUp, Microphone, Stop } from "phosphor-react-native";
 import { useRef, useState } from "react";
-import { Platform, TextInput, TouchableOpacity, View } from "react-native";
+import {
+  ActivityIndicator,
+  Platform,
+  TextInput,
+  TouchableOpacity,
+  View,
+} from "react-native";
 import { useSafeAreaInsets } from "react-native-safe-area-context";
+import { useVoiceRecording } from "../hooks/useVoiceRecording";

 interface ChatInputProps {
   onSend: (message: string) => void;
@@ -19,6 +26,11 @@ export function ChatInput({
   const insets = useSafeAreaInsets();
   const [message, setMessage] = useState("");
   const textFieldRef = useRef<TextFieldRef>(null);
+  const { status, startRecording, stopRecording, cancelRecording } =
+    useVoiceRecording();
+
+  const isRecording = status === "recording";
+  const isTranscribing = status === "transcribing";

   const handleSend = () => {
     const trimmed = message.trim();
@@ -28,7 +40,27 @@ export function ChatInput({
     textFieldRef.current?.setText("");
   };

-  const canSend = message.trim().length > 0 && !disabled;
+  const handleMicPress = async () => {
+    if (isRecording) {
+      const transcript = await stopRecording();
+      if (transcript) {
+        setMessage((prev) => (prev ? `${prev} ${transcript}` : transcript));
+        textFieldRef.current?.setText(
+          message ? `${message} ${transcript}` : transcript,
+        );
+      }
+    } else if (!isTranscribing) {
+      await startRecording();
+    }
+  };
+
+  const handleMicLongPress = async () => {
+    if (isRecording) {
+      await cancelRecording();
+    }
+  };
+
+  const canSend = message.trim().length > 0 && !disabled && !isRecording;

   if (Platform.OS === "ios") {
     return (
@@ -64,8 +96,10 @@ export function ChatInput({

           {/* Mic / Send button */}
           <TouchableOpacity
-            onPress={canSend ? handleSend : undefined}
+            onPress={canSend ? handleSend : handleMicPress}
+            onLongPress={handleMicLongPress}
             activeOpacity={0.7}
+            disabled={isTranscribing || disabled}
             className="h-[34px] w-[34px] items-center justify-center"
           >
             {/* Glass Background */}
@@ -83,8 +117,12 @@ export function ChatInput({
             </View>

             {/* Icon */}
-            {canSend ? (
+            {isTranscribing ? (
+              <ActivityIndicator size="small" color="#FFFFFF" />
+            ) : canSend ? (
               <ArrowUp size={20} color="#FFFFFF" weight="bold" />
+            ) : isRecording ? (
+              <Stop size={20} color="#EF4444" weight="fill" />
             ) : (
               <Microphone size={20} color="#FFFFFF" />
             )}
@@ -121,12 +159,18 @@ export function ChatInput({

         {/* Mic / Send button */}
         <TouchableOpacity
-          onPress={canSend ? handleSend : undefined}
-          className="h-[34px] w-[34px] items-center justify-center rounded-full bg-white/10"
+          onPress={canSend ? handleSend : handleMicPress}
+          onLongPress={handleMicLongPress}
+          disabled={isTranscribing || disabled}
+          className={`h-[34px] w-[34px] items-center justify-center rounded-full ${isRecording ? "bg-red-500/20" : "bg-white/10"}`}
           activeOpacity={0.7}
         >
-          {canSend ? (
+          {isTranscribing ? (
+            <ActivityIndicator size="small" color="#FFFFFF" />
+          ) : canSend ? (
             <ArrowUp size={20} color="#FFFFFF" weight="bold" />
+          ) : isRecording ? (
+            <Stop size={20} color="#EF4444" weight="fill" />
           ) : (
             <Microphone size={20} color="#FFFFFF" />
           )}
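The resulting interaction model is the same on both platforms: tap the mic to start recording, tap the stop icon to stop and transcribe, long-press to discard the recording. One subtlety in handleMicPress: setMessage appends via a functional update, but the setText call that follows reads message from the render closure, so the native field and React state can briefly disagree if another update is still queued. A hypothetical variant of the stop branch that keeps the two in sync:

// Hypothetical rewrite (not in the commit): compute the new text once and
// pass the same value to both the state setter and the native text field.
const transcript = await stopRecording();
if (transcript) {
  const next = message ? `${message} ${transcript}` : transcript;
  setMessage(next);
  textFieldRef.current?.setText(next);
}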
apps/mobile/src/hooks/useVoiceRecording.ts (new file)

Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
+import { Audio } from "expo-av";
+import { File } from "expo-file-system";
+import { useCallback, useRef, useState } from "react";
+
+type RecordingStatus = "idle" | "recording" | "transcribing" | "error";
+
+interface UseVoiceRecordingReturn {
+  status: RecordingStatus;
+  error: string | null;
+  startRecording: () => Promise<void>;
+  stopRecording: () => Promise<string | null>;
+  cancelRecording: () => Promise<void>;
+}
+
+export function useVoiceRecording(): UseVoiceRecordingReturn {
+  const [status, setStatus] = useState<RecordingStatus>("idle");
+  const [error, setError] = useState<string | null>(null);
+  const recordingRef = useRef<Audio.Recording | null>(null);
+
+  const startRecording = useCallback(async () => {
+    try {
+      setError(null);
+
+      // Request permissions
+      const { granted } = await Audio.requestPermissionsAsync();
+      if (!granted) {
+        setError("Microphone permission is required");
+        setStatus("error");
+        return;
+      }
+
+      // Configure audio mode for recording
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: true,
+        playsInSilentModeIOS: true,
+      });
+
+      // Create and start recording
+      const recording = new Audio.Recording();
+      await recording.prepareToRecordAsync(
+        Audio.RecordingOptionsPresets.HIGH_QUALITY,
+      );
+      await recording.startAsync();
+      recordingRef.current = recording;
+      setStatus("recording");
+    } catch (err) {
+      console.error("Failed to start recording:", err);
+      setError("Failed to start recording");
+      setStatus("error");
+    }
+  }, []);
+
+  const stopRecording = useCallback(async (): Promise<string | null> => {
+    if (!recordingRef.current) {
+      return null;
+    }
+
+    try {
+      setStatus("transcribing");
+
+      // Stop recording and get URI
+      await recordingRef.current.stopAndUnloadAsync();
+      const uri = recordingRef.current.getURI();
+      recordingRef.current = null;
+
+      // Reset audio mode
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: false,
+      });
+
+      if (!uri) {
+        setError("No recording found");
+        setStatus("error");
+        return null;
+      }
+
+      const openaiApiKey = process.env.EXPO_PUBLIC_OPENAI_API_KEY;
+      if (!openaiApiKey) {
+        setError("EXPO_PUBLIC_OPENAI_API_KEY not set");
+        setStatus("error");
+        return null;
+      }
+
+      // Create form data with the recording file
+      const formData = new FormData();
+      formData.append("file", {
+        uri,
+        type: "audio/m4a",
+        name: "recording.m4a",
+      } as unknown as Blob);
+      formData.append("model", "gpt-4o-transcribe");
+
+      // Call OpenAI transcription API
+      const response = await fetch(
+        "https://api.openai.com/v1/audio/transcriptions",
+        {
+          method: "POST",
+          headers: {
+            Authorization: `Bearer ${openaiApiKey}`,
+          },
+          body: formData,
+        },
+      );
+
+      // Clean up the temp file
+      const recordingFile = new File(uri);
+      if (recordingFile.exists) {
+        await recordingFile.delete();
+      }
+
+      if (!response.ok) {
+        const errorData = await response.text();
+        throw new Error(`Transcription failed: ${errorData}`);
+      }
+
+      const data = await response.json();
+      setStatus("idle");
+      return data.text;
+    } catch (err) {
+      console.error("Failed to transcribe:", err);
+      const errorMessage =
+        err instanceof Error ? err.message : "Transcription failed";
+      setError(errorMessage);
+      setStatus("error");
+      return null;
+    }
+  }, []);
+
+  const cancelRecording = useCallback(async () => {
+    if (recordingRef.current) {
+      try {
+        await recordingRef.current.stopAndUnloadAsync();
+        const uri = recordingRef.current.getURI();
+        if (uri) {
+          const file = new File(uri);
+          if (file.exists) {
+            await file.delete();
+          }
+        }
+      } catch {
+        // Ignore cleanup errors
+      }
+      recordingRef.current = null;
+    }
+
+    await Audio.setAudioModeAsync({
+      allowsRecordingIOS: false,
+    });
+
+    setStatus("idle");
+    setError(null);
+  }, []);
+
+  return {
+    status,
+    error,
+    startRecording,
+    stopRecording,
+    cancelRecording,
+  };
+}
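Two caveats worth flagging in this hook. First, EXPO_PUBLIC_-prefixed variables are inlined into the JavaScript bundle at build time, so the OpenAI key ships inside the app; routing the transcription call through a backend would keep it server-side. Second, the hook exposes error, but ChatInput never renders it; a hypothetical fragment to surface it beneath the input (Text from react-native assumed):

{/* Hypothetical, not in the commit: surface the hook's error state. */}
{error ? <Text className="text-xs text-red-400">{error}</Text> : null}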
