Skip to content

Language portuguese #5

@rothbr

Description

@rothbr

I would like to send audio spoken in Portuguese. I saw that Deepgram can transcribe it, but I've already tried using a different model in main.py and it doesn't work — could you help me?

"""Main file for the Jarvis project"""
import os
from os import PathLike
from time import time
import asyncio
from typing import Union

from dotenv import load_dotenv
import openai
from deepgram import Deepgram
import pygame
from pygame import mixer
import elevenlabs

from record import speech_to_text

# Load API keys

# Load secrets from a local .env file so they are never committed to source.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY: this key was previously hard-coded (and therefore leaked in the
# repository). Read it from the environment like the other keys, and rotate
# the old key in the Deepgram console.
DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
elevenlabs.set_api_key(os.getenv("ELEVENLABS_API_KEY"))

# Initialize APIs

# Create the API clients once at import time; both are reused for every request.
gpt_client = openai.Client(api_key=OPENAI_API_KEY)
deepgram = Deepgram(DEEPGRAM_API_KEY)

# mixer is a pygame module for playing audio

# Initialise the pygame mixer so the generated responses can be played back.
mixer.init()

# Change the context if you want to change Jarvis' personality

# System prompt defining Jarvis' persona, written in Portuguese; the main loop
# below appends each user/assistant turn to it.
context = "Você é Jarvis, assistente humano de Alex. Você é espirituoso e cheio de personalidade. Suas respostas devem ser limitadas a uma ou duas frases curtas."
# Conversation accumulator (not appended to in this file; kept for compatibility).
conversation = {"Conversation": []}
# Path where the microphone recording lands — presumably written by
# record.speech_to_text(); verify against record.py.
RECORDING_PATH = "audio/recording.wav"

def request_gpt(prompt: str) -> str:
    """
    Send a prompt to the OpenAI chat completions API and return the reply text.

    Args:
        prompt: The full prompt (persona context plus dialogue) to send.

    Returns:
        The assistant's reply as a plain string.
    """
    # Single-turn call: the whole running context is packed into one user message.
    response = gpt_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gpt-3.5-turbo",
    )
    return response.choices[0].message.content

async def transcribe(
    file_name: Union[str, bytes, PathLike[str], PathLike[bytes]],
    language: str = "pt-BR",
):
    """
    Transcribe a WAV recording with the Deepgram pre-recorded API.

    Args:
        file_name: Path of the audio file to transcribe.
        language: BCP-47 language tag to transcribe in. Defaults to "pt-BR"
            (Brazilian Portuguese).

    Returns:
        The list of word dicts from the first alternative of the first channel.
    """
    # Read the audio into memory once. The previous version passed the open
    # file object and then retried the request, but the stream was already
    # exhausted after the first call, so the retry sent empty audio — and the
    # retry path referenced aiohttp, which was never imported (NameError).
    with open(file_name, "rb") as audio:
        buffer = audio.read()

    # Language is pinned explicitly, so no detect-and-retry pass is needed;
    # Deepgram's `language` option alone selects the transcription language.
    source = {"buffer": buffer, "mimetype": "audio/wav"}
    params = {"model": "nova-2-general", "language": language}
    response = await deepgram.transcription.prerecorded(source, parameters=params)
    return response["results"]["channels"][0]["alternatives"][0]["words"]

def log(log: str):
    """
    Print a status message and mirror it to status.txt.

    Args:
        log: The status message to display and persist. (The parameter
            shadows the function name; kept for caller compatibility.)
    """
    print(log)
    # "w" mode intentionally overwrites: status.txt always holds only the
    # most recent status line.
    with open("status.txt", "w") as f:
        f.write(log)

if __name__ == "__main__":
    while True:
        # Record audio from the microphone (blocks until recording finishes).
        log("Listening...")
        speech_to_text()
        log("Done listening")

        # Transcribe the recording with Deepgram in Portuguese.
        current_time = time()
        # asyncio.run creates and closes a fresh event loop per call; the old
        # code created a new loop every iteration and never closed it, leaking
        # one event loop per turn.
        words = asyncio.run(transcribe(RECORDING_PATH, language="pt-BR"))
        string_words = " ".join(
            word_dict["word"] for word_dict in words if "word" in word_dict
        )
        with open("conv.txt", "a") as f:
            f.write(f"{string_words}\n")
        transcription_time = time() - current_time
        log(f"Finished transcribing in {transcription_time:.2f} seconds.")

        # Ask the chat model for Jarvis' reply, growing the running context.
        current_time = time()
        context += f"\nAlex: {string_words}\nJarvis: "
        response = request_gpt(context)
        context += response
        gpt_time = time() - current_time
        log(f"Finished generating response in {gpt_time:.2f} seconds.")

        # Synthesize the reply with ElevenLabs. The monolingual model is
        # English-only, so Portuguese replies need the multilingual model.
        current_time = time()
        audio = elevenlabs.generate(
            text=response, voice="Adam", model="eleven_multilingual_v1"
        )
        elevenlabs.save(audio, "audio/response.wav")
        audio_time = time() - current_time
        log(f"Finished generating audio in {audio_time:.2f} seconds.")

        # Play the synthesized response and block until playback ends.
        log("Speaking...")
        sound = mixer.Sound("audio/response.wav")
        # Add response as a new line to conv.txt
        with open("conv.txt", "a") as f:
            f.write(f"{response}\n")
        sound.play()
        pygame.time.wait(int(sound.get_length() * 1000))
        print(f"\n --- USER: {string_words}\n --- JARVIS: {response}\n")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions