Skip to content

Commit 41606e6

Browse files
authored
Merge pull request #16 from ks6088ts-labs/feature/issue-15_ai-speech-app
add transcription solution
2 parents a307f43 + 1aae46a commit 41606e6

File tree

11 files changed

+220
-70
lines changed

11 files changed

+220
-70
lines changed

.env.sample

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# Basic
2-
SOLUTION_NAME = "azure-ai-services-solutions"
2+
SOLUTION_NAME = "SANDBOX"
33
BACKEND_URL = "http://localhost:8000"

azure_ai_speech.env.sample

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
AZURE_AI_SPEECH_SUBSCRIPTION_KEY="<your-subscription-key>"
2+
AZURE_AI_SPEECH_REGION="japaneast"
3+
AZURE_AI_SPEECH_RECOGNITION_LANGUAGE="ja-JP"

docs/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,12 @@
2727

2828
- [Quickstart: Create a Python function in Azure from the command line](https://learn.microsoft.com/en-us/azure/azure-functions/create-first-function-cli-python?tabs=linux%2Cbash%2Cazure-cli%2Cbrowser)
2929
- [Using FastAPI Framework with Azure Functions](https://learn.microsoft.com/en-us/samples/azure-samples/fastapi-on-azure-functions/fastapi-on-azure-functions/)
30+
31+
### Azure AI Speech Service
32+
33+
- [How to recognize speech > Recognize speech from a microphone](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#recognize-speech-from-a-microphone)
34+
- [Speech SDK throwing error: Exception with an error code: 0xe (SPXERR_MIC_NOT_AVAILABLE)](https://stackoverflow.com/a/75731356)
35+
- [Failed to initialize platform (azure-c-shared). Error: 2176 #2204](https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/2204)
36+
- [Quickstart: Install the Speech SDK](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi&pivots=programming-language-python)
37+
- [How to recognize speech > Use continuous recognition](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#use-continuous-recognition)
38+
- [Task 02 - Perform speech requests with Streamlit (40 minutes)](https://microsoft.github.io/TechExcel-Implementing-automation-practices-using-Azure-OpenAI/docs/04_implement_audio_transcription/0402.html)

frontend/entrypoint.py

Lines changed: 14 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,23 @@
1-
import asyncio
21
import logging
3-
from urllib.parse import urljoin
42

5-
import aiohttp
6-
import streamlit as st
7-
8-
from backend.schemas import azure_openai as azure_openai_schemas
3+
from frontend.solutions import sandbox, transcription
4+
from frontend.solutions.types import SolutionType
95

106
logger = logging.getLogger(__name__)
117

128

13-
async def http_get(url: str) -> dict:
14-
async with aiohttp.ClientSession() as session:
15-
async with session.get(url) as response:
16-
response.raise_for_status()
17-
return await response.json()
18-
19-
20-
async def http_post(url: str, data: dict) -> dict:
21-
async with aiohttp.ClientSession() as session:
22-
async with session.post(
23-
url=url,
24-
json=data,
25-
) as response:
26-
response.raise_for_status()
27-
return await response.json()
28-
29-
309
def start(
31-
solution_name: str,
10+
solution_type: SolutionType,
3211
backend_url: str,
3312
log_level: int,
34-
):
35-
logger.setLevel(log_level)
36-
logger.debug(f"set log level to {log_level}")
37-
38-
st.write(f"Solution name: {solution_name}")
39-
40-
# GET
41-
if st.button("GET"):
42-
logger.info("Fetching data from backend...")
43-
try:
44-
with st.spinner("Calling API..."):
45-
response = asyncio.run(http_get(url=urljoin(base=backend_url, url="")))
46-
st.write(response)
47-
logger.info("Data fetched successfully.")
48-
except Exception as e:
49-
st.write(f"Error: {e}")
50-
logger.error(f"Error: {e}")
51-
52-
st.write("---")
53-
54-
# POST
55-
prompt = st.text_input(
56-
label="Prompt",
57-
value="Hello",
58-
)
59-
if st.button("POST"):
60-
logger.info("Posting data to backend...")
61-
try:
62-
with st.spinner("Calling API..."):
63-
response = asyncio.run(
64-
http_post(
65-
url=urljoin(base=backend_url, url="/azure_openai/chat_completions/"),
66-
data=azure_openai_schemas.ChatCompletionRequest(
67-
content=prompt,
68-
stream=False,
69-
).model_dump(),
70-
)
71-
)
72-
st.write(response)
73-
logger.info("Data posted successfully.")
74-
except Exception as e:
75-
st.write(f"Error: {e}")
76-
logger.error(f"Error: {e}")
13+
) -> None:
14+
if solution_type == SolutionType.TRANSCRIPTION:
15+
return transcription.start(
16+
backend_url=backend_url,
17+
log_level=log_level,
18+
)
19+
if solution_type == SolutionType.SANDBOX:
20+
return sandbox.start(
21+
backend_url=backend_url,
22+
log_level=log_level,
23+
)

frontend/solutions/__init__.py

Whitespace-only changes.

frontend/solutions/sandbox.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import asyncio
2+
import logging
3+
from urllib.parse import urljoin
4+
5+
import aiohttp
6+
import streamlit as st
7+
8+
from backend.schemas import azure_openai as azure_openai_schemas
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
async def http_get(url: str) -> dict:
    """Send a GET request to ``url`` and return the JSON-decoded response body.

    A fresh ``aiohttp.ClientSession`` is opened for the single request and closed
    again on exit. ``response.raise_for_status()`` raises for error status codes
    before the body is decoded.
    """
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        response.raise_for_status()
        return await response.json()
18+
19+
20+
async def http_post(url: str, data: dict) -> dict:
    """Send ``data`` as a JSON body in a POST to ``url`` and return the JSON reply.

    A fresh ``aiohttp.ClientSession`` is opened for the single request and closed
    again on exit. ``response.raise_for_status()`` raises for error status codes
    before the body is decoded.
    """
    async with aiohttp.ClientSession() as session, session.post(url=url, json=data) as response:
        response.raise_for_status()
        return await response.json()
28+
29+
30+
def start(
    backend_url: str,
    log_level: int,
):
    """Draw the sandbox page: one GET and one POST round-trip to the backend.

    Args:
        backend_url: Base URL that the request paths are joined onto.
        log_level: Level applied to this module's logger.
    """
    logger.setLevel(log_level)
    logger.debug(f"set log level to {log_level}")

    def _call_and_render(make_request, success_message: str) -> None:
        # The request coroutine is created inside the spinner/try so that a failure
        # while preparing its arguments is reported on the page as well.
        try:
            with st.spinner("Calling API..."):
                payload = asyncio.run(make_request())
            st.write(payload)
            logger.info(success_message)
        except Exception as err:
            st.write(f"Error: {err}")
            logger.error(f"Error: {err}")

    st.write("Misc solution")

    # GET
    if st.button("GET"):
        logger.info("Fetching data from backend...")
        _call_and_render(
            lambda: http_get(url=urljoin(base=backend_url, url="")),
            "Data fetched successfully.",
        )

    st.write("---")

    # POST
    prompt = st.text_input(label="Prompt", value="Hello")
    if st.button("POST"):
        logger.info("Posting data to backend...")
        _call_and_render(
            lambda: http_post(
                url=urljoin(base=backend_url, url="/azure_openai/chat_completions/"),
                data=azure_openai_schemas.ChatCompletionRequest(
                    content=prompt,
                    stream=False,
                ).model_dump(),
            ),
            "Data posted successfully.",
        )
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import logging
2+
import time
3+
from os import getenv
4+
5+
import azure.cognitiveservices.speech as speechsdk
6+
import streamlit as st
7+
from azure.cognitiveservices.speech.speech import SpeechRecognitionEventArgs
8+
from dotenv import load_dotenv
9+
10+
load_dotenv("azure_ai_speech.env")
11+
logger = logging.getLogger(__name__)
12+
done = False
13+
14+
15+
def transcript(
    subscription: str,
    region: str,
    speech_recognition_language: str,
):
    """Run continuous speech recognition on the default microphone until stopped.

    Recognised phrases are written to the module logger. The call blocks, polling
    the module-level ``done`` flag every 0.5 s, and returns once the flag is set:
    either by the recogniser reporting ``session_stopped`` / ``canceled`` or by
    the "Stop transcription" button branch below.

    Args:
        subscription: Azure AI Speech subscription key.
        region: Azure region of the Speech resource.
        speech_recognition_language: Recognition language passed to ``SpeechConfig``.
    """
    global done

    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speechsdk.SpeechConfig(
            subscription=subscription,
            region=region,
            speech_recognition_language=speech_recognition_language,
        ),
        audio_config=speechsdk.audio.AudioConfig(
            use_default_microphone=True,
        ),
    )

    def stop_cb(evt: SpeechRecognitionEventArgs):
        # Only signal the waiting loop here. Recognition itself is stopped on the
        # calling thread (see ``finally`` below) rather than from inside an SDK
        # callback, and without this flag a cancel/stop event would leave the
        # polling loop spinning forever.
        global done
        logger.debug(f"CLOSING on {evt}")
        done = True

    def recognized_cb(evt: SpeechRecognitionEventArgs):
        logger.debug(f"RECOGNIZED: {evt}")
        new_text = evt.result.text.strip()
        logger.info(new_text)
        # FIXME: App does not show the transcription

    speech_recognizer.recognizing.connect(lambda evt: logger.debug(f"RECOGNIZING: {evt}"))
    speech_recognizer.recognized.connect(recognized_cb)
    speech_recognizer.session_started.connect(lambda evt: logger.debug(f"SESSION STARTED: {evt}"))
    speech_recognizer.session_stopped.connect(lambda evt: logger.debug(f"SESSION STOPPED {evt}"))
    speech_recognizer.canceled.connect(lambda evt: logger.debug(f"CANCELED {evt}"))
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    try:
        if st.button("Stop transcription", key="stop_transcription"):
            # FIXME: App does not stop transcription
            # NOTE(review): the button is evaluated once, before the blocking loop;
            # Streamlit appears to rerun the script on a click instead of resuming
            # here, so this branch is probably never taken mid-session - confirm.
            logger.info("Stop transcription")
            done = True

        while not done:
            time.sleep(0.5)
    finally:
        # Always release the microphone/session, including when the wait is
        # interrupted by an exception.
        speech_recognizer.stop_continuous_recognition()
61+
62+
63+
def start(
    backend_url: str,
    log_level: int,
):
    """Draw the transcription page and run a session when the start button is pressed.

    Args:
        backend_url: Accepted for a uniform solution signature; not referenced here.
        log_level: Level applied to this module's logger.
    """
    global done

    logger.setLevel(log_level)
    logger.debug(f"set log level to {log_level}")

    st.write("Transcription")

    if not st.button("Start transcription", key="start_transcription"):
        return

    logger.info("Start transcription...")
    # Clear the flag so the polling loop inside transcript() actually waits.
    done = False
    try:
        with st.spinner("Transcribing..."):
            settings = {
                "subscription": getenv("AZURE_AI_SPEECH_SUBSCRIPTION_KEY"),
                "region": getenv("AZURE_AI_SPEECH_REGION"),
                "speech_recognition_language": getenv("AZURE_AI_SPEECH_RECOGNITION_LANGUAGE"),
            }
            transcript(**settings)
    except Exception as err:
        st.write(f"Error: {err}")
        logger.error(f"Error: {err}")

frontend/solutions/types.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from enum import Enum
2+
3+
4+
class SolutionType(Enum):
    """Closed set of front-end solutions that can be selected by name.

    Each member's value equals its upper-case name, so a member can be looked up
    from its string form, e.g. ``SolutionType("SANDBOX")``.
    """

    # Microphone speech-to-text page.
    TRANSCRIPTION = "TRANSCRIPTION"
    # Miscellaneous backend GET/POST playground page.
    SANDBOX = "SANDBOX"

main.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,17 @@ def frontend(
3939
backend_url: Annotated[str, typer.Option(help="Backend URL")] = os.getenv("BACKEND_URL", "http://localhost:8000/"),
4040
):
4141
from frontend.entrypoint import start
42+
from frontend.solutions.types import SolutionType
43+
44+
# convert solution_name to SolutionType
45+
try:
46+
solution_type = SolutionType(solution_name.upper())
47+
except ValueError:
48+
typer.echo(f"Invalid solution name: {solution_name}", err=True)
49+
raise typer.Exit(code=1)
4250

4351
start(
44-
solution_name=solution_name,
52+
solution_type=solution_type,
4553
backend_url=backend_url,
4654
log_level=log_level,
4755
)

poetry.lock

Lines changed: 16 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)