Skip to content

Commit 1dc0fde

Browse files
authored
Merge pull request #19 from ks6088ts-labs/copilot/fix-18
Add Speeches service with Azure AI Speech integration for batch transcription
2 parents 5e5d39c + 0aa4912 commit 1dc0fde

File tree

8 files changed

+698
-1
lines changed

8 files changed

+698
-1
lines changed

.env.template

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ AZURE_COSMOSDB_CONTAINER_NAME="items"
1313
# Azure Blob Storage
1414
AZURE_BLOB_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=<YOUR_STORAGE_ACCOUNT>;AccountKey=<YOUR_ACCOUNT_KEY>;EndpointSuffix=core.windows.net"
1515
AZURE_BLOB_STORAGE_CONTAINER_NAME="files"
16+
17+
# Azure AI Speech
18+
AZURE_AI_SPEECH_API_KEY="<YOUR_AZURE_AI_SPEECH_API_KEY>"
19+
AZURE_AI_SPEECH_ENDPOINT="https://<speech-api-name>.cognitiveservices.azure.com/"

docs/index.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,44 @@ uv run python scripts/files.py delete-multiple-files "file1.txt" "file2.jpg" "fi
7373
uv run python scripts/files.py delete-multiple-files "file1.txt" "file2.jpg" "file3.pdf" --force
7474
```
7575

76+
### Speeches Service
77+
78+
```shell
79+
AZURE_BLOB_STORAGE_CONTAINER_SAS_TOKEN="<your_sas_token>"
80+
AZURE_BLOB_STORAGE_CONTAINER_URL="https://<storage_account_name>.blob.core.windows.net/<container_name>"
81+
FILE_NAME="path/to/your/audio/file.wav"
82+
URL="${AZURE_BLOB_STORAGE_CONTAINER_URL}/${FILE_NAME}?${AZURE_BLOB_STORAGE_CONTAINER_SAS_TOKEN}"
83+
84+
# Help
85+
uv run python scripts/speeches.py --help
86+
87+
# Create a new transcription job (set JOB_ID to the job ID it prints; the commands below use it)
88+
uv run python scripts/speeches.py create-transcription "$URL" \
89+
--locale "ja-JP" \
90+
--name "My Transcription"
91+
92+
# Get transcription job status
93+
uv run python scripts/speeches.py get-transcription "$JOB_ID"
94+
95+
# Wait for transcription completion
96+
uv run python scripts/speeches.py wait-for-completion "$JOB_ID" --timeout 300 --interval 10
97+
98+
# Get transcription files
99+
uv run python scripts/speeches.py get-transcription-files "$JOB_ID"
100+
101+
# Get transcription result
102+
uv run python scripts/speeches.py get-transcription-result "https://<contentUrl>" --save "result.json"
103+
104+
# List all transcription jobs
105+
uv run python scripts/speeches.py list-transcriptions
106+
107+
# Delete transcription job
108+
uv run python scripts/speeches.py delete-transcription "$JOB_ID"
109+
110+
# Delete transcription job (without confirmation)
111+
uv run python scripts/speeches.py delete-transcription "$JOB_ID" --force
112+
```
113+
76114
## MCP
77115

78116
- [FastAPI-MCP](https://github.com/tadata-org/fastapi_mcp)
@@ -126,3 +164,7 @@ az resource update \
126164
- [FastAPI のテレメトリデータを Azure Application Insights に送る](https://qiita.com/hoto17296/items/2f366dfabdbe3d1d4e97)
127165
- [【Azure Functions】 - Application Insights のログが表示されない問題](https://zenn.dev/headwaters/articles/ff19f7e1b99b44)
128166
- [opentelemetry-instrumentation-fastapi (python) から OpenTelemetry に入門する](https://zenn.dev/taxin/articles/opentelemetry-fast-api-instrumentation-basics)
167+
168+
### Azure AI Speech
169+
170+
- [バッチ文字起こしとは](https://learn.microsoft.com/ja-jp/azure/ai-services/speech-service/batch-transcription)

scripts/speeches.py

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#!/usr/bin/env python
2+
# filepath: scripts/speeches.py
3+
4+
import json
5+
import time
6+
7+
import typer
8+
from rich.console import Console
9+
from rich.progress import Progress, SpinnerColumn, TextColumn
10+
from rich.table import Table
11+
12+
from template_fastapi.models.speech import BatchTranscriptionRequest, TranscriptionStatus
13+
from template_fastapi.repositories.speeches import SpeechRepository
14+
15+
# Typer application that collects the commands defined in this script.
app = typer.Typer()
16+
# Shared Rich console that every command below prints through.
console = Console()
17+
# NOTE(review): the repository is instantiated at import time, so it is
# constructed even for `--help` — confirm its constructor is cheap and does
# not require credentials to be present.
speech_repo = SpeechRepository()
18+
19+
20+
@app.command()
def create_transcription(
    content_urls: list[str] = typer.Argument(..., help="転写するファイルのURL(複数指定可能)"),
    locale: str = typer.Option("ja-JP", "--locale", "-l", help="言語設定"),
    display_name: str = typer.Option(None, "--name", "-n", help="転写ジョブの表示名"),
):
    """新しい転写ジョブを作成する"""
    # CLI command: create a new batch transcription job.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   content_urls: one or more URLs of the files to transcribe.
    #   locale: locale passed to the transcription request.
    #   display_name: optional job name; falls back to "CLI Batch Transcription".
    #
    # Exits with status 1 when the request cannot be built or submitted, so
    # callers (shell scripts, CI) can detect the failure.
    console.print("[bold green]転写ジョブを作成します[/bold green]")
    console.print(f"ファイルURL: {', '.join(content_urls)}")
    console.print(f"言語設定: {locale}")

    try:
        request = BatchTranscriptionRequest(
            content_urls=content_urls,
            locale=locale,
            display_name=display_name or "CLI Batch Transcription",
        )
        response = speech_repo.create_transcription_job(request)

        console.print("✅ [bold green]転写ジョブが正常に作成されました[/bold green]")
        console.print(f"ジョブID: {response.job_id}")
        console.print(f"ステータス: {response.status.value}")

        if response.message:
            console.print(f"メッセージ: {response.message}")

    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        # Report the failure through the exit status instead of exiting 0.
        raise typer.Exit(code=1) from e
49+
50+
51+
@app.command()
def get_transcription(
    job_id: str = typer.Argument(..., help="転写ジョブID"),
):
    """転写ジョブの状態を取得する"""
    # CLI command: fetch one transcription job and print its details.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   job_id: identifier of the transcription job to look up.
    #
    # Exits with status 1 when the lookup fails.
    console.print("[bold green]転写ジョブの状態を取得します[/bold green]")
    console.print(f"ジョブID: {job_id}")

    try:
        job = speech_repo.get_transcription_job(job_id)

        console.print("\n[bold blue]転写ジョブ情報[/bold blue]:")
        console.print(f"ID: {job.id}")
        console.print(f"名前: {job.name}")
        console.print(f"ステータス: {job.status.value}")
        console.print(f"作成日時: {job.created_date_time}")
        console.print(f"最終更新日時: {job.last_action_date_time}")

        if job.links:
            links_json = json.dumps(job.links, indent=2, ensure_ascii=False)
            console.print(f"リンク: {links_json}")

    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        # Report the failure through the exit status instead of exiting 0.
        raise typer.Exit(code=1) from e
74+
75+
76+
@app.command()
def get_transcription_files(
    job_id: str = typer.Argument(..., help="転写ジョブID"),
):
    """転写ジョブのファイル一覧を取得する"""
    # CLI command: list the files attached to a transcription job as a table.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   job_id: identifier of the transcription job whose files are listed.
    #
    # Exits with status 1 when the lookup fails; an empty result is not an error.
    console.print("[bold green]転写ファイル一覧を取得します[/bold green]")
    console.print(f"ジョブID: {job_id}")

    try:
        files = speech_repo.get_transcription_files(job_id)

        if not files:
            console.print("[yellow]転写ファイルが見つかりませんでした[/yellow]")
            return

        # Render the result as a table.
        table = Table(title="転写ファイル一覧")
        table.add_column("名前", style="cyan")
        table.add_column("種類", style="green")
        table.add_column("リンク", style="yellow")

        for entry in files:
            # "links" may be missing or explicitly null; treat both as empty.
            links = entry.get("links") or {}
            table.add_row(
                entry.get("name", "N/A"),
                entry.get("kind", "N/A"),
                links.get("contentUrl", "N/A"),
            )

        console.print(table)
        console.print(f"[bold blue]合計: {len(files)}件[/bold blue]")

    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        # Report the failure through the exit status instead of exiting 0.
        raise typer.Exit(code=1) from e
109+
110+
111+
@app.command()
def get_transcription_result(
    file_url: str = typer.Argument(..., help="転写結果ファイルのURL"),
    save_file: str = typer.Option(None, "--save", "-s", help="結果を保存するファイル名"),
):
    """転写結果を取得する"""
    # CLI command: download a transcription result, print a summary and
    # optionally save the full result as JSON.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   file_url: URL of the transcription result file.
    #   save_file: optional path; when given, the result is written there as
    #     UTF-8 JSON.
    #
    # Exits with status 1 when fetching, printing or saving fails.
    console.print("[bold green]転写結果を取得します[/bold green]")
    console.print(f"ファイルURL: {file_url}")

    try:
        result = speech_repo.get_transcription_result(file_url)

        console.print("\n[bold blue]転写結果[/bold blue]:")
        console.print(f"ソース: {result.source}")
        console.print(f"タイムスタンプ: {result.timestamp}")
        console.print(f"継続時間: {result.duration_in_ticks}")

        if result.combined_recognized_phrases:
            console.print("\n[bold yellow]統合認識フレーズ[/bold yellow]:")
            for combined in result.combined_recognized_phrases:
                console.print(f"- {combined.get('display', 'N/A')}")

        if result.recognized_phrases:
            total = len(result.recognized_phrases)
            console.print(f"\n[bold yellow]認識フレーズ({total}件)[/bold yellow]:")
            # Show only the first five phrases to keep the output short.
            for number, phrase in enumerate(result.recognized_phrases[:5], start=1):
                console.print(f"{number}. {phrase.get('display', 'N/A')}")

            if total > 5:
                console.print(f"... および {total - 5} 件の追加フレーズ")

        # Save the full result to a file when requested.
        if save_file:
            with open(save_file, "w", encoding="utf-8") as f:
                # default=str stringifies any value json cannot encode natively.
                json.dump(result.dict(), f, ensure_ascii=False, indent=2, default=str)
            console.print(f"✅ 結果を {save_file} に保存しました")

    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        # Report the failure through the exit status instead of exiting 0.
        raise typer.Exit(code=1) from e
149+
150+
151+
@app.command()
def delete_transcription(
    job_id: str = typer.Argument(..., help="転写ジョブID"),
    force: bool = typer.Option(False, "--force", "-f", help="確認なしで削除"),
):
    """転写ジョブを削除する"""
    # CLI command: delete a transcription job, asking for confirmation first
    # unless --force is given.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   job_id: identifier of the transcription job to delete.
    #   force: skip the interactive confirmation prompt.
    #
    # Exits with status 1 when the repository reports failure or raises.
    # Declining the confirmation is not an error.
    console.print("[bold yellow]転写ジョブを削除します[/bold yellow]")
    console.print(f"ジョブID: {job_id}")

    if not force and not typer.confirm("本当に削除しますか?"):
        console.print("削除をキャンセルしました")
        return

    try:
        success = speech_repo.delete_transcription_job(job_id)
    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        raise typer.Exit(code=1) from e

    # Handled outside the try so the Exit raised below is not swallowed by
    # the broad exception handler above.
    if not success:
        console.print("❌ [bold red]転写ジョブの削除に失敗しました[/bold red]")
        raise typer.Exit(code=1)

    console.print(f"✅ [bold green]転写ジョブ '{job_id}' を正常に削除しました[/bold green]")
176+
177+
178+
@app.command()
def list_transcriptions():
    """転写ジョブの一覧を取得する"""
    # CLI command: list all transcription jobs as a table.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Exits with status 1 when the listing fails; an empty list is not an error.
    console.print("[bold green]転写ジョブ一覧を取得します[/bold green]")

    try:
        jobs = speech_repo.list_transcription_jobs()

        if not jobs:
            console.print("[yellow]転写ジョブが見つかりませんでした[/yellow]")
            return

        # Render the result as a table.
        table = Table(title="転写ジョブ一覧")
        table.add_column("ID", style="cyan")
        table.add_column("名前", style="green")
        table.add_column("ステータス", style="yellow")
        table.add_column("作成日時", style="magenta")
        table.add_column("最終更新日時", style="blue")

        for job in jobs:
            created = str(job.created_date_time) if job.created_date_time else "N/A"
            updated = str(job.last_action_date_time) if job.last_action_date_time else "N/A"
            table.add_row(job.id, job.name or "N/A", job.status.value, created, updated)

        console.print(table)
        console.print(f"[bold blue]合計: {len(jobs)}件[/bold blue]")

    except Exception as e:
        console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
        # Report the failure through the exit status instead of exiting 0.
        raise typer.Exit(code=1) from e
212+
213+
214+
@app.command()
def wait_for_completion(
    job_id: str = typer.Argument(..., help="転写ジョブID"),
    timeout: int = typer.Option(300, "--timeout", "-t", help="タイムアウト時間(秒)"),
    interval: int = typer.Option(10, "--interval", "-i", help="チェック間隔(秒)"),
):
    """転写ジョブの完了を待つ"""
    # CLI command: poll a transcription job until it succeeds, fails, or the
    # timeout elapses.
    # (The docstring above is the help text Typer shows, so it is kept verbatim.)
    #
    # Args:
    #   job_id: identifier of the transcription job to wait for.
    #   timeout: maximum number of seconds to wait.
    #   interval: number of seconds to sleep between status checks.
    #
    # Exit status: 0 when the job succeeded; 1 when the job failed, the status
    # lookup raised, or the timeout elapsed — so scripts can chain on it.
    console.print("[bold green]転写ジョブの完了を待ちます[/bold green]")
    console.print(f"ジョブID: {job_id}")
    console.print(f"タイムアウト: {timeout}秒")
    console.print(f"チェック間隔: {interval}秒")

    # Monotonic clock: the deadline is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        transient=True,
    ) as progress:
        task = progress.add_task(description="転写処理中...", total=None)

        while time.monotonic() < deadline:
            # Only the remote call is guarded, so the Exit raised for a failed
            # job below is not swallowed by the broad handler.
            try:
                job = speech_repo.get_transcription_job(job_id)
            except Exception as e:
                progress.update(task, description=f"❌ エラー: {str(e)}")
                console.print(f"❌ [bold red]エラー[/bold red]: {str(e)}")
                raise typer.Exit(code=1) from e

            if job.status == TranscriptionStatus.SUCCEEDED:
                progress.update(task, description="✅ 転写が完了しました")
                console.print("✅ [bold green]転写ジョブが正常に完了しました[/bold green]")
                console.print(f"ジョブID: {job.id}")
                console.print(f"最終更新日時: {job.last_action_date_time}")
                return

            if job.status == TranscriptionStatus.FAILED:
                progress.update(task, description="❌ 転写が失敗しました")
                console.print("❌ [bold red]転写ジョブが失敗しました[/bold red]")
                console.print(f"ジョブID: {job.id}")
                raise typer.Exit(code=1)

            if job.status == TranscriptionStatus.RUNNING:
                progress.update(task, description="🔄 転写処理中...")
            else:
                progress.update(task, description=f"⏳ 待機中 ({job.status.value})")

            # Never sleep past the deadline, and never pass a negative value
            # to time.sleep (which would raise ValueError).
            remaining = deadline - time.monotonic()
            time.sleep(max(0.0, min(interval, remaining)))

    # Timed out.
    console.print(f"⏰ [bold yellow]タイムアウトしました({timeout}秒)[/bold yellow]")
    console.print("転写ジョブはまだ処理中の可能性があります")
    raise typer.Exit(code=1)
265+
266+
267+
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
268+
app()

template_fastapi/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
1212
from opentelemetry.trace import Span
1313

14-
from template_fastapi.routers import demos, files, foodies, games, items
14+
from template_fastapi.routers import demos, files, foodies, games, items, speeches
1515

1616
app = FastAPI()
1717

@@ -41,3 +41,4 @@ def server_request_hook(span: Span, scope: dict):
4141
app.include_router(games.router)
4242
app.include_router(foodies.router)
4343
app.include_router(files.router)
44+
app.include_router(speeches.router)

0 commit comments

Comments
 (0)