add sound operator tool

ks6088ts · ks6088ts · commit 3556faf68b82 · 2025-08-26T12:53:32.000+09:00
diff --git a/.gitignore b/.gitignore
@@ -166,3 +166,4 @@ assets/
 .langgraph_api
 generated/
 *.db
+*.wav
diff --git a/docs/references.md b/docs/references.md
@@ -58,3 +58,10 @@
 ### n8n
 
 - [Hosting n8n / Installation / Server setups / Docker-Compose](https://docs.n8n.io/hosting/installation/server-setups/docker-compose/)
+
+### Audio
+
+- [Python の sounddevice を改めて試す](https://zenn.dev/kun432/scraps/f56760d41fc5aa)
+- [How To Install libportaudio2 on Ubuntu 22.04](https://www.installati.one/install-libportaudio2-ubuntu-22-04/): `sudo apt-get -y install libportaudio2`
+- [python-sounddevice](https://github.com/spatialaudio/python-sounddevice)
+- [python-soundfile](https://github.com/bastibe/python-soundfile)
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,8 @@ dependencies = [
     "pypdf>=5.9.0",
     "python-dotenv>=1.1.0",
     "qdrant-client>=1.15.1",
+    "sounddevice>=0.5.2",
+    "soundfile>=0.13.1",
     "streamlit>=1.48.0",
     "typer>=0.16.0",
     "youtube-transcript-api>=1.2.2",
diff --git a/scripts/sound_operator.py b/scripts/sound_operator.py
@@ -0,0 +1,115 @@
+import logging
+
+import sounddevice as sd
+import soundfile as sf
+import typer
+from dotenv import load_dotenv
+
+from template_langgraph.loggers import get_logger
+
+# Initialize the Typer application.
+# The help text describes this script's own commands (play / list-devices / record);
+# the previous text was carried over from a different CLI.
+app = typer.Typer(
+    add_completion=False,
+    help="sound operator CLI: play, record, and list audio devices",
+)
+
+# Module-level logger shared by every command below
+logger = get_logger(__name__)
+
+
+@app.command()
+def play(
+    file: str = typer.Option(
+        "input.wav",
+        "--file",
+        "-f",
+        help="Path to the audio file to play",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+) -> None:
+    """Play an audio file and block until playback has finished."""
+    # Set up logging
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+
+    try:
+        # Decode the whole file into memory as float32 samples.
+        data, fs = sf.read(
+            file=file,
+            dtype="float32",
+        )
+    except sf.SoundFileError as e:
+        # A missing or undecodable file is a user error: report it and exit
+        # with a non-zero status instead of dumping a traceback.
+        logger.error(f"Failed to read audio file {file}: {e}")
+        raise typer.Exit(code=1) from e
+
+    logger.info(f"Sampling rate: {fs}")
+    # A 1-D array has no channel axis, so it is reported as a single channel.
+    logger.info(f"Channels: {data.shape[1] if len(data.shape) > 1 else 1}")
+    logger.info(f"Data type: {data.dtype}")
+    logger.info(f"Duration: {len(data) / fs:.2f} seconds")
+
+    # sd.play() returns immediately; sd.wait() blocks until the playback is done.
+    sd.play(data, fs)
+    sd.wait()
+
+
+@app.command()
+def list_devices(
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+) -> None:
+    """List the audio devices reported by sounddevice with their properties."""
+    # Set up logging
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+
+    # Each entry is logged as a header line followed by one line per property.
+    for idx, device in enumerate(sd.query_devices()):
+        logger.info(f"Device {idx}:")
+        for key, value in device.items():
+            logger.info(f"  {key}: {value}")
+
+
+@app.command()
+def record(
+    duration: float = typer.Option(
+        5.0,
+        "--duration",
+        "-d",
+        help="Duration to record audio (in seconds)",
+    ),
+    output: str = typer.Option(
+        "output.wav",
+        "--output",
+        "-o",
+        help="Path to the output audio file",
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose",
+        "-v",
+        help="Enable verbose output",
+    ),
+    samplerate: int = typer.Option(
+        44100,
+        "--samplerate",
+        "-r",
+        min=1,
+        help="Sampling rate of the recording (in Hz)",
+    ),
+    channels: int = typer.Option(
+        2,
+        "--channels",
+        "-c",
+        min=1,
+        help="Number of input channels to record",
+    ),
+) -> None:
+    """Record audio for a fixed duration and save it to a file."""
+    # Set up logging
+    if verbose:
+        logger.setLevel(logging.DEBUG)
+
+    frames = int(duration * samplerate)
+    if frames <= 0:
+        # A zero or negative duration leaves nothing to capture; reject it
+        # before the audio device is opened.
+        raise typer.BadParameter(
+            "must be long enough to record at least one frame",
+            param_hint="--duration",
+        )
+
+    logger.info(f"Recording audio for {duration} seconds...")
+    recording = sd.rec(
+        frames=frames,
+        samplerate=samplerate,
+        channels=channels,
+    )
+    # sd.rec() returns immediately; wait until the buffer has been filled
+    # before writing it out.
+    sd.wait()
+    sf.write(output, recording, samplerate)
+    logger.info(f"Recording saved to {output}")
+
+
+if __name__ == "__main__":
+    # Load .env settings (overriding values already present in the environment)
+    # before handing control to the Typer CLI.
+    load_dotenv(override=True, verbose=True)
+    app()
diff --git a/uv.lock b/uv.lock