diff --git a/Cargo.lock b/Cargo.lock index 0c18d5f363..fec9249c90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1431,6 +1431,31 @@ dependencies = [ "windows-core 0.60.1", ] +[[package]] +name = "cap-multi-recorder" +version = "0.1.0" +dependencies = [ + "anyhow", + "cap-enc-avfoundation", + "cap-media-info", + "cap-recording", + "cap-timestamp", + "cidre 0.11.0", + "clap", + "ffmpeg-next", + "flume", + "futures", + "json5", + "kameo", + "scap-direct3d", + "scap-screencapturekit", + "scap-targets", + "serde", + "serde_json", + "tokio", + "windows 0.60.0", +] + [[package]] name = "cap-project" version = "0.1.0" @@ -4545,6 +4570,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + [[package]] name = "jsonptr" version = "0.6.3" @@ -6217,6 +6253,49 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pest" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "pest_meta" +version = 
"2.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "petgraph" version = "0.6.5" @@ -9932,6 +10011,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "uds" version = "0.4.2" diff --git a/Cargo.toml b/Cargo.toml index dc023746e9..206854c9e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "2" -members = ["apps/cli", "apps/desktop/src-tauri", "crates/*"] + members = ["apps/cli", "apps/desktop/src-tauri", "apps/multi-recorder", "crates/*"] [workspace.dependencies] anyhow = { version = "1.0.86" } @@ -40,9 +40,10 @@ sentry = { version = "0.42.0", features = [ "debug-images", ] } tracing = "0.1.41" -futures = "0.3.31" - -cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", features = [ + futures = "0.3.31" + kameo = "0.17.2" + + cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", features = [ "macos_13_0", "cv", "cf", diff --git a/apps/desktop/src-tauri/src/captions.rs b/apps/desktop/src-tauri/src/captions.rs index b00d3a21b8..785d836397 100644 --- a/apps/desktop/src-tauri/src/captions.rs +++ b/apps/desktop/src-tauri/src/captions.rs @@ -270,27 +270,27 @@ async fn extract_audio_from_video(video_path: &str, output_path: &PathBuf) -> Re } } Err(e) => { - log::error!("Failed to resample chunk {chunk_idx}: {e}"); + log::error!("Failed to resample chunk {chunk_idx}: {:#}", e); continue; } } if let Err(e) = encoder.send_frame(&output_frame) { - log::error!("Failed to send frame to encoder: {e}"); + 
log::error!("Failed to send frame to encoder: {:#}", e); continue; } // Process each encoded packet loop { let mut packet = ffmpeg::Packet::empty(); - match encoder.receive_packet(&mut packet) { - Ok(_) => { - if let Err(e) = packet.write_interleaved(&mut output) { - log::error!("Failed to write packet: {e}"); - } - } - Err(_) => break, - } + match encoder.receive_packet(&mut packet) { + Ok(_) => { + if let Err(e) = packet.write_interleaved(&mut output) { + log::error!("Failed to write packet: {:#}", e); + } + } + Err(_) => break, + } } } } diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs index 1276f21087..06d21865c8 100644 --- a/apps/desktop/src-tauri/src/frame_ws.rs +++ b/apps/desktop/src-tauri/src/frame_ws.rs @@ -51,13 +51,13 @@ pub async fn create_frame_ws(frame_rx: Receiver) -> (u16, CancellationT frame.data.extend_from_slice(&frame.width.to_le_bytes()); if let Err(e) = socket.send(Message::Binary(frame.data)).await { - tracing::error!("Failed to send frame to socket: {:?}", e); + tracing::error!("Failed to send frame to socket: {:#}", e); break; } } Err(e) => { tracing::error!( - "Connection has been lost! Shutting down websocket server: {:?}", + "Connection has been lost! Shutting down websocket server: {:#}", e ); break; diff --git a/apps/multi-recorder/.planning-summary.md b/apps/multi-recorder/.planning-summary.md new file mode 100644 index 0000000000..10560fd923 --- /dev/null +++ b/apps/multi-recorder/.planning-summary.md @@ -0,0 +1,93 @@ +# Planning Summary + +This directory contains comprehensive planning documentation for the multi-recorder CLI tool. + +## What We've Planned + +A flexible CLI tool that allows users to: +1. Capture multiple input sources (displays, cameras, microphones) +2. Route them to multiple output files simultaneously +3. Configure sources with varying levels of detail +4. 
Support both simple CLI usage and complex config files + +## Key Innovation: Four-Level Configuration + +### Level 1: Simple IDs (Zero JSON) +```bash +cap-multi-recorder record --display 0 output.mp4 +``` +Perfect for quick recordings with sensible defaults. + +### Level 2: Inline JSON Settings +```bash +cap-multi-recorder record \ + --display '{"id":0,"settings":{"fps":60}}' output.mp4 +``` +Per-source control without creating files. + +### Level 3: File References +```bash +cap-multi-recorder record --display @config.json output.mp4 +``` +Reusable, version-controlled configurations. + +### Level 4: Full Config Files +```bash +cap-multi-recorder record streaming-setup.json +``` +Complete recording setups with named inputs and outputs. + +## Planning Documents + +- **[PLAN.md](./PLAN.md)** - Main implementation plan +- **[PLAN-UNIFIED.md](./PLAN-UNIFIED.md)** - Detailed unified approach +- **[PLAN-JSON-CONFIG.md](./PLAN-JSON-CONFIG.md)** - Full config format +- **[INPUT-PATTERNS.md](./INPUT-PATTERNS.md)** - Pattern comparison guide +- **[README.md](./README.md)** - Overview and quick start + +## Architecture Highlights + +- Built on `cap-recording` crate's `OutputPipeline` +- N→M routing: any sources to any outputs +- Platform-native encoders (AVFoundation, Media Foundation) +- Shared input sources across outputs +- Independent pipeline control per output + +## Example Scenarios + +### Screen + Camera + Mic to One File +```bash +cap-multi-recorder record \ + --display 0 recording.mp4 \ + --camera 0 recording.mp4 \ + --microphone "Blue Yeti" recording.mp4 +``` + +### Each Source to Separate File +```bash +cap-multi-recorder record \ + --display 0 screen.mp4 \ + --camera 0 webcam.mp4 \ + --microphone "Blue Yeti" audio.ogg +``` + +### Complex Multi-Output +```bash +cap-multi-recorder record \ + --display 0 screen.mp4 full.mp4 \ + --camera 0 webcam.mp4 full.mp4 \ + --microphone "Blue Yeti" audio.ogg full.mp4 +``` +Creates 4 files: screen-only, webcam-only, audio-only, and 
combined. + +## What's Next + +Implementation will follow the phases outlined in PLAN.md: +1. Core infrastructure +2. Input discovery & management +3. Output pipeline construction +4. Recording control +5. Error handling & validation +6. User experience + +See PLAN.md for detailed implementation roadmap. diff --git a/apps/multi-recorder/Cargo.toml b/apps/multi-recorder/Cargo.toml new file mode 100644 index 0000000000..fc9d72668b --- /dev/null +++ b/apps/multi-recorder/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "cap-multi-recorder" +version = "0.1.0" +edition = "2024" + + [dependencies] + cap-recording = { path = "../../crates/recording" } + cap-media-info = { path = "../../crates/media-info" } + cap-timestamp = { path = "../../crates/timestamp" } + scap-targets = { path = "../../crates/scap-targets" } + clap = { version = "4.0", features = ["derive"] } + json5 = "0.4" + serde = { version = "1.0", features = ["derive"] } + serde_json = "1.0" + anyhow = "1.0" + tokio = { version = "1.0", features = ["full"] } + flume = { workspace = true } + ffmpeg = { workspace = true } + kameo = { workspace = true } + futures = { workspace = true } + +[target.'cfg(target_os = "macos")'.dependencies] +cap-enc-avfoundation = { path = "../../crates/enc-avfoundation" } +scap-screencapturekit = { path = "../../crates/scap-screencapturekit" } +cidre = { workspace = true, default-features = false } + +[target.'cfg(windows)'.dependencies] +scap-direct3d = { path = "../../crates/scap-direct3d" } +windows = { version = "0.60", features = ["Win32_Graphics_Direct3D11", "Win32_Graphics_Dxgi"] } + +[lints] +workspace = true diff --git a/apps/multi-recorder/INPUT-PATTERNS.md b/apps/multi-recorder/INPUT-PATTERNS.md new file mode 100644 index 0000000000..0c7b5841c2 --- /dev/null +++ b/apps/multi-recorder/INPUT-PATTERNS.md @@ -0,0 +1,285 @@ +# Input Specification Patterns + +This document compares the three ways to specify input sources in the multi-recorder CLI. 
+ +## Pattern 1: Simple Identifier + +**When to use**: Quick recordings, defaults are fine, minimal configuration needed. + +### Display +```bash +--display 0 output.mp4 +--display primary output.mp4 +``` + +### Camera +```bash +--camera 0 output.mp4 +--camera default output.mp4 +``` + +### Microphone +```bash +--microphone "Blue Yeti" output.mp4 +--microphone default output.mp4 +``` + +**Characteristics**: +- ✅ Fastest to type +- ✅ No JSON knowledge required +- ✅ Uses global defaults (`--fps`, `--cursor`) +- ❌ No per-source customization + +## Pattern 2: Inline JSON + +**When to use**: Need per-source settings, one-off configurations, don't want separate files. + +### Display +```bash +--display '{"id":0,"settings":{"fps":60,"show_cursor":true}}' output.mp4 +``` + +### Camera +```bash +--camera '{"id":0,"settings":{"resolution":{"width":1920,"height":1080},"fps":30}}' output.mp4 +``` + +### Microphone +```bash +--microphone '{"label":"Blue Yeti","settings":{}}' output.mp4 +``` + +### Area (requires JSON) +```bash +--display '{"type":"area","screen":0,"bounds":{"x":100,"y":100,"width":1920,"height":1080},"settings":{"fps":60}}' output.mp4 +``` + +**Characteristics**: +- ✅ Full control over settings +- ✅ Self-contained in command +- ✅ Can override global defaults per-source +- ❌ Verbose for complex settings +- ❌ Error-prone (JSON syntax) +- ❌ Not reusable across commands + +## Pattern 3: File Reference + +**When to use**: Reusable configurations, complex settings, team sharing, version control. 
+ +### Display +Create `high-quality-display.json`: +```json +{ + "id": 0, + "settings": { + "fps": 60, + "show_cursor": true + } +} +``` + +Use it: +```bash +--display @high-quality-display.json output.mp4 +``` + +### Camera +Create `1080p-camera.json`: +```json +{ + "id": 0, + "settings": { + "resolution": { + "width": 1920, + "height": 1080 + }, + "fps": 30 + } +} +``` + +Use it: +```bash +--camera @configs/1080p-camera.json output.mp4 +``` + +### Area Capture +Create `game-window.json`: +```json +{ + "type": "area", + "screen": 0, + "bounds": { + "x": 100, + "y": 100, + "width": 1920, + "height": 1080 + }, + "settings": { + "fps": 120, + "show_cursor": false + } +} +``` + +Use it: +```bash +--display @game-window.json gameplay.mp4 +``` + +**Characteristics**: +- ✅ Reusable across commands +- ✅ Easier to maintain complex configs +- ✅ Team can share via git +- ✅ Readable and documented +- ✅ Can use comments (in YAML) +- ❌ Extra file management +- ❌ Requires file path knowledge + +## Mixing Patterns + +You can mix patterns in the same command: + +```bash +cap-multi-recorder record \ + --display 0 screen-default.mp4 \ + --display '{"id":0,"settings":{"fps":60}}' screen-60fps.mp4 \ + --display @high-quality-display.json screen-hq.mp4 \ + --camera @1080p-camera.json webcam.mp4 \ + --microphone "Blue Yeti" audio.mp4 +``` + +This creates 5 outputs: +- `screen-default.mp4`: Display 0 with defaults +- `screen-60fps.mp4`: Display 0 at 60fps +- `screen-hq.mp4`: Display 0 with settings from file +- `webcam.mp4`: Camera from config file +- `audio.mp4`: Microphone with defaults + +## Comparison Table + +| Feature | Simple ID | Inline JSON | File Reference | +|---------|-----------|-------------|----------------| +| Speed | ⭐⭐⭐ | ⭐ | ⭐⭐ | +| Readability | ⭐⭐⭐ | ⭐ | ⭐⭐⭐ | +| Per-source settings | ❌ | ✅ | ✅ | +| Reusability | ❌ | ❌ | ✅ | +| Version control | N/A | ❌ | ✅ | +| Team sharing | N/A | ❌ | ✅ | +| Area capture | ❌ | ✅ | ✅ | +| Error-prone | ❌ | ⭐⭐⭐ | ⭐ | +| Setup 
required | ❌ | ❌ | ✅ | + +## Recommendations + +### Use Simple ID when: +- Recording quick tests or demos +- Default settings are sufficient +- Learning the tool +- Minimal configuration needed + +### Use Inline JSON when: +- Need specific settings for one-off recording +- Don't want to create separate files +- Settings are simple enough to type +- Documenting exact command in README + +### Use File Reference when: +- Same configuration used repeatedly +- Complex settings (area bounds, resolutions) +- Working in a team +- Want version-controlled configs +- Building library of reusable configurations + +## Full Config File + +For very complex scenarios with multiple inputs and outputs, use a full config file instead: + +```bash +cap-multi-recorder record streaming-setup.json +``` + +Where `streaming-setup.json`: +```json +{ + "inputs": { + "main_display": { + "type": "display", + "id": 0, + "settings": {"fps": 60} + }, + "webcam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "mic": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + } + }, + "outputs": { + "recording.mp4": { + "video": "main_display", + "audio": ["mic"] + } + } +} +``` + +See [PLAN-JSON-CONFIG.md](./PLAN-JSON-CONFIG.md) for full config file format. + +## Best Practices + +1. **Start Simple**: Begin with simple IDs, add complexity as needed +2. **Organize Configs**: Keep reusable configs in a `configs/` directory +3. **Name Descriptively**: Use clear names like `60fps-display.json`, `1080p-camera.json` +4. **Document Settings**: Add comments in YAML files explaining non-obvious settings +5. **Version Control**: Commit reusable configs to git +6. **Team Templates**: Share common configs with team members +7. 
**Validate First**: Use `cap-multi-recorder validate` to check configs before recording + +## Examples Collection + +### Gaming Setup +```bash +cap-multi-recorder record \ + --display @configs/gaming-display-120fps.json gameplay.mp4 \ + --camera @configs/facecam-720p.json facecam.mp4 \ + --microphone "Blue Yeti" gameplay.mp4 facecam.mp4 \ + --system-audio gameplay.mp4 +``` + +### Presentation Recording +```bash +cap-multi-recorder record \ + --display 0 presentation.mp4 \ + --camera @configs/webcam-corner.json presentation.mp4 \ + --microphone "MacBook Pro Microphone" presentation.mp4 +``` + +### Multi-Monitor Workspace +```bash +cap-multi-recorder record \ + --display '{"id":0,"settings":{"fps":30}}' left-monitor.mp4 \ + --display '{"id":1,"settings":{"fps":30}}' right-monitor.mp4 \ + --microphone default both-monitors.mp4 +``` + +Note: the command above routes the microphone to `both-monitors.mp4`, an output that no display feeds. To attach the microphone to both monitor recordings, list each output explicitly: + +```bash +cap-multi-recorder record \ + --display '{"id":0,"settings":{"fps":30}}' left-monitor.mp4 \ + --display '{"id":1,"settings":{"fps":30}}' right-monitor.mp4 \ + --microphone default left-monitor.mp4 right-monitor.mp4 +``` + +### Podcast Recording +```bash +cap-multi-recorder record \ + --microphone "Host Microphone" host.ogg mixed.ogg \ + --microphone "Guest Microphone" guest.ogg mixed.ogg \ + --system-audio mixed.ogg +``` diff --git a/apps/multi-recorder/PLAN-JSON-CONFIG.md b/apps/multi-recorder/PLAN-JSON-CONFIG.md new file mode 100644 index 0000000000..61d9d6e92c --- /dev/null +++ b/apps/multi-recorder/PLAN-JSON-CONFIG.md @@ -0,0 +1,935 @@ +# Multi-Recorder CLI Plan - JSON Configuration Approach + +## Overview + +An alternative approach to multi-recorder configuration that separates input source declarations from output routing. This two-phase approach mirrors the internal architecture more closely and provides clearer separation of concerns. + +## Core Design Philosophy + +**Two-Phase Declaration**: +1. 
**Declare Inputs**: Define sources with IDs, types, and settings +2. **Declare Outputs**: Specify output files and which input IDs feed them + +This approach provides: +- Clearer input reuse across outputs +- Explicit configuration of source settings +- Better alignment with the internal `InputManager` → `OutputPipeline` architecture +- More maintainable configs for complex scenarios + +## Configuration Format + +### JSON Structure + +```json +{ + "settings": { + "fps": 30, + "show_cursor": true + }, + "inputs": { + "main_display": { + "type": "display", + "id": 0, + "settings": { + "show_cursor": true, + "fps": 60 + } + }, + "webcam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "blue_yeti": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + }, + "focusrite": { + "type": "microphone", + "label": "Focusrite USB", + "settings": {} + }, + "sys_audio": { + "type": "system-audio", + "settings": {} + } + }, + "outputs": { + "full_recording.mp4": { + "video": "main_display", + "audio": ["blue_yeti", "focusrite", "sys_audio"] + }, + "camera_only.mp4": { + "video": "webcam", + "audio": ["blue_yeti"] + }, + "audio_backup.ogg": { + "audio": ["focusrite"] + } + } +} +``` + +### YAML Alternative + +```yaml +settings: + fps: 30 + show_cursor: true + +inputs: + main_display: + type: display + id: 0 + settings: + show_cursor: true + fps: 60 + + webcam: + type: camera + id: 0 + settings: {} + + blue_yeti: + type: microphone + label: "Blue Yeti" + settings: {} + + focusrite: + type: microphone + label: "Focusrite USB" + settings: {} + + sys_audio: + type: system-audio + settings: {} + +outputs: + full_recording.mp4: + video: main_display + audio: [blue_yeti, focusrite, sys_audio] + + camera_only.mp4: + video: webcam + audio: [blue_yeti] + + audio_backup.ogg: + audio: [focusrite] +``` + +## Input Types Specification + +### Display Input + +```json +{ + "type": "display", + "id": 0, + "settings": { + "show_cursor": true, + "fps": 60 + } +} +``` + 
+**Fields:** +- `type`: `"display"` +- `id`: Display ID (number or "primary") +- `settings`: + - `show_cursor`: bool (default: true) + - `fps`: number (default: 30) + +### Window Input + +```json +{ + "type": "window", + "id": 12345, + "settings": { + "show_cursor": true, + "fps": 30 + } +} +``` + +**Fields:** +- `type`: `"window"` +- `id`: Window ID (number) +- `settings`: + - `show_cursor`: bool (default: true) + - `fps`: number (default: 30) + +### Area Input + +```json +{ + "type": "area", + "screen": 0, + "bounds": { + "x": 100, + "y": 100, + "width": 1920, + "height": 1080 + }, + "settings": { + "show_cursor": true, + "fps": 30 + } +} +``` + +**Fields:** +- `type`: `"area"` +- `screen`: Display ID to capture from +- `bounds`: Rectangle defining capture area + - `x`, `y`: Position + - `width`, `height`: Size +- `settings`: + - `show_cursor`: bool (default: true) + - `fps`: number (default: 30) + +### Camera Input + +```json +{ + "type": "camera", + "id": 0, + "settings": { + "resolution": { + "width": 1920, + "height": 1080 + }, + "fps": 30 + } +} +``` + +**Fields:** +- `type`: `"camera"` +- `id`: Camera ID (number or device name) +- `settings`: + - `resolution`: Optional preferred resolution + - `fps`: number (default: 30) + +### Microphone Input + +```json +{ + "type": "microphone", + "label": "Blue Yeti", + "settings": {} +} +``` + +**Fields:** +- `type`: `"microphone"` +- `label`: Device label/name (or "default") +- `settings`: Currently empty, reserved for future use + +### System Audio Input + +```json +{ + "type": "system-audio", + "settings": {} +} +``` + +**Fields:** +- `type`: `"system-audio"` +- `settings`: Platform-specific settings (future) + +## Output Specification + +### Output Entry + +```json +{ + "path/to/output.mp4": { + "video": "input_id", + "audio": ["input_id1", "input_id2"], + "format": "mp4", + "settings": { + "bitrate": "5M" + } + } +} +``` + +**Fields:** +- Key: Output file path +- `video`: Optional input ID for video source +- 
`audio`: Optional array of input IDs for audio sources +- `format`: Optional format override (inferred from extension by default) +- `settings`: Optional format-specific settings + +## CLI Integration + +### Config File Mode + +```bash +cap-multi-recorder record config.json +``` + +Loads configuration from JSON/YAML file. + +### Generate Config Mode + +```bash +cap-multi-recorder generate-config [OPTIONS] > config.json +``` + +Interactive mode that generates a configuration file by: +1. Listing available inputs +2. Prompting user to select and name inputs +3. Prompting user to define outputs +4. Outputting JSON/YAML configuration + +**Options:** +- `--format json|yaml` - Output format (default: json) +- `--interactive` - Interactive mode (default) +- `--template` - Generate template with all input types + +### Hybrid Mode (CLI + Config) + +```bash +cap-multi-recorder record config.json \ + --add-input microphone:backup="Backup Mic" \ + --add-output backup.ogg:backup +``` + +Load config but allow CLI overrides: +- `--add-input <type>:<name>=<value>` - Add input to config +- `--add-output <path>:<input>` - Add output routing +- `--override-input <name>.<setting>=<value>` - Override input setting + +## Data Structures + +### Configuration Schema + +```rust +// src/config.rs + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::PathBuf; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Config { + #[serde(default)] + pub settings: GlobalSettings, + + pub inputs: HashMap, + + pub outputs: HashMap, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct GlobalSettings { + #[serde(default = "default_fps")] + pub fps: u32, + + #[serde(default = "default_true")] + pub show_cursor: bool, +} + +fn default_fps() -> u32 { 30 } +fn default_true() -> bool { true } + +#[derive(Debug, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "kebab-case")] +pub enum InputConfig { + Display { + id: DisplayIdOrName, + #[serde(default)] + settings: DisplaySettings, + }, + Window { + 
id: u64, + #[serde(default)] + settings: WindowSettings, + }, + Area { + screen: DisplayIdOrName, + bounds: AreaBounds, + #[serde(default)] + settings: AreaSettings, + }, + Camera { + id: CameraIdOrName, + #[serde(default)] + settings: CameraSettings, + }, + Microphone { + label: String, + #[serde(default)] + settings: MicrophoneSettings, + }, + #[serde(rename = "system-audio")] + SystemAudio { + #[serde(default)] + settings: SystemAudioSettings, + }, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum DisplayIdOrName { + Id(u32), + Name(String), // "primary", "secondary", etc. +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum CameraIdOrName { + Id(u32), + Name(String), +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct DisplaySettings { + pub show_cursor: Option, + pub fps: Option, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct WindowSettings { + pub show_cursor: Option, + pub fps: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct AreaBounds { + pub x: f64, + pub y: f64, + pub width: f64, + pub height: f64, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct AreaSettings { + pub show_cursor: Option, + pub fps: Option, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct CameraSettings { + pub resolution: Option, + pub fps: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Resolution { + pub width: u32, + pub height: u32, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct MicrophoneSettings {} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct SystemAudioSettings {} + +#[derive(Debug, Serialize, Deserialize)] +pub struct OutputConfig { + #[serde(skip_serializing_if = "Option::is_none")] + pub video: Option, + + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub audio: Vec, + + #[serde(skip_serializing_if = "Option::is_none")] + pub format: Option, + + #[serde(default, 
skip_serializing_if = "Option::is_none")] + pub settings: Option, +} + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct OutputSettings { + pub bitrate: Option, + // Future: quality, codec, etc. +} +``` + +### Validation + +```rust +// src/config.rs + +impl Config { + pub fn validate(&self) -> Result<(), ConfigError> { + // Validate inputs + if self.inputs.is_empty() { + return Err(ConfigError::NoInputs); + } + + for (name, input) in &self.inputs { + self.validate_input(name, input)?; + } + + // Validate outputs + if self.outputs.is_empty() { + return Err(ConfigError::NoOutputs); + } + + for (path, output) in &self.outputs { + self.validate_output(path, output)?; + } + + Ok(()) + } + + fn validate_input(&self, name: &str, input: &InputConfig) -> Result<(), ConfigError> { + // Validate input-specific constraints + match input { + InputConfig::Display { id, .. } => { + // Check display exists (if possible) + } + InputConfig::Camera { id, .. } => { + // Check camera exists (if possible) + } + // ... 
etc + } + + Ok(()) + } + + fn validate_output(&self, path: &PathBuf, output: &OutputConfig) -> Result<(), ConfigError> { + // Rule 1: Output must have at least one source + if output.video.is_none() && output.audio.is_empty() { + return Err(ConfigError::OutputNoSources { + path: path.clone(), + }); + } + + // Rule 2: Video input must exist and be a video source + if let Some(video_id) = &output.video { + let input = self.inputs.get(video_id) + .ok_or_else(|| ConfigError::InputNotFound { + name: video_id.clone(), + output: path.clone(), + })?; + + if !input.is_video_source() { + return Err(ConfigError::InputNotVideoSource { + name: video_id.clone(), + output: path.clone(), + }); + } + } + + // Rule 3: Audio inputs must exist and be audio sources + for audio_id in &output.audio { + let input = self.inputs.get(audio_id) + .ok_or_else(|| ConfigError::InputNotFound { + name: audio_id.clone(), + output: path.clone(), + })?; + + if !input.is_audio_source() { + return Err(ConfigError::InputNotAudioSource { + name: audio_id.clone(), + output: path.clone(), + }); + } + } + + // Rule 4: Check format compatibility + let format = output.format.as_ref() + .or_else(|| path.extension()?.to_str()) + .ok_or_else(|| ConfigError::UnknownFormat { + path: path.clone(), + })?; + + match format { + "mp4" => { + // MP4 needs video or audio + if output.video.is_none() && output.audio.is_empty() { + return Err(ConfigError::FormatRequiresVideo { + format: format.to_string(), + path: path.clone(), + }); + } + } + "ogg" => { + // Ogg is audio-only + if output.video.is_some() { + return Err(ConfigError::FormatAudioOnly { + format: format.to_string(), + path: path.clone(), + }); + } + } + _ => return Err(ConfigError::UnsupportedFormat { + format: format.to_string(), + }), + } + + Ok(()) + } +} + +impl InputConfig { + pub fn is_video_source(&self) -> bool { + matches!( + self, + InputConfig::Display { .. } + | InputConfig::Window { .. } + | InputConfig::Area { .. } + | InputConfig::Camera { .. 
} + ) + } + + pub fn is_audio_source(&self) -> bool { + matches!( + self, + InputConfig::Microphone { .. } | InputConfig::SystemAudio { .. } + ) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum ConfigError { + #[error("No inputs defined")] + NoInputs, + + #[error("No outputs defined")] + NoOutputs, + + #[error("Output {path:?} has no sources")] + OutputNoSources { path: PathBuf }, + + #[error("Input '{name}' not found (referenced by output {output:?})")] + InputNotFound { name: String, output: PathBuf }, + + #[error("Input '{name}' is not a video source (output {output:?})")] + InputNotVideoSource { name: String, output: PathBuf }, + + #[error("Input '{name}' is not an audio source (output {output:?})")] + InputNotAudioSource { name: String, output: PathBuf }, + + #[error("Unknown format for output {path:?}")] + UnknownFormat { path: PathBuf }, + + #[error("Format {format} requires video source (output {path:?})")] + FormatRequiresVideo { format: String, path: PathBuf }, + + #[error("Format {format} is audio-only (output {path:?})")] + FormatAudioOnly { format: String, path: PathBuf }, + + #[error("Unsupported format: {format}")] + UnsupportedFormat { format: String }, +} +``` + +## Example Configurations + +### Example 1: Simple Screen Recording + +```json +{ + "inputs": { + "screen": { + "type": "display", + "id": 0, + "settings": {} + } + }, + "outputs": { + "recording.mp4": { + "video": "screen" + } + } +} +``` + +### Example 2: Screen + Camera + Mic + +```json +{ + "settings": { + "fps": 30 + }, + "inputs": { + "screen": { + "type": "display", + "id": "primary", + "settings": { + "show_cursor": true + } + }, + "webcam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "mic": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + } + }, + "outputs": { + "full-recording.mp4": { + "video": "screen", + "audio": ["mic"] + } + } +} +``` + +### Example 3: Multi-Output Recording + +```json +{ + "inputs": { + "screen": { + "type": "display", 
+ "id": 0, + "settings": {} + }, + "webcam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "mic1": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + }, + "mic2": { + "type": "microphone", + "label": "Focusrite", + "settings": {} + }, + "sys": { + "type": "system-audio", + "settings": {} + } + }, + "outputs": { + "screen-only.mp4": { + "video": "screen" + }, + "webcam-only.mp4": { + "video": "webcam", + "audio": ["mic1"] + }, + "audio-mix.ogg": { + "audio": ["mic1", "mic2"] + }, + "full-recording.mp4": { + "video": "screen", + "audio": ["mic1", "mic2", "sys"] + } + } +} +``` + +### Example 4: Multi-Display Setup + +```json +{ + "inputs": { + "left_monitor": { + "type": "display", + "id": 0, + "settings": { + "fps": 60 + } + }, + "right_monitor": { + "type": "display", + "id": 1, + "settings": { + "fps": 60 + } + }, + "mic": { + "type": "microphone", + "label": "default", + "settings": {} + } + }, + "outputs": { + "left-display.mp4": { + "video": "left_monitor", + "audio": ["mic"] + }, + "right-display.mp4": { + "video": "right_monitor", + "audio": ["mic"] + } + } +} +``` + +### Example 5: Area Capture with Multiple Outputs + +```json +{ + "inputs": { + "game_window": { + "type": "area", + "screen": 0, + "bounds": { + "x": 100, + "y": 100, + "width": 1920, + "height": 1080 + }, + "settings": { + "fps": 60, + "show_cursor": false + } + }, + "facecam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "game_audio": { + "type": "system-audio", + "settings": {} + }, + "commentary": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + } + }, + "outputs": { + "gameplay.mp4": { + "video": "game_window", + "audio": ["game_audio", "commentary"] + }, + "facecam.mp4": { + "video": "facecam", + "audio": ["commentary"] + }, + "commentary-backup.ogg": { + "audio": ["commentary"] + } + } +} +``` + +## CLI Commands + +### Record from Config + +```bash +cap-multi-recorder record config.json +``` + +### Validate Config + +```bash 
+cap-multi-recorder validate config.json +``` + +Output: +``` +✓ Config is valid +✓ 5 inputs defined +✓ 3 outputs defined +✓ All input references resolved +✓ All format constraints satisfied +``` + +Or with errors: +``` +✗ Config validation failed: + - Output 'recording.mp4' references unknown input 'webcam' + - Output 'audio.ogg' has video source (format is audio-only) + - Input 'screen' has no outputs +``` + +### Generate Config + +```bash +cap-multi-recorder generate-config --interactive +``` + +Interactive prompts: +1. "Select input sources to add:" +2. For each source: "Name this input:", "Configure settings?" +3. "Define outputs:" +4. For each output: "File path:", "Select video source:", "Select audio sources:" +5. Output JSON/YAML + +```bash +cap-multi-recorder generate-config --template > template.json +``` + +Generates template with all input types documented. + +### List Inputs + +```bash +cap-multi-recorder list --displays --cameras --microphones +``` + +Output: +```json +{ + "displays": [ + {"id": 0, "name": "Built-in Display", "resolution": "2880x1800"}, + {"id": 1, "name": "LG Monitor", "resolution": "3840x2160"} + ], + "cameras": [ + {"id": 0, "name": "FaceTime HD Camera"}, + {"id": 1, "name": "Logitech Webcam"} + ], + "microphones": [ + {"label": "Blue Yeti", "default": true}, + {"label": "Focusrite USB"} + ] +} +``` + +## Advantages of This Approach + +1. **Clear Separation**: Inputs and outputs are independently defined +2. **Reusable Inputs**: Named inputs can be referenced by multiple outputs +3. **Settings per Input**: Each input has its own configuration +4. **Easy Validation**: Can validate input references before initializing hardware +5. **Better for Complex Configs**: More maintainable for scenarios with many inputs/outputs +6. **Tool-Friendly**: Easier to build GUIs/TUIs that generate configs +7. **Version Control**: Config files are more readable and diffable +8. 
**Composable**: Can merge multiple config files or override sections + +## Disadvantages + +1. **More Verbose**: Simple scenarios require more configuration +2. **Learning Curve**: Users must understand the two-phase structure +3. **Indirection**: Must look up input names to understand routing + +## Migration Path + +Both approaches can coexist: + +1. **Keep original CLI** for simple use cases +2. **Add config file support** for complex scenarios +3. **Provide converter**: `cap-multi-recorder convert` to go from CLI to config format + +## Implementation Priority + +1. Implement config file parsing and validation +2. Add `validate` command +3. Implement `list` command with JSON output +4. Add `generate-config` interactive mode +5. Integrate with existing record command +6. Add conversion utilities + +## Future Enhancements + +1. **Config templates**: Pre-built configs for common scenarios +2. **Config inheritance**: Base configs + overrides +3. **Environment variables**: `${ENV_VAR}` substitution in configs +4. **Input groups**: Define groups of inputs to simplify routing +5. **Conditional inputs**: Platform-specific input definitions +6. **Profile support**: Multiple named configurations in one file diff --git a/apps/multi-recorder/PLAN-UNIFIED.md b/apps/multi-recorder/PLAN-UNIFIED.md new file mode 100644 index 0000000000..3dab738ff5 --- /dev/null +++ b/apps/multi-recorder/PLAN-UNIFIED.md @@ -0,0 +1,798 @@ +# Multi-Recorder CLI Plan - Unified Approach + +## Overview + +A flexible CLI tool that supports both command-line routing and complex configuration files. The key principle: **Define inputs, route to outputs**. + +## Core Design Philosophy + +**Two-Phase Declaration**: +1. **Define Inputs**: Declare all input sources with `--input <NAME> --type <TYPE> [--options <JSON5>]` +2. **Route to Outputs**: Map inputs to output files with `--output <PATH> <INPUT>...` +3. 
**File Configs**: For complex scenarios, use complete JSON/YAML config files + +This approach provides: +- Clean separation of source definition from routing +- Natural composability (inputs are reusable) +- Simple CLI for basic use cases with json5 syntax +- Full config files for complex, repeatable scenarios +- No forced abstraction - use what you need + +## CLI Interface + +### Basic Pattern + +```bash +cap-multi-recorder \ + --input screen --type display --options '{id:0,fps:60}' \ + --input mic --type microphone --options '{label:"default"}' \ + --output out.mp4 screen mic \ + --output mic-only.mp3 mic +``` + +**Flow**: +1. Define `screen` input as display #0 at 60fps +2. Define `mic` input as default microphone +3. Create `out.mp4` with screen + mic +4. Create `mic-only.mp3` with mic only + +### Simple Example (No Options) + +```bash +cap-multi-recorder \ + --input screen --type display \ + --input mic --type microphone \ + --output recording.mp4 screen mic +``` + +### Complex Multi-Source Example + +```bash +cap-multi-recorder \ + --input main --type display --options '{id:0,fps:60,show_cursor:true}' \ + --input secondary --type display --options '{id:1,fps:30}' \ + --input cam --type camera --options '{id:0,resolution:{width:1920,height:1080}}' \ + --input mic --type microphone --options '{label:"Blue Yeti"}' \ + --output full.mp4 main cam mic \ + --output screen-only.mp4 main mic \ + --output secondary.mp4 secondary mic \ + --output webcam.mp4 cam +``` + +### Full Config File Mode + +```bash +cap-multi-recorder config.json +``` + +Where `config.json` uses the two-phase format from PLAN-JSON-CONFIG.md. 
+ +## Command-Line Argument Format + +### Input Declaration + +**Pattern**: `--input <NAME> --type <TYPE> [--options <JSON5>]` + +```bash +# Display input +--input screen --type display --options '{id:0,fps:60,show_cursor:true}' + +# Camera input +--input cam --type camera --options '{id:0,resolution:{width:1920,height:1080},fps:30}' + +# Microphone input +--input mic --type microphone --options '{label:"Blue Yeti"}' + +# Window input +--input win --type window --options '{id:12345,fps:30}' + +# Without options (uses defaults) +--input screen --type display +``` + +**Input Types**: +- `display`: Screen capture +- `camera`: Webcam/camera device +- `microphone`: Audio input device +- `window`: Window capture + +### Output Declaration + +**Pattern**: `--output <PATH> <INPUT>...` + +```bash +# Single input +--output video.mp4 screen + +# Multiple inputs (composited) +--output full.mp4 screen cam mic + +# Same input to multiple outputs +--input screen --type display +--output out1.mp4 screen +--output out2.mp4 screen + +# Different combinations +--output screen-only.mp4 screen +--output audio-only.mp3 mic +--output full.mp4 screen cam mic +``` + +### Detailed Syntax + +```bash +cap-multi-recorder [OPTIONS] [CONFIG_FILE] + +Input Declaration (repeatable): + --input <NAME> + Unique name for this input source + + --type <TYPE> + Input type: display | camera | microphone | window + (Must follow --input) + + --options <JSON5> + Input-specific configuration in json5 format + (Optional, must follow --type) + +Output Declaration (repeatable): + --output <PATH> <INPUT>... + Create output file with specified inputs + PATH: Output file path + INPUT: One or more input names + +Config File: + [CONFIG_FILE] + Full JSON/YAML config file (disables CLI mode) +``` + +## JSON5 Options Schema + +### Display Options + +```json5 +{ + id: 0, // Display index (0, 1, ...) 
or "primary" + fps: 60, // Frame rate (optional) + show_cursor: true // Show cursor in capture (optional) +} +``` + +### Window Options + +```json5 +{ + id: 12345, // Window ID + fps: 30, // Frame rate (optional) + show_cursor: true // Show cursor (optional) +} +``` + +### Camera Options + +```json5 +{ + id: 0, // Camera index or "default" + resolution: { // Desired resolution (optional) + width: 1920, + height: 1080 + }, + fps: 30 // Frame rate (optional) +} +``` + +### Microphone Options + +```json5 +{ + label: "Blue Yeti", // Device label or "default" + // No additional settings currently +} +``` + +**Note**: json5 allows: +- Unquoted keys: `{id:0}` instead of `{"id":0}` +- Comments: `{id:0 /* main display */}` +- Trailing commas: `{id:0,fps:60,}` +- Single quotes: `{label:'Blue Yeti'}` + +## Full Config File Format + +For complex scenarios, use complete config files with two-phase declaration: + +```json +{ + "settings": { + "fps": 30, + "show_cursor": true + }, + "inputs": { + "main_display": { + "type": "display", + "id": 0, + "settings": { + "fps": 60 + } + }, + "webcam": { + "type": "camera", + "id": 0, + "settings": {} + }, + "mic": { + "type": "microphone", + "label": "Blue Yeti", + "settings": {} + } + }, + "outputs": { + "recording.mp4": { + "video": "main_display", + "audio": ["mic"] + } + } +} +``` + +## Examples + +### Example 1: Simple Screen Recording + +```bash +cap-multi-recorder \ + --input screen --type display \ + --output recording.mp4 screen +``` + +### Example 2: Screen + Microphone + +```bash +cap-multi-recorder \ + --input screen --type display \ + --input mic --type microphone \ + --output recording.mp4 screen mic +``` + +### Example 3: High-FPS Gaming Capture + +```bash +cap-multi-recorder \ + --input screen --type display --options '{id:0,fps:120,show_cursor:false}' \ + --input mic --type microphone --options '{label:"Blue Yeti"}' \ + --output gameplay.mp4 screen mic +``` + +### Example 4: Multiple Outputs from Same Sources + +```bash 
+cap-multi-recorder \ + --input screen --type display --options '{id:0,fps:60}' \ + --input mic --type microphone \ + --output full.mp4 screen mic \ + --output video-only.mp4 screen \ + --output audio-only.mp3 mic +``` + +Result: +- `full.mp4`: screen + mic +- `video-only.mp4`: screen only +- `audio-only.mp3`: mic only + +### Example 5: Multi-Display Recording + +```bash +cap-multi-recorder \ + --input left --type display --options '{id:0,fps:60}' \ + --input right --type display --options '{id:1,fps:60}' \ + --input mic --type microphone \ + --output left-monitor.mp4 left mic \ + --output right-monitor.mp4 right mic +``` + +### Example 6: Complex Multi-Source Setup + +```bash +cap-multi-recorder \ + --input screen --type display --options '{id:0,fps:60}' \ + --input cam --type camera --options '{id:0,resolution:{width:1920,height:1080},fps:30}' \ + --input mic --type microphone --options '{label:"Blue Yeti"}' \ + --output full.mp4 screen cam mic \ + --output screen-only.mp4 screen \ + --output webcam.mp4 cam \ + --output audio.mp3 mic +``` + +Result: +- `full.mp4`: screen + camera + mic (composited) +- `screen-only.mp4`: screen capture only +- `webcam.mp4`: camera only +- `audio.mp3`: microphone only + +### Example 7: Full Config File + +```bash +cap-multi-recorder config.json +``` + +Where `config.json` contains complete input/output configuration. 
+ +## Implementation + +### CLI Argument Parsing + +```rust +// src/main.rs + +use clap::Parser; +use json5; + +#[derive(Parser)] +#[command( + name = "cap-multi-recorder", + about = "Record multiple audio/video sources to multiple outputs" +)] +struct Cli { + /// Config file (disables CLI mode) + config: Option, +} + +#[derive(Parser)] +struct InputGroup { + /// Input name + #[arg(long)] + input: String, + + /// Input type: display | camera | microphone | window + #[arg(long, requires = "input")] + r#type: InputType, + + /// Input options (json5 format) + #[arg(long, requires = "type")] + options: Option, +} + +#[derive(Parser)] +struct OutputGroup { + /// Output file path + #[arg(long)] + output: PathBuf, + + /// Input names to include + #[arg(requires = "output")] + inputs: Vec, +} + +#[derive(Clone, Copy, ValueEnum)] +enum InputType { + Display, + Camera, + Microphone, + Window, +} + +// Manual parsing since clap doesn't easily support grouped repeating args +fn parse_cli_args() -> Result { + let args: Vec = std::env::args().collect(); + + let mut inputs = Vec::new(); + let mut outputs = Vec::new(); + let mut i = 1; + + while i < args.len() { + match args[i].as_str() { + "--input" => { + let name = args.get(i + 1).context("--input requires NAME")?; + let type_flag = args.get(i + 2).context("--input requires --type")?; + if type_flag != "--type" { + bail!("--input must be followed by --type"); + } + let input_type = args.get(i + 3).context("--type requires TYPE")?; + + let mut options = None; + let mut consumed = 4; + + if args.get(i + 4).map(|s| s.as_str()) == Some("--options") { + options = Some(args.get(i + 5).context("--options requires JSON5")?.clone()); + consumed = 6; + } + + inputs.push(InputDecl { + name: name.clone(), + input_type: parse_input_type(input_type)?, + options, + }); + + i += consumed; + } + "--output" => { + let path = args.get(i + 1).context("--output requires PATH")?; + let mut input_names = Vec::new(); + let mut j = i + 2; + + while j < 
args.len() && !args[j].starts_with("--") { + input_names.push(args[j].clone()); + j += 1; + } + + if input_names.is_empty() { + bail!("--output requires at least one input name"); + } + + outputs.push(OutputDecl { + path: PathBuf::from(path), + inputs: input_names, + }); + + i = j; + } + _ => { + // Check if it's a config file (positional arg) + if !args[i].starts_with("--") { + return Ok(CliConfig::File(PathBuf::from(&args[i]))); + } + bail!("Unknown argument: {}", args[i]); + } + } + } + + Ok(CliConfig::Routing { inputs, outputs }) +} + +struct InputDecl { + name: String, + input_type: InputType, + options: Option, +} + +struct OutputDecl { + path: PathBuf, + inputs: Vec, +} + +enum CliConfig { + Routing { inputs: Vec, outputs: Vec }, + File(PathBuf), +} +``` + +### Options Parsing + +```rust +// src/config.rs + +use json5; + +fn parse_input_config( + name: &str, + input_type: InputType, + options: Option<&str>, +) -> Result { + match input_type { + InputType::Display => { + let opts: DisplayOptions = if let Some(json5_str) = options { + json5::from_str(json5_str) + .with_context(|| format!("Invalid display options for '{}'", name))? + } else { + DisplayOptions::default() + }; + + Ok(InputConfig::Display(DisplayInputConfig { + id: opts.id, + fps: opts.fps, + show_cursor: opts.show_cursor.unwrap_or(true), + })) + } + InputType::Camera => { + let opts: CameraOptions = if let Some(json5_str) = options { + json5::from_str(json5_str) + .with_context(|| format!("Invalid camera options for '{}'", name))? + } else { + CameraOptions::default() + }; + + Ok(InputConfig::Camera(CameraInputConfig { + id: opts.id, + resolution: opts.resolution, + fps: opts.fps, + })) + } + InputType::Microphone => { + let opts: MicrophoneOptions = if let Some(json5_str) = options { + json5::from_str(json5_str) + .with_context(|| format!("Invalid microphone options for '{}'", name))? 
+ } else { + MicrophoneOptions::default() + }; + + Ok(InputConfig::Microphone(MicrophoneInputConfig { + label: opts.label.unwrap_or_else(|| "default".to_string()), + })) + } + InputType::Window => { + let opts: WindowOptions = if let Some(json5_str) = options { + json5::from_str(json5_str) + .with_context(|| format!("Invalid window options for '{}'", name))? + } else { + bail!("Window input requires id in options"); + }; + + Ok(InputConfig::Window(WindowInputConfig { + id: opts.id, + fps: opts.fps, + show_cursor: opts.show_cursor.unwrap_or(true), + })) + } + } +} + +#[derive(Deserialize, Default)] +struct DisplayOptions { + id: Option, + fps: Option, + show_cursor: Option, +} + +#[derive(Deserialize, Default)] +struct CameraOptions { + id: Option, + resolution: Option, + fps: Option, +} + +#[derive(Deserialize, Default)] +struct MicrophoneOptions { + label: Option, +} + +#[derive(Deserialize)] +struct WindowOptions { + id: u32, + fps: Option, + show_cursor: Option, +} + +#[derive(Deserialize)] +struct Resolution { + width: u32, + height: u32, +} +``` + +### Routing Construction + +```rust +// src/routing.rs + +pub struct Routing { + pub inputs: HashMap, + pub outputs: HashMap, +} + +pub struct OutputConfig { + pub path: PathBuf, + pub video_input: Option, + pub audio_inputs: Vec, +} + +pub fn build_routing(cli_config: CliConfig) -> Result { + match cli_config { + CliConfig::File(path) => { + let contents = std::fs::read_to_string(&path)?; + let config: FileConfig = json5::from_str(&contents)?; + config.validate()?; + file_config_to_routing(config) + } + CliConfig::Routing { inputs, outputs } => { + cli_routing_to_routing(inputs, outputs) + } + } +} + +fn cli_routing_to_routing( + input_decls: Vec, + output_decls: Vec, +) -> Result { + let mut inputs = HashMap::new(); + + // Parse all inputs + for decl in input_decls { + let config = parse_input_config(&decl.name, decl.input_type, decl.options.as_deref())?; + inputs.insert(decl.name.clone(), config); + } + + // 
Build outputs + let mut outputs = HashMap::new(); + for decl in output_decls { + // Determine which inputs are video vs audio + let mut video_input = None; + let mut audio_inputs = Vec::new(); + + for input_name in &decl.inputs { + let input_config = inputs.get(input_name) + .with_context(|| format!("Unknown input '{}' in output '{}'", input_name, decl.path.display()))?; + + match input_config { + InputConfig::Display(_) | InputConfig::Camera(_) | InputConfig::Window(_) => { + if video_input.is_some() { + bail!("Output '{}' has multiple video inputs", decl.path.display()); + } + video_input = Some(input_name.clone()); + } + InputConfig::Microphone(_) => { + audio_inputs.push(input_name.clone()); + } + } + } + + outputs.insert(decl.path.clone(), OutputConfig { + path: decl.path.clone(), + video_input, + audio_inputs, + }); + } + + // Validate + validate_routing(&inputs, &outputs)?; + + Ok(Routing { inputs, outputs }) +} +``` + +## Validation + +### CLI Mode Validation + +```rust +pub fn validate_routing(mapping: &SourceOutputMapping) -> Result<()> { + // Same validation as before: + // 1. At least one source + // 2. Each source has ≥1 output + // 3. Each output has ≥1 source + // 4. Format compatibility + // 5. Only one video source per output + + Ok(()) +} +``` + +### JSON Validation + +When parsing inline JSON or file references, validate schema: + +```rust +fn parse_display_spec(spec: SourceSpec, global: &GlobalSettings) -> Result { + let config = match spec { + SourceSpec::Simple(id) => DisplayInputConfig { + id: parse_display_id(&id)?, + settings: DisplaySettings::from_global(global), + }, + SourceSpec::Json(json) => { + // Validate against schema + serde_json::from_value::(json) + .context("Invalid display configuration")? 
+ } + SourceSpec::File(path) => { + let contents = fs::read_to_string(&path) + .with_context(|| format!("Failed to read {}", path.display()))?; + serde_json::from_str(&contents) + .with_context(|| format!("Invalid JSON in {}", path.display()))? + } + }; + + // Additional validation + validate_display_config(&config)?; + + Ok(config) +} +``` + +## Help Text + +``` +USAGE: + cap-multi-recorder record [OPTIONS] [CONFIG_FILE] + +ROUTING: + Specify sources and their target outputs via CLI flags. + Each source can be: + - Simple ID: --display 0 output.mp4 + - JSON: --display '{"id":0,"settings":{"fps":60}}' output.mp4 + - File: --display @config.json output.mp4 + +OPTIONS: + --display ... + Capture display. SPEC: ID | JSON | @file + + --camera ... + Capture camera. SPEC: ID | JSON | @file + + --microphone ... + Capture microphone. SPEC: label | JSON | @file + + --window ... + Capture window. SPEC: ID | JSON | @file + + --system-audio ... + Capture system audio + + --fps + Default frame rate for video sources + + --cursor + Default cursor visibility + +CONFIG FILE: + [CONFIG_FILE] + Use full JSON/YAML config (disables CLI routing) + +EXAMPLES: + # Simple + cap-multi-recorder record --display 0 output.mp4 + + # With settings + cap-multi-recorder record \ + --display '{"id":0,"settings":{"fps":60}}' output.mp4 + + # Multiple sources + cap-multi-recorder record \ + --display 0 screen.mp4 full.mp4 \ + --camera 0 webcam.mp4 full.mp4 \ + --microphone "Blue Yeti" full.mp4 + + # Settings from file + cap-multi-recorder record \ + --display @display-config.json output.mp4 + + # Full config + cap-multi-recorder record config.json +``` + +## Advantages of Unified Approach + +1. **Simple for Simple Cases**: Just use IDs, no JSON required +2. **Flexible for Complex Cases**: Inline JSON or file references for detailed config +3. **No Forced Abstraction**: Use simple or complex forms as needed +4. **Gradual Complexity**: Start simple, add JSON when needed +5. 
**Both Worlds**: CLI for quick use, config files for repeatability +6. **Consistent**: Same JSON schema everywhere (CLI, files, full configs) + +## Migration Path + +Users can start simple and gradually increase complexity: + +1. **Day 1**: `--display 0 output.mp4` +2. **Week 1**: `--display '{"id":0,"settings":{"fps":60}}' output.mp4` +3. **Month 1**: `--display @display-60fps.json output.mp4` (reusable config) +4. **Month 2**: `cap-multi-recorder record streaming-setup.json` (full config) + +## Implementation Priority + +1. ✅ Core routing structures +2. ✅ Simple ID parsing (`--display 0`) +3. ✅ Inline JSON parsing (`--display '{...}'`) +4. ✅ File reference parsing (`--display @file.json`) +5. ✅ Full config file support +6. ✅ Validation for all modes +7. ✅ Help text and examples +8. ✅ Error messages for common mistakes + +## Testing Strategy + +### Unit Tests +- Parse simple IDs +- Parse inline JSON +- Parse file references +- Validate each format +- Error cases + +### Integration Tests +- CLI routing with simple IDs +- CLI routing with JSON +- CLI routing with files +- Full config files +- Mixed approaches + +### E2E Tests +- Record with CLI routing +- Record with full config +- Multiple formats in one command diff --git a/apps/multi-recorder/PLAN.md b/apps/multi-recorder/PLAN.md new file mode 100644 index 0000000000..fb81ede730 --- /dev/null +++ b/apps/multi-recorder/PLAN.md @@ -0,0 +1,749 @@ +# Multi-Recorder CLI Plan + +## Overview + +Build a flexible CLI tool that allows users to capture multiple input sources (screens, cameras, microphones) and declaratively route them to multiple output files. Each output file is powered by an `OutputPipeline` instance. 
The tool enables arbitrary N→M routing scenarios: + +- 1 source → 1 output (simple recording) +- N sources → 1 output (combined recording) +- 1 source → M outputs (duplicate/backup recording) +- N sources → M sources (complex multi-output scenarios) + +## Core Design Philosophy + +**Flexible Configuration Layers**: +- **CLI Routing**: Declare sources and their outputs via command-line flags +- **JSON Settings**: Optionally provide detailed source configuration via inline JSON or file references +- **Full Config Files**: For complex scenarios, use complete JSON/YAML configs with two-phase declaration + +See [PLAN-UNIFIED.md](./PLAN-UNIFIED.md) for detailed unified approach combining CLI and JSON. +See [PLAN-JSON-CONFIG.md](./PLAN-JSON-CONFIG.md) for full config file format specification. + +## Architecture + +### Core Components + +1. **Input Sources** + - Screen captures (displays, windows, areas) + - Cameras (via camera feeds) + - Microphones (via microphone feeds) + - System audio (platform-specific) + +2. **Output Pipelines** + - Each output file gets its own `OutputPipeline` instance + - Multiple outputs can share the same input sources (via broadcast/clone) + - Independent start/stop/pause/resume control per pipeline + +3. **Routing System** + - Declarative input→output mapping + - Sources can target multiple outputs + - Multiple sources can target the same output + - Validation ensures OutputPipeline constraints are met + +4. **Configuration System** + - Flexible CLI syntax for all routing scenarios + - Optional YAML/JSON configuration files for complex setups + - Interactive mode for discovering available devices + +## CLI Interface Design + +### Flexible Routing Syntax + +Each source type accepts its identifier followed by one or more output file paths: + +```bash +cap-multi-recorder record \ + --display [...] \ + --camera [...] \ + --microphone