━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
cd /home/john/voice
pip install -r requirements.txt

# Interactive mode with menu
python tui.py interactive
# Analyze single file
python tui.py analyze sample.wav
# Batch analysis
python tui.py batch ./audio_samples/ -o ./results
# With custom options
python tui.py analyze sample.wav --output-dir ./my_results --no-viz

from pipeline import VoiceManipulationDetector
detector = VoiceManipulationDetector()
report = detector.analyze('sample.wav', output_dir='results/')
print(f"Manipulation Detected: {report['ALTERATION_DETECTED']}")
print(f"Confidence: {report['CONFIDENCE']}")

# Single file
python pipeline.py sample.wav
# Batch mode
python pipeline.py ./audio_directory --batch

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
The pipeline executes 4 phases:
- PHASE 1: Baseline F0 Analysis (isolates presented pitch)
- PHASE 2: Vocal Tract Analysis (extracts formants - physical characteristics)
- PHASE 3: Artifact Detection (3 independent methods)
- Pitch-Formant Incoherence
- Mel Spectrogram Artifacts
- Phase Decoherence / Transient Smearing
- PHASE 4: Report Synthesis (generates verified output)
All reports include:
- SHA-256 checksums of audio file
- Cryptographic signatures for tamper detection
- Chain of custody metadata
- Timestamp and pipeline version
- JSON: Machine-readable detailed report
- Markdown: Human-readable formatted report
- Visualizations: PNG plots showing analysis results
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
For each analyzed file sample.wav:
results/sample/
├── sample_report.json # Detailed JSON report with verification
├── sample_report.md # Markdown formatted report
├── sample_overview.png # Comprehensive overview plot
├── sample_mel_spectrogram.png # Mel spectrogram artifact analysis
├── sample_phase_analysis.png # Phase coherence plot
└── sample_pitch_formant_comparison.png # Pitch-formant comparison
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
{
"ASSET_ID": "sample_001",
"ALTERATION_DETECTED": true,
"CONFIDENCE": "99% (Very High)",
"PRESENTED_AS": "Female", // Based on F0 (pitch)
"PROBABLE_SEX": "Male", // Based on formants (physical)
"DECEPTION_BASELINE_F0": "221.5 Hz (Median)",
"PHYSICAL_BASELINE_FORMANTS": "F1: 498 Hz, F2: 1510 Hz, F3: 2490 Hz"
}

Three independent detection methods:
- Pitch-Formant Incoherence: Mismatch between presented pitch and physical characteristics
- Time Manipulation: Phase artifacts from time-stretching
- Spectral Artifacts: Unnatural harmonics or consistent noise floor
- 99% (Very High): All 3 detection methods triggered
- 85% (High): 2 detection methods triggered
- 60-75% (Medium): 1 detection method triggered
- 0% (Low): No manipulation detected
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Verify report integrity:
from verification import OutputVerifier
verifier = OutputVerifier()
result = verifier.verify_report('results/sample_report.json')
if result['valid']:
print(f"✓ Report verified - created {result['timestamp']}")
else:
    print(f"✗ Verification failed: {result['error']}")

detector = VoiceManipulationDetector()
reports = detector.batch_analyze(
audio_dir='./samples',
output_dir='./batch_results',
pattern='*.wav'
)
# Summary statistics
manipulated = sum(1 for r in reports if r['ALTERATION_DETECTED'])
print(f"Detected manipulation in {manipulated}/{len(reports)} files")

from verification import ReportExporter
exporter = ReportExporter()
exporter.export_csv_summary(reports, 'summary.csv')

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Run comprehensive test suite:
python test_pipeline.py

This will:
- Generate 6 synthetic test samples (clean + manipulated)
- Analyze each sample
- Verify detection accuracy
- Test verification system
- Generate full reports and visualizations
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
python tui.py analyze suspicious_call.wav

python tui.py batch /audio/evidence/ -p "*.mp3" -o /results/case_001/

from pipeline import VoiceManipulationDetector
detector = VoiceManipulationDetector()
report = detector.analyze('evidence.wav')
# Access specific findings
findings = report['DETAILED_FINDINGS']
phase3 = findings['phase3_artifacts']
if phase3['pitch_formant_incoherence']['detected']:
    print(f"Incoherence confidence: {phase3['pitch_formant_incoherence']['confidence']}")

from example import example_create_test_sample
# Creates a known-manipulated sample for testing
example_create_test_sample()

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Solution: Run pip install -r requirements.txt
Solution: Ensure audio file is valid WAV/MP3 format
Expected: Synthetic audio has unnatural characteristics that trigger detection
Solution: Use duration parameter:
y, sr = librosa.load('large.wav', duration=30.0)  # First 30 seconds only

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- WAV, MP3, FLAC, OGG, M4A (via librosa)
- Python >= 3.10
- 4GB RAM minimum
- Linux/macOS/Windows
- F0 Extraction: librosa.piptrack for robust pitch detection
- Formant Analysis: Praat-Parselmouth Burg algorithm
- Phase Analysis: STFT with phase coherence metrics
- Spectral Analysis: Mel spectrogram with artifact detection
- Sandboxed execution recommended
- Read-only file permissions
- No network access required
- Cryptographic verification of all outputs
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- Tactical Implementation Specification (TIS): See project README
- Source Code: /home/john/voice/
- Test Suite: test_pipeline.py
- Examples: example.py
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━