diff --git a/manage.py b/manage.py
index c06ca21d..f5d1df4e 100644
--- a/manage.py
+++ b/manage.py
@@ -6,6 +6,10 @@
 from mod_regression.update_regression import update_expected_results
 from run import app
 
+import json
+from pathlib import Path
+from mod_regression.sample_inventory import inventory_samples
+
 
 @app.cli.command('update')
 @click.argument('path_to_ccex')
@@ -29,6 +33,38 @@ def update_results(path_to_ccex):
     click.echo('update function finished')
     return 0
 
 
+@app.cli.command('inventory')
+@click.option(
+    '--samples',
+    default='TestData',
+    show_default=True,
+    type=click.Path(exists=True, file_okay=False),
+    help='Path to samples directory',
+)
+@click.option(
+    '--output',
+    default='metadata/sample_inventory.json',
+    show_default=True,
+    type=click.Path(dir_okay=False),
+    help='Output JSON file',
+)
+def inventory_command(samples, output):
+    """
+    Probe every sample below a directory and write the result as JSON.
+
+    :param samples: directory that is searched recursively for samples
+    :param output: JSON file to write; missing parent folders are created
+    """
+    out = Path(output)
+    inventory = inventory_samples(Path(samples))
+
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text(json.dumps(inventory, indent=2), encoding='utf-8')
+
+    click.echo(f'Inventory written: {out} ({len(inventory)} samples)')
+    return 0
+
+
 if __name__ == '__main__':
     app.cli()
diff --git a/mod_regression/sample_inventory.py b/mod_regression/sample_inventory.py
new file mode 100644
index 00000000..83e1ad38
--- /dev/null
+++ b/mod_regression/sample_inventory.py
@@ -0,0 +1,131 @@
+"""Collect metadata about media samples using ffprobe and CCExtractor."""
+import hashlib
+import json
+import subprocess
+from pathlib import Path
+
+
+def _run(cmd):
+    """
+    Run an external command, capturing stdout and stderr as text.
+
+    :param cmd: command and its arguments as a list
+    :return: the completed process, or None if the command could not be
+        run or exceeded the 30 second timeout
+    """
+    try:
+        return subprocess.run(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            # Tool output may contain bytes that are not valid text. Without
+            # this, decoding raises UnicodeDecodeError, which is not caught
+            # below and would abort the whole inventory.
+            errors="replace",
+            timeout=30,
+            check=False
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None
+
+
+def sha256sum(path: Path) -> str:
+    """
+    Compute the SHA-256 digest of a file, reading it in 8 KiB chunks.
+
+    :param path: file to hash
+    :return: hexadecimal digest string
+    """
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(8192), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+def probe_sample(sample_path: Path) -> dict:
+    """
+    Describe one sample: checksum, container, streams and caption types.
+
+    Fields that cannot be determined (tool missing, tool failed, output
+    not parseable) keep their empty defaults.
+
+    :param sample_path: file to probe
+    :return: dict with the keys path, sha256, container, streams,
+        caption_types_detected and duration_sec
+    """
+    result = {
+        "path": str(sample_path),
+        "sha256": sha256sum(sample_path),
+        "container": None,
+        "streams": [],
+        "caption_types_detected": [],
+        "duration_sec": None,
+    }
+
+    # ---- ffprobe ----
+    ffprobe = _run([
+        "ffprobe",
+        "-v", "error",
+        "-show_format",
+        "-show_streams",
+        "-print_format", "json",
+        str(sample_path)
+    ])
+
+    if ffprobe and ffprobe.returncode == 0:
+        try:
+            meta = json.loads(ffprobe.stdout)
+        except ValueError:
+            meta = {}
+
+        fmt = meta.get("format", {})
+        result["container"] = fmt.get("format_name")
+        result["streams"] = [
+            {"type": s.get("codec_type"), "codec": s.get("codec_name")}
+            for s in meta.get("streams", [])
+        ]
+
+        # Parsed on its own so that a malformed duration cannot discard the
+        # container and stream data gathered above.
+        try:
+            result["duration_sec"] = float(fmt["duration"])
+        except (KeyError, TypeError, ValueError):
+            pass  # best effort: duration_sec stays None
+
+    # ---- CCExtractor ----
+    cce = _run([
+        "ccextractor",
+        str(sample_path),
+        "-stdout"
+    ])
+
+    if cce and cce.returncode == 0:
+        # NOTE(review): this is plain substring matching on the whole log, so
+        # a sample path containing e.g. "608" would also match if the tool
+        # echoes its input path - confirm against real CCExtractor output.
+        stderr = (cce.stderr or "").lower()
+        if "608" in stderr:
+            result["caption_types_detected"].append("CEA-608")
+        if "708" in stderr:
+            result["caption_types_detected"].append("CEA-708")
+        if "dvb" in stderr:
+            result["caption_types_detected"].append("DVB")
+
+    return result
+
+
+def inventory_samples(sample_root: Path) -> list:
+    """
+    Probe every regular file below a directory.
+
+    :param sample_root: directory that is searched recursively
+    :return: one probe_sample() result per file, ordered by path so the
+        output does not depend on directory enumeration order
+    """
+    return [
+        probe_sample(p)
+        for p in sorted(sample_root.rglob("*"))
+        if p.is_file()
+    ]
diff --git a/tests/test_regression/test_inventory_command.py b/tests/test_regression/test_inventory_command.py
new file mode 100644
index 00000000..0be1e680
--- /dev/null
+++ b/tests/test_regression/test_inventory_command.py
@@ -0,0 +1,19 @@
+from unittest import mock
+
+from mod_regression.sample_inventory import inventory_samples
+
+
+# _run is stubbed so the test never launches ffprobe or ccextractor.
+@mock.patch("mod_regression.sample_inventory._run", return_value=None)
+def test_inventory_multiple_files(mock_run, tmp_path):
+    """Every file below the root is listed once, ordered by path."""
+    (tmp_path / "b.ts").write_bytes(b"b")
+    (tmp_path / "a.ts").write_bytes(b"a")
+
+    inventory = inventory_samples(tmp_path)
+
+    assert [i["path"] for i in inventory] == [
+        str(tmp_path / "a.ts"),
+        str(tmp_path / "b.ts"),
+    ]
+    assert all("sha256" in i for i in inventory)
diff --git a/tests/test_regression/test_sample_inventory.py b/tests/test_regression/test_sample_inventory.py
new file mode 100644
index 00000000..f86da3c6
--- /dev/null
+++ b/tests/test_regression/test_sample_inventory.py
@@ -0,0 +1,30 @@
+import math
+from unittest import mock
+
+from mod_regression.sample_inventory import probe_sample
+
+
+def fake_run(cmd, **kwargs):
+    """Stand in for subprocess.run with canned ffprobe-style output."""
+    class R:
+        returncode = 0
+        stdout = (
+            '{"format":{"format_name":"mpegts","duration":"10.0"},'
+            '"streams":[{"codec_type":"video","codec_name":"h264"}]}'
+        )
+        stderr = "Detected CEA-608 captions"
+    return R()
+
+
+@mock.patch("mod_regression.sample_inventory.subprocess.run", side_effect=fake_run)
+def test_probe_sample_basic(mock_run, tmp_path):
+    """Container, duration, streams and caption type come from tool output."""
+    f = tmp_path / "sample.ts"
+    f.write_bytes(b"dummy")
+
+    result = probe_sample(f)
+
+    assert result["container"] == "mpegts"
+    assert math.isclose(result["duration_sec"], 10.0, rel_tol=1e-9)
+    assert result["caption_types_detected"] == ["CEA-608"]
+    assert result["streams"] == [{"type": "video", "codec": "h264"}]