Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from mod_regression.update_regression import update_expected_results
from run import app

import json
from pathlib import Path
from mod_regression.sample_inventory import inventory_samples


@app.cli.command('update')
@click.argument('path_to_ccex')
Expand All @@ -29,6 +33,35 @@ def update_results(path_to_ccex):
click.echo('update function finished')
return 0

def inventory_command(argv=None):
    """Generate a JSON inventory of the sample files and write it to disk.

    Parses ``--samples`` (directory to scan, default ``TestData``) and
    ``--output`` (JSON file to write, default
    ``metadata/sample_inventory.json``), passes the samples directory to
    ``inventory_samples`` and writes the returned list as indented JSON,
    creating the output file's parent directories when they are missing.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            this command did before the parameter existed.

    Raises:
        SystemExit: Raised by argparse (exit status 2) for invalid
            arguments or when the samples directory does not exist.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Generate sample inventory")
    parser.add_argument(
        "--samples",
        default="TestData",
        help="Path to samples directory",
    )
    parser.add_argument(
        "--output",
        default="metadata/sample_inventory.json",
        help="Output JSON file",
    )
    args = parser.parse_args(argv)

    samples_dir = Path(args.samples)
    out = Path(args.output)

    # Stop before touching the output: a mistyped or missing directory would
    # otherwise be inventoried as zero samples and overwrite a good file.
    if not samples_dir.is_dir():
        parser.error(f"samples directory not found: {samples_dir}")

    inventory = inventory_samples(samples_dir)

    out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the file does not depend on the locale default.
    out.write_text(json.dumps(inventory, indent=2), encoding="utf-8")

    print(f"Inventory written: {out} ({len(inventory)} samples)")

if __name__ == '__main__':
    import sys

    # The "inventory" sub-command must be dispatched BEFORE app.cli():
    # invoking a click command group directly runs it in standalone mode,
    # which ends by exiting the interpreter, so nothing placed after
    # app.cli() would ever execute.
    if len(sys.argv) > 1 and sys.argv[1] == "inventory":
        # Drop the sub-command name so argparse only sees its own options.
        sys.argv.pop(1)
        inventory_command()
    else:
        app.cli()
90 changes: 90 additions & 0 deletions mod_regression/sample_inventory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import json
import subprocess
import hashlib
from pathlib import Path


def _run(cmd):
try:
return subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
timeout=30,
check=False
)
except (OSError, subprocess.SubprocessError):
return None



def sha256sum(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in 8192-byte
    reads, so it is never held in memory as a whole.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()


def probe_sample(sample_path: Path) -> dict:
    """Collect hash, container/stream metadata and caption hints for a sample.

    The file is hashed with ``sha256sum`` and then inspected with two
    external tools run through ``_run``: ``ffprobe`` for container,
    duration and stream codecs, and ``ccextractor`` for caption types.
    When a tool yields no usable result (``_run`` returned a falsy value
    or the exit status is non-zero) the fields it would have filled keep
    their defaults.

    Args:
        sample_path: Path of the sample file to inspect.

    Returns:
        A dict with the keys ``path``, ``sha256``, ``container``,
        ``streams``, ``caption_types_detected`` and ``duration_sec``.
    """
    result = {
        "path": str(sample_path),
        "sha256": sha256sum(sample_path),
        "container": None,
        "streams": [],
        "caption_types_detected": [],
        "duration_sec": None,
    }

    # ---- ffprobe ----
    ffprobe = _run([
        "ffprobe",
        "-v", "error",
        "-show_format",
        "-show_streams",
        "-print_format", "json",
        str(sample_path)
    ])
    if ffprobe and ffprobe.returncode == 0:
        # update() only overwrites existing keys, so key order is unchanged.
        result.update(_parse_ffprobe_json(ffprobe.stdout))

    # ---- CCExtractor ----
    cce = _run([
        "ccextractor",
        str(sample_path),
        "-stdout"
    ])
    if cce and cce.returncode == 0:
        result["caption_types_detected"] = _caption_types_in_log(cce.stderr)

    return result


# (marker, label) pairs checked, in this order, against the lower-cased log.
_CAPTION_MARKERS = (
    ("608", "CEA-608"),
    ("708", "CEA-708"),
    ("dvb", "DVB"),
)


def _parse_ffprobe_json(raw):
    """Turn ffprobe's JSON text into container/duration_sec/streams fields.

    Returns an empty dict when *raw* is not a JSON object, so the caller's
    defaults stay in place.
    """
    try:
        meta = json.loads(raw)
    except (TypeError, ValueError):
        return {}
    if not isinstance(meta, dict):
        return {}

    fmt = meta.get("format")
    if not isinstance(fmt, dict):
        fmt = {}

    duration = None
    if "duration" in fmt:
        try:
            duration = float(fmt["duration"])
        except (TypeError, ValueError):
            # An unusable duration must not discard the container and
            # stream information parsed from the same document.
            duration = None

    streams = [
        {"type": entry.get("codec_type"), "codec": entry.get("codec_name")}
        for entry in (meta.get("streams") or [])
        if isinstance(entry, dict)
    ]

    return {
        "container": fmt.get("format_name"),
        "duration_sec": duration,
        "streams": streams,
    }


def _caption_types_in_log(log_text):
    """Return the caption labels whose marker occurs in the tool's log text."""
    # NOTE(review): plain substring matching -- "608"/"708" would also match
    # unrelated numbers in the log (paths, counters); confirm this is
    # acceptable or match the tool's exact messages instead.
    lowered = (log_text or "").lower()
    return [label for marker, label in _CAPTION_MARKERS if marker in lowered]


def inventory_samples(sample_root: Path) -> list:
    """Probe every regular file below *sample_root* and return the results.

    Args:
        sample_root: Directory that is searched recursively.

    Returns:
        A list with one ``probe_sample`` result per file, ordered by path.
        Directories are skipped; the list is empty when no file is found.
    """
    # Directory traversal yields entries in no guaranteed order; sorting
    # makes the inventory reproducible between runs and machines.
    return [
        probe_sample(candidate)
        for candidate in sorted(sample_root.rglob("*"))
        if candidate.is_file()
    ]
13 changes: 13 additions & 0 deletions tests/test_regression/test_inventory_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pathlib import Path
import json
from mod_regression.sample_inventory import inventory_samples


def test_inventory_multiple_files(tmp_path):
    """Two files in the sample root give two entries, each carrying a hash."""
    for file_name, payload in (("a.ts", b"a"), ("b.ts", b"b")):
        (tmp_path / file_name).write_bytes(payload)

    entries = inventory_samples(tmp_path)

    assert len(entries) == 2
    assert all("sha256" in entry for entry in entries)
assert all("sha256" in i for i in inventory)
30 changes: 30 additions & 0 deletions tests/test_regression/test_sample_inventory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import math
import tempfile
from pathlib import Path
from unittest import mock
from mod_regression.sample_inventory import probe_sample


def fake_run(cmd, **kwargs):
    """Stand-in for ``subprocess.run`` that always reports a canned success.

    The command and keyword arguments are accepted and ignored. The
    returned object exposes ``returncode``, ``stdout`` (an ffprobe-style
    JSON document) and ``stderr`` (a line mentioning CEA-608 captions).
    """
    canned_json = (
        '{"format":{"format_name":"mpegts","duration":"10.0"},'
        '"streams":[{"codec_type":"video","codec_name":"h264"}]}'
    )

    class FakeCompleted:
        returncode = 0
        stdout = canned_json
        stderr = "Detected CEA-608 captions"

    return FakeCompleted()


@mock.patch("mod_regression.sample_inventory.subprocess.run", side_effect=fake_run)
def test_probe_sample_basic(mock_run):
    """probe_sample maps the faked tool output into its result dict."""
    with tempfile.TemporaryDirectory() as workdir:
        sample = Path(workdir).joinpath("sample.ts")
        sample.write_bytes(b"dummy")

        # Probe while the temporary file still exists; it is hashed on disk.
        probed = probe_sample(sample)

    assert probed["container"] == "mpegts"
    assert math.isclose(probed["duration_sec"], 10.0, rel_tol=1e-9)
    assert "CEA-608" in probed["caption_types_detected"]
    assert probed["streams"][0]["codec"] == "h264"