|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Convert Spectacular AI SDK "useBinaryRecorder: True" output files to videos using FFmpeg. |
| 4 | +""" |
| 5 | + |
| 6 | +import argparse |
| 7 | +import json |
| 8 | +import subprocess |
| 9 | +import shutil |
| 10 | + |
| 11 | +import os |
| 12 | +from os import listdir, makedirs |
| 13 | +from os.path import isfile, join, exists |
| 14 | + |
def define_args(parser):
    """
    Register the command line arguments of the converter on `parser`.

    Two positional arguments (`inputPath`, `outputPath`) and two integer
    options (`--crf`, default 15, and `--fps`, default 30) are added.
    Nothing is returned.
    """
    positionalArguments = (
        ('inputPath', 'Dataset input directory'),
        ('outputPath', 'Converted dataset output directory'),
    )
    for argumentName, helpText in positionalArguments:
        parser.add_argument(argumentName, help=helpText)

    integerOptions = (
        ('--crf', 15, 'Constant Rate Factor for video encoding'),
        ('--fps', 30, 'Output video FPS metadata, in case it cannot be determined automatically'),
    )
    for optionFlag, defaultValue, helpText in integerOptions:
        parser.add_argument(optionFlag, type=int, default=defaultValue, help=helpText)
| 20 | + |
def readJson(filePath):
    """Return the decoded contents of the JSON file at `filePath`."""
    with open(filePath) as jsonFile:
        rawText = jsonFile.read()
    return json.loads(rawText)
| 24 | + |
def readVideoMetadata(videoInputPath, fps):
    """
    Collect the raw video parameters needed to convert `videoInputPath`.

    The sources below are tried in order and only the first one that exists
    is consulted:

    1. `<videoInputPath>.json`: loaded as-is (not validated) and marked usable.
    2. `data.jsonl` in the same directory: width, height and pixel format are
       read from the first entry of each record's `frames` list, and the frame
       rate is estimated from the first two frame records whose timestamps
       increase.
    3. `calibration.json` in the same directory: image size of the first
       camera, with the pixel format set to "gray" and the frame rate set to
       the given `fps`.

    Arguments:
        videoInputPath: path of the raw binary video file.
        fps: frame rate to report when only `calibration.json` is available.

    Returns:
        A dict with an "ok" entry. When "ok" is True and the data came from
        `data.jsonl` or `calibration.json`, the dict also holds "width",
        "height", "pixelFormat" and "fps". When "ok" is False the caller
        should not attempt a conversion.
    """
    videoMetadataPath = videoInputPath + ".json"
    datasetDirectory = os.path.dirname(videoInputPath)
    dataJsonlPath = os.path.join(datasetDirectory, "data.jsonl")
    calibrationPath = os.path.join(datasetDirectory, "calibration.json")

    metadata = { "ok": False }
    if os.path.isfile(videoMetadataPath):
        # Context manager so that the sidecar file is closed after parsing
        with open(videoMetadataPath) as f:
            metadata = json.load(f)
        metadata["ok"] = True
    elif os.path.exists(dataJsonlPath):
        requiredKeys = ("fps", "width", "height", "pixelFormat")
        t0 = None
        with open(dataJsonlPath, "r") as f:
            for line in f:
                # Tolerate empty lines (e.g. a trailing newline at the end of the file)
                if not line.strip():
                    continue
                d = json.loads(line)
                if 'frames' in d and len(d['frames']) > 0:
                    frame = d['frames'][0]
                    if 'width' in frame and 'height' in frame:
                        metadata["width"] = frame['width']
                        metadata["height"] = frame['height']
                    if 'colorFormat' in frame:
                        metadata["pixelFormat"] = frame['colorFormat']
                    if t0 is None:
                        t0 = d['time']
                    elif 'fps' not in metadata:
                        t1 = d['time']
                        # Equal or decreasing timestamps cannot give a valid
                        # frame rate; keep scanning for a later frame instead
                        # of dividing by zero.
                        if t1 > t0:
                            metadata["fps"] = 1.0 / (t1 - t0)
                # Stop reading as soon as everything needed is known
                if all(key in metadata for key in requiredKeys):
                    metadata["ok"] = True
                    break
    elif os.path.exists(calibrationPath):
        with open(calibrationPath) as f:
            calibration = json.load(f)
        if "cameras" not in calibration or len(calibration["cameras"]) == 0:
            return metadata
        camera = calibration["cameras"][0]
        if "imageWidth" not in camera or "imageHeight" not in camera:
            return metadata
        # The calibration file carries no pixel format or frame rate
        metadata["pixelFormat"] = "gray"
        metadata["width"] = camera["imageWidth"]
        metadata["height"] = camera["imageHeight"]
        metadata["fps"] = fps
        metadata["ok"] = True

    return metadata
| 68 | + |
def getBytesPerPixel(pixelFormat):
    """
    Return the number of bytes one pixel occupies in the given raw format.

    Supported formats are "gray", "gray16le", "rgb24" and "rgb32"; any other
    value raises a RuntimeError.
    """
    bytesByFormat = (
        ("gray", 1),
        ("gray16le", 2),
        ("rgb24", 3),
        ("rgb32", 4),
    )
    for knownFormat, byteCount in bytesByFormat:
        if pixelFormat == knownFormat:
            return byteCount
    raise RuntimeError("Invalid pixel format: {}".format(pixelFormat))
| 75 | + |
def convertVideo(args, video):
    """
    Convert one raw binary video of the dataset to an .mkv file using FFmpeg.

    The raw frames are streamed to FFmpeg through its standard input. If the
    metadata required for the conversion cannot be found, the file is copied
    to the output directory unchanged instead.

    Arguments:
        args: parsed command line arguments; `inputPath`, `outputPath`, `fps`
            and `crf` are read.
        video: file name of the video inside `args.inputPath`.

    Raises:
        RuntimeError: if the pixel format reported by the metadata is not
            supported by getBytesPerPixel.
    """
    videoInputPath = os.path.join(args.inputPath, video)
    videoOutputPath = os.path.join(args.outputPath, video)
    metadata = readVideoMetadata(videoInputPath, args.fps)

    if not metadata["ok"]:
        print("Cannot convert {}. Necessary metadata not found.".format(videoInputPath))

        # Instead just copy the file to output path (needed for cases when only some videos are in the binary format).
        shutil.copyfile(videoInputPath, videoOutputPath)
        return

    print("Converting: {}".format(videoInputPath))

    width = metadata["width"]
    height = metadata["height"]
    pixelFormat = metadata["pixelFormat"]

    # Computed once, and before FFmpeg is started, so that an unsupported
    # pixel format does not leave a child process waiting on its stdin.
    frameSizeBytes = width * height * getBytesPerPixel(pixelFormat)

    cmd = [
        "ffmpeg",
        "-pix_fmt", pixelFormat,
        "-y",
        "-f", "rawvideo",
        "-hide_banner",
        "-s", "{}x{}".format(width, height),
        "-r", "{}".format(metadata["fps"]),
        "-i", "-",
        "-an",
    ]

    # 16-bit gray input (treated as depth by this script) is encoded with FFV1
    # in the same pixel format; everything else is encoded with libx264 as
    # yuv420p using the requested CRF.
    isDepth = pixelFormat == "gray16le"
    if isDepth:
        cmd += [
            "-vcodec", "ffv1",
            "-pix_fmt", "gray16le",
        ]
    else:
        cmd += [
            "-vcodec", "libx264",
            "-pix_fmt", "yuv420p",
            "-crf", str(args.crf),
        ]

    # Only swap the file extension; a plain str.replace would also rewrite a
    # ".bin" that happens to occur in a directory name.
    videoOutputPathMkv = os.path.splitext(videoOutputPath)[0] + ".mkv"
    cmd.append(videoOutputPathMkv)

    pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    try:
        with open(videoInputPath, "rb") as rawVideo:
            while True:
                data = rawVideo.read(frameSizeBytes)
                if not data:
                    break
                pipe.stdin.write(data)
    finally:
        try:
            # Closing stdin signals end of input to FFmpeg
            pipe.stdin.close()
        except BrokenPipeError:
            # FFmpeg has already exited; its exit code is checked below
            pass
        # Wait so that the output file is complete when this function returns
        returnCode = pipe.wait()

    if returnCode != 0:
        print("FFmpeg exited with code {} while converting {}".format(returnCode, videoInputPath))
| 131 | + |
def getVideoFileNamesInDirectory(path):
    """
    List the names of the regular files directly inside `path` that end
    with '.bin'. Only the file names are returned, not full paths.
    """
    return [
        entryName
        for entryName in listdir(path)
        if isfile(join(path, entryName)) and entryName.endswith('.bin')
    ]
| 136 | + |
def copyVioFilesToOutputDirectory(inputPath, outputPath):
    """
    Copy data.jsonl, vio_config.yaml and calibration.json, in that order,
    from `inputPath` to `outputPath`, keeping the file names.
    """
    for fileName in ("data.jsonl", "vio_config.yaml", "calibration.json"):
        source = join(inputPath, fileName)
        destination = join(outputPath, fileName)
        shutil.copyfile(source, destination)
| 144 | + |
def convert(args):
    """
    Convert the dataset in `args.inputPath` into `args.outputPath`.

    The output directory is created when it does not exist, the VIO files are
    copied over and every '.bin' video found in the input directory is passed
    to convertVideo.
    """
    outputDirectory = args.outputPath
    if not exists(outputDirectory):
        makedirs(outputDirectory)

    copyVioFilesToOutputDirectory(args.inputPath, outputDirectory)

    for videoName in getVideoFileNamesInDirectory(args.inputPath):
        convertVideo(args, videoName)
| 153 | + |
def define_subparser(subparsers):
    """
    Add the 'binary' subcommand to `subparsers`.

    The subcommand uses the module docstring as its epilog, dispatches to
    `convert` through the `func` default and takes the arguments registered by
    define_args, whose return value is passed through.
    """
    parserOptions = dict(
        description="Convert data from Spectacular AI binary format to video format",
        epilog=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    binaryParser = subparsers.add_parser('binary', **parserOptions)
    binaryParser.set_defaults(func=convert)
    return define_args(binaryParser)
| 161 | + |
if __name__ == '__main__':
    # Standalone use: build a parser described by the module docstring,
    # register the converter arguments and run the conversion.
    cliParser = argparse.ArgumentParser(description=__doc__.strip())
    define_args(cliParser)
    convert(cliParser.parse_args())
0 commit comments