Skip to content

Commit b7b1904

Browse files
committed
Merge branch 'chrisr-12-main-vcmrs'
2 parents 96c6e80 + 345b297 commit b7b1904

File tree

19 files changed

+595
-82
lines changed

19 files changed

+595
-82
lines changed

cfgs/codec/default.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ eval_encode: "bitrate" # or "bitrate"
55
experiment: "_default" # e.g. "_preset_medium" name experiment preceded by "_"
66
output_dir: "${pipeline.output_dir_root}/${.type}${.experiment}/${dataset.datacatalog}/${dataset.config.dataset_name}/uncmp"
77
bitstream_name: "default.bin"
8+
vcm_mode: False
9+
output10b: ${.vcm_mode}
810
encoder_config:
911
qp: 'uncmp'
1012
nbit_quant: -1
13+
use_yuv: False

cfgs/codec/vcmrs.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ codec_paths:
77
cfg_file: "/local/path/vcmrs/Scripts/LD_inner.ini"
88
tmp_dir: "/temp/path"
99
mac_computation: False
10+
single_chunk: True
11+
vcm_mode: True # For VCM-RS output bit depth and options for conversion to PNG for evaluation
12+
output10b: ${.vcm_mode} # Expect VCM-RS to output 10b
1013

1114
verbosity: 1
1215

@@ -27,3 +30,7 @@ encoder_config:
2730
input_bitdepth: 10
2831
output_bitdepth: 10
2932
seq_roi_cfg_network: yolov3_1088x608
33+
descriptor_mode: vcm_ctc # Or [load|generate]
34+
descriptor_overwrite: False
35+
vcmrs_ver: v1.0
36+
use_yuv: ${..vcm_mode}

cfgs/pipeline/remote_inference.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ codec:
2525
skip_n_frames: 0 # This is encoder only option
2626
n_frames_to_be_encoded: -1 #(-1 = encode all input), This is encoder only option
2727
measure_complexity: "${codec.mac_computation}"
28-
vcm_mode: False
2928
nn_task:
3029
dump_results: False
3130
output_results_dir: "${codec.output_dir}/output_results"
@@ -36,4 +35,4 @@ evaluation:
3635
visualization:
3736
save_visualization: "${codec.save_visualization}"
3837
visualization_dir: "${codec.output_dir}/visualization"
39-
threshold: 0 # only for detectron2, 0 means default setting of detectron2
38+
threshold: 0 # only for detectron2, 0 means default setting of detectron2

compressai_vision/codecs/encdec_utils/png_yuv.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@ def __init__(
4444
self,
4545
chroma_format: str,
4646
input_bitdepth: int,
47+
use_yuv: bool,
4748
frame_rate,
4849
ffmpeg_loglevel: str,
4950
logger: logging.Logger,
5051
):
5152
self.chroma_format = chroma_format
5253
self.input_bitdepth = input_bitdepth
54+
self.use_yuv = use_yuv
5355
self.frame_rate = frame_rate
5456
self.ffmpeg_loglevel = ffmpeg_loglevel
5557
self.logger = logger
@@ -116,11 +118,13 @@ def __call__(self, input: Dict, file_prefix: str):
116118
file_prefix = f"{file_prefix}_{frame_width}x{frame_height}_{self.frame_rate}fps_{input_bitdepth}bit_p{chroma_format}"
117119
yuv_in_path = f"{file_prefix}_input.yuv"
118120

119-
pix_fmt_suffix = "10le" if input_bitdepth == 10 else ""
120121
chroma_format = "gray" if chroma_format == "400" else f"yuv{chroma_format}p"
121122

122-
# Use existing YUV (if found):
123-
if yuv_file is not None:
123+
# Use existing YUV (if found and indicated for use):
124+
if self.use_yuv:
125+
assert (
126+
yuv_file is not None
127+
), "Parameter 'use_yuv' set True but YUV file not found."
124128
size = yuv_file.stat().st_size
125129
bytes_per_luma_sample = {"yuv420p": 1.5}[chroma_format]
126130
bytes_per_sample = (input_bitdepth + 7) >> 3
@@ -140,6 +144,7 @@ def __call__(self, input: Dict, file_prefix: str):
140144

141145
# TODO (fracape)
142146
# we don't enable skipping frames (codec.skip_n_frames) nor use n_frames_to_be_encoded in video mode
147+
pix_fmt_suffix = "10le" if input_bitdepth == 10 else ""
143148

144149
convert_cmd = [
145150
"ffmpeg",

compressai_vision/codecs/std_codecs.py

Lines changed: 121 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from .encdec_utils import *
5353
from .encdec_utils.png_yuv import PngFilesToYuvFileConverter, YuvFileToPngFilesConverter
5454
from .utils import MIN_MAX_DATASET, min_max_inv_normalization, min_max_normalization
55+
from .vcmrs_descriptors import get_descriptor_files
5556

5657

5758
def get_filesize(filepath: Union[Path, str]) -> int:
@@ -80,40 +81,6 @@ def load_bitstream(path):
8081
return buf.getvalue()
8182

8283

83-
# From VCM-RS Scripts/utils.py
84-
def update_cfg_from_ini(ini_file, cfg, section=None):
85-
current_section = ""
86-
with open(ini_file, "r") as f:
87-
lines = f.readlines()
88-
for line in lines:
89-
line = line.replace("\r", "").replace("\n", "").strip()
90-
if line.startswith("#"):
91-
continue
92-
if line.startswith("["):
93-
current_section = line.lstrip("[").rstrip("]")
94-
continue
95-
if section is None or section == current_section:
96-
pos = line.find("=")
97-
key = line[0:pos].strip()
98-
value = line[pos + 1 :].strip()
99-
cfg[key] = value
100-
101-
102-
# From VCM-RS Scripts/utils.py
103-
def get_descriptor_files(data_dir, scenario, cfg, dataset, video_id):
104-
main_dir = data_dir # os.path.dirname(os.path.dirname(data_dir))
105-
descriptor_variant = "TemporalResampleRatio4"
106-
if scenario == "AI_e2e" or scenario == "LD_e2e":
107-
descriptor_variant = "TemporalResampleOFF"
108-
descriptor_dir = os.path.join(main_dir, "Descriptors", descriptor_variant, dataset)
109-
roi_descriptor = os.path.join(descriptor_dir, "ROI", f"{video_id}.txt")
110-
spatial_descriptor = os.path.join(
111-
descriptor_dir, "SpatialResample", f"{video_id}.csv"
112-
)
113-
114-
return roi_descriptor, spatial_descriptor
115-
116-
11784
@register_codec("vtm")
11885
class VTM(nn.Module):
11986
"""Encoder/Decoder class for VVC - VTM reference software"""
@@ -184,7 +151,8 @@ def __init__(
184151

185152
self.convert_input_to_yuv = PngFilesToYuvFileConverter(
186153
chroma_format=self.enc_cfgs["chroma_format"],
187-
input_bitdepth=self.enc_cfgs["input_bitdepth"],
154+
input_bitdepth=int(self.enc_cfgs["input_bitdepth"]),
155+
use_yuv=self.enc_cfgs["use_yuv"],
188156
frame_rate=self.frame_rate,
189157
ffmpeg_loglevel=self.ffmpeg_loglevel,
190158
logger=self.logger,
@@ -568,6 +536,7 @@ def decode(
568536
org_img_size: Dict = None,
569537
remote_inference=False,
570538
vcm_mode=False,
539+
output10b=False,
571540
) -> Dict:
572541
"""
573542
Decodes the bitstream and returns the output features .
@@ -588,6 +557,8 @@ def decode(
588557
assert bitstream_path.is_file()
589558

590559
output_file_prefix = bitstream_path.stem
560+
if output10b: # VCM-RS under CTC outputs 10b YUV
561+
output_file_prefix = output_file_prefix.replace("8bit", "10bit")
591562

592563
dec_path = codec_output_dir / "dec"
593564
dec_path.mkdir(parents=True, exist_ok=True)
@@ -598,8 +569,6 @@ def decode(
598569

599570
if remote_inference: # remote inference pipeline
600571
yuv_dec_path = f"{dec_path}/{output_file_prefix}_dec.yuv"
601-
if vcm_mode:
602-
yuv_dec_path = yuv_dec_path.replace("8bit", "10bit")
603572
bitdepth = get_raw_video_file_info(yuv_dec_path.split("qp")[-1])["bitdepth"]
604573

605574
cmd = self.get_decode_cmd(
@@ -1017,8 +986,8 @@ def __init__(
1017986
**kwargs,
1018987
):
1019988
super().__init__(vision_model, dataset, **kwargs)
1020-
self.use_descriptors = True
1021989
self.tmp_dir = Path(self.codec_paths["tmp_dir"])
990+
self.single_chunk = kwargs["single_chunk"]
1022991

1023992
def get_check_list_of_paths(self):
1024993
self.cfg_file = Path(self.codec_paths["cfg_file"])
@@ -1050,15 +1019,95 @@ def get_encode_cmd(
10501019
Returns:
10511020
List[Any]: A list of strings representing the encoding command.
10521021
"""
1022+
1023+
# BEGIN - From VCM-RS Scripts/utils.py
1024+
def update_cfg_from_ini(ini_file, cfg, section=None):
    """Merge ``key = value`` entries from an INI-style file into ``cfg`` in place.

    Adapted from VCM-RS Scripts/utils.py.

    Args:
        ini_file: Path of the INI-style text file to read.
        cfg: Mutable mapping that is updated in place. Keys and values are
            stored as whitespace-stripped strings; existing keys are
            overwritten.
        section: When given, only entries located under the ``[section]``
            header with exactly this name are copied. ``None`` copies the
            entries of every section.

    Lines starting with ``#`` are ignored, as are lines without ``=`` (this
    includes empty lines) and lines whose key part is empty.
    """
    current_section = ""
    with open(ini_file, "r") as f:
        for raw_line in f:
            line = raw_line.replace("\r", "").replace("\n", "").strip()
            if line.startswith("#"):
                continue
            if line.startswith("["):
                current_section = line.lstrip("[").rstrip("]")
                continue
            if section is not None and section != current_section:
                continue
            # Split on the first '=' only, so values may themselves contain '='.
            key, separator, value = line.partition("=")
            key = key.strip()
            # Addition: make parsing robust to empty lines, lines without '=',
            # and lines with an empty key (an empty key would otherwise end up
            # in cfg as "").
            if not separator or not key:
                continue
            cfg[key] = value.strip()
1042+
1043+
# END - From VCM-RS Scripts/utils.py
1044+
1045+
# Set modes for the descriptors
1046+
def add_descriptor_modes(descriptors, descriptor_mode):
    """Add the matching ``*Mode`` option for each descriptor entry that is present.

    Args:
        descriptors: Mapping of descriptor option names (``RoIDescriptor``,
            ``SpatialDescriptor``, ``ColorizeDescriptorFile``,
            ``TemporalDescriptor``) to their values. It is updated in place.
        descriptor_mode: One of ``"vcm_ctc"``, ``"load"`` or ``"generate"``.

    Returns:
        The same ``descriptors`` object, with a mode entry added for every
        descriptor key it already contained.

    Raises:
        KeyError: If ``descriptor_mode`` is not one of the supported names.
    """
    # (descriptor key, mode key) pairs; the order matches the mode tuples below:
    # RoIDescriptorMode, SpatialDescriptorMode, ColorizeDescriptorMode,
    # TemporalDescriptorMode
    descriptor_to_mode_key = (
        ("RoIDescriptor", "RoIDescriptorMode"),
        ("SpatialDescriptor", "SpatialDescriptorMode"),
        ("ColorizeDescriptorFile", "ColorizeDescriptorMode"),
        ("TemporalDescriptor", "TemporalDescriptorMode"),
    )
    load_modes = ("load", "UsingDescriptor", "load", "load")
    modes_by_name = {
        "vcm_ctc": load_modes,
        "load": load_modes,
        "generate": ("save", "GeneratingDescriptor", "save", "save"),
    }
    try:
        modes = modes_by_name[descriptor_mode]
    except KeyError:
        # Keep the exception type but tell the user which values are accepted.
        raise KeyError(
            f"Unsupported descriptor_mode {descriptor_mode!r}; "
            f"expected one of {sorted(modes_by_name)}"
        ) from None

    for (descriptor_key, mode_key), mode in zip(descriptor_to_mode_key, modes):
        if descriptor_key in descriptors:
            descriptors.update({mode_key: mode})
    return descriptors
1072+
10531073
config = self.enc_cfgs["config"]
10541074
nn_intra_qp_offset = -5 # self.enc_cfgs["nn_intra_qp_offset"]
10551075
seq_roi_cfg_network = self.enc_cfgs["seq_roi_cfg_network"]
10561076
output_dir = os.path.dirname(str(bitstream_path))
10571077
recon_fname = str(bitstream_path).replace(".bin", ".yuv")
10581078
num_workers = 1
1079+
descriptor_mode = self.enc_cfgs["descriptor_mode"]
1080+
vcmrs_ver = self.enc_cfgs["vcmrs_ver"]
10591081

10601082
assert chroma_format == "420"
10611083

1084+
(
1085+
roi_descriptor_mode,
1086+
spatial_descriptor_mode,
1087+
colorize_descriptor_mode,
1088+
temporal_descriptor_mode,
1089+
) = {
1090+
"vcm_ctc": ["load", "UsingDescriptor", "load", "load"],
1091+
"load": ["load", "UsingDescriptor", "load", "load"],
1092+
"generate": ["save", "GeneratingDescriptor", "save", "save"],
1093+
}[descriptor_mode]
1094+
items = str(bitstream_path).split("/")
1095+
dataset = {
1096+
"SFUHW": "SFU",
1097+
"MPEGTVDTRACKING": "TVD",
1098+
"MPEGHIEVE": "HIEVE",
1099+
}[items[-5]]
1100+
sequence = items[-4]
1101+
for remove in ["sfu-hw-", "_val", "mpeg-"]:
1102+
sequence = sequence.replace(remove, "")
1103+
if dataset == "TVD":
1104+
# For VCM CTC using TVD clips: Replace second '-' (if present) with '_'
1105+
# E.g.: TVD-02-1 -> TVD-02_1
1106+
dash_cnt = len([c for c in sequence if c == "-"])
1107+
if dash_cnt == 2:
1108+
last = sequence.rfind("-")
1109+
sequence = sequence[:last] + "_" + sequence[last + 1 :]
1110+
10621111
cfg = {
10631112
"SourceWidth": width,
10641113
"SourceHeight": height,
@@ -1079,39 +1128,53 @@ def get_encode_cmd(
10791128
# FramesToBeEncoded" : nb_frames,
10801129
# "Configuration" : config,
10811130
# "input_files",
1131+
"NnlfSwitch": "Bypass",
10821132
}
1133+
update_cfg_from_ini(self.cfg_file, cfg)
10831134

1135+
tram = (
1136+
cfg["TemporalResamplingAdaptiveMethod"]
1137+
if "TemporalResamplingAdaptiveMethod" in cfg
1138+
else None
1139+
)
10841140
descriptor_dir = Path(self.cfg_file).parent.parent
1085-
update_cfg_from_ini(self.cfg_file, cfg)
1086-
items = str(bitstream_path).split("/")
1087-
dataset = {
1088-
"SFUHW": "SFU",
1089-
"MPEGTVDTRACKING": "TVD",
1090-
}[items[-5]]
1091-
sequence = items[-4]
1092-
for remove in ["sfu-hw-", "_val", "mpeg-"]:
1093-
sequence = sequence.replace(remove, "")
1094-
if dataset == "TVD":
1095-
sequence = sequence[:-2] + "_" + sequence[-1]
1096-
roi_descriptor, spatial_descriptor = get_descriptor_files(
1097-
descriptor_dir, config, None, dataset, sequence
1141+
descriptors = get_descriptor_files(
1142+
descriptor_mode, vcmrs_ver, descriptor_dir, config, dataset, sequence, tram
10981143
)
10991144

1100-
cfg.update(
1101-
{
1102-
"RoIDescriptor": roi_descriptor,
1103-
"SpatialDescriptor": spatial_descriptor,
1104-
}
1105-
)
1145+
gen_found = False
1146+
for descriptor in descriptors.values():
1147+
# In 'generate' mode, check whether the descriptor file already exists
1148+
if descriptor_mode == "generate":
1149+
if os.path.isfile(descriptor):
1150+
print(
1151+
f"descriptor_mode is 'generate' but file {descriptor} already exists!"
1152+
)
1153+
if self.enc_cfgs["descriptor_overwrite"]:
1154+
Path(descriptor).unlink()
1155+
else:
1156+
gen_found = True
1157+
dirname = os.path.dirname(descriptor)
1158+
os.makedirs(dirname, exist_ok=True)
1159+
if gen_found:
1160+
sys.exit(1)
1161+
1162+
descriptors = add_descriptor_modes(descriptors, descriptor_mode)
1163+
cfg.update(descriptors)
11061164

11071165
cmd = [
11081166
sys.executable,
11091167
"-m",
11101168
"vcmrs.encoder",
1111-
"--single_chunk",
11121169
"--directory_as_video",
11131170
"--debug_source_checksum",
11141171
]
1172+
if self.single_chunk:
1173+
cmd.extend(
1174+
[
1175+
"--single_chunk",
1176+
]
1177+
)
11151178

11161179
for c in cfg.keys():
11171180
cmd.append("--" + c)

0 commit comments

Comments
 (0)