5252from .encdec_utils import *
5353from .encdec_utils .png_yuv import PngFilesToYuvFileConverter , YuvFileToPngFilesConverter
5454from .utils import MIN_MAX_DATASET , min_max_inv_normalization , min_max_normalization
55+ from .vcmrs_descriptors import get_descriptor_files
5556
5657
5758def get_filesize (filepath : Union [Path , str ]) -> int :
@@ -80,40 +81,6 @@ def load_bitstream(path):
8081 return buf .getvalue ()
8182
8283
# From VCM-RS Scripts/utils.py
def update_cfg_from_ini(ini_file, cfg, section=None):
    """Merge ``key = value`` pairs read from an INI-style file into ``cfg``.

    The file is scanned line by line:

    * lines starting with ``#`` are ignored;
    * a line starting with ``[`` switches the current section (the leading
      ``[`` and trailing ``]`` characters are stripped to get its name);
    * any other line containing ``=`` is split at the first ``=`` into a
      key and a value, both whitespace-stripped, and stored in ``cfg``.

    Args:
        ini_file: Path of the file to read.
        cfg: Mapping updated in place; existing keys are overwritten.
        section: When given, only pairs found under the section with exactly
            this name are stored. ``None`` stores pairs from every section,
            including those that appear before the first section header.

    Returns:
        None. ``cfg`` is modified in place and all stored values are strings.
    """
    current_section = ""
    with open(ini_file, "r") as f:
        for raw_line in f:
            line = raw_line.replace("\r", "").replace("\n", "").strip()
            if line.startswith("#"):
                continue
            if line.startswith("["):
                current_section = line.lstrip("[").rstrip("]")
                continue
            if section is not None and section != current_section:
                continue
            key, separator, value = line.partition("=")
            if not separator:
                # Blank or malformed line without "=": storing it would add
                # a junk entry (e.g. cfg[""] = "") to the configuration.
                continue
            cfg[key.strip()] = value.strip()
100-
101-
# From VCM-RS Scripts/utils.py
def get_descriptor_files(data_dir, scenario, cfg, dataset, video_id):
    """Build the ROI and spatial-resample descriptor paths for one video.

    The paths have the form
    ``<data_dir>/Descriptors/<variant>/<dataset>/ROI/<video_id>.txt`` and
    ``<data_dir>/Descriptors/<variant>/<dataset>/SpatialResample/<video_id>.csv``
    where ``<variant>`` is ``TemporalResampleOFF`` for the ``AI_e2e`` and
    ``LD_e2e`` scenarios and ``TemporalResampleRatio4`` for any other one.

    Args:
        data_dir: Directory that contains the ``Descriptors`` folder.
        scenario: Scenario name used to select the descriptor variant.
        cfg: Unused; kept so existing callers do not have to change.
        dataset: Dataset sub-directory name.
        video_id: Base name (without extension) of the descriptor files.

    Returns:
        Tuple ``(roi_descriptor, spatial_descriptor)`` of path strings. The
        files are not checked for existence.
    """
    if scenario in ("AI_e2e", "LD_e2e"):
        descriptor_variant = "TemporalResampleOFF"
    else:
        descriptor_variant = "TemporalResampleRatio4"

    descriptor_dir = os.path.join(data_dir, "Descriptors", descriptor_variant, dataset)
    roi_descriptor = os.path.join(descriptor_dir, "ROI", f"{video_id}.txt")
    spatial_descriptor = os.path.join(
        descriptor_dir, "SpatialResample", f"{video_id}.csv"
    )
    return roi_descriptor, spatial_descriptor
115-
116-
11784@register_codec ("vtm" )
11885class VTM (nn .Module ):
11986 """Encoder/Decoder class for VVC - VTM reference software"""
@@ -184,7 +151,8 @@ def __init__(
184151
185152 self .convert_input_to_yuv = PngFilesToYuvFileConverter (
186153 chroma_format = self .enc_cfgs ["chroma_format" ],
187- input_bitdepth = self .enc_cfgs ["input_bitdepth" ],
154+ input_bitdepth = int (self .enc_cfgs ["input_bitdepth" ]),
155+ use_yuv = self .enc_cfgs ["use_yuv" ],
188156 frame_rate = self .frame_rate ,
189157 ffmpeg_loglevel = self .ffmpeg_loglevel ,
190158 logger = self .logger ,
@@ -568,6 +536,7 @@ def decode(
568536 org_img_size : Dict = None ,
569537 remote_inference = False ,
570538 vcm_mode = False ,
539+ output10b = False ,
571540 ) -> Dict :
572541 """
573542 Decodes the bitstream and returns the output features .
@@ -588,6 +557,8 @@ def decode(
588557 assert bitstream_path .is_file ()
589558
590559 output_file_prefix = bitstream_path .stem
560+ if output10b : # VCM-RS under CTC outputs 10b YUV
561+ output_file_prefix = output_file_prefix .replace ("8bit" , "10bit" )
591562
592563 dec_path = codec_output_dir / "dec"
593564 dec_path .mkdir (parents = True , exist_ok = True )
@@ -598,8 +569,6 @@ def decode(
598569
599570 if remote_inference : # remote inference pipeline
600571 yuv_dec_path = f"{ dec_path } /{ output_file_prefix } _dec.yuv"
601- if vcm_mode :
602- yuv_dec_path = yuv_dec_path .replace ("8bit" , "10bit" )
603572 bitdepth = get_raw_video_file_info (yuv_dec_path .split ("qp" )[- 1 ])["bitdepth" ]
604573
605574 cmd = self .get_decode_cmd (
@@ -1017,8 +986,8 @@ def __init__(
1017986 ** kwargs ,
1018987 ):
1019988 super ().__init__ (vision_model , dataset , ** kwargs )
1020- self .use_descriptors = True
1021989 self .tmp_dir = Path (self .codec_paths ["tmp_dir" ])
990+ self .single_chunk = kwargs ["single_chunk" ]
1022991
1023992 def get_check_list_of_paths (self ):
1024993 self .cfg_file = Path (self .codec_paths ["cfg_file" ])
@@ -1050,15 +1019,95 @@ def get_encode_cmd(
10501019 Returns:
10511020 List[Any]: A list of strings representing the encoding command.
10521021 """
1022+
# BEGIN - From VCM-RS Scripts/utils.py
def update_cfg_from_ini(ini_file, cfg, section=None):
    """Copy ``key = value`` settings from an INI-style file into ``cfg``.

    Parsing rules, applied to each line after removing line terminators and
    surrounding whitespace:

    * a line beginning with ``#`` is a comment and is skipped;
    * a line beginning with ``[`` starts a new section whose name is the
      line with leading ``[`` and trailing ``]`` characters removed;
    * a line without ``=`` (for instance an empty line) is skipped;
    * otherwise the text before the first ``=`` is the key and the text
      after it is the value, both stripped of surrounding whitespace.

    Args:
        ini_file: Path of the file to read.
        cfg: Mapping updated in place; existing keys are overwritten.
        section: Restrict the update to pairs found under this section name.
            ``None`` accepts pairs from all sections.

    Returns:
        None. Values are stored in ``cfg`` as strings.
    """
    current_section = ""
    with open(ini_file, "r") as f:
        for raw_line in f:
            line = raw_line.replace("\r", "").replace("\n", "").strip()
            if line.startswith("#"):
                continue
            if line.startswith("["):
                current_section = line.lstrip("[").rstrip("]")
                continue
            if section is None or section == current_section:
                pos = line.find("=")
                if pos == -1:  # Addition: Make parsing robust to empty lines
                    continue
                key = line[0:pos].strip()
                value = line[pos + 1 :].strip()
                cfg[key] = value


# END - From VCM-RS Scripts/utils.py
1044+
# Set modes for the descriptors
def add_descriptor_modes(descriptors, descriptor_mode):
    """Add the matching ``*Mode`` entry for every descriptor file present.

    Args:
        descriptors: Mapping that may contain the keys ``RoIDescriptor``,
            ``SpatialDescriptor``, ``ColorizeDescriptorFile`` and
            ``TemporalDescriptor``. It is updated in place.
        descriptor_mode: One of ``"vcm_ctc"``, ``"load"`` or ``"generate"``.

    Returns:
        The same ``descriptors`` object, with a mode entry added for each
        descriptor key it already contained.

    Raises:
        KeyError: If ``descriptor_mode`` is not one of the known modes.
    """
    # Value order in each row:
    # RoIDescriptorMode, SpatialDescriptorMode, ColorizeDescriptorMode, TemporalDescriptorMode
    mode_table = {
        "vcm_ctc": ["load", "UsingDescriptor", "load", "load"],
        "load": ["load", "UsingDescriptor", "load", "load"],
        "generate": ["save", "GeneratingDescriptor", "save", "save"],
    }
    selected_modes = mode_table[descriptor_mode]

    # (descriptor file key, key of the mode setting that goes with it)
    key_pairs = (
        ("RoIDescriptor", "RoIDescriptorMode"),
        ("SpatialDescriptor", "SpatialDescriptorMode"),
        ("ColorizeDescriptorFile", "ColorizeDescriptorMode"),
        ("TemporalDescriptor", "TemporalDescriptorMode"),
    )

    for (file_key, mode_key), mode_value in zip(key_pairs, selected_modes):
        if file_key not in descriptors:
            continue
        descriptors[mode_key] = mode_value
    return descriptors
1072+
10531073 config = self .enc_cfgs ["config" ]
10541074 nn_intra_qp_offset = - 5 # self.enc_cfgs["nn_intra_qp_offset"]
10551075 seq_roi_cfg_network = self .enc_cfgs ["seq_roi_cfg_network" ]
10561076 output_dir = os .path .dirname (str (bitstream_path ))
10571077 recon_fname = str (bitstream_path ).replace (".bin" , ".yuv" )
10581078 num_workers = 1
1079+ descriptor_mode = self .enc_cfgs ["descriptor_mode" ]
1080+ vcmrs_ver = self .enc_cfgs ["vcmrs_ver" ]
10591081
10601082 assert chroma_format == "420"
10611083
1084+ (
1085+ roi_descriptor_mode ,
1086+ spatial_descriptor_mode ,
1087+ colorize_descriptor_mode ,
1088+ temporal_descriptor_mode ,
1089+ ) = {
1090+ "vcm_ctc" : ["load" , "UsingDescriptor" , "load" , "load" ],
1091+ "load" : ["load" , "UsingDescriptor" , "load" , "load" ],
1092+ "generate" : ["save" , "GeneratingDescriptor" , "save" , "save" ],
1093+ }[descriptor_mode ]
1094+ items = str (bitstream_path ).split ("/" )
1095+ dataset = {
1096+ "SFUHW" : "SFU" ,
1097+ "MPEGTVDTRACKING" : "TVD" ,
1098+ "MPEGHIEVE" : "HIEVE" ,
1099+ }[items [- 5 ]]
1100+ sequence = items [- 4 ]
1101+ for remove in ["sfu-hw-" , "_val" , "mpeg-" ]:
1102+ sequence = sequence .replace (remove , "" )
1103+ if dataset == "TVD" :
1104+ # For VCM CTC using TVD clips: Replace second '-' (if present) with '_'
1105+ # E.g.: TVD-02-1 -> TVD-02_1
1106+ dash_cnt = len ([c for c in sequence if c == "-" ])
1107+ if dash_cnt == 2 :
1108+ last = sequence .rfind ("-" )
1109+ sequence = sequence [:last ] + "_" + sequence [last + 1 :]
1110+
10621111 cfg = {
10631112 "SourceWidth" : width ,
10641113 "SourceHeight" : height ,
@@ -1079,39 +1128,53 @@ def get_encode_cmd(
10791128 # FramesToBeEncoded" : nb_frames,
10801129 # "Configuration" : config,
10811130 # "input_files",
1131+ "NnlfSwitch" : "Bypass" ,
10821132 }
1133+ update_cfg_from_ini (self .cfg_file , cfg )
10831134
1135+ tram = (
1136+ cfg ["TemporalResamplingAdaptiveMethod" ]
1137+ if "TemporalResamplingAdaptiveMethod" in cfg
1138+ else None
1139+ )
10841140 descriptor_dir = Path (self .cfg_file ).parent .parent
1085- update_cfg_from_ini (self .cfg_file , cfg )
1086- items = str (bitstream_path ).split ("/" )
1087- dataset = {
1088- "SFUHW" : "SFU" ,
1089- "MPEGTVDTRACKING" : "TVD" ,
1090- }[items [- 5 ]]
1091- sequence = items [- 4 ]
1092- for remove in ["sfu-hw-" , "_val" , "mpeg-" ]:
1093- sequence = sequence .replace (remove , "" )
1094- if dataset == "TVD" :
1095- sequence = sequence [:- 2 ] + "_" + sequence [- 1 ]
1096- roi_descriptor , spatial_descriptor = get_descriptor_files (
1097- descriptor_dir , config , None , dataset , sequence
1141+ descriptors = get_descriptor_files (
1142+ descriptor_mode , vcmrs_ver , descriptor_dir , config , dataset , sequence , tram
10981143 )
10991144
1100- cfg .update (
1101- {
1102- "RoIDescriptor" : roi_descriptor ,
1103- "SpatialDescriptor" : spatial_descriptor ,
1104- }
1105- )
1145+ gen_found = False
1146+ for descriptor in descriptors .values ():
1147+ # When generate, check already exists
1148+ if descriptor_mode == "generate" :
1149+ if os .path .isfile (descriptor ):
1150+ print (
1151+ f"descriptor_mode is 'generate' but file { descriptor } already exists!"
1152+ )
1153+ if self .enc_cfgs ["descriptor_overwrite" ]:
1154+ Path (descriptor ).unlink ()
1155+ else :
1156+ gen_found = True
1157+ dirname = os .path .dirname (descriptor )
1158+ os .makedirs (dirname , exist_ok = True )
1159+ if gen_found :
1160+ sys .exit (1 )
1161+
1162+ descriptors = add_descriptor_modes (descriptors , descriptor_mode )
1163+ cfg .update (descriptors )
11061164
11071165 cmd = [
11081166 sys .executable ,
11091167 "-m" ,
11101168 "vcmrs.encoder" ,
1111- "--single_chunk" ,
11121169 "--directory_as_video" ,
11131170 "--debug_source_checksum" ,
11141171 ]
1172+ if self .single_chunk :
1173+ cmd .extend (
1174+ [
1175+ "--single_chunk" ,
1176+ ]
1177+ )
11151178
11161179 for c in cfg .keys ():
11171180 cmd .append ("--" + c )
0 commit comments