diff --git a/benchmarks/BM_resnet50/model_repository/dali/pipeline.py b/benchmarks/BM_resnet50/model_repository/dali/pipeline.py index 18b6da22..4232d43d 100644 --- a/benchmarks/BM_resnet50/model_repository/dali/pipeline.py +++ b/benchmarks/BM_resnet50/model_repository/dali/pipeline.py @@ -24,24 +24,34 @@ def parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') return parser.parse_args() def preprocessing(images, device='gpu'): - images = dali.fn.decoders.image(images, device="mixed" if device == 'gpu' else 'cpu', output_type=types.RGB) - images = dali.fn.resize(images, resize_x=224, resize_y=224) - return dali.fn.crop_mirror_normalize(images, - dtype=types.FLOAT, - output_layout="HWC", - crop=(224, 224), - mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], - std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + images = dali.fn.decoders.image( + images, + device="mixed" if device == 'gpu' else 'cpu', + output_type=types.RGB) + images = dali.fn.resize(images, resize_x=224, resize_y=224) + return dali.fn.crop_mirror_normalize( + images, + dtype=types.FLOAT, + output_layout="HWC", + crop=(224, 224), + mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + @dali.pipeline_def(batch_size=1, num_threads=1, device_id=0) def pipe(): - images = dali.fn.external_source(device="cpu", name="DALI_INPUT_0", no_copy=True) + images = dali.fn.external_source(device="cpu", + name="DALI_INPUT_0", + no_copy=True) return preprocessing(images) diff --git a/benchmarks/BM_resnet50/scripts/dataset_preprocess.py b/benchmarks/BM_resnet50/scripts/dataset_preprocess.py index fb5453c2..b585929f 100644 --- a/benchmarks/BM_resnet50/scripts/dataset_preprocess.py +++ b/benchmarks/BM_resnet50/scripts/dataset_preprocess.py @@ -46,7 +46,11 @@ def parse_meta_mat(metafile) -> Dict[int, str]: meta = scipy.io.loadmat(metafile, squeeze_me=True)["synsets"] nums_children = list(zip(*meta))[4] - meta = [meta[idx] for idx, num_children in enumerate(nums_children) if num_children == 0] + meta = [ + meta[idx] + for idx, num_children in enumerate(nums_children) + if num_children == 0 + ] idcs, wnids = list(zip(*meta))[:2] idx_to_wnid = {idx: wnid for idx, wnid in zip(idcs, wnids)} return idx_to_wnid @@ -57,13 +61,17 @@ def _process_image(image_file, target_size): original_size = image.size # scale image to size where minimal size is _RESIZE_MIN - scale_factor = max(_RESIZE_MIN / original_size[0], _RESIZE_MIN / original_size[1]) - resize_to = int(original_size[0] * scale_factor), int(original_size[1] * scale_factor) + scale_factor = max(_RESIZE_MIN / original_size[0], + _RESIZE_MIN / original_size[1]) + resize_to = int(original_size[0] * scale_factor), int(original_size[1] * + scale_factor) resized_image = image.resize(resize_to) # central crop of image to target_size - left, upper = (resize_to[0] - target_size[0]) // 2, (resize_to[1] - target_size[1]) // 2 - cropped_image = resized_image.crop((left, upper, left + target_size[0], upper + target_size[1])) + left, upper = (resize_to[0] - target_size[0]) // 2, (resize_to[1] - + target_size[1]) // 2 + cropped_image = resized_image.crop( + (left, upper, left + target_size[0], upper + 
target_size[1])) return cropped_image @@ -73,27 +81,25 @@ def main(): parser = argparse.ArgumentParser(description="short_description") parser.add_argument( "--dataset-dir", - help="Path to dataset directory where imagenet archives are stored and processed files will be saved.", + help= + "Path to dataset directory where imagenet archives are stored and processed files will be saved.", required=False, default=DATASETS_DIR, ) - parser.add_argument( - '--save', - help='Save processed images.', - required=False, default=False - ) + parser.add_argument('--save', + help='Save processed images.', + required=False, + default=False) parser.add_argument( "--target-size", help="Size of target image. Format it as ,.", required=False, default=",".join(map(str, TARGET_SIZE)), ) - parser.add_argument( - '--perf-file', - required=False, - default=None, - help='Path to save a file with time measurements.' - ) + parser.add_argument('--perf-file', + required=False, + default=None, + help='Path to save a file with time measurements.') args = parser.parse_args() if args.dataset_dir is None: @@ -130,17 +136,20 @@ def main(): # remap WNID into index in sorted list of all WNIDs - this is how network outputs class available_wnids = sorted(set(labels_wnid)) - wnid_to_newidx = {wnid: new_cls for new_cls, wnid in enumerate(available_wnids)} + wnid_to_newidx = { + wnid: new_cls for new_cls, wnid in enumerate(available_wnids) + } labels = [wnid_to_newidx[wnid] for wnid in labels_wnid] if args.perf_file is None: - perf = False + perf = False else: - times = [] - perf = True + times = [] + perf = True output_dir = datasets_dir / IMAGENET_DIRNAME with tarfile.open(image_archive_path, mode="r") as image_archive_file: image_rel_paths = sorted(image_archive_file.getnames()) - for cls, image_rel_path in tqdm(zip(labels, image_rel_paths), total=len(image_rel_paths)): + for cls, image_rel_path in tqdm(zip(labels, image_rel_paths), + total=len(image_rel_paths)): output_path = output_dir / str(cls) / image_rel_path original_image_file = image_archive_file.extractfile(image_rel_path) file_data = original_image_file.read() @@ -148,14 +157,14 @@ def main(): processed_image = _process_image(io.BytesIO(file_data), target_size) end = time.perf_counter() if perf: - times.append(end-start) + times.append(end - start) if args.save: - output_path.parent.mkdir(parents=True, exist_ok=True) - processed_image.save(output_path.as_posix()) + output_path.parent.mkdir(parents=True, exist_ok=True) + processed_image.save(output_path.as_posix()) if perf: - with open(args.perf_file, 'w') as perf_file: - print(times, file=perf_file) + with open(args.perf_file, 'w') as perf_file: + print(times, file=perf_file) if __name__ == "__main__": diff --git a/benchmarks/BM_resnet50/scripts/model-loader.py b/benchmarks/BM_resnet50/scripts/model-loader.py index bc4634c2..acb1d1d9 100644 --- a/benchmarks/BM_resnet50/scripts/model-loader.py +++ b/benchmarks/BM_resnet50/scripts/model-loader.py @@ -23,25 +23,38 @@ import argparse import sys + def get_args(): - parser = argparse.ArgumentParser(description='Load or unload a model in Triton server.') - parser.add_argument('action', action='store', choices=['load', 'unload', 'reload']) - parser.add_argument('-u', '--url', required=False, action='store', default='localhost:8001', help='Server url.') - parser.add_argument('-m', '--model', required=True, action='store', help='Model name.') - return parser.parse_args() + parser = argparse.ArgumentParser( + description='Load or unload a model in Triton server.') + 
parser.add_argument('action', + action='store', + choices=['load', 'unload', 'reload']) + parser.add_argument('-u', + '--url', + required=False, + action='store', + default='localhost:8001', + help='Server url.') + parser.add_argument('-m', + '--model', + required=True, + action='store', + help='Model name.') + return parser.parse_args() def main(args): - client = t_client.InferenceServerClient(url=args.url) - if args.action in ['reload', 'unload']: - client.unload_model(args.model) - print('Successfully unloaded model', args.model) + client = t_client.InferenceServerClient(url=args.url) + if args.action in ['reload', 'unload']: + client.unload_model(args.model) + print('Successfully unloaded model', args.model) - if args.action in ['reload', 'load']: - client.load_model(args.model) - print('Successfully loaded model', args.model) + if args.action in ['reload', 'load']: + client.load_model(args.model) + print('Successfully loaded model', args.model) if __name__ == '__main__': - args = get_args() - main(args) + args = get_args() + main(args) diff --git a/benchmarks/BM_resnet50/scripts/prepare-input-data.py b/benchmarks/BM_resnet50/scripts/prepare-input-data.py index 0c544a27..abc7bb1f 100644 --- a/benchmarks/BM_resnet50/scripts/prepare-input-data.py +++ b/benchmarks/BM_resnet50/scripts/prepare-input-data.py @@ -27,7 +27,6 @@ import base64 as b64 import json - dali_extra_path = os.getenv('DALI_EXTRA_PATH', None) assert dali_extra_path is not None, "Please set DALI_EXTRA_PATH env variable." @@ -39,44 +38,63 @@ # choose 16 smallest samples chosen_set = [p for (_, p) in sized_images[:16]] -# choose medium sized image +# choose medium sized image chosen_sample = sized_images[8][1] + def save_sample_input(sample, dir_name, input_name): - Path(dir_name).mkdir(exist_ok=True) - shutil.copy(sample, Path(dir_name) / Path(input_name)) + Path(dir_name).mkdir(exist_ok=True) + shutil.copy(sample, Path(dir_name) / Path(input_name)) + def get_content(fpath): - with fpath.open("rb") as f: - content = f.read() - return { - 'content' : { - 'b64': b64.b64encode(content).decode('utf-8') - }, - 'shape': [len(content)] - } + with fpath.open("rb") as f: + content = f.read() + return { + 'content': { + 'b64': b64.b64encode(content).decode('utf-8') + }, + 'shape': [len(content)] + } + def save_json_dataset(files, dataset_filename, input_name): - contents = [get_content(fpath) for fpath in files] - inputs = [{input_name: content} for content in contents] - result_dict = {'data': inputs} - with open(dataset_filename, 'w') as dataset_file: - json.dump(result_dict, dataset_file) + contents = [get_content(fpath) for fpath in files] + inputs = [{input_name: content} for content in contents] + result_dict = {'data': inputs} + with open(dataset_filename, 'w') as dataset_file: + json.dump(result_dict, dataset_file) + def get_args(): - parser = argparse.ArgumentParser(description='Prepare perf_analyzer input data.') - parser.add_argument('-d', '--directory-name', required=False, action='store', default='inputs-data', - help='Directory name to store a single sample data.') - parser.add_argument('-i', '--input-name', required=False, action='store', default='input', - help='Input name.') - parser.add_argument('-f', '--dataset-filename', required=False, action='store', default='dataset.json', - help='Name of the created JSON dataset.') - return parser.parse_args() + parser = argparse.ArgumentParser( + description='Prepare perf_analyzer input data.') + parser.add_argument('-d', + '--directory-name', + required=False, + 
action='store', + default='inputs-data', + help='Directory name to store a single sample data.') + parser.add_argument('-i', + '--input-name', + required=False, + action='store', + default='input', + help='Input name.') + parser.add_argument('-f', + '--dataset-filename', + required=False, + action='store', + default='dataset.json', + help='Name of the created JSON dataset.') + return parser.parse_args() + def main(args): - save_sample_input(chosen_sample, args.directory_name, args.input_name) - save_json_dataset(chosen_set, args.dataset_filename, args.input_name) + save_sample_input(chosen_sample, args.directory_name, args.input_name) + save_json_dataset(chosen_set, args.dataset_filename, args.input_name) + if __name__ == '__main__': - args = get_args() - main(args) + args = get_args() + main(args) diff --git a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_dali/pipeline.py b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_dali/pipeline.py index 380abb91..0de43fd7 100644 --- a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_dali/pipeline.py +++ b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_dali/pipeline.py @@ -29,9 +29,8 @@ def _interleave_lists(*lists): Returns: iterator over interleaved list """ - assert all( - (len(lists[0]) == len(test_l) for test_l in lists) - ), "All lists have to have the same length" + assert all((len(lists[0]) == len(test_l) + for test_l in lists)), "All lists have to have the same length" return itertools.chain(*zip(*lists)) @@ -44,27 +43,27 @@ def _tuples2list(tuples: list): @dali.pipeline_def def dali_asr_pipeline( - train_pipeline, # True if training, False if validation - file_root, - file_list, - sample_rate, - silence_threshold, - resample_range, - discrete_resample_range, - window_size, - window_stride, - nfeatures, - nfft, - frame_splicing_factor, - dither_coeff, - pad_align, - preemph_coeff, - do_spectrogram_masking=False, - cutouts_generator=None, - shard_id=0, - n_shards=1, - preprocessing_device="gpu", - is_triton_pipeline=False, + train_pipeline, # True if training, False if validation + file_root, + file_list, + sample_rate, + silence_threshold, + resample_range, + discrete_resample_range, + window_size, + window_stride, + nfeatures, + nfft, + frame_splicing_factor, + dither_coeff, + pad_align, + preemph_coeff, + do_spectrogram_masking=False, + cutouts_generator=None, + shard_id=0, + n_shards=1, + preprocessing_device="gpu", + is_triton_pipeline=False, ): do_remove_silence = silence_threshold is not None @@ -73,7 +72,9 @@ def _div_ceil(dividend, divisor): if is_triton_pipeline: assert not train_pipeline, "Pipeline for Triton shall be a validation pipeline" - encoded = fn.external_source(device="cpu", name="DALI_INPUT_0", no_copy=True) + encoded = fn.external_source(device="cpu", + name="DALI_INPUT_0", + no_copy=True) else: encoded, label = fn.readers.file( device="cpu", @@ -89,11 +90,11 @@ def _div_ceil(dividend, divisor): if resample_range is not None: if discrete_resample_range: values = [resample_range[0], 1.0, resample_range[1]] - speed_perturbation_coeffs = fn.random.uniform(device="cpu", values=values) + speed_perturbation_coeffs = fn.random.uniform(device="cpu", + values=values) else: - speed_perturbation_coeffs = fn.random.uniform( - device="cpu", range=resample_range - ) + speed_perturbation_coeffs = fn.random.uniform(device="cpu", + range=resample_range) if train_pipeline and speed_perturbation_coeffs is not None: dec_sample_rate_arg = speed_perturbation_coeffs * sample_rate @@ -102,9 
+103,10 @@ def _div_ceil(dividend, divisor): else: dec_sample_rate_arg = None - audio, _ = fn.decoders.audio( - encoded, sample_rate=dec_sample_rate_arg, dtype=types.FLOAT, downmix=True - ) + audio, _ = fn.decoders.audio(encoded, + sample_rate=dec_sample_rate_arg, + dtype=types.FLOAT, + downmix=True) if do_remove_silence: begin, length = fn.nonsilent_region(audio, cutoff_db=silence_threshold) audio = fn.slice(audio, begin, length, axes=[0]) @@ -115,7 +117,8 @@ def _div_ceil(dividend, divisor): audio = audio.gpu() if dither_coeff != 0.0: - audio = audio + fn.random.normal(device=preprocessing_device) * dither_coeff + audio = audio + fn.random.normal( + device=preprocessing_device) * dither_coeff audio = fn.preemphasis_filter(audio, preemph_coeff=preemph_coeff) @@ -126,25 +129,31 @@ def _div_ceil(dividend, divisor): window_step=window_stride * sample_rate, ) - mel_spec = fn.mel_filter_bank( - spec, sample_rate=sample_rate, nfilter=nfeatures, normalize=True - ) + mel_spec = fn.mel_filter_bank(spec, + sample_rate=sample_rate, + nfilter=nfeatures, + normalize=True) - log_features = fn.to_decibels( - mel_spec, multiplier=np.log(10), reference=1.0, cutoff_db=math.log(1e-20) - ) + log_features = fn.to_decibels(mel_spec, + multiplier=np.log(10), + reference=1.0, + cutoff_db=math.log(1e-20)) log_features_len = fn.shapes(log_features) if frame_splicing_factor != 1: log_features_len = _div_ceil(log_features_len, frame_splicing_factor) log_features = fn.normalize(log_features, axes=[1]) - log_features = fn.pad(log_features, axes=[1], fill_value=0, align=pad_align, shape=(-1,)) + log_features = fn.pad(log_features, + axes=[1], + fill_value=0, + align=pad_align, + shape=(-1,)) if train_pipeline and do_spectrogram_masking: - anchors, shapes = fn.external_source( - source=cutouts_generator, num_outputs=2, cycle=True - ) + anchors, shapes = fn.external_source(source=cutouts_generator, + num_outputs=2, + cycle=True) log_features = fn.erase( log_features, anchor=anchors, diff --git a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/features.py b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/features.py index c1c613d5..c07c9d4f 100644 --- a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/features.py +++ b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/features.py @@ -10,21 +10,27 @@ class BaseFeatures(nn.Module): """Base class for GPU accelerated audio preprocessing.""" __constants__ = ["pad_align", "pad_to_max_duration", "max_len"] - def __init__(self, pad_align, pad_to_max_duration, max_duration, - sample_rate, window_size, window_stride, spec_augment=None, + def __init__(self, + pad_align, + pad_to_max_duration, + max_duration, + sample_rate, + window_size, + window_stride, + spec_augment=None, cutout_augment=None): super(BaseFeatures, self).__init__() self.pad_align = pad_align self.pad_to_max_duration = pad_to_max_duration - self.win_length = int(sample_rate * window_size) # frame size + self.win_length = int(sample_rate * window_size) # frame size self.hop_length = int(sample_rate * window_stride) # Calculate maximum sequence length (# frames) if pad_to_max_duration: self.max_len = 1 + math.ceil( - (max_duration * sample_rate - self.win_length) / self.hop_length - ) + (max_duration * sample_rate - self.win_length) / + self.hop_length) if spec_augment is not None: self.spec_augment = SpecAugment(**spec_augment) @@ -73,12 +79,14 @@ def apply_padding(self, x): @torch.jit.script def normalize_batch(x, seq_len, 
normalize_type: str): -# print ("normalize_batch: x, seq_len, shapes: ", x.shape, seq_len, seq_len.shape) + # print ("normalize_batch: x, seq_len, shapes: ", x.shape, seq_len, seq_len.shape) if normalize_type == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) + x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), + dtype=x.dtype, + device=x.device) + x_std = torch.zeros((seq_len.shape[0], x.shape[1]), + dtype=x.dtype, + device=x.device) for i in range(x.shape[0]): x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) @@ -119,28 +127,49 @@ def stack_subsample_frames(x, x_lens, stacking: int = 1, subsampling: int = 1): if x.size(2) > x_lens.max().item(): assert abs(x.size(2) - x_lens.max().item()) <= 1 - x = x[:,:,:x_lens.max().item()] + x = x[:, :, :x_lens.max().item()] return x, x_lens class FilterbankFeatures(BaseFeatures): # For JIT, https://pytorch.org/docs/stable/jit.html#python-defined-constants - __constants__ = ["dither", "preemph", "n_fft", "hop_length", "win_length", - "log", "frame_splicing", "normalize"] + __constants__ = [ + "dither", "preemph", "n_fft", "hop_length", "win_length", "log", + "frame_splicing", "normalize" + ] + # torchscript: "center" removed due to a bug - def __init__(self, spec_augment=None, cutout_augment=None, - sample_rate=8000, window_size=0.02, window_stride=0.01, - window="hamming", normalize="per_feature", n_fft=None, - preemph=0.97, n_filt=64, lowfreq=0, highfreq=None, log=True, - dither=1e-5, pad_align=8, pad_to_max_duration=False, - max_duration=float('inf'), frame_splicing=1, device=None): - super(FilterbankFeatures, self).__init__( - pad_align=pad_align, pad_to_max_duration=pad_to_max_duration, - max_duration=max_duration, sample_rate=sample_rate, - window_size=window_size, window_stride=window_stride, - spec_augment=spec_augment, cutout_augment=cutout_augment) + def __init__(self, + spec_augment=None, + cutout_augment=None, + sample_rate=8000, + window_size=0.02, + window_stride=0.01, + window="hamming", + normalize="per_feature", + n_fft=None, + preemph=0.97, + n_filt=64, + lowfreq=0, + highfreq=None, + log=True, + dither=1e-5, + pad_align=8, + pad_to_max_duration=False, + max_duration=float('inf'), + frame_splicing=1, + device=None): + super(FilterbankFeatures, + self).__init__(pad_align=pad_align, + pad_to_max_duration=pad_to_max_duration, + max_duration=max_duration, + sample_rate=sample_rate, + window_size=window_size, + window_stride=window_stride, + spec_augment=spec_augment, + cutout_augment=cutout_augment) torch_windows = { 'hann': torch.hann_window, @@ -150,7 +179,7 @@ def __init__(self, spec_augment=None, cutout_augment=None, 'none': None, } - self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length)) + self.n_fft = n_fft or 2**math.ceil(math.log2(self.win_length)) self.normalize = normalize self.log = log @@ -163,24 +192,27 @@ def __init__(self, spec_augment=None, cutout_augment=None, window_fn = torch_windows.get(window, None) window_tensor = window_fn(self.win_length, periodic=False) if window_fn else None - filterbanks = torch.tensor( - librosa.filters.mel(sample_rate, self.n_fft, n_mels=n_filt, - fmin=lowfreq, fmax=highfreq), - dtype=torch.float).unsqueeze(0) + filterbanks = torch.tensor(librosa.filters.mel(sample_rate, + self.n_fft, + n_mels=n_filt, + fmin=lowfreq, + fmax=highfreq), + dtype=torch.float).unsqueeze(0) # torchscript 
self.register_buffer("fb", filterbanks) self.register_buffer("window", window_tensor) self.used_window = self.window.to(dtype=torch.float).cuda() - def get_seq_len(self, seq_len): - return torch.ceil(seq_len.to(dtype=torch.float) / self.hop_length).to( - dtype=torch.int) + return torch.ceil(seq_len.to(dtype=torch.float) / + self.hop_length).to(dtype=torch.int) # do stft # TORCHSCRIPT: center removed due to bug def stft(self, x): - return torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length, + return torch.stft(x, + n_fft=self.n_fft, + hop_length=self.hop_length, win_length=self.win_length, window=self.used_window) @@ -197,10 +229,11 @@ def calculate_features(self, x, seq_len): # do preemphasis if self.preemph is not None: x = torch.cat( - (x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]), dim=1) - x = self.stft(x) + (x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]), + dim=1) + x = self.stft(x) - # get power spectrum + # get power spectrum x = x.pow(2).sum(-1) # dot with filterbank energies @@ -228,4 +261,3 @@ def calculate_features(self, x, seq_len): # del mask return x.to(dtype), seq_len - diff --git a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/model.py b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/model.py index ab67acb1..1ba4c1e7 100644 --- a/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/model.py +++ b/benchmarks/dali_vs_python/BM_jasper/model_repository/jasper_python/1/model.py @@ -33,18 +33,34 @@ def decode_audio(audio_bytes): class TritonPythonModel: + def __init__(self): pass def initialize(self, args): self.model_config = model_config = json.loads(args['model_config']) - output0_config = pb_utils.get_output_config_by_name(model_config, "PYTHON_OUTPUT_0") - self.output_dtype = pb_utils.triton_string_to_numpy(output0_config['data_type']) - self.feat_proc = features.FilterbankFeatures( - spec_augment=None, cutout_augment=None, sample_rate=16000, window_size=0.02, - window_stride=0.01, window="hann", normalize="per_feature", n_fft=512, preemph=0.97, - n_filt=64, lowfreq=0, highfreq=None, log=True, dither=1e-5, pad_align=16, - pad_to_max_duration=False, max_duration=float('inf'), frame_splicing=1) + output0_config = pb_utils.get_output_config_by_name( + model_config, "PYTHON_OUTPUT_0") + self.output_dtype = pb_utils.triton_string_to_numpy( + output0_config['data_type']) + self.feat_proc = features.FilterbankFeatures(spec_augment=None, + cutout_augment=None, + sample_rate=16000, + window_size=0.02, + window_stride=0.01, + window="hann", + normalize="per_feature", + n_fft=512, + preemph=0.97, + n_filt=64, + lowfreq=0, + highfreq=None, + log=True, + dither=1e-5, + pad_align=16, + pad_to_max_duration=False, + max_duration=float('inf'), + frame_splicing=1) def execute(self, requests): responses = [] @@ -72,8 +88,8 @@ def execute(self, requests): dec_t = dec_t.cuda() len_t = len_t.cuda() out_audio, out_len = self.feat_proc(dec_t, len_t) - out0_tensor = pb_utils.Tensor.from_dlpack("PYTHON_OUTPUT_0", - torch.utils.dlpack.to_dlpack(out_audio)) + out0_tensor = pb_utils.Tensor.from_dlpack( + "PYTHON_OUTPUT_0", torch.utils.dlpack.to_dlpack(out_audio)) response = pb_utils.InferenceResponse(output_tensors=[out0_tensor]) responses.append(response) diff --git a/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_dali/pipeline.py b/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_dali/pipeline.py index 8ff9603b..009cb63d 100644 --- 
a/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_dali/pipeline.py +++ b/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_dali/pipeline.py @@ -25,26 +25,33 @@ def parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, help='The path where to save the serialized pipeline') return parser.parse_args() def preprocessing(images): - images = dali.fn.decoders.image(images, device='mixed', output_type=types.RGB) + images = dali.fn.decoders.image(images, + device='mixed', + output_type=types.RGB) images = dali.fn.resize(images, resize_x=224, resize_y=224) - return dali.fn.crop_mirror_normalize(images, - dtype=types.FLOAT, - output_layout="CHW", - crop=(224, 224), - mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], - std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + return dali.fn.crop_mirror_normalize( + images, + dtype=types.FLOAT, + output_layout="CHW", + crop=(224, 224), + mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) @dali.pipeline_def(batch_size=1, num_threads=16, device_id=0) def pipe(): - images = dali.fn.external_source(device='cpu', name="DALI_INPUT_0", no_copy=True) + images = dali.fn.external_source(device='cpu', + name="DALI_INPUT_0", + no_copy=True) return preprocessing(images) diff --git a/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_python/1/model.py b/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_python/1/model.py index cac053b9..46166edf 100644 --- a/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_python/1/model.py +++ b/benchmarks/dali_vs_python/BM_rn50/model_repository/rn50_python/1/model.py @@ -26,19 +26,25 @@ import torchvision.transforms as transforms import triton_python_backend_utils as pb_utils -img_transforms = transforms.Compose( - [transforms.Resize((224, 224)), transforms.CenterCrop(224), transforms.ToTensor()]) +img_transforms = transforms.Compose([ + transforms.Resize((224, 224)), + transforms.CenterCrop(224), + transforms.ToTensor() +]) class TritonPythonModel: + def __init__(self): self.std = None self.mean = None def initialize(self, args): self.model_config = model_config = json.loads(args['model_config']) - output0_config = pb_utils.get_output_config_by_name(model_config, "PYTHON_OUTPUT_0") - self.output_dtype = pb_utils.triton_string_to_numpy(output0_config['data_type']) + output0_config = pb_utils.get_output_config_by_name( + model_config, "PYTHON_OUTPUT_0") + self.output_dtype = pb_utils.triton_string_to_numpy( + output0_config['data_type']) with torch.no_grad(): mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1) @@ -57,8 +63,8 @@ def execute(self, requests): out0.append(self.decode_resize(inp.to(torch.uint8))) out0_t = torch.stack(out0) out0_t = self.normalize(out0_t) - out0_tensor = pb_utils.Tensor.from_dlpack("PYTHON_OUTPUT_0", - torch.utils.dlpack.to_dlpack(out0_t)) + out0_tensor = pb_utils.Tensor.from_dlpack( + "PYTHON_OUTPUT_0", torch.utils.dlpack.to_dlpack(out0_t)) response = pb_utils.InferenceResponse(output_tensors=[out0_tensor]) responses.append(response) @@ -75,5 +81,6 @@ def normalize(self, batch): with torch.no_grad(): batch = batch.cuda() batch = batch.float() - processed_batch = batch.unsqueeze(0).sub_(self.mean).div_(self.std)[0] + processed_batch = batch.unsqueeze(0).sub_(self.mean).div_( + 
self.std)[0] return processed_batch diff --git a/benchmarks/dali_vs_python/pipelines_compare_client.py b/benchmarks/dali_vs_python/pipelines_compare_client.py index d9b445a2..e3758bab 100644 --- a/benchmarks/dali_vs_python/pipelines_compare_client.py +++ b/benchmarks/dali_vs_python/pipelines_compare_client.py @@ -34,26 +34,58 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') - parser.add_argument('-b', '--batch_size', type=int, required=False, default=1, + parser.add_argument('-b', + '--batch_size', + type=int, + required=False, + default=1, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size.') - parser.add_argument('-m', '--model_name', type=str, required=True, help='Model name') - parser.add_argument('--validate', action="store_true", required=False, - help='Enable the qualitative check of the model outputs.') - parser.add_argument('--eps', type=float, required=False, default=1e-1, + parser.add_argument('-m', + '--model_name', + type=str, + required=True, + help='Model name') + parser.add_argument( + '--validate', + action="store_true", + required=False, + help='Enable the qualitative check of the model outputs.') + parser.add_argument('--eps', + type=float, + required=False, + default=1e-1, help='Epsilon for the output validation. ' - 'Ignored, if --validate option is not enabled.') + 'Ignored, if --validate option is not enabled.') img_group = parser.add_mutually_exclusive_group() - img_group.add_argument('--sample', type=str, required=False, default=None, + img_group.add_argument('--sample', + type=str, + required=False, + default=None, help='Path to the single sample.') - img_group.add_argument('--sample_dir', type=str, required=False, default=None, - help='Directory, with samples that will be broken down into batches and ' - 'inferred. The directory must contain samples only') + img_group.add_argument( + '--sample_dir', + type=str, + required=False, + default=None, + help='Directory, with samples that will be broken down into batches and ' + 'inferred. The directory must contain samples only') return parser.parse_args() @@ -64,7 +96,8 @@ def _select_batch_size(batch_size_provider, batch_idx): return batch_size_provider[batch_idx % len(batch_size_provider)] elif isinstance(batch_size_provider, int): return batch_size_provider - raise TypeError("Incorrect batch_size_provider type. Actual: ", type(batch_size_provider)) + raise TypeError("Incorrect batch_size_provider type. 
Actual: ", + type(batch_size_provider)) def batcher(dataset, batch_size_provider, n_iterations=-1): @@ -104,7 +137,7 @@ def batcher(dataset, batch_size_provider, n_iterations=-1): return if curr_sample + batch_size < dataset_size: - yield dataset[curr_sample: curr_sample + batch_size] + yield dataset[curr_sample:curr_sample + batch_size] else: # Get as many samples from this revolution of the dataset as possible, # then repeat the dataset as many revolutions as needed @@ -113,10 +146,9 @@ def batcher(dataset, batch_size_provider, n_iterations=-1): n_rep = (batch_size - suffix) // dataset_size prefix = batch_size - (suffix + dataset_size * n_rep) yield np.concatenate( - (dataset[curr_sample:], - np.repeat(dataset, repeats=n_rep, axis=0), - dataset[:prefix]) - ) + (dataset[curr_sample:], np.repeat(dataset, + repeats=n_rep, + axis=0), dataset[:prefix])) curr_sample = (curr_sample + batch_size) % dataset_size iter_idx += 1 @@ -140,11 +172,13 @@ def load_samples(dir_path: str, name_pattern='.', max_samples=-1): samples = [] # Traverses directory for files (not dirs) and returns full paths to them - path_generator = (os.path.join(dir_path, f) for f in os.listdir(dir_path) if - os.path.isfile(os.path.join(dir_path, f)) and + path_generator = (os.path.join(dir_path, f) + for f in os.listdir(dir_path) + if os.path.isfile(os.path.join(dir_path, f)) and re.search(name_pattern, f) is not None) - sample_paths = [dir_path] if os.path.isfile(dir_path) else list(path_generator) + sample_paths = [dir_path + ] if os.path.isfile(dir_path) else list(path_generator) if 0 < max_samples < len(sample_paths): sample_paths = sample_paths[:max_samples] for img in tqdm(sample_paths, desc="Reading samples."): @@ -156,9 +190,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1. """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape." 
return np.stack(arrays) @@ -183,7 +221,9 @@ def infer_dali(triton_client, batch, model_name_prefix): inputs[0].set_data_from_numpy(batch) # Test with outputs - results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) + results = triton_client.infer(model_name=model_name, + inputs=inputs, + outputs=outputs) # Get the output arrays from the results output0_data = results.as_numpy("DALI_OUTPUT_0") @@ -203,7 +243,9 @@ def infer_python(triton_client, batch, model_name_prefix): inputs[0].set_data_from_numpy(batch) # Test with outputs - results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) + results = triton_client.infer(model_name=model_name, + inputs=inputs, + outputs=outputs) # Get the output arrays from the results output0_data = results.as_numpy("PYTHON_OUTPUT_0") @@ -218,8 +260,8 @@ def calc_rms(a, b): def main(model_name): try: - triton_client = tritonclient.grpc.InferenceServerClient(url=FLAGS.url, - verbose=FLAGS.verbose) + triton_client = tritonclient.grpc.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit() @@ -231,8 +273,11 @@ def main(model_name): image_data = array_from_list(image_data) print("Samples loaded") - for batch in tqdm(batcher(image_data, FLAGS.batch_size, n_iterations=FLAGS.n_iter), - desc="Inferring", total=FLAGS.n_iter): + for batch in tqdm(batcher(image_data, + FLAGS.batch_size, + n_iterations=FLAGS.n_iter), + desc="Inferring", + total=FLAGS.n_iter): output0_dali = infer_dali(triton_client, batch, model_name) output0_python = infer_python(triton_client, batch, model_name) assert output0_python.shape == output0_dali.shape, f"Output shapes do not match: Python={output0_python.shape} vs DALI={output0_dali.shape}." diff --git a/benchmarks/dali_vs_python/run_benchmark.py b/benchmarks/dali_vs_python/run_benchmark.py index e9f0ce46..105d5894 100644 --- a/benchmarks/dali_vs_python/run_benchmark.py +++ b/benchmarks/dali_vs_python/run_benchmark.py @@ -32,11 +32,23 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-m', '--model_name', type=str, required=True, help='Model name.', + parser.add_argument('-m', + '--model_name', + type=str, + required=True, + help='Model name.', choices=['rn50', 'jasper']) - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. 
Default is localhost:8001.') - parser.add_argument('-s', '--scenario', type=str, required=True, help='Model name.', + parser.add_argument('-s', + '--scenario', + type=str, + required=True, + help='Model name.', choices=['online', 'offline']) return parser.parse_args() @@ -54,13 +66,15 @@ def create_cmd(cmd_options): def check_sample_shape(input_name, model_name): input_data_dir = input_data_path(model_name=model_name) - ret = subprocess.run(['stat', '--printf', '%s', f'{input_data_dir}/{input_name}'], - capture_output=True, check=True) + ret = subprocess.run( + ['stat', '--printf', '%s', f'{input_data_dir}/{input_name}'], + capture_output=True, + check=True) return int(ret.stdout) -def create_cmd_options(model_name, input_name, batch_size, sample_shape, backend, - concurrency_range): +def create_cmd_options(model_name, input_name, batch_size, sample_shape, + backend, concurrency_range): input_data_dir = input_data_path(model_name=model_name) cmd_options = { '-m': f'{model_name}_{backend}', @@ -80,14 +94,18 @@ def merge_csvs(csv_fd_list, output_filename, batch_sizes): df_merged.to_csv(output_filename) -def run_perf_analyzer(model_name: str, input_name: str, backend: str, batch_sizes: list, - concurrency_range: str): - sample_shape = check_sample_shape(input_name=input_name, model_name=model_name) +def run_perf_analyzer(model_name: str, input_name: str, backend: str, + batch_sizes: list, concurrency_range: str): + sample_shape = check_sample_shape(input_name=input_name, + model_name=model_name) result_files = [] for bs in batch_sizes: - cmd_options = create_cmd_options(model_name=model_name, input_name=input_name, - batch_size=str(bs), sample_shape=sample_shape, - backend=backend, concurrency_range=concurrency_range) + cmd_options = create_cmd_options(model_name=model_name, + input_name=input_name, + batch_size=str(bs), + sample_shape=sample_shape, + backend=backend, + concurrency_range=concurrency_range) results_file = tempfile.NamedTemporaryFile(mode='w') result_files.append(results_file) cmd_options['-f'] = results_file.name @@ -100,11 +118,11 @@ def run_perf_analyzer(model_name: str, input_name: str, backend: str, batch_size return output_filename -def analyze_model(tritonclient, model_name, input_name, model_backend, batch_sizes, scenario, - concurrency_range): +def analyze_model(tritonclient, model_name, input_name, model_backend, + batch_sizes, scenario, concurrency_range): tritonclient.load_model(f'{model_name}_{model_backend}') - results_filename = run_perf_analyzer(model_name, input_name, model_backend, batch_sizes, - concurrency_range) + results_filename = run_perf_analyzer(model_name, input_name, model_backend, + batch_sizes, concurrency_range) tritonclient.unload_model(f'{model_name}_{model_backend}') print(f"{model_name}_{model_backend} model results: {results_filename}") @@ -112,27 +130,31 @@ def analyze_model(tritonclient, model_name, input_name, model_backend, batch_siz def run_benchmark(model_descrs, batch_sizes, scenario, concurrency_range): client = tritonclient.InferenceServerClient(url='localhost:8001') for descr in model_descrs: - analyze_model(client, descr['model_name'], descr['input_name'], descr['backend'], - batch_sizes, scenario=scenario, concurrency_range=concurrency_range) + analyze_model(client, + descr['model_name'], + descr['input_name'], + descr['backend'], + batch_sizes, + scenario=scenario, + concurrency_range=concurrency_range) def main(): args = parse_args() - model_descriptors = [ - { - 'model_name': f'{args.model_name}', - 'input_name': 
'DALI_INPUT_0', - 'backend': 'dali', - }, - { - 'model_name': f'{args.model_name}', - 'input_name': 'PYTHON_INPUT_0', - 'backend': 'python', - } - ] - batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256] if args.scenario == 'offline' else [1] + model_descriptors = [{ + 'model_name': f'{args.model_name}', + 'input_name': 'DALI_INPUT_0', + 'backend': 'dali', + }, { + 'model_name': f'{args.model_name}', + 'input_name': 'PYTHON_INPUT_0', + 'backend': 'python', + }] + batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256 + ] if args.scenario == 'offline' else [1] concurrency_range = '1' if args.scenario == 'offline' else '1:32:1' - run_benchmark(model_descriptors, batch_sizes, args.scenario, concurrency_range) + run_benchmark(model_descriptors, batch_sizes, args.scenario, + concurrency_range) if __name__ == '__main__': diff --git a/client/dali_grpc_client.py b/client/dali_grpc_client.py index 44b142a0..4e352edb 100644 --- a/client/dali_grpc_client.py +++ b/client/dali_grpc_client.py @@ -32,28 +32,65 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') - parser.add_argument('--batch_size', type=int, required=False, default=1, + parser.add_argument('--batch_size', + type=int, + required=False, + default=1, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size') - parser.add_argument('-m', '--model_name', type=str, required=False, default="dali_backend", + parser.add_argument('-m', + '--model_name', + type=str, + required=False, + default="dali_backend", help='Model name') - parser.add_argument('-i', '--input_name', type=str, required=False, default="INPUT", + parser.add_argument('-i', + '--input_name', + type=str, + required=False, + default="INPUT", help='Input name') - parser.add_argument('-o', '--output_name', type=str, required=False, default="OUTPUT", + parser.add_argument('-o', + '--output_name', + type=str, + required=False, + default="OUTPUT", help='Output name') - parser.add_argument('--statistics', action='store_true', required=False, default=False, + parser.add_argument('--statistics', + action='store_true', + required=False, + default=False, help='Print tritonserver statistics after inferring') img_group = parser.add_mutually_exclusive_group() - img_group.add_argument('--img', type=str, required=False, default=None, - help='Run a img dali pipeline. Arg: path to the image.') - img_group.add_argument('--img_dir', type=str, required=False, default=None, - help='Directory, with images that will be broken down into batches and ' - 'inferred. The directory must contain images only') + img_group.add_argument( + '--img', + type=str, + required=False, + default=None, + help='Run a img dali pipeline. Arg: path to the image.') + img_group.add_argument( + '--img_dir', + type=str, + required=False, + default=None, + help='Directory, with images that will be broken down into batches and ' + 'inferred. 
The directory must contain images only') return parser.parse_args() @@ -76,8 +113,9 @@ def load_images(dir_path: str, name_pattern='.', max_images=-1): images = [] # Traverses directory for files (not dirs) and returns full paths to them - path_generator = (os.path.join(dir_path, f) for f in os.listdir(dir_path) if - os.path.isfile(os.path.join(dir_path, f)) and + path_generator = (os.path.join(dir_path, f) + for f in os.listdir(dir_path) + if os.path.isfile(os.path.join(dir_path, f)) and re.search(name_pattern, f) is not None) img_paths = [dir_path] if os.path.isfile(dir_path) else list(path_generator) @@ -92,9 +130,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1 """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -119,8 +161,8 @@ def generate_outputs(output_name): def main(): FLAGS = parse_args() try: - triton_client = tritonclient.grpc.InferenceServerClient(url=FLAGS.url, - verbose=FLAGS.verbose) + triton_client = tritonclient.grpc.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit() @@ -130,14 +172,18 @@ def main(): print("Loading images") - image_data = load_images(FLAGS.img_dir if FLAGS.img_dir is not None else FLAGS.img, - max_images=FLAGS.batch_size * FLAGS.n_iter) + image_data = load_images( + FLAGS.img_dir if FLAGS.img_dir is not None else FLAGS.img, + max_images=FLAGS.batch_size * FLAGS.n_iter) image_data = array_from_list(image_data) print("Images loaded") - for batch in tqdm(utils.batcher(image_data, FLAGS.batch_size, n_iterations=FLAGS.n_iter), - desc="Inferring", total=FLAGS.n_iter): + for batch in tqdm(utils.batcher(image_data, + FLAGS.batch_size, + n_iterations=FLAGS.n_iter), + desc="Inferring", + total=FLAGS.n_iter): inputs = generate_inputs(FLAGS.input_name, batch.shape, "UINT8") outputs = generate_outputs(FLAGS.output_name) @@ -146,14 +192,17 @@ def main(): inputs[0].set_data_from_numpy(batch) # Test with outputs - results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs) + results = triton_client.infer(model_name=model_name, + inputs=inputs, + outputs=outputs) # Get the output arrays from the results output0_data = results.as_numpy(FLAGS.output_name) maxs = np.argmax(output0_data, axis=1) if FLAGS.statistics: for i in range(len(maxs)): - print("Sample ", i, " - label: ", maxs[i], " ~ ", output0_data[i, maxs[i]]) + print("Sample ", i, " - label: ", maxs[i], " ~ ", + output0_data[i, maxs[i]]) statistics = triton_client.get_inference_statistics(model_name="dali") if len(statistics.model_stats) != 1: diff --git a/client/utils.py b/client/utils.py index 6ee2486b..25e296d2 100644 --- a/client/utils.py +++ b/client/utils.py @@ -30,7 +30,8 @@ def _select_batch_size(batch_size_provider, batch_idx): return batch_size_provider[batch_idx % len(batch_size_provider)] elif isinstance(batch_size_provider, int): return batch_size_provider - raise TypeError("Incorrect batch_size_provider type. 
Actual: ", type(batch_size_provider)) + raise TypeError("Incorrect batch_size_provider type. Actual: ", + type(batch_size_provider)) def batcher(dataset, batch_size_provider, n_iterations=-1): @@ -70,7 +71,7 @@ def batcher(dataset, batch_size_provider, n_iterations=-1): return if curr_sample + batch_size < dataset_size: - yield dataset[curr_sample: curr_sample + batch_size] + yield dataset[curr_sample:curr_sample + batch_size] else: # Get as many samples from this revolution of the dataset as possible, # then repeat the dataset as many revolutions as needed @@ -79,9 +80,8 @@ def batcher(dataset, batch_size_provider, n_iterations=-1): n_rep = (batch_size - suffix) // dataset_size prefix = batch_size - (suffix + dataset_size * n_rep) yield np.concatenate( - (dataset[curr_sample:], - np.repeat(dataset, repeats=n_rep, axis=0), - dataset[:prefix]) - ) + (dataset[curr_sample:], np.repeat(dataset, + repeats=n_rep, + axis=0), dataset[:prefix])) curr_sample = (curr_sample + batch_size) % dataset_size iter_idx += 1 diff --git a/docs/examples/dali_plugin/custom_copy_pipeline.py b/docs/examples/dali_plugin/custom_copy_pipeline.py index cb131251..e5f6c839 100644 --- a/docs/examples/dali_plugin/custom_copy_pipeline.py +++ b/docs/examples/dali_plugin/custom_copy_pipeline.py @@ -24,10 +24,14 @@ plugin_manager.load_library('./model_repository/libcustomcopy.so') + def parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') return parser.parse_args() diff --git a/docs/examples/inception_ensemble/model_repository/dali/1/dali.py b/docs/examples/inception_ensemble/model_repository/dali/1/dali.py index a4377587..21a072ad 100644 --- a/docs/examples/inception_ensemble/model_repository/dali/1/dali.py +++ b/docs/examples/inception_ensemble/model_repository/dali/1/dali.py @@ -28,12 +28,15 @@ @dali.pipeline_def(batch_size=3, num_threads=1, device_id=0) def pipe(): images = dali.fn.external_source(device="cpu", name="DALI_INPUT_0") - images = dali.fn.decoders.image(images, device="mixed", output_type=types.RGB) + images = dali.fn.decoders.image(images, + device="mixed", + output_type=types.RGB) images = dali.fn.resize(images, resize_x=299, resize_y=299) - images = dali.fn.crop_mirror_normalize(images, - dtype=types.FLOAT, - output_layout="HWC", - crop=(299, 299), - mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], - std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + images = dali.fn.crop_mirror_normalize( + images, + dtype=types.FLOAT, + output_layout="HWC", + crop=(299, 299), + mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) return images diff --git a/docs/examples/multi_input/multi_input_pipeline.py b/docs/examples/multi_input/multi_input_pipeline.py index f79c1ded..05fc8531 100644 --- a/docs/examples/multi_input/multi_input_pipeline.py +++ b/docs/examples/multi_input/multi_input_pipeline.py @@ -24,8 +24,11 @@ def parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save 
it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') return parser.parse_args() diff --git a/docs/examples/perf_analyzer/decoding_pipeline.py b/docs/examples/perf_analyzer/decoding_pipeline.py index fd7cbe72..116162fd 100644 --- a/docs/examples/perf_analyzer/decoding_pipeline.py +++ b/docs/examples/perf_analyzer/decoding_pipeline.py @@ -25,15 +25,20 @@ def parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') return parser.parse_args() @dali.pipeline_def(batch_size=256, num_threads=4, device_id=0) def pipe(): images = dali.fn.external_source(device="cpu", name="DALI_INPUT_0") - images = dali.fn.decoders.image(images, device="mixed", output_type=types.RGB) + images = dali.fn.decoders.image(images, + device="mixed", + output_type=types.RGB) return images diff --git a/docs/examples/resnet50_trt/client.py b/docs/examples/resnet50_trt/client.py index e4a06158..7aa6747b 100644 --- a/docs/examples/resnet50_trt/client.py +++ b/docs/examples/resnet50_trt/client.py @@ -41,7 +41,8 @@ def load_image(img_path: str): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--model_name", - type=str, required=False, + type=str, + required=False, default="ensemble_dali_resnet50", help="Model name") parser.add_argument("--image", @@ -53,19 +54,22 @@ def load_image(img_path: str): required=False, default="localhost:8001", help="Inference server URL. 
Default is localhost:8001.") - parser.add_argument('-v', "--verbose", + parser.add_argument('-v', + "--verbose", action="store_true", required=False, default=False, help='Enable verbose output') - parser.add_argument("--label_file", - type=str, - default="./model_repository/resnet50_trt/labels.txt", - help="Path to the file with text representation of available labels") + parser.add_argument( + "--label_file", + type=str, + default="./model_repository/resnet50_trt/labels.txt", + help="Path to the file with text representation of available labels") args = parser.parse_args() try: - triton_client = tritongrpcclient.InferenceServerClient(url=args.url, verbose=args.verbose) + triton_client = tritongrpcclient.InferenceServerClient( + url=args.url, verbose=args.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) @@ -80,7 +84,8 @@ def load_image(img_path: str): image_data = load_image(args.image) image_data = np.expand_dims(image_data, axis=0) - inputs.append(tritongrpcclient.InferInput(input_name, image_data.shape, "UINT8")) + inputs.append( + tritongrpcclient.InferInput(input_name, image_data.shape, "UINT8")) outputs.append(tritongrpcclient.InferRequestedOutput(output_name)) inputs[0].set_data_from_numpy(image_data) diff --git a/docs/examples/resnet50_trt/onnx_exporter.py b/docs/examples/resnet50_trt/onnx_exporter.py index f1e4687a..749234ce 100644 --- a/docs/examples/resnet50_trt/onnx_exporter.py +++ b/docs/examples/resnet50_trt/onnx_exporter.py @@ -41,7 +41,15 @@ do_constant_folding=True, input_names=['input'], output_names=['output'], - dynamic_axes={'input': {0: 'batch_size', 2: "height", 3: 'width'}, - 'output': {0: 'batch_size'}}) + dynamic_axes={ + 'input': { + 0: 'batch_size', + 2: "height", + 3: 'width' + }, + 'output': { + 0: 'batch_size' + } + }) print("Saved {}".format(args.save)) diff --git a/docs/examples/resnet50_trt/serialize_dali_pipeline.py b/docs/examples/resnet50_trt/serialize_dali_pipeline.py index 93281c27..8b6cd9f6 100644 --- a/docs/examples/resnet50_trt/serialize_dali_pipeline.py +++ b/docs/examples/resnet50_trt/serialize_dali_pipeline.py @@ -27,21 +27,25 @@ def parse_args(): import argparse parser = argparse.ArgumentParser() - parser.add_argument("--save", default="./model_repository/dali/1/model.dali") + parser.add_argument("--save", + default="./model_repository/dali/1/model.dali") return parser.parse_args() @dali.pipeline_def(batch_size=256, num_threads=4, device_id=0) def pipe(): images = dali.fn.external_source(device="cpu", name="DALI_INPUT_0") - images = dali.fn.decoders.image(images, device="mixed", output_type=types.RGB) + images = dali.fn.decoders.image(images, + device="mixed", + output_type=types.RGB) images = dali.fn.resize(images, resize_x=224, resize_y=224) - images = dali.fn.crop_mirror_normalize(images, - dtype=types.FLOAT, - output_layout="CHW", - crop=(224, 224), - mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], - std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + images = dali.fn.crop_mirror_normalize( + images, + dtype=types.FLOAT, + output_layout="CHW", + crop=(224, 224), + mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) return images diff --git a/docs/examples/video_decode_remap/client.py b/docs/examples/video_decode_remap/client.py index 9bae765b..9b247d63 100644 --- a/docs/examples/video_decode_remap/client.py +++ b/docs/examples/video_decode_remap/client.py @@ -31,14 +31,23 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', 
type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') - parser.add_argument('--video', type=str, required=False, default=None, - help='Path to a directory, where the video data is located.') + parser.add_argument( + '--video', + type=str, + required=False, + default=None, + help='Path to a directory, where the video data is located.') return parser.parse_args() class UserData: + def __init__(self): self._completed_requests = queue.Queue() @@ -64,9 +73,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1. Pad if necessary. """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, (0, ml - arr.shape[0])), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, (0, ml - arr.shape[0])), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -82,13 +95,17 @@ def main(): if FLAGS.video is None: dali_extra_path = os.environ['DALI_EXTRA_PATH'] filenames = [ - os.path.join(dali_extra_path, "db", "video", "containers", "mkv", "cfr.mkv"), + os.path.join(dali_extra_path, "db", "video", "containers", "mkv", + "cfr.mkv"), ] else: - filenames = [os.path.join(FLAGS.video, p) for p in os.listdir(FLAGS.video)] + filenames = [ + os.path.join(FLAGS.video, p) for p in os.listdir(FLAGS.video) + ] filenames = filenames[:input_batch_size] - with tritonclient.grpc.InferenceServerClient(url=FLAGS.url) as triton_client: + with tritonclient.grpc.InferenceServerClient( + url=FLAGS.url) as triton_client: model_name = "model.dali" model_version = -1 @@ -105,7 +122,8 @@ def main(): video_raw = load_videos(filenames) video_raw = array_from_list(video_raw) input_shape = list(video_raw.shape) - assert input_batch_size == input_shape[0], f"{input_batch_size} == {input_shape[0]}" + assert input_batch_size == input_shape[ + 0], f"{input_batch_size} == {input_shape[0]}" # Config inputs 1 & 2: undistort (remap) maps npz = np.load('remap.npz') @@ -127,7 +145,9 @@ def main(): inputs[2].set_data_from_numpy(remap_v) request_id = "0" - triton_client.async_stream_infer(model_name=model_name, inputs=inputs, request_id=request_id, + triton_client.async_stream_infer(model_name=model_name, + inputs=inputs, + request_id=request_id, outputs=outputs) data_item = user_data._completed_requests.get() diff --git a/docs/examples/video_decode_remap/model_repository/model.dali/1/dali.py b/docs/examples/video_decode_remap/model_repository/model.dali/1/dali.py index aeb9df54..4905f840 100644 --- a/docs/examples/video_decode_remap/model_repository/model.dali/1/dali.py +++ b/docs/examples/video_decode_remap/model_repository/model.dali/1/dali.py @@ -30,8 +30,11 @@ @autoserialize -@dali.pipeline_def(batch_size=3, num_threads=3, device_id=0, - output_dtype=dali.types.UINT8, output_ndim=[4]) +@dali.pipeline_def(batch_size=3, + num_threads=3, + device_id=0, + output_dtype=dali.types.UINT8, + output_ndim=[4]) def pipeline(): """ DALI Pipeline, that performs the following processing: @@ -41,15 +44,23 @@ def pipeline(): 4. OUTPUT - distorted and decoded video. 
""" # Decode video - vid = fn.experimental.inputs.video(name="INPUT", sequence_length=5, device='mixed') + vid = fn.experimental.inputs.video(name="INPUT", + sequence_length=5, + device='mixed') # Resize to match sizes of Remap parameters. This step is artificial in real life case # you most probably do not want to resize the image before removing the distortion. vid = fn.resize(vid, resize_x=OUT_WIDTH, resize_y=OUT_HEIGHT) # Remove distortion. - mapx = fn.external_source(name="MAPX", ndim=2, dtype=dali.types.FLOAT, repeat_last=True).gpu() - mapy = fn.external_source(name="MAPY", ndim=2, dtype=dali.types.FLOAT, repeat_last=True).gpu() + mapx = fn.external_source(name="MAPX", + ndim=2, + dtype=dali.types.FLOAT, + repeat_last=True).gpu() + mapy = fn.external_source(name="MAPY", + ndim=2, + dtype=dali.types.FLOAT, + repeat_last=True).gpu() # Provided camera maps assume, that the (0,0) point is in the center of the image. # Therefore, we have to modify them to have the origin in the top-left corner. mapx = mapx - OUT_WIDTH * 0.5 diff --git a/python/dali_backend/test_utils/client.py b/python/dali_backend/test_utils/client.py index c5928e8c..d7ed9c38 100644 --- a/python/dali_backend/test_utils/client.py +++ b/python/dali_backend/test_utils/client.py @@ -20,7 +20,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - import tritonclient.grpc as t_client import numpy as np from typing import Sequence @@ -29,20 +28,21 @@ import argparse from concurrent.futures import ThreadPoolExecutor, as_completed + # TODO: Extend def type_to_string(dtype): - if dtype == np.half: - return "FP16" - if dtype == np.single: - return "FP32" - if dtype == np.double: - return "FP64" - if dtype == np.uint8: - return "UINT8" - if dtype == np.int32: - return "INT32" - if dtype == np.int64: - return "INT64" + if dtype == np.half: + return "FP16" + if dtype == np.single: + return "FP32" + if dtype == np.double: + return "FP64" + if dtype == np.uint8: + return "UINT8" + if dtype == np.int32: + return "INT32" + if dtype == np.int64: + return "INT64" def grouper(n, iterable): @@ -55,48 +55,66 @@ def grouper(n, iterable): class TestClient: - def __init__(self, model_name: str, input_names: Sequence[str], output_names: Sequence[str], - url, concurrency=1, verbose=False): - self.client = t_client.InferenceServerClient(url=url, verbose=verbose) - self.input_names = input_names - self.output_names = output_names - self.concurrency = concurrency - self.model_name = model_name - @staticmethod - def _get_input(batch, name): - inp = t_client.InferInput(name, list(batch.shape), type_to_string(batch.dtype)) - inp.set_data_from_numpy(batch) - return inp + def __init__(self, + model_name: str, + input_names: Sequence[str], + output_names: Sequence[str], + url, + concurrency=1, + verbose=False): + self.client = t_client.InferenceServerClient(url=url, verbose=verbose) + self.input_names = input_names + self.output_names = output_names + self.concurrency = concurrency + self.model_name = model_name + + @staticmethod + def _get_input(batch, name): + inp = t_client.InferInput(name, list(batch.shape), + type_to_string(batch.dtype)) + inp.set_data_from_numpy(batch) + return inp - def test_infer(self, data, it): - assert(len(data) == len(self.input_names)) - if (len(data) > 1): - for b in data: - assert b.shape[0] == data[0].shape[0] - inputs = [self._get_input(batch, name) for batch, name in zip(data, self.input_names)] - outputs = [t_client.InferRequestedOutput(name) for name in self.output_names] - res 
= self.client.infer(model_name=self.model_name, inputs=inputs, outputs=outputs) - res_data = [res.as_numpy(name) for name in self.output_names] - return it, data, res_data + def test_infer(self, data, it): + assert (len(data) == len(self.input_names)) + if (len(data) > 1): + for b in data: + assert b.shape[0] == data[0].shape[0] + inputs = [ + self._get_input(batch, name) + for batch, name in zip(data, self.input_names) + ] + outputs = [ + t_client.InferRequestedOutput(name) for name in self.output_names + ] + res = self.client.infer(model_name=self.model_name, + inputs=inputs, + outputs=outputs) + res_data = [res.as_numpy(name) for name in self.output_names] + return it, data, res_data - def run_tests(self, data, compare_to, n_infers=-1, eps=1e-7): - generator = data if n_infers < 1 else islice(cycle(data), n_infers) - for pack in grouper(self.concurrency, enumerate(generator)): - with ThreadPoolExecutor(max_workers=self.concurrency) as executor: - results_f = [executor.submit(self.test_infer, data, it) for it, data in pack] - for future in as_completed(results_f): - it, data, results = future.result() - ref = compare_to(*data) - assert(len(results) == len(ref)) - for out_i, (out, ref_out) in enumerate(zip(results, ref)): - assert out.shape == ref_out.shape, "Expected: {}, Actual: {}".format(ref_out.shape, out.shape) - if not np.allclose(out, ref_out, atol=eps): - print("Test failure in iteration", it) - print("Output", out_i) - print("Expected:\n", ref_out) - print("Actual:\n", out) - print("Shape: ", ref_out.shape) - print("Mean err", (out - ref_out).mean()) - assert False - print('PASS iteration:', it) + def run_tests(self, data, compare_to, n_infers=-1, eps=1e-7): + generator = data if n_infers < 1 else islice(cycle(data), n_infers) + for pack in grouper(self.concurrency, enumerate(generator)): + with ThreadPoolExecutor(max_workers=self.concurrency) as executor: + results_f = [ + executor.submit(self.test_infer, data, it) + for it, data in pack + ] + for future in as_completed(results_f): + it, data, results = future.result() + ref = compare_to(*data) + assert (len(results) == len(ref)) + for out_i, (out, ref_out) in enumerate(zip(results, ref)): + assert out.shape == ref_out.shape, "Expected: {}, Actual: {}".format( + ref_out.shape, out.shape) + if not np.allclose(out, ref_out, atol=eps): + print("Test failure in iteration", it) + print("Output", out_i) + print("Expected:\n", ref_out) + print("Actual:\n", out) + print("Shape: ", ref_out.shape) + print("Mean err", (out - ref_out).mean()) + assert False + print('PASS iteration:', it) diff --git a/qa/L0_autoconfig/client.py b/qa/L0_autoconfig/client.py index c7fdb17a..a63b78a6 100644 --- a/qa/L0_autoconfig/client.py +++ b/qa/L0_autoconfig/client.py @@ -24,60 +24,66 @@ def check_config(config, bs, out_names, dyn_batching): - assert config['max_batch_size'] == bs + assert config['max_batch_size'] == bs - inps = config['input'] - assert len(inps) == 2 - assert inps[0]['name'] == 'DALI_INPUT_0' - assert inps[0]['data_type'] == 'TYPE_FP16' - assert inps[0]['dims'] == ['-1'] - assert inps[0]['allow_ragged_batch'] == True - assert inps[1]['name'] == 'DALI_INPUT_1' - assert inps[1]['data_type'] == 'TYPE_FP16' - assert inps[1]['dims'] == ['-1'] - assert inps[1]['allow_ragged_batch'] == True + inps = config['input'] + assert len(inps) == 2 + assert inps[0]['name'] == 'DALI_INPUT_0' + assert inps[0]['data_type'] == 'TYPE_FP16' + assert inps[0]['dims'] == ['-1'] + assert inps[0]['allow_ragged_batch'] == True + assert inps[1]['name'] == 
'DALI_INPUT_1' + assert inps[1]['data_type'] == 'TYPE_FP16' + assert inps[1]['dims'] == ['-1'] + assert inps[1]['allow_ragged_batch'] == True - outs = config['output'] - assert len(outs) == 2 - assert outs[0]['name'] == out_names[0] - assert outs[0]['data_type'] == 'TYPE_FP16' - assert outs[0]['dims'] == ['-1'] - assert outs[1]['name'] == out_names[1] - assert outs[1]['data_type'] == 'TYPE_FP32' - assert outs[1]['dims'] == ['-1'] - assert config['dynamic_batching'] == dyn_batching + outs = config['output'] + assert len(outs) == 2 + assert outs[0]['name'] == out_names[0] + assert outs[0]['data_type'] == 'TYPE_FP16' + assert outs[0]['dims'] == ['-1'] + assert outs[1]['name'] == out_names[1] + assert outs[1]['data_type'] == 'TYPE_FP32' + assert outs[1]['dims'] == ['-1'] + assert config['dynamic_batching'] == dyn_batching def test_configs(url): - client = t_client.InferenceServerClient(url=url) + client = t_client.InferenceServerClient(url=url) - conf1 = client.get_model_config("full_autoconfig", as_json=True) - dyn_batching = { - 'preferred_batch_size': [256] - } - check_config(conf1['config'], 256, ['__ArithmeticGenericOp_2', '__ArithmeticGenericOp_4'], dyn_batching) + conf1 = client.get_model_config("full_autoconfig", as_json=True) + dyn_batching = {'preferred_batch_size': [256]} + check_config(conf1['config'], 256, + ['__ArithmeticGenericOp_2', '__ArithmeticGenericOp_4'], + dyn_batching) - conf2 = client.get_model_config("partial_autoconfig", as_json=True) - dyn_batching = { - 'preferred_batch_size': [16, 32], - 'max_queue_delay_microseconds': '500' - } - check_config(conf2['config'], 32, ['DALI_OUTPUT_0', 'DALI_OUTPUT_1'], dyn_batching) + conf2 = client.get_model_config("partial_autoconfig", as_json=True) + dyn_batching = { + 'preferred_batch_size': [16, 32], + 'max_queue_delay_microseconds': '500' + } + check_config(conf2['config'], 32, ['DALI_OUTPUT_0', 'DALI_OUTPUT_1'], + dyn_batching) - conf1 = client.get_model_config("no_config_file.dali", as_json=True) - dyn_batching = { - 'preferred_batch_size': [256] - } - check_config(conf1['config'], 256, ['__ArithmeticGenericOp_2', '__ArithmeticGenericOp_4'], dyn_batching) + conf1 = client.get_model_config("no_config_file.dali", as_json=True) + dyn_batching = {'preferred_batch_size': [256]} + check_config(conf1['config'], 256, + ['__ArithmeticGenericOp_2', '__ArithmeticGenericOp_4'], + dyn_batching) def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. 
Default is localhost:8001.') return parser.parse_args() if __name__ == '__main__': - args = parse_args() - test_configs(args.url) + args = parse_args() + test_configs(args.url) diff --git a/qa/L0_autoconfig/model_repository/full_autoconfig/1/dali.py b/qa/L0_autoconfig/model_repository/full_autoconfig/1/dali.py index 452ddb8d..a3b65004 100644 --- a/qa/L0_autoconfig/model_repository/full_autoconfig/1/dali.py +++ b/qa/L0_autoconfig/model_repository/full_autoconfig/1/dali.py @@ -24,10 +24,20 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], output_ndim=[1, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], + output_ndim=[1, 1]) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0', dtype=dali.types.FLOAT16, ndim=1) - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1', dtype=dali.types.FLOAT16, ndim=1) - return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', + name='DALI_INPUT_0', + dtype=dali.types.FLOAT16, + ndim=1) + inp2 = fn.external_source(device='gpu', + name='DALI_INPUT_1', + dtype=dali.types.FLOAT16, + ndim=1) + return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 diff --git a/qa/L0_autoconfig/model_repository/no_config_file.dali/1/dali.py b/qa/L0_autoconfig/model_repository/no_config_file.dali/1/dali.py index 452ddb8d..a3b65004 100644 --- a/qa/L0_autoconfig/model_repository/no_config_file.dali/1/dali.py +++ b/qa/L0_autoconfig/model_repository/no_config_file.dali/1/dali.py @@ -24,10 +24,20 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], output_ndim=[1, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], + output_ndim=[1, 1]) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0', dtype=dali.types.FLOAT16, ndim=1) - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1', dtype=dali.types.FLOAT16, ndim=1) - return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', + name='DALI_INPUT_0', + dtype=dali.types.FLOAT16, + ndim=1) + inp2 = fn.external_source(device='gpu', + name='DALI_INPUT_1', + dtype=dali.types.FLOAT16, + ndim=1) + return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 diff --git a/qa/L0_autoconfig/model_repository/partial_autoconfig/1/dali.py b/qa/L0_autoconfig/model_repository/partial_autoconfig/1/dali.py index 2f1c5c1f..4699bd16 100644 --- a/qa/L0_autoconfig/model_repository/partial_autoconfig/1/dali.py +++ b/qa/L0_autoconfig/model_repository/partial_autoconfig/1/dali.py @@ -24,10 +24,17 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], output_ndim=[1, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=[dali.types.FLOAT16, dali.types.FLOAT], + output_ndim=[1, 1]) def pipeline(): - 
inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0', ndim=1) - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1', ndim=1, dtype=dali.types.FLOAT16) - return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0', ndim=1) + inp2 = fn.external_source(device='gpu', + name='DALI_INPUT_1', + ndim=1, + dtype=dali.types.FLOAT16) + return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 diff --git a/qa/L0_config_validation/client.py b/qa/L0_config_validation/client.py index 27a89b71..36d4dfba 100644 --- a/qa/L0_config_validation/client.py +++ b/qa/L0_config_validation/client.py @@ -23,53 +23,67 @@ from tritonclient.utils import InferenceServerException import argparse + def assert_error(func, *args, contains=None): - try: - func(*args) - msg = "Should raise error: " + ', '.join([str(arg) for arg in args]) - assert False, msg - except InferenceServerException as err: - err_msg = str(err) - if contains is not None: - assert contains in err_msg, f'Error message:\n {err_msg}\nshould contain:\n{contains}' + try: + func(*args) + msg = "Should raise error: " + ', '.join([str(arg) for arg in args]) + assert False, msg + except InferenceServerException as err: + err_msg = str(err) + if contains is not None: + assert contains in err_msg, f'Error message:\n {err_msg}\nshould contain:\n{contains}' -def test_loading(url): - client = t_client.InferenceServerClient(url=url) - client.load_model('model0_gpu_valid') - print('model0_gpu_valid OK') +def test_loading(url): + client = t_client.InferenceServerClient(url=url) - assert_error(client.load_model, 'model1_gpu_invalid_i_type', - contains='Invalid argument: Mismatch of data_type config for "DALI_INPUT_1".\n' - 'Data type defined in config: TYPE_FP32\n' - 'Data type defined in pipeline: TYPE_FP16') - print('model1_gpu_invalid_i_type OK') + client.load_model('model0_gpu_valid') + print('model0_gpu_valid OK') - assert_error(client.load_model, 'model2_gpu_invalid_o_ndim', - contains='Invalid argument: Mismatch in number of dimensions for "DALI_OUTPUT_1"\n' - 'Number of dimensions defined in config: 2\n' - 'Number of dimensions defined in pipeline: 1') - print('model2_gpu_invalid_o_ndim OK') + assert_error( + client.load_model, + 'model1_gpu_invalid_i_type', + contains= + 'Invalid argument: Mismatch of data_type config for "DALI_INPUT_1".\n' + 'Data type defined in config: TYPE_FP32\n' + 'Data type defined in pipeline: TYPE_FP16') + print('model1_gpu_invalid_i_type OK') - client.load_model('model3_cpu_valid') - print('model3_cpu_valid OK') + assert_error( + client.load_model, + 'model2_gpu_invalid_o_ndim', + contains= + 'Invalid argument: Mismatch in number of dimensions for "DALI_OUTPUT_1"\n' + 'Number of dimensions defined in config: 2\n' + 'Number of dimensions defined in pipeline: 1') + print('model2_gpu_invalid_o_ndim OK') - assert_error(client.load_model, 'model4_cpu_invalid_missing_output', - contains='The number of outputs specified in the DALI pipeline ' - 'and the configuration file do not match.\n' - 'Model config outputs: 1\n' - 'Pipeline outputs: 2') - print('model4_cpu_invalid_missing_output OK') + client.load_model('model3_cpu_valid') + print('model3_cpu_valid OK') + assert_error( + client.load_model, + 'model4_cpu_invalid_missing_output', + contains='The number of outputs specified in the DALI pipeline ' + 'and the configuration file do not match.\n' + 'Model config outputs: 1\n' + 'Pipeline outputs: 2') + print('model4_cpu_invalid_missing_output OK') def 
parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. Default is localhost:8001.') return parser.parse_args() if __name__ == '__main__': - args = parse_args() - test_loading(args.url) + args = parse_args() + test_loading(args.url) diff --git a/qa/L0_config_validation/pipeline_cpu.py b/qa/L0_config_validation/pipeline_cpu.py index c4ab70dd..8d57bb2d 100644 --- a/qa/L0_config_validation/pipeline_cpu.py +++ b/qa/L0_config_validation/pipeline_cpu.py @@ -24,10 +24,16 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=None, - output_dtype=[dali.types.FLOAT, None], output_ndim=[1, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=None, + output_dtype=[dali.types.FLOAT, None], + output_ndim=[1, 1]) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') - inp2 = fn.external_source(device='cpu', name='DALI_INPUT_1', dtype=dali.types.FLOAT16) - return inp1 / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') + inp2 = fn.external_source(device='cpu', + name='DALI_INPUT_1', + dtype=dali.types.FLOAT16) + return inp1 / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 diff --git a/qa/L0_config_validation/pipeline_gpu.py b/qa/L0_config_validation/pipeline_gpu.py index 942e1380..d4678c09 100644 --- a/qa/L0_config_validation/pipeline_gpu.py +++ b/qa/L0_config_validation/pipeline_gpu.py @@ -24,10 +24,16 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=[dali.types.FLOAT, None], output_ndim=[1, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=[dali.types.FLOAT, None], + output_ndim=[1, 1]) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1', dtype=dali.types.FLOAT16) - return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') + inp2 = fn.external_source(device='gpu', + name='DALI_INPUT_1', + dtype=dali.types.FLOAT16) + return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 diff --git a/qa/L0_identity/identity_client.py b/qa/L0_identity/identity_client.py index 2145c902..d6488f23 100644 --- a/qa/L0_identity/identity_client.py +++ b/qa/L0_identity/identity_client.py @@ -30,17 +30,35 @@ np.random.seed(100019) + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. 
Default is localhost:8001.') - parser.add_argument('--batch_size', type=int, required=False, default=4, + parser.add_argument('--batch_size', + type=int, + required=False, + default=4, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size') - parser.add_argument('--model_name', type=str, required=False, default="dali_identity", + parser.add_argument('--model_name', + type=str, + required=False, + default="dali_identity", help='Model name') return parser.parse_args() @@ -49,9 +67,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1 """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -68,31 +90,34 @@ def batcher(dataset, max_batch_size, n_iterations=-1): raise StopIteration batch_size = min(randint(1, max_batch_size), len(dataset) - data_idx) iter_idx += 1 - yield dataset[data_idx : data_idx + batch_size] + yield dataset[data_idx:data_idx + batch_size] data_idx += batch_size def main(): FLAGS = parse_args() try: - triton_client = tritongrpcclient.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) + triton_client = tritongrpcclient.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) - if not (triton_client.is_server_live() or - triton_client.is_server_ready() or + if not (triton_client.is_server_live() or triton_client.is_server_ready() or triton_client.is_model_ready(model_name=FLAGS.model_name)): - print("Error connecting to server: Server live {}. Server ready {}. Model ready {}".format( - triton_client.is_server_live, triton_client.is_server_ready, - triton_client.is_model_ready(model_name=FLAGS.model_name))) + print( + "Error connecting to server: Server live {}. Server ready {}. 
Model ready {}" + .format(triton_client.is_server_live, triton_client.is_server_ready, + triton_client.is_model_ready(model_name=FLAGS.model_name))) sys.exit(1) model_name = FLAGS.model_name model_version = -1 - input_data = [randint(0, 255, size=randint(100), dtype='uint8') for _ in - range(randint(100) * FLAGS.batch_size)] + input_data = [ + randint(0, 255, size=randint(100), dtype='uint8') + for _ in range(randint(100) * FLAGS.batch_size) + ] input_data = array_from_list(input_data) # Infer diff --git a/qa/L0_identity_cpu/identity_client.py b/qa/L0_identity_cpu/identity_client.py index 9b811506..cab3865f 100644 --- a/qa/L0_identity_cpu/identity_client.py +++ b/qa/L0_identity_cpu/identity_client.py @@ -30,17 +30,35 @@ np.random.seed(100019) + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') - parser.add_argument('--batch_size', type=int, required=False, default=4, + parser.add_argument('--batch_size', + type=int, + required=False, + default=4, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size') - parser.add_argument('--model_name', type=str, required=False, default="dali_identity_cpu", + parser.add_argument('--model_name', + type=str, + required=False, + default="dali_identity_cpu", help='Model name') return parser.parse_args() @@ -49,9 +67,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1 """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -68,31 +90,34 @@ def batcher(dataset, max_batch_size, n_iterations=-1): raise StopIteration batch_size = min(randint(1, max_batch_size), len(dataset) - data_idx) iter_idx += 1 - yield dataset[data_idx : data_idx + batch_size] + yield dataset[data_idx:data_idx + batch_size] data_idx += batch_size def main(): FLAGS = parse_args() try: - triton_client = tritongrpcclient.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) + triton_client = tritongrpcclient.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) - if not (triton_client.is_server_live() or - triton_client.is_server_ready() or + if not (triton_client.is_server_live() or triton_client.is_server_ready() or triton_client.is_model_ready(model_name=FLAGS.model_name)): - print("Error connecting to server: Server live {}. Server ready {}. 
Model ready {}".format( - triton_client.is_server_live, triton_client.is_server_ready, - triton_client.is_model_ready(model_name=FLAGS.model_name))) + print( + "Error connecting to server: Server live {}. Server ready {}. Model ready {}" + .format(triton_client.is_server_live, triton_client.is_server_ready, + triton_client.is_model_ready(model_name=FLAGS.model_name))) sys.exit(1) model_name = FLAGS.model_name model_version = -1 - input_data = [randint(0, 255, size=randint(100), dtype='uint8') for _ in - range(randint(100) * FLAGS.batch_size)] + input_data = [ + randint(0, 255, size=randint(100), dtype='uint8') + for _ in range(randint(100) * FLAGS.batch_size) + ] input_data = array_from_list(input_data) # Infer diff --git a/qa/L0_identity_cpu/model_repository/identity_pipeline.py b/qa/L0_identity_cpu/model_repository/identity_pipeline.py index 2f34cc29..ffe0a894 100644 --- a/qa/L0_identity_cpu/model_repository/identity_pipeline.py +++ b/qa/L0_identity_cpu/model_repository/identity_pipeline.py @@ -24,8 +24,11 @@ def _parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') return parser.parse_args() diff --git a/qa/L0_img_proc/client.py b/qa/L0_img_proc/client.py index 1b3277d6..9a1c3d5d 100644 --- a/qa/L0_img_proc/client.py +++ b/qa/L0_img_proc/client.py @@ -24,34 +24,61 @@ from numpy.random import randint, random import argparse + def ref_func(imgs): - output = [np.fliplr(imgs[i]) for i in range(imgs.shape[0])] - return np.stack(output), + output = [np.fliplr(imgs[i]) for i in range(imgs.shape[0])] + return np.stack(output), + def random_gen(max_batch_size): - while True: - bs = randint(1, max_batch_size + 1) - width = randint(100, 200) - height = randint(100, 200) - yield [(random((bs, height, width, 3)) * 255).astype(np.uint8)] + while True: + bs = randint(1, max_batch_size + 1) + width = randint(100, 200) + height = randint(100, 200) + yield [(random((bs, height, width, 3)) * 255).astype(np.uint8)] def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') - parser.add_argument('-n', '--n_iters', type=int, required=False, default=1, help='Number of iterations') - parser.add_argument('-c', '--concurrency', type=int, required=False, default=1, + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. 
Default is localhost:8001.') + parser.add_argument('-n', + '--n_iters', + type=int, + required=False, + default=1, + help='Number of iterations') + parser.add_argument('-c', + '--concurrency', + type=int, + required=False, + default=1, help='Request concurrency level') - parser.add_argument('-b', '--max_batch_size', type=int, required=False, default=256) + parser.add_argument('-b', + '--max_batch_size', + type=int, + required=False, + default=256) return parser.parse_args() + def main(): - args = parse_args() - client = TestClient('img_proc.dali', ['DALI_INPUT_0'], ['DALI_OUTPUT_0',], args.url, - concurrency=args.concurrency) - client.run_tests(random_gen(args.max_batch_size), ref_func, - n_infers=args.n_iters, eps=1e-4) + args = parse_args() + client = TestClient('img_proc.dali', ['DALI_INPUT_0'], [ + 'DALI_OUTPUT_0', + ], + args.url, + concurrency=args.concurrency) + client.run_tests(random_gen(args.max_batch_size), + ref_func, + n_infers=args.n_iters, + eps=1e-4) + if __name__ == '__main__': - main() + main() diff --git a/qa/L0_img_proc/model_repository/img_proc.dali/1/dali.py b/qa/L0_img_proc/model_repository/img_proc.dali/1/dali.py index 4939f7b4..33bdb216 100644 --- a/qa/L0_img_proc/model_repository/img_proc.dali/1/dali.py +++ b/qa/L0_img_proc/model_repository/img_proc.dali/1/dali.py @@ -24,9 +24,16 @@ import multiprocessing as mp from nvidia.dali.plugin.triton import autoserialize + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=[dali.types.UINT8], output_ndim=[3]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=[dali.types.UINT8], + output_ndim=[3]) def pipeline(): - img = fn.external_source(device='cpu', name='DALI_INPUT_0', layout='HWC', dtype=dali.types.UINT8) - return fn.flip(img, name='DALI_OUTPUT_0') + img = fn.external_source(device='cpu', + name='DALI_INPUT_0', + layout='HWC', + dtype=dali.types.UINT8) + return fn.flip(img, name='DALI_OUTPUT_0') diff --git a/qa/L0_inception_ensemble/ensemble_client.py b/qa/L0_inception_ensemble/ensemble_client.py index 32c50636..7e099cac 100644 --- a/qa/L0_inception_ensemble/ensemble_client.py +++ b/qa/L0_inception_ensemble/ensemble_client.py @@ -29,24 +29,51 @@ np.random.seed(100019) + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') - parser.add_argument('--batch_size', type=int, required=False, default=1, + parser.add_argument('--batch_size', + type=int, + required=False, + default=1, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size') - parser.add_argument('--model_name', type=str, required=False, default="dali_backend", + parser.add_argument('--model_name', + type=str, + required=False, + default="dali_backend", help='Model name') img_group = parser.add_mutually_exclusive_group() - img_group.add_argument('--img', type=str, required=False, default=None, - help='Run a img dali pipeline. 
Arg: path to the image.') - img_group.add_argument('--img_dir', type=str, required=False, default=None, - help='Directory, with images that will be broken down into batches and inferred. ' - 'The directory must contain only images and single labels.txt file') + img_group.add_argument( + '--img', + type=str, + required=False, + default=None, + help='Run a img dali pipeline. Arg: path to the image.') + img_group.add_argument( + '--img_dir', + type=str, + required=False, + default=None, + help= + 'Directory, with images that will be broken down into batches and inferred. ' + 'The directory must contain only images and single labels.txt file') return parser.parse_args() @@ -69,13 +96,17 @@ def load_images(dir_path: str): labels_fname = 'labels.txt' # Traverses directory for files (not dirs) and returns full paths to them - path_generator = (os.path.join(dir_path, f) for f in os.listdir(dir_path) if - os.path.isfile(os.path.join(dir_path, f)) and f != labels_fname) + path_generator = ( + os.path.join(dir_path, f) + for f in os.listdir(dir_path) + if os.path.isfile(os.path.join(dir_path, f)) and f != labels_fname) img_paths = [dir_path] if os.path.isfile(dir_path) else list(path_generator) # File to dictionary with open(os.path.join(dir_path, labels_fname)) as f: - labels_dict = {k: int(v) for line in f for (k, v) in [line.strip().split(None, 1)]} + labels_dict = { + k: int(v) for line in f for (k, v) in [line.strip().split(None, 1)] + } for img in img_paths: images.append(load_image(img)) @@ -87,9 +118,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1 """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -106,7 +141,7 @@ def batcher(dataset, max_batch_size, n_iterations=-1): raise StopIteration batch_size = min(randint(1, max_batch_size), len(dataset) - data_idx) iter_idx += 1 - yield dataset[data_idx : data_idx + batch_size] + yield dataset[data_idx:data_idx + batch_size] data_idx += batch_size @@ -121,7 +156,8 @@ def save_byte_image(bytes, size_wh=(299, 299), name_suffix=0): def main(): FLAGS = parse_args() try: - triton_client = tritongrpcclient.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) + triton_client = tritongrpcclient.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) @@ -131,7 +167,8 @@ def main(): print("Loading images") - image_data, labels = load_images(FLAGS.img_dir if FLAGS.img_dir is not None else FLAGS.img) + image_data, labels = load_images( + FLAGS.img_dir if FLAGS.img_dir is not None else FLAGS.img) image_data = array_from_list(image_data) print("Images loaded, inferring") @@ -163,7 +200,8 @@ def main(): print("Output shape: ", np.shape(output0_data)) maxs = np.argmax(output0_data, axis=1) for i in range(len(maxs)): - print("Sample ", i, " - label: ", maxs[i], " ~ ", output0_data[i, maxs[i]]) + print("Sample ", i, " - label: ", maxs[i], " ~ ", + output0_data[i, maxs[i]]) if maxs[i] != labels[img_idx]: sys.exit(1) else: diff --git 
a/qa/L0_many_versions/many_versions_client.py b/qa/L0_many_versions/many_versions_client.py index 3db67181..e97bbd9f 100644 --- a/qa/L0_many_versions/many_versions_client.py +++ b/qa/L0_many_versions/many_versions_client.py @@ -31,25 +31,34 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. Default is localhost:8001.') return parser.parse_args() - def main(): FLAGS = parse_args() try: - triton_client = tritongrpcclient.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) + triton_client = tritongrpcclient.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) if not (triton_client.is_server_live() or triton_client.is_server_ready()): - print("Error connecting to server: Server live {}. Server ready {}.".format( - triton_client.is_server_live(), triton_client.is_server_ready())) + print("Error connecting to server: Server live {}. Server ready {}.". + format(triton_client.is_server_live(), + triton_client.is_server_ready())) sys.exit(1) models_loaded = { @@ -72,7 +81,8 @@ def main(): for name, versions in models_not_loaded.items(): for ver in versions: if triton_client.is_model_ready(name, str(ver)): - print("FAILED: Model {} version {} incorrectly loaded".format(name, ver)) + print("FAILED: Model {} version {} incorrectly loaded".format( + name, ver)) sys.exit(1) diff --git a/qa/L0_many_versions/model_repository/addition_pipeline.py b/qa/L0_many_versions/model_repository/addition_pipeline.py index f300acb9..47f6784d 100644 --- a/qa/L0_many_versions/model_repository/addition_pipeline.py +++ b/qa/L0_many_versions/model_repository/addition_pipeline.py @@ -24,8 +24,11 @@ def _parse_args(): import argparse - parser = argparse.ArgumentParser(description="Serialize the pipeline and save it to a file") - parser.add_argument('file_path', type=str, help='The path where to save the serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize the pipeline and save it to a file") + parser.add_argument('file_path', + type=str, + help='The path where to save the serialized pipeline') parser.add_argument('add_value', type=int, help='Value to add to the input') return parser.parse_args() diff --git a/qa/L0_multi_input/multi_input_client.py b/qa/L0_multi_input/multi_input_client.py index 8b16697f..9bf02fbe 100644 --- a/qa/L0_multi_input/multi_input_client.py +++ b/qa/L0_multi_input/multi_input_client.py @@ -30,17 +30,35 @@ np.random.seed(100019) + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action="store_true", required=False, default=False, + parser.add_argument('-v', + '--verbose', + action="store_true", + required=False, + default=False, help='Enable verbose output') - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', + parser.add_argument('-u', + '--url', + type=str, + required=False, + default='localhost:8001', help='Inference server URL. 
Default is localhost:8001.') - parser.add_argument('--batch_size', type=int, required=False, default=1, + parser.add_argument('--batch_size', + type=int, + required=False, + default=1, help='Batch size') - parser.add_argument('--n_iter', type=int, required=False, default=-1, + parser.add_argument('--n_iter', + type=int, + required=False, + default=-1, help='Number of iterations , with `batch_size` size') - parser.add_argument('--model_name', type=str, required=False, default="dali_multi_input", + parser.add_argument('--model_name', + type=str, + required=False, + default="dali_multi_input", help='Model name') return parser.parse_args() @@ -49,9 +67,13 @@ def array_from_list(arrays): """ Convert list of ndarrays to single ndarray with ndims+=1 """ - lengths = list(map(lambda x, arr=arrays: arr[x].shape[0], [x for x in range(len(arrays))])) + lengths = list( + map(lambda x, arr=arrays: arr[x].shape[0], + [x for x in range(len(arrays))])) max_len = max(lengths) - arrays = list(map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), arrays)) + arrays = list( + map(lambda arr, ml=max_len: np.pad(arr, ((0, ml - arr.shape[0]))), + arrays)) for arr in arrays: assert arr.shape == arrays[0].shape, "Arrays must have the same shape" return np.stack(arrays) @@ -66,16 +88,19 @@ def batcher(dataset, max_batch_size, n_iterations=-1): while data_idx < len(dataset): if 0 < n_iterations <= iter_idx: raise StopIteration - batch_size = min(randint(0, max_batch_size) + 1, len(dataset) - data_idx) + batch_size = min( + randint(0, max_batch_size) + 1, + len(dataset) - data_idx) iter_idx += 1 - yield dataset[data_idx : data_idx + batch_size] + yield dataset[data_idx:data_idx + batch_size] data_idx += batch_size def main(): FLAGS = parse_args() try: - triton_client = tritonclient.grpc.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) + triton_client = tritonclient.grpc.InferenceServerClient( + url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) @@ -83,8 +108,10 @@ def main(): model_name = FLAGS.model_name model_version = -1 - input_data = [randint(0, 255, size=randint(100), dtype='uint8') for _ in - range(randint(100) * FLAGS.batch_size)] + input_data = [ + randint(0, 255, size=randint(100), dtype='uint8') + for _ in range(randint(100) * FLAGS.batch_size) + ] input_data = array_from_list(input_data) # Infer @@ -106,15 +133,20 @@ def main(): # Initialize the data input_shape[0] = batch_size scalars = randint(0, 1024, size=(batch_size, 1), dtype=np.int32) - inputs = [tritonclient.grpc.InferInput(iname, input_shape, "UINT8") for iname in - input_names] - scalar_input = tritonclient.grpc.InferInput(scalars_name, [batch_size, 1], "INT32") + inputs = [ + tritonclient.grpc.InferInput(iname, input_shape, "UINT8") + for iname in input_names + ] + scalar_input = tritonclient.grpc.InferInput(scalars_name, + [batch_size, 1], "INT32") for inp in inputs: inp.set_data_from_numpy(np.copy(batch)) scalar_input.set_data_from_numpy(scalars) # Test with outputs - results = triton_client.infer(model_name=model_name, inputs=[*inputs, scalar_input], outputs=outputs) + results = triton_client.infer(model_name=model_name, + inputs=[*inputs, scalar_input], + outputs=outputs) # Get the output arrays from the results for oname in output_names: @@ -122,7 +154,8 @@ def main(): output_data = results.as_numpy(oname) print("Output mean after backend processing:", np.mean(output_data)) print("Output shape: ", np.shape(output_data)) - expected = np.multiply(batch, 1 
if oname is "DALI_unchanged" else scalars, + expected = np.multiply(batch, + 1 if oname is "DALI_unchanged" else scalars, dtype=np.int32) if not np.allclose(output_data, expected): print("Pre/post average does not match") diff --git a/qa/L0_unbatched_model/client.py b/qa/L0_unbatched_model/client.py index 1db828a7..077e7564 100644 --- a/qa/L0_unbatched_model/client.py +++ b/qa/L0_unbatched_model/client.py @@ -30,36 +30,61 @@ from os import environ from itertools import cycle + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') - parser.add_argument('-n', '--n_iters', type=int, required=False, default=1, help='Number of iterations') - parser.add_argument('-c', '--concurrency', type=int, required=False, default=1, + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. Default is localhost:8001.') + parser.add_argument('-n', + '--n_iters', + type=int, + required=False, + default=1, + help='Number of iterations') + parser.add_argument('-c', + '--concurrency', + type=int, + required=False, + default=1, help='Request concurrency level') - parser.add_argument('-b', '--max_batch_size', type=int, required=False, default=2) + parser.add_argument('-b', + '--max_batch_size', + type=int, + required=False, + default=2) return parser.parse_args() + def input_gen(max_bs): - while True: - size1 = np.random.randint(300, 1000) - size2 = np.random.randint(300, 1000) - bs = np.random.randint(1, max_bs) - yield np.random.random((bs, size1)).astype(np.float32), \ - np.random.randint(0, 256, size=(bs, size2), dtype=np.int32) + while True: + size1 = np.random.randint(300, 1000) + size2 = np.random.randint(300, 1000) + bs = np.random.randint(1, max_bs) + yield np.random.random((bs, size1)).astype(np.float32), \ + np.random.randint(0, 256, size=(bs, size2), dtype=np.int32) def ref_func(inp1, inp2): - return inp1 * 2, \ - (inp2 * 3).astype(np.float32) + return inp1 * 2, \ + (inp2 * 3).astype(np.float32) def main(): - args = parse_args() - client = TestClient('model.dali', ['DALI_INPUT_0', 'DALI_INPUT_1'], ['DALI_OUTPUT_0', 'DALI_OUTPUT_1'], args.url, - concurrency=args.concurrency) - client.run_tests(input_gen(args.max_batch_size), ref_func, - n_infers=args.n_iters, eps=1e-4) + args = parse_args() + client = TestClient('model.dali', ['DALI_INPUT_0', 'DALI_INPUT_1'], + ['DALI_OUTPUT_0', 'DALI_OUTPUT_1'], + args.url, + concurrency=args.concurrency) + client.run_tests(input_gen(args.max_batch_size), + ref_func, + n_infers=args.n_iters, + eps=1e-4) + if __name__ == '__main__': - main() + main() diff --git a/qa/L0_unbatched_model/model_repository/model.dali/1/dali.py b/qa/L0_unbatched_model/model_repository/model.dali/1/dali.py index 312b45bc..427ee7c9 100644 --- a/qa/L0_unbatched_model/model_repository/model.dali/1/dali.py +++ b/qa/L0_unbatched_model/model_repository/model.dali/1/dali.py @@ -26,9 +26,18 @@ @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=dali.types.FLOAT, output_ndim=1) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=dali.types.FLOAT, + output_ndim=1) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0', ndim=1, dtype=dali.types.FLOAT) - inp2 = fn.external_source(device='cpu', name='DALI_INPUT_1', ndim=1, dtype=dali.types.INT32) 
- return inp1.gpu() * 2, fn.cast(inp2 * 3, dtype=dali.types.FLOAT) + inp1 = fn.external_source(device='cpu', + name='DALI_INPUT_0', + ndim=1, + dtype=dali.types.FLOAT) + inp2 = fn.external_source(device='cpu', + name='DALI_INPUT_1', + ndim=1, + dtype=dali.types.INT32) + return inp1.gpu() * 2, fn.cast(inp2 * 3, dtype=dali.types.FLOAT) diff --git a/qa/L0_video_input_decoupled/client.py b/qa/L0_video_input_decoupled/client.py index 615cfc38..b6c433b8 100644 --- a/qa/L0_video_input_decoupled/client.py +++ b/qa/L0_video_input_decoupled/client.py @@ -19,7 +19,6 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - from functools import partial from itertools import cycle import numpy as np @@ -34,6 +33,7 @@ import nvidia.dali.fn as fn from nvidia.dali import pipeline_def + class UserData: def __init__(self): @@ -48,16 +48,15 @@ def callback(user_data, result, error): def get_dali_extra_path(): - return environ['DALI_EXTRA_PATH'] + return environ['DALI_EXTRA_PATH'] def input_gen(): - filenames = glob(f'{get_dali_extra_path()}/db/video/[cv]fr/*.mp4') - filenames = filter(lambda filename: 'mpeg4' not in filename, filenames) - filenames = filter(lambda filename: 'hevc' not in filename, filenames) - for filename in filenames: - yield np.fromfile(filename, dtype=np.uint8) - + filenames = glob(f'{get_dali_extra_path()}/db/video/[cv]fr/*.mp4') + filenames = filter(lambda filename: 'mpeg4' not in filename, filenames) + filenames = filter(lambda filename: 'hevc' not in filename, filenames) + for filename in filenames: + yield np.fromfile(filename, dtype=np.uint8) FRAMES_PER_SEQUENCE = 5 @@ -66,21 +65,39 @@ def input_gen(): user_data = UserData() + @pipeline_def(batch_size=1, num_threads=1, device_id=0, prefetch_queue_depth=1) def ref_pipeline(device): inp = fn.external_source(name='data') - decoded = fn.experimental.decoders.video(inp, device='mixed' if device == 'gpu' else 'cpu') + decoded = fn.experimental.decoders.video( + inp, device='mixed' if device == 'gpu' else 'cpu') return fn.pad(decoded, axes=0, align=FRAMES_PER_SEQUENCE) def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') - parser.add_argument('-d', '--device', type=str, required=False, default='cpu', help='cpu or gpu') - parser.add_argument('-n', '--n_iters', type=int, required=False, default=1, help='Number of iterations') + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. 
Default is localhost:8001.') + parser.add_argument('-d', + '--device', + type=str, + required=False, + default='cpu', + help='cpu or gpu') + parser.add_argument('-n', + '--n_iters', + type=int, + required=False, + default=1, + help='Number of iterations') return parser.parse_args() + if __name__ == '__main__': args = parse_args() model_name = 'model.dali' if args.device == 'cpu' else 'model_gpu.dali' @@ -88,7 +105,8 @@ def parse_args(): triton_client.start_stream(callback=partial(callback, user_data)) for req_id, input_data in zip(range(args.n_iters), cycle(input_gen())): - inp = t_client.InferInput('INPUT', [1, input_data.shape[0]], 'UINT8') + inp = t_client.InferInput('INPUT', [1, input_data.shape[0]], + 'UINT8') inp.set_data_from_numpy(input_data.reshape((1, -1))) outp = t_client.InferRequestedOutput('OUTPUT') @@ -110,7 +128,8 @@ def parse_args(): n_frames = expected_result.shape[0] recv_count = 0 - expected_count = (n_frames + FRAMES_PER_BATCH - 1) // FRAMES_PER_BATCH + expected_count = (n_frames + FRAMES_PER_BATCH - + 1) // FRAMES_PER_BATCH result_dict = {} while recv_count < expected_count: data_item = user_data._completed_requests.get() @@ -124,9 +143,11 @@ def parse_args(): recv_count += 1 result_list = result_dict[request_id] - expected_result = np.split(expected_result, n_frames / FRAMES_PER_SEQUENCE) + expected_result = np.split(expected_result, + n_frames / FRAMES_PER_SEQUENCE) for i, result in enumerate(result_list): - expected_batch = expected_result[i * BATCH_SIZE : min((i+1) * BATCH_SIZE, len(expected_result))] + expected_batch = expected_result[i * BATCH_SIZE:min( + (i + 1) * BATCH_SIZE, len(expected_result))] expected_batch = np.asarray(expected_batch) result_data = result.as_numpy('OUTPUT') assert np.allclose(expected_batch, result_data) diff --git a/qa/L0_video_input_decoupled/model_repository/model.dali/1/dali.py b/qa/L0_video_input_decoupled/model_repository/model.dali/1/dali.py index 9511d5c9..fa23c2ac 100644 --- a/qa/L0_video_input_decoupled/model_repository/model.dali/1/dali.py +++ b/qa/L0_video_input_decoupled/model_repository/model.dali/1/dali.py @@ -25,6 +25,12 @@ @autoserialize -@dali.pipeline_def(batch_size=3, num_threads=3, device_id=0, output_ndim=4, output_dtype=dali.types.UINT8) +@dali.pipeline_def(batch_size=3, + num_threads=3, + device_id=0, + output_ndim=4, + output_dtype=dali.types.UINT8) def pipeline(): - return fn.experimental.inputs.video(sequence_length=5, name='INPUT', last_sequence_policy='pad') + return fn.experimental.inputs.video(sequence_length=5, + name='INPUT', + last_sequence_policy='pad') diff --git a/qa/L0_video_input_decoupled/model_repository/model_gpu.dali/1/dali.py b/qa/L0_video_input_decoupled/model_repository/model_gpu.dali/1/dali.py index 2cc33349..5a8a9bef 100644 --- a/qa/L0_video_input_decoupled/model_repository/model_gpu.dali/1/dali.py +++ b/qa/L0_video_input_decoupled/model_repository/model_gpu.dali/1/dali.py @@ -25,6 +25,13 @@ @autoserialize -@dali.pipeline_def(batch_size=3, num_threads=3, device_id=0, output_ndim=4, output_dtype=dali.types.UINT8) +@dali.pipeline_def(batch_size=3, + num_threads=3, + device_id=0, + output_ndim=4, + output_dtype=dali.types.UINT8) def pipeline(): - return fn.experimental.inputs.video(sequence_length=5, name='INPUT', device='mixed', last_sequence_policy='pad') + return fn.experimental.inputs.video(sequence_length=5, + name='INPUT', + device='mixed', + last_sequence_policy='pad') diff --git a/qa/L0_video_split/client.py b/qa/L0_video_split/client.py index 73257e70..e4172c43 100644 --- 
a/qa/L0_video_split/client.py +++ b/qa/L0_video_split/client.py @@ -30,72 +30,106 @@ from os import environ from itertools import cycle + def get_dali_extra_path(): - return environ['DALI_EXTRA_PATH'] + return environ['DALI_EXTRA_PATH'] + def input_gen(batch_size): - filenames = glob(f'{get_dali_extra_path()}/db/video/[cv]fr/*.mp4') - filenames = filter(lambda filename: 'mpeg4' not in filename, filenames) - filenames = filter(lambda filename: 'hevc' not in filename, filenames) - filenames = cycle(filenames) - while True: - batch = [] - for _ in range(batch_size): - batch.append(np.fromfile(next(filenames), dtype=np.uint8)) - yield [eager.pad(batch).as_array()] + filenames = glob(f'{get_dali_extra_path()}/db/video/[cv]fr/*.mp4') + filenames = filter(lambda filename: 'mpeg4' not in filename, filenames) + filenames = filter(lambda filename: 'hevc' not in filename, filenames) + filenames = cycle(filenames) + while True: + batch = [] + for _ in range(batch_size): + batch.append(np.fromfile(next(filenames), dtype=np.uint8)) + yield [eager.pad(batch).as_array()] FRAMES_PER_SEQUENCE = 5 OUT_WIDTH = 300 OUT_HEIGHT = 300 -@dali.pipeline_def(num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=dali.types.UINT8, output_ndim=[5, 4, 1], + +@dali.pipeline_def(num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=dali.types.UINT8, + output_ndim=[5, 4, 1], prefetch_queue_depth=1) def pipeline(): - vid = fn.external_source(device='cpu', name='INPUT', ndim=1, dtype=dali.types.UINT8) - seq = fn.experimental.decoders.video(vid, device='mixed') - seq = fn.resize(seq, resize_x=OUT_WIDTH, resize_y=OUT_HEIGHT) - original_sequence = seq - seq = fn.pad(seq, axis_names='F', align=FRAMES_PER_SEQUENCE) + vid = fn.external_source(device='cpu', + name='INPUT', + ndim=1, + dtype=dali.types.UINT8) + seq = fn.experimental.decoders.video(vid, device='mixed') + seq = fn.resize(seq, resize_x=OUT_WIDTH, resize_y=OUT_HEIGHT) + original_sequence = seq + seq = fn.pad(seq, axis_names='F', align=FRAMES_PER_SEQUENCE) - return fn.reshape(seq, shape=[-1, FRAMES_PER_SEQUENCE, OUT_HEIGHT, OUT_WIDTH, 3], name='OUTPUT'), \ - original_sequence, \ - vid + return fn.reshape(seq, shape=[-1, FRAMES_PER_SEQUENCE, OUT_HEIGHT, OUT_WIDTH, 3], name='OUTPUT'), \ + original_sequence, \ + vid def _split_outer_dim(output): arrays = [output.at(i) for i in range(len(output.shape()))] return np.concatenate(arrays) + class RefFunc: - def __init__(self, max_batch_size): - self._pipeline = pipeline(batch_size=max_batch_size) - self._pipeline.build() + def __init__(self, max_batch_size): + self._pipeline = pipeline(batch_size=max_batch_size) + self._pipeline.build() - def __call__(self, vids): - self._pipeline.feed_input("INPUT", vids) - out1, out2, out3 = self._pipeline.run() - return _split_outer_dim(out1.as_cpu()), _split_outer_dim(out2.as_cpu()), out3.as_array() + def __call__(self, vids): + self._pipeline.feed_input("INPUT", vids) + out1, out2, out3 = self._pipeline.run() + return _split_outer_dim(out1.as_cpu()), _split_outer_dim( + out2.as_cpu()), out3.as_array() def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. 
Default is localhost:8001.') - parser.add_argument('-n', '--n_iters', type=int, required=False, default=1, help='Number of iterations') - parser.add_argument('-c', '--concurrency', type=int, required=False, default=1, + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. Default is localhost:8001.') + parser.add_argument('-n', + '--n_iters', + type=int, + required=False, + default=1, + help='Number of iterations') + parser.add_argument('-c', + '--concurrency', + type=int, + required=False, + default=1, help='Request concurrency level') - parser.add_argument('-b', '--max_batch_size', type=int, required=False, default=2) + parser.add_argument('-b', + '--max_batch_size', + type=int, + required=False, + default=2) return parser.parse_args() + def main(): - args = parse_args() - client = TestClient('model.dali', ['INPUT'], ['OUTPUT', 'OUTPUT_images', 'INPUT'], args.url, - concurrency=args.concurrency) - client.run_tests(input_gen(args.max_batch_size), RefFunc(args.max_batch_size), - n_infers=args.n_iters, eps=1e-4) + args = parse_args() + client = TestClient('model.dali', ['INPUT'], + ['OUTPUT', 'OUTPUT_images', 'INPUT'], + args.url, + concurrency=args.concurrency) + client.run_tests(input_gen(args.max_batch_size), + RefFunc(args.max_batch_size), + n_infers=args.n_iters, + eps=1e-4) + if __name__ == '__main__': - main() + main() diff --git a/qa/L0_video_split/model_repository/model.dali/1/dali.py b/qa/L0_video_split/model_repository/model.dali/1/dali.py index a1c4dde5..b6e61c72 100644 --- a/qa/L0_video_split/model_repository/model.dali/1/dali.py +++ b/qa/L0_video_split/model_repository/model.dali/1/dali.py @@ -28,16 +28,23 @@ OUT_WIDTH = 300 OUT_HEIGHT = 300 + @autoserialize -@dali.pipeline_def(batch_size=256, num_threads=min(mp.cpu_count(), 4), device_id=0, - output_dtype=dali.types.UINT8, output_ndim=[5, 4, 1]) +@dali.pipeline_def(batch_size=256, + num_threads=min(mp.cpu_count(), 4), + device_id=0, + output_dtype=dali.types.UINT8, + output_ndim=[5, 4, 1]) def pipeline(): - vid = fn.external_source(device='cpu', name='INPUT', ndim=1, dtype=dali.types.UINT8) - seq = fn.experimental.decoders.video(vid, device='mixed') - seq = fn.resize(seq, resize_x=OUT_WIDTH, resize_y=OUT_HEIGHT) - nonreshaped_sequence = seq - seq = fn.pad(seq, axis_names='F', align=FRAMES_PER_SEQUENCE) + vid = fn.external_source(device='cpu', + name='INPUT', + ndim=1, + dtype=dali.types.UINT8) + seq = fn.experimental.decoders.video(vid, device='mixed') + seq = fn.resize(seq, resize_x=OUT_WIDTH, resize_y=OUT_HEIGHT) + nonreshaped_sequence = seq + seq = fn.pad(seq, axis_names='F', align=FRAMES_PER_SEQUENCE) - return fn.reshape(seq, shape=[-1, FRAMES_PER_SEQUENCE, OUT_HEIGHT, OUT_WIDTH, 3], name='OUTPUT'), \ - fn.reinterpret(nonreshaped_sequence, layout='FHWC', name='OUTPUT_images'), \ - vid + return fn.reshape(seq, shape=[-1, FRAMES_PER_SEQUENCE, OUT_HEIGHT, OUT_WIDTH, 3], name='OUTPUT'), \ + fn.reinterpret(nonreshaped_sequence, layout='FHWC', name='OUTPUT_images'), \ + vid diff --git a/qa/L1_DALI_GPU_ensemble/client.py b/qa/L1_DALI_GPU_ensemble/client.py index f7da88e8..b7504104 100644 --- a/qa/L1_DALI_GPU_ensemble/client.py +++ b/qa/L1_DALI_GPU_ensemble/client.py @@ -24,34 +24,61 @@ from numpy.random import randint, random import argparse + # TODO: Use actual DALI pipelines to calculate ground truth def ref_func(inp1, inp2): - return inp1 * 2 / 3, (inp2 * 3).astype(np.half).astype(np.single) / 2 + return inp1 * 2 / 3, (inp2 * 
3).astype(np.half).astype(np.single) / 2 + def random_gen(max_batch_size): - while True: - size1 = randint(100, 300) - size2 = randint(100, 300) - bs = randint(1, max_batch_size + 1) - yield random((bs, size1)).astype(np.single), \ - random((bs, size2)).astype(np.single) + while True: + size1 = randint(100, 300) + size2 = randint(100, 300) + bs = randint(1, max_batch_size + 1) + yield random((bs, size1)).astype(np.single), \ + random((bs, size2)).astype(np.single) + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('-u', '--url', type=str, required=False, default='localhost:8001', - help='Inference server GRPC URL. Default is localhost:8001.') - parser.add_argument('-n', '--n_iters', type=int, required=False, default=1, help='Number of iterations') - parser.add_argument('-c', '--concurrency', type=int, required=False, default=1, + parser.add_argument( + '-u', + '--url', + type=str, + required=False, + default='localhost:8001', + help='Inference server GRPC URL. Default is localhost:8001.') + parser.add_argument('-n', + '--n_iters', + type=int, + required=False, + default=1, + help='Number of iterations') + parser.add_argument('-c', + '--concurrency', + type=int, + required=False, + default=1, help='Request concurrency level') - parser.add_argument('-b', '--max_batch_size', type=int, required=False, default=256) + parser.add_argument('-b', + '--max_batch_size', + type=int, + required=False, + default=256) return parser.parse_args() + def main(): - args = parse_args() - client = TestClient('dali_ensemble', ['INPUT_0', 'INPUT_1'], ['OUTPUT_0', 'OUTPUT_1'], args.url, - concurrency=args.concurrency) - client.run_tests(random_gen(args.max_batch_size), ref_func, - n_infers=args.n_iters, eps=1e-4) + args = parse_args() + client = TestClient('dali_ensemble', ['INPUT_0', 'INPUT_1'], + ['OUTPUT_0', 'OUTPUT_1'], + args.url, + concurrency=args.concurrency) + client.run_tests(random_gen(args.max_batch_size), + ref_func, + n_infers=args.n_iters, + eps=1e-4) + if __name__ == '__main__': - main() + main() diff --git a/qa/L1_DALI_GPU_ensemble/model_repository/dali_1/pipeline.py b/qa/L1_DALI_GPU_ensemble/model_repository/dali_1/pipeline.py index c0d6132e..57fde78b 100644 --- a/qa/L1_DALI_GPU_ensemble/model_repository/dali_1/pipeline.py +++ b/qa/L1_DALI_GPU_ensemble/model_repository/dali_1/pipeline.py @@ -24,11 +24,14 @@ import multiprocessing as mp import argparse -@dali.pipeline_def(batch_size=1, num_threads=min(mp.cpu_count(), 4), device_id=0) + +@dali.pipeline_def(batch_size=1, + num_threads=min(mp.cpu_count(), 4), + device_id=0) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1') - return inp1.gpu() * 2, fn.cast(inp2 * 3, dtype=dali.types.FLOAT16) + inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') + inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1') + return inp1.gpu() * 2, fn.cast(inp2 * 3, dtype=dali.types.FLOAT16) def main(filename): @@ -37,7 +40,10 @@ def main(filename): if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Serialize pipeline and save it to file") - parser.add_argument('file_path', type=str, help='Path, where to save serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize pipeline and save it to file") + parser.add_argument('file_path', + type=str, + help='Path, where to save serialized pipeline') args = parser.parse_args() main(args.file_path) diff --git 
a/qa/L1_DALI_GPU_ensemble/model_repository/dali_2/pipeline.py b/qa/L1_DALI_GPU_ensemble/model_repository/dali_2/pipeline.py index ea10b435..9ccb2fc7 100644 --- a/qa/L1_DALI_GPU_ensemble/model_repository/dali_2/pipeline.py +++ b/qa/L1_DALI_GPU_ensemble/model_repository/dali_2/pipeline.py @@ -24,11 +24,14 @@ import multiprocessing as mp import argparse -@dali.pipeline_def(batch_size=1, num_threads=min(mp.cpu_count(), 4), device_id=0) + +@dali.pipeline_def(batch_size=1, + num_threads=min(mp.cpu_count(), 4), + device_id=0) def pipeline(): - inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') - inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1') - return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 + inp1 = fn.external_source(device='cpu', name='DALI_INPUT_0') + inp2 = fn.external_source(device='gpu', name='DALI_INPUT_1') + return inp1.gpu() / 3, fn.cast(inp2, dtype=dali.types.FLOAT) / 2 def main(filename): @@ -37,7 +40,10 @@ def main(filename): if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Serialize pipeline and save it to file") - parser.add_argument('file_path', type=str, help='Path, where to save serialized pipeline') + parser = argparse.ArgumentParser( + description="Serialize pipeline and save it to file") + parser.add_argument('file_path', + type=str, + help='Path, where to save serialized pipeline') args = parser.parse_args() main(args.file_path) diff --git a/src/config_tools/config_tools.cc b/src/config_tools/config_tools.cc index 57ef5b62..0e26ffd0 100644 --- a/src/config_tools/config_tools.cc +++ b/src/config_tools/config_tools.cc @@ -32,29 +32,29 @@ namespace triton { namespace backend { namespace dali { */ std::string to_triton_config(dali_data_type_t type) { switch (type) { - case DALI_UINT8 : + case DALI_UINT8: return "TYPE_UINT8"; - case DALI_UINT16 : + case DALI_UINT16: return "TYPE_UINT16"; - case DALI_UINT32 : + case DALI_UINT32: return "TYPE_UINT32"; - case DALI_UINT64 : + case DALI_UINT64: return "TYPE_UINT64"; - case DALI_INT8 : + case DALI_INT8: return "TYPE_INT8"; - case DALI_INT16 : + case DALI_INT16: return "TYPE_INT16"; - case DALI_INT32 : + case DALI_INT32: return "TYPE_INT32"; - case DALI_INT64 : + case DALI_INT64: return "TYPE_INT64"; - case DALI_FLOAT16 : + case DALI_FLOAT16: return "TYPE_FP16"; - case DALI_FLOAT : + case DALI_FLOAT: return "TYPE_FP32"; - case DALI_FLOAT64 : + case DALI_FLOAT64: return "TYPE_FP64"; - case DALI_BOOL : + case DALI_BOOL: return "TYPE_BOOL"; default: return "TYPE_INVALID"; @@ -64,8 +64,9 @@ std::string to_triton_config(dali_data_type_t type) { void SetShapeArray(TritonJson::Value &array, const std::vector &dims) { TRITON_CALL(array.AssertType(TritonJson::ValueType::ARRAY)); - ENFORCE(array.ArraySize() <= dims.size(), "SetShapeArray expects the initial array size to be " - "smaller or equal the number of dimensions."); + ENFORCE(array.ArraySize() <= dims.size(), + "SetShapeArray expects the initial array size to be " + "smaller or equal the number of dimensions."); size_t i = 0; const auto arr_size = array.ArraySize(); for (; i < arr_size; ++i) { @@ -108,14 +109,15 @@ std::vector ReadShape(TritonJson::Value &dims_array) { } -std::vector MatchShapes(const std::string &name, - const std::vector &config_shape, +std::vector MatchShapes(const std::string &name, const std::vector &config_shape, const std::vector &pipeline_shape) { if (config_shape.size() != pipeline_shape.size()) { - throw TritonError::InvalidArg(make_string("Mismatch in number of dimensions for \"", name, "\"\n" - 
"Number of dimensions defined in config: ", config_shape.size(), - "\nNumber of dimensions defined in pipeline: ", - pipeline_shape.size())); + throw TritonError::InvalidArg( + make_string("Mismatch in number of dimensions for \"", name, + "\"\n" + "Number of dimensions defined in config: ", + config_shape.size(), + "\nNumber of dimensions defined in pipeline: ", pipeline_shape.size())); } std::vector result(config_shape.size()); for (size_t i = 0; i < result.size(); ++i) { @@ -123,10 +125,9 @@ std::vector MatchShapes(const std::string &name, if (config_shape[i] == -1 || pipeline_shape[i] == -1) { result[i] = std::max(config_shape[i], pipeline_shape[i]); } else { - throw TritonError::InvalidArg( - make_string("Mismath in dims for ", name, "\nDims defined in config: ", - vec_to_string(config_shape), "\nDims defined in pipeline: ", - vec_to_string(pipeline_shape))); + throw TritonError::InvalidArg(make_string( + "Mismath in dims for ", name, "\nDims defined in config: ", vec_to_string(config_shape), + "\nDims defined in pipeline: ", vec_to_string(pipeline_shape))); } } else { result[i] = config_shape[i]; @@ -136,9 +137,9 @@ std::vector MatchShapes(const std::string &name, } -template +template std::string ProcessDtypeConfig(TritonJson::Value &io_object, const std::string &name, - dali_data_type_t dtype) { + dali_data_type_t dtype) { TritonJson::Value dtype_obj(TritonJson::ValueType::OBJECT); if (io_object.Find("data_type", &dtype_obj)) { std::string found_dtype; @@ -146,10 +147,13 @@ std::string ProcessDtypeConfig(TritonJson::Value &io_object, const std::string & if (found_dtype != "TYPE_INVALID") { if (dtype != DALI_NO_TYPE) { if (found_dtype != to_triton_config(dtype)) { - throw TritonError::InvalidArg(make_string( - "Mismatch of data_type config for \"", name, "\".\n" - "Data type defined in config: ", found_dtype, "\n" - "Data type defined in pipeline: ", to_triton_config(dtype))); + throw TritonError::InvalidArg(make_string("Mismatch of data_type config for \"", name, + "\".\n" + "Data type defined in config: ", + found_dtype, + "\n" + "Data type defined in pipeline: ", + to_triton_config(dtype))); } } return found_dtype; @@ -169,7 +173,7 @@ std::string AutofillDtypeConfig(TritonJson::Value &io_object, const std::string void ValidateDtypeConfig(TritonJson::Value &io_object, const std::string &name, - dali_data_type_t dtype) { + dali_data_type_t dtype) { ProcessDtypeConfig(io_object, name, dtype); } @@ -187,7 +191,7 @@ void AutofillShapeConfig(TritonJson::Value &config, TritonJson::Value &config_io TRITON_CALL(config_io.MemberAsString("name", &name)); TritonJson::Value config_dims_obj; std::vector model_io_shape = - batched_model ? _model_io_shape : add_batch_dim(_model_io_shape); + batched_model ? _model_io_shape : add_batch_dim(_model_io_shape); if (config_io.Find("dims", &config_dims_obj)) { auto config_dims = ReadShape(config_dims_obj); if (config_dims.size() > 0) { @@ -205,8 +209,7 @@ void AutofillShapeConfig(TritonJson::Value &config, TritonJson::Value &config_io void ValidateShapeConfig(TritonJson::Value &io_object, const std::string &name, - const std::optional> &shape, - bool batched_model) { + const std::optional> &shape, bool batched_model) { TritonJson::Value dims_obj; TritonError error{io_object.MemberAsArray("dims", &dims_obj)}; if (error) { @@ -214,12 +217,10 @@ void ValidateShapeConfig(TritonJson::Value &io_object, const std::string &name, } if (shape) { - std::vector model_io_shape = - batched_model ? 
*shape : add_batch_dim(*shape); + std::vector model_io_shape = batched_model ? *shape : add_batch_dim(*shape); auto config_shape = ReadShape(dims_obj); MatchShapes(name, config_shape, model_io_shape); } - } @@ -245,8 +246,7 @@ void AutofillIOConfig(TritonJson::Value &config, TritonJson::Value &config_io, } -void ValidateIOConfig(TritonJson::Value &io_object, const IOConfig &io_config, - bool batched_model) { +void ValidateIOConfig(TritonJson::Value &io_object, const IOConfig &io_config, bool batched_model) { TRITON_CALL(io_object.AssertType(common::TritonJson::ValueType::OBJECT)); std::string name; io_object.MemberAsString("name", &name); @@ -256,14 +256,14 @@ void ValidateIOConfig(TritonJson::Value &io_object, const IOConfig &io_config, void ValidateAgainstTooManyInputs(TritonJson::Value &ins, const std::vector &in_configs) { - for (size_t i = 0; i < ins.ArraySize(); ++i) { + for (size_t i = 0; i < ins.ArraySize(); ++i) { TritonJson::Value io_object(TritonJson::ValueType::OBJECT); ins.IndexAsObject(i, &io_object); std::string name; if (io_object.MemberAsString("name", &name) != TRITONJSON_STATUSSUCCESS) { throw TritonError::InvalidArg( - make_string("The input at index ", i, - " in the model configuration does not contain a `name` field.")); + make_string("The input at index ", i, + " in the model configuration does not contain a `name` field.")); } bool in_present = std::any_of(in_configs.begin(), in_configs.end(), @@ -280,7 +280,7 @@ void AutofillInputsConfig(TritonJson::Value &config, TritonJson::Value &config_i const std::vector &model_ins, bool batched_model) { TRITON_CALL(config_ins.AssertType(common::TritonJson::ValueType::ARRAY)); ValidateAgainstTooManyInputs(config_ins, model_ins); - for (const auto &model_in: model_ins) { + for (const auto &model_in : model_ins) { TritonJson::Value config_in(config, TritonJson::ValueType::OBJECT); auto found = FindObjectByName(config_ins, model_in.name, &config_in); AutofillIOConfig(config, config_in, model_in, batched_model); @@ -299,10 +299,10 @@ void AutofillOutputsConfig(TritonJson::Value &config, TritonJson::Value &config_ TRITON_CALL(config_outs.AssertType(common::TritonJson::ValueType::ARRAY)); if (config_outs.ArraySize() > model_outs.size()) { throw TritonError::InvalidArg( - make_string("The number of outputs specified in the DALI pipeline and the configuration" - " file do not match." - "\nModel config outputs: ", config_outs.ArraySize(), - "\nPipeline outputs: ", model_outs.size())); + make_string("The number of outputs specified in the DALI pipeline and the configuration" + " file do not match." 
+ "\nModel config outputs: ", + config_outs.ArraySize(), "\nPipeline outputs: ", model_outs.size())); } size_t i = 0; @@ -368,12 +368,12 @@ void ValidateInputs(TritonJson::Value &ins, const std::vector &in_conf bool batched_model) { TRITON_CALL(ins.AssertType(common::TritonJson::ValueType::ARRAY)); ValidateAgainstTooManyInputs(ins, in_configs); - for (const auto &in_config: in_configs) { + for (const auto &in_config : in_configs) { TritonJson::Value in_object(TritonJson::ValueType::OBJECT); auto ind = FindObjectByName(ins, in_config.name, &in_object); if (!ind) { throw TritonError::InvalidArg( - make_string("Missing config for \"", in_config.name, "\" input.")); + make_string("Missing config for \"", in_config.name, "\" input.")); } ValidateIOConfig(in_object, in_config, batched_model); } @@ -385,10 +385,10 @@ void ValidateOutputs(TritonJson::Value &outs, const std::vector &out_c TRITON_CALL(outs.AssertType(common::TritonJson::ValueType::ARRAY)); if (outs.ArraySize() != out_configs.size()) { throw TritonError::InvalidArg( - make_string("The number of outputs specified in the DALI pipeline and the " - "configuration file do not match." - "\nModel config outputs: ", outs.ArraySize(), - "\nPipeline outputs: ", out_configs.size())); + make_string("The number of outputs specified in the DALI pipeline and the " + "configuration file do not match." + "\nModel config outputs: ", + outs.ArraySize(), "\nPipeline outputs: ", out_configs.size())); } for (size_t i = 0; i < out_configs.size(); ++i) { TritonJson::Value out_object; @@ -396,8 +396,8 @@ void ValidateOutputs(TritonJson::Value &outs, const std::vector &out_c std::string name; if (out_object.MemberAsString("name", &name) != TRITONJSON_STATUSSUCCESS) { throw TritonError::InvalidArg( - make_string("The output at index ", i, - " in the model configuration does not contain a `name` field.")); + make_string("The output at index ", i, + " in the model configuration does not contain a `name` field.")); } ValidateIOConfig(out_object, out_configs[i], batched_model); } @@ -442,7 +442,8 @@ bool is_sep(char c) { void skip_whitespace(std::string_view &text) { size_t i = 0; - while (i < text.size() && is_whitespace(text[i])) ++i; + while (i < text.size() && is_whitespace(text[i])) + ++i; text.remove_prefix(i); } @@ -452,7 +453,7 @@ void skip_line(std::string_view &text) { if (pos == text.npos) { text = std::string_view(); } else { - text.remove_prefix(pos+1); + text.remove_prefix(pos + 1); } } @@ -460,7 +461,7 @@ void skip_line(std::string_view &text) { void skip_ignored(std::string_view &text) { skip_whitespace(text); while (!text.empty() && text[0] == '#') { - skip_line(text); // remove comment + skip_line(text); // remove comment skip_whitespace(text); } } @@ -471,7 +472,8 @@ void skip_string(std::string_view &text) { while (!text.empty() && text[0] == '\"') { size_t end = 1; while (end < text.size() && text[end] != '\"') { - if (text[end] == '\\') ++end; // escaped character + if (text[end] == '\\') + ++end; // escaped character ++end; } text.remove_prefix(end + 1); @@ -481,14 +483,17 @@ void skip_string(std::string_view &text) { void skip_complex(std::string_view &text, char bra, char ket) { - if (text.empty()) return; + if (text.empty()) + return; size_t open_bracket = 0; do { if (text[0] == '\"') { skip_string(text); } else { - if (text[0] == bra) ++open_bracket; - else if (text[0] == ket) --open_bracket; + if (text[0] == bra) + ++open_bracket; + else if (text[0] == ket) + --open_bracket; text.remove_prefix(1); } skip_ignored(text); @@ -498,7 +503,8 
@@ void skip_complex(std::string_view &text, char bra, char ket) { std::optional parse_int(std::string_view &text) { skip_ignored(text); - if (text.empty()) return {}; + if (text.empty()) + return {}; bool negative = false; if (text[0] == '-') { negative = true; @@ -506,7 +512,8 @@ std::optional parse_int(std::string_view &text) { skip_ignored(text); } size_t end = 0; - while (end < text.size() && !(is_whitespace(text[end]) || is_sep(text[end]))) ++end; + while (end < text.size() && !(is_whitespace(text[end]) || is_sep(text[end]))) + ++end; try { std::string value(text.substr(0, end)); int64_t v = std::stoll(value, nullptr, 0); @@ -525,7 +532,7 @@ std::optional ReadMBSFromPBtxt(std::string_view pb_txt) { pb_txt.remove_prefix(field_name.size()); skip_ignored(pb_txt); if (pb_txt[0] == ':') { - pb_txt.remove_prefix(1); // remove : + pb_txt.remove_prefix(1); // remove : return parse_int(pb_txt); } else { // scalar field name has to be followed by a colon diff --git a/src/config_tools/config_tools.h b/src/config_tools/config_tools.h index 6e8562a2..e9124e06 100644 --- a/src/config_tools/config_tools.h +++ b/src/config_tools/config_tools.h @@ -40,12 +40,9 @@ struct IOConfig { IOConfig() = default; - explicit IOConfig(const std::string &name, - dali_data_type_t dtype = DALI_NO_TYPE, - std::optional> shape = {}) - : name(name) - , dtype(dtype) - , shape(shape) {} + explicit IOConfig(const std::string &name, dali_data_type_t dtype = DALI_NO_TYPE, + std::optional> shape = {}) : + name(name), dtype(dtype), shape(shape) {} }; /** @@ -84,8 +81,7 @@ std::vector ReadShape(TritonJson::Value &dims_array); * * Throws an error when shapes cannot be matched. */ -std::vector MatchShapes(const std::string &name, - const std::vector &config_shape, +std::vector MatchShapes(const std::string &name, const std::vector &config_shape, const std::vector &pipeline_shape); @@ -111,13 +107,12 @@ void ValidateDtypeConfig(TritonJson::Value &io_object, const std::string &name, * to extend it with the batch dimension. */ void AutofillShapeConfig(TritonJson::Value &config, TritonJson::Value &config_io, - const std::vector &model_io_shape, - bool batched_model = true); + const std::vector &model_io_shape, bool batched_model = true); /** * @brief Validates dims field in IO object again provided value. * -* If `batched_model` is set to false, the model_io_shape will be prepended with -1 + * If `batched_model` is set to false, the model_io_shape will be prepended with -1 * to extend it with the batch dimension. */ void ValidateShapeConfig(TritonJson::Value &io_object, const std::string &name, @@ -128,7 +123,7 @@ void ValidateShapeConfig(TritonJson::Value &io_object, const std::string &name, * @brief Auto-fills `config_io` IO object with values from model IO configuration `model_io`. * `config` must be a top-level TritonJson object containing `config_io`. * -* If `batched_model` is set to false, the model_io_shape will be prepended with -1 + * If `batched_model` is set to false, the model_io_shape will be prepended with -1 * to extend it with the batch dimension. 
*/ void AutofillIOConfig(TritonJson::Value &config, TritonJson::Value &config_io, @@ -209,6 +204,6 @@ void ValidateConfig(TritonJson::Value &config, const std::vector &in_c */ std::optional ReadMBSFromPBtxt(std::string_view pb_txt); -}}} // namespace triton::backend::dali +}}} // namespace triton::backend::dali #endif // DALI_BACKEND_CONFIG_TOOLS_CONFIG_TOOLS_H_ diff --git a/src/config_tools/config_tools.test.cc b/src/config_tools/config_tools.test.cc index 3880824b..bdeb4f14 100644 --- a/src/config_tools/config_tools.test.cc +++ b/src/config_tools/config_tools.test.cc @@ -30,7 +30,8 @@ namespace triton { namespace backend { namespace dali { namespace test { using Catch::Matchers::Contains; -static void CheckIOConfigEquals(TritonJson::Value &io, IOConfig io_config, bool compare_names = true) { +static void CheckIOConfigEquals(TritonJson::Value &io, IOConfig io_config, + bool compare_names = true) { CHECK(io.AssertType(TritonJson::ValueType::OBJECT) == TRITONJSON_STATUSSUCCESS); if (compare_names) { @@ -86,9 +87,8 @@ TEST_CASE("IO config validation") { TritonJson::Value io_config; TRITON_CALL(io_config.Parse(std::string(R"json({ "name": "io0", - "dims": )json") + - (batched_model ? "[3, 2, 1]," : "[-1, 3, 2, 1],") + - R"json("data_type": "TYPE_FP32" + "dims": )json") + (batched_model ? "[3, 2, 1]," : "[-1, 3, 2, 1],") + + R"json("data_type": "TYPE_FP32" })json")); SECTION("Matching config") { @@ -99,23 +99,26 @@ TEST_CASE("IO config validation") { SECTION("Mismatching dtype") { REQUIRE_THROWS_WITH( - ValidateIOConfig(io_config, IOConfig("io0", DALI_INT32, {{3, 2, 1}}), batched_model), - Contains("Data type defined in config: TYPE_FP32") && - Contains("Data type defined in pipeline: TYPE_INT32")); + ValidateIOConfig(io_config, IOConfig("io0", DALI_INT32, {{3, 2, 1}}), batched_model), + Contains("Data type defined in config: TYPE_FP32") && + Contains("Data type defined in pipeline: TYPE_INT32")); } SECTION("Mismatching ndims") { REQUIRE_THROWS_WITH( - ValidateIOConfig(io_config, IOConfig("io0", DALI_FLOAT, {{-1, -1, -1, -1}}), batched_model), - Contains(make_string("Number of dimensions defined in config: ", batched_model ? 3 : 4)) && - Contains(make_string("Number of dimensions defined in pipeline: ", batched_model ? 4 : 5))); + ValidateIOConfig(io_config, IOConfig("io0", DALI_FLOAT, {{-1, -1, -1, -1}}), batched_model), + Contains(make_string("Number of dimensions defined in config: ", batched_model ? 3 : 4)) && + Contains( + make_string("Number of dimensions defined in pipeline: ", batched_model ? 4 : 5))); } SECTION("Mismatching shapes") { REQUIRE_THROWS_WITH( - ValidateIOConfig(io_config, IOConfig("io0", DALI_FLOAT, {{3, 2, 2}}), batched_model), - Contains(make_string("Dims defined in config: {", batched_model ? "" : "-1, ", "3, 2, 1}")) && - Contains(make_string("Dims defined in pipeline: {", batched_model ? "" : "-1, ", "3, 2, 2}"))); + ValidateIOConfig(io_config, IOConfig("io0", DALI_FLOAT, {{3, 2, 2}}), batched_model), + Contains( + make_string("Dims defined in config: {", batched_model ? "" : "-1, ", "3, 2, 1}")) && + Contains(make_string("Dims defined in pipeline: {", batched_model ? 
"" : "-1, ", + "3, 2, 2}"))); } } @@ -136,22 +139,17 @@ TEST_CASE("Inputs validation") { ])json")); SECTION("Correct config") { - std::vector ios_config = { - IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("i2", DALI_FLOAT16, {{1, 1}}) - }; + std::vector ios_config = {IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("i2", DALI_FLOAT16, {{1, 1}})}; ValidateInputs(ios, ios_config); } SECTION("Missing input") { - std::vector ios_config = { - IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("i2", DALI_FLOAT16, {{1, 1}}), - IOConfig("i3", DALI_UINT16, {{1}}) - }; + std::vector ios_config = {IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("i2", DALI_FLOAT16, {{1, 1}}), + IOConfig("i3", DALI_UINT16, {{1}})}; - REQUIRE_THROWS_WITH(ValidateInputs(ios, ios_config), - Contains("Missing config for \"i3\"")); + REQUIRE_THROWS_WITH(ValidateInputs(ios, ios_config), Contains("Missing config for \"i3\"")); } } @@ -172,19 +170,15 @@ TEST_CASE("Outputs validation") { ])json")); SECTION("Correct config") { - std::vector ios_config = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("Pipe_o2", DALI_FLOAT16, {{1, 1}}) - }; + std::vector ios_config = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("Pipe_o2", DALI_FLOAT16, {{1, 1}})}; ValidateOutputs(ios, ios_config); } SECTION("Missing output") { - std::vector ios_config = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("Pipe_o2", DALI_FLOAT16, {{1, 1}}), - IOConfig("Pipe_o3", DALI_UINT16, {{1}}) - }; + std::vector ios_config = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("Pipe_o2", DALI_FLOAT16, {{1, 1}}), + IOConfig("Pipe_o3", DALI_UINT16, {{1}})}; REQUIRE_THROWS_WITH(ValidateOutputs(ios, ios_config), Contains("The number of outputs specified in the DALI pipeline and the" @@ -282,21 +276,18 @@ TEST_CASE("Inputs auto-config") { ])json")); SECTION("Inputs auto-config") { - std::vector model_ins = { - IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("i2", DALI_FLOAT16, {{5, 5}}), - IOConfig("i3", DALI_UINT16, {{4}}) - }; + std::vector model_ins = {IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("i2", DALI_FLOAT16, {{5, 5}}), + IOConfig("i3", DALI_UINT16, {{4}})}; AutofillInputsConfig(ios, ios, model_ins); - for (auto &model_in: model_ins) { + for (auto &model_in : model_ins) { TritonJson::Value inp_object; REQUIRE(FindObjectByName(ios, model_in.name, &inp_object)); bool ragged_batches; - REQUIRE( - inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == TRITONJSON_STATUSSUCCESS - ); + REQUIRE(inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == + TRITONJSON_STATUSSUCCESS); REQUIRE(ragged_batches); CheckIOConfigEquals(inp_object, model_in); } @@ -305,9 +296,9 @@ TEST_CASE("Inputs auto-config") { SECTION("Inputs auto-config, reordered") { std::vector model_ins = { - IOConfig("i0", DALI_INT32, {{-1, -1}}), - IOConfig("i2", DALI_FLOAT16, {{5, 5}}), - IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("i0", DALI_INT32, {{-1, -1}}), + IOConfig("i2", DALI_FLOAT16, {{5, 5}}), + IOConfig("i1", DALI_FLOAT, {{3, 2, 3}}), }; AutofillInputsConfig(ios, ios, model_ins); @@ -316,23 +307,20 @@ TEST_CASE("Inputs auto-config") { TritonJson::Value inp_object; REQUIRE(ios.IndexAsObject(0, &inp_object) == TRITONJSON_STATUSSUCCESS); bool ragged_batches; - REQUIRE( - inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == TRITONJSON_STATUSSUCCESS - ); + REQUIRE(inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == + TRITONJSON_STATUSSUCCESS); REQUIRE(ragged_batches); 
CheckIOConfigEquals(inp_object, IOConfig("i1", DALI_FLOAT, {{3, 2, 3}})); REQUIRE(ios.IndexAsObject(1, &inp_object) == TRITONJSON_STATUSSUCCESS); - REQUIRE( - inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == TRITONJSON_STATUSSUCCESS - ); + REQUIRE(inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == + TRITONJSON_STATUSSUCCESS); REQUIRE(ragged_batches); CheckIOConfigEquals(inp_object, IOConfig("i2", DALI_FLOAT16, {{5, 5}})); REQUIRE(ios.IndexAsObject(2, &inp_object) == TRITONJSON_STATUSSUCCESS); - REQUIRE( - inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == TRITONJSON_STATUSSUCCESS - ); + REQUIRE(inp_object.MemberAsBool("allow_ragged_batch", &ragged_batches) == + TRITONJSON_STATUSSUCCESS); REQUIRE(ragged_batches); CheckIOConfigEquals(inp_object, IOConfig("i0", DALI_INT32, {{-1, -1}})); } @@ -353,11 +341,9 @@ TEST_CASE("Outputs auto-config") { } ])json")); - std::vector model_outs = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("Pipe_o2", DALI_FLOAT16, {{5, 5}}), - IOConfig("Pipe_o3", DALI_UINT16, {{4}}) - }; + std::vector model_outs = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("Pipe_o2", DALI_FLOAT16, {{5, 5}}), + IOConfig("Pipe_o3", DALI_UINT16, {{4}})}; SECTION("Outputs auto-config") { @@ -403,16 +389,12 @@ TEST_CASE("Autofill config") { ] })json")); - std::vector model_ins = { - IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}}), - IOConfig("i2", DALI_NO_TYPE, {{-1, 3, 3}}), - IOConfig("i3", DALI_INT32, {{1, 1, 1}}) - }; + std::vector model_ins = {IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}}), + IOConfig("i2", DALI_NO_TYPE, {{-1, 3, 3}}), + IOConfig("i3", DALI_INT32, {{1, 1, 1}})}; - std::vector model_outs = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("o2", DALI_INT32, {{-1, -1}}) - }; + std::vector model_outs = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("o2", DALI_INT32, {{-1, -1}})}; std::string expected_config = R"json({ "input": [ @@ -501,16 +483,12 @@ TEST_CASE("Autofill config [unbatched]") { ] })json")); - std::vector model_ins = { - IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}}), - IOConfig("i2", DALI_NO_TYPE, {{-1, 3, 3}}), - IOConfig("i3", DALI_INT32, {{1, 1, 1}}) - }; + std::vector model_ins = {IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}}), + IOConfig("i2", DALI_NO_TYPE, {{-1, 3, 3}}), + IOConfig("i3", DALI_INT32, {{1, 1, 1}})}; - std::vector model_outs = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), - IOConfig("o2", DALI_INT32, {{-1, -1}}) - }; + std::vector model_outs = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}), + IOConfig("o2", DALI_INT32, {{-1, -1}})}; std::string expected_config = R"json({ "input": [ @@ -580,13 +558,9 @@ TEST_CASE("Autofill config [unbatched]") { TEST_CASE("Validate config") { - std::vector ins_config = { - IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}}) - }; + std::vector ins_config = {IOConfig("i1", DALI_FLOAT16, {{3, 2, 1}})}; - std::vector outs_config = { - IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}}) - }; + std::vector outs_config = {IOConfig("Pipe_o1", DALI_FLOAT, {{3, 2, 3}})}; SECTION("correct config") { TritonJson::Value config(TritonJson::ValueType::OBJECT); diff --git a/src/dali_backend.cc b/src/dali_backend.cc index 22fe80b9..4f14431a 100644 --- a/src/dali_backend.cc +++ b/src/dali_backend.cc @@ -243,7 +243,9 @@ TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute(TRITONBACKEND_ModelInstan try { dali_instance->Execute(requests); - } catch (TritonError& err) { return err.release(); } + } catch (TritonError& err) { + return err.release(); + } return 
nullptr; } diff --git a/src/dali_executor/dali_executor.cc b/src/dali_executor/dali_executor.cc index ad8b4e34..275f1ca0 100644 --- a/src/dali_executor/dali_executor.cc +++ b/src/dali_executor/dali_executor.cc @@ -134,23 +134,24 @@ bool DaliExecutor::IsNoCopy(device_type_t es_device, const IDescr& input) { } -static bool streq(const char * lhs, const char * rhs) { +static bool streq(const char* lhs, const char* rhs) { return strcmp(lhs, rhs) == 0; } bool DaliExecutor::IsInputConsumed() { - for (auto &name : input_names_) { + for (auto& name : input_names_) { auto trace = pipeline_.TryGetOperatorTrace(name, "depleted"); if (!trace.has_value()) { - throw std::logic_error(make_string("DALI internal error: \"depleted\" trace not found for input \"" , - name ,"\". It must be defined by all input operators.")); + throw std::logic_error( + make_string("DALI internal error: \"depleted\" trace not found for input \"", name, + "\". It must be defined by all input operators.")); } if (streq(trace->c_str(), "true")) { return true; } } - for (auto &name : input_names_) { + for (auto& name : input_names_) { auto trace = pipeline_.TryGetOperatorTrace(name, "next_output_data_id"); if (trace.has_value() && *trace != request_id_.str()) { return true; diff --git a/src/dali_executor/dali_executor.h b/src/dali_executor/dali_executor.h index 995d7f06..ce75971d 100644 --- a/src/dali_executor/dali_executor.h +++ b/src/dali_executor/dali_executor.h @@ -45,7 +45,8 @@ class DaliExecutor { public: DaliExecutor(DaliPipeline pipeline) : pipeline_(std::move(pipeline)), - thread_pool_(GetNumThreads(), pipeline_.DeviceId(), false, "[DALI Backend][Executor ThreadPool]") {} + thread_pool_(GetNumThreads(), pipeline_.DeviceId(), false, + "[DALI Backend][Executor ThreadPool]") {} /** * @brief Run DALI pipeline. @@ -110,8 +111,8 @@ class DaliExecutor { /** * @brief Checks if current input has been consumed by current iteration. * - * When input has been consumed, the Backend shall wrap up current request. Also, it is necessary to provide - * data with the next request for the next DALI iteration. + * When input has been consumed, the Backend shall wrap up current request. Also, it is necessary + * to provide data with the next request for the next DALI iteration. * * @return True, if input has been consumed. 
*/ diff --git a/src/dali_executor/dali_pipeline.cc b/src/dali_executor/dali_pipeline.cc index c654a8f3..fc2bde59 100644 --- a/src/dali_executor/dali_pipeline.cc +++ b/src/dali_executor/dali_pipeline.cc @@ -55,7 +55,7 @@ std::vector> DaliPipeline::GetOutputShapes() { void DaliPipeline::SetInput(const void* data_ptr, const char* name, device_type_t source_device, dali_data_type_t data_type, span inputs_shapes, - int sample_ndims, const char *data_id, bool force_no_copy) { + int sample_ndims, const char* data_id, bool force_no_copy) { ENFORCE(inputs_shapes.size() % sample_ndims == 0, "Incorrect inputs shapes or sample ndims"); int batch_size = inputs_shapes.size() / sample_ndims; unsigned int flags = DALI_ext_default; @@ -65,7 +65,7 @@ void DaliPipeline::SetInput(const void* data_ptr, const char* name, device_type_ if (data_id) { daliSetExternalInputDataId(&handle_, name, data_id); } - const char *layout = daliGetExternalInputLayout(&handle_, name); + const char* layout = daliGetExternalInputLayout(&handle_, name); daliSetExternalInputBatchSize(&handle_, name, batch_size); daliSetExternalInput(&handle_, name, source_device, data_ptr, data_type, inputs_shapes.data(), sample_ndims, layout, flags); @@ -84,8 +84,8 @@ void DaliPipeline::SetInput(const IDescr& io_descr, std::optional DaliPipeline::ListInputs() { return result; } -std::optional> DaliPipeline::GetInputShape(const std::string &name) { +std::optional> DaliPipeline::GetInputShape(const std::string& name) { int ndim = daliGetExternalInputNdim(&handle_, name.c_str()); if (ndim >= 0) { return std::vector(ndim, -1); @@ -119,7 +119,7 @@ std::optional> DaliPipeline::GetInputShape(const std::strin } } -dali_data_type_t DaliPipeline::GetInputType(const std::string &name) { +dali_data_type_t DaliPipeline::GetInputType(const std::string& name) { return daliGetExternalInputType(&handle_, name.c_str()); } diff --git a/src/dali_executor/dali_pipeline.h b/src/dali_executor/dali_pipeline.h index eba782cb..5ed6823c 100644 --- a/src/dali_executor/dali_pipeline.h +++ b/src/dali_executor/dali_pipeline.h @@ -24,9 +24,9 @@ #define DALI_BACKEND_DALI_EXECUTOR_DALI_PIPELINE_H_ #include +#include #include #include -#include #include "src/dali_executor/io_descriptor.h" #include "src/dali_executor/utils/dali.h" @@ -119,13 +119,14 @@ class DaliPipeline { void SetInput(const void* data_ptr, const char* name, device_type_t source_device, dali_data_type_t data_type, span inputs_shapes, int sample_ndims, - const char *data_id, bool force_no_copy = true); + const char* data_id, bool force_no_copy = true); void SetInput(const void* ptr, const char* name, device_type_t source_device, dali_data_type_t data_type, TensorListShape<> input_shape, std::optional data_id = {}, bool force_no_copy = true); - void SetInput(const IDescr& io_descr, std::optional data_id = {}, bool force_no_copy = true); + void SetInput(const IDescr& io_descr, std::optional data_id = {}, + bool force_no_copy = true); void PutOutput(void* destination, int output_idx, device_type_t destination_device); @@ -137,12 +138,12 @@ class DaliPipeline { /** * @brief Get declared expected shape of the input with a given name. */ - std::optional> GetInputShape(const std::string &name); + std::optional> GetInputShape(const std::string& name); /** * @brief Get declared expected data type of the input with a given name. */ - dali_data_type_t GetInputType(const std::string &name); + dali_data_type_t GetInputType(const std::string& name); /** * @brief Get name of the pipeline output with a given id.
@@ -230,7 +231,8 @@ class DaliPipeline { void ReleasePipeline() { - if (!handle_) return; + if (!handle_) + return; daliDeletePipeline(&handle_); handle_ = nullptr; } diff --git a/src/dali_executor/io_descriptor.h b/src/dali_executor/io_descriptor.h index 75fde6c2..2b8872bf 100644 --- a/src/dali_executor/io_descriptor.h +++ b/src/dali_executor/io_descriptor.h @@ -40,7 +40,7 @@ struct BufferDescr { BufferDescr(BufferDescr other) : device(other.device), device_id(other.device_id), data(other.data), size(other.size) {} - BufferDescr() {}; + BufferDescr(){}; }; using IBufferDescr = BufferDescr; @@ -51,7 +51,7 @@ struct IOMeta { dali_data_type_t type{}; TensorListShape<> shape{}; - IOMeta(const IOMeta &other): name(other.name), type(other.type), shape(other.shape) {} + IOMeta(const IOMeta &other) : name(other.name), type(other.type), shape(other.shape) {} IOMeta &operator=(IOMeta &&rhs) { if (this != &rhs) { @@ -77,27 +77,28 @@ struct IODescr { /** * @brief Moves and appends buffers from the second descriptor and adjusts * the shape of this descriptor. - */ + */ void append(IODescr &&other) { if (meta.shape.num_samples() == 0) { meta = std::move(other.meta); } else { ENFORCE(meta.name == other.meta.name, - make_string("Cannot append IOs with different names. Expected name: ", - meta.name, ", got: ", other.meta.name)); + make_string("Cannot append IOs with different names. Expected name: ", meta.name, + ", got: ", other.meta.name)); ENFORCE(meta.type == other.meta.type, - make_string("Cannot append IOs with different types. For IO ", - meta.name, " the expected type is ", meta.type, ", got ", other.meta.type)); + make_string("Cannot append IOs with different types. For IO ", meta.name, + " the expected type is ", meta.type, ", got ", other.meta.type)); meta.shape.append(other.meta.shape); } - for (auto &buffer: other.buffers) { + for (auto &buffer : other.buffers) { buffers.push_back(std::move(buffer)); } } - IODescr(const IOMeta &meta, const std::vector> &buffers): meta(meta), buffers(buffers) {} + IODescr(const IOMeta &meta, const std::vector> &buffers) : + meta(meta), buffers(buffers) {} - IODescr(const IODescr &other): meta(other.meta), buffers(other.buffers) {} + IODescr(const IODescr &other) : meta(other.meta), buffers(other.buffers) {} IODescr &operator=(IODescr &&rhs) { if (this != &rhs) { diff --git a/src/dali_executor/utils/dali.h b/src/dali_executor/utils/dali.h index 051293da..0fd44033 100644 --- a/src/dali_executor/utils/dali.h +++ b/src/dali_executor/utils/dali.h @@ -42,6 +42,7 @@ using ::dali::copyD2D; using ::dali::copyD2H; using ::dali::copyH2D; using ::dali::copyH2H; +using ::dali::CPU_ONLY_DEVICE_ID; using ::dali::CUDAStream; using ::dali::DALIException; using ::dali::DeviceBuffer; @@ -55,7 +56,6 @@ using ::dali::TensorShape; using ::dali::ThreadPool; using ::dali::UniqueHandle; using ::dali::volume; -using ::dali::CPU_ONLY_DEVICE_ID; inline int64_t dali_type_size(dali_data_type_t type) { diff --git a/src/dali_executor/utils/utils.h b/src/dali_executor/utils/utils.h index 06a5156c..bca818c2 100644 --- a/src/dali_executor/utils/utils.h +++ b/src/dali_executor/utils/utils.h @@ -154,7 +154,7 @@ struct TimeRange { static const uint32_t kPantyPink = 0xBD8BC3; - explicit TimeRange(const std::string& name, const uint32_t rgb = kPantyPink) { + explicit TimeRange(const std::string &name, const uint32_t rgb = kPantyPink) { nvtxEventAttributes_t att = {}; att.version = NVTX_VERSION; att.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; diff --git a/src/dali_model.cc b/src/dali_model.cc index 
2f8a4b3e..b497c11e 100644 --- a/src/dali_model.cc +++ b/src/dali_model.cc @@ -32,7 +32,7 @@ TRITONSERVER_Error* DaliModel::Create(TRITONBACKEND_Model* triton_model, DaliMod return e.release(); } catch (DALIException& e) { return TritonError::Unknown(make_string("Error while instantiating DALI pipeline: ", e.what())) - .release(); + .release(); } catch (const std::exception& e) { return TritonError::Unknown(make_string("DALI Backend error: ", e.what())).release(); } diff --git a/src/dali_model.h b/src/dali_model.h index b1360161..127cb24d 100644 --- a/src/dali_model.h +++ b/src/dali_model.h @@ -23,13 +23,13 @@ #ifndef DALI_BACKEND_DALI_MODEL_H_ #define DALI_BACKEND_DALI_MODEL_H_ +#include "src/config_tools/config_tools.h" #include "src/dali_executor/dali_pipeline.h" #include "src/dali_executor/utils/dali.h" #include "src/model_provider/model_provider.h" #include "src/parameters.h" #include "src/utils/triton.h" #include "src/utils/utils.h" -#include "src/config_tools/config_tools.h" #include "triton/backend/backend_common.h" #include "triton/backend/backend_model.h" @@ -49,7 +49,7 @@ class DaliModel : public ::triton::backend::BackendModel { (std::string("model configuration:\n") + buffer.Contents()).c_str()); try { ValidateConfig(model_config_, pipeline_inputs_, pipeline_outputs_, Batched()); - } catch (TritonError &err) { + } catch (TritonError& err) { return err.release(); } @@ -64,7 +64,7 @@ class DaliModel : public ::triton::backend::BackendModel { return params_; } - bool IsOutputSplit(const std::string &name) { + bool IsOutputSplit(const std::string& name) { auto it = std::find(outputs_to_split_.begin(), outputs_to_split_.end(), name); return it != outputs_to_split_.end(); } @@ -93,17 +93,17 @@ class DaliModel : public ::triton::backend::BackendModel { TRITONSERVER_Error* AutoCompleteConfig() { try { - AutofillConfig(model_config_, pipeline_inputs_, pipeline_outputs_, - pipeline_max_batch_size_, Batched()); + AutofillConfig(model_config_, pipeline_inputs_, pipeline_outputs_, pipeline_max_batch_size_, + Batched()); TRITON_CALL(SetModelConfig()); - } catch (TritonError &err) { + } catch (TritonError& err) { return err.release(); } return nullptr; } bool Batched() const { - return !(config_max_batch_size_.has_value() && config_max_batch_size_ == 0) ; + return !(config_max_batch_size_.has_value() && config_max_batch_size_ == 0); } private: @@ -113,8 +113,7 @@ class DaliModel : public ::triton::backend::BackendModel { const char* model_repo_path; TRITONBACKEND_ArtifactType artifact_type; - TRITON_CALL( - TRITONBACKEND_ModelRepository(triton_model_, &artifact_type, &model_repo_path)); + TRITON_CALL(TRITONBACKEND_ModelRepository(triton_model_, &artifact_type, &model_repo_path)); std::string config_path = make_string(model_repo_path, sep, "config.pbtxt"); std::ifstream config_file(config_path); @@ -137,8 +136,7 @@ class DaliModel : public ::triton::backend::BackendModel { ReadPipelineProperties(); TRITON_CALL( - TRITONBACKEND_ModelAutoCompleteConfig(triton_model_, - &should_auto_complete_config_)); + TRITONBACKEND_ModelAutoCompleteConfig(triton_model_, &should_auto_complete_config_)); } void ReadParams() { @@ -254,7 +252,7 @@ class DaliModel : public ::triton::backend::BackendModel { // This method tries to find any GPU instance group and select any device id from it // If there are no GPU inst. 
groups, it returns CPU_ONLY_DEVICE_ID - int ReadDeviceFromInstanceGroups(triton::common::TritonJson::Value &inst_groups) { + int ReadDeviceFromInstanceGroups(triton::common::TritonJson::Value& inst_groups) { TRITON_CALL(inst_groups.AssertType(triton::common::TritonJson::ValueType::ARRAY)); auto count = inst_groups.ArraySize(); for (size_t i = 0; i < count; ++i) { @@ -287,7 +285,7 @@ class DaliModel : public ::triton::backend::BackendModel { auto name = input_names[i]; pipeline_inputs_[i].name = name; pipeline_inputs_[i].dtype = pipeline.GetInputType(name); - pipeline_inputs_[i].shape = pipeline.GetInputShape(name); + pipeline_inputs_[i].shape = pipeline.GetInputShape(name); } int num_outputs = pipeline.GetNumOutput(); @@ -306,10 +304,10 @@ class DaliModel : public ::triton::backend::BackendModel { DaliPipeline InstantiateDaliPipeline(int config_max_batch_size) { int device_id = FindDevice(); - const std::string &serialized_pipeline = GetModelProvider().GetModel(); + const std::string& serialized_pipeline = GetModelProvider().GetModel(); try { return DaliPipeline(serialized_pipeline, config_max_batch_size, 1, device_id); - } catch (const DALIException &) { + } catch (const DALIException&) { return DaliPipeline(serialized_pipeline, config_max_batch_size, 1, CPU_ONLY_DEVICE_ID); } } diff --git a/src/dali_model_instance.cc b/src/dali_model_instance.cc index f2e62896..c86601c2 100644 --- a/src/dali_model_instance.cc +++ b/src/dali_model_instance.cc @@ -96,7 +96,9 @@ void DaliModelInstance::ExecuteBatched(const std::vector& request TritonError error{}; try { proc_meta = ProcessRequests(requests, responses); - } catch (...) { error = ErrorHandler(); } + } catch (...) { + error = ErrorHandler(); + } for (auto& response : responses) { SendResponse(std::move(response), true, TritonError::Copy(error)); } @@ -109,7 +111,7 @@ void DaliModelInstance::ExecuteBatched(const std::vector& request void DaliModelInstance::ExecuteUnbatched(const std::vector& requests) { DeviceGuard dg(GetDaliDeviceId()); - for (auto &request : requests) { + for (auto& request : requests) { TimeInterval exec_interval{}; start_timer_ns(exec_interval); TritonError error{}; @@ -167,7 +169,7 @@ ProcessingMeta DaliModelInstance::ProcessRequests(const std::vector& r auto idescrs = GenerateInputs(request); reqs_batch_sizes[ri] = idescrs[0].meta.shape.num_samples(); if (ri == 0) { - for (auto &input: idescrs) { + for (auto& input : idescrs) { input_map[input.meta.name] = std::move(input); } } else { - for (auto &input: idescrs) { + for (auto& input : idescrs) { auto idescr = input_map.find(input.meta.name); ENFORCE(idescr != input_map.end(), "Got unexpected input with name " + input.meta.name); idescr->second.append(std::move(input)); @@ -225,7 +227,7 @@ InputsInfo DaliModelInstance::GenerateInputs(const std::vector& r return {inputs, reqs_batch_sizes}; } -std::vector DaliModelInstance::GenerateInputs(const TritonRequest &request) { +std::vector DaliModelInstance::GenerateInputs(const TritonRequest& request) { std::vector inputs(request.InputCount()); int num_samples = 0; for (uint32_t input_idx = 0; input_idx < request.InputCount(); ++input_idx) { @@ -233,7 +235,7 @@ std::vector DaliModelInstance::GenerateInputs(const TritonRequest &reque auto input_byte_size = input.ByteSize(); auto input_buffer_count = input.BufferCount(); auto meta = input.Meta(); - auto &idescr = inputs[input_idx]; + auto& idescr = inputs[input_idx]; for (uint32_t buffer_idx = 0; buffer_idx < input_buffer_count; ++buffer_idx) { auto buffer = 
input.GetBuffer(buffer_idx, device_type_t::CPU, GetDaliDeviceId()); idescr.buffers.push_back(std::move(buffer)); @@ -252,7 +254,7 @@ std::vector DaliModelInstance::GenerateInputs(const TritonRequest &reque void ValidateRequestedOutputs(const TritonRequest& request, const std::vector& outputs_info, - const std::unordered_map &output_order) { + const std::unordered_map& output_order) { uint32_t output_cnt = request.OutputCount(); ENFORCE(outputs_info.size() == output_cnt, make_string("Number of outputs expected by the requests (", output_cnt, @@ -260,8 +262,8 @@ void ValidateRequestedOutputs(const TritonRequest& request, outputs_info.size(), ").")); ENFORCE(output_cnt == output_order.size(), make_string("Number of outputs exptected by the requests (", output_cnt, - ") does not match the number of outputs in the config (", - output_order.size(), ").")); + ") does not match the number of outputs in the config (", output_order.size(), + ").")); } @@ -285,7 +287,7 @@ std::vector DaliModelInstance::AllocateOutputs( int output_idx = out_index.second; auto shapes = split_list_shape(outputs_info[output_idx].shape, batch_sizes); if (dali_model_->IsOutputSplit(name)) { - for (auto &shape: shapes) { + for (auto& shape : shapes) { shape = split_outer_dim(shape); } } @@ -304,8 +306,9 @@ std::vector DaliModelInstance::AllocateOutputs( return outputs; } -std::vector DaliModelInstance::AllocateOutputs(const TritonRequest &request, const TritonResponse &response, - const std::vector& outputs_info) { +std::vector DaliModelInstance::AllocateOutputs( + const TritonRequest& request, const TritonResponse& response, + const std::vector& outputs_info) { auto output_indices = dali_model_->GetOutputOrder(); ValidateRequestedOutputs(request, outputs_info, output_indices); std::vector outputs(request.OutputCount()); diff --git a/src/dali_model_instance.h b/src/dali_model_instance.h index b8591810..bbaacd76 100644 --- a/src/dali_model_instance.h +++ b/src/dali_model_instance.h @@ -60,7 +60,8 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance { BackendModelInstance(model, triton_model_instance), dali_model_(model) { auto serialized_pipeline = dali_model_->GetModelProvider().GetModel(); auto max_batch_size = dali_model_->MaxBatchSize(); - if (max_batch_size < 1) max_batch_size = -1; + if (max_batch_size < 1) + max_batch_size = -1; auto num_threads = dali_model_->GetModelParamters().GetNumThreads(); DaliPipeline pipeline(serialized_pipeline, max_batch_size, num_threads, GetDaliDeviceId()); dali_executor_ = std::make_unique(std::move(pipeline)); @@ -95,7 +96,7 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance { ProcessingMeta ProcessRequests(const std::vector& requests, const std::vector& responses); - TimeInterval ProcessRequest(const TritonRequest &request); + TimeInterval ProcessRequest(const TritonRequest& request); /** * @brief Generate descriptors of inputs provided by given \p requests @@ -104,7 +105,7 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance { InputsInfo GenerateInputs(const std::vector& requests); - std::vector GenerateInputs(const TritonRequest &request); + std::vector GenerateInputs(const TritonRequest& request); int32_t GetDaliDeviceId() { return !CudaStream() ? 
CPU_ONLY_DEVICE_ID : device_id_; @@ -121,13 +122,13 @@ class DaliModelInstance : public ::triton::backend::BackendModelInstance { const std::vector& batch_sizes, const std::vector& outputs_info); - std::vector AllocateOutputs(const TritonRequest &request, const TritonResponse &response, + std::vector AllocateOutputs(const TritonRequest& request, const TritonResponse& response, const std::vector& outputs_info); void ExecuteBatched(const std::vector& requests); - void ExecuteUnbatched(const std::vector &requests); + void ExecuteUnbatched(const std::vector& requests); std::unique_ptr dali_executor_; DaliModel* dali_model_; diff --git a/src/utils/triton.h b/src/utils/triton.h index 4cc6d902..169f7bee 100644 --- a/src/utils/triton.h +++ b/src/utils/triton.h @@ -124,14 +124,14 @@ class TritonError : public UniqueHandle, publ } static TritonError InvalidArg(const std::string &msg) { - auto err = TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code::TRITONSERVER_ERROR_INVALID_ARG, - msg.c_str()); + auto err = + TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code::TRITONSERVER_ERROR_INVALID_ARG, msg.c_str()); return TritonError(err); } static TritonError Internal(const std::string &msg) { - auto err = TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code::TRITONSERVER_ERROR_INTERNAL, - msg.c_str()); + auto err = + TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code::TRITONSERVER_ERROR_INTERNAL, msg.c_str()); return TritonError(err); } @@ -158,7 +158,7 @@ class TritonError : public UniqueHandle, publ TritonError &operator=(TRITONSERVER_Error *error) { if (handle_ != error) { - UniqueHandle::operator=(TritonError{error}); + UniqueHandle::operator=(TritonError{error}); } return *this; } diff --git a/src/utils/utils.h b/src/utils/utils.h index 9d2fd230..1cd1966b 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -23,10 +23,10 @@ #ifndef DALI_BACKEND_UTILS_UTILS_H_ #define DALI_BACKEND_UTILS_UTILS_H_ -#include +#include #include +#include #include -#include #include @@ -73,9 +73,9 @@ inline std::string from_string(const std::string& str) { return str; } -template -std::string vec_to_string(const std::vector &vec, const std::string &lbracket = "{", - const std::string &rbracket = "}", const std::string &delim = ", ") { +template +std::string vec_to_string(const std::vector& vec, const std::string& lbracket = "{", + const std::string& rbracket = "}", const std::string& delim = ", ") { std::stringstream ss; ss << lbracket; auto it = vec.begin(); diff --git a/src/utils/utils.test.cc b/src/utils/utils.test.cc index 0c6bdf41..829ac359 100644 --- a/src/utils/utils.test.cc +++ b/src/utils/utils.test.cc @@ -99,11 +99,9 @@ TEST_CASE("Split string") { TEST_CASE("Split outer dim") { int bs = 3; - std::vector> sample_shapes{ - std::vector{1, 1, 2, 3}, - std::vector{2, 2, 3, 1}, - std::vector{3, 3, 2, 1} - }; + std::vector> sample_shapes{std::vector{1, 1, 2, 3}, + std::vector{2, 2, 3, 1}, + std::vector{3, 3, 2, 1}}; TensorListShape<> tlist_shape(sample_shapes); auto split = split_outer_dim(tlist_shape);