Add ResNet preprocessing model (#594)

jantonguirao · web-flow · commit 9e7f179f83d8 · 2023-03-20T09:54:07.000-07:00
* Add ResNet preprocessing model

Signed-off-by: Joaquin Anton <janton@nvidia.com>

* Support sequence in tests

Signed-off-by: Joaquin Anton <janton@nvidia.com>

---------

Signed-off-by: Joaquin Anton <janton@nvidia.com>
diff --git a/ONNX_HUB_MANIFEST.json b/ONNX_HUB_MANIFEST.json
@@ -4683,6 +4683,46 @@
             "model_with_data_bytes": 95237476
         }
     },
+    {
+        "model": "ResNet-preproc",
+        "model_path": "vision/classification/resnet/preproc/resnet-preproc-v1-18.onnx",
+        "onnx_version": "1.13.1",
+        "opset_version": 18,
+        "metadata": {
+            "model_sha": "9cda24af90b4cd2ced4167fa36a41956ea0ce5e55c6ae475614a097cb89762c7",
+            "model_bytes": 1129,
+            "tags": [
+                "vision",
+                "classification",
+                "resnet",
+                "preprocessing"
+            ],
+            "io_ports": {
+                "inputs": [
+                    {
+                        "name": "images",
+                        "shape": [],
+                        "type": "seq(tensor(uint8))"
+                    }
+                ],
+                "outputs": [
+                    {
+                        "name": "preproc_data",
+                        "shape": [
+                            "B",
+                            3,
+                            224,
+                            224
+                        ],
+                        "type": "tensor(float)"
+                    }
+                ]
+            },
+            "model_with_data_path": "vision/classification/resnet/preproc/resnet-preproc-v1-18.tar.gz",
+            "model_with_data_sha": "216b89c1676c8a5a2dfc0ee1736b179b0777f9ba845ee6dd955d4ff684f29a3c",
+            "model_with_data_bytes": 883999
+        }
+    },
     {
         "model": "ShuffleNet-v1",
         "model_path": "vision/classification/shufflenet/model/shufflenet-3.onnx",
diff --git a/vision/classification/imagenet_preprocess.py b/vision/classification/imagenet_preprocess.py
@@ -1,10 +1,50 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import numpy as np
+from PIL import Image
 import mxnet
 from mxnet.gluon.data.vision import transforms
 
+def preprocess(image):
+    # resize so that the shorter side is 256, maintaining aspect ratio
+    def image_resize(image, min_len):
+        image = Image.fromarray(image)
+        ratio = float(min_len) / min(image.size[0], image.size[1])
+        if image.size[0] > image.size[1]:
+            new_size = (int(round(ratio * image.size[0])), min_len)
+        else:
+            new_size = (min_len, int(round(ratio * image.size[1])))
+        image = image.resize(new_size, Image.BILINEAR)
+        return np.array(image)
+    image = image_resize(image, 256)
+
+    # Crop centered window 224x224
+    def crop_center(image, crop_w, crop_h):
+        h, w, c = image.shape
+        start_x = w//2 - crop_w//2
+        start_y = h//2 - crop_h//2
+        return image[start_y:start_y+crop_h, start_x:start_x+crop_w, :]
+    image = crop_center(image, 224, 224)
+
+    # transpose
+    image = image.transpose(2, 0, 1)
+
+    # convert the input data into the float32 input
+    img_data = image.astype('float32')
+
+    # normalize
+    mean_vec = np.array([0.485, 0.456, 0.406])
+    stddev_vec = np.array([0.229, 0.224, 0.225])
+    norm_img_data = np.zeros(img_data.shape).astype('float32')
+    for i in range(img_data.shape[0]):
+        norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
+
+    # add batch channel
+    norm_img_data = norm_img_data.reshape(1, 3, 224, 224).astype('float32')
+    return norm_img_data
+
 # Pre-processing function for ImageNet models
-def preprocess(img):
+def preprocess_mxnet(img):
     '''
     Preprocessing required on the images for inference with mxnet gluon
     The function takes path to an image and returns processed tensor
diff --git a/vision/classification/resnet/README.md b/vision/classification/resnet/README.md
@@ -37,8 +37,8 @@ ResNet v2 uses pre-activation function whereas ResNet v1  uses post-activation f
 |ResNet50-qdq | [24.6 MB](model/resnet50-v1-12-qdq.onnx) | [16.8 MB](model/resnet50-v1-12-qdq.tar.gz) | 1.10.0 | 12 |74.43 | |
 > Compared with the fp32 ResNet50, int8 ResNet50's Top-1 accuracy drop ratio is 0.27%, Top-5 accuracy drop ratio is 0.01% and performance improvement is 1.82x.
 >
-> Note the performance depends on the test hardware. 
-> 
+> Note the performance depends on the test hardware.
+>
 > Performance data here is collected with Intel® Xeon® Platinum 8280 Processor, 1s 4c per instance, CentOS Linux 8.3, data batch size is 1.
 
 |Model        |Download  |Download (with sample test data)| ONNX version |Opset version|
@@ -68,24 +68,88 @@ All pre-trained models expect input images normalized in the same way, i.e. mini
 The inference was done using jpeg image.
 
 ### Preprocessing
-The images have to be loaded in to a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]. The transformation should preferably happen at preprocessing.
 
-The following code shows how to preprocess a NCHW tensor:
+The image needs to be preprocessed before being fed to the network.
+The first step is to extract a 224x224 crop from the center of the image. For this, the image is first scaled to a minimum size of 256x256 while keeping its aspect ratio. That is, the shorter side of the image is resized to 256 and the other side is scaled accordingly to maintain the original aspect ratio. After that, the image is normalized with mean = 255*[0.485, 0.456, 0.406] and std = 255*[0.229, 0.224, 0.225]. The last step is to transpose it from HWC to CHW layout.
+
+The described preprocessing steps can be represented with an ONNX model:
+```python
+import onnx
+from onnx import parser
+from onnx import checker
+
+resnet_preproc = parser.parse_model('''
+<
+  ir_version: 8,
+  opset_import: [ "" : 18, "local" : 1 ],
+  metadata_props: [ "preprocessing_fn" : "local.preprocess"]
+>
+resnet_preproc_g (seq(uint8[?, ?, 3]) images) => (float[B, 3, 224, 224] preproc_data)
+{
+    preproc_data = local.preprocess(images)
+}
+
+<
+  opset_import: [ "" : 18 ],
+  domain: "local",
+  doc_string: "Preprocessing function."
+>
+preprocess (input_batch) => (output_tensor) {
+    tmp_seq = SequenceMap <
+        body = sample_preprocessing(uint8[?, ?, 3] sample_in) => (float[3, 224, 224] sample_out) {
+            target_size = Constant <value = int64[2] {256, 256}> ()
+            image_resized = Resize <mode = \"linear\",
+                                    antialias = 1,
+                                    axes = [0, 1],
+                                    keep_aspect_ratio_policy = \"not_smaller\"> (sample_in, , , target_size)
+
+            target_crop = Constant <value = int64[2] {224, 224}> ()
+            image_sliced = CenterCropPad <axes = [0, 1]> (image_resized, target_crop)
+
+            kMean = Constant <value = float[3] {123.675, 116.28, 103.53}> ()
+            kStddev = Constant <value = float[3] {58.395, 57.12, 57.375}> ()
+            im_norm_tmp1 = Cast <to = 1> (image_sliced)
+            im_norm_tmp2 = Sub (im_norm_tmp1, kMean)
+            im_norm = Div (im_norm_tmp2, kStddev)
+
+            sample_out = Transpose <perm = [2, 0, 1]> (im_norm)
+        }
+    > (input_batch)
+    output_tensor = ConcatFromSequence < axis = 0, new_axis = 1 >(tmp_seq)
+}
+
+''')
+checker.check_model(resnet_preproc)
+```
+
+* ResNet preprocessing:
+
+|Model        |Download  |Download (with sample test data)| ONNX version |Opset version|
+|-------------|:--------------|:--------------|:--------------|:--------------|
+|ResNet-preproc| [1.1 KB](preproc/resnet-preproc-v1-18.onnx)  |  [864 KB](preproc/resnet-preproc-v1-18.tar.gz) |  1.13.1 | 18|
 
+
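+To prepend the data preprocessing steps to the model, we can use the ONNX compose utils. In the snippet below, `preprocessing_model` is the preprocessing model (for example, `resnet_preproc` from the snippet above) and `network_model` is the already-loaded ResNet model: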
 ```python
-import numpy
-
-def preprocess(img_data):
-    mean_vec = np.array([0.485, 0.456, 0.406])
-    stddev_vec = np.array([0.229, 0.224, 0.225])
-    norm_img_data = np.zeros(img_data.shape).astype('float32')
-    for i in range(img_data.shape[0]):
-         # for each pixel in each channel, divide the value by 255 to get value between [0, 1] and then normalize
-        norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]
-    return norm_img_data
+
+import onnx
+from onnx import version_converter
+from onnx import checker
+
+network_model = onnx.version_converter.convert_version(network_model, 18)
+network_model.ir_version = 8
+checker.check_model(network_model)
+
+model_w_preproc = onnx.compose.merge_models(
+    preprocessing_model, network_model,
+    io_map=[('preproc_data', 'data')]
+)
+checker.check_model(model_w_preproc)
+
 ```
 
-Check [imagenet_preprocess.py](../imagenet_preprocess.py) for additional sample code.
+
+Check [imagenet_preprocess.py](../imagenet_preprocess.py) for reference Python and MXNet implementations.
 
 ### Output
 The model outputs image scores for each of the [1000 classes of ImageNet](../synset.txt).
@@ -113,7 +177,7 @@ We used MXNet as framework with gluon APIs to perform validation. Use the notebo
 ResNet50-int8 and ResNet50-qdq are obtained by quantizing ResNet50-fp32 model. We use [Intel® Neural Compressor](https://github.com/intel/neural-compressor) with onnxruntime backend to perform quantization. View the [instructions](https://github.com/intel/neural-compressor/blob/master/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq/README.md) to understand how to use Intel® Neural Compressor for quantization.
 
 ### Environment
-onnx: 1.7.0 
+onnx: 1.7.0
 onnxruntime: 1.6.0+
 
 ### Prepare model
@@ -153,6 +217,7 @@ In European Conference on Computer Vision, pp. 630-645. Springer, Cham, 2016.
 * [airMeng](https://github.com/airMeng) (Intel)
 * [ftian1](https://github.com/ftian1) (Intel)
 * [hshen14](https://github.com/hshen14) (Intel)
+* [jantonguirao](https://github.com/jantonguirao) (NVIDIA)
 
 ## License
 Apache 2.0
diff --git a/vision/classification/resnet/preproc/resnet-preproc-v1-18.onnx b/vision/classification/resnet/preproc/resnet-preproc-v1-18.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cda24af90b4cd2ced4167fa36a41956ea0ce5e55c6ae475614a097cb89762c7
+size 1129
diff --git a/vision/classification/resnet/preproc/resnet-preproc-v1-18.tar.gz b/vision/classification/resnet/preproc/resnet-preproc-v1-18.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:216b89c1676c8a5a2dfc0ee1736b179b0777f9ba845ee6dd955d4ff684f29a3c
+size 883999
diff --git a/workflow_scripts/generate_onnx_hub_manifest.py b/workflow_scripts/generate_onnx_hub_manifest.py
@@ -124,7 +124,11 @@ def get_file_info(row, field, target_models=None):
 def get_model_tags(row):
+    """Build the list of tags for one model row.
+
+    The first element returned by split(row["source_file"]) is cut on "/"
+    and each piece becomes a tag, with underscores replaced by spaces
+    (presumably split is os.path.split, i.e. the directory part -- confirm
+    against this file's imports). The tag 'preprocessing' is appended when
+    the model file's href has a path component equal to 'preproc'.
+    """
     source_dir = split(row["source_file"])[0]
     raw_tags = source_dir.split("/")
-    return [tag.replace("_", " ") for tag in raw_tags]
+    tags = [tag.replace("_", " ") for tag in raw_tags]
+    # NOTE(review): row['model_path'] looks like a parsed HTML cell whose
+    # first child is the download link -- confirm against the caller.
+    model_file = row['model_path'].contents[0].attrs["href"]
+    if 'preproc' in model_file.split("/"):
+        tags.append('preprocessing')
+    return tags
 
 
 def get_model_ports(source_file, metadata, model_name):
diff --git a/workflow_scripts/onnx_test_data_utils.py b/workflow_scripts/onnx_test_data_utils.py
@@ -11,15 +11,23 @@
 import numpy as np
 import onnx
 from onnx import numpy_helper
+from onnx.onnx_data_pb2 import SequenceProto
 
 
 def read_tensorproto_pb_file(filename):
     """Return tuple of tensor name and numpy.ndarray of the data from a pb file containing a TensorProto."""
-
     tensor = onnx.load_tensor(filename)
     np_array = numpy_helper.to_array(tensor)
     return tensor.name, np_array
 
+def read_sequenceproto_pb_file(filename):
+    """Return tuple of sequence name and list of numpy.ndarray of the data from a pb file containing a SequenceProto."""
+    seq = SequenceProto()
+    with open(filename, 'rb') as f:
+        seq.ParseFromString(f.read())
+    list_of_arrays = numpy_helper.to_list(seq)
+    return seq.name, list_of_arrays
+
 
 def dump_tensorproto_pb_file(filename):
     """Dump the data from a pb file containing a TensorProto."""
diff --git a/workflow_scripts/ort_test_dir_utils.py b/workflow_scripts/ort_test_dir_utils.py
@@ -157,7 +157,7 @@ def save_data(prefix, name_data_map, model_info):
     save_data("output", name_output_map, model_outputs)
 
 
-def read_test_dir(dir_name):
+def read_test_dir(dir_name, input_types, output_types):
     """
     Read the input and output .pb files from the provided directory.
     Input files should have a prefix of 'input_'
@@ -169,15 +169,22 @@ def read_test_dir(dir_name):
 
     inputs = {}
     outputs = {}
+
     input_files = glob.glob(os.path.join(dir_name, "input_*.pb"))
     output_files = glob.glob(os.path.join(dir_name, "output_*.pb"))
 
-    for i in input_files:
-        name, data = onnx_test_data_utils.read_tensorproto_pb_file(i)
+    for i, filename in enumerate(input_files):
+        if 'seq' in input_types[i]:
+            name, data = onnx_test_data_utils.read_sequenceproto_pb_file(filename)
+        else:
+            name, data = onnx_test_data_utils.read_tensorproto_pb_file(filename)
         inputs[name] = data
 
-    for o in output_files:
-        name, data = onnx_test_data_utils.read_tensorproto_pb_file(o)
+    for i, filename in enumerate(output_files):
+        if 'seq' in output_files[i]:
+            name, data = onnx_test_data_utils.read_sequenceproto_pb_file(filename)
+        else:
+            name, data = onnx_test_data_utils.read_tensorproto_pb_file(filename)
         outputs[name] = data
 
     return inputs, outputs
@@ -217,12 +224,14 @@ def run_test_dir(model_or_dir):
     test_dirs = [d for d in glob.glob(os.path.join(model_dir, "test*")) if os.path.isdir(d)]
     if not test_dirs:
         raise ValueError("No directories with name starting with 'test' were found in {}.".format(model_dir))
-
     sess = ort.InferenceSession(model_path)
 
+    input_types = [inp.type for inp in sess.get_inputs()]
+    output_types = [out.type for out in sess.get_outputs()]
+
     for d in test_dirs:
         print(d)
-        inputs, expected_outputs = read_test_dir(d)
+        inputs, expected_outputs = read_test_dir(d, input_types, output_types)
 
         if expected_outputs:
             output_names = list(expected_outputs.keys())

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:9cda24af90b4cd2ced4167fa36a41956ea0ce5e55c6ae475614a097cb89762c7`
	`3`	`+size 1129`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:216b89c1676c8a5a2dfc0ee1736b179b0777f9ba845ee6dd955d4ff684f29a3c`
	`3`	`+size 883999`