diff --git a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md index 3519b7d6b..09bc2853d 100644 --- a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md +++ b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md @@ -50,6 +50,7 @@ Classes * pre_post_processing.steps.vision.ConvertImageToBGR * pre_post_processing.steps.vision.DrawBoundingBoxes * pre_post_processing.steps.vision.FloatToImageBytes + * pre_post_processing.steps.vision.Grayscale * pre_post_processing.steps.vision.ImageBytesToFloat * pre_post_processing.steps.vision.LetterBox * pre_post_processing.steps.vision.Normalize diff --git a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md index 4871166d3..8a652578c 100644 --- a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md +++ b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md @@ -109,6 +109,22 @@ Classes * pre_post_processing.step.Step +`Grayscale(layout: str = 'BGR', name: Optional[str] = None)` +: Convert an image to grayscale. + + Input data can be uint8 or float. + + Input shape: {height, width, 3} + Output shape is the same. + + Args: + layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR". + name: Optional name of step. Defaults to 'Grayscale'. 
class Grayscale(Step):
    """Convert a 3-channel image to grayscale.

    Input data can be uint8 or float.

    Input shape: {height, width, 3}
    Output shape is the same, but the output type is always uint8. Each of the
    3 output channels holds the same luma value.

    No rescaling is applied before the final cast to uint8, so float input is
    expected to already be in the range 0..255.
    """

    def __init__(self, layout: str = "BGR", name: Optional[str] = None):
        """
        Args:
            layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR".
            name: Optional name of step. Defaults to 'Grayscale'.
        """
        super().__init__(["image"], ["grayscale_image"], name)
        assert layout in ("RGB", "BGR"), f"Unsupported layout '{layout}'. Expected 'RGB' or 'BGR'."
        self._layout = layout

    def _create_graph_for_step(self, graph: onnx.GraphProto, onnx_opset: int) -> onnx.GraphProto:
        """Build the ONNX graph that performs the grayscale conversion.

        Args:
            graph: Graph the step is being attached to. Used to look up the type and shape of input 0.
            onnx_opset: ONNX opset the pipeline is targeting. Not used when building this graph.

        Returns:
            Graph taking the {height, width, 3} image and producing a uint8 {height, width, 3} image.
        """
        input_type_str, input_shape_str = self._get_input_type_and_shape_strs(graph, 0)
        assert input_type_str in ("uint8", "float"), (
            f"Grayscale input must be uint8 or float. Got '{input_type_str}'."
        )
        assert len(input_shape_str.split(",")) == 3, (
            f"Grayscale input must have 3 dimensions (height, width, channels). Got '{input_shape_str}'."
        )

        # ITU-R 601-2 luma transform: Y = 0.299 R + 0.587 G + 0.114 B.
        # See: https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html
        # The weights are listed in the same order as the channels of the input layout.
        cm_str = "0.114,0.587,0.299" if self._layout == "BGR" else "0.299,0.587,0.114"

        input_name = self.input_names[0]
        output_name = self.output_names[0]

        # NOTE: doubled braces are literal braces in the f-string. Cast `to` values are
        # onnx.TensorProto data types: 1 == FLOAT, 2 == UINT8.
        grayscaling_graph = onnx.parser.parse_graph(
            f"""
            grayscale ({input_type_str}[height, width, 3] {input_name})
                => (uint8[height, width, 3] {output_name})
            {{
                # reduce over the last (channel) axis
                axes = Constant <value = int64[1] {{-1}}> ()

                # create a tensor with shape (1, 1, 3) for expanding along the channel dimension
                expand_shape = Constant <value = int64[3] {{1, 1, 3}}> ()

                # per-channel luma weights
                const_node_b = Constant <value = float[3] {{{cm_str}}}> ()

                # cast to float (some ops like Sum require floats)
                X_float = Cast <to = 1> ({input_name})
                X_mult_b = Mul(const_node_b, X_float)
                # keepdims defaults to 1 so the result has shape (height, width, 1)
                X_channel_gray = ReduceSum(X_mult_b, axes)
                X_channel_cast = Cast <to = 2> (X_channel_gray)
                {output_name} = Expand (X_channel_cast, expand_shape)
            }}
            """
        )
        return grayscaling_graph
def _create_pipeline_and_run_for_grayscale(self, output_model: Path, layout: str = "RGB"):
    """Wrap an identity model with a Grayscale pre-processing step and save it.

    Args:
        output_model: Path the combined model is written to.
        layout: Channel layout passed to the Grayscale step.
    """
    import onnx

    opset = 16
    opset_import = onnx.helper.make_operatorsetid("", opset)

    # Trivial model that passes the image through unchanged, so the model output
    # is exactly what the pre-processing step produced.
    identity_graph = onnx.parser.parse_graph("""\
        identity (uint8[h,w,c] image_in)
            => (uint8[h,w,c] image_out)
        {
            image_out = Identity(image_in)
        }
        """)
    base_model = onnx.helper.make_model_gen_version(
        identity_graph,
        opset_imports=[opset_import],
        ir_version=onnx.helper.find_min_ir_version_for([opset_import]),
    )

    image_input = pre_post_processing.utils.create_named_value(
        "image", onnx.TensorProto.UINT8, ["height", "width", 3]
    )
    processor = PrePostProcessor([image_input], opset)
    processor.add_pre_processing([Grayscale(layout=layout)])

    onnx.save_model(processor.run(base_model), output_model)


def test_grayscale_step_rgb(self):
    """Grayscale step on an RGB image matches Pillow's "L" conversion within 1 level."""
    model_path = (self.temp4onnx / "identity.onnx").resolve()
    self._create_pipeline_and_run_for_grayscale(model_path, layout="RGB")

    rgb_image = Image.open(Path(test_data_dir) / "../pineapple.jpg").convert("RGB")
    session = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
    feeds = {"image": np.asarray(rgb_image)}
    actual = session.run(None, feeds)[0]

    # all channel values are the same
    channels_agree = np.max(actual, axis=-1) == np.min(actual, axis=-1)
    self.assertEqual(channels_agree.all(), True)

    # onnxruntime-extensions grayscaling must match Pillow's grayscaling.
    # Pillow yields a single channel, so replicate it to 3 channels for the comparison.
    pillow_gray = np.array(rgb_image.convert("L"))
    pillow_gray_3ch = np.repeat(pillow_gray[:, :, np.newaxis], 3, axis=2)
    np.testing.assert_allclose(pillow_gray_3ch, actual, atol=1)