diff --git a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md
index 3519b7d6b..09bc2853d 100644
--- a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md
+++ b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/step.md
@@ -50,6 +50,7 @@ Classes
     * pre_post_processing.steps.vision.ConvertImageToBGR
     * pre_post_processing.steps.vision.DrawBoundingBoxes
     * pre_post_processing.steps.vision.FloatToImageBytes
+    * pre_post_processing.steps.vision.Grayscale
     * pre_post_processing.steps.vision.ImageBytesToFloat
     * pre_post_processing.steps.vision.LetterBox
     * pre_post_processing.steps.vision.Normalize
diff --git a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md
index 4871166d3..8a652578c 100644
--- a/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md
+++ b/onnxruntime_extensions/tools/pre_post_processing/docs/pre_post_processing/steps/vision.md
@@ -109,6 +109,22 @@ Classes
 
     * pre_post_processing.step.Step
 
+`Grayscale(layout: str = 'BGR', name: Optional[str] = None)`
+:   Convert an image to grayscale.
+    
+    Input data can be uint8 or float. Output data is always uint8.
+    
+    Input shape: {height, width, 3}
+    Output shape is the same.
+    
+    Args:
+        layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR".
+        name: Optional name of step. Defaults to 'Grayscale'.
+
+    ### Ancestors (in MRO)
+
+    * pre_post_processing.step.Step
+
 `ImageBytesToFloat(rescale_factor: float = 0.00392156862745098, name: Optional[str] = None)`
 :   Convert uint8 or float values in range 0..255 to floating point values in range 0..1
     
diff --git a/onnxruntime_extensions/tools/pre_post_processing/readme.md b/onnxruntime_extensions/tools/pre_post_processing/readme.md
index 2ce1aa86d..fad4b3fd8 100644
--- a/onnxruntime_extensions/tools/pre_post_processing/readme.md
+++ b/onnxruntime_extensions/tools/pre_post_processing/readme.md
@@ -1,4 +1,4 @@
-Documentation was generated with pdoc3 (`pip install pdoc3`).
+Documentation was generated with pdoc3 (`pip install pdoc3==0.10.0`).
 From the parent directory:
   `python -m pdoc pdoc pre_post_processing -o ./pre_post_processing/docs --filter pre_post_processing`
 
diff --git a/onnxruntime_extensions/tools/pre_post_processing/steps/vision.py b/onnxruntime_extensions/tools/pre_post_processing/steps/vision.py
index 4d0d7184a..dcb91709d 100644
--- a/onnxruntime_extensions/tools/pre_post_processing/steps/vision.py
+++ b/onnxruntime_extensions/tools/pre_post_processing/steps/vision.py
@@ -277,6 +277,62 @@ def _create_graph_for_step(self, graph: onnx.GraphProto, onnx_opset: int):
 #
 # Pre-processing
 #
+
+class Grayscale(Step):
+    """Convert an image to grayscale.
+
+    Input data can be uint8 or float.
+
+    Input shape: {height, width, 3}
+    Output shape is the same.
+    """
+
+    def __init__(self, layout: str = "BGR", name: Optional[str] = None):
+        """
+        Args:
+            layout: Optional channel layout. "BGR" and "RGB" are supported. Defaults to "BGR".
+            name: Optional name of step. Defaults to 'Grayscale'.
+        """
+        super().__init__(["image"], ["grayscale_image"], name)
+        assert layout == "RGB" or layout == "BGR"
+        self._layout = layout
+
+    def _create_graph_for_step(self, graph: onnx.GraphProto, onnx_opset: int) -> onnx.GraphProto:
+        input_type_str, input_shape_str = self._get_input_type_and_shape_strs(graph, 0)
+        assert input_type_str == "uint8" or input_type_str == "float"
+        assert len(input_shape_str.split(",")) == 3
+
+        # do ITU-R 601-2 luma transform. Weights adapted from:
+        # see: https://docs.opencv.org/3.4/de/d25/imgproc_color_conversions.html
+        cm_str = "0.114,0.587,0.299" if self._layout == "BGR" else "0.299,0.587,0.114"
+
+        input_name = self.input_names[0]
+        output_name = self.output_names[0]
+
+        grayscaling_graph = onnx.parser.parse_graph(
+            f"""
+                grayscale ({input_type_str}[height, width, 3] {input_name})
+                    => (uint8[height, width, 3] {output_name})
+                {{
+                    axes = Constant <value = int64[1] {{2}}>()
+
+                    # create a tensor with shape (1, 1, 3) for expanding along the channel dimension
+                    expand_shape = Constant <value = int64[3] {{1, 1, 3}}>()
+
+                    const_node_b = Constant <value = float[3] {{ {cm_str} }}>()
+                    
+                    # cast to float (some ops like Sum require floats)
+                    X_float = Cast <to={onnx.TensorProto.FLOAT}> ({input_name})
+                    X_mult_b = Mul(const_node_b, X_float)
+                    X_channel_gray = ReduceSum(X_mult_b, axes)
+                    X_channel_cast = Cast <to={onnx.TensorProto.UINT8}> (X_channel_gray)
+                    {output_name} = Expand (X_channel_cast, expand_shape)
+                }}
+        """
+        )
+        return grayscaling_graph
+
+
 class Resize(Step):
     """
     Resize input data. Aspect ratio is maintained.
diff --git a/test/test_tools_add_pre_post_processing_to_model.py b/test/test_tools_add_pre_post_processing_to_model.py
index 6d92f924a..bd09a6b5a 100644
--- a/test/test_tools_add_pre_post_processing_to_model.py
+++ b/test/test_tools_add_pre_post_processing_to_model.py
@@ -408,6 +408,53 @@ def test_qatask_with_tokenizer(self):
 
         self.assertEqual(result[0][0], ref_output[0][0])
 
+    def _create_pipeline_and_run_for_grayscale(self, output_model: Path, layout: str = "RGB"):
+        import onnx
+
+        graph_def = onnx.parser.parse_graph("""\
+            identity (uint8[h,w,c] image_in)
+            => (uint8[h,w,c] image_out)
+            {
+            image_out = Identity(image_in)
+            }
+        """)
+
+        onnx_opset = 16
+
+        onnx_import = onnx.helper.make_operatorsetid("", onnx_opset)
+        ir_version = onnx.helper.find_min_ir_version_for([onnx_import])
+        input_model = onnx.helper.make_model_gen_version(graph_def, opset_imports=[onnx_import], ir_version=ir_version)
+
+        create_named_value = pre_post_processing.utils.create_named_value
+        inputs = [
+            create_named_value("image", onnx.TensorProto.UINT8, ["height", "width", 3]),
+        ]
+        pipeline = PrePostProcessor(inputs, onnx_opset)
+        pipeline.add_pre_processing([Grayscale(layout=layout)])
+
+        new_model = pipeline.run(input_model)
+        onnx.save_model(new_model, output_model)
+
+    def test_grayscale_step_rgb(self):
+        output_model = (self.temp4onnx / "identity.onnx").resolve()
+        self._create_pipeline_and_run_for_grayscale(output_model, layout="RGB")
+        image = Image.open(Path(test_data_dir) / "../pineapple.jpg").convert("RGB")
+
+        ort_sess = ort.InferenceSession(str(output_model), providers=["CPUExecutionProvider"])
+        grayscaled_image = ort_sess.run(
+            None,
+            {"image": np.asarray(image)},
+        )[0]
+
+        # all channel values are the same
+        self.assertEqual((np.max(grayscaled_image, axis=-1) == np.min(grayscaled_image, axis=-1)).all(), True)
+        # assert onnxruntime-extensions gray scaling matches Pillows gray scaling
+        np.testing.assert_allclose(
+            np.repeat(np.array(image.convert("L"))[:, :, np.newaxis], 3, axis=2),
+            grayscaled_image,
+            atol=1,
+        )
+
     # Corner Case
     def test_debug_step(self):
         import onnx