From 8ff731c832de797c8680b57c94dce67e526d3608 Mon Sep 17 00:00:00 2001
From: Felix Schneider
Date: Mon, 28 Oct 2024 20:22:23 +0100
Subject: [PATCH 1/3] [linalg] Add quantized version of `conv_3d_ncdhw_fcdhw`

This patch adds the quantized 3-D convolution operator
`conv_3d_ncdhw_fcdhw_q`, which uses the "channel-first" dimension ordering
of PyTorch and others.
---
 .../Linalg/IR/LinalgNamedStructuredOps.yaml   | 139 ++++++++++++++++++
 .../linalg/opdsl/ops/core_named_ops.py        |  40 +++++
 mlir/test/Dialect/Linalg/roundtrip.mlir       |  15 ++
 3 files changed, 194 insertions(+)

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
index bf2f26de26e9e..4e3ef937d7d48 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
@@ -4024,6 +4024,145 @@ structured_op: !LinalgStructuredOpConfig
                 - !ScalarExpression
                   scalar_arg: K
 --- !LinalgOpConfig
+metadata: !LinalgOpMetadata
+  name: conv_3d_ncdhw_fcdhw_q
+  cpp_class_name: Conv3DNcdhwFcdhwQOp
+  doc: |-
+    Performs 3-D convolution with zero point offsets.
+
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output. This includes the zero
+    point offsets common to quantized operations.
+  implements:
+  - LinalgConvolutionOpInterface
+structured_op: !LinalgStructuredOpConfig
+  args:
+  - !LinalgOperandDefConfig
+    name: I
+    kind: input_tensor
+    type_var: T1
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12
+      * s13)>
+  - !LinalgOperandDefConfig
+    name: K
+    kind: input_tensor
+    type_var: T2
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14] -> (s14, s1, s4, s8, s12)>
+  - !LinalgOperandDefConfig
+    name: IZp
+    kind: scalar
+    type_var: I32
+  - !LinalgOperandDefConfig
+    name: KZp
+    kind: scalar
+    type_var: I32
+  - !LinalgOperandDefConfig
+    name: O
+    kind: output_tensor
+    type_var: U
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14] -> (s0, s14, s2, s6, s10)>
+  - !LinalgOperandDefConfig
+    name: strides
+    kind: index_attr
+    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14] -> (s3, s7, s11)>
+    default_indices:
+    - 1
+    - 1
+    - 1
+  - !LinalgOperandDefConfig
+    name: dilations
+    kind: index_attr
+    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14] -> (s5, s9, s13)>
+    default_indices:
+    - 1
+    - 1
+    - 1
+  indexing_maps: !LinalgIndexingMapsConfig
+    static_indexing_maps:
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
+      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d8, d1 * s3 + d5 * s5, d2 * s7
+      + d6 * s9, d3 * s11 + d7 * s13)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
+      s7, s8, s9, s10, s11, s12, s13, s14] -> (d4, d8, d5, d6, d7)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
+      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
+      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
+      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d4, d1, d2, d3)>
+  iterator_types:
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  - reduction
+  - reduction
+  - reduction
+  - reduction
+  assignments:
+  - !ScalarAssign
+    arg: O
+    value: !ScalarExpression
+      scalar_fn:
+        kind: binary
+        fn_name: add
+        operands:
+        - !ScalarExpression
+          scalar_arg: O
+        - !ScalarExpression
+          scalar_fn:
+            kind: binary
+            fn_name: mul
+            operands:
+            - !ScalarExpression
+              scalar_fn:
+                kind: binary
+                fn_name: sub
+                operands:
+                - !ScalarExpression
+                  scalar_fn:
+                    kind: type
+                    fn_name: cast_signed
+                    type_var: U
+                    operands:
+                    - !ScalarExpression
+                      scalar_arg: I
+                - !ScalarExpression
+                  scalar_fn:
+                    kind: type
+                    fn_name: cast_signed
+                    type_var: U
+                    operands:
+                    - !ScalarExpression
+                      scalar_arg: IZp
+            - !ScalarExpression
+              scalar_fn:
+                kind: binary
+                fn_name: sub
+                operands:
+                - !ScalarExpression
+                  scalar_fn:
+                    kind: type
+                    fn_name: cast_signed
+                    type_var: U
+                    operands:
+                    - !ScalarExpression
+                      scalar_arg: K
+                - !ScalarExpression
+                  scalar_fn:
+                    kind: type
+                    fn_name: cast_signed
+                    type_var: U
+                    operands:
+                    - !ScalarExpression
+                      scalar_arg: KZp
+--- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: depthwise_conv_1d_nwc_wc
   cpp_class_name: DepthwiseConv1DNwcWcOp
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
index b45fecd0ee145..4c7efc8d80876 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@@ -1126,6 +1126,46 @@ def conv_3d_ncdhw_fcdhw(
         ],
     ) * TypeFn.cast_signed(U, K[D.f, D.c, D.kd, D.kh, D.kw])
 
+@linalg_structured_op
+def conv_3d_ncdhw_fcdhw_q(
+    I=TensorDef(
+        T1,
+        S.N,
+        S.C,
+        S.OD * S.SD + S.KD * S.DD,
+        S.OH * S.SH + S.KH * S.DH,
+        S.OW * S.SW + S.KW * S.DW,
+    ),
+    K=TensorDef(T2, S.F, S.C, S.KD, S.KH, S.KW),
+    IZp=ScalarDef(I32),
+    KZp=ScalarDef(I32),
+    O=TensorDef(U, S.N, S.F, S.OD, S.OH, S.OW, output=True),
+    strides=IndexAttrDef(S.SD, S.SH, S.SW, default=[1, 1, 1]),
+    dilations=IndexAttrDef(S.DD, S.DH, S.DW, default=[1, 1, 1]),
+):
+    """Performs 3-D convolution with zero point offsets.
+
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output. This includes the zero
+    point offsets common to quantized operations.
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.od, D.oh, D.ow, D.f, D.kd, D.kh, D.kw, D.c) + O[D.n, D.f, D.od, D.oh, D.ow] += ( + TypeFn.cast_signed( + U, + I[ + D.n, + D.c, + D.od * S.SD + D.kd * S.DD, + D.oh * S.SH + D.kh * S.DH, + D.ow * S.SW + D.kw * S.DW, + ], + ) - TypeFn.cast_signed(U, IZp) + ) * ( + TypeFn.cast_signed(U, K[D.f, D.c, D.kd, D.kh, D.kw]) + - TypeFn.cast_signed(U, KZp) + ) @linalg_structured_op def depthwise_conv_1d_nwc_wc( diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 1b8969bd11559..6e5adf007f58d 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -694,3 +694,18 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt: // CHECK-LABEL: func @conv2d_channel_first_q_promote( // CHECK: %[[arg0:[a-zA-z0-9]*]]: tensor<100x3x224x224xi8>, %[[arg1:[a-zA-z0-9]*]]: tensor<64x3x5x5xi8>, %[[arg2:[a-zA-z0-9]*]]: i8, %[[arg3:[a-zA-z0-9]*]]: i8) // CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32> + +// ----- + +func.func @conv3d_channel_first_q(%img: tensor<1x27x49x48x47xi8>, %filt: tensor<28x27x3x4x5xi8>, %a: i32, %b: i32) -> tensor<1x28x47x45x43xi32> { + %init = arith.constant dense<0> : tensor<1x28x47x45x43xi32> + %1 = linalg.conv_3d_ncdhw_fcdhw_q {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins(%img, %filt, %a, %b : tensor<1x27x49x48x47xi8>, tensor<28x27x3x4x5xi8>, i32, i32) + outs(%init : tensor<1x28x47x45x43xi32>) -> tensor<1x28x47x45x43xi32> + return %1 : tensor<1x28x47x45x43xi32> +} + +// CHECK-LABEL: func @conv3d_channel_first_q( +// CHECK: %[[arg0:[a-zA-z0-9]*]]: tensor<1x27x49x48x47xi8>, %[[arg1:[a-zA-z0-9]*]]: tensor<28x27x3x4x5xi8>, %[[arg2:[a-zA-z0-9]*]]: i32, %[[arg3:[a-zA-z0-9]*]]: i32) +// CHECK: linalg.conv_3d_ncdhw_fcdhw_q {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<1x27x49x48x47xi8>, tensor<28x27x3x4x5xi8>, i32, i32) outs(%{{.*}} : tensor<1x28x47x45x43xi32>) -> tensor<1x28x47x45x43xi32> From 639951b7f54638667ac976e5c00aa273ffd7d828 Mon Sep 17 00:00:00 2001 From: Felix Schneider Date: Mon, 28 Oct 2024 20:52:34 +0100 Subject: [PATCH 2/3] fix formatting --- .../python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 4c7efc8d80876..daa7ccd3ae499 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -1126,6 +1126,7 @@ def conv_3d_ncdhw_fcdhw( ], ) * TypeFn.cast_signed(U, K[D.f, D.c, D.kd, D.kh, D.kw]) + @linalg_structured_op def conv_3d_ncdhw_fcdhw_q( I=TensorDef( @@ -1152,7 +1153,7 @@ def conv_3d_ncdhw_fcdhw_q( implements(ConvolutionOpInterface) domain(D.n, D.od, D.oh, D.ow, D.f, D.kd, D.kh, D.kw, D.c) O[D.n, D.f, D.od, D.oh, D.ow] += ( - TypeFn.cast_signed( + TypeFn.cast_signed( U, I[ D.n, @@ -1161,7 +1162,8 @@ def conv_3d_ncdhw_fcdhw_q( D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, ], - ) - TypeFn.cast_signed(U, IZp) + ) + - TypeFn.cast_signed(U, IZp) ) * ( TypeFn.cast_signed(U, 
K[D.f, D.c, D.kd, D.kh, D.kw]) - TypeFn.cast_signed(U, KZp) From 9df9b6753fdc2cf39d80636949925471ca09d2dc Mon Sep 17 00:00:00 2001 From: Felix Schneider Date: Mon, 28 Oct 2024 21:43:23 +0100 Subject: [PATCH 3/3] fix formatting? --- mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index daa7ccd3ae499..8a733f5ce22eb 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -1169,6 +1169,7 @@ def conv_3d_ncdhw_fcdhw_q( - TypeFn.cast_signed(U, KZp) ) + @linalg_structured_op def depthwise_conv_1d_nwc_wc( I=TensorDef(T1, S.N, S.OW * S.SW + S.KW * S.DW, S.IC),
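
For reference only (not part of the patch series): a minimal NumPy sketch of the computation that `conv_3d_ncdhw_fcdhw_q` describes. The helper name and signature below are invented for this illustration; it models the op body above, which accumulates `(I - IZp) * (K - KZp)` over the channel and kernel dimensions for an NCDHW input and FCDHW filter, promoting operands to the accumulator type before subtracting the zero points.

# Illustrative sketch, not the MLIR implementation; name/signature are hypothetical.
import numpy as np

def conv_3d_ncdhw_fcdhw_q_ref(img, filt, i_zp, k_zp,
                              strides=(1, 1, 1), dilations=(1, 1, 1)):
    """NumPy model of the quantized NCDHW/FCDHW 3-D convolution (no padding)."""
    n, c, d, h, w = img.shape          # NCDHW input
    f, _, kd, kh, kw = filt.shape      # FCDHW filter
    sd, sh, sw = strides
    dd, dh, dw = dilations
    od = (d - dd * (kd - 1) - 1) // sd + 1
    oh = (h - dh * (kh - 1) - 1) // sh + 1
    ow = (w - dw * (kw - 1) - 1) // sw + 1
    # Promote to the accumulator type, then subtract the zero points,
    # mirroring the cast_signed / sub sequence in the op body.
    x = img.astype(np.int32) - np.int32(i_zp)
    k = filt.astype(np.int32) - np.int32(k_zp)
    out = np.zeros((n, f, od, oh, ow), dtype=np.int32)
    for zd in range(kd):
        for zh in range(kh):
            for zw in range(kw):
                # Strided, dilated input window for this kernel tap.
                win = x[:, :,
                        zd * dd : zd * dd + od * sd : sd,
                        zh * dh : zh * dh + oh * sh : sh,
                        zw * dw : zw * dw + ow * sw : sw]
                # Contract over the input channel dimension.
                out += np.einsum("ncdhw,fc->nfdhw", win, k[:, :, zd, zh, zw])
    return out

With the shapes from the roundtrip test above (input 1x27x49x48x47, filter 28x27x3x4x5, unit strides and dilations), this sketch yields a result of shape (1, 28, 47, 45, 43), matching the tensor<1x28x47x45x43xi32> output in the test.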