diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
index 13bbba2b492fa..455d07fb4408a 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
@@ -228,13 +228,13 @@ class Tosa_I32EnumAttr<string name, string description, string mnemonic,
 // FP  : Floating-Point Inference. Primarily FP16 and FP32 operations.
 //
 // Extension:
-// INT16    : 16-bit integer operations.
-// INT4     : 4-bit integer weights.
-// BF16     : BFloat16 operations.
-// FP8      : 8-bit floating-point operations E4M3.
-// FP8      : 8-bit floating-point operations E5M2.
-// FFT      : Fast Fourier Transform operations.
-// VARIABLE : Stateful variable operations.
+// INT16        : 16-bit integer operations.
+// INT4         : 4-bit integer weights.
+// BF16         : BFloat16 operations.
+// FP8E4M3      : 8-bit floating-point operations E4M3.
+// FP8E5M2      : 8-bit floating-point operations E5M2.
+// FFT          : Fast Fourier Transform operations.
+// VARIABLE     : Stateful variable operations.
 //===----------------------------------------------------------------------===//
 
 def Tosa_PRO_INT   : I32EnumAttrCase<"pro_int", 1>;
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index 3de1c21f40b43..2551c5d6a0a35 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -37,7 +37,8 @@ def Tosa_ArgMaxOp : Tosa_InferShapedTypeOp<"argmax"> {
 
   let description = [{
     This returns the index with the largest value across the given axis of the
-    input tensor.
+    input tensor. If multiple locations have equal values, returns the first
+    match along the search axis.
   }];
 
   let arguments = (ins
@@ -108,7 +109,8 @@ def Tosa_Conv2DOp : Tosa_ConvOp<"conv2d"> {
 
   let description = [{
     Performs a 2D convolution over the given tensor input, using the weight
-    tensor.
+    tensor. Implementations may choose to skip calculation of multiplies in
+    the padding area.
   }];
 
   let arguments = (ins
@@ -144,7 +146,8 @@ def Tosa_Conv3DOp : Tosa_ConvOp<"conv3d"> {
   let summary = "3D Convolution operator";
 
   let description = [{
-    Performs a 3D convolution over the given input tensor.
+    Performs a 3D convolution over the given input tensor. Implementations
+    may choose to skip calculation of multiplies in the padding area.
   }];
 
   let arguments = (ins
@@ -181,7 +184,8 @@ def Tosa_DepthwiseConv2DOp : Tosa_ConvOp<"depthwise_conv2d"> {
 
   let description = [{
     Performs 2D convolutions separately over each channel of the given tensor
-    input, using the weight tensor.
+    input, using the weight tensor. Implementations may choose to skip
+    calculation of multiplies in the padding area.
   }];
 
   let arguments = (ins
@@ -368,7 +372,8 @@ def Tosa_TransposeConv2DOp : Tosa_ConvOp<"transpose_conv2d"> {
 
   let description = [{
     Performs a 2D transposed convolution over the given tensor input, using the
-    weights tensor.
+    weights tensor. Implementations may choose to skip calculation of multiplies
+    by zero at fractional input positions.
   }];
 
   let arguments = (ins
@@ -443,10 +448,14 @@ def Tosa_SigmoidOp : Tosa_ElementwiseUnaryOp<"sigmoid"> {
   let summary = "Computes elementwise sigmoid of input.";
 
   let description = [{
-    Sigmoid function: output = 1 / (1 + exp(-input))
-    For quantized integer data types, the TABLE operator should be used instead
-    with the following definition.  The sigmoid table has 513 entries each of
-    16-bit precision and covering the input range -16.0 to +16.0
+    Applies the sigmoid logistic function to each element of the input tensor:
+    $ sigmoid(x) = \frac{1}{1 + e^{-x}} $.
+
+    For quantized integer data types, the TABLE operator should be used instead.
+    Each implementation may choose an appropriate TABLE given the scale and zero
+    point of the input data. Eight or sixteen bit precision tables may be used
+    based on the input tensor to the sigmoid function. The sigmoid table has 513
+    entries each of 16-bit precision and covering the input range -16.0 to +16.0
     in steps of 1/16.
   }];
 
@@ -471,10 +480,14 @@ def Tosa_TanhOp : Tosa_ElementwiseUnaryOp<"tanh"> {
   let summary = "Computes elementwise hyperbolic tangent of input";
 
   let description = [{
-    Parameterized hyperbolic tangent.
-    For quantized integer data types, the TABLE operator should be used instead
-    with the following definition.  The tanh_table has 513 entries each of
-    16-bit precision and covering the input range -8.0 to +8.0 in steps of 1/32.
+    Parameterized hyperbolic tangent: $ tanh(x) = \frac{1 - e^{-2x}}{1 + e^{-2x}} $.
+
+    For quantized integer data types, the TABLE operator should be used instead.
+    Each implementation may choose an appropriate TABLE given the scale and zero
+    point of the input data. Eight or sixteen bit precision tables may be used
+    based on the input tensor to the tanh function. The tanh_table has 513
+    entries each of 16-bit precision and covering the input range -8.0 to +8.0
+    in steps of 1/32.
   }];
 
   let arguments = (ins
@@ -498,10 +511,10 @@ def Tosa_ErfOp : Tosa_ElementwiseUnaryOp<"erf"> {
   let summary = "Computes gauss error function of input";
 
   let description = [{
-    Gauss error function: $ erf(x) = \frac{2}{\sqrt(\pi)} \int_{0}^{x} e^{-t^2} \ dt $
+    Gauss error function: $ erf(x) = \frac{2}{\sqrt{\pi}} \int_{0}^{x} e^{-t^2} dt $.
     For quantized integer data types, the TABLE operator should be used instead
-    with the following definition.  The erf_table has 513 entries each of
-    16-bit/8-bit precision and covering the input range -4.0 to +4.0 in steps of 1/64.
+    with the following definition. The ERF table has 513 entries each of
+    16-bit precision and covering the input range -4.0 to +4.0 in steps of 1/64.
   }];
 
   let arguments = (ins
@@ -536,7 +549,7 @@ def Tosa_AddOp : Tosa_ElementwiseOp<"add", [
 
   let description = [{
     Elementwise addition of input1 and input2. Axis of size 1 will be broadcast,
-    as necessary.
+    as necessary. Rank of input tensors must match.
 
     Example:
 
@@ -575,7 +588,8 @@ def Tosa_ArithmeticRightShiftOp : Tosa_ElementwiseOp<"arithmetic_right_shift",
 
   let description = [{
     Elementwise arithmetic right shift of input1 by the amount specified in
-    input2. Axis of size 1 will be broadcast, as necessary.
+    input2. Axis of size 1 will be broadcast, as necessary. Rank of input tensors
+    must match.
   }];
 
   let arguments = (ins
@@ -604,7 +618,7 @@ def Tosa_BitwiseAndOp : Tosa_ElementwiseOp<"bitwise_and", [
 
   let description = [{
     Elementwise bitwise AND of input1 and input2. Axis of size 1
-    will be broadcast as necessary.
+    will be broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -632,7 +646,7 @@ def Tosa_BitwiseOrOp : Tosa_ElementwiseOp<"bitwise_or", [
 
   let description = [{
     Elementwise bitwise OR of input1 and input2. Axis of size 1 will be
-    broadcast as necessary.
+    broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -660,7 +674,7 @@ def Tosa_BitwiseXorOp : Tosa_ElementwiseOp<"bitwise_xor", [
 
   let description = [{
     Elementwise bitwise XOR of input1 and input2. Axis of size 1 will be
-    broadcast as necessary.
+    broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -685,8 +699,10 @@ def Tosa_IntDivOp : Tosa_ElementwiseOp<"int_div", [SameOperandsAndResultElementT
   let summary = "Integer divide operator";
 
   let description = [{
-    Elementwise integer divide operator of input1 by input2. Axis of size 1
-    will be broadcast, as necessary.
+    Elementwise integer divide operator of input1 by input2. The result of the divide
+    is truncated towards zero. Expected use is for operations on non-scaled integers.
+    Floating point divide should use RECIPROCAL and MUL. Quantized integer divide
+    should use TABLE (for 1/x) and MUL.
   }];
 
   let arguments = (ins
@@ -716,7 +732,7 @@ def Tosa_LogicalAndOp : Tosa_ElementwiseOp<"logical_and", [
 
   let description = [{
     Elementwise logical AND of input1 and input2. Axis of size 1 will be
-    broadcast, as necessary.
+    broadcast, as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -743,7 +759,7 @@ def Tosa_LogicalLeftShiftOp : Tosa_ElementwiseOp<"logical_left_shift",
 
   let description = [{
     Elementwise left shift of input1 and input2. Axis of size 1 will be
-    broadcast, as necessary.
+    broadcast, as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -770,7 +786,8 @@ def Tosa_LogicalRightShiftOp : Tosa_ElementwiseOp<"logical_right_shift",
 
   let description = [{
     Elementwise logical right shift of input1 by the amount specified in input2.
-    Axis of size 1 will be broadcast, as necessary.
+    Axis of size 1 will be broadcast, as necessary. Rank of input tensors must
+    match.
   }];
 
   let arguments = (ins
@@ -798,7 +815,7 @@ def Tosa_LogicalOrOp : Tosa_ElementwiseOp<"logical_or", [
 
   let description = [{
     Elementwise logical OR of input1 and input2. Axis of size 1 will be
-    broadcast as necessary.
+    broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -825,8 +842,8 @@ def Tosa_LogicalXorOp : Tosa_ElementwiseOp<"logical_xor", [
   let summary = "Returns the truth value of x XOR y element-wise.";
 
   let description = [{
-    Elementwise logical XOR of input1 and input2.  Axis of size 1 will be
-    broadcast as necessary.
+    Elementwise logical XOR of input1 and input2. Axis of size 1 will be
+    broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -854,7 +871,7 @@ def Tosa_MaximumOp : Tosa_ElementwiseOp<"maximum", [
 
   let description = [{
     Elementwise max of input1 and input2. Axis of size 1 will be broadcast, as
-    necessary.
+    necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -883,7 +900,7 @@ def Tosa_MinimumOp : Tosa_ElementwiseOp<"minimum", [
 
   let description = [{
     Elementwise minimum of input1 and input2. Axis of size 1
-    will be broadcast, as necessary.
+    will be broadcast, as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -919,8 +936,8 @@ def Tosa_MulOp : Tosa_Op<"mul", [
 
   let description = [{
     Elementwise multiplication (Hadamard product) of input1 and input2.
-    Axis of size 1 will be broadcast, as necessary.
-    i8/i16 input type can be promoted to i32 result type.
+    Axis of size 1 will be broadcast, as necessary. Rank of input tensors must
+    match.
   }];
 
   let arguments = (ins
@@ -954,7 +971,8 @@ def Tosa_PowOp : Tosa_ElementwiseOp<"pow", [SameOperandsAndResultElementType]> {
 
   let description = [{
     Elementwise input1 raised to the power of input2.
-    Axis of size 1 will be broadcast, as necessary.
+    Axis of size 1 will be broadcast, as necessary. Rank of input tensors must
+    match.
   }];
 
   let arguments = (ins
@@ -980,7 +998,7 @@ def Tosa_SubOp : Tosa_ElementwiseOp<"sub", [SameOperandsAndResultElementType]> {
 
   let description = [{
     Elementwise subtraction of input1 and input2. Axis of size 1 will be
-    broadcast as necessary.
+    broadcast as necessary. Rank of input tensors must match.
   }];
 
   let arguments = (ins
@@ -1007,20 +1025,20 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> {
   let summary = "Table lookup op";
 
   let description = [{
-    Interpolated table lookup operation. Input values are scaled to create a
-    fixed-point 9.7 value.    The high 9 bits are used to index into the table.
-    The fractional bits are used to interpolate based on the looked up value and
-    the index+1 value in the table. The TABLE operator then returns a 16.7
-    interpolated value. Note that there must be 513 values to handle the full
-    range of inputs.
+    Table lookup operation. For an int8_t TABLE operation, perform a 256-entry
+    table lookup returning an int8_t value. For int16_t tables, the int16_t
+    input is treated as a fixed-point 9.7 value. The most significant 9 bits
+    are used to index into the table. The fractional 7 bits are used to
+    interpolate based on table[index] and table[index+1]. For int16_t inputs,
+    the TABLE operator returns a 16.7 interpolated value in an int32_t. This
+    value can then be input to the RESCALE operator to scale to the required
+    output data type. Note that the int16_t table has 513 values to handle
+    table[index+1] when index=511.
 
-    The TABLE operator is expected to be used as follows:
-    * A RESCALE node is expected before the TABLE operator to scale the input
-      to a full int16_t range for the table lookup
-    * If an int16_t result is required then follow the TABLE operator with a
-      RESCALE with a right shift of 7
-    * If an int8_t result is required then follow the TABLE operator with a
-      RESCALE with a right shift of 15
+    An int16_t to int16_t table lookup can be constructed in TOSA as follows:
+    * Use the TABLE operator to produce a fixed point 16.7 interpolated result
+    * Use RESCALE (in_t=int32_t, out_t=int16_t, scale=1<<14, shift=21) to
+      scale the output to int16_t range (or alternate scale as required)
   }];
 
   let arguments = (ins
@@ -1057,7 +1075,7 @@ def Tosa_AbsOp : Tosa_ElementwiseUnaryOp<"abs"> {
   let summary = "Elementwise abs op";
 
   let description = [{
-    Elementwise absolute value operation
+    Elementwise absolute value operation.
 
     Example:
 
@@ -1211,7 +1229,7 @@ def Tosa_FloorOp : Tosa_ElementwiseUnaryOp<"floor"> {
   let summary = "Elementwise floor op";
 
   let description = [{
-    Elementwise floor operation
+    Elementwise floor operation.
   }];
 
   let arguments = (ins
@@ -1285,7 +1303,7 @@ def Tosa_NegateOp : Tosa_ElementwiseUnaryOp<"negate"> {
   let summary = "Elementwise negate op";
 
   let description = [{
-    Elementwise negation operation
+    Elementwise negation operation.
   }];
 
   let arguments = (ins
@@ -1819,9 +1837,9 @@ def Tosa_PadOp : Tosa_InferShapedTypeOp<"pad"> {
   let summary = "Pads a tensor with value specified.";
 
   let description = [{
-    The `tosa.pad` operation pads a tensor along borders of each dimension with
-    `pad_const` (defaults to zero), given a padding configuration `padding`
-    specifying low and high values along the dimensions.
+    Pads a tensor along the borders of each dimension with a supplied value.
+    Returns a new tensor with the padding included. The pad_const value includes
+    the zero point if the tensor uses a zero point.
 
     Example:
 
@@ -2006,7 +2024,9 @@ def Tosa_TransposeOp : Tosa_InferShapedTypeOp<"transpose",
   let summary = "Transpose operator";
 
   let description = [{
-    Permutes the dimensions based on perm.
+    Permutes the dimensions of the input tensor input1 based on the perms
+    argument. Each value in the perms list must be a valid dimension of the
+    input tensor and may not be repeated.
   }];
 
   let arguments = (ins
@@ -2044,8 +2064,10 @@ def Tosa_GatherOp : Tosa_InferShapedTypeOp<"gather"> {
   let summary = "Gather operation,";
 
   let description = [{
-    Generate a tensor for which each element in the output is a slice of the
-    values tensor based on the value of indices.
+    Generate a tensor for which each element in the output is a subtensor of the
+    values tensor based on the indices. N is the number of batches, W the number
+    of indices in each batch, K the range of each index and C the number of data
+    channels for each index.
   }];
 
   let arguments = (ins
@@ -2070,8 +2092,14 @@ def Tosa_ScatterOp : Tosa_InferShapedTypeOp<"scatter"> {
   let summary = "Scatter operation,";
 
   let description = [{
-    The values_out tensor is set to the values_in tensor with data modified as follows:
-    data from the input tensor is inserted at the positions specified by the indices tensor.
+    The values_out tensor is set to the values_in tensor with data modified as
+    follows: data from the input tensor is inserted at the positions specified
+    by the indices tensor. N is the number of batches, W the number of indices
+    in each batch, K the range of each index and C the number of data channels
+    for each index. It is not permitted to repeat the same output index within a
+    single SCATTER operation and so each output index occurs at most once. It
+    follows that K >= W. In use cases that require multiple updates to the same
+    output position, these must be decomposed into multiple SCATTER operations.
   }];
 
   let arguments = (ins
@@ -2102,12 +2130,30 @@ def Tosa_ResizeOp : Tosa_InferShapedTypeOp<"resize"> {
   let summary = "Resize operation, supports various resize/upsample modes";
 
   let description = [{
-    Resizes a tensor. Resize is only allowed in the H and W dimensions. In
-    expected use, The height dimension is scaled by factor (scale_y_n/scale_y_d).
-    And the width dimension is scaled by factor (scale_x_n/scale_x_d). Thus the
-    output dimensions can be derived from the input dimensions by inverting the
-    scale. And the [order_y, border_x] values adjust the output size to allow
-    fractional sampling beyond integer input position (IH-1,IW-1).
+    Resizes a tensor. Resize is only allowed in the H and W dimensions.
+
+    The height dimension is scaled by factor (scale_y_n/scale_y_d). The width
+    dimension is scaled by factor (scale_x_n/scale_x_d).
+
+    The NEAREST_NEIGHBOR mode returns the value of the input tensor closest to
+    the calculated sample position for both floating-point and integer data
+    formats.
+
+    Floating-point BILINEAR mode returns a bilinearly interpolated output value
+    based on the four closest input sample positions.
+
+    For integer BILINEAR interpolation mode, the output value must be scaled by
+    1/(scale_y_n * scale_x_n) in a following operation to complete the
+    interpolation (for example with a RESCALE operator).
+
+    The output dimensions can be derived from the input dimensions by inverting
+    the scale as described in the pseudocode. The [border_y, border_x] values
+    adjust the output size to allow fractional sampling beyond integer input
+    position (IH - 1, IW - 1).
+
+    The limit MAX_SCALE is applied to each scale ratio after reduction of the
+    ratio. Individual scale numerator and denominator values are allowed to be
+    larger than MAX_SCALE.
   }];
 
   let arguments = (ins