@@ -608,7 +608,7 @@ class TransformerEncoderLayer(Layer):
 
     Examples:
 
-        .. code-block:: python
+        .. code-block:: pycon
 
             >>> import paddle
             >>> from paddle.nn import TransformerEncoderLayer
@@ -620,7 +620,7 @@ class TransformerEncoderLayer(Layer):
             >>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
             >>> enc_output = encoder_layer(enc_input, attn_mask)
             >>> print(enc_output.shape)
-            [2, 4, 128]
+            paddle.Size( [2, 4, 128])
     """
 
     activation: Layer
@@ -972,7 +972,7 @@ class TransformerDecoderLayer(Layer):
 
     Examples:
 
-        .. code-block:: python
+        .. code-block:: pycon
 
             >>> import paddle
             >>> from paddle.nn import TransformerDecoderLayer
@@ -986,12 +986,11 @@ class TransformerDecoderLayer(Layer):
             >>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
             >>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
             >>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
-            >>> output = decoder_layer(dec_input,
-            ...                        enc_output,
-            ...                        self_attn_mask,
-            ...                        cross_attn_mask)
+            >>> output = decoder_layer(
+            ...     dec_input, enc_output, self_attn_mask, cross_attn_mask
+            ... )
             >>> print(output.shape)
-            [2, 4, 128]
+            paddle.Size( [2, 4, 128])
     """
 
     normalize_before: bool
@@ -1498,7 +1497,7 @@ class Transformer(Layer):
 
     Examples:
 
-        .. code-block:: python
+        .. code-block:: pycon
 
             >>> import paddle
             >>> from paddle.nn import Transformer
@@ -1514,13 +1513,15 @@ class Transformer(Layer):
             >>> # memory_mask: [batch_size, n_head, tgt_len, src_len]
             >>> cross_attn_mask = paddle.rand((2, 2, 6, 4))
             >>> transformer = Transformer(128, 2, 4, 4, 512)
-            >>> output = transformer(enc_input,
-            ...                      dec_input,
-            ...                      enc_self_attn_mask,
-            ...                      dec_self_attn_mask,
-            ...                      cross_attn_mask)
+            >>> output = transformer(
+            ...     enc_input,
+            ...     dec_input,
+            ...     enc_self_attn_mask,
+            ...     dec_self_attn_mask,
+            ...     cross_attn_mask,
+            ... )
             >>> print(output.shape)
-            [2, 6, 128]
+            paddle.Size( [2, 6, 128])
     """
 
     encoder: Layer
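
For context, the script below stitches the three updated doctest examples into one plain runnable file. It uses only the calls, layer sizes, and mask shapes already shown in the hunks above; the variable names and comments are mine, so treat it as a sketch of the documented usage rather than anything added by this change.

    import paddle
    from paddle.nn import (
        Transformer,
        TransformerDecoderLayer,
        TransformerEncoderLayer,
    )

    # Shapes follow the docstring examples:
    # batch_size=2, src_len=4, tgt_len=6, d_model=128.
    enc_input = paddle.rand((2, 4, 128))
    dec_input = paddle.rand((2, 6, 128))

    # Attention masks are [batch_size, n_head, query_len, key_len].
    enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
    dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
    cross_attn_mask = paddle.rand((2, 2, 6, 4))

    # A single layer can be called on its own, as in the per-class examples ...
    encoder_layer = TransformerEncoderLayer(128, 2, 512)
    enc_output = encoder_layer(enc_input, enc_self_attn_mask)

    decoder_layer = TransformerDecoderLayer(128, 2, 512)
    dec_output = decoder_layer(
        dec_input, enc_output, dec_self_attn_mask, cross_attn_mask
    )

    # ... or the full Transformer(d_model, nhead, num_encoder_layers,
    # num_decoder_layers, dim_feedforward) stacks 4 encoder and 4 decoder layers.
    transformer = Transformer(128, 2, 4, 4, 512)
    output = transformer(
        enc_input, dec_input, enc_self_attn_mask, dec_self_attn_mask, cross_attn_mask
    )

    # Shape values as documented: [2, 4, 128], [2, 6, 128], [2, 6, 128].
    print(enc_output.shape, dec_output.shape, output.shape)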