Replace appender in stata (#62621)

JuanCarlos3 · web-flow · commit bfb740aec847 · 2025-10-09T08:39:20.000-07:00
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -44,7 +44,6 @@
     ValueLabelTypeMismatch,
 )
 from pandas.util._decorators import (
-    Appender,
     doc,
     set_module,
 )
@@ -127,10 +126,6 @@
     Return StataReader object for iterations, returns chunks with
     given number of lines."""
 
-_iterator_params = """\
-iterator : bool, default False
-    Return StataReader object."""
-
 _reader_notes = """\
 Notes
 -----
@@ -139,80 +134,6 @@
 file is associated to an incomplete set of value labels that only
 label a strict subset of the values."""
 
-_read_stata_doc = f"""
-Read Stata file into DataFrame.
-
-Parameters
-----------
-filepath_or_buffer : str, path object or file-like object
-    Any valid string path is acceptable. The string could be a URL. Valid
-    URL schemes include http, ftp, s3, and file. For file URLs, a host is
-    expected. A local file could be: ``file://localhost/path/to/table.dta``.
-
-    If you want to pass in a path object, pandas accepts any ``os.PathLike``.
-
-    By file-like object, we refer to objects with a ``read()`` method,
-    such as a file handle (e.g. via builtin ``open`` function)
-    or ``StringIO``.
-{_statafile_processing_params1}
-{_statafile_processing_params2}
-{_chunksize_params}
-{_iterator_params}
-{_shared_docs["decompression_options"] % "filepath_or_buffer"}
-{_shared_docs["storage_options"]}
-
-Returns
--------
-DataFrame, pandas.api.typing.StataReader
-    If iterator or chunksize, returns StataReader, else DataFrame.
-
-See Also
---------
-io.stata.StataReader : Low-level reader for Stata data files.
-DataFrame.to_stata: Export Stata data files.
-
-{_reader_notes}
-
-Examples
---------
-
-Creating a dummy stata for this example
-
->>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', 'parrot'],
-...                   'speed': [350, 18, 361, 15]}})  # doctest: +SKIP
->>> df.to_stata('animals.dta')  # doctest: +SKIP
-
-Read a Stata dta file:
-
->>> df = pd.read_stata('animals.dta')  # doctest: +SKIP
-
-Read a Stata dta file in 10,000 line chunks:
-
->>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8")  # doctest: +SKIP
->>> df = pd.DataFrame(values, columns=["i"])  # doctest: +SKIP
->>> df.to_stata('filename.dta')  # doctest: +SKIP
-
->>> with pd.read_stata('filename.dta', chunksize=10000) as itr:  # doctest: +SKIP
->>>     for chunk in itr:
-...         # Operate on a single chunk, e.g., chunk.mean()
-...         pass  # doctest: +SKIP
-"""
-
-_read_method_doc = f"""\
-Reads observations from Stata file, converting them into a dataframe
-
-Parameters
-----------
-nrows : int
-    Number of lines to read from data file, if None read whole file.
-{_statafile_processing_params1}
-{_statafile_processing_params2}
-
-Returns
--------
-DataFrame
-"""
-
 _stata_reader_doc = f"""\
 Class for reading Stata dta files.
 
@@ -1677,7 +1598,6 @@ def get_chunk(self, size: int | None = None) -> DataFrame:
             size = self._chunksize
         return self.read(nrows=size)
 
-    @Appender(_read_method_doc)
     def read(
         self,
         nrows: int | None = None,
@@ -1689,6 +1609,38 @@ def read(
         columns: Sequence[str] | None = None,
         order_categoricals: bool | None = None,
     ) -> DataFrame:
+        """
+        Read observations from a Stata file, converting them into a DataFrame.
+
+        Parameters
+        ----------
+        nrows : int, optional
+            Number of lines to read from data file. If None, read whole file.
+        convert_dates : bool, default True
+            Convert date variables to DataFrame time values.
+        convert_categoricals : bool, default True
+            Read value labels and convert columns to Categorical/Factor variables.
+        index_col : str, optional
+            Column to set as index.
+        convert_missing : bool, default False
+            Flag indicating whether to convert missing values to their Stata
+            representations.  If False, missing values are replaced with nan.
+            If True, columns containing missing values are returned with
+            object data types and missing values are represented by
+            StataMissingValue objects.
+        preserve_dtypes : bool, default True
+            Preserve Stata datatypes. If False, numeric data are upcast to pandas
+            default types for foreign data (float64 or int64).
+        columns : list or None
+            Columns to retain.  Columns will be returned in the given order.  None
+            returns all columns.
+        order_categoricals : bool, default True
+            Flag indicating whether converted categorical data are ordered.
+
+        Returns
+        -------
+        DataFrame
+        """
         self._ensure_open()
 
         # Handle options
@@ -2135,7 +2087,6 @@ def value_labels(self) -> dict[str, dict[int, str]]:
 
 
 @set_module("pandas")
-@Appender(_read_stata_doc)
 def read_stata(
     filepath_or_buffer: FilePath | ReadBuffer[bytes],
     *,
@@ -2151,6 +2102,122 @@ def read_stata(
     compression: CompressionOptions = "infer",
     storage_options: StorageOptions | None = None,
 ) -> DataFrame | StataReader:
+    """
+    Read Stata file into DataFrame.
+
+    Parameters
+    ----------
+    filepath_or_buffer : str, path object or file-like object
+        Any valid string path is acceptable. The string could be a URL. Valid
+        URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be: ``file://localhost/path/to/table.dta``.
+
+        If you want to pass in a path object, pandas accepts any ``os.PathLike``.
+
+        By file-like object, we refer to objects with a ``read()`` method,
+        such as a file handle (e.g. via builtin ``open`` function)
+        or ``StringIO``.
+    convert_dates : bool, default True
+        Convert date variables to DataFrame time values.
+    convert_categoricals : bool, default True
+        Read value labels and convert columns to Categorical/Factor variables.
+    index_col : str, optional
+        Column to set as index.
+    convert_missing : bool, default False
+        Flag indicating whether to convert missing values to their Stata
+        representations.  If False, missing values are replaced with nan.
+        If True, columns containing missing values are returned with
+        object data types and missing values are represented by
+        StataMissingValue objects.
+    preserve_dtypes : bool, default True
+        Preserve Stata datatypes. If False, numeric data are upcast to pandas
+        default types for foreign data (float64 or int64).
+    columns : list or None
+        Columns to retain.  Columns will be returned in the given order.  None
+        returns all columns.
+    order_categoricals : bool, default True
+        Flag indicating whether converted categorical data are ordered.
+    chunksize : int, default None
+        Return StataReader object for iterations, returns chunks with
+        given number of lines.
+    iterator : bool, default False
+        Return StataReader object.
+    compression : str or dict, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
+        If using 'zip' or 'tar', the ZIP file must contain only one
+        data file to be read in. Set to ``None`` for no decompression.
+        Can also be a dict with key ``'method'`` set to one of
+        {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
+        other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for Zstandard decompression using a
+        custom compression dictionary:
+        ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
+
+        .. versionadded:: 1.5.0
+            Added support for `.tar` files.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+
+    Returns
+    -------
+    DataFrame, pandas.api.typing.StataReader
+        If iterator or chunksize, returns StataReader, else DataFrame.
+
+    See Also
+    --------
+    io.stata.StataReader : Low-level reader for Stata data files.
+    DataFrame.to_stata : Export Stata data files.
+
+    Notes
+    -----
+    Categorical variables read through an iterator may not have the same
+    categories and dtype. This occurs when a variable stored in a DTA
+    file is associated to an incomplete set of value labels that only
+    label a strict subset of the values.
+
+    Examples
+    --------
+
+    Create a dummy Stata file for this example:
+
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "animal": ["falcon", "parrot", "falcon", "parrot"],
+    ...         "speed": [350, 18, 361, 15],
+    ...     }
+    ... )  # doctest: +SKIP
+    >>> df.to_stata("animals.dta")  # doctest: +SKIP
+
+    Read a Stata dta file:
+
+    >>> df = pd.read_stata("animals.dta")  # doctest: +SKIP
+
+    Read a Stata dta file in 10,000 line chunks:
+
+    >>> values = np.random.randint(
+    ...     0, 10, size=(20_000, 1), dtype="uint8"
+    ... )  # doctest: +SKIP
+    >>> df = pd.DataFrame(values, columns=["i"])  # doctest: +SKIP
+    >>> df.to_stata("filename.dta")  # doctest: +SKIP
+
+    >>> with pd.read_stata("filename.dta", chunksize=10000) as itr:  # doctest: +SKIP
+    ...     for chunk in itr:
+    ...         # Operate on a single chunk, e.g., chunk.mean()
+    ...         pass  # doctest: +SKIP
+    """
     reader = StataReader(
         filepath_or_buffer,
         convert_dates=convert_dates,