Skip to content

Commit f797864

Browse files
committed
Re-added contents of removed @appender decorators as static docstrings.
1 parent b20d6ab commit f797864

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed

pandas/io/stata.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,38 @@ def read(
16881688
columns: Sequence[str] | None = None,
16891689
order_categoricals: bool | None = None,
16901690
) -> DataFrame:
1691+
"""
1692+
Read observations from a Stata file, converting them into a DataFrame.
1693+
1694+
Parameters
1695+
----------
1696+
nrows : int
1697+
Number of lines to read from data file. If None, read the whole file.
1698+
convert_dates : bool, default True
1699+
Convert date variables to DataFrame time values.
1700+
convert_categoricals : bool, default True
1701+
Read value labels and convert columns to Categorical/Factor variables.
1702+
index_col : str, optional
1703+
Column to set as index.
1704+
convert_missing : bool, default False
1705+
Flag indicating whether to convert missing values to their Stata
1706+
representations. If False, missing values are replaced with nan.
1707+
If True, columns containing missing values are returned with
1708+
object data types and missing values are represented by
1709+
StataMissingValue objects.
1710+
preserve_dtypes : bool, default True
1711+
Preserve Stata datatypes. If False, numeric data are upcast to pandas
1712+
default types for foreign data (float64 or int64).
1713+
columns : list or None
1714+
Columns to retain. Columns will be returned in the given order. None
1715+
returns all columns.
1716+
order_categoricals : bool, default True
1717+
Flag indicating whether converted categorical data are ordered.
1718+
1719+
Returns
1720+
-------
1721+
DataFrame
1722+
"""
16911723
self._ensure_open()
16921724

16931725
# Handle options
@@ -2149,6 +2181,116 @@ def read_stata(
21492181
compression: CompressionOptions = "infer",
21502182
storage_options: StorageOptions | None = None,
21512183
) -> DataFrame | StataReader:
2184+
"""
2185+
Read Stata file into DataFrame.
2186+
2187+
Parameters
2188+
----------
2189+
filepath_or_buffer : str, path object or file-like object
2190+
Any valid string path is acceptable. The string could be a URL. Valid
2191+
URL schemes include http, ftp, s3, and file. For file URLs, a host is
2192+
expected. A local file could be: ``file://localhost/path/to/table.dta``.
2193+
2194+
If you want to pass in a path object, pandas accepts any ``os.PathLike``.
2195+
2196+
By file-like object, we refer to objects with a ``read()`` method,
2197+
such as a file handle (e.g. via builtin ``open`` function)
2198+
or ``StringIO``.
2199+
convert_dates : bool, default True
2200+
Convert date variables to DataFrame time values.
2201+
convert_categoricals : bool, default True
2202+
Read value labels and convert columns to Categorical/Factor variables.
2203+
index_col : str, optional
2204+
Column to set as index.
2205+
convert_missing : bool, default False
2206+
Flag indicating whether to convert missing values to their Stata
2207+
representations. If False, missing values are replaced with nan.
2208+
If True, columns containing missing values are returned with
2209+
object data types and missing values are represented by
2210+
StataMissingValue objects.
2211+
preserve_dtypes : bool, default True
2212+
Preserve Stata datatypes. If False, numeric data are upcast to pandas
2213+
default types for foreign data (float64 or int64).
2214+
columns : list or None
2215+
Columns to retain. Columns will be returned in the given order. None
2216+
returns all columns.
2217+
order_categoricals : bool, default True
2218+
Flag indicating whether converted categorical data are ordered.
2219+
chunksize : int, default None
2220+
Return StataReader object for iterations, returns chunks with
2221+
given number of lines.
2222+
iterator : bool, default False
2223+
Return StataReader object.
2224+
compression : str or dict, default 'infer'
2225+
For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
2226+
path-like, then detect compression from the following extensions: '.gz',
2227+
'.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
2228+
(otherwise no compression).
2229+
If using 'zip' or 'tar', the archive must contain only one data file to be read in.
2230+
Set to ``None`` for no decompression.
2231+
Can also be a dict with key ``'method'`` set
2232+
to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and
2233+
other key-value pairs are forwarded to
2234+
``zipfile.ZipFile``, ``gzip.GzipFile``,
2235+
``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
2236+
``tarfile.TarFile``, respectively.
2237+
As an example, the following could be passed for Zstandard decompression using a
2238+
custom compression dictionary:
2239+
``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
2240+
2241+
.. versionadded:: 1.5.0
2242+
Added support for `.tar` files.
2243+
storage_options : dict, optional
2244+
Extra options that make sense for a particular storage connection, e.g.
2245+
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
2246+
are forwarded to ``urllib.request.Request`` as header options. For other
2247+
URLs (e.g. starting with "s3://" or "gcs://") the key-value pairs are
2248+
forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
2249+
details, and for more examples on storage options refer `here
2250+
<https://pandas.pydata.org/docs/user_guide/io.html?
2251+
highlight=storage_options#reading-writing-remote-files>`_.
2252+
2253+
Returns
2254+
-------
2255+
DataFrame, pandas.api.typing.StataReader
2256+
If iterator or chunksize, returns StataReader, else DataFrame.
2257+
2258+
See Also
2259+
--------
2260+
io.stata.StataReader : Low-level reader for Stata data files.
2261+
DataFrame.to_stata : Export Stata data files.
2262+
2263+
Notes
2264+
-----
2265+
Categorical variables read through an iterator may not have the same
2266+
categories and dtype. This occurs when a variable stored in a DTA
2267+
file is associated with an incomplete set of value labels that only
2268+
label a strict subset of the values.
2269+
2270+
Examples
2271+
--------
2272+
2273+
Create a dummy Stata file for this example:
2274+
2275+
>>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', 'parrot'],
2276+
... 'speed': [350, 18, 361, 15]}) # doctest: +SKIP
2277+
>>> df.to_stata('animals.dta') # doctest: +SKIP
2278+
2279+
Read a Stata dta file:
2280+
2281+
>>> df = pd.read_stata('animals.dta') # doctest: +SKIP
2282+
2283+
Read a Stata dta file in 10,000 line chunks:
2284+
2285+
>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP
2286+
>>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP
2287+
>>> df.to_stata('filename.dta') # doctest: +SKIP
2288+
2289+
>>> with pd.read_stata('filename.dta', chunksize=10000) as itr: # doctest: +SKIP
2290+
... for chunk in itr:
2291+
... # Operate on a single chunk, e.g., chunk.mean()
2292+
... pass # doctest: +SKIP
2293+
"""
21522294
reader = StataReader(
21532295
filepath_or_buffer,
21542296
convert_dates=convert_dates,

0 commit comments

Comments
 (0)