pandas-dev
diff --git a/‎ci/code_checks.sh
Lines changed: 0 additions & 1 deletion b/‎ci/code_checks.sh
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/source/getting_started/comparison/comparison_with_sql.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/getting_started/comparison/comparison_with_sql.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/user_guide/pyarrow.rst
Lines changed: 3 additions & 1 deletion b/‎doc/source/user_guide/pyarrow.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 1 addition & 0 deletions b/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/core/apply.py
Lines changed: 6 additions & 12 deletions b/‎pandas/core/apply.py
Lines changed: 6 additions & 12 deletions
diff --git a/‎pandas/core/dtypes/common.py
Lines changed: 5 additions & 0 deletions b/‎pandas/core/dtypes/common.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎pandas/core/frame.py
Lines changed: 1 addition & 0 deletions b/‎pandas/core/frame.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/core/series.py
Lines changed: 1 addition & 0 deletions b/‎pandas/core/series.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎pandas/io/stata.py
Lines changed: 31 additions & 6 deletions b/‎pandas/io/stata.py
Lines changed: 31 additions & 6 deletions
diff --git a/‎pandas/tests/apply/test_frame_apply.py
Lines changed: 5 additions & 5 deletions b/‎pandas/tests/apply/test_frame_apply.py
Lines changed: 5 additions & 5 deletions
@@ -304,7 +304,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.api.types.is_re PR07,SA01" \
         -i "pandas.api.types.is_re_compilable PR07,SA01" \
         -i "pandas.api.types.is_sparse SA01" \
-        -i "pandas.api.types.is_string_dtype SA01" \
         -i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
         -i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
         -i "pandas.api.types.union_categoricals RT03,SA01" \
 
@@ -505,7 +505,7 @@ DELETE
     DELETE FROM tips
     WHERE tip > 9;
 
-In pandas we select the rows that should remain instead of deleting them:
+In pandas we select the rows that should remain instead of deleting the rows that should be removed:
 
 .. ipython:: python
 
 
@@ -159,9 +159,11 @@ PyArrow also provides IO reading functionality that has been integrated into sev
 functions provide an ``engine`` keyword that can dispatch to PyArrow to accelerate reading from an IO source.
 
 * :func:`read_csv`
+* :func:`read_feather`
 * :func:`read_json`
 * :func:`read_orc`
-* :func:`read_feather`
+* :func:`read_parquet`
+* :func:`read_table` (experimental)
 
 .. ipython:: python
 
 
@@ -583,6 +583,7 @@ I/O
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
+- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
 
 Period
 ^^^^^^
 
@@ -486,20 +486,14 @@ def compute_dict_like(
                 cols = df[key]
 
                 if cols.ndim == 1:
-                    series_list = [obj._gotitem(key, ndim=1, subset=cols)]
+                    series = obj._gotitem(key, ndim=1, subset=cols)
+                    results.append(getattr(series, op_name)(how, **kwargs))
+                    keys.append(key)
                 else:
-                    series_list = []
-                    for index in range(cols.shape[1]):
-                        col = cols.iloc[:, index]
-
+                    for _, col in cols.items():
                         series = obj._gotitem(key, ndim=1, subset=col)
-                        series_list.append(series)
-
-                for series in series_list:
-                    result = getattr(series, op_name)(how, **kwargs)
-                    results.append(result)
-                    keys.append(key)
-
+                        results.append(getattr(series, op_name)(how, **kwargs))
+                        keys.append(key)
         else:
             results = [
                 getattr(obj._gotitem(key, ndim=1), op_name)(how, **kwargs)
 
@@ -558,6 +558,11 @@ def is_string_dtype(arr_or_dtype) -> bool:
     boolean
         Whether or not the array or dtype is of the string dtype.
 
+    See Also
+    --------
+    api.types.is_string_dtype : Check whether the provided array or dtype
+                                is of the string dtype.
+
     Examples
     --------
     >>> from pandas.api.types import is_string_dtype
 
@@ -531,6 +531,7 @@ class DataFrame(NDFrame, OpsMixin):
         will perform column selection instead.
     dtype : dtype, default None
         Data type to force. Only a single dtype is allowed. If None, infer.
+        If ``data`` is a DataFrame, then ``dtype`` is ignored.
     copy : bool or None, default None
         Copy data from inputs.
         For dict data, the default of None behaves like ``copy=True``.  For DataFrame
 
@@ -256,6 +256,7 @@ class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
         Data type for the output Series. If not specified, this will be
         inferred from `data`.
         See the :ref:`user guide <basics.dtypes>` for more usages.
+        If ``data`` is a Series, then ``dtype`` is ignored.
     name : Hashable, default None
         The name to give to the Series.
     copy : bool, default False
 
@@ -983,6 +983,19 @@ def __init__(self) -> None:
                 np.float64(struct.unpack("<d", float64_max)[0]),
             ),
         }
+        self.OLD_VALID_RANGE = {
+            "b": (-128, 126),
+            "h": (-32768, 32766),
+            "l": (-2147483648, 2147483646),
+            "f": (
+                np.float32(struct.unpack("<f", float32_min)[0]),
+                np.float32(struct.unpack("<f", float32_max)[0]),
+            ),
+            "d": (
+                np.float64(struct.unpack("<d", float64_min)[0]),
+                np.float64(struct.unpack("<d", float64_max)[0]),
+            ),
+        }
 
         self.OLD_TYPE_MAPPING = {
             98: 251,  # byte
@@ -994,7 +1007,7 @@ def __init__(self) -> None:
 
         # These missing values are the generic '.' in Stata, and are used
         # to replace nans
-        self.MISSING_VALUES = {
+        self.MISSING_VALUES: dict[str, int | np.float32 | np.float64] = {
             "b": 101,
             "h": 32741,
             "l": 2147483621,
@@ -1808,11 +1821,18 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
         replacements = {}
         for i in range(len(data.columns)):
             fmt = self._typlist[i]
-            if fmt not in self.VALID_RANGE:
-                continue
+            if self._format_version <= 111:
+                if fmt not in self.OLD_VALID_RANGE:
+                    continue
 
-            fmt = cast(str, fmt)  # only strs in VALID_RANGE
-            nmin, nmax = self.VALID_RANGE[fmt]
+                fmt = cast(str, fmt)  # only strs in OLD_VALID_RANGE
+                nmin, nmax = self.OLD_VALID_RANGE[fmt]
+            else:
+                if fmt not in self.VALID_RANGE:
+                    continue
+
+                fmt = cast(str, fmt)  # only strs in VALID_RANGE
+                nmin, nmax = self.VALID_RANGE[fmt]
             series = data.iloc[:, i]
 
             # appreciably faster to do this with ndarray instead of Series
@@ -1827,7 +1847,12 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
                 umissing, umissing_loc = np.unique(series[missing], return_inverse=True)
                 replacement = Series(series, dtype=object)
                 for j, um in enumerate(umissing):
-                    missing_value = StataMissingValue(um)
+                    if self._format_version <= 111:
+                        missing_value = StataMissingValue(
+                            float(self.MISSING_VALUES[fmt])
+                        )
+                    else:
+                        missing_value = StataMissingValue(um)
 
                     loc = missing_loc[umissing_loc == j]
                     replacement.iloc[loc] = missing_value
 
@@ -368,18 +368,18 @@ def test_apply_mixed_dtype_corner():
     result = df[:0].apply(np.mean, axis=1)
     # the result here is actually kind of ambiguous, should it be a Series
     # or a DataFrame?
-    expected = Series(np.nan, index=pd.Index([], dtype="int64"))
+    expected = Series(dtype=np.float64)
     tm.assert_series_equal(result, expected)
 
 
 def test_apply_mixed_dtype_corner_indexing():
     df = DataFrame({"A": ["foo"], "B": [1.0]})
     result = df.apply(lambda x: x["A"], axis=1)
-    expected = Series(["foo"], index=[0])
+    expected = Series(["foo"], index=range(1))
     tm.assert_series_equal(result, expected)
 
     result = df.apply(lambda x: x["B"], axis=1)
-    expected = Series([1.0], index=[0])
+    expected = Series([1.0], index=range(1))
     tm.assert_series_equal(result, expected)
 
 
@@ -1037,7 +1037,7 @@ def test_result_type(int_frame_const_col):
 
     result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
     expected = df.copy()
-    expected.columns = [0, 1, 2]
+    expected.columns = range(3)
     tm.assert_frame_equal(result, expected)
 
 
@@ -1047,7 +1047,7 @@ def test_result_type_shorter_list(int_frame_const_col):
     df = int_frame_const_col
     result = df.apply(lambda x: [1, 2], axis=1, result_type="expand")
     expected = df[["A", "B"]].copy()
-    expected.columns = [0, 1]
+    expected.columns = range(2)
     tm.assert_frame_equal(result, expected)