diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 07c8ab6d4f2cb..109f674fb9043 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -392,6 +392,73 @@ def _from_sequence_of_strings(
         )
         return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
 
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        if len(values) == 0:
+            # Retain our dtype
+            return self[:0].copy()
+
+        try:
+            arr = pa.array(values, from_pandas=True)
+        except (ValueError, TypeError):
+            # e.g. test_by_column_values_with_same_starting_value with nested
+            #  values, one entry of which is an ArrowStringArray,
+            #  or test_agg_lambda_complex128_dtype_conversion for complex values
+            return super()._cast_pointwise_result(values)
+
+        if pa.types.is_duration(arr.type):
+            # workaround for https://github.com/apache/arrow/issues/40620
+            result = ArrowExtensionArray._from_sequence(values)
+            if pa.types.is_duration(self._pa_array.type):
+                result = result.astype(self.dtype)  # type: ignore[assignment]
+            elif pa.types.is_timestamp(self._pa_array.type):
+                # Try to retain original unit
+                new_dtype = ArrowDtype(pa.duration(self._pa_array.type.unit))
+                try:
+                    result = result.astype(new_dtype)  # type: ignore[assignment]
+                except ValueError:
+                    pass
+            elif pa.types.is_date64(self._pa_array.type):
+                # Try to match unit we get on non-pointwise op
+                dtype = ArrowDtype(pa.duration("ms"))
+                result = result.astype(dtype)  # type: ignore[assignment]
+            elif pa.types.is_date(self._pa_array.type):
+                # Try to match unit we get on non-pointwise op
+                dtype = ArrowDtype(pa.duration("s"))
+                result = result.astype(dtype)  # type: ignore[assignment]
+            return result
+
+        elif pa.types.is_date(arr.type) and pa.types.is_date(self._pa_array.type):
+            arr = arr.cast(self._pa_array.type)
+        elif pa.types.is_time(arr.type) and pa.types.is_time(self._pa_array.type):
+            arr = arr.cast(self._pa_array.type)
+        elif pa.types.is_decimal(arr.type) and pa.types.is_decimal(self._pa_array.type):
+            arr = arr.cast(self._pa_array.type)
+        elif pa.types.is_integer(arr.type) and pa.types.is_integer(self._pa_array.type):
+            try:
+                arr = arr.cast(self._pa_array.type)
+            except pa.lib.ArrowInvalid:
+                # e.g. test_combine_add if we can't cast
+                pass
+        elif pa.types.is_floating(arr.type) and pa.types.is_floating(
+            self._pa_array.type
+        ):
+            try:
+                arr = arr.cast(self._pa_array.type)
+            except pa.lib.ArrowInvalid:
+                # e.g. test_combine_add if we can't cast
+                pass
+
+        if isinstance(self.dtype, StringDtype):
+            if pa.types.is_string(arr.type) or pa.types.is_large_string(arr.type):
+                # ArrowStringArrayNumpySemantics
+                return type(self)(arr).astype(self.dtype)
+            if self.dtype.na_value is np.nan:
+                # ArrowEA has different semantics, so we return numpy-based
+                #  result instead
+                return super()._cast_pointwise_result(values)
+            return ArrowExtensionArray(arr)
+        return type(self)(arr)
+
     @classmethod
     def _box_pa(
         cls, value, pa_type: pa.DataType | None = None
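An end-to-end illustration of the casting above: a sketch, assuming this branch plus pyarrow installed; the dtype expectations mirror the test_agg_lambda_pyarrow_dtype_conversion change further down:

    import pandas as pd

    # The UDF returns Python scalars, which pyarrow would infer as int64;
    # _cast_pointwise_result casts back to the column's original uint64[pyarrow].
    df = pd.DataFrame(
        {
            "A": ["c1", "c2", "c1", "c2"],
            "B": pd.array([1, 2, 3, 4], dtype="uint64[pyarrow]"),
        }
    )
    result = df.groupby("A")["B"].agg(lambda x: x.min())
    print(result.dtype)  # expected on this branch: uint64[pyarrow]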
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 90fb8c175ebf6..1cd10a9eef9d1 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -19,7 +19,6 @@
     cast,
     overload,
 )
-import warnings
 
 import numpy as np
 
@@ -35,13 +34,11 @@
     Substitution,
     cache_readonly,
 )
-from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import (
     validate_bool_kwarg,
     validate_insert_loc,
 )
 
-from pandas.core.dtypes.cast import maybe_cast_pointwise_result
 from pandas.core.dtypes.common import (
     is_list_like,
     is_scalar,
@@ -89,7 +86,6 @@
     AstypeArg,
     AxisInt,
     Dtype,
-    DtypeObj,
     FillnaOptions,
     InterpolateOptions,
     NumpySorter,
@@ -311,38 +307,6 @@ def _from_sequence(
         """
         raise AbstractMethodError(cls)
 
-    @classmethod
-    def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
-        """
-        Strict analogue to _from_sequence, allowing only sequences of scalars
-        that should be specifically inferred to the given dtype.
-
-        Parameters
-        ----------
-        scalars : sequence
-        dtype : ExtensionDtype
-
-        Raises
-        ------
-        TypeError or ValueError
-
-        Notes
-        -----
-        This is called in a try/except block when casting the result of a
-        pointwise operation.
-        """
-        try:
-            return cls._from_sequence(scalars, dtype=dtype, copy=False)
-        except (ValueError, TypeError):
-            raise
-        except Exception:
-            warnings.warn(
-                "_from_scalars should only raise ValueError or TypeError. "
-                "Consider overriding _from_scalars where appropriate.",
-                stacklevel=find_stack_level(),
-            )
-            raise
-
     @classmethod
     def _from_sequence_of_strings(
         cls, strings, *, dtype: ExtensionDtype, copy: bool = False
@@ -371,9 +335,6 @@ def _from_sequence_of_strings(
             from a sequence of scalars.
         api.extensions.ExtensionArray._from_factorized : Reconstruct an
             ExtensionArray after factorization.
-        api.extensions.ExtensionArray._from_scalars : Strict analogue to _from_sequence,
-            allowing only sequences of scalars that should be specifically inferred to
-            the given dtype.
 
         Examples
         --------
@@ -416,6 +377,14 @@ def _from_factorized(cls, values, original):
         """
         raise AbstractMethodError(cls)
 
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        """
+        Cast the result of a pointwise operation (e.g. Series.map) to an
+        array, preserving the dtype backend if possible.
+        """
+        values = np.asarray(values, dtype=object)
+        return lib.maybe_convert_objects(values, convert_non_numeric=True)
+
     # ------------------------------------------------------------------------
     # Must be a Sequence
     # ------------------------------------------------------------------------
@@ -2842,7 +2811,7 @@ def _maybe_convert(arr):
             #  https://github.com/pandas-dev/pandas/issues/22850
             #  We catch all regular exceptions here, and fall back
             #  to an ndarray.
-            res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
+            res = self._cast_pointwise_result(arr)
             if not isinstance(res, type(self)):
                 # exception raised in _from_sequence; ensure we have ndarray
                 res = np.asarray(arr)
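The default implementation above simply defers to maybe_convert_objects; a minimal sketch of that fallback, using the private pandas._libs.lib module for illustration:

    import numpy as np

    from pandas._libs import lib

    # Plain Python scalars are inferred to a concrete numpy dtype...
    vals = np.array([1, 2, 3], dtype=object)
    print(lib.maybe_convert_objects(vals, convert_non_numeric=True).dtype)  # int64

    # ...while unrecognized objects are left as object dtype.
    vals = np.array([object(), object()], dtype=object)
    print(lib.maybe_convert_objects(vals, convert_non_numeric=True).dtype)  # object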
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 86e5f93609d1c..78928713166f4 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -103,7 +103,6 @@
     AstypeArg,
     AxisInt,
     Dtype,
-    DtypeObj,
     NpDtype,
     Ordered,
     Shape,
@@ -529,20 +528,12 @@ def _from_sequence(
     ) -> Self:
         return cls(scalars, dtype=dtype, copy=copy)
 
-    @classmethod
-    def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
-        if dtype is None:
-            # The _from_scalars strictness doesn't make much sense in this case.
-            raise NotImplementedError
-
-        res = cls._from_sequence(scalars, dtype=dtype)
-
-        # if there are any non-category elements in scalars, these will be
-        #  converted to NAs in res.
-        mask = isna(scalars)
-        if not (mask == res.isna()).all():
-            # Some non-category element in scalars got converted to NA in res.
-            raise ValueError
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        res = super()._cast_pointwise_result(values)
+        cat = type(self)._from_sequence(res, dtype=self.dtype)
+        if (cat.isna() == isna(res)).all():
+            # i.e. the conversion was non-lossy
+            return cat
         return res
 
     @overload
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 4103b7c337efe..67ee16fd3a34e 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -83,7 +83,6 @@
     from pandas._typing import (
         ArrayLike,
         DateTimeErrorChoices,
-        DtypeObj,
         IntervalClosedType,
         TimeAmbiguous,
         TimeNonexistent,
@@ -293,14 +292,6 @@ def _scalar_type(self) -> type[Timestamp]:
     _dtype: np.dtype[np.datetime64] | DatetimeTZDtype
     _freq: BaseOffset | None = None
 
-    @classmethod
-    def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
-        if lib.infer_dtype(scalars, skipna=True) not in ["datetime", "datetime64"]:
-            # TODO: require any NAs be valid-for-DTA
-            # TODO: if dtype is passed, check for tzawareness compat?
-            raise ValueError
-        return cls._from_sequence(scalars, dtype=dtype)
-
     @classmethod
     def _validate_dtype(cls, values, dtype):
         # used in TimeLikeOps.__init__
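A sketch of the Categorical round-trip check above (_cast_pointwise_result is private, so this is illustrative only): results that stay within the existing categories convert losslessly and keep the categorical dtype, anything else falls back to the inferred result:

    import pandas as pd

    cat = pd.Categorical(["a", "b", "a"])

    # Every value is an existing category, so no new NAs appear in the
    # round-trip and the categorical dtype is retained.
    res = cat._cast_pointwise_result(["b", "a", "b"])
    print(res.dtype)  # category

    # "z" is not a category; it would become NA in the round-trip, so the
    # conversion is lossy and the uncast result is returned instead.
    res = cat._cast_pointwise_result(["a", "z", "a"])
    print(res.dtype)  # not category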
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 0f619df14ee0c..0402452e484ea 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -26,6 +26,7 @@
 from pandas.util._decorators import doc
 
 from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
     is_bool,
     is_integer_dtype,
@@ -147,6 +148,19 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
         values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
         return cls(values, mask)
 
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        values = np.asarray(values, dtype=object)
+        result = lib.maybe_convert_objects(values, convert_to_nullable_dtype=True)
+        lkind = self.dtype.kind
+        rkind = result.dtype.kind
+        if (lkind in "iu" and rkind in "iu") or (lkind == rkind == "f"):
+            result = cast(BaseMaskedArray, result)
+            new_data = maybe_downcast_to_dtype(
+                result._data, dtype=self.dtype.numpy_dtype
+            )
+            result = type(result)(new_data, result._mask)
+        return result
+
     @classmethod
     @doc(ExtensionArray._empty)
     def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index 73e509474b6e4..cef20da195f43 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -14,7 +14,10 @@
 from pandas.compat.numpy import function as nv
 
 from pandas.core.dtypes.astype import astype_array
-from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
+from pandas.core.dtypes.cast import (
+    construct_1d_object_array_from_listlike,
+    maybe_downcast_to_dtype,
+)
 from pandas.core.dtypes.common import pandas_dtype
 from pandas.core.dtypes.dtypes import NumpyEADtype
 from pandas.core.dtypes.missing import isna
@@ -34,6 +37,7 @@
     from collections.abc import Callable
 
     from pandas._typing import (
+        ArrayLike,
         AxisInt,
         Dtype,
         FillnaOptions,
@@ -145,6 +149,24 @@ def _from_sequence(
             result = result.copy()
         return cls(result)
 
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        result = super()._cast_pointwise_result(values)
+        lkind = self.dtype.kind
+        rkind = result.dtype.kind
+        if (
+            (lkind in "iu" and rkind in "iu")
+            or (lkind == "f" and rkind == "f")
+            or (lkind == rkind == "c")
+        ):
+            result = maybe_downcast_to_dtype(result, self.dtype.numpy_dtype)
+        elif rkind == "M":
+            # Ensure potential subsequent .astype(object) doesn't incorrectly
+            #  convert Timestamps to ints
+            from pandas import array as pd_array
+
+            result = pd_array(result, copy=False)
+        return result
+
     # ------------------------------------------------------------------------
     # Data
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 384a264ca690e..ab5569537dc55 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -607,6 +607,23 @@ def _from_sequence(
     def _from_factorized(cls, values, original) -> Self:
         return cls(values, dtype=original.dtype)
 
+    def _cast_pointwise_result(self, values):
+        result = super()._cast_pointwise_result(values)
+        if result.dtype.kind == self.dtype.kind:
+            try:
+                # e.g. test_groupby_agg_extension
+                res = type(self)._from_sequence(result, dtype=self.dtype)
+                if ((res == result) | (isna(result) & res.isna())).all():
+                    # This does not hold for e.g.
+                    #  test_arith_frame_with_scalar[0-__truediv__]
+                    return res
+                return type(self)._from_sequence(result)
+            except (ValueError, TypeError):
+                return type(self)._from_sequence(result)
+        else:
+            # e.g. test_combine_le: avoid casting bools to Sparse[float64, nan]
+            return type(self)._from_sequence(result)
+
     # ------------------------------------------------------------------------
     # Data
     # ------------------------------------------------------------------------
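A sketch of the masked-array downcasting above, assuming this branch: UDF results that infer to a wider nullable dtype are downcast back to the original width when the values fit:

    import pandas as pd

    df = pd.DataFrame(
        {
            "key": ["a", "a", "b"],
            "val": pd.array([1, 2, 3], dtype="Int32"),
        }
    )
    # The lambda yields scalars that infer as Int64; _cast_pointwise_result
    # downcasts the data back to the original Int32.
    result = df.groupby("key")["val"].agg(lambda x: x.min())
    print(result.dtype)  # expected on this branch: Int32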
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 983e7b246032c..4d91f33a8df87 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -412,13 +412,6 @@ def tolist(self) -> list:
             return [x.tolist() for x in self]
         return list(self.to_numpy())
 
-    @classmethod
-    def _from_scalars(cls, scalars, dtype: DtypeObj) -> Self:
-        if lib.infer_dtype(scalars, skipna=True) not in ["string", "empty"]:
-            # TODO: require any NAs be valid-for-string
-            raise ValueError
-        return cls._from_sequence(scalars, dtype=dtype)
-
     def _formatter(self, boxed: bool = False):
         formatter = partial(
             printing.pprint_thing,
@@ -732,6 +725,13 @@ def _from_sequence_of_strings(
     ) -> Self:
         return cls._from_sequence(strings, dtype=dtype, copy=copy)
 
+    def _cast_pointwise_result(self, values) -> ArrayLike:
+        result = super()._cast_pointwise_result(values)
+        if isinstance(result.dtype, StringDtype):
+            # Ensure we retain our same na_value/storage
+            result = result.astype(self.dtype)  # type: ignore[call-overload]
+        return result
+
     @classmethod
     def _empty(cls, shape, dtype) -> StringArray:
         values = np.empty(shape, dtype=object)
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 20fe9b92b4677..afe359b3faede 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -437,80 +437,6 @@ def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
     return arr
 
 
-def maybe_cast_pointwise_result(
-    result: ArrayLike,
-    dtype: DtypeObj,
-    numeric_only: bool = False,
-    same_dtype: bool = True,
-) -> ArrayLike:
-    """
-    Try casting result of a pointwise operation back to the original dtype if
-    appropriate.
-
-    Parameters
-    ----------
-    result : array-like
-        Result to cast.
-    dtype : np.dtype or ExtensionDtype
-        Input Series from which result was calculated.
-    numeric_only : bool, default False
-        Whether to cast only numerics or datetimes as well.
-    same_dtype : bool, default True
-        Specify dtype when calling _from_sequence
-
-    Returns
-    -------
-    result : array-like
-        result maybe casted to the dtype.
-    """
-
-    if isinstance(dtype, ExtensionDtype):
-        cls = dtype.construct_array_type()
-        if same_dtype:
-            result = _maybe_cast_to_extension_array(cls, result, dtype=dtype)
-        else:
-            result = _maybe_cast_to_extension_array(cls, result)
-
-    elif (numeric_only and dtype.kind in "iufcb") or not numeric_only:
-        result = maybe_downcast_to_dtype(result, dtype)
-
-    return result
-
-
-def _maybe_cast_to_extension_array(
-    cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
-) -> ArrayLike:
-    """
-    Call to `_from_sequence` that returns the object unchanged on Exception.
-
-    Parameters
-    ----------
-    cls : class, subclass of ExtensionArray
-    obj : arraylike
-        Values to pass to cls._from_sequence
-    dtype : ExtensionDtype, optional
-
-    Returns
-    -------
-    ExtensionArray or obj
-    """
-    result: ArrayLike
-
-    if dtype is not None:
-        try:
-            result = cls._from_scalars(obj, dtype=dtype)
-        except (TypeError, ValueError):
-            return obj
-        return result
-
-    try:
-        result = cls._from_sequence(obj, dtype=dtype)
-    except Exception:
-        # We can't predict what downstream EA constructors may raise
-        result = obj
-    return result
-
-
 @overload
 def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: ...
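A sketch of the StringDtype retention above: when the pointwise results are still strings, the result is cast back so the calling array's storage and na_value are kept (the exact intermediate inference depends on the string-dtype defaults):

    import pandas as pd

    idx = pd.Index(["a", "b"], dtype=pd.StringDtype("python", na_value=pd.NA))
    res = idx.map(lambda x: x.upper())
    # The mapped values may be inferred to a different storage/na_value;
    # _cast_pointwise_result casts back to the original StringDtype.
    print(res.dtype)  # expected on this branch: string (python storage, pd.NA)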
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 594358c0b4080..eab221e4df2a9 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -35,7 +35,6 @@
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.cast import (
-    maybe_cast_pointwise_result,
     maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
@@ -44,7 +43,6 @@
     ensure_platform_int,
     ensure_uint64,
     is_1d_only_ea_dtype,
-    is_string_dtype,
 )
 from pandas.core.dtypes.missing import (
     isna,
@@ -52,7 +50,6 @@
 )
 
 from pandas.core.arrays import Categorical
-from pandas.core.arrays.arrow.array import ArrowExtensionArray
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import grouper
 from pandas.core.indexes.api import (
@@ -966,29 +963,7 @@ def agg_series(
         np.ndarray or ExtensionArray
         """
         result = self._aggregate_series_pure_python(obj, func)
-        npvalues = lib.maybe_convert_objects(result, try_float=False)
-
-        if isinstance(obj._values, ArrowExtensionArray):
-            # When obj.dtype is a string, any object can be cast. Only do so if the
-            #  UDF returned strings or NA values.
-            if not is_string_dtype(obj.dtype) or lib.is_string_array(
-                npvalues, skipna=True
-            ):
-                out = maybe_cast_pointwise_result(
-                    npvalues, obj.dtype, numeric_only=True, same_dtype=preserve_dtype
-                )
-            else:
-                out = npvalues
-
-        elif not isinstance(obj._values, np.ndarray):
-            # we can preserve a little bit more aggressively with EA dtype
-            #  because maybe_cast_pointwise_result will do a try/except
-            #  with _from_sequence.  NB we are assuming here that _from_sequence
-            #  is sufficiently strict that it casts appropriately.
-            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
-        else:
-            out = npvalues
-        return out
+        return obj.array._cast_pointwise_result(result)
 
     @final
     def _aggregate_series_pure_python(
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index df4df2a4913c2..59ac122e4f9ea 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -90,7 +90,6 @@
     common_dtype_categorical_compat,
     find_result_type,
     infer_dtype_from,
-    maybe_cast_pointwise_result,
     np_can_hold_element,
 )
 from pandas.core.dtypes.common import (
@@ -6398,17 +6397,20 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
         if not new_values.size:
             # empty
             dtype = self.dtype
-
-        # e.g. if we are floating and new_values is all ints, then we
-        #  don't want to cast back to floating.  But if we are UInt64
-        #  and new_values is all ints, we want to try.
-        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
-        if same_dtype:
-            new_values = maybe_cast_pointwise_result(
-                new_values, self.dtype, same_dtype=same_dtype
-            )
-
-        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
+        elif isinstance(new_values, Categorical):
+            # _cast_pointwise_result is unnecessary
+            dtype = new_values.dtype
+        else:
+            if isinstance(self, MultiIndex):
+                arr = self[:0].to_flat_index().array
+            else:
+                arr = self[:0].array
+            # e.g. if we are floating and new_values is all ints, then we
+            #  don't want to cast back to floating.  But if we are UInt64
+            #  and new_values is all ints, we want to try.
+            new_values = arr._cast_pointwise_result(new_values)
+            dtype = new_values.dtype
+        return Index(new_values, dtype=dtype, copy=False, name=self.name)
 
     # TODO: De-duplicate with map, xref GH#32349
     @final
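A sketch of the comment above ("if we are UInt64 and new_values is all ints, we want to try"), assuming this branch:

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3], dtype=np.uint64)
    # The mapped values infer as int64; since they round-trip losslessly,
    # the array-level _cast_pointwise_result downcasts back to uint64.
    res = idx.map(lambda x: x + 1)
    print(res.dtype)  # expected on this branch: uint64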
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 00cff09801f1a..6055e65c2786b 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -70,7 +70,6 @@
     find_common_type,
     infer_dtype_from,
     maybe_box_native,
-    maybe_cast_pointwise_result,
 )
 from pandas.core.dtypes.common import (
     is_dict_like,
@@ -84,7 +83,6 @@
     validate_all_hashable,
 )
 from pandas.core.dtypes.dtypes import (
-    CategoricalDtype,
     ExtensionDtype,
     SparseDtype,
 )
@@ -117,7 +115,6 @@
 )
 from pandas.core.arrays.categorical import CategoricalAccessor
 from pandas.core.arrays.sparse import SparseAccessor
-from pandas.core.arrays.string_ import StringDtype
 from pandas.core.construction import (
     array as pd_array,
     extract_array,
@@ -3185,15 +3182,14 @@ def combine(
             new_values[:] = [func(lv, other) for lv in self._values]
             new_name = self.name
 
-        # try_float=False is to match agg_series
-        npvalues = lib.maybe_convert_objects(new_values, try_float=False)
-        # same_dtype here is a kludge to avoid casting e.g. [True, False] to
-        #  ["True", "False"]
-        same_dtype = isinstance(self.dtype, (StringDtype, CategoricalDtype))
-        res_values = maybe_cast_pointwise_result(
-            npvalues, self.dtype, same_dtype=same_dtype
-        )
-        return self._constructor(res_values, index=new_index, name=new_name, copy=False)
+        res_values = self.array._cast_pointwise_result(new_values)
+        return self._constructor(
+            res_values,
+            dtype=res_values.dtype,
+            index=new_index,
+            name=new_name,
+            copy=False,
+        )
 
     def combine_first(self, other) -> Series:
         """
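With the kludge removed, the input's own array decides the cast; a sketch of the boolean case the old same_dtype flag was guarding against:

    import pandas as pd

    s1 = pd.Series([True, False, True])
    s2 = pd.Series([False, False, True])
    res = s1.combine(s2, lambda a, b: a and b)
    print(res.dtype)  # bool, not "True"/"False" strings or object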
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index fd9fec0cb490c..90ec84a30a129 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -367,6 +367,18 @@ def test_combine_le(self, data_repeated):
         )
         tm.assert_series_equal(result, expected)
 
+    def _construct_for_combine_add(self, left, right):
+        if isinstance(right, type(left)):
+            return left._from_sequence(
+                [a + b for (a, b) in zip(list(left), list(right))],
+                dtype=left.dtype,
+            )
+        else:
+            return left._from_sequence(
+                [a + right for a in list(left)],
+                dtype=left.dtype,
+            )
+
     def test_combine_add(self, data_repeated):
         # GH 20825
         orig_data1, orig_data2 = data_repeated(2)
@@ -377,26 +389,22 @@ def test_combine_add(self, data_repeated):
         #  we will expect Series.combine to raise as well.
         try:
             with np.errstate(over="ignore"):
-                expected = pd.Series(
-                    orig_data1._from_sequence(
-                        [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]
-                    )
-                )
+                arr = self._construct_for_combine_add(orig_data1, orig_data2)
         except TypeError:
             # If the operation is not supported pointwise for our scalars,
             #  then Series.combine should also raise
             with pytest.raises(TypeError):
                 s1.combine(s2, lambda x1, x2: x1 + x2)
             return
+        expected = pd.Series(arr)
 
         result = s1.combine(s2, lambda x1, x2: x1 + x2)
         tm.assert_series_equal(result, expected)
 
         val = s1.iloc[0]
         result = s1.combine(val, lambda x1, x2: x1 + x2)
-        expected = pd.Series(
-            orig_data1._from_sequence([a + val for a in list(orig_data1)])
-        )
+        arr = self._construct_for_combine_add(orig_data1, val)
+        expected = pd.Series(arr)
         tm.assert_series_equal(result, expected)
 
     def test_combine_first(self, data):
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 45991e32726c6..65fb6f33b0ea3 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -109,6 +109,16 @@ def _from_sequence_of_strings(cls, strings, *, dtype: ExtensionDtype, copy=False
     def _from_factorized(cls, values, original):
         return cls(values)
 
+    def _cast_pointwise_result(self, values):
+        result = super()._cast_pointwise_result(values)
+        try:
+            # If this were ever made a non-test EA, special-casing could
+            #  be avoided by handling Decimal in maybe_convert_objects
+            res = type(self)._from_sequence(result, dtype=self.dtype)
+        except (ValueError, TypeError):
+            return result
+        return res
+
     _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
 
     def to_numpy(
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 5221cd402f53d..5247dfcbb275b 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import decimal
-import operator
 
 import numpy as np
 import pytest
@@ -282,33 +281,10 @@ def _create_arithmetic_method(cls, op):
 DecimalArrayWithoutCoercion._add_arithmetic_ops()
 
 
-def test_combine_from_sequence_raises(monkeypatch):
-    # https://github.com/pandas-dev/pandas/issues/22850
-    cls = DecimalArrayWithoutFromSequence
-
-    def construct_array_type(self):
-        return DecimalArrayWithoutFromSequence
-
-    monkeypatch.setattr(DecimalDtype, "construct_array_type", construct_array_type)
-
-    arr = cls([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
-    ser = pd.Series(arr)
-    result = ser.combine(ser, operator.add)
-
-    # note: object dtype
-    expected = pd.Series(
-        [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
-    )
-    tm.assert_series_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion]
-)
-def test_scalar_ops_from_sequence_raises(class_):
+def test_scalar_ops_from_sequence_raises():
     # op(EA, EA) should return an EA, or an ndarray if it's not possible
     #  to return an EA with the return values.
-    arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
+    arr = DecimalArrayWithoutCoercion([decimal.Decimal("1.0"), decimal.Decimal("2.0")])
     result = arr + arr
     expected = np.array(
         [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object"
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index a1799d0c113d1..bc30ba4ef7769 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -90,6 +90,13 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
     def _from_factorized(cls, values, original):
         return cls([UserDict(x) for x in values if x != ()])
 
+    def _cast_pointwise_result(self, values):
+        result = super()._cast_pointwise_result(values)
+        try:
+            return type(self)._from_sequence(result, dtype=self.dtype)
+        except (ValueError, TypeError):
+            return result
+
     def __getitem__(self, item):
         if isinstance(item, tuple):
             item = unpack_tuple_and_ellipses(item)
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 857fdc7468cec..4c0ced8b56288 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -46,6 +46,7 @@
 )
 from pandas.errors import Pandas4Warning
 
+from pandas.core.dtypes.common import pandas_dtype
 from pandas.core.dtypes.dtypes import (
     ArrowDtype,
     CategoricalDtypeType,
@@ -271,6 +272,26 @@ def data_for_twos(data):
 
 
 class TestArrowArray(base.ExtensionTests):
+    def _construct_for_combine_add(self, left, right):
+        dtype = left.dtype
+
+        # in a couple of cases, addition is not dtype-preserving
+        if dtype == "bool[pyarrow]":
+            dtype = pandas_dtype("int64[pyarrow]")
+        elif dtype == "int8[pyarrow]" and isinstance(right, type(left)):
+            dtype = pandas_dtype("int64[pyarrow]")
+
+        if isinstance(right, type(left)):
+            return left._from_sequence(
+                [a + b for (a, b) in zip(list(left), list(right))],
+                dtype=dtype,
+            )
+        else:
+            return left._from_sequence(
+                [a + right for a in list(left)],
+                dtype=dtype,
+            )
+
     def test_compare_scalar(self, data, comparison_op):
         ser = pd.Series(data)
         self._compare_other(ser, data, comparison_op, data[0])
@@ -786,6 +807,8 @@ def rtruediv(x, y):
 
         return tm.get_op_from_name(op_name)
 
+    # TODO: use EA._cast_pointwise_result, same with other test files that
+    #  override this
     def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         # BaseOpsUtil._combine can upcast expected dtype
         # (because it generates expected on python scalars)
@@ -795,16 +818,28 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         if op_name in ["eq", "ne", "lt", "le", "gt", "ge"]:
             return pointwise_result.astype("boolean[pyarrow]")
 
+        original_dtype = tm.get_dtype(expected)
+
         was_frame = False
         if isinstance(expected, pd.DataFrame):
             was_frame = True
             expected_data = expected.iloc[:, 0]
-            original_dtype = obj.iloc[:, 0].dtype
         else:
             expected_data = expected
-            original_dtype = obj.dtype
 
-        orig_pa_type = original_dtype.pyarrow_dtype
+        # the pointwise method will have retained our original dtype, while
+        #  the op(ser, other) version will have cast to 64bit
+        if type(other) is int and op_name not in ["__floordiv__"]:
+            if original_dtype.kind == "f":
+                return expected.astype("float64[pyarrow]")
+            else:
+                return expected.astype("int64[pyarrow]")
+        elif type(other) is float:
+            return expected.astype("float64[pyarrow]")
+
+        # error: Item "ExtensionDtype" of "dtype[Any] | ExtensionDtype" has
+        #  no attribute "pyarrow_dtype"
+        orig_pa_type = original_dtype.pyarrow_dtype  # type: ignore[union-attr]
         if not was_frame and isinstance(other, pd.Series):
             # i.e. test_arith_series_with_array
             if not (
@@ -834,29 +869,7 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
 
         pa_expected = pa.array(expected_data._values)
 
-        if pa.types.is_duration(pa_expected.type):
-            if pa.types.is_date(orig_pa_type):
-                if pa.types.is_date64(orig_pa_type):
-                    # TODO: why is this different vs date32?
-                    unit = "ms"
-                else:
-                    unit = "s"
-            else:
-                # pyarrow sees sequence of datetime/timedelta objects and defaults
-                #  to "us" but the non-pointwise op retains unit
-                # timestamp or duration
-                unit = orig_pa_type.unit
-            if type(other) in [datetime, timedelta] and unit in ["s", "ms"]:
-                # pydatetime/pytimedelta objects have microsecond reso, so we
-                #  take the higher reso of the original and microsecond. Note
-                #  this matches what we would do with DatetimeArray/TimedeltaArray
-                unit = "us"
-
-            pa_expected = pa_expected.cast(f"duration[{unit}]")
-
-        elif pa.types.is_decimal(pa_expected.type) and pa.types.is_decimal(
-            orig_pa_type
-        ):
+        if pa.types.is_decimal(pa_expected.type) and pa.types.is_decimal(orig_pa_type):
             # decimal precision can resize in the result type depending on data
             #  just compare the float values
             alt = getattr(obj, op_name)(other)
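The helper above encodes an asymmetry worth spelling out: a sketch, assuming this branch and pyarrow:

    import pandas as pd

    ser = pd.Series([1, 2], dtype="int8[pyarrow]")

    # Arrow compute kernels promote when combined with a Python int scalar...
    print((ser + 1).dtype)  # int64[pyarrow]

    # ...while the pointwise path casts back to the original width.
    res = ser.combine(1, lambda a, b: a + b)
    print(res.dtype)  # expected on this branch: int8[pyarrow]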
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
index 0e9ffce07bf98..034ddb351a7ab 100644
--- a/pandas/tests/extension/test_masked.py
+++ b/pandas/tests/extension/test_masked.py
@@ -168,6 +168,8 @@ def data_for_grouping(dtype):
 
 
 class TestMaskedArrays(base.ExtensionTests):
+    _combine_le_expected_dtype = "boolean"
+
     @pytest.fixture(autouse=True)
     def skip_if_doesnt_support_2d(self, dtype, request):
         # Override the fixture so that we run these tests.
@@ -215,42 +217,14 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
         sdtype = tm.get_dtype(obj)
         expected = pointwise_result
 
-        if op_name in ("eq", "ne", "le", "ge", "lt", "gt"):
-            return expected.astype("boolean")
-
-        if sdtype.kind in "iu":
-            if op_name in ("__rtruediv__", "__truediv__", "__div__"):
-                filled = expected.fillna(np.nan)
-                expected = filled.astype("Float64")
-            else:
-                # combine method result in 'biggest' (int64) dtype
-                expected = expected.astype(sdtype)
-        elif sdtype.kind == "b":
+        if sdtype.kind == "b":
             if op_name in (
-                "__floordiv__",
-                "__rfloordiv__",
-                "__pow__",
-                "__rpow__",
                 "__mod__",
                 "__rmod__",
             ):
                 # combine keeps boolean type
                 expected = expected.astype("Int8")
-            elif op_name in ("__truediv__", "__rtruediv__"):
-                # combine with bools does not generate the correct result
-                #  (numpy behaviour for div is to regard the bools as numeric)
-                op = self.get_op_from_name(op_name)
-                expected = self._combine(obj.astype(float), other, op)
-                expected = expected.astype("Float64")
-
-            if op_name == "__rpow__":
-                # for rpow, combine does not propagate NaN
-                result = getattr(obj, op_name)(other)
-                expected[result.isna()] = np.nan
-        else:
-            # combine method result in 'biggest' (float64) dtype
-            expected = expected.astype(sdtype)
         return expected
 
     def test_divmod_series_array(self, data, data_for_twos, request):
@@ -263,16 +237,6 @@ def test_divmod_series_array(self, data, data_for_twos, request):
             request.applymarker(mark)
         super().test_divmod_series_array(data, data_for_twos)
 
-    def test_combine_le(self, data_repeated):
-        # TODO: patching self is a bad pattern here
-        orig_data1, orig_data2 = data_repeated(2)
-        if orig_data1.dtype.kind == "b":
-            self._combine_le_expected_dtype = "boolean"
-        else:
-            # TODO: can we make this boolean?
-            self._combine_le_expected_dtype = object
-        super().test_combine_le(data_repeated)
-
     def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
         if op_name in ["any", "all"] and ser.dtype.kind != "b":
             pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 96c014f549056..d8203c2e2e350 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -101,6 +101,14 @@ def data_for_grouping(dtype, chunked):
 
 
 class TestStringArray(base.ExtensionTests):
+    def test_combine_le(self, data_repeated):
+        dtype = next(iter(data_repeated(2))).dtype
+        if dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
+            self._combine_le_expected_dtype = "bool[pyarrow]"
+        else:
+            self._combine_le_expected_dtype = "bool"
+        return super().test_combine_le(data_repeated)
+
     def test_eq_with_str(self, dtype):
         super().test_eq_with_str(dtype)
 
@@ -223,9 +231,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
 
     def test_combine_add(self, data_repeated, using_infer_string, request):
         dtype = next(data_repeated(1)).dtype
-        if using_infer_string and (
-            (dtype.na_value is pd.NA) and dtype.storage == "python"
-        ):
+        if not using_infer_string and dtype.storage == "python":
             mark = pytest.mark.xfail(
                 reason="The pointwise operation result will be inferred to "
                 "string[nan, pyarrow], which does not match the input dtype"
- ("float[pyarrow]", "double[pyarrow]"), - ("int64[pyarrow]", "int64[pyarrow]"), - ("uint64[pyarrow]", "int64[pyarrow]"), - ("bool[pyarrow]", "bool[pyarrow]"), - ], + "dtype", + ["float[pyarrow]", "int64[pyarrow]", "uint64[pyarrow]", "bool[pyarrow]"], ) -def test_agg_lambda_pyarrow_dtype_conversion(input_dtype, output_dtype): +def test_agg_lambda_pyarrow_dtype_conversion(dtype): # GH#59601 # Test PyArrow dtype conversion back to PyArrow dtype df = DataFrame( { "A": ["c1", "c2", "c3", "c1", "c2", "c3"], - "B": pd.array([100, 200, 255, 0, 199, 40392], dtype=input_dtype), + "B": pd.array([100, 200, 255, 0, 199, 40392], dtype=dtype), } ) gb = df.groupby("A") result = gb.agg(lambda x: x.min()) expected = DataFrame( - {"B": pd.array([0, 199, 255], dtype=output_dtype)}, + {"B": pd.array([0, 199, 255], dtype=dtype)}, index=Index(["c1", "c2", "c3"], name="A"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index a4c18732ef258..be15bce8bb82f 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -9,6 +9,7 @@ from pandas.errors import InvalidIndexError +from pandas import StringDtype import pandas._testing as tm @@ -36,8 +37,15 @@ def test_mutability(index): @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") -def test_map_identity_mapping(index, request): +def test_map_identity_mapping(index, request, using_infer_string): # GH#12766 + if ( + not using_infer_string + and isinstance(index.dtype, StringDtype) + and index.dtype.storage == "python" + ): + mark = pytest.mark.xfail(reason="Does not preserve dtype") + request.applymarker(mark) result = index.map(lambda x: x) if index.dtype == object and (result.dtype == bool or result.dtype == "string"): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 985ed880e7998..26eb33195ccbc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -581,12 +581,19 @@ def test_map_dictlike_simple(self, mapper): ], ) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") - def test_map_dictlike(self, index, mapper, request): + def test_map_dictlike(self, index, mapper, request, using_infer_string): # GH 12756 if isinstance(index, CategoricalIndex): pytest.skip("Tested in test_categorical") elif not index.is_unique: pytest.skip("Cannot map duplicated index") + if ( + not using_infer_string + and isinstance(index.dtype, pd.StringDtype) + and index.dtype.storage == "python" + ): + mark = pytest.mark.xfail(reason="map does not retain dtype") + request.applymarker(mark) rng = np.arange(len(index), 0, -1, dtype=np.int64) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 042c8ae186804..262f032f20187 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1246,12 +1246,7 @@ def test_resample_not_monotonic(unit): "int64", "int32", "float64", - pytest.param( - "float32", - marks=pytest.mark.xfail( - reason="Empty groups cause x.mean() to return float64" - ), - ), + "float32", ], ) def test_resample_median_bug_1688(dtype, unit): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 286625b8ce470..f3c52a674cf66 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -264,8 +264,6 @@ 
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 286625b8ce470..f3c52a674cf66 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -264,8 +264,6 @@ def f_1(x):
         return x.resample("2s").apply(lambda y: y.sum())
 
     result = g.apply(f_1)
-    # y.sum() results in int64 instead of int32 on 32-bit architectures
-    expected = expected.astype("int64")
     tm.assert_frame_equal(result, expected)
 
 
@@ -295,7 +293,9 @@ def test_apply_columns_multilevel():
     # GH 16231
     cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
     ind = date_range(start="2017-01-01", freq="15Min", periods=8)
-    df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
+    df = DataFrame(
+        np.array([0] * 16, dtype=np.int64).reshape(8, 2), index=ind, columns=cols
+    )
     agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
     result = df.resample("h").apply(lambda x: agg_dict[x.name](x))
     expected = DataFrame(