diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index 47ef34d9a47..1c7e65cf27f 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -60,7 +60,7 @@ if [ "${ARCHERY_INTEGRATION_WITH_JAVA}" -gt "0" ]; then export JAVA_JNI_CMAKE_ARGS="-DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF -DARROW_JAVA_JNI_ENABLE_C=ON" ${arrow_dir}/java/ci/scripts/jni_build.sh "${arrow_dir}/java" "${ARROW_HOME}" "${build_dir}/java/" /tmp/dist/java - ${arrow_dir}/java/ci/scripts/java_build.sh "${arrow_dir}/java" "${build_dir}/java" /tmp/dist/java + ${arrow_dir}/java/ci/scripts/build.sh "${arrow_dir}/java" "${build_dir}/java" /tmp/dist/java fi github_actions_group_end diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index c9420103968..edaf28cd92a 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -271,6 +271,7 @@ class PARQUET_EXPORT WriterProperties { created_by_(properties.created_by()), store_decimal_as_integer_(properties.store_decimal_as_integer()), page_checksum_enabled_(properties.page_checksum_enabled()), + size_statistics_level_(properties.size_statistics_level()), sorting_columns_(properties.sorting_columns()), default_column_properties_(properties.default_column_properties()) {} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9f04d33f83c..c43df2b6f25 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1430,6 +1430,12 @@ tasks: # ensure we have at least one build with parquet encryption disabled PARQUET_REQUIRE_ENCRYPTION: "OFF" {% endif %} + {% if pandas_version == "nightly" %} + # TODO can be removed once this is enabled by default in pandas >= 3 + # This is to enable the Pandas feature. + # See: https://github.com/pandas-dev/pandas/pull/58459 + PANDAS_FUTURE_INFER_STRING: "1" + {% endif %} {% if not cache_leaf %} # use the latest pandas release, so prevent reusing any cached layers flags: --no-leaf-cache diff --git a/docker-compose.yml b/docker-compose.yml index bd912095633..b70d924da13 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1326,6 +1326,7 @@ services: PYTEST_ARGS: # inherit HYPOTHESIS_PROFILE: # inherit PYARROW_TEST_HYPOTHESIS: # inherit + PANDAS_FUTURE_INFER_STRING: # inherit volumes: *conda-volumes command: *python-conda-command diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index 52f4a751dcc..605a1adbe10 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -24,7 +24,10 @@ Release Management Guide This page provides detailed information on the steps followed to perform a release. It can be used both as a guide to learn the Apache Arrow release process and as a comprehensive checklist for the Release Manager when -performing a release. +performing a release. The person acting as Release Manager must at least have +committer status in order to perform the tasks below. If the Release Manager is +a committer but not a member of the PMC, some tasks will need to be delegated +to a PMC member and these are marked below accordingly. Principles ========== @@ -36,8 +39,15 @@ Preparing for the release ========================= Before creating a source release, the Release Manager must ensure that any -resolved JIRAs have the appropriate Fix Version set so that the changelog is -generated properly. +resolved GitHub issues have the appropriate milestone set so that the changelog +is generated properly. 
+ +Note that pull requests without a corresponding GitHub issue won't be detected +by the cherry-pick script and must be cherry-picked manually by the release +manager onto the maintenance branch. Examples include MINOR and Dependabot pull +requests. For this reason, it's encouraged to avoid the need for manual +cherry-picking by creating issues for any pull requests that are merged to the +default branch after the release maintenance branch has been created. .. dropdown:: Requirements :animate: fade-in-slide-down @@ -67,7 +77,8 @@ generated properly. Before creating a Release Candidate =================================== -Ensure local tags are removed, gpg-agent is set and JIRA tickets are correctly assigned. +Ensure local tags are removed, gpg-agent is set and GitHub issues are correctly +assigned. .. code-block:: @@ -78,7 +89,8 @@ Ensure local tags are removed, gpg-agent is set and JIRA tickets are correctly a source dev/release/setup-gpg-agent.sh # Curate the release - # The end of the generated report shows the JIRA tickets with wrong version number assigned. + # The end of the generated report shows any GitHub issues with the wrong + # version number assigned. archery release curate Ensure a major version milestone for a follow up release is created on GitHub. This will @@ -149,7 +161,7 @@ Create or update the corresponding maintenance branch # This will create a branch locally called maint-X.Y.Z. # X.Y.Z corresponds with the Major, Minor and Patch version number # of the release respectively. As an example 9.0.0 - archery release --jira-cache /tmp/jiracache cherry-pick X.Y.Z --execute + archery release cherry-pick X.Y.Z --execute # Push the maintenance branch to the remote repository git push -u apache maint-X.Y.Z @@ -158,14 +170,30 @@ Create or update the corresponding maintenance branch .. code-block:: # First run in dry-mode to see which commits will be cherry-picked. - # If there are commits that we don't want to get applied ensure the version on - # JIRA is set to the following release. - archery release --jira-cache /tmp/jiracache cherry-pick X.Y.Z --continue + # If there are commits that we don't want to get applied, ensure the + # milestone on GitHub is set to the following release. + archery release cherry-pick X.Y.Z --continue # Update the maintenance branch with the previous commits - archery release --jira-cache /tmp/jiracache cherry-pick X.Y.Z --continue --execute + archery release cherry-pick X.Y.Z --continue --execute # Push the updated maintenance branch to the remote repository git push -u apache maint-X.Y.Z +Optional: Test Before Creating a Release Candidate +-------------------------------------------------- + +Some release managers prefer to perform testing before creating the first +release candidate to avoid the need to create multiple release candidates within +a given release. 
+
+To test before creating a release candidate:
+
+* Create a pull request from the up-to-date maint-X.Y.Z branch onto main
+* Title the pull request "WIP: Dummy PR to check maint-X.Y.Z status"
+* Comment on the pull request to trigger the relevant Crossbow jobs:
+
+  * ``@github-actions crossbow submit --group verify-rc-source``
+  * ``@github-actions crossbow submit --group packaging``
+
 
 Create the Release Candidate branch from the updated maintenance branch
 ------------------------------------------------------------------------
@@ -178,12 +206,12 @@ Create the Release Candidate branch from the updated maintenance branch
    # place the necessary commits updating the version number and then create a git tag
    # on OSX use gnu-sed with homebrew: brew install gnu-sed (and export to $PATH)
    #
-   # starts at 0 and increments every time the Release Candidate is burned
+   # starts at 0 and increments every time the Release Candidate is created
    # so for the first RC this would be: dev/release/01-prepare.sh 4.0.0 5.0.0 0
    dev/release/01-prepare.sh
 
    # Push the release candidate tag
-   git push -u apache apache-arrow-rc
+   git push -u apache apache-arrow--rc
    # Push the release candidate branch in order to trigger verification jobs later
    git push -u apache release--rc
 
@@ -194,6 +222,7 @@ Build source and binaries and submit them
 
    # Build the source release tarball and create Pull Request with verification tasks
    #
+   # NOTE: This must be run by a PMC member
    # NOTE: You need to have GitHub CLI installed to run this script.
    dev/release/02-source.sh
 
@@ -209,13 +238,16 @@ Build source and binaries and submit them
 
    # Sign and upload the binaries
    #
+   # NOTE: This must be run by a PMC member
+   #
    # On macOS the only way I could get this to work was running "echo "UPDATESTARTUPTTY" | gpg-connect-agent" before running this comment
    # otherwise I got errors referencing "ioctl" errors.
    dev/release/05-binary-upload.sh
 
    # Sign and upload MATLAB artifacts to the GitHub Releases area.
    #
-   # Note that you need to have GitHub CLI installed to run this script.
+   # NOTE: This must be run by a PMC member
+   # NOTE: You need to have GitHub CLI installed to run this script.
    dev/release/06-matlab-upload.sh
 
    # Start verifications for binaries and wheels
@@ -246,8 +278,6 @@ After the release vote, we must undertake many tasks to update source artifacts,
 Be sure to go through on the following checklist:
 
 #. Update the released milestone Date and set to "Closed" on GitHub
-#. Make the CPP PARQUET related version as "RELEASED" on JIRA
-#. Start the new version on JIRA for the related CPP PARQUET version
 #. Merge changes on release branch to maintenance branch for patch releases
 #. Add the new release to the Apache Reporter System
 #. Push release tag
@@ -266,7 +296,6 @@ Be sure to go through on the following checklist:
 #. Update vcpkg port
 #. Update Conan recipe
 #. Bump versions
-#. Update tags for Go modules
 #. Update docs
 #. Update version in Apache Arrow Cookbook
 #. Announce the new release
@@ -274,28 +303,6 @@ Be sure to go through on the following checklist:
 #. Announce the release on Twitter
 #. Remove old artifacts
 
-.. dropdown:: Mark the released version as "RELEASED" on JIRA
-   :animate: fade-in-slide-down
-   :class-title: sd-fs-5
-   :class-container: sd-shadow-md
-
-   - Open https://issues.apache.org/jira/plugins/servlet/project-config/ARROW/administer-versions
-   - Click "..." for the release version in "Actions" column
-   - Select "Release"
-   - Set "Release date"
-   - Click "Release" button
-
-..
dropdown:: Start the new version on JIRA - :animate: fade-in-slide-down - :class-title: sd-fs-5 - :class-container: sd-shadow-md - - - Open https://issues.apache.org/jira/plugins/servlet/project-config/ARROW/administer-versions - - Click "..." for the next version in "Actions" column - - Select "Edit" - - Set "Start date" - - Click "Save" button - .. dropdown:: Merge changes on release branch to maintenance branch for patch releases :animate: fade-in-slide-down :class-title: sd-fs-5 @@ -588,7 +595,7 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - Open a pull request to vcpkg: + Open a pull request to Conan: .. code-block:: Bash @@ -604,8 +611,8 @@ Be sure to go through on the following checklist: git remote add upstream https://github.com/conan-io/conan-center-index.git cd - - # dev/release/post-17-conan.sh 10.0.1 ../conan-center-index - dev/release/post-17-conan.sh X.Y.Z + # dev/release/post-16-conan.sh 10.0.1 ../conan-center-index + dev/release/post-16-conan.sh X.Y.Z This script pushes a ``arrow-X.Y.Z`` branch to your ``conan-io/conan-center-index`` fork. You need to create a pull request from the ``arrow-X.Y.Z`` branch on your Web browser. @@ -627,7 +634,8 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - The documentations are generated in the release process. We just need to upload the generated documentations: + Documentation is generated as part of the release process. We just need to + upload the generated documentation: .. code-block:: Bash @@ -650,7 +658,8 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - TODO + Follow `the documentation `_ + in the Apache Arrow Cookbook repository .. dropdown:: Announce the new release :animate: fade-in-slide-down @@ -666,16 +675,38 @@ Be sure to go through on the following checklist: :class-title: sd-fs-5 :class-container: sd-shadow-md - TODO + The blog post process isn't automated. The rough set of steps we usually take + are: -.. dropdown:: Announce the release on Twitter + * Clone https://github.com/apache/arrow-site. + * Create a new branch off ``main`` for the blog post pull request we're + creating. + * Duplicate a recent blog post entry in the ``_posts`` subfolder and update + the filename and YAML metadata. + + * Set the date in the filename and in the YAML metadata to the date that the + release candidate vote thread for the release closed (in GMT). + + * *For minor releases only*, remove any section about community updates (new + committers, PMC members, etc). + * Update the remainder of the text as needed + * Create the pull request + * In the pull request, ping contributors in each section requesting help + filling in the details for each section. + + +.. dropdown:: Announce the release on social media :animate: fade-in-slide-down :class-title: sd-fs-5 :class-container: sd-shadow-md - Post the release blog post on Twitter from the `@ApacheArrow `_ handle. + Post about the release and link to the blog post on social media. The project + has two official accounts: + + * Twitter/X: `@ApacheArrow `_ + * LinkedIn: https://www.linkedin.com/company/apache-arrow/ - PMC members have access or can request access, after which they can post via `TweetDeck `_. + PMC members have access or can request access to post under these accounts. .. dropdown:: Remove old artifacts :animate: fade-in-slide-down @@ -687,3 +718,5 @@ Be sure to go through on the following checklist: .. 
code-block:: Bash dev/release/post-09-remove-old-artifacts.sh + + Note: This step must be done by a PMC member. diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index f86caf1433d..2ef42051d9a 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -117,6 +117,8 @@ def _handle_arrow_array_protocol(obj, type, mask, size): "return a pyarrow Array or ChunkedArray.") if isinstance(res, ChunkedArray) and res.num_chunks==1: res = res.chunk(0) + if type is not None and res.type != type: + res = res.cast(type) return res diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi index 74f0d981b52..5be6f03f86e 100644 --- a/python/pyarrow/pandas-shim.pxi +++ b/python/pyarrow/pandas-shim.pxi @@ -38,7 +38,7 @@ cdef class _PandasAPIShim(object): object _array_like_types, _is_extension_array_dtype, _lock bint has_sparse bint _pd024 - bint _is_v1, _is_ge_v21, _is_ge_v3 + bint _is_v1, _is_ge_v21, _is_ge_v3, _is_ge_v3_strict def __init__(self): self._lock = Lock() @@ -80,6 +80,7 @@ cdef class _PandasAPIShim(object): self._is_v1 = self._loose_version < Version('2.0.0') self._is_ge_v21 = self._loose_version >= Version('2.1.0') self._is_ge_v3 = self._loose_version >= Version('3.0.0.dev0') + self._is_ge_v3_strict = self._loose_version >= Version('3.0.0') self._compat_module = pdcompat self._data_frame = pd.DataFrame @@ -174,6 +175,20 @@ cdef class _PandasAPIShim(object): self._check_import() return self._is_ge_v3 + def is_ge_v3_strict(self): + self._check_import() + return self._is_ge_v3_strict + + def uses_string_dtype(self): + if self.is_ge_v3_strict(): + return True + try: + if self.pd.options.future.infer_string: + return True + except: + pass + return False + @property def categorical_type(self): self._check_import() diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index d0582f825b5..e9655914ad7 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -174,7 +174,11 @@ def get_column_metadata(column, name, arrow_type, field_name): } string_dtype = 'object' - if name is not None and not isinstance(name, str): + if ( + name is not None + and not (isinstance(name, float) and np.isnan(name)) + and not isinstance(name, str) + ): raise TypeError( 'Column name must be a string. 
Got column {} of type {}'.format( name, type(name).__name__ @@ -340,8 +344,8 @@ def _column_name_to_strings(name): return str(tuple(map(_column_name_to_strings, name))) elif isinstance(name, Sequence): raise TypeError("Unsupported type for MultiIndex level") - elif name is None: - return None + elif name is None or (isinstance(name, float) and np.isnan(name)): + return name return str(name) @@ -790,10 +794,12 @@ def table_to_dataframe( table, index = _reconstruct_index(table, index_descriptors, all_columns, types_mapper) ext_columns_dtypes = _get_extension_dtypes( - table, all_columns, types_mapper) + table, all_columns, types_mapper, options, categories) else: index = _pandas_api.pd.RangeIndex(table.num_rows) - ext_columns_dtypes = _get_extension_dtypes(table, [], types_mapper) + ext_columns_dtypes = _get_extension_dtypes( + table, [], types_mapper, options, categories + ) _check_data_column_metadata_consistency(all_columns) columns = _deserialize_column_index(table, all_columns, column_indexes) @@ -838,7 +844,7 @@ def table_to_dataframe( } -def _get_extension_dtypes(table, columns_metadata, types_mapper=None): +def _get_extension_dtypes(table, columns_metadata, types_mapper, options, categories): """ Based on the stored column pandas metadata and the extension types in the arrow schema, infer which columns should be converted to a @@ -851,6 +857,9 @@ def _get_extension_dtypes(table, columns_metadata, types_mapper=None): and then we can check if this dtype supports conversion from arrow. """ + strings_to_categorical = options["strings_to_categorical"] + categories = categories or [] + ext_columns = {} # older pandas version that does not yet support extension dtypes @@ -889,9 +898,32 @@ def _get_extension_dtypes(table, columns_metadata, types_mapper=None): # that are certainly numpy dtypes pandas_dtype = _pandas_api.pandas_dtype(dtype) if isinstance(pandas_dtype, _pandas_api.extension_dtype): + if isinstance(pandas_dtype, _pandas_api.pd.StringDtype): + # when the metadata indicate to use the string dtype, + # ignore this in case: + # - it is specified to convert strings / this column to categorical + # - the column itself is dictionary encoded and would otherwise be + # converted to categorical + if strings_to_categorical or name in categories: + continue + try: + if pa.types.is_dictionary(table.schema.field(name).type): + continue + except KeyError: + pass if hasattr(pandas_dtype, "__from_arrow__"): ext_columns[name] = pandas_dtype + # for pandas 3.0+, use pandas' new default string dtype + if _pandas_api.uses_string_dtype() and not strings_to_categorical: + for field in table.schema: + if field.name not in ext_columns and ( + pa.types.is_string(field.type) + or pa.types.is_large_string(field.type) + or pa.types.is_string_view(field.type) + ) and field.name not in categories: + ext_columns[field.name] = _pandas_api.pd.StringDtype(na_value=np.nan) + return ext_columns @@ -1049,9 +1081,9 @@ def get_pandas_logical_type_map(): 'date': 'datetime64[D]', 'datetime': 'datetime64[ns]', 'datetimetz': 'datetime64[ns]', - 'unicode': np.str_, + 'unicode': 'str', 'bytes': np.bytes_, - 'string': np.str_, + 'string': 'str', 'integer': np.int64, 'floating': np.float64, 'decimal': np.object_, @@ -1142,6 +1174,20 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): # GH-41503: if the column index was decimal, restore to decimal elif pandas_dtype == "decimal": level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level]) + elif ( + level.dtype == "str" and numpy_dtype == "object" + and 
("mixed" in pandas_dtype or pandas_dtype in ["unicode", "string"]) + ): + # the metadata indicate that the original dataframe used object dtype, + # but ignore this and keep string dtype if: + # - the original columns used mixed types -> we don't attempt to faithfully + # roundtrip in this case, but keep the column names as strings + # - the original columns were inferred to be strings but stored in object + # dtype -> we don't restore the object dtype because all metadata + # generated using pandas < 3 will have this case by default, and + # for pandas >= 3 we want to use the default string dtype for .columns + new_levels.append(level) + continue elif level.dtype != dtype: level = level.astype(dtype) # ARROW-9096: if original DataFrame was upcast we keep that diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 10c4d0e1600..a0f1d5bbbed 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -2523,7 +2523,8 @@ Status ConvertCategoricals(const PandasOptions& options, ChunkedArrayVector* arr } if (options.strings_to_categorical) { for (int i = 0; i < static_cast(arrays->size()); i++) { - if (is_base_binary_like((*arrays)[i]->type()->id())) { + if (is_base_binary_like((*arrays)[i]->type()->id()) || + is_binary_view_like((*arrays)[i]->type()->id())) { columns_to_encode.push_back(i); } } @@ -2557,7 +2558,8 @@ Status ConvertChunkedArrayToPandas(const PandasOptions& options, py_ref = nullptr; } - if (options.strings_to_categorical && is_base_binary_like(arr->type()->id())) { + if (options.strings_to_categorical && (is_base_binary_like(arr->type()->id()) || + is_binary_view_like(arr->type()->id()))) { if (options.zero_copy_only) { return Status::Invalid("Need to dictionary encode a column, but ", "only zero-copy conversions allowed"); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index e6fcd6149ee..6f28205a18e 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1020,7 +1020,7 @@ def test_replace_slice(): offsets = range(-3, 4) arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde']) - series = arr.to_pandas() + series = arr.to_pandas().astype(object).replace({np.nan: None}) for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') @@ -1031,7 +1031,7 @@ def test_replace_slice(): assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde']) - series = arr.to_pandas() + series = arr.to_pandas().astype(object).replace({np.nan: None}) for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') @@ -2132,7 +2132,8 @@ def test_strftime(): for fmt in formats: options = pc.StrftimeOptions(fmt) result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime(fmt)) + # cast to the same type as result to ignore string vs large_string + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) fmt = "%Y-%m-%dT%H:%M:%S" @@ -2140,34 +2141,34 @@ def test_strftime(): # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions()) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Default format plus timezone tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = 
pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) - expected = pa.array(ts.strftime(fmt + "%Z")) + expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S")) + expected = pa.array(ts.strftime("%S")).cast(result.type) assert result.equals(expected) # Pandas %S.%f is equivalent to %S in arrow for unit="us" tsa = pa.array(ts, type=pa.timestamp("us", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S.%f")) + expected = pa.array(ts.strftime("%S.%f")).cast(result.type) assert result.equals(expected) # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions(fmt, locale="C") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Test timestamps without timezone @@ -2175,7 +2176,7 @@ def test_strftime(): ts = pd.to_datetime(times) tsa = pa.array(ts, type=pa.timestamp("s")) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) # Positional format assert pc.strftime(tsa, fmt) == result diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 18c8cd5b654..249fb621279 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -426,7 +426,11 @@ def test_empty_strings(version): @pytest.mark.pandas def test_all_none(version): df = pd.DataFrame({'all_none': [None] * 10}) - _check_pandas_roundtrip(df, version=version) + if version == 1 and pa.pandas_compat._pandas_api.uses_string_dtype(): + expected = df.astype("str") + else: + expected = df + _check_pandas_roundtrip(df, version=version, expected=expected) @pytest.mark.pandas diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 1186f87b032..f356874c576 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -349,6 +349,17 @@ def test_integer_index_column(self): df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')]) _check_pandas_roundtrip(df, preserve_index=True) + def test_float_column_index_with_missing(self): + df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=[1.5, np.nan]) + _check_pandas_roundtrip(df, preserve_index=True) + + @pytest.mark.filterwarnings( + "ignore:The DataFrame has column names of mixed type:UserWarning" + ) + def test_string_column_index_with_missing(self): + df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=["A", None]) + _check_pandas_roundtrip(df, preserve_index=True) + def test_index_metadata_field_name(self): # test None case, and strangely named non-index columns df = pd.DataFrame( @@ -359,8 +370,11 @@ def test_index_metadata_field_name(self): ), columns=['a', None, '__index_level_0__'], ) - with pytest.warns(UserWarning): + if _pandas_api.uses_string_dtype(): t = pa.Table.from_pandas(df, preserve_index=True) + else: + with pytest.warns(UserWarning): + t = pa.Table.from_pandas(df, preserve_index=True) js = t.schema.pandas_metadata col1, col2, col3, idx0, foo = js['columns'] @@ -368,8 +382,12 @@ def test_index_metadata_field_name(self): assert col1['name'] == 'a' 
assert col1['name'] == col1['field_name'] - assert col2['name'] is None - assert col2['field_name'] == 'None' + if _pandas_api.uses_string_dtype(): + assert np.isnan(col2['name']) + assert col2['field_name'] == 'nan' + else: + assert col2['name'] is None + assert col2['field_name'] == 'None' assert col3['name'] == '__index_level_0__' assert col3['name'] == col3['field_name'] @@ -411,7 +429,9 @@ def test_string_column_index(self): column_indexes, = js['column_indexes'] assert column_indexes['name'] == 'stringz' assert column_indexes['name'] == column_indexes['field_name'] - assert column_indexes['numpy_type'] == 'object' + assert column_indexes['numpy_type'] == ( + 'str' if _pandas_api.uses_string_dtype() else 'object' + ) assert column_indexes['pandas_type'] == 'unicode' md = column_indexes['metadata'] @@ -1680,7 +1700,10 @@ def test_pandas_unicode(self): repeats = 1000 values = ['foo', None, 'bar', 'mañana', np.nan] df = pd.DataFrame({'strings': values * repeats}) - field = pa.field('strings', pa.string()) + field = pa.field( + 'strings', + pa.large_string() if _pandas_api.uses_string_dtype() else pa.string() + ) schema = pa.schema([field]) ex_values = ['foo', None, 'bar', 'mañana', None] expected = pd.DataFrame({'strings': ex_values * repeats}) @@ -1836,10 +1859,13 @@ def test_to_pandas_categories_already_dictionary(self): result = table.to_pandas(categories=['col']) assert table.to_pandas().equals(result) - def test_table_str_to_categorical_without_na(self): + @pytest.mark.parametrize( + "string_type", [pa.string(), pa.large_string(), pa.string_view()] + ) + def test_table_str_to_categorical_without_na(self, string_type): values = ['a', 'a', 'b', 'b', 'c'] df = pd.DataFrame({'strings': values}) - field = pa.field('strings', pa.string()) + field = pa.field('strings', string_type) schema = pa.schema([field]) table = pa.Table.from_pandas(df, schema=schema) @@ -1851,10 +1877,22 @@ def test_table_str_to_categorical_without_na(self): table.to_pandas(strings_to_categorical=True, zero_copy_only=True) - def test_table_str_to_categorical_with_na(self): + # chunked array + result = table["strings"].to_pandas(strings_to_categorical=True) + expected = pd.Series(pd.Categorical(values), name="strings") + tm.assert_series_equal(result, expected) + + with pytest.raises(pa.ArrowInvalid): + table["strings"].to_pandas(strings_to_categorical=True, + zero_copy_only=True) + + @pytest.mark.parametrize( + "string_type", [pa.string(), pa.large_string(), pa.string_view()] + ) + def test_table_str_to_categorical_with_na(self, string_type): values = [None, 'a', 'b', np.nan] df = pd.DataFrame({'strings': values}) - field = pa.field('strings', pa.string()) + field = pa.field('strings', string_type) schema = pa.schema([field]) table = pa.Table.from_pandas(df, schema=schema) @@ -1866,6 +1904,15 @@ def test_table_str_to_categorical_with_na(self): table.to_pandas(strings_to_categorical=True, zero_copy_only=True) + # chunked array + result = table["strings"].to_pandas(strings_to_categorical=True) + expected = pd.Series(pd.Categorical(values), name="strings") + tm.assert_series_equal(result, expected) + + with pytest.raises(pa.ArrowInvalid): + table["strings"].to_pandas(strings_to_categorical=True, + zero_copy_only=True) + # Regression test for ARROW-2101 def test_array_of_bytes_to_strings(self): converted = pa.array(np.array([b'x'], dtype=object), pa.string()) @@ -3299,6 +3346,10 @@ def _assert_nunique(obj, expected): def test_to_pandas_deduplicate_strings_array_types(): + if _pandas_api.uses_string_dtype(): + 
pytest.skip( + "pandas uses string dtype and not object dtype, keyword has no effect" + ) nunique = 100 repeats = 10 values = _generate_dedup_example(nunique, repeats) @@ -3311,6 +3362,10 @@ def test_to_pandas_deduplicate_strings_array_types(): def test_to_pandas_deduplicate_strings_table_types(): + if _pandas_api.uses_string_dtype(): + pytest.skip( + "pandas uses string dtype and not object dtype, keyword has no effect" + ) nunique = 100 repeats = 10 values = _generate_dedup_example(nunique, repeats) @@ -3774,20 +3829,26 @@ def _check_to_pandas_memory_unchanged(obj, **kwargs): x = obj.to_pandas(**kwargs) # noqa # Memory allocation unchanged -- either zero copy or self-destructing - assert pa.total_allocated_bytes() == prior_allocation + if _pandas_api.uses_string_dtype(): + # for the string array of the columns Index + # -> increase the size to account for overallocation for small arrays + max_index_allocation = max(192, x.columns.nbytes * 2) + assert pa.total_allocated_bytes() <= (prior_allocation + max_index_allocation) + else: + assert pa.total_allocated_bytes() == prior_allocation def test_to_pandas_split_blocks(): # ARROW-3789 t = pa.table([ - pa.array([1, 2, 3, 4, 5], type='i1'), - pa.array([1, 2, 3, 4, 5], type='i4'), - pa.array([1, 2, 3, 4, 5], type='i8'), - pa.array([1, 2, 3, 4, 5], type='f4'), - pa.array([1, 2, 3, 4, 5], type='f8'), - pa.array([1, 2, 3, 4, 5], type='f8'), - pa.array([1, 2, 3, 4, 5], type='f8'), - pa.array([1, 2, 3, 4, 5], type='f8'), + pa.array([1, 2, 3, 4, 5]*100, type='i1'), + pa.array([1, 2, 3, 4, 5]*100, type='i4'), + pa.array([1, 2, 3, 4, 5]*100, type='i8'), + pa.array([1, 2, 3, 4, 5]*100, type='f4'), + pa.array([1, 2, 3, 4, 5]*100, type='f8'), + pa.array([1, 2, 3, 4, 5]*100, type='f8'), + pa.array([1, 2, 3, 4, 5]*100, type='f8'), + pa.array([1, 2, 3, 4, 5]*100, type='f8'), ], ['f{}'.format(i) for i in range(8)]) _check_blocks_created(t, 8) @@ -3832,7 +3893,12 @@ def test_table_uses_memory_pool(): prior_allocation = pa.total_allocated_bytes() x = t.to_pandas() - assert pa.total_allocated_bytes() == (prior_allocation + 3 * N * 8) + new_allocation = 3 * N * 8 + if _pandas_api.uses_string_dtype(): + # for the small columns Index + new_allocation += 128 + + assert pa.total_allocated_bytes() == (prior_allocation + new_allocation) # Check successful garbage collection x = None # noqa @@ -4110,7 +4176,10 @@ def test_dictionary_encoded_nested_to_pandas(): def test_dictionary_from_pandas(): cat = pd.Categorical(['a', 'b', 'a']) - expected_type = pa.dictionary(pa.int8(), pa.string()) + expected_type = pa.dictionary( + pa.int8(), + pa.large_string() if _pandas_api.uses_string_dtype() else pa.string() + ) result = pa.array(cat) assert result.to_pylist() == ['a', 'b', 'a']