diff --git a/pyproject.toml b/pyproject.toml
index 511a9e0d744..abac4a9d90a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,4 +24,5 @@ filterwarnings = [
 markers = [
     "unit: unit test",
     "integration: integration test",
+    "high_memory: tests requiring significant RAM (>=16GB)",
 ]
diff --git a/src/datasets/arrow_writer.py b/src/datasets/arrow_writer.py
index 3174f5cf206..606a007af90 100644
--- a/src/datasets/arrow_writer.py
+++ b/src/datasets/arrow_writer.py
@@ -524,7 +524,9 @@ def _build_schema(self, inferred_schema: pa.Schema):
 
     def _build_writer(self, inferred_schema: pa.Schema):
         self._schema, self._features = self._build_schema(inferred_schema)
-        self.pa_writer = pa.RecordBatchStreamWriter(self.stream, self._schema)
+        self.pa_writer = pa.RecordBatchStreamWriter(
+            self.stream, self._schema, options=pa.ipc.IpcWriteOptions(allow_64bit=True)
+        )
 
     @property
     def schema(self):
diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py
index 88259767ae0..93215184d9f 100644
--- a/src/datasets/features/features.py
+++ b/src/datasets/features/features.py
@@ -1535,9 +1535,13 @@ def list_of_pa_arrays_to_pyarrow_listarray(l_arr: list[Optional[pa.Array]]) -> p
         [0] + [len(arr) for arr in l_arr], dtype=object
     )  # convert to dtype object to allow None insertion
     offsets = np.insert(offsets, null_indices, None)
-    offsets = pa.array(offsets, type=pa.int32())
     values = pa.concat_arrays(l_arr)
-    return pa.ListArray.from_arrays(offsets, values)
+    try:
+        offsets = pa.array(offsets, type=pa.int32())
+        return pa.ListArray.from_arrays(offsets, values)
+    except pa.lib.ArrowInvalid:
+        offsets = pa.array(offsets, type=pa.int64())
+        return pa.LargeListArray.from_arrays(offsets, values)
 
 
 def list_of_np_array_to_pyarrow_listarray(l_arr: list[np.ndarray], type: pa.DataType = None) -> pa.ListArray:
diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py
index 8e76952d6ca..e40bce33296 100644
--- a/tests/test_arrow_dataset.py
+++ b/tests/test_arrow_dataset.py
@@ -4783,3 +4783,26 @@ def test_from_polars_save_to_disk_and_load_from_disk_round_trip_with_large_list(
 def test_polars_round_trip():
     ds = Dataset.from_dict({"x": [[1, 2], [3, 4, 5]], "y": ["a", "b"]})
     assert isinstance(Dataset.from_polars(ds.to_polars()), Dataset)
+
+
+@pytest.mark.high_memory
+def test_map_int32_overflow():
+    # GH: 7821
+    # A single 2**31-element uint16 array (~4GB) overflows int32 list offsets.
+    # Marked as high_memory (>=16GB) since Arrow conversion makes extra copies.
+    def process_batch(batch):
+        res = []
+        for _ in batch["id"]:
+            res.append(np.zeros(2**31, dtype=np.uint16))
+
+        return {"audio": res}
+
+    ds = Dataset.from_dict({"id": [0]})
+    mapped_ds = ds.map(
+        process_batch,
+        batched=True,
+        batch_size=1,
+        num_proc=None,
+        remove_columns=ds.column_names,
+    )
+    assert isinstance(mapped_ds, Dataset)
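
The `features.py` change can be exercised in isolation. The sketch below mirrors the try/except fallback at small scale; the helper name `chunks_to_listarray` is hypothetical and not part of this PR. Offsets are tried as int32 first, and only when the cumulative element count exceeds the int32 range does the builder switch to 64-bit offsets and a `LargeListArray`:

```python
import numpy as np
import pyarrow as pa


def chunks_to_listarray(chunks: list[pa.Array]) -> pa.Array:
    # Offsets are cumulative element counts: [0, len(c0), len(c0) + len(c1), ...]
    offsets = np.cumsum([0] + [len(c) for c in chunks], dtype=object)
    values = pa.concat_arrays(chunks)
    try:
        # int32 offsets cap the total element count at 2**31 - 1
        return pa.ListArray.from_arrays(pa.array(offsets, type=pa.int32()), values)
    except pa.lib.ArrowInvalid:
        # Past that cap, fall back to 64-bit offsets (LargeListArray)
        return pa.LargeListArray.from_arrays(pa.array(offsets, type=pa.int64()), values)


arr = chunks_to_listarray([pa.array([1, 2]), pa.array([3, 4, 5])])
print(arr.type)  # list<item: int64> -- small inputs stay on the int32 path
```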
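For the `arrow_writer.py` change: by default, Arrow IPC writers reject buffers longer than 2**31 - 1 bytes, which is what `allow_64bit=True` lifts. A minimal standalone sketch of the writer wiring, assuming only pyarrow (the schema and data here are illustrative, not from the PR):

```python
import pyarrow as pa

# One large_list row; in the real failure case its child buffer exceeds
# 2**31 - 1 bytes, but it is kept tiny here just to show the wiring.
batch = pa.record_batch(
    [pa.array([[1, 2, 3]], type=pa.large_list(pa.uint16()))], names=["audio"]
)

sink = pa.BufferOutputStream()
writer = pa.RecordBatchStreamWriter(
    sink, batch.schema, options=pa.ipc.IpcWriteOptions(allow_64bit=True)
)
writer.write_batch(batch)
writer.close()
```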