@@ -247,7 +247,11 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
247
247
.. versionadded:: 1.4.0
248
248
249
249
The 'pyarrow' engine was added as an *experimental* engine, and some features
250
- are unsupported, or may not work correctly, with this engine.
250
+ are unsupported, or may not work correctly, with this engine. For example,
251
+ the ``newlines_in_values`` option of pyarrow's ``ParseOptions`` allows newline
252
+ characters inside values when parsing CSV files. However, this option is not
253
+ currently supported by pandas. In this case, pyarrow's ``csv`` module
254
+ should be used directly instead. For more information, see the Examples section.
251
255
converters : dict of {{Hashable : Callable}}, optional
252
256
Functions for converting values in specified columns. Keys can either
253
257
be column labels or column indices.
@@ -545,12 +549,25 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
545
549
... parse_dates=[1, 2],
546
550
... date_format={{'col 2': '%d/%m/%Y', 'col 3': '%a %d %b %Y'}},
547
551
... ) # doctest: +SKIP
548
-
549
552
>>> df.dtypes # doctest: +SKIP
550
553
col 1 int64
551
554
col 2 datetime64[ns]
552
555
col 3 datetime64[ns]
553
556
dtype: object
557
+
558
+ pyarrow's ``csv`` module must be used directly if values contain newline characters.
559
+
560
+ >>> from pyarrow import csv  # doctest: +SKIP
561
+ >>> parse_options = csv.ParseOptions(newlines_in_values=True)  # doctest: +SKIP
562
+ >>> table = csv.read_csv("./example.csv", parse_options=parse_options)  # doctest: +SKIP
563
+ >>> df = table.to_pandas()  # doctest: +SKIP
564
+ >>> df.head()  # doctest: +SKIP
565
+ text idx
566
+ 0 ab\ncd 0
567
+ 1 ab\ncd 1
568
+ 2 ab\ncd 2
569
+ 3 ab\ncd 3
570
+ 4 ab\ncd 4
554
571
""" # noqa: E501
555
572
556
573
0 commit comments