1616
1717import numpy as np
1818
19+ from pandas ._config import using_pyarrow_strict_nans
20+
1921from pandas ._libs import lib
20- from pandas ._libs .missing import NA
22+ from pandas ._libs .missing import is_pdna_or_none
2123from pandas ._libs .tslibs import (
2224 Timedelta ,
2325 Timestamp ,
@@ -326,6 +328,11 @@ def _from_sequence_of_strings(
326328 """
327329 Construct a new ExtensionArray from a sequence of strings.
328330 """
331+ mask = isna (strings )
332+
333+ if isinstance (strings , cls ):
334+ strings = strings ._pa_array
335+
329336 pa_type = to_pyarrow_type (dtype )
330337 if (
331338 pa_type is None
@@ -344,22 +351,35 @@ def _from_sequence_of_strings(
344351 from pandas .core .tools .datetimes import to_datetime
345352
346353 scalars = to_datetime (strings , errors = "raise" ).date
354+
355+ if isinstance (strings , cls ):
356+ # Avoid an object path
357+ # TODO: this assumes that pyarrow's str->date casting is the
358+ # same as to_datetime. Is that a fair assumption?
359+ scalars = strings ._pa_array .cast (pa_type )
360+ else :
361+ scalars = pa .array (scalars , mask = mask .view (bool ), type = pa_type )
362+
347363 elif pa .types .is_duration (pa_type ):
348364 from pandas .core .tools .timedeltas import to_timedelta
349365
350366 scalars = to_timedelta (strings , errors = "raise" )
367+
351368 if pa_type .unit != "ns" :
352369 # GH51175: test_from_sequence_of_strings_pa_array
353370 # attempt to parse as int64 reflecting pyarrow's
354371 # duration to string casting behavior
355372 mask = isna (scalars )
356- if not isinstance (strings , (pa .Array , pa .ChunkedArray )):
357- strings = pa .array (strings , type = pa .string ())
373+ if isinstance (strings , cls ):
374+ strings = strings ._pa_array
375+ elif not isinstance (strings , (pa .Array , pa .ChunkedArray )):
376+ strings = pa .array (strings , type = pa .string (), mask = mask )
358377 strings = pc .if_else (mask , None , strings )
359378 try :
360379 scalars = strings .cast (pa .int64 ())
361380 except pa .ArrowInvalid :
362381 pass
382+
363383 elif pa .types .is_time (pa_type ):
364384 from pandas .core .tools .times import to_time
365385
@@ -375,7 +395,7 @@ def _from_sequence_of_strings(
375395 if isinstance (strings , (pa .Array , pa .ChunkedArray )):
376396 scalars = strings
377397 else :
378- scalars = pa .array (strings , type = pa .string ())
398+ scalars = pa .array (strings , type = pa .string (), mask = mask )
379399 scalars = pc .if_else (pc .equal (scalars , "1.0" ), "1" , scalars )
380400 scalars = pc .if_else (pc .equal (scalars , "0.0" ), "0" , scalars )
381401 scalars = scalars .cast (pa .bool_ ())
@@ -387,12 +407,16 @@ def _from_sequence_of_strings(
387407 from pandas .core .tools .numeric import to_numeric
388408
389409 scalars = to_numeric (strings , errors = "raise" )
390- if not pa .types .is_decimal (pa_type ):
410+ if not pa .types .is_decimal (pa_type ) and isinstance (
411+ strings , (pa .Array , pa .ChunkedArray )
412+ ):
391413 # TODO: figure out why doing this cast breaks with decimal dtype
392414 # in test_from_sequence_of_strings_pa_array
393415 mask = strings .is_null ()
394416 scalars = pa .array (scalars , mask = np .array (mask ), type = pa_type )
395417 # TODO: could we just do strings.cast(pa_type)?
418+ elif mask is not None :
419+ scalars = pa .array (scalars , mask = mask .view (bool ), type = pa_type )
396420
397421 else :
398422 raise NotImplementedError (
@@ -546,23 +570,20 @@ def _box_pa_array(
546570 return pa_array
547571
548572 mask = None
549- if getattr (value , "dtype" , None ) is None or value .dtype .kind not in "mfM" :
550- # similar to isna(value) but exclude NaN
551- # TODO: cythonize!
552- mask = np .array ([x is NA or x is None for x in value ], dtype = bool )
553-
554- from_pandas = False
555- if pa .types .is_integer (pa_type ):
556- # If user specifically asks to cast a numpy float array with NaNs
557- # to pyarrow integer, we'll treat those NaNs as NA
558- from_pandas = True
573+ if getattr (value , "dtype" , None ) is None or value .dtype .kind not in "mMf" :
574+ try :
575+ arr_value = np .asarray (value )
576+ except ValueError :
577+ # e.g. list dtype with mixed-length lists
578+ arr_value = np .asarray (value , dtype = object )
579+ # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
580+ mask = is_pdna_or_none (arr_value )
581+
559582 try :
560- pa_array = pa .array (
561- value , type = pa_type , mask = mask , from_pandas = from_pandas
562- )
583+ pa_array = pa .array (value , type = pa_type , mask = mask )
563584 except (pa .ArrowInvalid , pa .ArrowTypeError ):
564585 # GH50430: let pyarrow infer type, then cast
565- pa_array = pa .array (value , mask = mask , from_pandas = from_pandas )
586+ pa_array = pa .array (value , mask = mask )
566587
567588 if pa_type is None and pa .types .is_duration (pa_array .type ):
568589 # Workaround https://github.com/apache/arrow/issues/37291
@@ -1517,7 +1538,11 @@ def to_numpy(
15171538 pa .types .is_floating (pa_type )
15181539 and (
15191540 na_value is np .nan
1520- or (original_na_value is lib .no_default and is_float_dtype (dtype ))
1541+ or (
1542+ original_na_value is lib .no_default
1543+ and is_float_dtype (dtype )
1544+ and not using_pyarrow_strict_nans ()
1545+ )
15211546 )
15221547 ):
15231548 result = data ._pa_array .to_numpy ()
@@ -2390,6 +2415,7 @@ def _replace_with_mask(
23902415 replacements = np .array (replacements , dtype = object )
23912416 elif isinstance (replacements , pa .Scalar ):
23922417 replacements = replacements .as_py ()
2418+
23932419 result = np .array (values , dtype = object )
23942420 result [mask ] = replacements
23952421 return pa .array (result , type = values .type )
0 commit comments