Skip to content

Commit 0a76788

Browse files
committed
Documentation update
1 parent 9da60e4 commit 0a76788

File tree

4 files changed

+36
-53
lines changed

4 files changed

+36
-53
lines changed

pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
441441
if (sublen > 0) {
442442
int has_sep = 0;
443443
int j = 0;
444-
for (j = 0; j < (sublen > 2 ? 2 : sublen); ++j) {
444+
for (j = 0; j < (sublen > 2 && !has_ymd_sep ? 2 : sublen); ++j) {
445445
char c = substr[j];
446446
for (i = 0; i < valid_ymd_sep_len; ++i) {
447447
if (c == valid_ymd_sep[i]) {

pandas/_libs/tslibs/strptime.pyx

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -368,23 +368,7 @@ def array_strptime(
368368
errors : string specifying error handling, {'raise', 'coerce'}
369369
creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC
370370
Set to NPY_FR_GENERIC to infer a resolution.
371-
372-
/// INSERT DOCUMENTATION UPDATE HERE ///
373-
########################
374-
########################
375-
########################
376-
########################
377-
########################
378-
########################
379-
########################
380-
########################
381-
########################
382-
########################
383-
########################
384-
########################
385-
########################
386-
########################
387-
########################
371+
threshold : float, minimum fraction of valid datetime components required for parsing to be considered successful
388372
"""
389373

390374
cdef:

pandas/core/tools/datetimes.py

Lines changed: 26 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -351,23 +351,8 @@ def _convert_listlike_datetimes(
351351
yearfirst parsing behavior from to_datetime
352352
exact : bool, default True
353353
exact format matching behavior from to_datetime
354-
355-
/// INSERT DOCUMENTATION UPDATE HERE ///
356-
########################
357-
########################
358-
########################
359-
########################
360-
########################
361-
########################
362-
########################
363-
########################
364-
########################
365-
########################
366-
########################
367-
########################
368-
########################
369-
########################
370-
########################
354+
threshold : float
355+
Minimum fraction of valid datetime components required to consider parsing successful, as in to_datetime
371356
372357
Returns
373358
-------
@@ -660,6 +645,7 @@ def to_datetime(
660645
unit: str | None = ...,
661646
origin=...,
662647
cache: bool = ...,
648+
threshold: float = ...,
663649
) -> Timestamp: ...
664650

665651

@@ -675,6 +661,7 @@ def to_datetime(
675661
unit: str | None = ...,
676662
origin=...,
677663
cache: bool = ...,
664+
threshold: float = ...,
678665
) -> Series: ...
679666

680667

@@ -690,6 +677,7 @@ def to_datetime(
690677
unit: str | None = ...,
691678
origin=...,
692679
cache: bool = ...,
680+
threshold: float = ...,
693681
) -> DatetimeIndex: ...
694682

695683

@@ -814,24 +802,19 @@ def to_datetime(
814802
is only used when there are at least 50 values. The presence of
815803
out-of-bounds values will render the cache unusable and may slow down
816804
parsing.
817-
818-
/// INSERT DOCUMENTATION UPDATE HERE ///
819-
########################
820-
########################
821-
########################
822-
########################
823-
########################
824-
########################
825-
########################
826-
########################
827-
########################
828-
########################
829-
########################
830-
########################
831-
########################
832-
########################
833-
########################
834-
805+
threshold : float, default 1.0
806+
Minimum fraction of valid datetime components required to consider parsing
807+
successful. Components include year, month, day, hour, minute, and second
808+
if present in the input. An invalid component has too many or too few digits
809+
or a number outside the possible range (e.g., month outside [1, 12]). Behavior
810+
depends on the threshold:
811+
812+
- 1.0 (default): all components must be valid, else raises error (unless
813+
``errors='coerce'``).
814+
- 0.0: any invalid component produces NaT, else returns a valid datetime.
815+
- Values between 0 and 1: if all components are valid, returns a valid
816+
datetime; if the fraction of valid components >= threshold, returns NaT;
817+
otherwise raises error.
835818
Returns
836819
-------
837820
datetime
@@ -1032,6 +1015,14 @@ def to_datetime(
10321015
>>> pd.to_datetime(["2018-10-26 12:00", datetime(2020, 1, 1, 18)], utc=True)
10331016
DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'],
10341017
dtype='datetime64[us, UTC]', freq=None)
1018+
1019+
- An input string with one invalid component (here the month, ``100``) returns
1020+
NaT when the fraction of valid components is still at least ``threshold``
1021+
1022+
>>> pd.to_datetime(
1023+
... "2018-100-26 12:00:00", format="%Y-%m-%d %H:%M:%S", threshold=0.5
1024+
... )
1025+
NaT
10351026
"""
10361027
if exact is not lib.no_default and format in {"mixed", "ISO8601"}:
10371028
raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'")

pandas/tests/tools/test_to_datetime.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4147,3 +4147,11 @@ def test_parse_mixed_format_threshold(self):
41474147
result = to_datetime(series, format="mixed", threshold=0.5, errors="coerce")
41484148
expected = Series([Timestamp("2020-01-01"), Timestamp("2021-01-02"), NaT])
41494149
tm.assert_series_equal(result, expected)
4150+
4151+
def test_example(self):
4152+
result = to_datetime(
4153+
"2018-100-26 12:00:00",
4154+
format="%Y-%m-%d %H:%M:%S",
4155+
threshold=0.5,
4156+
)
4157+
assert isna(result)

0 commit comments

Comments
 (0)