Skip to content

Commit 05e83c5

Browse files
committed
Fixed problem with dates with no ymd separator
1 parent 334ec7a commit 05e83c5

File tree

1 file changed

+71
-34
lines changed

1 file changed

+71
-34
lines changed

pandas/_libs/src/vendored/numpy/datetime/np_datetime_strings.c

Lines changed: 71 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -174,15 +174,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
174174
}
175175

176176
/* PARSE THE YEAR (4 digits) */
177-
printf("Start: %s\n", str);
178177
comparison =
179178
compare_format(&format, &format_len, "%Y", 2, format_requirement);
180179

181180
int to_month = 0;
182181

183182
if (comparison == COMPARISON_ERROR) {
184183
invalid_components++;
185-
while (sublen > 0 && !isdigit(*substr + 1)) {
184+
while (sublen > 1 && !isdigit(substr[1])) {
186185
substr++;
187186
sublen--;
188187
}
@@ -219,6 +218,12 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
219218
to_month = 1;
220219
goto find_sep;
221220
}
221+
} else if (sublen == 3 && isdigit(substr[0]) && isdigit(substr[1]) &&
222+
isdigit(substr[2])) {
223+
invalid_components++;
224+
substr += 3;
225+
sublen -= 3;
226+
goto finish;
222227
} else if (sublen >= 3 && isdigit(substr[0]) && isdigit(substr[1]) &&
223228
!isdigit(substr[2])) {
224229
int valid_sep = 0;
@@ -235,6 +240,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
235240
goto find_sep;
236241
}
237242
goto find_sep;
243+
} else if (sublen == 2 && isdigit(substr[0]) && isdigit(substr[1])) {
244+
invalid_components++;
245+
substr += 2;
246+
sublen -= 2;
247+
goto finish;
238248
} else if (sublen >= 2 && isdigit(substr[0]) && !isdigit(substr[1])) {
239249
int valid_sep = 0;
240250
for (i = 0; i < valid_ymd_sep_len; ++i) {
@@ -249,6 +259,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
249259
to_month = 1;
250260
goto find_sep;
251261
}
262+
} else if (sublen == 1 && isdigit(substr[0])) {
263+
invalid_components++;
264+
substr++;
265+
sublen--;
266+
goto finish;
252267
} else if (sublen >= 1 && !isdigit(substr[0])) {
253268
int valid_sep = 0;
254269
for (i = 0; i < valid_ymd_sep_len; ++i) {
@@ -264,7 +279,37 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
264279
}
265280

266281
/* Invalidates the component if there are more than 4 digits */
267-
int still_more = 1;
282+
int has_sep = 0;
283+
int j = 0;
284+
for (j = 0; j < (sublen > 4 ? 4 : sublen); ++j) {
285+
char c = substr[j];
286+
for (i = 0; i < valid_ymd_sep_len; ++i) {
287+
if (c == valid_ymd_sep[i]) {
288+
has_sep = 1;
289+
break;
290+
}
291+
}
292+
if (has_sep || !isdigit(c)) {
293+
break;
294+
}
295+
}
296+
if (has_sep && j != 0) {
297+
invalid_components++;
298+
substr += j;
299+
sublen -= j;
300+
if (sublen == 0) {
301+
goto finish;
302+
}
303+
to_month = 1;
304+
goto find_sep;
305+
}
306+
if (!has_sep && sublen < 4) {
307+
invalid_components++;
308+
substr += sublen;
309+
sublen = 0;
310+
goto finish;
311+
}
312+
/*int still_more = 1;
268313
for (i = 0; i < valid_ymd_sep_len; ++i) {
269314
if (*substr == valid_ymd_sep[i]) {
270315
still_more = 0;
@@ -282,7 +327,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
282327
}
283328
to_month = 1;
284329
goto find_sep;
285-
}
330+
}*/
286331

287332
/* Negate the year if necessary */
288333
if (str[0] == '-') {
@@ -298,7 +343,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
298343
}
299344
if (format_len) {
300345
invalid_components++;
301-
while (sublen > 0 && !isdigit(*substr + 1)) {
346+
while (sublen > 1 && !isdigit(substr[1])) {
302347
substr++;
303348
sublen--;
304349
}
@@ -380,8 +425,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
380425

381426
/* PARSE THE MONTH */
382427
month:
383-
printf("\nI-V after year-parsing: %d-%d\n", invalid_components,
384-
valid_components);
385428
comparison =
386429
compare_format(&format, &format_len, "%m", 2, format_requirement);
387430

@@ -414,26 +457,35 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
414457

415458
/* Invalidates the component if there are more than 2 digits */
416459
if (sublen > 0) {
417-
int still_more = 1;
418-
for (i = 0; i < valid_ymd_sep_len; ++i) {
419-
if (*substr == valid_ymd_sep[i]) {
420-
still_more = 0;
460+
int has_sep = 0;
461+
int j = 0;
462+
for (j = 0; j < (sublen > 2 ? 2 : sublen); ++j) {
463+
char c = substr[j];
464+
for (i = 0; i < valid_ymd_sep_len; ++i) {
465+
if (c == valid_ymd_sep[i]) {
466+
has_sep = 1;
467+
break;
468+
}
469+
}
470+
if (has_sep || !isdigit(c)) {
421471
break;
422472
}
423473
}
424-
if (still_more) {
474+
if (has_sep && j != 0) {
425475
invalid_components++;
426-
while (sublen > 0 && isdigit(substr[0])) {
427-
substr++;
428-
sublen--;
429-
}
476+
substr += j;
477+
sublen -= j;
430478
if (sublen == 0) {
431479
goto finish;
432480
}
433481
to_month = 1;
434-
comparison = compare_format(&format, &format_len, &ymd_sep, 1,
435-
format_requirement);
436-
goto month_sep;
482+
goto find_sep;
483+
}
484+
if (!has_sep && sublen < 2) {
485+
invalid_components++;
486+
substr += sublen;
487+
sublen = 0;
488+
goto finish;
437489
}
438490
}
439491
} else if (!has_ymd_sep) {
@@ -537,8 +589,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
537589

538590
/* PARSE THE DAY */
539591
day:
540-
printf("\nI-V after month-parsing: %d-%d\n", invalid_components,
541-
valid_components);
542592
comparison =
543593
compare_format(&format, &format_len, "%d", 2, format_requirement);
544594
if (comparison == COMPARISON_ERROR) {
@@ -594,7 +644,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
594644
if (sublen == 0) {
595645
goto finish;
596646
}
597-
to_month = 1;
598647
comparison = compare_format(&format, &format_len, &ymd_sep, 1,
599648
format_requirement);
600649
goto day_sep;
@@ -677,8 +726,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
677726

678727
/* PARSE THE HOURS */
679728
hour:
680-
printf("\nI-V after day-parsing: %d-%d\n", invalid_components,
681-
valid_components);
682729
comparison =
683730
compare_format(&format, &format_len, "%H", 2, format_requirement);
684731
if (comparison == COMPARISON_ERROR) {
@@ -733,7 +780,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
733780
if (sublen == 0) {
734781
goto finish;
735782
}
736-
to_month = 1;
737783
goto hour_sep;
738784
}
739785
}
@@ -834,8 +880,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
834880

835881
/* PARSE THE MINUTES */
836882
minute:
837-
printf("\nI-V after hour-parsing: %d-%d\n", invalid_components,
838-
valid_components);
839883
comparison =
840884
compare_format(&format, &format_len, "%M", 2, format_requirement);
841885
if (comparison == COMPARISON_ERROR) {
@@ -878,7 +922,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
878922
if (sublen == 0) {
879923
goto finish;
880924
}
881-
to_month = 1;
882925
goto minute_sep;
883926
}
884927
}
@@ -966,8 +1009,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
9661009

9671010
/* PARSE THE SECONDS */
9681011
second:
969-
printf("\nI-V after minute-parsing: %d-%d\n", invalid_components,
970-
valid_components);
9711012
comparison =
9721013
compare_format(&format, &format_len, "%S", 2, format_requirement);
9731014
if (comparison == COMPARISON_ERROR) {
@@ -1009,7 +1050,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
10091050
if (sublen == 0) {
10101051
goto finish;
10111052
}
1012-
to_month = 1;
10131053
goto second_sep;
10141054
}
10151055
}
@@ -1064,8 +1104,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
10641104

10651105
/* PARSE THE MICROSECONDS (0 to 6 digits) */
10661106
microsecond:
1067-
printf("\nI-V after second-parsing: %d-%d\n", invalid_components,
1068-
valid_components);
10691107
comparison =
10701108
compare_format(&format, &format_len, "%f", 2, format_requirement);
10711109
if (comparison == COMPARISON_ERROR) {
@@ -1286,7 +1324,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
12861324
}
12871325

12881326
finish:
1289-
printf("\nI-V at end: %d-%d\n", invalid_components, valid_components);
12901327
if (invalid_components > 0 &&
12911328
(double)valid_components / (valid_components + invalid_components) >=
12921329
threshold) {

0 commit comments

Comments
 (0)