Skip to content

Commit 55d7b3a

Browse files
authored
Specify datetime format in tests (#1905)
* Specify datetime format in tests
* Skip tests containing the date_format argument on Python 3.7 and below

Fixes https://app.shortcut.com/tiledb-inc/story/34107
1 parent 3615631 commit 55d7b3a

File tree

1 file changed

+44
-4
lines changed

1 file changed

+44
-4
lines changed

tiledb/tests/test_pandas_dataframe.py

Lines changed: 44 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import os
44
import random
55
import string
6+
import sys
67
import uuid
78

89
import numpy as np
@@ -380,6 +381,10 @@ def test_dataframe_categorical(self):
380381
with tiledb.open(uri) as B:
381382
tm.assert_frame_equal(df, B.df[:])
382383

384+
@pytest.mark.skipif(
385+
sys.version_info < (3, 8),
386+
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
387+
)
383388
def test_dataframe_csv_rt1(self):
384389
def rand_dtype(dtype, size):
385390
nbytes = size * np.dtype(dtype).itemsize
@@ -409,7 +414,12 @@ def rand_dtype(dtype, size):
409414

410415
csv_array_uri = os.path.join(uri, "tiledb_csv")
411416
tiledb.from_csv(
412-
csv_array_uri, csv_uri, index_col=0, parse_dates=[1], sparse=False
417+
csv_array_uri,
418+
csv_uri,
419+
index_col=0,
420+
parse_dates=[1],
421+
date_format="%Y-%m-%d %H:%M:%S.%f",
422+
sparse=False,
413423
)
414424

415425
df_from_array = tiledb.open_dataframe(csv_array_uri)
@@ -420,7 +430,12 @@ def rand_dtype(dtype, size):
420430
with tiledb.FileIO(tiledb.VFS(), csv_uri, "rb") as fio:
421431
csv_array_uri2 = os.path.join(csv_array_uri + "_2")
422432
tiledb.from_csv(
423-
csv_array_uri2, csv_uri, index_col=0, parse_dates=[1], sparse=False
433+
csv_array_uri2,
434+
csv_uri,
435+
index_col=0,
436+
parse_dates=[1],
437+
sparse=False,
438+
date_format="%Y-%m-%d %H:%M:%S.%f",
424439
)
425440

426441
df_from_array2 = tiledb.open_dataframe(csv_array_uri2)
@@ -677,6 +692,10 @@ def test_csv_dense(self):
677692
tmp_array2 = os.path.join(tmp_dir, "array2")
678693
tiledb.from_csv(tmp_array2, tmp_csv, sparse=False)
679694

695+
@pytest.mark.skipif(
696+
sys.version_info < (3, 8),
697+
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
698+
)
680699
def test_csv_col_to_sparse_dims(self):
681700
df = make_dataframe_basic3(20)
682701

@@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self):
697716
sparse=True,
698717
index_col=["time", "double_range"],
699718
parse_dates=["time"],
719+
date_format="%Y-%m-%d %H:%M:%S.%f",
700720
)
701721

702722
df_bk = tiledb.open_dataframe(tmp_array)
@@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self):
734754
tmp_csv2,
735755
index_col=["int_vals"],
736756
parse_dates=["time"],
757+
date_format="%Y-%m-%d %H:%M:%S.%f",
737758
sparse=True,
738759
allows_duplicates=True,
739760
float_precision="round_trip",
@@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self):
748769
cmp_df = df.set_index("int_vals").sort_values(by="time")
749770
tm.assert_frame_equal(res_df, cmp_df)
750771

772+
@pytest.mark.skipif(
773+
sys.version_info < (3, 8),
774+
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
775+
)
751776
def test_dataframe_csv_schema_only(self):
752777
col_size = 10
753778
df = make_dataframe_basic3(col_size)
@@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self):
784809
tmp_csv,
785810
index_col=["time", "double_range"],
786811
parse_dates=["time"],
812+
date_format="%Y-%m-%d %H:%M:%S.%f",
787813
mode="schema_only",
788814
capacity=1001,
789815
sparse=True,
@@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self):
856882
df_bk.sort_index(level="time", inplace=True)
857883
tm.assert_frame_equal(df_bk, df_combined)
858884

885+
@pytest.mark.skipif(
886+
sys.version_info < (3, 8),
887+
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
888+
)
859889
def test_dataframe_csv_chunked(self):
860890
col_size = 200
861891
df = make_dataframe_basic3(col_size)
@@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self):
876906
tmp_csv,
877907
index_col=["double_range"],
878908
parse_dates=["time"],
879-
date_spec={"time": "%Y-%m-%dT%H:%M:%S.%f"},
909+
date_format="%Y-%m-%d %H:%M:%S.%f",
880910
chunksize=10,
881911
sparse=True,
882912
quotechar='"',
@@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self):
893923
# Test dense chunked
894924
tmp_array_dense = os.path.join(tmp_dir, "array_dense")
895925
tiledb.from_csv(
896-
tmp_array_dense, tmp_csv, parse_dates=["time"], sparse=False, chunksize=25
926+
tmp_array_dense,
927+
tmp_csv,
928+
parse_dates=["time"],
929+
date_format="%Y-%m-%d %H:%M:%S.%f",
930+
sparse=False,
931+
chunksize=25,
897932
)
898933

899934
with tiledb.open(tmp_array_dense) as A:
@@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self):
933968
df_idx_res = A.query(coords=False).df[int(ned[0]) : int(ned[1])]
934969
tm.assert_frame_equal(df_idx_res, df.reset_index(drop=True))
935970

971+
@pytest.mark.skipif(
972+
sys.version_info < (3, 8),
973+
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
974+
)
936975
def test_csv_fillna(self):
937976
if pytest.tiledb_vfs == "s3":
938977
pytest.skip(
@@ -1016,6 +1055,7 @@ def check_array(path, df):
10161055
csv_paths,
10171056
index_col=["time"],
10181057
parse_dates=["time"],
1058+
date_format="%Y-%m-%d %H:%M:%S.%f",
10191059
chunksize=25,
10201060
sparse=True,
10211061
)

0 commit comments

Comments
 (0)