3
3
import os
4
4
import random
5
5
import string
6
+ import sys
6
7
import uuid
7
8
8
9
import numpy as np
@@ -380,6 +381,10 @@ def test_dataframe_categorical(self):
380
381
with tiledb .open (uri ) as B :
381
382
tm .assert_frame_equal (df , B .df [:])
382
383
384
+ @pytest .mark .skipif (
385
+ sys .version_info < (3 , 8 ),
386
+ reason = "requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below" ,
387
+ )
383
388
def test_dataframe_csv_rt1 (self ):
384
389
def rand_dtype (dtype , size ):
385
390
nbytes = size * np .dtype (dtype ).itemsize
@@ -409,7 +414,12 @@ def rand_dtype(dtype, size):
409
414
410
415
csv_array_uri = os .path .join (uri , "tiledb_csv" )
411
416
tiledb .from_csv (
412
- csv_array_uri , csv_uri , index_col = 0 , parse_dates = [1 ], sparse = False
417
+ csv_array_uri ,
418
+ csv_uri ,
419
+ index_col = 0 ,
420
+ parse_dates = [1 ],
421
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
422
+ sparse = False ,
413
423
)
414
424
415
425
df_from_array = tiledb .open_dataframe (csv_array_uri )
@@ -420,7 +430,12 @@ def rand_dtype(dtype, size):
420
430
with tiledb .FileIO (tiledb .VFS (), csv_uri , "rb" ) as fio :
421
431
csv_array_uri2 = os .path .join (csv_array_uri + "_2" )
422
432
tiledb .from_csv (
423
- csv_array_uri2 , csv_uri , index_col = 0 , parse_dates = [1 ], sparse = False
433
+ csv_array_uri2 ,
434
+ csv_uri ,
435
+ index_col = 0 ,
436
+ parse_dates = [1 ],
437
+ sparse = False ,
438
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
424
439
)
425
440
426
441
df_from_array2 = tiledb .open_dataframe (csv_array_uri2 )
@@ -677,6 +692,10 @@ def test_csv_dense(self):
677
692
tmp_array2 = os .path .join (tmp_dir , "array2" )
678
693
tiledb .from_csv (tmp_array2 , tmp_csv , sparse = False )
679
694
695
+ @pytest .mark .skipif (
696
+ sys .version_info < (3 , 8 ),
697
+ reason = "requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below" ,
698
+ )
680
699
def test_csv_col_to_sparse_dims (self ):
681
700
df = make_dataframe_basic3 (20 )
682
701
@@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self):
697
716
sparse = True ,
698
717
index_col = ["time" , "double_range" ],
699
718
parse_dates = ["time" ],
719
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
700
720
)
701
721
702
722
df_bk = tiledb .open_dataframe (tmp_array )
@@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self):
734
754
tmp_csv2 ,
735
755
index_col = ["int_vals" ],
736
756
parse_dates = ["time" ],
757
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
737
758
sparse = True ,
738
759
allows_duplicates = True ,
739
760
float_precision = "round_trip" ,
@@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self):
748
769
cmp_df = df .set_index ("int_vals" ).sort_values (by = "time" )
749
770
tm .assert_frame_equal (res_df , cmp_df )
750
771
772
+ @pytest .mark .skipif (
773
+ sys .version_info < (3 , 8 ),
774
+ reason = "requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below" ,
775
+ )
751
776
def test_dataframe_csv_schema_only (self ):
752
777
col_size = 10
753
778
df = make_dataframe_basic3 (col_size )
@@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self):
784
809
tmp_csv ,
785
810
index_col = ["time" , "double_range" ],
786
811
parse_dates = ["time" ],
812
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
787
813
mode = "schema_only" ,
788
814
capacity = 1001 ,
789
815
sparse = True ,
@@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self):
856
882
df_bk .sort_index (level = "time" , inplace = True )
857
883
tm .assert_frame_equal (df_bk , df_combined )
858
884
885
+ @pytest .mark .skipif (
886
+ sys .version_info < (3 , 8 ),
887
+ reason = "requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below" ,
888
+ )
859
889
def test_dataframe_csv_chunked (self ):
860
890
col_size = 200
861
891
df = make_dataframe_basic3 (col_size )
@@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self):
876
906
tmp_csv ,
877
907
index_col = ["double_range" ],
878
908
parse_dates = ["time" ],
879
- date_spec = { "time" : " %Y-%m-%dT %H:%M:%S.%f"} ,
909
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
880
910
chunksize = 10 ,
881
911
sparse = True ,
882
912
quotechar = '"' ,
@@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self):
893
923
# Test dense chunked
894
924
tmp_array_dense = os .path .join (tmp_dir , "array_dense" )
895
925
tiledb .from_csv (
896
- tmp_array_dense , tmp_csv , parse_dates = ["time" ], sparse = False , chunksize = 25
926
+ tmp_array_dense ,
927
+ tmp_csv ,
928
+ parse_dates = ["time" ],
929
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
930
+ sparse = False ,
931
+ chunksize = 25 ,
897
932
)
898
933
899
934
with tiledb .open (tmp_array_dense ) as A :
@@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self):
933
968
df_idx_res = A .query (coords = False ).df [int (ned [0 ]) : int (ned [1 ])]
934
969
tm .assert_frame_equal (df_idx_res , df .reset_index (drop = True ))
935
970
971
+ @pytest .mark .skipif (
972
+ sys .version_info < (3 , 8 ),
973
+ reason = "requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below" ,
974
+ )
936
975
def test_csv_fillna (self ):
937
976
if pytest .tiledb_vfs == "s3" :
938
977
pytest .skip (
@@ -1016,6 +1055,7 @@ def check_array(path, df):
1016
1055
csv_paths ,
1017
1056
index_col = ["time" ],
1018
1057
parse_dates = ["time" ],
1058
+ date_format = "%Y-%m-%d %H:%M:%S.%f" ,
1019
1059
chunksize = 25 ,
1020
1060
sparse = True ,
1021
1061
)
0 commit comments