@@ -110,7 +110,7 @@ _tiledb_dtype_to_numpy_typeid_convert ={
110110 TILEDB_INT16 : np .NPY_INT16 ,
111111 TILEDB_UINT16 : np .NPY_UINT16 ,
112112 TILEDB_CHAR : np .NPY_STRING ,
113- TILEDB_STRING_ASCII : np .NPY_STRING ,
113+ TILEDB_STRING_ASCII : np .NPY_UNICODE ,
114114 TILEDB_STRING_UTF8 : np .NPY_UNICODE ,
115115}
116116IF LIBTILEDB_VERSION_MAJOR >= 2 :
@@ -133,7 +133,7 @@ _tiledb_dtype_to_numpy_dtype_convert = {
133133 TILEDB_INT16 : np .int16 ,
134134 TILEDB_UINT16 : np .uint16 ,
135135 TILEDB_CHAR : np .dtype ('S1' ),
136- TILEDB_STRING_ASCII : np .dtype ('S ' ),
136+ TILEDB_STRING_ASCII : np .dtype ('U ' ),
137137 TILEDB_STRING_UTF8 : np .dtype ('U1' ),
138138}
139139IF LIBTILEDB_VERSION_MAJOR >= 2 :
@@ -1824,10 +1824,8 @@ cdef class Attr(object):
18241824 filters_str += repr (f ) + ", "
18251825 filters_str += "])"
18261826
1827- attr_dtype = "ascii" if self .isascii else self .dtype
1828-
18291827 # filters_str must be last with no spaces
1830- return (f"""Attr(name={ repr (self .name )} , dtype='{ attr_dtype !s} ', """
1828+ return (f"""Attr(name={ repr (self .name )} , dtype='{ self . dtype !s} ', """
18311829 f"""var={ self .isvar !s} , nullable={ self .isnullable !s} """
18321830 f"""{ filters_str } )""" )
18331831
@@ -1852,7 +1850,7 @@ cdef class Attr(object):
18521850
18531851 output .write ("<tr>" )
18541852 output .write (f"<td>{ self .name } </td>" )
1855- output .write (f"<td>{ 'ascii' if self .isascii else self . dtype } </td>" )
1853+ output .write (f"<td>{ self .isascii } </td>" )
18561854 output .write (f"<td>{ self .isvar } </td>" )
18571855 output .write (f"<td>{ self .isnullable } </td>" )
18581856 output .write (f"<td>{ self .filters ._repr_html_ ()} </td>" )
@@ -1903,8 +1901,12 @@ cdef class Dim(object):
19031901 if not ctx :
19041902 ctx = default_ctx ()
19051903
1904+ is_string = (
1905+ isinstance (dtype , str ) and dtype == "ascii"
1906+ ) or np .dtype (dtype ) in (np .str_ , np .bytes_ )
1907+
19061908 if var is not None :
1907- if var and np . dtype ( dtype ) not in ( np . str_ , np . bytes_ ) :
1909+ if var and not is_string :
19081910 raise TypeError ("'var=True' specified for non-str/bytes dtype" )
19091911
19101912 if domain is not None and len (domain ) != 2 :
@@ -1919,12 +1921,14 @@ cdef class Dim(object):
19191921 cdef void * tile_size_ptr = NULL
19201922 cdef np .dtype domain_dtype
19211923
1922- if ((isinstance (dtype , str ) and dtype == "ascii" ) or
1923- dtype == np .dtype ('S' )):
1924+ if is_string :
19241925 # Handle var-len domain type
19251926 # (currently only TILEDB_STRING_ASCII)
19261927 # The dimension's domain is implicitly formed as
19271928 # coordinates are written.
1929+ if dtype != "ascii" :
1930+ warnings .warn ("Use 'ascii' for string dimensions." )
1931+ dtype = np .dtype ("|U0" )
19281932 dim_datatype = TILEDB_STRING_ASCII
19291933 else :
19301934 if domain is None or len (domain ) != 2 :
@@ -1985,17 +1989,19 @@ cdef class Dim(object):
19851989 self .ptr = dim_ptr
19861990
19871991 def __repr__ (self ):
1988- filters_str = ""
1992+ filters = ""
19891993 if self .filters :
1990- filters_str = ", filters=FilterList(["
1994+ filters = ", filters=FilterList(["
19911995 for f in self .filters :
1992- filters_str += repr (f ) + ", "
1993- filters_str += "])"
1996+ filters += repr (f ) + ", "
1997+ filters += "])"
1998+
1999+ dtype = "ascii" if self ._get_type () == TILEDB_STRING_ASCII else self .dtype
19942000
19952001 # for consistency, print `var=True` for string-like types
1996- varlen = "" if not self . dtype in ( np . str_ , np . bytes_ ) else ", var=True"
1997- return "Dim(name={0 !r}, domain={1!s }, tile={2 !r}, dtype='{3!s }'{4}{5 })" \
1998- . format ( self . name , self . domain , self . tile , self . dtype , varlen , filters_str )
2002+ varlen = "" if dtype != "ascii" else ", var=True"
2003+ return f "Dim(name={ self . name !r} , domain={ self . domain } , tile={ self . tile !r} , dtype='{ dtype } '{ varlen } { filters } )"
2004+
19992005
20002006 def _repr_html_ (self ) -> str :
20012007 output = io .StringIO ()
@@ -2022,7 +2028,7 @@ cdef class Dim(object):
20222028 output .write (f"<td>{ self .domain } </td>" )
20232029 output .write (f"<td>{ self .tile } </td>" )
20242030 output .write (f"<td>{ self .dtype } </td>" )
2025- output .write (f"<td>{ self .dtype in ( np . str_ , np . bytes_ ) } </td>" )
2031+ output .write (f"<td>{ self .dtype == 'ascii' } </td>" )
20262032 output .write (f"<td>{ self .filters ._repr_html_ ()} </td>" )
20272033 output .write ("</tr>" )
20282034
@@ -2222,7 +2228,7 @@ cdef class Dim(object):
22222228 :rtype: tuple(numpy scalar, numpy scalar)
22232229
22242230 """
2225- if self .dtype == np .dtype ('S ' ):
2231+ if self .dtype == np .dtype ('U ' ):
22262232 return None , None
22272233 cdef const void * domain_ptr = NULL
22282234 check_error (self .ctx ,
@@ -3864,9 +3870,8 @@ cdef class Array(object):
38643870 results .append ((None , None ))
38653871 continue
38663872
3867- buf_dtype = 'S'
3868- start_buf = np .empty (start_size , 'S' + str (start_size ))
3869- end_buf = np .empty (end_size , 'S' + str (end_size ))
3873+ start_buf = np .empty (start_size , f"S{ start_size } " )
3874+ end_buf = np .empty (end_size , f"S{ end_size } " )
38703875 start_buf_ptr = np .PyArray_DATA (start_buf )
38713876 end_buf_ptr = np .PyArray_DATA (end_buf )
38723877 else :
@@ -3884,7 +3889,8 @@ cdef class Array(object):
38843889 return None
38853890
38863891 if start_size > 0 and end_size > 0 :
3887- results .append ((start_buf .item (0 ), end_buf .item (0 )))
3892+ results .append ((start_buf .item (0 ).decode ("UTF-8" ),
3893+ end_buf .item (0 ).decode ("UTF-8" )))
38883894 else :
38893895 results .append ((None , None ))
38903896 else :
@@ -4918,7 +4924,7 @@ def index_domain_coords(dom: Domain, idx: tuple, check_ndim: bool):
49184924 # ensure strings contain only ASCII characters
49194925 domain_coords .append (np .array (sel , dtype = np .bytes_ , ndmin = 1 ))
49204926 except Exception as exc :
4921- raise TileDBError (f'Dim \' strings may only contain ASCII characters' )
4927+ raise TileDBError ('Dimension strings may only contain ASCII characters' )
49224928 else :
49234929 domain_coords .append (np .array (sel , dtype = dim .dtype , ndmin = 1 ))
49244930
0 commit comments