@@ -1066,7 +1066,7 @@ If the `_Encoding` special attribute is set for a character array
10661066(dtype `S1`) variable, the `chartostring` utility function is used to convert the array of
10671067characters to an array of strings with one less dimension (the last dimension is
10681068interpreted as the length of each string) when reading the data. The character
1069- set (usually ascii) is specified by the `_Encoding` attribute. If `_Encoding`
1069+ set is specified by the `_Encoding` attribute. If `_Encoding`
10701070is 'none' or 'bytes', then the character array is converted to a numpy
10711071fixed-width byte string array (dtype `S#`), otherwise a numpy unicode (dtype
10721072`U#`) array is created. When writing the data,
@@ -5525,11 +5525,15 @@ cannot be safely cast to variable data type""" % attname
55255525 # if data is a string or a bytes object, convert to a numpy string array
55265526 # whose length is equal to the rightmost dimension of the
55275527 # variable.
5528- if type (data) in [str ,bytes]: data = numpy.asarray(data,dtype = ' S' + repr (self .shape[- 1 ]))
5528+ if type (data) in [str ,bytes]:
5529+ if encoding == ' ascii' :
5530+ data = numpy.asarray(data,dtype = ' S' + repr (self .shape[- 1 ]))
5531+ else :
5532+ data = numpy.asarray(data,dtype = ' U' + repr (self .shape[- 1 ]))
55295533 if data.dtype.kind in [' S' ,' U' ] and data.dtype.itemsize > 1 :
55305534 # if data is a numpy string array, convert it to an array
55315535 # of characters with one more dimension.
5532- data = stringtochar(data, encoding = encoding)
5536+ data = stringtochar(data, encoding = encoding, n_strlen = self .shape[ - 1 ] )
55335537
55345538 # if structured data has strings (and _Encoding att set), create view as char arrays
55355539 # (issue #773)
@@ -6771,9 +6775,9 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'`
67716775 arr[0 :len (string)] = tuple (string)
67726776 return arr
67736777
6774- def stringtochar (a ,encoding = ' utf-8' ):
6778+ def stringtochar (a ,encoding = ' utf-8' , n_strlen = None ):
67756779 """
6776- **`stringtochar(a,encoding='utf-8')`**
6780+ **`stringtochar(a,encoding='utf-8',n_strlen=None)`**
67776781
67786782convert a string array to a character array with one extra dimension
67796783
@@ -6785,16 +6789,29 @@ optional kwarg `encoding` can be used to specify character encoding (default
67856789`utf-8`). If `encoding` is 'none' or 'bytes', the input array
67866790is treated as raw byte strings (`numpy.string_`).
67876791
6792+ optional kwarg `n_strlen` is the number of characters in each string. Default
6793+ is None, which means `n_strlen` will be set to a.itemsize (the number of bytes
6794+ used to represent each string in the input array).
6795+
67886796returns a numpy character array with datatype `'S1'` or `'U1'`
67896797and shape `a.shape + (N,)`, where N is the length of each string in a."""
67906798 dtype = a.dtype.kind
6799+ if n_strlen is None :
6800+ n_strlen = a.dtype.itemsize
67916801 if dtype not in [" S" ," U" ]:
67926802 raise ValueError (" type must string or unicode ('S' or 'U')" )
67936803 if encoding in [' none' ,' None' ,' bytes' ]:
67946804 b = numpy.array(tuple (a.tobytes()),' S1' )
6795- else :
6805+ elif encoding == ' ascii ' :
67966806 b = numpy.array(tuple (a.tobytes().decode(encoding)),dtype+ ' 1' )
6797- b.shape = a.shape + (a.itemsize,)
6807+ b.shape = a.shape + (n_strlen,)
6808+ else :
6809+ if not a.ndim:
6810+ a = numpy.array([a])
6811+ bbytes = [text.encode(encoding) for text in a]
6812+ pad = b' \0' * n_strlen
6813+ bbytes = [(x + pad)[:n_strlen] for x in bbytes]
6814+ b = numpy.array([[bb[i:i+ 1 ] for i in range (n_strlen)] for bb in bbytes])
67986815 return b
67996816
68006817def chartostring (b ,encoding = ' utf-8' ):
@@ -6816,15 +6833,12 @@ returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape
68166833 dtype = b.dtype.kind
68176834 if dtype not in [" S" ," U" ]:
68186835 raise ValueError (" type must be string or unicode ('S' or 'U')" )
6819- if encoding in [' none' ,' None' ,' bytes' ]:
6820- bs = b.tobytes()
6821- else :
6822- bs = b.tobytes().decode(encoding)
6836+ bs = b.tobytes()
68236837 slen = int (b.shape[- 1 ])
68246838 if encoding in [' none' ,' None' ,' bytes' ]:
68256839 a = numpy.array([bs[n1:n1+ slen] for n1 in range (0 ,len (bs),slen)],' S' + repr (slen))
68266840 else :
6827- a = numpy.array([bs[n1:n1+ slen] for n1 in range (0 ,len (bs),slen)],' U' + repr (slen))
6841+ a = numpy.array([bs[n1:n1+ slen].decode(encoding) for n1 in range (0 ,len (bs),slen)],' U' + repr (slen))
68286842 a.shape = b.shape[:- 1 ]
68296843 return a
68306844
0 commit comments