Skip to content

Commit 2d722bb

Browse files
committed
rtlib: Fix wstring-to-zstring conversion functions
This fixes fb_wstr_ConvToA() and also improves fb_wstr_ConvFromA() in the process. 1. fb_wstr_ConvToA() was definitely broken - it should use wcstombs(), but that code was commented out, so it only converted ASCII chars, turning everything else into '?' chars. It looks like the only reason it was commented out is that the call might fail. Well, we can use it, and if it fails, we can fall back to the ASCII-only conversion. 2. fb_wstr_ConvFromA() used mbstowcs(), but didn't do ASCII-only conversion as a fallback. Now it does - it seems better to return something instead of nothing. 3. The FB_WSTR_WCHARTOCHAR macro stuff was removed; the two functions are now implemented in the proper modules instead of fb_unicode.h, with parameters re-ordered to indicate that dst/dst_chars belong together, while src must be null-terminated. 4. fb_WstrAssignToAEx() now allocates a big dst string, in the hope of having enough room for the converted multi-byte string (which can be UTF-8 on GNU/Linux). Previously it would simply allocate the same number of "chars" for dst as were present in src, but the conversion isn't guaranteed to be 1:1 in terms of chars. Now it will probably often allocate more memory than needed, but in exchange the string is much less likely to be truncated. Unfortunately it is nearly impossible to test this stuff in a portable way. The Unicode <-> codepage conversions are lossy and depend on the system codepage at run-time.
1 parent d39fef9 commit 2d722bb

File tree

12 files changed

+222
-110
lines changed

12 files changed

+222
-110
lines changed

changelog.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Version 1.05.0
1414
- BYREF fixed-length strings (e.g. BYREF myParameter AS ZSTRING * 10) now trigger a compiler error since they are unsupported
1515
- #print typeof() output now differentiates between ZSTRING and ZSTRING * N (ZSTRING without size is produced by dereferencing a ZSTRING PTR, or BYREF AS ZSTRING)
1616
- Context-specific keywords, e.g. graphics PUT modes, must now be given as keywords (e.g. PSET), string literals (e.g. "PSET") are no longer accepted.
17+
- Wstring-to-Zstring conversions didn't use the system's Unicode <-> codepage conversion function, and only converted ASCII characters. Now they try to convert the Unicode chars to codepage chars.
1718

1819

1920
Version 1.04.0

src/gfxlib2/gfx_print_wstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ void fb_GfxPrintBufferWstrEx(const FB_WCHAR *buffer, size_t len, int mask)
99
char temp[len + 1];
1010

1111
if( len > 0 )
12-
fb_wstr_ConvToA( temp, buffer, len );
12+
fb_wstr_ConvToA( temp, len, buffer );
1313
else
1414
*temp = '\0';
1515

src/rtlib/dev_file_write_wstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ int fb_DevFileWriteWstr( FB_FILE *handle, const FB_WCHAR* src, size_t chars )
2424

2525
/* convert to ascii, file should be opened with the ENCODING option
2626
to allow UTF characters to be written */
27-
fb_wstr_ConvToA( buffer, src, chars );
27+
fb_wstr_ConvToA( buffer, chars, src );
2828

2929
/* do write */
3030
res = fwrite( (void *)buffer, 1, chars, fp ) == chars;

src/rtlib/dos/io_printbuff_wstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ void fb_ConsolePrintBufferWstrEx( const FB_WCHAR *buffer, size_t len, int mask )
1010
char *temp = alloca( len + 1 );
1111

1212
if( len > 0 )
13-
fb_wstr_ConvToA( temp, buffer, len );
13+
fb_wstr_ConvToA( temp, len, buffer );
1414
else
1515
*temp = '\0';
1616

src/rtlib/dos/io_printer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int fb_PrinterWriteWstr( DEV_LPT_INFO *devInfo, const FB_WCHAR *data, size_t len
4747
char *temp = alloca( length + 1 );
4848

4949
if( length > 0 )
50-
fb_wstr_ConvToA( temp, data, length );
50+
fb_wstr_ConvToA( temp, length, data );
5151
else
5252
*temp = '\0';
5353

src/rtlib/error_assert_wstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
static void hConvToA( char *buffer, FB_WCHAR *expression )
88
{
9-
fb_wstr_ConvToA( buffer, expression, BUFFER_SIZE-1 );
9+
fb_wstr_ConvToA( buffer, BUFFER_SIZE-1, expression );
1010
buffer[BUFFER_SIZE-1] = 0; /* null terminator */
1111
}
1212

src/rtlib/fb_unicode.h

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ typedef uint8_t UTF_8;
3939
# define wcstoull strtoull
4040
# define wcschr strchr
4141
# define wcscspn strcspn
42-
# define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
4342
static __inline__ size_t __dos_mbstowcs(FB_WCHAR *wcstr, const char *mbstr, size_t count)
4443
{
4544
return memcpy(wcstr,mbstr,count), count;
@@ -48,10 +47,6 @@ typedef uint8_t UTF_8;
4847
{
4948
return memcpy(mbstr,wcstr,count), count;
5049
}
51-
static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
52-
{
53-
memcpy(dst,src,chars);
54-
}
5550
static __inline__ int swprintf(FB_WCHAR *buffer, size_t n, const FB_WCHAR *format, ...)
5651
{
5752
int result;
@@ -78,19 +73,6 @@ typedef uint8_t UTF_8;
7873
# else
7974
# define FB_WEOF ((FB_WCHAR)-1)
8075
# endif
81-
# define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
82-
static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
83-
{
84-
while( chars-- ) {
85-
UTF_16 c = *src++;
86-
if( c > 255 ) {
87-
if( c >= UTF16_SUR_HIGH_START && c <= UTF16_SUR_HIGH_END )
88-
++src;
89-
c = '?';
90-
}
91-
*dst++ = c;
92-
}
93-
}
9476
#else
9577
# define __USE_ISOC99 1
9678
# define __USE_ISOC95 1
@@ -141,20 +123,6 @@ typedef uint8_t UTF_8;
141123
swprintf( buffer, 16+8 + 1, _LC("%.16g"), (double) (num) )
142124
#endif
143125

144-
#ifndef FB_WSTR_WCHARTOCHAR
145-
#define FB_WSTR_WCHARTOCHAR fb_wstr_WcharToChar
146-
static __inline__ void fb_wstr_WcharToChar( char *dst, const FB_WCHAR *src, ssize_t chars )
147-
{
148-
while( chars ) {
149-
UTF_32 c = *src++;
150-
if( c > 255 )
151-
c = '?';
152-
*dst++ = c;
153-
--chars;
154-
}
155-
}
156-
#endif
157-
158126
/* Calculate the number of characters between two pointers. */
159127
static __inline__ ssize_t fb_wstr_CalcDiff( const FB_WCHAR *ini, const FB_WCHAR *end )
160128
{
@@ -179,52 +147,8 @@ static __inline__ ssize_t fb_wstr_Len( const FB_WCHAR *s )
179147
return wcslen( s );
180148
}
181149

182-
static __inline__ void fb_wstr_ConvFromA( FB_WCHAR *dst, ssize_t dst_chars, const char *src )
183-
{
184-
ssize_t chars;
185-
186-
/* NULL? */
187-
if( src == NULL ) {
188-
chars = -1;
189-
} else {
190-
/* plus the null-term (note: "n" in chars, not bytes!) */
191-
chars = mbstowcs( dst, src, dst_chars + 1 );
192-
}
193-
194-
/* error? */
195-
if( chars == -1 )
196-
*dst = _LC('\0');
197-
198-
/* if there's no enough space in dst the null-term won't be added? */
199-
else if( chars == (dst_chars + 1) )
200-
dst[dst_chars] = _LC('\0');
201-
}
202-
203-
static __inline__ void fb_wstr_ConvToA( char *dst, const FB_WCHAR *src, ssize_t chars )
204-
{
205-
/* !!!FIXME!!! wcstombs() will fail and not emit '?' or such if the
206-
characters are above 255 and can't be converted? not good.. */
207-
#if 0
208-
ssize_t bytes;
209-
210-
/* plus the null-term */
211-
bytes = wcstombs( dst, src, chars + 1 );
212-
213-
/* error? */
214-
if( bytes == -1 )
215-
*dst = '\0';
216-
217-
/* if there's no enough space in dst the null-term won't be added? */
218-
else if( bytes == chars + 1 )
219-
dst[src_chars] = '\0';
220-
221-
#else
222-
FB_WSTR_WCHARTOCHAR( dst, src, chars );
223-
224-
/* plus the null-term */
225-
dst[chars] = '\0';
226-
#endif
227-
}
150+
/* Convert the null-terminated multi-byte string src into the wide string dst.
   dst_chars is the room in dst excluding the null terminator.
   The type of dst_chars is ssize_t so that this declaration agrees with the
   definition in strw_convfrom_str.c; declaring it as int conflicts with that
   definition on targets where ssize_t is wider than int. */
ssize_t fb_wstr_ConvFromA( FB_WCHAR *dst, ssize_t dst_chars, const char *src );
151+
/* Convert the null-terminated wide string src into the multi-byte string dst.
   dst_chars is the room in dst excluding the null terminator.
   The type of dst_chars is ssize_t so that this declaration agrees with the
   definition in strw_convto_str.c; declaring it as int conflicts with that
   definition on targets where ssize_t is wider than int. */
ssize_t fb_wstr_ConvToA( char *dst, ssize_t dst_chars, const FB_WCHAR *src );
228152

229153
static __inline__ int fb_wstr_IsLower( FB_WCHAR c )
230154
{

src/rtlib/strw_convassign.c

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,13 @@ FBCALL FB_WCHAR *fb_WstrAssignFromA
3838
return dst;
3939
}
4040

41+
/* We'll convert wide-string to multi-byte string -- we don't know
42+
how big the result will be, but we can make a good guess.
43+
In the worst case, we'll allocate too much. */
44+
/* 4 bytes per char - allowing for UTF8 multi-byte strings, happens on GNU/Linux.
45+
On Windows there can be double-byte encodings etc. too. */
46+
#define getALenForWLen(wlen) ((wlen) * 4)
47+
4148
FBCALL void *fb_WstrAssignToAEx
4249
(
4350
void *dst,
@@ -77,44 +84,44 @@ FBCALL void *fb_WstrAssignToAEx
7784
else
7885
{
7986
/* realloc dst if needed and copy src */
87+
dst_chars = getALenForWLen(src_chars);
8088
if( is_init == FB_FALSE )
8189
{
82-
if( FB_STRSIZE( dst ) != src_chars )
83-
fb_hStrRealloc( (FBSTRING *)dst, src_chars, FB_FALSE );
90+
if( FB_STRSIZE( dst ) != dst_chars )
91+
fb_hStrRealloc( (FBSTRING *)dst, dst_chars, FB_FALSE );
8492
}
8593
else
8694
{
87-
fb_hStrAlloc( (FBSTRING *)dst, src_chars );
95+
fb_hStrAlloc( (FBSTRING *)dst, dst_chars );
8896
}
8997

90-
fb_wstr_ConvToA( ((FBSTRING *)dst)->data, src, src_chars );
98+
ssize_t writtenchars = fb_wstr_ConvToA( ((FBSTRING *)dst)->data, dst_chars, src );
99+
fb_hStrSetLength( dst, writtenchars );
91100
}
92101
}
93102
/* fixed-len or zstring.. */
94103
else
95104
{
96105
/* src NULL? */
97-
if( src_chars == 0 )
98-
{
99-
*(char *)dst = '\0';
100-
}
101-
else
102-
{
103-
/* byte ptr? as in C, assume dst is large enough */
104-
if( dst_chars == 0 )
105-
dst_chars = src_chars;
106-
107-
fb_wstr_ConvToA( (char *)dst,
108-
src,
109-
(dst_chars <= src_chars? dst_chars : src_chars) );
110-
}
111-
112-
/* fill reminder with null's */
113-
if( fill_rem != 0 )
114-
{
115-
dst_chars -= src_chars;
116-
if( dst_chars > 0 )
117-
memset( &(((char *)dst)[src_chars]), 0, dst_chars );
106+
if( src_chars == 0 ) {
107+
if( fill_rem && dst_chars > 0 ) {
108+
memset( dst, 0, dst_chars );
109+
} else {
110+
*(char *)dst = '\0';
111+
}
112+
/* byte ptr? as in C, assume dst is large enough */
113+
} else if( dst_chars == 0 ) {
114+
dst_chars = getALenForWLen(src_chars);
115+
fb_wstr_ConvToA( (char *)dst, dst_chars, src );
116+
} else {
117+
dst_chars -= 1; /* null terminator */
118+
ssize_t writtenchars = fb_wstr_ConvToA( (char *)dst, dst_chars, src );
119+
120+
/* fill remainder with null's */
121+
if( fill_rem && writtenchars < dst_chars ) {
122+
/* + 1 to fill behind null terminator. There is room for dst_chars + 1. */
123+
memset( ((char *)dst) + writtenchars + 1, 0, dst_chars - writtenchars );
124+
}
118125
}
119126
}
120127

src/rtlib/strw_convfrom_str.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,57 @@
22

33
#include "fb.h"
44

5+
/* dst_chars == room in dst buffer without null terminator. Thus, the dst buffer
6+
must be at least (dst_chars + 1) * sizeof(FB_WCHAR).
7+
src must be null-terminated.
8+
result = number of chars written, excluding null terminator that is always written */
9+
ssize_t fb_wstr_ConvFromA(FB_WCHAR *dst, ssize_t dst_chars, const char *src)
10+
{
11+
if (src == NULL) {
12+
*dst = _LC('\0');
13+
return 0;
14+
}
15+
16+
#if defined HOST_DOS
17+
ssize_t chars = strlen(src);
18+
if (chars > dst_chars)
19+
chars = dst_chars;
20+
21+
memcpy(dst, src, chars + 1);
22+
return chars;
23+
#else
24+
/* plus the null-term (note: "n" in chars, not bytes!) */
25+
ssize_t chars = mbstowcs(dst, src, dst_chars + 1);
26+
27+
/* worked? */
28+
if (chars >= 0) {
29+
/* a null terminator won't be added if there was not
30+
enough space, so do it manually (this will cut off the last
31+
char, but what can you do) */
32+
if (chars == (dst_chars + 1)) {
33+
dst[dst_chars] = _LC('\0');
34+
return dst_chars - 1;
35+
}
36+
return chars;
37+
}
38+
39+
/* mbstowcs() failed; translate at least ASCII chars
40+
and write out '?' for the others */
41+
FB_WCHAR *origdst = dst;
42+
FB_WCHAR *dstlimit = dst + dst_chars;
43+
while (dst < dstlimit) {
44+
unsigned char c = *src++;
45+
if (c == 0)
46+
break;
47+
if (c > 127)
48+
c = '?';
49+
*dst++ = c;
50+
}
51+
*dst = _LC('\0');
52+
return dst - origdst;
53+
#endif
54+
}
55+
556
FBCALL FB_WCHAR *fb_StrToWstr( const char *src )
657
{
758
FB_WCHAR *dst;

src/rtlib/strw_convto_str.c

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,68 @@
22

33
#include "fb.h"
44

5+
/* dst_chars == room in dst buffer without null terminator. Thus, the dst buffer
6+
must be at least dst_chars+1 bytes.
7+
src must be null-terminated.
8+
result = number of chars written, excluding null terminator that is always written */
9+
ssize_t fb_wstr_ConvToA(char *dst, ssize_t dst_chars, const FB_WCHAR *src)
10+
{
11+
if (src == NULL) {
12+
*dst = '\0';
13+
return 0;
14+
}
15+
16+
#if defined HOST_DOS
17+
ssize_t chars = strlen(src);
18+
if (chars > dst_chars)
19+
chars = dst_chars;
20+
21+
memcpy(dst, src, chars + 1);
22+
return chars;
23+
#else
24+
/* plus the null-term */
25+
ssize_t chars = wcstombs(dst, src, dst_chars + 1);
26+
27+
/* worked? */
28+
if (chars >= 0) {
29+
/* a null terminator won't be added if there was not
30+
enough space, so do it manually (this will cut off the last
31+
char, but what can you do) */
32+
if (chars == (dst_chars + 1)) {
33+
dst[dst_chars] = '\0';
34+
return dst_chars - 1;
35+
}
36+
return chars;
37+
}
38+
39+
/* wcstombs() failed; translate at least ASCII chars
40+
and write out '?' for the others */
41+
char *origdst = dst;
42+
char *dstlimit = dst + dst_chars;
43+
while (dst < dstlimit) {
44+
#if defined HOST_WIN32
45+
UTF_16 c = *src++;
46+
if (c == 0)
47+
break;
48+
if (c > 127) {
49+
if (c >= UTF16_SUR_HIGH_START && c <= UTF16_SUR_HIGH_END)
50+
src++;
51+
c = '?';
52+
}
53+
#else
54+
UTF_32 c = *src++;
55+
if (c == 0)
56+
break;
57+
if (c > 127)
58+
c = '?';
59+
#endif
60+
*dst++ = c;
61+
}
62+
*dst = '\0';
63+
return dst - origdst;
64+
#endif
65+
}
66+
567
FBCALL FBSTRING *fb_WstrToStr( const FB_WCHAR *src )
668
{
769
FBSTRING *dst;
@@ -18,7 +80,7 @@ FBCALL FBSTRING *fb_WstrToStr( const FB_WCHAR *src )
1880
if( dst == NULL )
1981
return &__fb_ctx.null_desc;
2082

21-
fb_wstr_ConvToA( dst->data, src, chars );
83+
fb_wstr_ConvToA( dst->data, chars, src );
2284

2385
return dst;
2486
}

0 commit comments

Comments
 (0)