Skip to content

Commit 344389a

Browse files
committed
Revert bad Get# wstring changes
In commit cab4665 I changed Get#(wstring) to convert the loaded bytes to wstring characters, but that's not how Get# should work: it should just fill the buffer with the raw bytes. That's also what it does for other data types, such as Integer variables, where it loads sizeof(integer) bytes, not just 1 byte. This makes sense, since Get# is intended for use with Open For Binary. Converting the bytes to wstring chars is something that Input# with Open For Input (i.e. text mode) would do. The Get# change also broke part of fbc's UTF file reading, because that code uses Get#(wstring) (in hUTF16LEToUTF16LE() and hUTF32LEToUTF32LE()) and relies on it to load raw bytes as-is, without any conversion.
1 parent 10dd3dd commit 344389a

File tree

3 files changed

+261
-26
lines changed

3 files changed

+261
-26
lines changed

changelog.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ Version 1.05.0
1616
- Context-specific keywords, e.g. graphics PUT modes, must now be given as keywords (e.g. PSET), string literals (e.g. "PSET") are no longer accepted.
1717
- Wstring-to-Zstring conversions didn't use the system's Unicode <-> codepage conversion function, and only converted ASCII characters. Now they will try to convert the Unicode chars to codepage chars.
1818
- Compiler crash during error recovery after an error while parsing the argument expression for a BYREF AS ANY parameter
19+
- 1.04.0 regression: Get# for WStrings was incorrectly changed to convert the loaded bytes to wstring characters, like Input# would do. Now it's changed back to just loading the raw bytes into the wstring, which is also how Get# works for other datatypes.
20+
- 1.04.0 regression: Due to the Get# wstring breakage, the compiler failed to read source files encoded in UTF16LE with BOM on Windows, and UTF32LE with BOM on Linux.
1921

2022

2123
Version 1.04.0

src/rtlib/file_get_wstr.c

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,44 @@ int fb_FileGetWstrEx
1111
size_t *bytesread
1212
)
1313
{
14-
int res;
15-
1614
if( bytesread )
1715
*bytesread = 0;
1816

19-
if( !FB_HANDLE_USED(handle) )
17+
if( !FB_HANDLE_USED(handle) || !dst || dst_chars < 0 )
2018
return fb_ErrorSetNum( FB_RTERROR_ILLEGALFUNCTIONCALL );
2119

22-
if( (dst != NULL) && (dst_chars > 1) ) {
23-
/* read dst_chars - 1 chars, then add null-terminator */
24-
size_t chars;
25-
res = fb_FileGetDataEx( handle, pos, (void *)dst, dst_chars - 1, &chars, TRUE, TRUE );
26-
if (res == FB_RTERROR_OK) {
27-
dst[chars] = _LC('\0'); /* null-terminator */
28-
if (bytesread)
29-
*bytesread = chars * sizeof(FB_WCHAR);
30-
}
31-
} else {
32-
/* no/empty destination string */
33-
res = fb_ErrorSetNum( FB_RTERROR_ILLEGALFUNCTIONCALL );
20+
/* may have to detect the length if given a dereferenced wstring ptr */
21+
if( dst_chars == 0 ) {
22+
dst_chars = fb_wstr_Len( dst ) + 1;
23+
}
24+
25+
/* need room for at least 1 wchar and the null terminator */
26+
/* (Get# on a wstring * 1, i.e. just room for the null terminator, is not supported,
27+
same as for [z]strings) */
28+
if( dst_chars < 2 )
29+
return fb_ErrorSetNum( FB_RTERROR_ILLEGALFUNCTIONCALL );
30+
31+
/* Fill wchar buffer with raw bytes from the file. */
32+
/* We request to read in multiples of sizeof(wchar), but EOF can be
33+
reached at an odd number of bytes - fb_DevFileRead() will fill the
34+
remainder with zeroes at least. */
35+
size_t rawbytesread;
36+
int res = fb_FileGetDataEx( handle, pos, (void *)dst, (dst_chars - 1) * sizeof(FB_WCHAR), &rawbytesread, TRUE, FALSE );
37+
if( res != FB_RTERROR_OK )
38+
return res;
39+
40+
if (bytesread)
41+
*bytesread = rawbytesread;
42+
43+
/* Add null-terminator */
44+
int extra = rawbytesread % sizeof(FB_WCHAR);
45+
if (extra > 0) {
46+
rawbytesread += sizeof(FB_WCHAR) - extra; /* round up */
3447
}
48+
DBG_ASSERT( (rawbytesread % sizeof(FB_WCHAR)) == 0 );
49+
dst[rawbytesread / sizeof(FB_WCHAR)] = _LC('\0');
3550

36-
return res;
51+
return FB_RTERROR_OK;
3752
}
3853

3954
FBCALL int fb_FileGetWstr( int fnum, int pos, FB_WCHAR *dst, ssize_t dst_chars )

tests/file/get.bas

Lines changed: 228 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "fbcu.bi"
2+
#include "file.bi"
23

34
namespace fbc_tests.file_.get_
45

@@ -23,6 +24,8 @@ private sub test cdecl( )
2324
dim s as string
2425
dim z6 as zstring * 6
2526
dim w6 as wstring * 6
27+
dim pz6 as zstring ptr = callocate( 6 * sizeof( zstring ) )
28+
dim pw6 as wstring ptr = callocate( 6 * sizeof( wstring ) )
2629
dim array4b(0 to 3) as byte
2730
dim array4l(0 to 3) as long
2831

@@ -54,7 +57,41 @@ private sub test cdecl( )
5457
CU_ASSERT( z6 = "12345" )
5558

5659
CU_ASSERT( get( #f, 1, w6 ) = 0 )
57-
CU_ASSERT( w6 = wstr( "12345" ) )
60+
#if sizeof(wstring) = 2
61+
CU_ASSERT( w6[0] = cvshort( "12" ) )
62+
CU_ASSERT( w6[1] = cvshort( "34" ) )
63+
CU_ASSERT( w6[2] = cvshort( "56" ) )
64+
CU_ASSERT( w6[3] = cvshort( "78" ) )
65+
CU_ASSERT( w6[4] = cvshort( "90" ) )
66+
#elseif sizeof(wstring) = 4
67+
CU_ASSERT( w6[0] = cvl( "1234" ) )
68+
CU_ASSERT( w6[1] = cvl( "5678" ) )
69+
CU_ASSERT( w6[2] = cvshort( "90" ) )
70+
CU_ASSERT( w6[3] = 0 )
71+
CU_ASSERT( w6[4] = 0 )
72+
#endif
73+
CU_ASSERT( w6[5] = 0 )
74+
75+
*pz6 = "00000"
76+
CU_ASSERT( get( #f, 1, *pz6 ) = 0 )
77+
CU_ASSERT( *pz6 = "12345" )
78+
79+
*pw6 = "00000"
80+
CU_ASSERT( get( #f, 1, *pw6 ) = 0 )
81+
#if sizeof(wstring) = 2
82+
CU_ASSERT( (*pw6)[0] = cvshort( "12" ) )
83+
CU_ASSERT( (*pw6)[1] = cvshort( "34" ) )
84+
CU_ASSERT( (*pw6)[2] = cvshort( "56" ) )
85+
CU_ASSERT( (*pw6)[3] = cvshort( "78" ) )
86+
CU_ASSERT( (*pw6)[4] = cvshort( "90" ) )
87+
#elseif sizeof(wstring) = 4
88+
CU_ASSERT( (*pw6)[0] = cvl( "1234" ) )
89+
CU_ASSERT( (*pw6)[1] = cvl( "5678" ) )
90+
CU_ASSERT( (*pw6)[2] = cvshort( "90" ) )
91+
CU_ASSERT( (*pw6)[3] = 0 )
92+
CU_ASSERT( (*pw6)[4] = 0 )
93+
#endif
94+
CU_ASSERT( (*pw6)[5] = 0 )
5895

5996
CU_ASSERT( get( #f, 1, array4b() ) = 0 )
6097
CU_ASSERT( array4b(0) = asc( "1" ) )
@@ -65,7 +102,7 @@ private sub test cdecl( )
65102
CU_ASSERT( get( #f, 1, array4l() ) = 0 )
66103
CU_ASSERT( array4l(0) = cvl( "1234" ) )
67104
CU_ASSERT( array4l(1) = cvl( "5678" ) )
68-
CU_ASSERT( array4l(2) = &h3039 ) '' 90
105+
CU_ASSERT( array4l(2) = cvshort( "90" ) )
69106
CU_ASSERT( array4l(3) = 0 )
70107

71108
''
@@ -124,8 +161,46 @@ private sub test cdecl( )
124161

125162
bytesread = 0
126163
CU_ASSERT( get( #f, 1, w6, , bytesread ) = 0 )
127-
CU_ASSERT( w6 = wstr( "12345" ) )
128-
CU_ASSERT( bytesread = (5 * sizeof( wstring )) )
164+
#if sizeof(wstring) = 2
165+
CU_ASSERT( w6[0] = cvshort( "12" ) )
166+
CU_ASSERT( w6[1] = cvshort( "34" ) )
167+
CU_ASSERT( w6[2] = cvshort( "56" ) )
168+
CU_ASSERT( w6[3] = cvshort( "78" ) )
169+
CU_ASSERT( w6[4] = cvshort( "90" ) )
170+
#elseif sizeof(wstring) = 4
171+
CU_ASSERT( w6[0] = cvl( "1234" ) )
172+
CU_ASSERT( w6[1] = cvl( "5678" ) )
173+
CU_ASSERT( w6[2] = cvshort( "90" ) )
174+
CU_ASSERT( w6[3] = 0 )
175+
CU_ASSERT( w6[4] = 0 )
176+
#endif
177+
CU_ASSERT( w6[5] = 0 )
178+
CU_ASSERT( bytesread = 10 )
179+
180+
bytesread = 0
181+
*pz6 = "00000"
182+
CU_ASSERT( get( #f, 1, *pz6, , bytesread ) = 0 )
183+
CU_ASSERT( *pz6 = "12345" )
184+
CU_ASSERT( bytesread = 5 )
185+
186+
bytesread = 0
187+
*pw6 = "00000"
188+
CU_ASSERT( get( #f, 1, *pw6, , bytesread ) = 0 )
189+
#if sizeof(wstring) = 2
190+
CU_ASSERT( (*pw6)[0] = cvshort( "12" ) )
191+
CU_ASSERT( (*pw6)[1] = cvshort( "34" ) )
192+
CU_ASSERT( (*pw6)[2] = cvshort( "56" ) )
193+
CU_ASSERT( (*pw6)[3] = cvshort( "78" ) )
194+
CU_ASSERT( (*pw6)[4] = cvshort( "90" ) )
195+
#elseif sizeof(wstring) = 4
196+
CU_ASSERT( (*pw6)[0] = cvl( "1234" ) )
197+
CU_ASSERT( (*pw6)[1] = cvl( "5678" ) )
198+
CU_ASSERT( (*pw6)[2] = cvshort( "90" ) )
199+
CU_ASSERT( (*pw6)[3] = 0 )
200+
CU_ASSERT( (*pw6)[4] = 0 )
201+
#endif
202+
CU_ASSERT( (*pw6)[5] = 0 )
203+
CU_ASSERT( bytesread = 10 )
129204

130205
bytesread = 0
131206
CU_ASSERT( get( #f, 1, array4b(), , bytesread ) = 0 )
@@ -139,7 +214,7 @@ private sub test cdecl( )
139214
CU_ASSERT( get( #f, 1, array4l(), , bytesread ) = 0 )
140215
CU_ASSERT( array4l(0) = cvl( "1234" ) )
141216
CU_ASSERT( array4l(1) = cvl( "5678" ) )
142-
CU_ASSERT( array4l(2) = &h3039 ) '' 90
217+
CU_ASSERT( array4l(2) = cvshort( "90" ) )
143218
CU_ASSERT( array4l(3) = 0 )
144219
CU_ASSERT( bytesread = 10 )
145220

@@ -193,7 +268,41 @@ private sub test cdecl( )
193268
CU_ASSERT( z6 = "12345" )
194269

195270
CU_ASSERT( get( #f, 1ll, w6 ) = 0 )
196-
CU_ASSERT( w6 = wstr( "12345" ) )
271+
#if sizeof(wstring) = 2
272+
CU_ASSERT( w6[0] = cvshort( "12" ) )
273+
CU_ASSERT( w6[1] = cvshort( "34" ) )
274+
CU_ASSERT( w6[2] = cvshort( "56" ) )
275+
CU_ASSERT( w6[3] = cvshort( "78" ) )
276+
CU_ASSERT( w6[4] = cvshort( "90" ) )
277+
#elseif sizeof(wstring) = 4
278+
CU_ASSERT( w6[0] = cvl( "1234" ) )
279+
CU_ASSERT( w6[1] = cvl( "5678" ) )
280+
CU_ASSERT( w6[2] = cvshort( "90" ) )
281+
CU_ASSERT( w6[3] = 0 )
282+
CU_ASSERT( w6[4] = 0 )
283+
#endif
284+
CU_ASSERT( w6[5] = 0 )
285+
286+
*pz6 = "00000"
287+
CU_ASSERT( get( #f, 1ll, *pz6 ) = 0 )
288+
CU_ASSERT( *pz6 = "12345" )
289+
290+
*pw6 = "00000"
291+
CU_ASSERT( get( #f, 1ll, *pw6 ) = 0 )
292+
#if sizeof(wstring) = 2
293+
CU_ASSERT( (*pw6)[0] = cvshort( "12" ) )
294+
CU_ASSERT( (*pw6)[1] = cvshort( "34" ) )
295+
CU_ASSERT( (*pw6)[2] = cvshort( "56" ) )
296+
CU_ASSERT( (*pw6)[3] = cvshort( "78" ) )
297+
CU_ASSERT( (*pw6)[4] = cvshort( "90" ) )
298+
#elseif sizeof(wstring) = 4
299+
CU_ASSERT( (*pw6)[0] = cvl( "1234" ) )
300+
CU_ASSERT( (*pw6)[1] = cvl( "5678" ) )
301+
CU_ASSERT( (*pw6)[2] = cvshort( "90" ) )
302+
CU_ASSERT( (*pw6)[3] = 0 )
303+
CU_ASSERT( (*pw6)[4] = 0 )
304+
#endif
305+
CU_ASSERT( (*pw6)[5] = 0 )
197306

198307
CU_ASSERT( get( #f, 1ll, array4b() ) = 0 )
199308
CU_ASSERT( array4b(0) = asc( "1" ) )
@@ -204,7 +313,7 @@ private sub test cdecl( )
204313
CU_ASSERT( get( #f, 1ll, array4l() ) = 0 )
205314
CU_ASSERT( array4l(0) = cvl( "1234" ) )
206315
CU_ASSERT( array4l(1) = cvl( "5678" ) )
207-
CU_ASSERT( array4l(2) = &h3039 ) '' 90
316+
CU_ASSERT( array4l(2) = cvshort( "90" ) )
208317
CU_ASSERT( array4l(3) = 0 )
209318

210319
''
@@ -262,8 +371,46 @@ private sub test cdecl( )
262371

263372
bytesread = 0
264373
CU_ASSERT( get( #f, 1ll, w6, , bytesread ) = 0 )
265-
CU_ASSERT( w6 = wstr( "12345" ) )
266-
CU_ASSERT( bytesread = (5 * sizeof( wstring )) )
374+
#if sizeof(wstring) = 2
375+
CU_ASSERT( w6[0] = cvshort( "12" ) )
376+
CU_ASSERT( w6[1] = cvshort( "34" ) )
377+
CU_ASSERT( w6[2] = cvshort( "56" ) )
378+
CU_ASSERT( w6[3] = cvshort( "78" ) )
379+
CU_ASSERT( w6[4] = cvshort( "90" ) )
380+
#elseif sizeof(wstring) = 4
381+
CU_ASSERT( w6[0] = cvl( "1234" ) )
382+
CU_ASSERT( w6[1] = cvl( "5678" ) )
383+
CU_ASSERT( w6[2] = cvshort( "90" ) )
384+
CU_ASSERT( w6[3] = 0 )
385+
CU_ASSERT( w6[4] = 0 )
386+
#endif
387+
CU_ASSERT( w6[5] = 0 )
388+
CU_ASSERT( bytesread = 10 )
389+
390+
bytesread = 0
391+
*pz6 = "00000"
392+
CU_ASSERT( get( #f, 1ll, *pz6, , bytesread ) = 0 )
393+
CU_ASSERT( *pz6 = "12345" )
394+
CU_ASSERT( bytesread = 5 )
395+
396+
bytesread = 0
397+
*pw6 = "00000"
398+
CU_ASSERT( get( #f, 1ll, *pw6, , bytesread ) = 0 )
399+
#if sizeof(wstring) = 2
400+
CU_ASSERT( (*pw6)[0] = cvshort( "12" ) )
401+
CU_ASSERT( (*pw6)[1] = cvshort( "34" ) )
402+
CU_ASSERT( (*pw6)[2] = cvshort( "56" ) )
403+
CU_ASSERT( (*pw6)[3] = cvshort( "78" ) )
404+
CU_ASSERT( (*pw6)[4] = cvshort( "90" ) )
405+
#elseif sizeof(wstring) = 4
406+
CU_ASSERT( (*pw6)[0] = cvl( "1234" ) )
407+
CU_ASSERT( (*pw6)[1] = cvl( "5678" ) )
408+
CU_ASSERT( (*pw6)[2] = cvshort( "90" ) )
409+
CU_ASSERT( (*pw6)[3] = 0 )
410+
CU_ASSERT( (*pw6)[4] = 0 )
411+
#endif
412+
CU_ASSERT( (*pw6)[5] = 0 )
413+
CU_ASSERT( bytesread = 10 )
267414

268415
bytesread = 0
269416
CU_ASSERT( get( #f, 1ll, array4b(), , bytesread ) = 0 )
@@ -277,16 +424,87 @@ private sub test cdecl( )
277424
CU_ASSERT( get( #f, 1ll, array4l(), , bytesread ) = 0 )
278425
CU_ASSERT( array4l(0) = cvl( "1234" ) )
279426
CU_ASSERT( array4l(1) = cvl( "5678" ) )
280-
CU_ASSERT( array4l(2) = &h3039 ) '' 90
427+
CU_ASSERT( array4l(2) = cvshort( "90" ) )
281428
CU_ASSERT( array4l(3) = 0 )
282429
CU_ASSERT( bytesread = 10 )
283430

431+
deallocate( pw6 )
432+
deallocate( pz6 )
284433
close #f
285434
end sub
286435

436+
sub testGetWstrFill cdecl( )
437+
for n as integer = 0 to sizeof(wstring) * 2
438+
const TESTFILE = "data.tmp"
439+
440+
'' Create test file with N bytes
441+
scope
442+
if( kill( TESTFILE ) ) then
443+
end if
444+
var f = freefile( )
445+
if( open( TESTFILE, for binary, access write, as #f ) <> 0 ) then
446+
CU_FAIL( "could not create file " & TESTFILE )
447+
end if
448+
for i as integer = 0 to n - 1
449+
var b = cubyte( asc( "a" ) )
450+
put #f, , b
451+
next
452+
close #f
453+
CU_ASSERT( filelen( TESTFILE ) = n )
454+
end scope
455+
456+
scope
457+
var f = freefile( )
458+
if( open( TESTFILE, for binary, access read, as #f ) <> 0 ) then
459+
CU_FAIL( "could not open file " & TESTFILE )
460+
end if
461+
462+
dim w as wstring * 10
463+
'' Fill all bytes in the wstring buffer with '?'
464+
for i as integer = 0 to sizeof(w) - 1
465+
cptr( ubyte ptr, @w )[i] = asc( "?" )
466+
next
467+
468+
'' Try a Get# wstring. It should read the 'a' bytes from
469+
'' the file, fill the last wchar's remaining bytes with
470+
'' zeroes if needed to round up to sizeof(wstring),
471+
'' and add a null terminator.
472+
dim bytesread as integer
473+
CU_ASSERT( get( #f, , w, , bytesread ) = 0 )
474+
CU_ASSERT( bytesread = n )
475+
476+
'' Check whether it worked
477+
478+
'' 'a' bytes as read from file
479+
for i as integer = 0 to n - 1
480+
CU_ASSERT( cptr( ubyte ptr, @w )[i] = asc( "a" ) )
481+
next
482+
483+
'' zero bytes padding, if needed
484+
dim as integer extra = bytesread mod sizeof( wstring )
485+
if extra > 0 then
486+
bytesread += sizeof( wstring ) - extra '' round up
487+
for i as integer = n to bytesread - 1
488+
CU_ASSERT( cptr( ubyte ptr, @w )[i] = 0 )
489+
next
490+
end if
491+
492+
'' null terminator
493+
CU_ASSERT( (bytesread mod sizeof( wstring )) = 0 )
494+
CU_ASSERT( w[bytesread \ sizeof( wstring )] = 0 )
495+
496+
close #f
497+
end scope
498+
499+
'' Delete test file
500+
CU_ASSERT( kill( TESTFILE ) = 0 )
501+
next
502+
end sub
503+
287504
private sub ctor( ) constructor
288505
fbcu.add_suite( "tests/file/get" )
289506
fbcu.add_test( "test", @test )
507+
fbcu.add_test( "testGetWstrFill", @testGetWstrFill )
290508
end sub
291509

292510
end namespace

0 commit comments

Comments
 (0)