Skip to content

Commit e30beca

Browse files
committed
fbc: token pasting operator '##' allows pasting of single '_' characters
- previously fbc was unable to handle token pasting of a single '_' character, since in such cases the '_' was always seen as a line continuation. Example: #define join1(a,b) a##_ b #define join2(a,b) a _##b #define join3(a,b) a##_##b - because '_' line continuation and '##' token pasting are handled nearly entirely in the lexer, no token checks can be made - adds lexGetLookAheadChar2( ) so that the sequence '_##' can be checked before tokenizing - plus previous commits (Skyfish) allow handling the sequence '##_' to escape the '_' character and paste it in on expansion
1 parent 34766aa commit e30beca

File tree

5 files changed

+235
-20
lines changed

5 files changed

+235
-20
lines changed

changelog.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Version 1.08.0
138138
- sf.net #917: optimize 'm += s' string concatenations to fix the long compile times in the gcc backend (which makes heavy use of string building).
139139
- github #217: C backend, fix gcc array out of bounds warning when compiled with -O2 or higher optimizations and accessing non-zero lower bound fixed length string arrays
140140
- C backend: inline asm - don't add rsp/esp to the clobber list, it's deprecated in newer gcc versions and silently ignored in older versions
141+
- github #309: token pasting operator '##' allows pasting of single '_' characters
141142

142143

143144
Version 1.07.0

src/compiler/lex.bas

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ sub lexInit _
9898
next
9999

100100
lex.ctx->currchar = UINVALID
101-
lex.ctx->lahdchar = UINVALID
101+
lex.ctx->lahdchar1 = UINVALID
102+
lex.ctx->lahdchar2 = UINVALID
102103

103104
if( is_fb_eval ) then
104105
lex.ctx->linenum = (lex.ctx-1)->linenum
@@ -279,15 +280,21 @@ private function hReadChar _
279280
end function
280281

281282
sub lexEatChar( )
282-
if( lex.ctx->lahdchar = UINVALID ) then
283+
if( lex.ctx->lahdchar1 = UINVALID ) then
283284
'' No look-ahead char, read next char and force the next
284285
'' lexCurrentChar() to update the current char.
285286
hSkipChar( )
286287
lex.ctx->currchar = UINVALID
288+
elseif( lex.ctx->lahdchar2 = UINVALID ) then
289+
'' Look ahead char is the next current char
290+
lex.ctx->currchar = lex.ctx->lahdchar1
291+
lex.ctx->lahdchar1 = UINVALID
287292
else
288293
'' Look ahead char is the next current char
289-
lex.ctx->currchar = lex.ctx->lahdchar
290-
lex.ctx->lahdchar = UINVALID
294+
'' and second look ahead char becomes look ahead char
295+
lex.ctx->currchar = lex.ctx->lahdchar1
296+
lex.ctx->lahdchar1 = lex.ctx->lahdchar2
297+
lex.ctx->lahdchar2 = UINVALID
291298
end if
292299
end sub
293300

@@ -351,20 +358,38 @@ function lexGetLookAheadChar _
351358
byval skipwhitespc as integer = FALSE _
352359
) as uinteger
353360

354-
if( lex.ctx->lahdchar = UINVALID ) then
361+
if( lex.ctx->lahdchar1 = UINVALID ) then
355362
hSkipChar( )
356-
lex.ctx->lahdchar = hReadChar( )
363+
lex.ctx->lahdchar1 = hReadChar( )
357364
end if
358365

359366
if( skipwhitespc ) then
360-
do while( (lex.ctx->lahdchar = CHAR_TAB) or (lex.ctx->lahdchar = CHAR_SPACE) )
367+
do while( (lex.ctx->lahdchar1 = CHAR_TAB) or (lex.ctx->lahdchar1 = CHAR_SPACE) )
361368
lex.ctx->after_space = TRUE
362369
hSkipChar( )
363-
lex.ctx->lahdchar = hReadChar( )
370+
lex.ctx->lahdchar1 = hReadChar( )
364371
loop
365372
end if
366373

367-
function = lex.ctx->lahdchar
374+
function = lex.ctx->lahdchar1
375+
376+
end function
377+
378+
'':::::
379+
function lexGetLookAheadChar2 _
380+
( _
381+
) as uinteger
382+
383+
'' internally, should never use this function unless there
384+
'' is already a character in the look ahead
385+
assert( lex.ctx->lahdchar1 <> UINVALID )
386+
387+
if( lex.ctx->lahdchar2 = UINVALID ) then
388+
hSkipChar( )
389+
lex.ctx->lahdchar2 = hReadChar( )
390+
end if
391+
392+
function = lex.ctx->lahdchar2
368393

369394
end function
370395

@@ -489,7 +514,7 @@ private sub hReadIdentifier _
489514
lexEatChar( )
490515

491516
'' '#'?
492-
case FB_TK_DBLTYPECHAR
517+
case FB_TK_DBLTYPECHAR '' alias for CHAR_SHARP
493518
'' isn't it a '##'?
494519
if( lexGetLookAheadChar( ) <> FB_TK_DBLTYPECHAR ) then
495520
dtype = FB_DATATYPE_DOUBLE
@@ -897,7 +922,7 @@ private sub hReadFloatNumber _
897922
end if
898923

899924
'' '#'?
900-
case FB_TK_DBLTYPECHAR
925+
case FB_TK_DBLTYPECHAR '' alias for CHAR_SHARP
901926
t.dtype = FB_DATATYPE_DOUBLE
902927

903928
if( (flags and LEXCHECK_NOSUFFIX) = 0 ) then
@@ -1201,7 +1226,7 @@ private sub hReadNumber( byref t as FBTOKEN, byval flags as LEXCHECK )
12011226
end if
12021227

12031228
'' '#'
1204-
case FB_TK_DBLTYPECHAR
1229+
case FB_TK_DBLTYPECHAR '' alias for CHAR_SHARP
12051230
if( have_u_suffix = FALSE ) then
12061231
'' isn't it a '##'?
12071232
if( lexGetLookAheadChar( ) <> FB_TK_DBLTYPECHAR ) then
@@ -1632,13 +1657,19 @@ re_read:
16321657
CHAR_0 to CHAR_9, CHAR_UNDER
16331658
exit do
16341659

1635-
'' otherwise, skip until new-line is found
1636-
case else
1637-
lexEatChar( )
1638-
islinecont = TRUE
1639-
continue do
1660+
'' could it be '_##'?
1661+
case CHAR_SHARP
1662+
if( lexGetLookAheadChar2( ) = CHAR_SHARP ) then
1663+
exit do
1664+
end if
1665+
16401666
end select
16411667

1668+
'' otherwise, skip until new-line is found
1669+
lexEatChar( )
1670+
islinecont = TRUE
1671+
continue do
1672+
16421673
'' else, take it as-is
16431674
else
16441675
exit do

src/compiler/lex.bi

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ type LEX_TKCTX
8484
tail as FBTOKEN ptr
8585

8686
currchar as uinteger '' current char
87-
lahdchar as uinteger '' look ahead char
87+
lahdchar1 as uinteger '' look ahead first char
88+
lahdchar2 as uinteger '' look ahead second char
8889

8990
linenum as integer
9091
lasttk_id as integer
@@ -234,6 +235,10 @@ declare function lexGetLookAheadChar _
234235
byval skipwhitespc as integer = FALSE _
235236
) as uinteger
236237

238+
declare function lexGetLookAheadChar2 _
239+
( _
240+
) as uinteger
241+
237242
declare sub lexEatChar( )
238243

239244
declare function lexPeekCurrentLine _

src/compiler/pp.bas

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,11 @@ function ppReadLiteral _
550550
case CHAR_SHARP
551551
lexSkipToken( LEX_FLAGS )
552552
lexSkipToken( LEX_FLAGS or LEXCHECK_NOLINECONT)
553-
if *lexGetText( ) <> "_" then '' Is only '##_'?
553+
554+
'' we can't check lexGetToken( ) here because a single '_' will
555+
'' return as FB_TK_ID, so we need to do a lexGetText( ) check
556+
'' Is only '##_'?
557+
if *lexGetText( ) <> "_" then
554558
DZstrConcatAssign( text, "##" )
555559
end if
556560

@@ -701,7 +705,11 @@ function ppReadLiteralW _
701705
case CHAR_SHARP
702706
lexSkipToken( LEX_FLAGS )
703707
lexSkipToken( LEX_FLAGS or LEXCHECK_NOLINECONT)
704-
if *lexGetText( ) <> "_" then '' Is only '##_'?
708+
709+
'' we can't check lexGetToken( ) here because a single '_' will
710+
'' return as FB_TK_ID, so we need to do a lexGetText( ) check
711+
'' Is only '##_'?
712+
if *lexGetText( ) <> "_" then
705713
DWstrConcatAssignA( text, "##" )
706714
end if
707715

tests/pp/token-pasting.bas

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#include "fbcunit.bi"
2+
3+
#define tostr(a) #a
4+
5+
SUITE( fbc_tests.pp.token_pasting )
6+
7+
TEST( plain )
8+
9+
#define join1(a,b) a##b
10+
#define join2(a,b) a ##b
11+
#define join3(a,b) a## b
12+
#define join4(a,b) a ## b
13+
#define join5(a,b) a ## b
14+
15+
CU_ASSERT_EQUAL( tostr(join1(X,Y)), tostr(XY) )
16+
CU_ASSERT_EQUAL( tostr(join2(X,Y)), tostr(X Y) )
17+
CU_ASSERT_EQUAL( tostr(join3(X,Y)), tostr(X Y) )
18+
CU_ASSERT_EQUAL( tostr(join4(X,Y)), tostr(X Y) )
19+
CU_ASSERT_EQUAL( tostr(join5(X,Y)), tostr(X Y) )
20+
21+
END_TEST
22+
23+
TEST( under1 )
24+
25+
#define join_01(a) a##_
26+
#define join_02(a) _##a
27+
#define join_03(a) a##__
28+
#define join_04(a) __##a
29+
30+
CU_ASSERT_EQUAL( tostr(join_01(X)), tostr(X_) )
31+
CU_ASSERT_EQUAL( tostr(join_02(X)), tostr(_X) )
32+
CU_ASSERT_EQUAL( tostr(join_03(X)), tostr(X__) )
33+
CU_ASSERT_EQUAL( tostr(join_04(X)), tostr(__X) )
34+
35+
END_TEST
36+
37+
TEST( under_1 )
38+
39+
#define join_01(_a) _a##_
40+
#define join_02(_a) _##_a
41+
#define join_03(_a) _a##__
42+
#define join_04(_a) __##_a
43+
44+
CU_ASSERT_EQUAL( tostr(join_01(X)), tostr(X_) )
45+
CU_ASSERT_EQUAL( tostr(join_02(X)), tostr(_X) )
46+
CU_ASSERT_EQUAL( tostr(join_03(X)), tostr(X__) )
47+
CU_ASSERT_EQUAL( tostr(join_04(X)), tostr(__X) )
48+
49+
END_TEST
50+
51+
TEST( under1_ )
52+
53+
#define join_01(a_) a_##_
54+
#define join_02(a_) _##a_
55+
#define join_03(a_) a_##__
56+
#define join_04(a_) __##a_
57+
58+
CU_ASSERT_EQUAL( tostr(join_01(X)), tostr(X_) )
59+
CU_ASSERT_EQUAL( tostr(join_02(X)), tostr(_X) )
60+
CU_ASSERT_EQUAL( tostr(join_03(X)), tostr(X__) )
61+
CU_ASSERT_EQUAL( tostr(join_04(X)), tostr(__X) )
62+
63+
END_TEST
64+
65+
TEST( under_1_ )
66+
67+
#define join_01(_a_) _a_##_
68+
#define join_02(_a_) _##_a_
69+
#define join_03(_a_) _a_##__
70+
#define join_04(_a_) __##_a_
71+
72+
CU_ASSERT_EQUAL( tostr(join_01(X)), tostr(X_) )
73+
CU_ASSERT_EQUAL( tostr(join_02(X)), tostr(_X) )
74+
CU_ASSERT_EQUAL( tostr(join_03(X)), tostr(X__) )
75+
CU_ASSERT_EQUAL( tostr(join_04(X)), tostr(__X) )
76+
77+
END_TEST
78+
79+
TEST( under2 )
80+
81+
#define join_01(a,b) a##_##b
82+
#define join_02(a,b) a ##_##b
83+
#define join_03(a,b) a## _##b
84+
#define join_04(a,b) a ## _##b
85+
#define join_05(a,b) a##_ ##b
86+
#define join_06(a,b) a##_##b
87+
#define join_07(a,b) a ##_##b
88+
#define join_08(a,b) a## _##b
89+
#define join_09(a,b) a ## _##b
90+
#define join_10(a,b) a##_ ##b
91+
92+
CU_ASSERT_EQUAL( tostr(join_01(X,Y)), tostr(X_Y) )
93+
CU_ASSERT_EQUAL( tostr(join_02(X,Y)), tostr(X _Y) )
94+
CU_ASSERT_EQUAL( tostr(join_03(X,Y)), tostr(X _Y) )
95+
CU_ASSERT_EQUAL( tostr(join_04(X,Y)), tostr(X _Y) )
96+
CU_ASSERT_EQUAL( tostr(join_05(X,Y)), tostr(X_ Y) )
97+
CU_ASSERT_EQUAL( tostr(join_06(X,Y)), tostr(X_Y) )
98+
CU_ASSERT_EQUAL( tostr(join_07(X,Y)), tostr(X _Y) )
99+
CU_ASSERT_EQUAL( tostr(join_08(X,Y)), tostr(X _Y) )
100+
CU_ASSERT_EQUAL( tostr(join_09(X,Y)), tostr(X _Y) )
101+
CU_ASSERT_EQUAL( tostr(join_10(X,Y)), tostr(X_ Y) )
102+
103+
'' Parser behaviour:
104+
'' - everything after a line continuation token '_' is ignored
105+
'' - the following expressions are poorly formed because the line
106+
'' continuation either continues to the following define
107+
'' or the following statement
108+
109+
/'
110+
#define join_11(a,b) a ##_ ##b
111+
#define join_12(a,b) a## _ ##b
112+
#define join_13(a,b) a ## _ ##b
113+
#define join_14(a,b) a ##_ ##b
114+
#define join_15(a,b) a## _ ##b
115+
#define join_16(a,b) a ## _ ##b
116+
117+
CU_ASSERT_EQUAL( tostr(join_11(X,Y)), tostr(X _ Y) )
118+
CU_ASSERT_EQUAL( tostr(join_12(X,Y)), tostr(X _ Y) )
119+
CU_ASSERT_EQUAL( tostr(join_13(X,Y)), tostr(X _ Y) )
120+
CU_ASSERT_EQUAL( tostr(join_14(X,Y)), tostr(X _ Y) )
121+
CU_ASSERT_EQUAL( tostr(join_15(X,Y)), tostr(X _ Y) )
122+
CU_ASSERT_EQUAL( tostr(join_16(X,Y)), tostr(X _ Y) )
123+
'/
124+
125+
END_TEST
126+
127+
''
128+
'' test case from commit: 91e58fe7da978e464f976dc3f94378fbf3d1fbb1
129+
''
130+
131+
Type _MAP_ENTRY
132+
id As integer
133+
pA As integer
134+
End Type
135+
#macro BEGIN_ENTRIESMAP()
136+
Function _GetMapEntries() As _MAP_ENTRY Ptr
137+
Static As _MAP_ENTRY _entries(0 To ...) = { ##_
138+
#endmacro
139+
#macro END_ENTRIESMAP()
140+
(0, 0)}
141+
Return @_entries(0)
142+
End Function
143+
#endmacro
144+
#define _INTERFACE_ENTRY(x, y) (x, y), ##_
145+
146+
#macro GENMAP()
147+
148+
BEGIN_ENTRIESMAP()
149+
_INTERFACE_ENTRY(1, 2)
150+
_INTERFACE_ENTRY(3, 4)
151+
_INTERFACE_ENTRY(5, 6)
152+
END_ENTRIESMAP()
153+
154+
#endmacro
155+
156+
GENMAP()
157+
158+
TEST( line_pasting )
159+
160+
var p = _GetMapEntries()
161+
CU_ASSERT_EQUAL( p[0].id, 1 )
162+
CU_ASSERT_EQUAL( p[0].pA, 2 )
163+
CU_ASSERT_EQUAL( p[1].id, 3 )
164+
CU_ASSERT_EQUAL( p[1].pA, 4 )
165+
CU_ASSERT_EQUAL( p[2].id, 5 )
166+
CU_ASSERT_EQUAL( p[2].pA, 6 )
167+
168+
END_TEST
169+
170+
END_SUITE

0 commit comments

Comments
 (0)