@@ -46,6 +46,23 @@ Prescanner::Prescanner(const Prescanner &that, bool isNestedInIncludeDirective)
46
46
compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_ },
47
47
compilerDirectiveSentinels_{that.compilerDirectiveSentinels_ } {}
48
48
49
// Tests whether the bytes at p begin a blank character and reports how
// many bytes that blank occupies, so the caller knows how far to advance:
//   1 for an ASCII space (0x20),
//   1 for a lone 0xA0 byte (LATIN-1 non-breaking space),
//   2 for the byte pair 0xC2 0xA0 (UTF-8 encoding of the non-breaking space),
//   0 when p does not point at any of these (tabs are NOT matched here).
// p[1] is read only when p[0] is 0xC2, so the buffer must have at least one
// readable byte after any 0xC2 (e.g. a terminating newline or NUL).
static inline int IsSpace(const char *p) {
  if (*p == ' ') {
    return 1;
  } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space
    return 1;
  } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  } else {
    return 0;
  }
}
61
+
62
+ static inline int IsSpaceOrTab (const char *p) {
63
+ return *p == ' \t ' ? 1 : IsSpace (p);
64
+ }
65
+
49
66
// True when ch is one of the four characters this prescanner accepts as a
// fixed-form comment marker: '!', '*', 'C', or 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}
@@ -126,8 +143,8 @@ void Prescanner::Statement() {
126
143
if (inFixedForm_) {
127
144
CHECK (IsFixedFormCommentChar (*at_));
128
145
} else {
129
- while (*at_ == ' ' || * at_ == ' \t ' ) {
130
- ++ at_, ++column_;
146
+ while (int n{ IsSpaceOrTab ( at_)} ) {
147
+ at_ += n , ++column_;
131
148
}
132
149
CHECK (*at_ == ' !' );
133
150
}
@@ -159,10 +176,10 @@ void Prescanner::Statement() {
159
176
++sp, ++at_, ++column_) {
160
177
EmitChar (tokens, *sp);
161
178
}
162
- if (* at_ == ' ' || *at_ == ' \t ' ) {
179
+ if (IsSpaceOrTab ( at_) ) {
163
180
EmitChar (tokens, ' ' );
164
- while (*at_ == ' ' || * at_ == ' \t ' ) {
165
- ++ at_, ++column_;
181
+ while (int n{ IsSpaceOrTab ( at_)} ) {
182
+ at_ += n , ++column_;
166
183
}
167
184
}
168
185
tokens.CloseToken ();
@@ -361,7 +378,7 @@ void Prescanner::LabelField(TokenSequence &token) {
361
378
column_ = 7 ;
362
379
break ;
363
380
}
364
- if (* at_ != ' ' &&
381
+ if (int n{ IsSpace ( at_)}; n == 0 &&
365
382
!(*at_ == ' 0' && column_ == 6 )) { // '0' in column 6 becomes space
366
383
EmitChar (token, *at_);
367
384
++outCol;
@@ -493,7 +510,9 @@ bool Prescanner::MustSkipToEndOfLine() const {
493
510
494
511
void Prescanner::NextChar () {
495
512
CHECK (*at_ != ' \n ' );
496
- ++at_, ++column_;
513
+ int n{IsSpace (at_)};
514
+ at_ += n ? n : 1 ;
515
+ ++column_;
497
516
while (at_[0 ] == ' \xef ' && at_[1 ] == ' \xbb ' && at_[2 ] == ' \xbf ' ) {
498
517
// UTF-8 byte order mark - treat this file as UTF-8
499
518
at_ += 3 ;
@@ -556,23 +575,23 @@ void Prescanner::SkipCComments() {
556
575
}
557
576
558
577
void Prescanner::SkipSpaces () {
559
- while (* at_ == ' ' || *at_ == ' \t ' ) {
578
+ while (IsSpaceOrTab ( at_) ) {
560
579
NextChar ();
561
580
}
562
581
insertASpace_ = false ;
563
582
}
564
583
565
584
const char *Prescanner::SkipWhiteSpace (const char *p) {
566
- while (*p == ' ' || *p == ' \t ' ) {
567
- ++p ;
585
+ while (int n{ IsSpaceOrTab (p)} ) {
586
+ p += n ;
568
587
}
569
588
return p;
570
589
}
571
590
572
591
const char *Prescanner::SkipWhiteSpaceAndCComments (const char *p) const {
573
592
while (true ) {
574
- if (*p == ' ' || *p == ' \t ' ) {
575
- ++p ;
593
+ if (int n{ IsSpaceOrTab (p)} ) {
594
+ p += n ;
576
595
} else if (IsCComment (p)) {
577
596
if (const char *after{SkipCComment (p)}) {
578
597
p = after;
@@ -613,7 +632,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
613
632
}
614
633
SkipCComments ();
615
634
}
616
- if (* at_ == ' ' || *at_ == ' \t ' ) {
635
+ if (IsSpaceOrTab ( at_) ) {
617
636
// Compress free-form white space into a single space character.
618
637
const auto theSpace{at_};
619
638
char previous{at_ <= start_ ? ' ' : at_[-1 ]};
@@ -976,8 +995,8 @@ bool Prescanner::IsFixedFormCommentLine(const char *start) const {
976
995
}
977
996
bool anyTabs{false };
978
997
while (true ) {
979
- if (*p == ' ' ) {
980
- ++p ;
998
+ if (int n{ IsSpace (p)} ) {
999
+ p += n ;
981
1000
} else if (*p == ' \t ' ) {
982
1001
anyTabs = true ;
983
1002
++p;
@@ -1089,7 +1108,8 @@ void Prescanner::FortranInclude(const char *firstQuote) {
1089
1108
1090
1109
const char *Prescanner::IsPreprocessorDirectiveLine (const char *start) const {
1091
1110
const char *p{start};
1092
- for (; *p == ' ' ; ++p) {
1111
+ while (int n{IsSpace (p)}) {
1112
+ p += n;
1093
1113
}
1094
1114
if (*p == ' #' ) {
1095
1115
if (inFixedForm_ && p == start + 5 ) {
@@ -1178,9 +1198,9 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
1178
1198
return nullptr ;
1179
1199
}
1180
1200
}
1181
- char col6{nextLine_[ 5 ] };
1182
- if (col6 != ' \n ' && col6 != ' \t ' && col6 != ' ' && col6 != ' 0 ' ) {
1183
- if (nextLine_[ 6 ] != ' ' && mightNeedSpace ) {
1201
+ const char * col6{nextLine_ + 5 };
1202
+ if (* col6 != ' \n ' && * col6 != ' 0 ' && ! IsSpaceOrTab ( col6) ) {
1203
+ if (mightNeedSpace && ! IsSpace (nextLine_ + 6 ) ) {
1184
1204
insertASpace_ = true ;
1185
1205
}
1186
1206
return nextLine_ + 6 ;
@@ -1207,9 +1227,9 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
1207
1227
features_.IsEnabled (LanguageFeature::OldDebugLines))) &&
1208
1228
nextLine_[1 ] == ' ' && nextLine_[2 ] == ' ' && nextLine_[3 ] == ' ' &&
1209
1229
nextLine_[4 ] == ' ' ) {
1210
- char col6{nextLine_[ 5 ] };
1211
- if (col6 != ' \n ' && col6 != ' \t ' && col6 != ' ' && col6 != ' 0 ' ) {
1212
- if ((col6 == ' i' || col6 == ' I' ) && IsIncludeLine (nextLine_)) {
1230
+ const char * col6{nextLine_ + 5 };
1231
+ if (* col6 != ' \n ' && * col6 != ' 0 ' && ! IsSpaceOrTab ( col6) ) {
1232
+ if ((* col6 == ' i' || * col6 == ' I' ) && IsIncludeLine (nextLine_)) {
1213
1233
// It's An INCLUDE line, not a continuation
1214
1234
} else {
1215
1235
return nextLine_ + 6 ;
@@ -1356,7 +1376,7 @@ Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
1356
1376
char sentinel[5 ], *sp{sentinel};
1357
1377
int column{2 };
1358
1378
for (; column < 6 ; ++column, ++p) {
1359
- if (*p == ' ' || *p == ' \n ' || *p == ' \t ' ) {
1379
+ if (*p == ' \n ' || IsSpaceOrTab (p) ) {
1360
1380
break ;
1361
1381
}
1362
1382
if (sp == sentinel + 1 && sentinel[0 ] == ' $' && IsDecimalDigit (*p)) {
@@ -1366,8 +1386,10 @@ Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
1366
1386
*sp++ = ToLowerCaseLetter (*p);
1367
1387
}
1368
1388
if (column == 6 ) {
1369
- if (*p == ' ' || *p == ' \t ' || *p == ' 0' ) {
1389
+ if (*p == ' 0' ) {
1370
1390
++p;
1391
+ } else if (int n{IsSpaceOrTab (p)}) {
1392
+ p += n;
1371
1393
} else {
1372
1394
// This is a Continuation line, not an initial directive line.
1373
1395
return std::nullopt;
@@ -1442,10 +1464,10 @@ std::optional<std::pair<const char *, const char *>>
1442
1464
Prescanner::IsCompilerDirectiveSentinel (const char *p) const {
1443
1465
char sentinel[8 ];
1444
1466
for (std::size_t j{0 }; j + 1 < sizeof sentinel && *p != ' \n ' ; ++p, ++j) {
1445
- if (*p == ' ' || *p == ' \t ' || *p == ' & ' ) {
1467
+ if (int n{ *p == ' & ' ? 1 : IsSpaceOrTab (p)} ) {
1446
1468
if (j > 0 ) {
1447
1469
sentinel[j] = ' \0 ' ;
1448
- p = SkipWhiteSpace (p + 1 );
1470
+ p = SkipWhiteSpace (p + n );
1449
1471
if (*p != ' !' ) {
1450
1472
if (const char *sp{IsCompilerDirectiveSentinel (sentinel, j)}) {
1451
1473
return std::make_pair (sp, p);
0 commit comments