@@ -46,6 +46,23 @@ Prescanner::Prescanner(const Prescanner &that, bool isNestedInIncludeDirective)
4646 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_ },
4747 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_ } {}
4848
49+ // Returns number of bytes to skip
50+ static inline int IsSpace (const char *p) {
51+ if (*p == ' ' ) {
52+ return 1 ;
53+ } else if (*p == ' \xa0 ' ) { // LATIN-1 NBSP non-breaking space
54+ return 1 ;
55+ } else if (p[0 ] == ' \xc2 ' && p[1 ] == ' \xa0 ' ) { // UTF-8 NBSP
56+ return 2 ;
57+ } else {
58+ return 0 ;
59+ }
60+ }
61+
62+ static inline int IsSpaceOrTab (const char *p) {
63+ return *p == ' \t ' ? 1 : IsSpace (p);
64+ }
65+
4966static inline constexpr bool IsFixedFormCommentChar (char ch) {
5067 return ch == ' !' || ch == ' *' || ch == ' C' || ch == ' c' ;
5168}
@@ -126,8 +143,8 @@ void Prescanner::Statement() {
126143 if (inFixedForm_) {
127144 CHECK (IsFixedFormCommentChar (*at_));
128145 } else {
129- while (*at_ == ' ' || * at_ == ' \t ' ) {
130- ++ at_, ++column_;
146+ while (int n{ IsSpaceOrTab ( at_)} ) {
147+ at_ += n , ++column_;
131148 }
132149 CHECK (*at_ == ' !' );
133150 }
@@ -159,10 +176,10 @@ void Prescanner::Statement() {
159176 ++sp, ++at_, ++column_) {
160177 EmitChar (tokens, *sp);
161178 }
162- if (* at_ == ' ' || *at_ == ' \t ' ) {
179+ if (IsSpaceOrTab ( at_) ) {
163180 EmitChar (tokens, ' ' );
164- while (*at_ == ' ' || * at_ == ' \t ' ) {
165- ++ at_, ++column_;
181+ while (int n{ IsSpaceOrTab ( at_)} ) {
182+ at_ += n , ++column_;
166183 }
167184 }
168185 tokens.CloseToken ();
@@ -361,7 +378,7 @@ void Prescanner::LabelField(TokenSequence &token) {
361378 column_ = 7 ;
362379 break ;
363380 }
364- if (* at_ != ' ' &&
381+ if (int n{ IsSpace ( at_)}; n == 0 &&
365382 !(*at_ == ' 0' && column_ == 6 )) { // '0' in column 6 becomes space
366383 EmitChar (token, *at_);
367384 ++outCol;
@@ -493,7 +510,9 @@ bool Prescanner::MustSkipToEndOfLine() const {
493510
494511void Prescanner::NextChar () {
495512 CHECK (*at_ != ' \n ' );
496- ++at_, ++column_;
513+ int n{IsSpace (at_)};
514+ at_ += n ? n : 1 ;
515+ ++column_;
497516 while (at_[0 ] == ' \xef ' && at_[1 ] == ' \xbb ' && at_[2 ] == ' \xbf ' ) {
498517 // UTF-8 byte order mark - treat this file as UTF-8
499518 at_ += 3 ;
@@ -556,23 +575,23 @@ void Prescanner::SkipCComments() {
556575}
557576
558577void Prescanner::SkipSpaces () {
559- while (* at_ == ' ' || *at_ == ' \t ' ) {
578+ while (IsSpaceOrTab ( at_) ) {
560579 NextChar ();
561580 }
562581 insertASpace_ = false ;
563582}
564583
565584const char *Prescanner::SkipWhiteSpace (const char *p) {
566- while (*p == ' ' || *p == ' \t ' ) {
567- ++p ;
585+ while (int n{ IsSpaceOrTab (p)} ) {
586+ p += n ;
568587 }
569588 return p;
570589}
571590
572591const char *Prescanner::SkipWhiteSpaceAndCComments (const char *p) const {
573592 while (true ) {
574- if (*p == ' ' || *p == ' \t ' ) {
575- ++p ;
593+ if (int n{ IsSpaceOrTab (p)} ) {
594+ p += n ;
576595 } else if (IsCComment (p)) {
577596 if (const char *after{SkipCComment (p)}) {
578597 p = after;
@@ -613,7 +632,7 @@ bool Prescanner::NextToken(TokenSequence &tokens) {
613632 }
614633 SkipCComments ();
615634 }
616- if (* at_ == ' ' || *at_ == ' \t ' ) {
635+ if (IsSpaceOrTab ( at_) ) {
617636 // Compress free-form white space into a single space character.
618637 const auto theSpace{at_};
619638 char previous{at_ <= start_ ? ' ' : at_[-1 ]};
@@ -976,8 +995,8 @@ bool Prescanner::IsFixedFormCommentLine(const char *start) const {
976995 }
977996 bool anyTabs{false };
978997 while (true ) {
979- if (*p == ' ' ) {
980- ++p ;
998+ if (int n{ IsSpace (p)} ) {
999+ p += n ;
9811000 } else if (*p == ' \t ' ) {
9821001 anyTabs = true ;
9831002 ++p;
@@ -1089,7 +1108,8 @@ void Prescanner::FortranInclude(const char *firstQuote) {
10891108
10901109const char *Prescanner::IsPreprocessorDirectiveLine (const char *start) const {
10911110 const char *p{start};
1092- for (; *p == ' ' ; ++p) {
1111+ while (int n{IsSpace (p)}) {
1112+ p += n;
10931113 }
10941114 if (*p == ' #' ) {
10951115 if (inFixedForm_ && p == start + 5 ) {
@@ -1178,9 +1198,9 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
11781198 return nullptr ;
11791199 }
11801200 }
1181- char col6{nextLine_[ 5 ] };
1182- if (col6 != ' \n ' && col6 != ' \t ' && col6 != ' ' && col6 != ' 0 ' ) {
1183- if (nextLine_[ 6 ] != ' ' && mightNeedSpace ) {
1201+ const char * col6{nextLine_ + 5 };
1202+ if (* col6 != ' \n ' && * col6 != ' 0 ' && ! IsSpaceOrTab ( col6) ) {
1203+ if (mightNeedSpace && ! IsSpace (nextLine_ + 6 ) ) {
11841204 insertASpace_ = true ;
11851205 }
11861206 return nextLine_ + 6 ;
@@ -1207,9 +1227,9 @@ const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
12071227 features_.IsEnabled (LanguageFeature::OldDebugLines))) &&
12081228 nextLine_[1 ] == ' ' && nextLine_[2 ] == ' ' && nextLine_[3 ] == ' ' &&
12091229 nextLine_[4 ] == ' ' ) {
1210- char col6{nextLine_[ 5 ] };
1211- if (col6 != ' \n ' && col6 != ' \t ' && col6 != ' ' && col6 != ' 0 ' ) {
1212- if ((col6 == ' i' || col6 == ' I' ) && IsIncludeLine (nextLine_)) {
1230+ const char * col6{nextLine_ + 5 };
1231+ if (* col6 != ' \n ' && * col6 != ' 0 ' && ! IsSpaceOrTab ( col6) ) {
1232+ if ((* col6 == ' i' || * col6 == ' I' ) && IsIncludeLine (nextLine_)) {
12131233 // It's An INCLUDE line, not a continuation
12141234 } else {
12151235 return nextLine_ + 6 ;
@@ -1356,7 +1376,7 @@ Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
13561376 char sentinel[5 ], *sp{sentinel};
13571377 int column{2 };
13581378 for (; column < 6 ; ++column, ++p) {
1359- if (*p == ' ' || *p == ' \n ' || *p == ' \t ' ) {
1379+ if (*p == ' \n ' || IsSpaceOrTab (p) ) {
13601380 break ;
13611381 }
13621382 if (sp == sentinel + 1 && sentinel[0 ] == ' $' && IsDecimalDigit (*p)) {
@@ -1366,8 +1386,10 @@ Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
13661386 *sp++ = ToLowerCaseLetter (*p);
13671387 }
13681388 if (column == 6 ) {
1369- if (*p == ' ' || *p == ' \t ' || *p == ' 0' ) {
1389+ if (*p == ' 0' ) {
13701390 ++p;
1391+ } else if (int n{IsSpaceOrTab (p)}) {
1392+ p += n;
13711393 } else {
13721394 // This is a Continuation line, not an initial directive line.
13731395 return std::nullopt ;
@@ -1442,10 +1464,10 @@ std::optional<std::pair<const char *, const char *>>
14421464Prescanner::IsCompilerDirectiveSentinel (const char *p) const {
14431465 char sentinel[8 ];
14441466 for (std::size_t j{0 }; j + 1 < sizeof sentinel && *p != ' \n ' ; ++p, ++j) {
1445- if (*p == ' ' || *p == ' \t ' || *p == ' & ' ) {
1467+ if (int n{ *p == ' & ' ? 1 : IsSpaceOrTab (p)} ) {
14461468 if (j > 0 ) {
14471469 sentinel[j] = ' \0 ' ;
1448- p = SkipWhiteSpace (p + 1 );
1470+ p = SkipWhiteSpace (p + n );
14491471 if (*p != ' !' ) {
14501472 if (const char *sp{IsCompilerDirectiveSentinel (sentinel, j)}) {
14511473 return std::make_pair (sp, p);
0 commit comments