3939// C++ variants of C standard header files
4040#include < cstdio>
4141#include < cstdlib>
42+ #include < cstring>
4243
4344// C++ standard header files
4445#include < limits>
@@ -396,6 +397,16 @@ bool Scanner::GetNextLexeme(Lexeme& lexeme)
396397 if (mpSource == nullptr )
397398 return false ;
398399
400+ if ((mpNextChar == maBuffer) && (mBase == 0 ))
401+ {
402+ // At the very start of the stream.
403+ // Check for file format signatures.
404+
405+ // Currently, only UTF-8 is supported.
406+ if (GetNextSignatureLexeme (lexeme, Lexeme::kUTF8SignatureBOM , u8" \uFEFF " ))
407+ return true ;
408+ }
409+
399410 while (!mEndOfStream )
400411 {
401412 // Skip over any whitespace (including blank lines).
@@ -423,7 +434,7 @@ bool Scanner::GetNextLexeme(Lexeme& lexeme)
423434 else if (*mpNextChar == ' /' )
424435 {
425436 // Either division operator or start of comment.
426- lexeme.category = Lexeme::Category:: kOther ;
437+ lexeme.category = Lexeme::kOther ;
427438 if (!CopyAndAdvance (lexeme))
428439 return true ;
429440 if (*mpNextChar == ' /' )
@@ -442,7 +453,7 @@ bool Scanner::GetNextLexeme(Lexeme& lexeme)
442453 else if ((*mpNextChar == ' !' ) || (*mpNextChar == ' <' ) || (*mpNextChar == ' >' ))
443454 {
444455 // Either single-character operator or comparison.
445- lexeme.category = Lexeme::Category:: kOther ;
456+ lexeme.category = Lexeme::kOther ;
446457 if (!CopyAndAdvance (lexeme))
447458 return true ;
448459 if (*mpNextChar == ' =' )
@@ -457,7 +468,7 @@ bool Scanner::GetNextLexeme(Lexeme& lexeme)
457468 else
458469 {
459470 // Single-character operator (or not a valid lexeme at all)
460- lexeme.category = Lexeme::Category:: kOther ;
471+ lexeme.category = Lexeme::kOther ;
461472 (void )CopyAndAdvance (lexeme);
462473 return true ;
463474 }
@@ -513,7 +524,7 @@ bool Scanner::GetNextDirective(Lexeme& lexeme)
513524 {
514525 POV_PARSER_ASSERT (*mpNextChar == ' #' );
515526 // Found what we've been looking for.
516- lexeme.category = Lexeme::Category:: kOther ;
527+ lexeme.category = Lexeme::kOther ;
517528 (void )CopyAndAdvance (lexeme);
518529 return true ;
519530 }
@@ -529,7 +540,7 @@ bool Scanner::GetNextWordLexeme(Lexeme& lexeme)
529540 POV_PARSER_ASSERT (!mEndOfStream );
530541 POV_PARSER_ASSERT (IsIdentifierChar1 (*mpNextChar));
531542
532- lexeme.category = Lexeme::Category:: kWord ;
543+ lexeme.category = Lexeme::kWord ;
533544
534545 // Read identifier name.
535546 while (CopyAndAdvance (lexeme) && IsIdentifierChar2 (*mpNextChar))
@@ -544,7 +555,7 @@ bool Scanner::GetNextFloatLiteralLexeme(Lexeme& lexeme)
544555{
545556 POV_PARSER_ASSERT (!mEndOfStream );
546557
547- lexeme.category = Lexeme::Category:: kFloatLiteral ;
558+ lexeme.category = Lexeme::kFloatLiteral ;
548559
549560 if (!GetNextFloatLiteralDigits (lexeme))
550561 POV_PARSER_ASSERT (false );
@@ -577,7 +588,7 @@ bool Scanner::GetNextFloatLiteralOrDotLexeme(Lexeme& lexeme)
577588 if (CopyAndAdvance (lexeme) && IsDecimalDigit (*mpNextChar))
578589 {
579590 // Valid start of a numeric literal, starting with the decimal point.
580- lexeme.category = Lexeme::Category:: kFloatLiteral ;
591+ lexeme.category = Lexeme::kFloatLiteral ;
581592
582593 // Read fractional part.
583594 if (!GetNextFloatLiteralDigits (lexeme))
@@ -593,7 +604,7 @@ bool Scanner::GetNextFloatLiteralOrDotLexeme(Lexeme& lexeme)
593604 else
594605 {
595606 // Dot operator.
596- lexeme.category = Lexeme::Category:: kOther ;
607+ lexeme.category = Lexeme::kOther ;
597608
598609 // Dot has already been copied to lexeme.
599610
@@ -667,7 +678,7 @@ bool Scanner::GetNextStringLiteralLexeme(Lexeme& lexeme)
667678 POV_PARSER_ASSERT (!mEndOfStream );
668679 POV_PARSER_ASSERT (*mpNextChar == ' "' );
669680
670- lexeme.category = Lexeme::Category:: kStringLiteral ;
681+ lexeme.category = Lexeme::kStringLiteral ;
671682
672683 if (!CopyAndAdvance (lexeme))
673684 return false ;
@@ -758,6 +769,32 @@ bool Scanner::EatNextBlockComment()
758769
759770// ------------------------------------------------------------------------------
760771
772+ bool Scanner::GetNextSignatureLexeme (Lexeme& lexeme, Lexeme::Category sigId, const Octet* sigToTest, size_t sigLength)
773+ {
774+ POV_PARSER_ASSERT (!mEndOfStream );
775+ POV_PARSER_ASSERT (mBase == 0 );
776+ POV_PARSER_ASSERT (mpNextChar == maBuffer);
777+
778+ if ((mpBufferEnd - mpNextChar) < sigLength)
779+ return false ;
780+ if (std::memcmp (mpNextChar, sigToTest, sigLength) != 0 )
781+ return false ;
782+
783+ lexeme.text = UTF8String (reinterpret_cast <const char *>(sigToTest), sigLength);
784+ lexeme.position = mCurrentPosition ;
785+ lexeme.category = sigId;
786+ mpNextChar += sigLength;
787+ mCurrentPosition .offset += sigLength;
788+ return true ;
789+ }
790+
791+ bool Scanner::GetNextSignatureLexeme (Lexeme& lexeme, Lexeme::Category sigId, const char * sigToTest)
792+ {
793+ return GetNextSignatureLexeme (lexeme, sigId, reinterpret_cast <const Octet*>(sigToTest), std::strlen (sigToTest));
794+ }
795+
796+ // ------------------------------------------------------------------------------
797+
761798bool Scanner::GetRaw (unsigned char * buffer, size_t size)
762799{
763800 POV_PARSER_ASSERT (!mEndOfStream );
0 commit comments