1010#include < yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
1111#include < yql/essentials/sql/v1/lexer/antlr4_pure/lexer.h>
1212#include < yql/essentials/sql/v1/lexer/antlr4_pure_ansi/lexer.h>
13+ #include < yql/essentials/sql/v1/lexer/regex/lexer.h>
1314#include < yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
1415#include < yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
1516#include < yql/essentials/providers/common/provider/yql_provider_names.h>
@@ -171,6 +172,7 @@ bool TestLexers(
171172 lexers.Antlr4PureAnsi = NSQLTranslationV1::MakeAntlr4PureAnsiLexerFactory ();
172173 auto lexerMain = NSQLTranslationV1::MakeLexer (lexers, settings.AnsiLexer , true , NSQLTranslationV1::ELexerFlavor::Default);
173174 auto lexerPure = NSQLTranslationV1::MakeLexer (lexers, settings.AnsiLexer , true , NSQLTranslationV1::ELexerFlavor::Pure);
175+ auto lexerRegex = NSQLTranslationV1::MakeRegexLexerFactory (settings.AnsiLexer )->MakeLexer ();
174176 TVector<NSQLTranslation::TParsedToken> mainTokens;
175177 if (!lexerMain->Tokenize (query, " " , [&](auto token) { mainTokens.push_back (token);}, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
176178 Cerr << issues.ToString ();
@@ -183,21 +185,40 @@ bool TestLexers(
183185 return false ;
184186 }
185187
186- bool hasErrors = false ;
187- if (mainTokens. size () != pureTokens. size ( )) {
188- hasErrors = true ;
189- Cerr << " Mismatch token count, main: " << mainTokens. size () << " , pure: " << pureTokens. size () << " \n " ;
188+ TVector<NSQLTranslation::TParsedToken> regexTokens ;
189+ if (!lexerRegex-> Tokenize (query, " " , [&]( auto token) { regexTokens. push_back (token);}, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS )) {
190+ Cerr << issues. ToString () ;
191+ return false ;
190192 }
191193
192- for (size_t i = 0 ; i < Min (mainTokens.size (), pureTokens.size ()); ++i) {
193- if (mainTokens[i].Name != pureTokens[i].Name || mainTokens[i].Content != pureTokens[i].Content ) {
194+ bool hasErrors = false ;
195+ auto check = [&](const char * name, const TVector<NSQLTranslation::TParsedToken>& otherTokens) {
196+ if (mainTokens.size () != otherTokens.size ()) {
194197 hasErrors = true ;
195- Cerr << " Mismatch token #" << i << " , main: " << mainTokens[i].Name << " :" << mainTokens[i].Content
196- << " , pure: " << pureTokens[i].Name << " :" << pureTokens[i].Content << " \n " ;
197- break ;
198+ Cerr << " Mismatch token count, main: " << mainTokens.size () << " , " << name << " : " << otherTokens.size () << " \n " ;
198199 }
199- }
200200
201+ TStringBuilder textBuilder;
202+
203+ for (size_t i = 0 ; i < Min (mainTokens.size (), otherTokens.size ()); ++i) {
204+ if (mainTokens[i].Name != otherTokens[i].Name || mainTokens[i].Content != otherTokens[i].Content ) {
205+ hasErrors = true ;
206+ Cerr << " Mismatch token #" << i << " , main: " << mainTokens[i].Name << " :" << mainTokens[i].Content
207+ << " , " << name << " : " << otherTokens[i].Name << " :" << otherTokens[i].Content << " \n " ;
208+ Cerr << " Text sample: [" ;
209+ TString text = textBuilder;
210+ constexpr size_t LexerContextSample = 50 ;
211+ Cerr << text.substr (text.size () >= LexerContextSample ? text.size () - LexerContextSample : 0u , LexerContextSample);
212+ Cerr << " ]\n " ;
213+ break ;
214+ }
215+
216+ textBuilder << mainTokens[i].Content ;
217+ }
218+ };
219+
220+ check (" pure" , pureTokens);
221+ check (" regex" , regexTokens);
201222 return !hasErrors;
202223}
203224
0 commit comments