1111
1212#include < boost/algorithm/string.hpp>
1313#include < boost/lexical_cast.hpp>
14+ #include < log_surgeon/Constants.hpp>
1415#include < log_surgeon/SchemaParser.hpp>
1516#include < spdlog/spdlog.h>
1617#include < string_utils/string_utils.hpp>
@@ -120,12 +121,8 @@ ErrorCode read_list_of_paths(string const& list_path, vector<string>& paths) {
120121// TODO: This duplicates code in log_surgeon/parser.tpp; we should implement a
121122// SearchParser in log_surgeon and use it here instead. Specifically, the duplicated parts are
122123// the initialization of lexer.m_symbol_id, the contains_delimiter error, and the add_rule logic.
123- void load_lexer_from_file (
124- std::string const & schema_file_path,
125- bool reverse,
126- log_surgeon::lexers::ByteLexer& lexer
127- ) {
128- log_surgeon::SchemaParser sp;
124+ void
125+ load_lexer_from_file (std::string const & schema_file_path, log_surgeon::lexers::ByteLexer& lexer) {
129126 std::unique_ptr<log_surgeon::SchemaAST> schema_ast
130127 = log_surgeon::SchemaParser::try_schema_file (schema_file_path);
131128 if (!lexer.m_symbol_id .empty ()) {
@@ -134,52 +131,52 @@ void load_lexer_from_file(
134131
135132 // cTokenEnd and cTokenUncaughtString never need to be added as a rule to the lexer as they are
136133 // not parsed
137- lexer.m_symbol_id [log_surgeon::cTokenEnd] = static_cast <int >(log_surgeon::SymbolID::TokenEndID );
134+ lexer.m_symbol_id [log_surgeon::cTokenEnd] = static_cast <int >(log_surgeon::SymbolId::TokenEnd );
138135 lexer.m_symbol_id [log_surgeon::cTokenUncaughtString]
139- = static_cast <int >(log_surgeon::SymbolID::TokenUncaughtStringID );
136+ = static_cast <int >(log_surgeon::SymbolId::TokenUncaughtString );
140137 // cTokenInt, cTokenFloat, cTokenFirstTimestamp, and cTokenNewlineTimestamp each have unknown
141138 // rule(s) until specified by the user so can't be explicitly added and are done by looping over
142139 // schema_vars (user schema)
143- lexer.m_symbol_id [log_surgeon::cTokenInt] = static_cast <int >(log_surgeon::SymbolID::TokenIntId );
140+ lexer.m_symbol_id [log_surgeon::cTokenInt] = static_cast <int >(log_surgeon::SymbolId::TokenInt );
144141 lexer.m_symbol_id [log_surgeon::cTokenFloat]
145- = static_cast <int >(log_surgeon::SymbolID::TokenFloatId );
142+ = static_cast <int >(log_surgeon::SymbolId::TokenFloat );
146143 lexer.m_symbol_id [log_surgeon::cTokenFirstTimestamp]
147- = static_cast <int >(log_surgeon::SymbolID::TokenFirstTimestampId );
144+ = static_cast <int >(log_surgeon::SymbolId::TokenFirstTimestamp );
148145 lexer.m_symbol_id [log_surgeon::cTokenNewlineTimestamp]
149- = static_cast <int >(log_surgeon::SymbolID::TokenNewlineTimestampId );
146+ = static_cast <int >(log_surgeon::SymbolId::TokenNewlineTimestamp );
150147 // cTokenNewline is not added in schema_vars and can be explicitly added as '\n' to catch the
151148 // end of non-timestamped log messages
152149 lexer.m_symbol_id [log_surgeon::cTokenNewline]
153- = static_cast <int >(log_surgeon::SymbolID::TokenNewlineId );
150+ = static_cast <int >(log_surgeon::SymbolId::TokenNewline );
154151
155- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenEndID )] = log_surgeon::cTokenEnd;
156- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenUncaughtStringID )]
152+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenEnd )] = log_surgeon::cTokenEnd;
153+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenUncaughtString )]
157154 = log_surgeon::cTokenUncaughtString;
158- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenIntId )] = log_surgeon::cTokenInt;
159- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenFloatId )]
155+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenInt )] = log_surgeon::cTokenInt;
156+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenFloat )]
160157 = log_surgeon::cTokenFloat;
161- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenFirstTimestampId )]
158+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenFirstTimestamp )]
162159 = log_surgeon::cTokenFirstTimestamp;
163- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenNewlineTimestampId )]
160+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenNewlineTimestamp )]
164161 = log_surgeon::cTokenNewlineTimestamp;
165- lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolID::TokenNewlineId )]
162+ lexer.m_id_symbol [static_cast <int >(log_surgeon::SymbolId::TokenNewline )]
166163 = log_surgeon::cTokenNewline;
167164
168165 lexer.add_rule (
169166 lexer.m_symbol_id [" newLine" ],
170167 std::move (
171168 std::make_unique<log_surgeon::finite_automata::RegexASTLiteral<
172- log_surgeon::finite_automata::RegexNFAByteState >>(
169+ log_surgeon::finite_automata::ByteNfaState >>(
173170 log_surgeon::finite_automata::RegexASTLiteral<
174- log_surgeon::finite_automata::RegexNFAByteState >(' \n ' )
171+ log_surgeon::finite_automata::ByteNfaState >(' \n ' )
175172 )
176173 )
177174 );
178175
179176 for (auto const & delimiters_ast : schema_ast->m_delimiters ) {
180177 auto * delimiters_ptr = dynamic_cast <log_surgeon::DelimiterStringAST*>(delimiters_ast.get ());
181178 if (delimiters_ptr != nullptr ) {
182- lexer.add_delimiters (delimiters_ptr->m_delimiters );
179+ lexer.set_delimiters (delimiters_ptr->m_delimiters );
183180 }
184181 }
185182 vector<uint32_t > delimiters;
@@ -203,7 +200,7 @@ void load_lexer_from_file(
203200 // transform '.' from any-character into any non-delimiter character
204201 rule->m_regex_ptr ->remove_delimiters_from_wildcard (delimiters);
205202
206- bool is_possible_input[ log_surgeon::cUnicodeMax] = { false };
203+ std::array< bool , log_surgeon::cSizeOfUnicode> is_possible_input{ };
207204 rule->m_regex_ptr ->set_possible_inputs_to_true (is_possible_input);
208205 bool contains_delimiter = false ;
209206 uint32_t delimiter_name;
@@ -242,10 +239,6 @@ void load_lexer_from_file(
242239 }
243240 lexer.add_rule (lexer.m_symbol_id [rule->m_name ], std::move (rule->m_regex_ptr ));
244241 }
245- if (reverse) {
246- lexer.generate_reverse ();
247- } else {
248- lexer.generate ();
249- }
242+ lexer.generate ();
250243}
251244} // namespace clp
0 commit comments