77#ifndef XGRAMMAR_EARLEY_PARSER_H_
88#define XGRAMMAR_EARLEY_PARSER_H_
99#include < cstdint>
10- #include < map>
1110#include < ostream>
1211#include < queue>
1312#include < unordered_set>
@@ -219,9 +218,9 @@ class EarleyParser {
219218 * We divide the parser states into three categories:
220219 * - Scanable (which will be stored in scanable_state_history_).
221220 * - Predictable (if it predicts a new rule successfully, then it will be stored in
222- * rule_id_to_completeable_states ).
223- * - Completeable (which can perform a completion operation).
224- * A state will be stored in rule_id_to_completeable_states_ if it can be completed,
221+ * rule_id_to_completable_states_).
222+ * - Completable (which can perform a completion operation).
223+ * A state will be stored in rule_id_to_completable_states_ if it can be completed,
225224 * and it will be stored in scanable_state_history_ if it can be scanned. Otherwise,
226225 * it will be discarded.
227226 */
@@ -238,10 +237,10 @@ class EarleyParser {
238237 std::vector<bool > is_completed_;
239238
240239 /* !
241- * \brief rule_id_to_completeable_states [i][j] is the i pos j rule_id states. Earley
240+ * \brief rule_id_to_completable_states [i][j] is the i pos j rule_id states. Earley
242241 * parser needs it to complete.
243242 */
244- std::vector <std::multimap <int32_t , ParserState>> rule_id_to_completeable_states_ ;
243+ Compact2DArray <std::pair <int32_t , ParserState>> rule_id_to_completable_states_ ;
245244
246245 /* !
247246 * \brief The states history. state_stack[i] is a vector storing the states after accepting the
@@ -261,9 +260,6 @@ class EarleyParser {
261260 /* ! \brief The class is used to check if a state has been added into the queue. */
262261 RepeatDetector tmp_states_visited_in_queue_;
263262
264- /* ! \brief The targets of the fsm edges, used in AdvanceFsm. */
265- std::vector<int > tmp_fsm_targets_;
266-
267263 /* ! \brief Check if the stop token is accepted. */
268264 bool stop_token_is_accepted_ = false ;
269265
@@ -288,15 +284,15 @@ class EarleyParser {
288284 * of the grammar is used to check if the grammar is completed,
289285 * so it should be added into the next states.
290286 */
291- void Complete (const ParserState& state, const GrammarExpr& grammar_expr );
287+ void Complete (const ParserState& state);
292288
293289 /* !
294290 * \brief The prediction operation of the Earley parser.
295291 * \return First: If the state is scanable, or the state is the end of the grammar,
296292 * then return true, otherwise return false.
297293 * \return Second: If the state is completable, then return true, otherwise return false.
298294 */
299- std::pair<bool , bool > Predict (const ParserState& state, const GrammarExpr& grammar_expr );
295+ std::pair<bool , bool > Predict (const ParserState& state);
300296
301297 /* !
302298 * \brief Handle the unexpanded rule, used for pushing initial state.
@@ -319,6 +315,12 @@ class EarleyParser {
319315 const ParserState& state, const GrammarExpr& grammar_expr, const GrammarExpr* sub_grammar_expr
320316 );
321317
318+ /* !
319+ * \brief Expand the rule, used for RuleRef and kTagDispatch.
320+ * \param state The state to be expanded, and it should be on the FSM.
321+ */
322+ void ExpandNextRuleRefElementOnFSM (const ParserState& state);
323+
322324 /* !
323325 * \brief Advance the parser to the next state, where the sub sequence is kCharacterClass.
324326 * \param state The state to be advanced.
@@ -359,7 +361,7 @@ class EarleyParser {
359361 * \param cur_sequence The sequence of the current state.
360362 * \return The next state, Invalid state if the character is not accepted.
361363 */
362- void AdvanceFsm (const ParserState& state, const uint8_t ch, const GrammarExpr& cur_sequence );
364+ void AdvanceFsm (const ParserState& state, const uint8_t ch);
363365
364366 /* !
365367 * \brief Enqueue the state into the queue.
@@ -373,6 +375,17 @@ class EarleyParser {
373375 }
374376 }
375377
378+ /* !
379+ * \brief Enqueue the state into the queue, without prediction and completion. The state is only marked as visited and appended to tmp_states_to_be_added_.
380+ * \param state The state to be enqueued. It is skipped when IsStateVisitedInQueue(state) is true.
381+ */
382+ void EnqueueWithoutProcessing (const ParserState& state) {
383+ if (!IsStateVisitedInQueue (state)) {
384+ tmp_states_visited_in_queue_.Insert (state);  // Mark as visited; presumably what IsStateVisitedInQueue consults, so duplicates are dropped.
385+ tmp_states_to_be_added_.push_back (state);  // Queue the state as-is; no Predict/Complete call is made here.
386+ }
387+ }
388+
376389 public:
377390 /* !
378391 * \brief Constructor of the Earley parser.
@@ -435,7 +448,7 @@ class EarleyParser {
435448 * \param state The state to be pushed.
436449 */
437450 void PushOneStateToCheck (const ParserState& state) {
438- rule_id_to_completeable_states_. emplace_back ( );
451+ rule_id_to_completable_states_. PushBack (std::vector<std::pair< int32_t , ParserState>>() );
439452 is_completed_.push_back (is_completed_.back ());
440453 scanable_state_history_.PushBack (&state, 1 );
441454 return ;
0 commit comments