-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpredictive_parser.cpp
More file actions
167 lines (154 loc) · 6.31 KB
/
predictive_parser.cpp
File metadata and controls
167 lines (154 loc) · 6.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/**
Compiler Phase 2: PRSGEN
predictive_parser.cpp
Purpose: Implements the table-driven predictive (LL(1)) parsing loop, using a STACK, the INPUT BUFFER and the PARSING TABLE to parse the input program.
@author(s) Mostafa Yousry
@version 1.0
@date 21/4/2020
*/
#include "predictive_parser.h"
#include <utility>
#include <numeric>
#include <algorithm>
#include <iterator>
#define END_MARKER "$"
#define EPSILON "\\L"
#define SYNCH_INDEX -1
#define EMPTY_CELL_INDEX -2
/**
 * Builds the parser from the grammar and wires it to the lexical analyzer.
 *
 * @param lexicalAnalyzerDriver source of input tokens (held by reference).
 * @param non_terminals grammar rules in declaration order; the first entry
 *        names the start symbol, so the vector must be non-empty
 *        (at(0) throws std::out_of_range otherwise).
 * @param terminals names of all terminal symbols appearing in the grammar.
 * @param outputFilePath file that receives the derivation / error report.
 */
PredicativeParser::PredicativeParser(LexicalAnalyzerDriver& lexicalAnalyzerDriver,
        std::vector<NonTerminal> non_terminals, // map of name and object non terminals
        std::unordered_set<std::string>& terminals,
        const std::string& outputFilePath)
        :lexical_analyzer_{lexicalAnalyzerDriver},
        terminals_{terminals}
{
    starting_non_terminal_ = non_terminals.at(0).getName_();
    // Index the non-terminals by name for O(1) lookup during parsing.
    // The vector was taken by value, so its elements can be moved instead of
    // copied (braced-init evaluates getName_() before the move, left to right).
    for (auto&& non_terminal: non_terminals) {
        non_terminals_.insert({non_terminal.getName_(), std::move(non_terminal)});
    }
    output_file_.open(outputFilePath);
}
/**
 * Runs the predictive-parsing main loop.
 *
 * Pushes "$" and the start symbol, then repeatedly dispatches on the stack
 * top: terminals are matched against the lookahead, non-terminals are
 * expanded via the parsing table. Writes the derivation and any error
 * messages to output_file_ and closes it when done.
 */
void PredicativeParser::Parse()
{
    if (checkTerminalsAndRegularExpressions()) {
        // Push the end marker "$" and start non terminal to the stack
        stack_.push(END_MARKER);
        stack_.push(starting_non_terminal_);
        // Call first token from lexical analyzer
        Token* current_token = lexical_analyzer_.GetNextToken();
        std::string stack_top_entry = stack_.top();
        while (stack_.top() != END_MARKER) {
            // If the top of stack is a terminal
            if (terminals_.find(stack_top_entry) != terminals_.end()) {
                ProceedOnTerminal(stack_top_entry, &current_token);
            }
            // If the top of stack is a non-terminal
            else {
                ProceedOnNonTerminal(stack_top_entry, &current_token);
            }
            // A nullptr lookahead means the lexical analyzer couldn't identify a token
            if (current_token == nullptr) {
                output_file_ << "Illegal token. Lexical analyzer couldn't identify a token" << endl;
                break;
            }
        }
        if (current_token != nullptr) {
            // Check if there is still tokens in the input buffer (driver.getNextToken)
            if (current_token->GetTokenName() != END_MARKER) {
                output_file_ << "Parsing ended while there are still tokens" << endl;
            }
        }
        delete current_token;
    }
    else {
        output_file_ << "Terminals in grammar rules and regular expressions in lexical rules don't match" << endl;
    }
    output_file_.close();
}
/**
 * Handles a terminal on the stack top.
 *
 * On a match the terminal is popped, the consumed token is freed and the next
 * lookahead is fetched (a synthetic "$" token once input is exhausted). On a
 * mismatch, panic-mode recovery reports the missing terminal and pops it.
 *
 * @param stack_top_entry in/out: stack top; updated after every pop.
 * @param current_token   in/out: the lookahead token; replaced on match.
 *                        May be set to nullptr when the analyzer cannot
 *                        identify a token — Parse() checks for that.
 */
void PredicativeParser::ProceedOnTerminal(string& stack_top_entry, Token** current_token)
{
    // If the terminal matches the current token
    if (stack_top_entry == (*current_token)->GetTokenName()) {
        // Output matching message in output file
        output_file_ << "Match " << stack_top_entry << endl;
        // Pop the top entry of the stack
        stack_.pop();
        stack_top_entry = stack_.top();
        // The matched token is consumed here; free it before fetching the
        // next lookahead (the original leaked every consumed token).
        delete *current_token;
        if (lexical_analyzer_.IsInputOver()) {
            // Synthesize the end-marker token once the input is exhausted.
            *current_token = new Token("$", "$");
        }
        else {
            // GetNextToken() already returns nullptr on failure, so no extra
            // null-check branching is needed.
            *current_token = lexical_analyzer_.GetNextToken();
        }
    }
    // If the terminal doesn't match the current token
    else {
        // Output missing token error message in output file
        output_file_ << "Error, missing " << stack_top_entry << endl;
        // Pop the top entry of the stack
        stack_.pop();
        stack_top_entry = stack_.top();
    }
}
/**
 * Handles a non-terminal on the stack top by consulting the parsing table.
 *
 * Three outcomes: a synch entry drops the non-terminal, an empty cell drops
 * the offending token, and a real production replaces the non-terminal with
 * its right-hand side (pushed in reverse so the leftmost symbol is on top).
 *
 * @param stack_top_entry in/out: stack top; updated after every pop/push.
 * @param current_token   in/out: the lookahead token; replaced when the
 *                        empty-cell recovery consumes it. May become nullptr
 *                        on lexical failure — Parse() checks for that.
 */
void PredicativeParser::ProceedOnNonTerminal(string& stack_top_entry, Token** current_token)
{
    // Look up by const reference: the original copied the NonTerminal (and
    // all of its production rules) on every expansion.
    const NonTerminal& non_terminal = non_terminals_.at(stack_top_entry);
    int production_rule_index = non_terminal.GetProductionRuleIndex((*current_token)->GetTokenName());
    switch (production_rule_index) {
    case SYNCH_INDEX: // If there is a synchronizing token
        // Report the error and drop the non-terminal from the stack.
        output_file_ << "Illegal " << stack_top_entry << " Drop non terminal : " << stack_top_entry << endl;
        stack_.pop();
        stack_top_entry = stack_.top();
        break;
    case EMPTY_CELL_INDEX: // If there isn't a production rule under the token (empty cell)
        // Report the error and skip the offending token.
        output_file_ << "Illegal " << stack_top_entry << " Drop token : " << (*current_token)->GetTokenName() << endl;
        // The dropped token is consumed here; free it before replacing it
        // (the original leaked it).
        delete *current_token;
        if (lexical_analyzer_.IsInputOver()) {
            // Synthesize the end-marker token once the input is exhausted.
            *current_token = new Token("$", "$");
        }
        else {
            // GetNextToken() already returns nullptr on failure.
            *current_token = lexical_analyzer_.GetNextToken();
        }
        break;
    default: { // If there is a production rule under the token
        std::vector<std::string> production_rule = non_terminal.GetProductionRule(production_rule_index);
        // Join the rule symbols with single spaces for the derivation line.
        // Linear append replaces the original std::accumulate lambda, which
        // rebuilt the string on every step (quadratic).
        std::string production_rule_string;
        for (const auto& symbol : production_rule) {
            if (!production_rule_string.empty()) {
                production_rule_string += ' ';
            }
            production_rule_string += symbol;
        }
        output_file_ << stack_top_entry << " -> " << production_rule_string << endl;
        stack_.pop();
        // Epsilon productions only pop; anything else is pushed in reverse.
        if (production_rule_string != EPSILON) {
            for (auto iterator = production_rule.rbegin(); iterator != production_rule.rend(); ++iterator) {
                stack_.push(*iterator);
            }
        }
        stack_top_entry = stack_.top();
        break;
    }
    }
}
/**
 * Verifies that every terminal used in the grammar has a matching regular
 * expression name from the lexical rules.
 *
 * @return true when the terminals match (no terminal is missing a regular
 *         expression), false otherwise.
 *
 * BUG FIX: the original returned !difference.empty(), i.e. it reported
 * "match" exactly when some terminal had NO regular expression, and Parse()
 * then refused to run on well-formed grammars. The caller treats true as
 * "they match", so the correct result is difference.empty().
 */
bool PredicativeParser::checkTerminalsAndRegularExpressions()
{
    std::set<std::string> difference;
    std::set<std::string> regular_expressions_names = lexical_analyzer_.GetRegularExpressionsNames();
    // difference = terminals that do NOT appear among the regex names.
    // set_difference requires sorted ranges; std::set iterates in order,
    // and terminals_ must be sorted likewise for this to be valid.
    std::set_difference(terminals_.begin(), terminals_.end(), regular_expressions_names.begin(),
            regular_expressions_names.end(),
            std::inserter(difference, difference.end()));
    return difference.empty();
}
// Destructor: make sure the report stream is flushed and released even when
// Parse() was never called (close() on an already-closed stream is harmless
// here, since the object is being destroyed anyway).
PredicativeParser::~PredicativeParser()
{
    output_file_.close();
}