forked from mathis-s/CoreDSL2LLVM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTokenStream.cpp
More file actions
230 lines (202 loc) · 5.75 KB
/
TokenStream.cpp
File metadata and controls
230 lines (202 loc) · 5.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#include "TokenStream.hpp"
#include "Lexer.hpp"
#include <cassert>
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
/// Returns the number of bits needed to store `n` as an unsigned value,
/// i.e. floor(log2(n)) + 1, with a minimum of 1 (so clog2(0) == 1).
///
/// Examples: clog2(1) == 1, clog2(2) == 2, clog2(255) == 8, clog2(256) == 9.
static int clog2 (uint64_t n)
{
    // The count-leading-zeros builtin is undefined for 0; a zero literal
    // still occupies one bit.
    if (n == 0) return 1;
    // Use the `unsigned long long` variant: `unsigned long` is only 32 bits
    // wide on LLP64/ILP32 targets, which would truncate `n`.
    // For n != 0 the result is always in [1, 64], so no clamping is needed.
    return 64 - __builtin_clzll(n);
}
/// Returns the next token and advances the stream.
///
/// Tokens are taken, in priority order, from:
///   1. the single-token lookahead slot filled by Peek();
///   2. the pre-lexed replay vector, if this stream was constructed from one;
///   3. the raw source text, lexed on demand starting at offset `i`.
///
/// Token(None) is returned at end of input and also whenever the text at the
/// current position cannot be lexed (unknown character, malformed sized
/// literal, unterminated string literal, ...). In those error cases `i` is
/// not advanced past the offending text.
Token TokenStream::Pop()
{
    // 1. Hand out the token buffered by a previous Peek().
    if (peekToken.has_value())
    {
        Token t = peekToken.value();
        peekToken.reset();
        return t;
    }

    // 2. Replay mode: serve tokens from the stored vector until exhausted.
    if (replayTokens.has_value())
    {
        if (replayIdx >= replayTokens->size())
            return Token(None);
        return (*replayTokens)[replayIdx++];
    }

    // 3. Lex from the source text. c_str() guarantees a terminating NUL at
    // srcC[len], which the one-character look-aheads below rely on.
    size_t len = src.length();
    const char* srcC = src.c_str();

    // The <cctype> classifiers have undefined behaviour for negative
    // arguments, which plain `char` produces for non-ASCII bytes.
    const auto uc = [](char c) { return static_cast<unsigned char>(c); };

    while (1)
    {
        // Skip whitespace
        while (isspace(uc(srcC[i])))
        {
            if (srcC[i] == '\n') lineNumber++;
            i++;
        }

        // Single Line Comment: skip up to (not including) the newline so the
        // whitespace loop above counts it.
        if (srcC[i] == '/' && srcC[i + 1] == '/')
        {
            while (i < len && srcC[i] != '\n')
                i++;
            continue;
        }

        /* Multi-line
           Comment */
        if (srcC[i] == '/' && srcC[i + 1] == '*')
        {
            i += 2;
            // Look forward for the terminator so that the '*' of the opening
            // "/*" can never be mistaken for part of "*/" (as in "/*/").
            while (i < len && !(srcC[i] == '*' && srcC[i + 1] == '/'))
            {
                if (srcC[i] == '\n') lineNumber++;
                i++;
            }
            if (i < len)
                i += 2; // consume the closing "*/"
            continue;
        }

        break;
    }

    if (i == len)
        return Token(None);

    { // Try lexing operator; LexOperator returns 0 when nothing matched.
        TokenType t;
        size_t nextI = LexOperator(srcC, i, &t);
        if (nextI != 0)
        {
            i = nextI;
            return Token(t);
        }
    }

    // Try lexing keywords or identifiers
    if (isalpha(uc(srcC[i])) || srcC[i] == '_')
    {
        size_t identLen = 1;
        while (isalnum(uc(srcC[i + identLen])) || srcC[i + identLen] == '_')
            identLen++;

        std::string_view substr(srcC + i, identLen);
        i += identLen;

        // `strings` maps text to an index; indices below NUM_KEYWORDS denote
        // keywords, everything else is an identifier whose public index is
        // the table index minus NUM_KEYWORDS.
        auto iter = strings.find(substr);
        if (iter != strings.end())
        {
            if (iter->second < NUM_KEYWORDS)
                return Token(static_cast<TokenType>(TOK_KW_START + iter->second));
            else
                return Token(iter->second - NUM_KEYWORDS, std::move(substr));
        }
        else
        {
            // Take the size before operator[] inserts the new entry so the
            // entry receives the previous element count as its index.
            const auto nextIdx = strings.size();
            uint32_t idx = (strings[substr] = nextIdx);
            return Token(idx - NUM_KEYWORDS, std::move(substr));
        }
    }

    // Int Literal
    if (isdigit(uc(srcC[i])))
    {
        // A size prefix of a sized literal ("<bits>'<base><digits>") must not
        // itself carry a 0x / 0b radix prefix.
        const bool hasRadixPrefix =
            srcC[i] == '0' && (srcC[i + 1] == 'b' || srcC[i + 1] == 'x');
        const bool isBase10 = !hasRadixPrefix;

        uint64_t literal;
        size_t iCopy = i; // commit to `i` only once the whole literal lexed
        {
            const char* startPtr = srcC + iCopy;
            char* endPtr;
            // Base 0: the C library picks the radix from the prefix.
            // NOTE(review): whether "0b" is accepted here depends on the C
            // library version - confirm for the supported toolchains.
            literal = strtoull(startPtr, &endPtr, 0);
            if (startPtr == endPtr) return Token(None);
            iCopy += endPtr - startPtr;
        }

        if (isBase10 && srcC[iCopy] == '\'')
        {
            // `literal` is the bit width here.
            // Do not support sized literals larger than 64 bit
            if (literal > 64)
                return Token(None);
            iCopy++;

            // Optional 's' marks the literal as signed.
            bool isSigned = (srcC[iCopy] == 's');
            if (isSigned)
                iCopy++;

            int base;
            switch (srcC[iCopy])
            {
                case 'h': base = 16; break;
                case 'd': base = 10; break;
                case 'o': base = 8; break;
                case 'b': base = 2; break;
                default: return Token(None);
            }
            iCopy++;

            size_t literal2;
            {
                const char* startPtr = srcC + iCopy;
                char* endPtr;
                literal2 = strtoull(startPtr, &endPtr, base);
                if (startPtr == endPtr) return Token(None);
                iCopy += endPtr - startPtr;
            }

            i = iCopy;
            // Arguments: signedness, bit width, value.
            return Token(isSigned, literal, literal2);
        }

        // Unsized literal: unsigned, with the minimal width that holds it.
        i = iCopy;
        return Token(false, clog2(literal), literal);
    }

    // String Literal: everything between two double quotes on one line.
    // No escape sequences are interpreted.
    if (srcC[i] == '\"')
    {
        size_t litLen = 1; // offset of the candidate closing quote
        if (i + litLen >= len)
            return Token(None);
        while (srcC[i + litLen] != '\"')
        {
            // Strings may not span lines.
            if (srcC[i + litLen] == '\n')
                return Token(None);
            litLen++;
            if (i + litLen >= len)
                return Token(None);
        }
        // The view excludes both quotes and points into `src`.
        Token t = Token(std::string_view(srcC + i + 1, litLen - 1));
        i += litLen + 1;
        return t;
    }

    // Nothing matched: unknown character.
    return Token(None);
}
/// Returns the next token without consuming it.
///
/// The token is obtained via Pop() on first use and parked in `peekToken`;
/// repeated calls return the same token until Pop() takes it out again.
Token TokenStream::Peek()
{
    if (peekToken.has_value())
        return peekToken.value();

    // Nothing buffered yet: fetch one token and keep it for later.
    peekToken = Pop();
    return peekToken.value();
}
/// Reads the complete contents of the file at `path` into a string.
///
/// If the file cannot be opened, a diagnostic is written to stderr and the
/// process is terminated via exit(-1); the function does not return then.
static std::string read_file_as_str (std::string path)
{
    std::ifstream input(path);
    if (input.fail())
    {
        fprintf(stderr, "Aborting! File does not exist: %s\n", path.c_str());
        exit(-1);
    }

    // Slurp everything from the current position to end-of-stream.
    using CharIter = std::istreambuf_iterator<char>;
    return std::string(CharIter(input), CharIter());
}
/// Returns the identifier index for `ident`, registering it in `strings`
/// if it is not present yet.
///
/// Table entries are stored with an offset of NUM_KEYWORDS; the returned
/// value has that offset removed.
unsigned TokenStream::GetIdentIdx(std::string_view ident)
{
    const auto found = strings.find(ident);
    if (found != strings.end())
        return found->second - NUM_KEYWORDS;

    // New entry: it receives the element count from before the insertion.
    const auto freshIdx = strings.size();
    return (strings[ident] = freshIdx) - NUM_KEYWORDS;
}
/// Returns the text of the identifier with index `identIdx`.
///
/// The lookup walks `strings` from its beginning to position
/// identIdx + NUM_KEYWORDS and asserts that the entry found there carries
/// exactly that table index.
std::string_view TokenStream::GetIdent(unsigned identIdx)
{
    // Convert the public identifier index into a table position.
    const unsigned tablePos = identIdx + NUM_KEYWORDS;

    auto entry = strings.begin();
    std::advance(entry, tablePos);

    assert(entry->second == tablePos);
    return entry->first;
}
// File-backed constructor: stores the path and loads the whole file into
// `src` via read_file_as_str(), which terminates the process if the file
// cannot be opened.
// NOTE(review): `srcPath` is a named rvalue reference and therefore an lvalue
// inside the initializer list, so `path(srcPath)` copies rather than moves.
// Changing that to a move is only safe if `path` is known to be initialized
// after `src` no longer needs `srcPath` - confirm against the member
// declaration order in the header.
TokenStream::TokenStream(std::string&& srcPath) : path(srcPath), src(read_file_as_str(srcPath)) {}
// Replay constructor: no file is read. The stream takes ownership of an
// already-lexed token vector, starts with an empty source string and the
// given line number, and records every identifier token in `strings` under
// its index offset by NUM_KEYWORDS.
TokenStream::TokenStream(std::string&& srcPath, std::vector<Token> &&tokens,
                         int startLine)
    : path(srcPath), src(""), lineNumber(startLine),
      replayTokens(std::move(tokens))
{
    for (const auto &tok : *replayTokens)
    {
        // Only identifier tokens carry a name/index pair to register.
        if (tok.type != Identifier)
            continue;
        strings[tok.ident.str] = tok.ident.idx + NUM_KEYWORDS;
    }
}