sammine-lang
Loading...
Searching...
No Matches
Token.h
Go to the documentation of this file.
1#pragma once
#include "util/Utilities.h"
#include <cassert>
#include <cstddef>
#include <functional>
#include <iterator>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
9
12
13namespace sammine_lang {
/// Every token kind that can be stored in Token::tok_type.
///
/// The enumerators are unscoped and rely on their implicit values, so their
/// relative order must not change.
enum TokenType {
  // Arithmetic operators
  TokADD, // +
  TokSUB, // -
  TokMUL, // *
  TokDIV, // /
  TokMOD, // %

  // Compound assignment and increment / decrement
  TokAddAssign, // +=
  TokAddIncr,   // ++
  TokSubAssign, // -=
  TokSubDecr,   // --
  TokMulAssign, // *=
  TokDivAssign, // /=

  // Logical and bitwise operators.
  // Note the naming: TokAND / TokOR are the doubled (logical) forms, while
  // TokAndLogical / TokORLogical are the single-character forms that
  // Token::is_bitwise() groups with the bitwise operators.
  TokAND,        // &&
  TokAndLogical, // &
  TokOR,         // ||
  TokORLogical,  // |
  TokPipe,       // |>
  TokXOR,        // ^
  TokSHL,        // <<
  TokSHR,        // >>

  // Comparison operators
  TokEQUAL,     // ==
  TokLESS,      // <
  TokLessEqual, // <=

  TokGREATER,      // >
  TokGreaterEqual, // >=

  // Assignment and negation
  TokASSIGN,   // =
  TokNOT,      // !
  TokNOTEqual, // !=

  // Exponentiation and rounding division
  TokEXP,      // **
  TokFloorDiv, // /_
  TokCeilDiv,  // /^

  // Brackets
  TokLeftParen,    // (
  TokRightParen,   // )
  TokLeftCurly,    // {
  TokRightCurly,   // }
  TokLeftBracket,  // [
  TokRightBracket, // ]

  // Punctuation
  TokComma,       // ,
  TokDot,         // .
  TokSemiColon,   // ;
  TokColon,       // :
  TokDoubleColon, // ::

  // Keywords and declaration-related symbols
  TokReturn,    // return
  TokFunc,      // fn
  TokStruct,    // struct
  TokPtr,       // ptr
  TokAlloc,     // alloc
  TokFree,      // free
  TokLen,       // len
  TokArrow,     // ->
  TokLet,       // let
  TokMUT,       // mut
  TokReuse,     // reuse
  TokExport,    // export
  TokImport,    // import
  TokAs,        // as
  TokEllipsis,  // ...
  TokTypeclass, // typeclass
  TokInstance,  // instance
  TokCase,      // case
  TokFatArrow,  // =>

  // Identifiers and literals
  TokID,    // an identifier
  TokStr,   // a string
  TokNum,   // a number
  TokTrue,  // boolean true
  TokFalse, // boolean false
  TokChar,  // a char literal
  TokTick,  // ' (linear pointer prefix)

  // Control flow
  TokIf,    // if
  TokElse,  // else
  TokWhile, // while

  TokType, // type

  // Comments and sentinels
  TokSingleComment, // single-line comment (spelled "#" in TokenMap)
  TokEOF,           // end of input
  TokINVALID,       // unrecognized input
};
109
110static const std::map<TokenType, std::string> TokenMap = {
111 {TokADD, "+"},
112 {TokSUB, "-"},
113 {TokMUL, "*"},
114 {TokDIV, "/"},
115 {TokMOD, "%"},
116
117 {TokAddAssign, "+="},
118 {TokAddIncr, "++"},
119 {TokSubAssign, "-="},
120 {TokSubDecr, "--"},
121 {TokMulAssign, "*="},
122 {TokDivAssign, "/="},
123
124 {TokAND, "&&"},
125 {TokAndLogical, "&"},
126 {TokOR, "||"},
127 {TokORLogical, "|"},
128 {TokPipe, "|>"},
129 {TokXOR, "^"},
130 {TokSHL, "<<"},
131 {TokSHR, ">>"},
132 {TokEQUAL, "=="},
133 {TokLESS, "<"},
134 {TokLessEqual, "<="},
135
136 {TokGREATER, ">"},
137 {TokGreaterEqual, ">="},
138
139 {TokASSIGN, "="},
140 {TokNOT, "!"},
141 {TokNOTEqual, "!="},
142 {TokEXP, "**"},
143 {TokFloorDiv, "/_"},
144 {TokCeilDiv, "/^"},
145
146 {TokLeftParen, "("},
147 {TokRightParen, ")"},
148 {TokLeftCurly, "{"},
149 {TokRightCurly, "}"},
150 {TokLeftBracket, "["},
151 {TokRightBracket, "]"},
152
153 {TokComma, ","},
154 {TokDot, "."},
155 {TokSemiColon, ";"},
156 {TokColon, ":"},
157 {TokDoubleColon, "::"},
158
159 // TokFunction
160 {TokReturn, "return"},
161 {TokFunc, "fn"},
162 {TokArrow, "->"},
163 {TokLet, "let"},
164 {TokMUT, "mut"},
165 {TokStruct, "struct"},
166 {TokPtr, "ptr"},
167 {TokAlloc, "alloc"},
168 {TokFree, "free"},
169 {TokLen, "len"},
170 {TokReuse, "reuse"},
171 {TokExport, "export"},
172 {TokImport, "import"},
173 {TokAs, "as"},
174 {TokEllipsis, "..."},
175 {TokTypeclass, "typeclass"},
176 {TokInstance, "instance"},
177 {TokCase, "case"},
178 {TokFatArrow, "=>"},
179 {TokID, "identifier"},
180
181 {TokNum, "number"},
182 {TokChar, "char literal"},
183 {TokTick, "'"},
184
185 {TokType, "type"},
186
187 {TokIf, "if"},
188 {TokElse, "else"},
189 {TokWhile, "while"},
190
191 // TokCOMMENTS
192 {TokSingleComment, "#"},
193 {TokEOF, "EOF"},
194 {TokINVALID, "UNRECOGNIZED"},
195};
196
199
202class Token {
203 using Location = sammine_util::Location;
204
205public:
206 TokenType tok_type;
207 std::string lexeme;
208 Location location;
209 Token() = delete;
210 Token(TokenType type, std::string lexeme, Location location)
211 : tok_type(type), lexeme(std::move(lexeme)), location(location) {}
212 bool is_comparison() const {
213 return tok_type == TokLESS || tok_type == TokGreaterEqual ||
214 tok_type == TokLessEqual || tok_type == TokGREATER ||
215 tok_type == TokEQUAL || tok_type == TokNOTEqual;
216 }
217 bool is_assign() const { return tok_type == TokASSIGN; }
218 bool is_logical() const { return tok_type == TokOR || tok_type == TokAND; }
219 bool is_bitwise() const {
220 return tok_type == TokAndLogical || tok_type == TokORLogical ||
221 tok_type == TokXOR || tok_type == TokSHL || tok_type == TokSHR;
222 }
223 Location get_location() const { return this->location; }
224};
225
227
230class TokenStream {
231 using TokenList = std::list<std::shared_ptr<Token>>;
232 using Iterator = TokenList::iterator;
233
234 TokenList TokStream;
235 Iterator cursor;
236 Iterator rollback_cursor;
237 bool error;
238 std::function<void()> tokenProducer;
239
240 struct SplitRecord {
241 Iterator inserted;
242 std::shared_ptr<Token> original;
243 };
244 std::vector<SplitRecord> pending_splits;
245 size_t splits_at_mark = 0;
246
247 void ensureToken() {
248 while (cursor == TokStream.end() && tokenProducer)
249 tokenProducer();
250 }
251
252public:
253 std::vector<std::shared_ptr<Token>> ErrStream;
254
255 TokenStream()
256 : cursor(TokStream.end()), rollback_cursor(TokStream.end()),
257 error(false) {}
258
259 void setTokenProducer(std::function<void()> producer) {
260 tokenProducer = std::move(producer);
261 }
262
263 void push_back(const std::shared_ptr<Token> &token) {
264 if (token->tok_type == TokINVALID) {
265 error = true;
266 ErrStream.push_back(token);
267 return;
268 }
269 bool was_at_end = (cursor == TokStream.end());
270 TokStream.push_back(token);
271 if (was_at_end)
272 cursor = std::prev(TokStream.end());
273 }
274
275 bool hasErrors() const { return error; }
276
277 void push_back(const Token &token) {
278 this->push_back(std::make_shared<Token>(token));
279 }
280
281 void mark_rollback() {
282 rollback_cursor = cursor;
283 splits_at_mark = pending_splits.size();
284 }
285
286 void rollback() {
287 while (pending_splits.size() > splits_at_mark) {
288 auto &r = pending_splits.back();
289 *std::prev(r.inserted) = r.original;
290 TokStream.erase(r.inserted);
291 pending_splits.pop_back();
292 }
293 cursor = rollback_cursor;
294 }
295
296 void rollback(size_t rollback_count) {
297 for (size_t i = 0; i < rollback_count; ++i) {
298 assert(cursor != TokStream.begin() &&
299 "Cannot rollback past the beginning");
300 --cursor;
301 }
302 }
303
304 std::shared_ptr<Token> &exhaust_until(TokenType tokType) {
305 if (tokType == TokenType::TokEOF) {
306 while (tokenProducer) {
307 if (!TokStream.empty() && TokStream.back()->tok_type == TokEOF)
308 break;
309 tokenProducer();
310 }
311 cursor = std::prev(TokStream.end());
312 return TokStream.back();
313 }
314 while (!isEnd()) {
315 ensureToken();
316 if ((*cursor)->tok_type == tokType) {
317 auto &ref = *cursor;
318 ++cursor;
319 return ref;
320 } else {
321 ++cursor;
322 }
323 }
324 return TokStream.back();
325 }
326
327 bool isEnd() {
328 ensureToken();
329 return cursor != TokStream.end() && (*cursor)->tok_type == TokEOF;
330 }
331
332 std::shared_ptr<Token> peek() {
333 ensureToken();
334 return *cursor;
335 }
336
337 std::shared_ptr<Token> consume() {
338 ensureToken();
339 auto token = *cursor;
340 if (token->tok_type != TokEOF)
341 ++cursor;
342 return token;
343 }
344
345 sammine_util::Location currentLocation() {
346 ensureToken();
347 if (!TokStream.empty())
348 return (*cursor)->get_location();
349 return {};
350 }
351
352 void split_current(TokenType first_type, const std::string &first_lex,
353 TokenType second_type, const std::string &second_lex) {
354 ensureToken();
355 auto original = *cursor;
356 auto loc = original->location;
357 *cursor = std::make_shared<Token>(first_type, first_lex, loc);
358 auto inserted = TokStream.insert(
359 std::next(cursor),
360 std::make_shared<Token>(second_type, second_lex, loc));
361 pending_splits.push_back({inserted, original});
362 }
363};
364} // namespace sammine_lang
Holds classes and functionalities for dealing with Error handling, source locations caching & indexin...
Definition Token.h:202
Definition Utilities.h:70