-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenize.js
98 lines (92 loc) · 2.34 KB
/
tokenize.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
const token = (type, value) => ({ type, value });
const isDigit = char => char >= '0' && char <= '9';
const isLowerAlpha = char => char >= 'a' && char <= 'z';
const isUpperAlpha = char => char >= 'A' && char <= 'Z';
const isAlpha = char => isLowerAlpha(char) || isUpperAlpha(char);
const isStarter = char => isAlpha(char) || char === "_";
const isQuote = char => char === "'" || char === '"';
const isWhitespace = char => /\s/.test(char);
function getEscape(eq) {
const what = eq.next();
switch(what) {
case 'n': return '\n';
case 't': return '\t';
case 'r': return '\r';
case 'b': return '\b';
case '0': return '\0';
case 'u':
const raw = eq.next() + eq.next() + eq.next() + eq.next();
const code = parseInt(raw, 16);
if(!code) throw "invalid unicode";
return String.fromCharCode(code);
}
return what;
}
function number(eq) {
let buffer = "";
let usedDecimal = false;
if(eq.peek() === '0') buffer += eq.next() + eq.next();
while(true) {
const char = eq.peek();
if(isDigit(char)) {
buffer += eq.next();
} else if(char === '.') {
if(usedDecimal) throw "extra decimal point";
buffer += eq.next();
if(!isDigit(eq.peek())) throw "bad decimal point";
usedDecimal = true;
} else break;
}
if(isNaN(buffer)) throw "not a number";
return token("number", Number(buffer));
}
function str(eq) {
const type = eq.next();
if(!isQuote(type)) throw "bad quote";
let buffer = "";
while(true) {
const char = eq.next();
if(!char) throw "unterminated string";
if(char === type) break;
if(char === '\\') {
buffer += getEscape(eq);
} else {
buffer += char;
}
}
return token("string", buffer);
}
function word(eq) {
let buffer = eq.next();
if(!isStarter(buffer)) throw "invalid word";
while(true) {
const char = eq.peek();
if(!isStarter(char) && !isDigit(char)) break;
buffer += eq.next();
}
return token("word", buffer);
}
function tokenize(eq) {
const tokens = [];
const gen = {
i: 0,
next: () => eq[gen.i++],
peek: () => eq[gen.i],
};
while(gen.i < eq.length) {
const char = gen.peek();
if(isDigit(char)) {
tokens.push(number(gen));
} else if(isStarter(char)) {
tokens.push(word(gen));
} else if(isQuote(char)) {
tokens.push(str(gen));
} else if(isWhitespace(char)) {
gen.next();
} else {
tokens.push(token("symbol", gen.next()));
};
}
return tokens;
}
module.exports = tokenize;