Skip to content

Commit 9f00e4d

Browse files
finalfantasia authored and marijnh committed
[clojure mode] Mark invalid tokens
Types of invalid tokens that can be indicated include:
- invalid numbers (e.g., `42a`, `0x0042m`, etc.)
- invalid character literals (e.g., `\ab`, `\a1`, `\newlines`, `\NEWLINE`)
- invalid symbols (e.g., `42foo`, etc.)
1 parent 72c708d commit 9f00e4d

File tree

2 files changed

+70
-28
lines changed

2 files changed

+70
-28
lines changed

mode/clojure/clojure.js

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -156,23 +156,34 @@ CodeMirror.defineMode("clojure", function (options) {
156156
var specialForm = createLookupMap(specialForms);
157157
var coreSymbol = createLookupMap(coreSymbols);
158158
var hasBodyParameter = createLookupMap(haveBodyParameter);
159-
var numberLiteral = /^[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/;
160-
var symbolCharacter = /[!#$&'*+\-.\/:<=>?_|\w\xa1-\uffff]/;
159+
var delimiter = /^(?:[\\\[\]\s"(),;@^`{}~]|$)/;
160+
var numberLiteral = /^(?:[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?(?=[\\\[\]\s"#'(),;@^`{}~]|$))/;
161+
var characterLiteral = /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/;
162+
163+
// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
164+
// simple-symbol := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
165+
// qualified-symbol := (<simple-namespace>(<.><simple-namespace>)*</>)?<simple-symbol>
166+
var qualifiedSymbol = /^(?:(?:[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*(?:\.[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*\/)?(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*(?=[\\\[\]\s"(),;@^`{}~]|$))/;
161167

162168
function base(stream, state) {
163169
if (stream.eatSpace()) return ["space", null];
164170
if (stream.match(numberLiteral)) return [null, "number"];
165-
166-
var ch = stream.next();
167-
168-
if (ch === "\\") {stream.next(); readSymbol(stream); return [null, "string-2"];}
169-
if (ch === '"') return (state.tokenize = inString)(stream, state);
170-
if (is(ch, /[(\[{]/)) return ["open", "bracket"];
171-
if (is(ch, /[)\]}]/)) return ["close", "bracket"];
172-
if (ch === ";") {stream.skipToEnd(); return ["space", "comment"];}
173-
if (is(ch, /[#'@^`~]/)) return [null, "meta"];
174-
175-
var symbol = readSymbol(stream);
171+
if (stream.match(characterLiteral)) return [null, "string-2"];
172+
if (stream.eat(/^"/)) return (state.tokenize = inString)(stream, state);
173+
if (stream.eat(/^[(\[{]/)) return ["open", "bracket"];
174+
if (stream.eat(/^[)\]}]/)) return ["close", "bracket"];
175+
if (stream.eat(/^;/)) {stream.skipToEnd(); return ["space", "comment"];}
176+
if (stream.eat(/^[#'@^`~]/)) return [null, "meta"];
177+
178+
var matches = stream.match(qualifiedSymbol);
179+
var symbol = matches && matches[0];
180+
181+
if (!symbol) {
182+
// advance stream by at least one character so we don't get stuck.
183+
stream.next();
184+
stream.eatWhile(function (c) {return !is(c, delimiter);});
185+
return [null, "error"];
186+
}
176187

177188
if (symbol === "comment" && state.lastToken === "(")
178189
return (state.tokenize = inComment)(stream, state);
@@ -187,7 +198,7 @@ CodeMirror.defineMode("clojure", function (options) {
187198
var escaped = false, next;
188199

189200
while (next = stream.next()) {
190-
if (next === '"' && !escaped) {state.tokenize = base; break;}
201+
if (next === "\"" && !escaped) {state.tokenize = base; break;}
191202
escaped = !escaped && next === "\\";
192203
}
193204

@@ -211,17 +222,6 @@ CodeMirror.defineMode("clojure", function (options) {
211222
return ["space", "comment"];
212223
}
213224

214-
function readSymbol(stream) {
215-
var ch;
216-
217-
while (ch = stream.next()) {
218-
if (ch === "\\") stream.next();
219-
else if (!is(ch, symbolCharacter)) {stream.backUp(1); break;}
220-
}
221-
222-
return stream.current();
223-
}
224-
225225
function createLookupMap(words) {
226226
var obj = {};
227227

mode/clojure/test.js

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,22 @@
4343
"[number 8r52] [number +8r52] [number -8r52]",
4444
"[number 36rhello] [number +36rhello] [number -36rhello]",
4545
"[number 36rz] [number +36rz] [number -36rz]",
46-
"[number 36rZ] [number +36rZ] [number -36rZ]"
46+
"[number 36rZ] [number +36rZ] [number -36rZ]",
47+
48+
// invalid numbers
49+
"[error 42foo]",
50+
"[error 42Nfoo]",
51+
"[error 42Mfoo]",
52+
"[error 42.42Mfoo]",
53+
"[error 42.42M!]",
54+
"[error 42!]",
55+
"[error 0x42afm]"
4756
);
4857

4958
MT("characters",
5059
"[string-2 \\1]",
5160
"[string-2 \\a]",
61+
"[string-2 \\a\\b\\c]",
5262
"[string-2 \\#]",
5363
"[string-2 \\\\]",
5464
"[string-2 \\\"]",
@@ -63,7 +73,21 @@
6373
"[string-2 \\u1000]",
6474
"[string-2 \\uAaAa]",
6575
"[string-2 \\u9F9F]",
66-
"[string-2 \\o123]"
76+
"[string-2 \\o123]",
77+
"[string-2 \\符]",
78+
"[string-2 \\シ]",
79+
"[string-2 \\ۇ]",
80+
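// FIXME: presumably fails because an astral character is a UTF-16 surrogate pair, while the `.` in characterLiteral (no `u` flag) consumes only one code unit.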
81+
// "[string-2 \\🙂]",
82+
83+
// invalid character literals
84+
"[error \\abc]",
85+
"[error \\a123]",
86+
"[error \\a!]",
87+
"[error \\newlines]",
88+
"[error \\NEWLINE]",
89+
"[error \\u9F9FF]",
90+
"[error \\o1234]"
6791
);
6892

6993
MT("strings",
@@ -72,6 +96,8 @@
7296
"[string \"I'm]", // this is
7397
"[string a]", // a multi-line
7498
"[string teapot.\"]" // string
99+
100+
// TODO: add test cases for unterminated (multi-line) strings?
75101
);
76102

77103
MT("comments",
@@ -123,7 +149,23 @@
123149
"[variable 符号]",
124150
"[variable シンボル]",
125151
"[variable ئۇيغۇر]",
126-
"[variable 🙂❤🇺🇸]"
152+
"[variable 🙂❤🇺🇸]",
153+
154+
// invalid symbols
155+
"[error 3foo]",
156+
"[error 3+]",
157+
"[error 3|]",
158+
"[error 3_]"
159+
);
160+
161+
MT("numbers and other forms",
162+
"[number 42][bracket (][builtin foo][bracket )]",
163+
"[number 42][bracket [[][variable foo][bracket ]]]",
164+
"[number 42][meta #][bracket {][variable foo][bracket }]",
165+
"[number 42][bracket {][atom :foo] [variable bar][bracket }]",
166+
"[number 42][meta `][variable foo]",
167+
"[number 42][meta ~][variable foo]",
168+
"[number 42][meta #][variable foo]"
127169
);
128170

129171
var specialForms = [".", "catch", "def", "do", "if", "monitor-enter",

0 commit comments

Comments
 (0)