3636 :symbol
3737 :keyword
3838 :string
39+ :regex
3940 :character )
4041 " Types of tokens that represent leaf nodes in the AST." )
4142
4445 :rbrace )
4546 " Types of tokens that mark the end of a non-atomic form." )
4647
48+ (defvar parseclj-lex--prefix-tokens '(:quote
49+ :backquote
50+ :unquote
51+ :unquote-splice
52+ :discard
53+ :tag
54+ :reader-conditional
55+ :reader-conditional-splice
56+ :var
57+ :deref )
58+ " Tokens that modify the form that follows." )
59+
60+ (defvar parseclj-lex--prefix-2-tokens '(:metadata )
61+ " Tokens that modify the two forms that follow." )
62+
4763; ; Token interface
4864
4965(defun parseclj-lex-token (type form pos &rest attributes )
@@ -81,6 +97,11 @@ A token is an association list with :token-type as its first key."
8197 (and (consp token)
8298 (cdr (assq :token-type token))))
8399
100+ (defun parseclj-lex-token-form (token )
101+ " Get the form of TOKEN."
102+ (and (consp token)
103+ (cdr (assq :form token))))
104+
84105(defun parseclj-lex-leaf-token-p (token )
85106 " Return t if the given AST TOKEN is a leaf node."
86107 (member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +110,9 @@ A token is an association list with :token-type as its first key."
89110 " Return t if the given ast TOKEN is a closing token."
90111 (member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
91112
113+ (defun parseclj-lex-error-p (token )
114+ " Return t if the TOKEN represents a lexing error token."
115+ (eq (parseclj-lex-token-type token) :lex-error ))
92116
93117; ; Elisp values from tokens
94118
@@ -177,18 +201,32 @@ S goes through three transformations:
177201 (<= (char-after (point )) ?9 ))
178202 (right-char )))
179203
204+ (defun parseclj-lex-skip-hex ()
205+ " Skip all consecutive hex digits after point."
206+ (while (and (char-after (point ))
207+ (or (<= ?0 (char-after (point )) ?9 )
208+ (<= ?a (char-after (point )) ?f )
209+ (<= ?A (char-after (point )) ?F )))
210+ (right-char )))
211+
180212(defun parseclj-lex-skip-number ()
181213 " Skip a number at point."
182214 ; ; [\+\-]?\d+\.\d+
183- (when (member (char-after (point )) '(?+ ?- ))
184- (right-char ))
215+ (if (and (eq ?0 (char-after (point )))
216+ (eq ?x (char-after (1+ (point )))))
217+ (progn
218+ (right-char 2 )
219+ (parseclj-lex-skip-hex))
220+ (progn
221+ (when (member (char-after (point )) '(?+ ?- ))
222+ (right-char ))
185223
186- (parseclj-lex-skip-digits)
224+ (parseclj-lex-skip-digits)
187225
188- (when (eq (char-after (point )) ?. )
189- (right-char ))
226+ (when (eq (char-after (point )) ?. )
227+ (right-char ))
190228
191- (parseclj-lex-skip-digits))
229+ (parseclj-lex-skip-digits)) ))
192230
193231(defun parseclj-lex-number ()
194232 " Consume a number and return a `:number' token representing it."
@@ -270,22 +308,39 @@ are returned as their own lex tokens."
270308 ((equal sym " false" ) (parseclj-lex-token :false " false" pos))
271309 (t (parseclj-lex-token :symbol sym pos))))))
272310
273- (defun parseclj-lex-string ()
274- " Return a lex token representing a string.
275- If EOF is reached without finding a closing double quote, a :lex-error
276- token is returned."
311+ (defun parseclj-lex-string* ()
312+ " Helper for string/regex lexing.
313+ Returns either the string, or an error token"
277314 (let ((pos (point )))
278315 (right-char )
279316 (while (not (or (equal (char-after (point )) ?\" ) (parseclj-lex-at-eof-p)))
280317 (if (equal (char-after (point )) ?\\ )
281318 (right-char 2 )
282319 (right-char )))
283- (if (equal (char-after (point )) ?\" )
284- (progn
285- (right-char )
286- (parseclj-lex-token :string (buffer-substring-no-properties pos (point )) pos))
320+ (when (equal (char-after (point )) ?\" )
321+ (right-char )
322+ (buffer-substring-no-properties pos (point )))))
323+
324+ (defun parseclj-lex-string ()
325+ " Return a lex token representing a string.
326+ If EOF is reached without finding a closing double quote, a :lex-error
327+ token is returned."
328+ (let ((pos (point ))
329+ (str (parseclj-lex-string*)))
330+ (if str
331+ (parseclj-lex-token :string str pos)
287332 (parseclj-lex-error-token pos :invalid-string ))))
288333
334+ (defun parseclj-lex-regex ()
335+ " Return a lex token representing a regular expression.
336+ If EOF is reached without finding a closing double quote, a :lex-error
337+ token is returned."
338+ (let ((pos (1- (point )))
339+ (str (parseclj-lex-string*)))
340+ (if str
341+ (parseclj-lex-token :regex (concat " #" str) pos)
342+ (parseclj-lex-error-token pos :invalid-regex ))))
343+
289344(defun parseclj-lex-lookahead (n )
290345 " Return a lookahead string of N characters after point."
291346 (buffer-substring-no-properties (point ) (min (+ (point ) n) (point-max ))))
@@ -387,6 +442,22 @@ See `parseclj-lex-token'."
387442 (right-char )
388443 (parseclj-lex-token :rbrace " }" pos))
389444
445+ ((equal char ?' )
446+ (right-char )
447+ (parseclj-lex-token :quote " '" pos))
448+
449+ ((equal char ?` )
450+ (right-char )
451+ (parseclj-lex-token :backquote " '" pos))
452+
453+ ((equal char ?~ )
454+ (right-char )
455+ (if (eq ?@ (char-after (point )))
456+ (progn
457+ (right-char )
458+ (parseclj-lex-token :unquote-splice " ~@" pos))
459+ (parseclj-lex-token :unquote " ~" pos)))
460+
390461 ((parseclj-lex-at-number-p)
391462 (parseclj-lex-number))
392463
@@ -405,6 +476,14 @@ See `parseclj-lex-token'."
405476 ((equal char ?\; )
406477 (parseclj-lex-comment))
407478
479+ ((equal char ?^ )
480+ (right-char )
481+ (parseclj-lex-token :metadata " ^" pos))
482+
483+ ((equal char ?@ )
484+ (right-char )
485+ (parseclj-lex-token :deref " @" pos))
486+
408487 ((equal char ?# )
409488 (right-char )
410489 (let ((char (char-after (point ))))
@@ -415,6 +494,21 @@ See `parseclj-lex-token'."
415494 ((equal char ?_ )
416495 (right-char )
417496 (parseclj-lex-token :discard " #_" pos))
497+ ((equal char ?\( )
498+ (right-char )
499+ (parseclj-lex-token :lambda " #(" pos))
500+ ((equal char ?' )
501+ (right-char )
502+ (parseclj-lex-token :var " #'" pos))
503+ ((equal char ?\" )
504+ (parseclj-lex-regex))
505+ ((equal char ?\? )
506+ (right-char )
507+ (if (eq ?@ (char-after (point )))
508+ (progn
509+ (right-char )
510+ (parseclj-lex-token :reader-conditional-splice " #?@" pos))
511+ (parseclj-lex-token :reader-conditional " #?" pos)))
418512 ((parseclj-lex-symbol-start-p char t )
419513 (right-char )
420514 (parseclj-lex-token :tag (concat " #" (parseclj-lex-get-symbol-at-point (1+ pos))) pos))
0 commit comments