Skip to content

Commit 3f1eec3

Browse files
committed
Move '/' reading to identifier parsing, added tests
Since normally '/' (alone) works as any identifier name -- including for keywords
1 parent 3cad6d5 commit 3f1eec3

File tree

1 file changed

+100
-30
lines changed

1 file changed

+100
-30
lines changed

src/reader.rs

Lines changed: 100 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,16 @@ fn cons_str(head: char, tail: &str) -> String {
104104
/// - `$`,
105105
/// - `*`,
106106
/// - `!`,
107-
fn is_identifier_char(chr: char) -> bool {
108-
chr.is_alphanumeric() || "|?<>+-_=^%&$*!.".contains(chr)
107+
fn is_identifier_char(ch: char) -> bool {
108+
ch.is_alphanumeric() || "|?<>+-_=^%&$*!.".contains(ch)
109109
}
110110

111-
/// Returns whether if a character can be in the head of an identifier.
111+
/// Returns true if a character is an acceptable (non numeric) identifier char
112112
///
113-
/// An identifier is composed of a head (its first char) and a tail (the other
114-
/// chars).
113+
/// An identifier is either a non numeric identifier char, followed by any number
114+
/// of identifier chars, or is a '/' and nothing else.
115+
///
116+
/// A separate function will be used to detect if an identifier is possibly just '/'
115117
///
116118
/// A character is an identifier char if it is alphabetic or if it is one of:
117119
/// - `|`,
@@ -128,20 +130,55 @@ fn is_identifier_char(chr: char) -> bool {
128130
/// - `$`,
129131
/// - `*`,
130132
/// - `!`,
131-
fn is_non_numeric_identifier_char(chr: char) -> bool {
132-
chr.is_alphabetic() || "|?<>+-_=^%&$*!.".contains(chr)
133+
fn is_non_numeric_identifier_char(ch: char) -> bool {
134+
ch.is_alphabetic() || "|?<>+-_=^%&$*!.".contains(ch)
135+
}
136+
137+
/// Returns true if a character is an acceptable (non numeric) identifier char, or '/'
138+
///
139+
/// An identifier is either a non numeric identifier char, followed by any number
140+
/// of identifier chars, or is a '/' and nothing else.
141+
///
142+
/// The reason we check if this is *either* a non numeric identifier char, or a '/',
143+
/// is because we will want to use it to parse either
144+
/// 1.a normal identifier
145+
/// 2.'/',
146+
/// 3. something like '/blah'
147+
/// And then, if we have '/blah', we will proactively make the read fail
148+
///
149+
/// We need to explicitly look for this '/blah' case is otherwise, if we just check for 1 and 2,
150+
/// then in the case where someone types in '/blah' it will count as two valid separate reads --
151+
/// first the symbol '/' and then the symbol 'blah'.
152+
///
153+
/// This function passes if the char is alphabetic, a '/', or one of:
154+
/// - `|`,
155+
/// - `?`,
156+
/// - `<`,
157+
/// - `>`,
158+
/// - `+`,
159+
/// - `-`,
160+
/// - `_`,
161+
/// - `=`,
162+
/// - `^`,
163+
/// - `%`,
164+
/// - `&`,
165+
/// - `$`,
166+
/// - `*`,
167+
/// - `!`,
168+
fn is_non_numeric_identifier_char_or_slash(ch: char) -> bool {
169+
ch == '/' || is_non_numeric_identifier_char(ch)
133170
}
134171

135172
/// Returns true if given character is a minus character
136173
/// - `-`,
137-
fn is_minus_char(chr: char) -> bool {
138-
chr == '-'
174+
fn is_minus_char(ch: char) -> bool {
175+
ch == '-'
139176
}
140177

141178
/// Returns true if given character is a period character
142179
/// - `-`,
143-
fn is_period_char(chr: char) -> bool {
144-
chr == '.'
180+
fn is_period_char(ch: char) -> bool {
181+
ch == '.'
145182
}
146183

147184
/// Returns whether if a given character is a whitespace.
@@ -207,27 +244,40 @@ fn identifier_tail(input: &str) -> IResult<&str, &str> {
207244
}
208245

209246
/// Parses valid Clojure identifiers
210-
/// Example Successes: ab, cat, -12+3, |blah|, <well>
211-
/// Example Failures: 'a, 12b, ,cat
247+
/// Example Successes: ab, cat, -12+3, |blah|, <well>, / (edge case)
248+
/// Example Failures: 'a, 12b, ,cat , /ab
212249
pub fn identifier_parser(input: &str) -> IResult<&str, String> {
213-
named!(identifier_head<&str, char>,
250+
// We will try to parse either a valid identifier, *or* the invalid identifier
251+
// '/slashwithmorecharacters'
252+
// Because if we do get the '/blah', we want to know and actively fail, otherwise '/blah'
253+
// will just count as two valid reads; one for '/' and one for 'blah'
254+
// So, we call these parsers 'maybe_valid_identifier_..', as they are also trying to catch
255+
// this one invalid case
256+
named!(maybe_invalid_identifier_head_parser<&str, char>,
214257
map!(
215-
take_while_m_n!(1, 1, is_non_numeric_identifier_char),
258+
take_while_m_n!(1, 1, is_non_numeric_identifier_char_or_slash),
216259
first_char
217260
)
218261
);
219262

220-
// identifier_tail<&str,&str> defined above to have magic 'complete' powers
263+
// identifier_tail<&str,&str> defined above so it can be a 'completion' parser instead of a
264+
// 'streaming' parser -- look into nom's documentation for more info
221265

222-
named!(identifier <&str, String>,
266+
named!(maybe_invalid_identifier_parser <&str, String>,
223267
do_parse!(
224-
head: identifier_head >>
268+
head: maybe_invalid_identifier_head_parser >>
225269
rest_input: identifier_tail >>
226270
(cons_str(head, &rest_input))
227271
)
228272
);
229273

230-
identifier(input)
274+
named!(valid_identifier_parser <&str,String>,
275+
verify!(maybe_invalid_identifier_parser,|identifier| {
276+
first_char(&identifier) != '/' ||
277+
identifier == "/"
278+
}));
279+
280+
valid_identifier_parser(input)
231281
}
232282

233283
/// Parses valid Clojure symbol
@@ -432,16 +482,6 @@ pub fn try_read_pattern(input: &str) -> IResult<&str, Value> {
432482
Ok((rest_input, regex))
433483
}
434484

435-
/// This reader is needed for parsing the division sign /
436-
pub fn try_read_division_forward_slash(input: &str) -> IResult<&str, Value> {
437-
named!(slash_parser<&str, &str>, preceded!(consume_clojure_whitespaces_parser, tag!("/")));
438-
439-
let (rest_input, slash) = slash_parser(input)?;
440-
441-
// If an error is thrown, this will be coerced into a condition
442-
Ok((rest_input, Value::Symbol(Symbol::intern(slash))))
443-
}
444-
445485
// @TODO Perhaps generalize this, or even generalize it as a reader macro
446486
/// Tries to parse &str into Value::PersistentListMap, or some other Value::..Map
447487
/// Example Successes:
@@ -543,7 +583,6 @@ pub fn try_read(input: &str) -> IResult<&str, Value> {
543583
try_read_bool,
544584
try_read_nil,
545585
try_read_symbol,
546-
try_read_division_forward_slash,
547586
try_read_keyword,
548587
try_read_list,
549588
try_read_vector,
@@ -788,6 +827,7 @@ mod tests {
788827
use crate::persistent_vector;
789828
use crate::reader::try_read;
790829
use crate::symbol::Symbol;
830+
use crate::keyword::Keyword;
791831
use crate::value::Value::{PersistentList, PersistentListMap, PersistentVector};
792832
use crate::value::{ToValue, Value};
793833

@@ -903,6 +943,36 @@ mod tests {
903943
try_read("/ ").ok().unwrap().1
904944
);
905945
}
946+
#[test]
947+
fn try_read_forward_slash_with_letters_and_fails_test() {
948+
assert!(try_read("/ab ").ok().is_none());
949+
}
950+
951+
#[test]
952+
fn try_read_forward_slash_keyword_test() {
953+
assert_eq!(
954+
Value::Keyword(Keyword::intern(&"/")),
955+
try_read(":/ ").ok().unwrap().1
956+
);
957+
}
958+
959+
#[test]
960+
fn try_read_forward_slash_keyword_with_letters_and_fails_test() {
961+
assert!(try_read(":/ab ").ok().is_none());
962+
}
963+
964+
#[test]
965+
fn try_read_forward_slash_keyword_with_ns_test() {
966+
assert_eq!(
967+
Value::Keyword(Keyword::intern_with_ns("core", "/")),
968+
try_read(":core// ").ok().unwrap().1
969+
);
970+
}
971+
972+
#[test]
973+
fn try_read_forward_slash_keyword_with_ns_with_letters_and_fails_test() {
974+
assert!(try_read(":core//ab ").ok().is_none());
975+
}
906976
}
907977

908978
mod regex_tests {

0 commit comments

Comments
 (0)