@@ -104,14 +104,16 @@ fn cons_str(head: char, tail: &str) -> String {
104
104
/// - `$`,
105
105
/// - `*`,
106
106
/// - `!`,
107
- fn is_identifier_char ( chr : char ) -> bool {
108
- chr . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( chr )
107
+ fn is_identifier_char ( ch : char ) -> bool {
108
+ ch . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( ch )
109
109
}
110
110
111
- /// Returns whether if a character can be in the head of an identifier.
111
+ /// Returns true if a character is an acceptable (non numeric) identifier char
112
112
///
113
- /// An identifier is composed of a head (its first char) and a tail (the other
114
- /// chars).
113
+ /// An identifier is either a non numeric identifier char, followed by any number
114
+ /// of identifier chars, or is a '/' and nothing else.
115
+ ///
116
+ /// A separate function will be used to detect if an identifier is possibly just '/'
115
117
///
116
118
/// A character is an identifier char if it is alphabetic or if it is one of:
117
119
/// - `|`,
@@ -128,20 +130,55 @@ fn is_identifier_char(chr: char) -> bool {
128
130
/// - `$`,
129
131
/// - `*`,
130
132
/// - `!`,
131
- fn is_non_numeric_identifier_char ( chr : char ) -> bool {
132
- chr. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( chr)
133
+ fn is_non_numeric_identifier_char ( ch : char ) -> bool {
134
+ ch. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( ch)
135
+ }
136
+
137
+ /// Returns true if a character is an acceptable (non numeric) identifier char, or '/'
138
+ ///
139
+ /// An identifier is either a non numeric identifier char, followed by any number
140
+ /// of identifier chars, or is a '/' and nothing else.
141
+ ///
142
+ /// The reason we check if this is *either* a non numeric identifier char, or a '/',
143
+ /// is because we will want to use it to parse either
144
+ /// 1. a normal identifier
145
+ /// 2. '/',
146
+ /// 3. something like '/blah'
147
+ /// And then, if we have '/blah', we will proactively make the read fail
148
+ ///
149
+ /// We need to explicitly look for this '/blah' case because otherwise, if we just check for 1 and 2,
150
+ /// then in the case where someone types in '/blah' it will count as two valid separate reads --
151
+ /// first the symbol '/' and then the symbol 'blah'.
152
+ ///
153
+ /// This function passes if the char is alphabetic, a '/', or one of:
154
+ /// - `|`,
155
+ /// - `?`,
156
+ /// - `<`,
157
+ /// - `>`,
158
+ /// - `+`,
159
+ /// - `-`,
160
+ /// - `_`,
161
+ /// - `=`,
162
+ /// - `^`,
163
+ /// - `%`,
164
+ /// - `&`,
165
+ /// - `$`,
166
+ /// - `*`,
167
+ /// - `!`,
168
+ fn is_non_numeric_identifier_char_or_slash ( ch : char ) -> bool {
169
+ ch == '/' || is_non_numeric_identifier_char ( ch)
133
170
}
134
171
135
172
/// Returns true if given character is a minus character
136
173
/// - `-`,
137
- fn is_minus_char ( chr : char ) -> bool {
138
- chr == '-'
174
+ fn is_minus_char ( ch : char ) -> bool {
175
+ ch == '-'
139
176
}
140
177
141
178
/// Returns true if given character is a period character
142
179
/// - `.`,
143
- fn is_period_char ( chr : char ) -> bool {
144
- chr == '.'
180
+ fn is_period_char ( ch : char ) -> bool {
181
+ ch == '.'
145
182
}
146
183
147
184
/// Returns whether a given character is a whitespace.
@@ -207,27 +244,40 @@ fn identifier_tail(input: &str) -> IResult<&str, &str> {
207
244
}
208
245
209
246
/// Parses valid Clojure identifiers
210
- /// Example Successes: ab, cat, -12+3, |blah|, <well>
211
- /// Example Failures: 'a, 12b, ,cat
247
+ /// Example Successes: ab, cat, -12+3, |blah|, <well>, / (edge case)
248
+ /// Example Failures: 'a, 12b, ,cat , /ab
212
249
pub fn identifier_parser ( input : & str ) -> IResult < & str , String > {
213
- named ! ( identifier_head<& str , char >,
250
+ // We will try to parse either a valid identifier, *or* the invalid identifier
251
+ // '/slashwithmorecharacters'
252
+ // Because if we do get the '/blah', we want to know and actively fail, otherwise '/blah'
253
+ // will just count as two valid reads; one for '/' and one for 'blah'
254
+ // So, we call these parsers 'maybe_invalid_identifier_..', as they are also trying to catch
255
+ // this one invalid case
256
+ named ! ( maybe_invalid_identifier_head_parser<& str , char >,
214
257
map!(
215
- take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char ) ,
258
+ take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char_or_slash ) ,
216
259
first_char
217
260
)
218
261
) ;
219
262
220
- // identifier_tail<&str,&str> defined above to have magic 'complete' powers
263
+ // identifier_tail<&str,&str> defined above so it can be a 'completion' parser instead of a
264
+ // 'streaming' parser -- look into nom's documentation for more info
221
265
222
- named ! ( identifier <& str , String >,
266
+ named ! ( maybe_invalid_identifier_parser <& str , String >,
223
267
do_parse!(
224
- head: identifier_head >>
268
+ head: maybe_invalid_identifier_head_parser >>
225
269
rest_input: identifier_tail >>
226
270
( cons_str( head, & rest_input) )
227
271
)
228
272
) ;
229
273
230
- identifier ( input)
274
+ named ! ( valid_identifier_parser <& str , String >,
275
+ verify!( maybe_invalid_identifier_parser, |identifier| {
276
+ first_char( & identifier) != '/' ||
277
+ identifier == "/"
278
+ } ) ) ;
279
+
280
+ valid_identifier_parser ( input)
231
281
}
232
282
233
283
/// Parses valid Clojure symbol
@@ -432,16 +482,6 @@ pub fn try_read_pattern(input: &str) -> IResult<&str, Value> {
432
482
Ok ( ( rest_input, regex) )
433
483
}
434
484
435
- /// This reader is needed for parsing the division sign /
436
- pub fn try_read_division_forward_slash ( input : & str ) -> IResult < & str , Value > {
437
- named ! ( slash_parser<& str , & str >, preceded!( consume_clojure_whitespaces_parser, tag!( "/" ) ) ) ;
438
-
439
- let ( rest_input, slash) = slash_parser ( input) ?;
440
-
441
- // If an error is thrown, this will be coerced into a condition
442
- Ok ( ( rest_input, Value :: Symbol ( Symbol :: intern ( slash) ) ) )
443
- }
444
-
445
485
// @TODO Perhaps generalize this, or even generalize it as a reader macro
446
486
/// Tries to parse &str into Value::PersistentListMap, or some other Value::..Map
447
487
/// Example Successes:
@@ -543,7 +583,6 @@ pub fn try_read(input: &str) -> IResult<&str, Value> {
543
583
try_read_bool,
544
584
try_read_nil,
545
585
try_read_symbol,
546
- try_read_division_forward_slash,
547
586
try_read_keyword,
548
587
try_read_list,
549
588
try_read_vector,
@@ -788,6 +827,7 @@ mod tests {
788
827
use crate :: persistent_vector;
789
828
use crate :: reader:: try_read;
790
829
use crate :: symbol:: Symbol ;
830
+ use crate :: keyword:: Keyword ;
791
831
use crate :: value:: Value :: { PersistentList , PersistentListMap , PersistentVector } ;
792
832
use crate :: value:: { ToValue , Value } ;
793
833
@@ -903,6 +943,36 @@ mod tests {
903
943
try_read( "/ " ) . ok( ) . unwrap( ) . 1
904
944
) ;
905
945
}
946
+ #[ test]
947
+ fn try_read_forward_slash_with_letters_and_fails_test ( ) {
948
+ assert ! ( try_read( "/ab " ) . ok( ) . is_none( ) ) ;
949
+ }
950
+
951
+ #[ test]
952
+ fn try_read_forward_slash_keyword_test ( ) {
953
+ assert_eq ! (
954
+ Value :: Keyword ( Keyword :: intern( & "/" ) ) ,
955
+ try_read( ":/ " ) . ok( ) . unwrap( ) . 1
956
+ ) ;
957
+ }
958
+
959
+ #[ test]
960
+ fn try_read_forward_slash_keyword_with_letters_and_fails_test ( ) {
961
+ assert ! ( try_read( ":/ab " ) . ok( ) . is_none( ) ) ;
962
+ }
963
+
964
+ #[ test]
965
+ fn try_read_forward_slash_keyword_with_ns_test ( ) {
966
+ assert_eq ! (
967
+ Value :: Keyword ( Keyword :: intern_with_ns( "core" , "/" ) ) ,
968
+ try_read( ":core// " ) . ok( ) . unwrap( ) . 1
969
+ ) ;
970
+ }
971
+
972
+ #[ test]
973
+ fn try_read_forward_slash_keyword_with_ns_with_letters_and_fails_test ( ) {
974
+ assert ! ( try_read( ":core//ab " ) . ok( ) . is_none( ) ) ;
975
+ }
906
976
}
907
977
908
978
mod regex_tests {
0 commit comments