@@ -107,14 +107,16 @@ fn cons_str(head: char, tail: &str) -> String {
107
107
/// - `$`,
108
108
/// - `*`,
109
109
/// - `!`,
110
- fn is_identifier_char ( chr : char ) -> bool {
111
- chr . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( chr )
110
+ fn is_identifier_char ( ch : char ) -> bool {
111
+ ch . is_alphanumeric ( ) || "|?<>+-_=^%&$*!." . contains ( ch )
112
112
}
113
113
114
- /// Returns whether if a character can be in the head of an identifier.
114
+ /// Returns true if a character is an acceptable (non numeric) identifier char
115
115
///
116
- /// An identifier is composed of a head (its first char) and a tail (the other
117
- /// chars).
116
+ /// An identifier is either a non numeric identifier char, followed by any number
117
+ /// of identifier chars, or is a '/' and nothing else.
118
+ ///
119
+ /// A separate function will be used to detect if an identifier is possibly just '/'
118
120
///
119
121
/// A character is an identifier char if it is alphabetic or if it is one of:
120
122
/// - `|`,
@@ -131,20 +133,55 @@ fn is_identifier_char(chr: char) -> bool {
131
133
/// - `$`,
132
134
/// - `*`,
133
135
/// - `!`,
134
- fn is_non_numeric_identifier_char ( chr : char ) -> bool {
135
- chr. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( chr)
136
+ fn is_non_numeric_identifier_char ( ch : char ) -> bool {
137
+ ch. is_alphabetic ( ) || "|?<>+-_=^%&$*!." . contains ( ch)
138
+ }
139
+
140
+ /// Returns true if a character is an acceptable (non numeric) identifier char, or '/'
141
+ ///
142
+ /// An identifier is either a non numeric identifier char, followed by any number
143
+ /// of identifier chars, or is a '/' and nothing else.
144
+ ///
145
+ /// The reason we check if this is *either* a non numeric identifier char, or a '/',
146
+ /// is because we will want to use it to parse either
147
+ /// 1. a normal identifier,
148
+ /// 2. '/',
149
+ /// 3. something like '/blah'
150
+ /// And then, if we have '/blah', we will proactively make the read fail
151
+ ///
152
+ /// We need to explicitly look for this '/blah' case because otherwise, if we just check for 1 and 2,
153
+ /// then in the case where someone types in '/blah' it will count as two valid separate reads --
154
+ /// first the symbol '/' and then the symbol 'blah'.
155
+ ///
156
+ /// This function passes if the char is alphabetic, a '/', or one of:
157
+ /// - `|`,
158
+ /// - `?`,
159
+ /// - `<`,
160
+ /// - `>`,
161
+ /// - `+`,
162
+ /// - `-`,
163
+ /// - `_`,
164
+ /// - `=`,
165
+ /// - `^`,
166
+ /// - `%`,
167
+ /// - `&`,
168
+ /// - `$`,
169
+ /// - `*`,
170
+ /// - `!`,
171
+ fn is_non_numeric_identifier_char_or_slash ( ch : char ) -> bool {
172
+ ch == '/' || is_non_numeric_identifier_char ( ch)
136
173
}
137
174
138
175
/// Returns true if given character is a minus character
139
176
/// - `-`,
140
- fn is_minus_char ( chr : char ) -> bool {
141
- chr == '-'
177
+ fn is_minus_char ( ch : char ) -> bool {
178
+ ch == '-'
142
179
}
143
180
144
181
/// Returns true if given character is a period character
145
182
/// - `.`,
146
- fn is_period_char ( chr : char ) -> bool {
147
- chr == '.'
183
+ fn is_period_char ( ch : char ) -> bool {
184
+ ch == '.'
148
185
}
149
186
150
187
/// Returns whether a given character is whitespace.
@@ -210,27 +247,40 @@ fn identifier_tail(input: &str) -> IResult<&str, &str> {
210
247
}
211
248
212
249
/// Parses valid Clojure identifiers
213
- /// Example Successes: ab, cat, -12+3, |blah|, <well>
214
- /// Example Failures: 'a, 12b, ,cat
250
+ /// Example Successes: ab, cat, -12+3, |blah|, <well>, / (edge case)
251
+ /// Example Failures: 'a, 12b, ,cat, /ab
215
252
pub fn identifier_parser ( input : & str ) -> IResult < & str , String > {
216
- named ! ( identifier_head<& str , char >,
253
+ // We will try to parse either a valid identifier, *or* the invalid identifier
254
+ // '/slashwithmorecharacters'
255
+ // Because if we do get the '/blah', we want to know and actively fail, otherwise '/blah'
256
+ // will just count as two valid reads; one for '/' and one for 'blah'
257
+ // So, we call these parsers 'maybe_invalid_identifier_..', as they are also trying to catch
258
+ // this one invalid case
259
+ named ! ( maybe_invalid_identifier_head_parser<& str , char >,
217
260
map!(
218
- take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char ) ,
261
+ take_while_m_n!( 1 , 1 , is_non_numeric_identifier_char_or_slash ) ,
219
262
first_char
220
263
)
221
264
) ;
222
265
223
- // identifier_tail<&str,&str> defined above to have magic 'complete' powers
266
+ // identifier_tail<&str,&str> defined above so it can be a 'completion' parser instead of a
267
+ // 'streaming' parser -- look into nom's documentation for more info
224
268
225
- named ! ( identifier <& str , String >,
269
+ named ! ( maybe_invalid_identifier_parser <& str , String >,
226
270
do_parse!(
227
- head: identifier_head >>
271
+ head: maybe_invalid_identifier_head_parser >>
228
272
rest_input: identifier_tail >>
229
273
( cons_str( head, & rest_input) )
230
274
)
231
275
) ;
232
276
233
- identifier ( input)
277
+ named ! ( valid_identifier_parser <& str , String >,
278
+ verify!( maybe_invalid_identifier_parser, |identifier| {
279
+ first_char( & identifier) != '/' ||
280
+ identifier == "/"
281
+ } ) ) ;
282
+
283
+ valid_identifier_parser ( input)
234
284
}
235
285
236
286
/// Parses valid Clojure symbol
@@ -1012,6 +1062,43 @@ mod tests {
1012
1062
_ => panic ! ( "try_read_meta \" ^:cat a\" should return a symbol" )
1013
1063
}
1014
1064
}
1065
+ #[ test]
1066
+ fn try_read_forward_slash_test ( ) {
1067
+ assert_eq ! (
1068
+ Value :: Symbol ( Symbol :: intern( & "/" ) ) ,
1069
+ try_read( "/ " ) . ok( ) . unwrap( ) . 1
1070
+ ) ;
1071
+ }
1072
+ #[ test]
1073
+ fn try_read_forward_slash_with_letters_and_fails_test ( ) {
1074
+ assert ! ( try_read( "/ab " ) . ok( ) . is_none( ) ) ;
1075
+ }
1076
+
1077
+ #[ test]
1078
+ fn try_read_forward_slash_keyword_test ( ) {
1079
+ assert_eq ! (
1080
+ Value :: Keyword ( Keyword :: intern( & "/" ) ) ,
1081
+ try_read( ":/ " ) . ok( ) . unwrap( ) . 1
1082
+ ) ;
1083
+ }
1084
+
1085
+ #[ test]
1086
+ fn try_read_forward_slash_keyword_with_letters_and_fails_test ( ) {
1087
+ assert ! ( try_read( ":/ab " ) . ok( ) . is_none( ) ) ;
1088
+ }
1089
+
1090
+ #[ test]
1091
+ fn try_read_forward_slash_keyword_with_ns_test ( ) {
1092
+ assert_eq ! (
1093
+ Value :: Keyword ( Keyword :: intern_with_ns( "core" , "/" ) ) ,
1094
+ try_read( ":core// " ) . ok( ) . unwrap( ) . 1
1095
+ ) ;
1096
+ }
1097
+
1098
+ #[ test]
1099
+ fn try_read_forward_slash_keyword_with_ns_with_letters_and_fails_test ( ) {
1100
+ assert ! ( try_read( ":core//ab " ) . ok( ) . is_none( ) ) ;
1101
+ }
1015
1102
}
1016
1103
1017
1104
mod regex_tests {
0 commit comments