Skip to content

Commit 00ac597

Browse files
committed
clojure.string/split and split-lines works with regex pattern type
1 parent ac4136c commit 00ac597

File tree

6 files changed

+178
-83
lines changed

6 files changed

+178
-83
lines changed

src/clojure/string.clj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"clojure.string"
2+
3+
"TODO : some special syntax required because of missing require"
4+
5+
(def clojure.string/split-lines
6+
(fn [s]
7+
(clojure.string/split s #"\r?\n")))

src/clojure_string.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub(crate) mod includes_qmark_;
44
pub(crate) mod join;
55
pub(crate) mod lower_case;
66
pub(crate) mod reverse;
7+
pub(crate) mod split;
78
pub(crate) mod starts_with_qmark_;
89
pub(crate) mod trim;
910
pub(crate) mod trim_newline;

src/clojure_string/split.rs

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
use crate::ifn::IFn;
2+
use crate::value::{ToValue, Value};
3+
use std::rc::Rc;
4+
5+
use crate::error_message;
6+
use crate::persistent_vector::PersistentVector;
7+
use crate::type_tag::TypeTag;
8+
9+
/// clojure.string/split [s re & [limit]] splits strings by pattern, optionally maximum of limit
10+
/// amount
11+
#[derive(Debug, Clone)]
12+
pub struct SplitFn {}
13+
impl ToValue for SplitFn {
14+
fn to_value(&self) -> Value {
15+
Value::IFn(Rc::new(self.clone()))
16+
}
17+
}
18+
impl IFn for SplitFn {
19+
fn invoke(&self, args: Vec<Rc<Value>>) -> Value {
20+
if args.len() != 2 && args.len() != 3 {
21+
return error_message::wrong_varg_count(&[2, 3], args.len());
22+
} else {
23+
match (
24+
args.get(0).unwrap().to_value(),
25+
args.get(1).unwrap().to_value(),
26+
) {
27+
(Value::String(s), Value::Pattern(re)) => {
28+
let splits: Vec<Rc<Value>> = re
29+
.split(&s)
30+
.collect::<Vec<&str>>()
31+
.into_iter()
32+
.filter(|ss| !ss.is_empty())
33+
.map(|ss| Rc::new(Value::String(ss.to_string())))
34+
.collect();
35+
return Value::PersistentVector(
36+
splits.into_iter().collect::<PersistentVector>(),
37+
);
38+
}
39+
(_a, Value::Pattern(_)) => error_message::type_mismatch(TypeTag::String, &_a),
40+
(Value::String(_), _b) => error_message::type_mismatch(TypeTag::Pattern, &_b),
41+
(_, _) => error_message::unknown_err(String::from("Unknown error")),
42+
}
43+
}
44+
}
45+
}
46+
47+
#[cfg(test)]
48+
mod tests {
49+
mod split_tests {
50+
use crate::clojure_string::split::SplitFn;
51+
use crate::ifn::IFn;
52+
use crate::persistent_vector::PersistentVector;
53+
use crate::value::Value;
54+
use std::rc::Rc;
55+
56+
#[test]
57+
fn split_by_comma() {
58+
let split = SplitFn {};
59+
let s = "hello,world,again";
60+
let re = ",";
61+
let args = vec![
62+
Rc::new(Value::String(String::from(s))),
63+
Rc::new(Value::Pattern(regex::Regex::new(re).unwrap())),
64+
];
65+
assert_eq!(
66+
Value::PersistentVector(
67+
vec![
68+
Rc::new(Value::String(String::from("hello"))),
69+
Rc::new(Value::String(String::from("world"))),
70+
Rc::new(Value::String(String::from("again")))
71+
]
72+
.into_iter()
73+
.collect::<PersistentVector>()
74+
),
75+
split.invoke(args)
76+
);
77+
}
78+
79+
#[test]
80+
fn split_by_comma_not_in_string() {
81+
let split = SplitFn {};
82+
let s = "hello world again";
83+
let re = ",";
84+
let args = vec![
85+
Rc::new(Value::String(String::from(s))),
86+
Rc::new(Value::Pattern(regex::Regex::new(re).unwrap())),
87+
];
88+
assert_eq!(
89+
Value::PersistentVector(
90+
vec![Rc::new(Value::String(String::from("hello world again")))]
91+
.into_iter()
92+
.collect::<PersistentVector>()
93+
),
94+
split.invoke(args)
95+
);
96+
}
97+
98+
#[test]
99+
fn split_by_doublequotes() {
100+
let split = SplitFn {};
101+
let s = r#"hello world"again""#;
102+
let re = "\"";
103+
let args = vec![
104+
Rc::new(Value::String(String::from(s))),
105+
Rc::new(Value::Pattern(regex::Regex::new(re).unwrap())),
106+
];
107+
assert_eq!(
108+
Value::PersistentVector(
109+
vec![
110+
Rc::new(Value::String(String::from("hello world"))),
111+
Rc::new(Value::String(String::from("again")))
112+
]
113+
.into_iter()
114+
.collect::<PersistentVector>()
115+
),
116+
split.invoke(args)
117+
);
118+
}
119+
}
120+
}

src/environment.rs

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::clojure_std;
22
use crate::clojure_string;
3-
use crate::namespace::{Namespaces};
3+
use crate::namespace::Namespaces;
44
use crate::repl::Repl;
55
use crate::rust_core;
66
use crate::symbol::Symbol;
@@ -219,6 +219,7 @@ impl Environment {
219219
let triml_fn = clojure_string::triml::TrimLFn {};
220220
let trimr_fn = clojure_string::trimr::TrimRFn {};
221221
let trim_newline_fn = clojure_string::trim_newline::TrimNewlineFn {};
222+
let split_fn = clojure_string::split::SplitFn {};
222223

223224
// Hardcoded fns
224225
let lexical_eval_fn = Value::LexicalEvalFn {};
@@ -255,23 +256,23 @@ impl Environment {
255256
environment.insert(Symbol::intern("eval"), eval_fn.to_rc_value());
256257

257258
// Thread namespace
258-
environment.insert_into_namespace(
259-
&Symbol::intern("Thread"),
260-
Symbol::intern("sleep"),
261-
thread_sleep_fn.to_rc_value()
262-
);
263-
264-
// System namespace
265-
environment.insert_into_namespace(
266-
&Symbol::intern("System"),
267-
Symbol::intern("nanoTime"),
268-
nanotime_fn.to_rc_value()
269-
);
270-
environment.insert_into_namespace(
271-
&Symbol::intern("System"),
272-
Symbol::intern("getenv"),
273-
get_env_fn.to_rc_value()
274-
);
259+
environment.insert_into_namespace(
260+
&Symbol::intern("Thread"),
261+
Symbol::intern("sleep"),
262+
thread_sleep_fn.to_rc_value(),
263+
);
264+
265+
// System namespace
266+
environment.insert_into_namespace(
267+
&Symbol::intern("System"),
268+
Symbol::intern("nanoTime"),
269+
nanotime_fn.to_rc_value(),
270+
);
271+
environment.insert_into_namespace(
272+
&Symbol::intern("System"),
273+
Symbol::intern("getenv"),
274+
get_env_fn.to_rc_value(),
275+
);
275276

276277
// core.clj wraps calls to the rust implementations
277278
// @TODO add this to clojure.rs.core namespace as clojure.rs.core/slurp
@@ -350,6 +351,12 @@ impl Environment {
350351
trim_newline_fn.to_rc_value(),
351352
);
352353

354+
environment.insert_into_namespace(
355+
&Symbol::intern("clojure.string"),
356+
Symbol::intern("split"),
357+
split_fn.to_rc_value(),
358+
);
359+
353360
environment.insert(Symbol::intern("+"), add_fn.to_rc_value());
354361
environment.insert(Symbol::intern("let"), let_macro.to_rc_value());
355362
environment.insert(Symbol::intern("str"), str_fn.to_rc_value());
@@ -389,12 +396,19 @@ impl Environment {
389396
);
390397
environment.insert(Symbol::intern("read-line"), read_line_fn.to_rc_value());
391398

392-
environment.insert(Symbol::intern("="),equals_fn.to_rc_value());
399+
environment.insert(Symbol::intern("="), equals_fn.to_rc_value());
393400
//
394401
// Read in clojure.core
395402
//
396403
// @TODO its time for a RT (runtime), which environment seems to be becoming
397404
let _ = Repl::new(Rc::clone(&environment)).try_eval_file("./src/clojure/core.clj");
405+
println!(
406+
"{:#?} {:#?}",
407+
&environment.get_current_namespace(),
408+
&environment.get_current_namespace_name()
409+
);
410+
// TODO: should read into namespace if (ns ..) is given in source file
411+
let _ = Repl::new(Rc::clone(&environment)).try_eval_file("./src/clojure/string.clj");
398412

399413
// We can add this back once we have requires
400414
// environment.change_namespace(Symbol::intern("user"));
@@ -528,8 +542,7 @@ mod tests {
528542
MainEnvironment(EnvironmentVal {
529543
curr_ns_sym: _,
530544
namespaces,
531-
}) => namespaces
532-
.get(&Symbol::intern("user"),&Symbol::intern("+")),
545+
}) => namespaces.get(&Symbol::intern("user"), &Symbol::intern("+")),
533546
_ => panic!("new_main_environment() should return Main"),
534547
};
535548

src/error_message.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,7 @@ pub fn index_cannot_be_negative(ind: usize) -> Value {
4444
pub fn generic_err(error: Box<dyn Error>) -> Value {
4545
Value::Condition(error.to_string())
4646
}
47+
48+
pub fn unknown_err(error: String) -> Value {
49+
Value::Condition(error)
50+
}

src/reader.rs

Lines changed: 12 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -404,23 +404,24 @@ pub fn try_read_string(input: &str) -> IResult<&str, Value> {
404404
pub fn try_read_pattern(input: &str) -> IResult<&str, Value> {
405405
named!(hash_parser<&str, &str>, preceded!(consume_clojure_whitespaces_parser, tag!("#")));
406406

407-
let (rest_input, _) = hash_parser(input)?;
408-
let (rest_input,regex_string_val) = try_read_string(rest_input)?;
407+
let (rest_input, _) = hash_parser(input)?;
408+
let (rest_input, regex_string_val) = try_read_string(rest_input)?;
409409

410410
let mut regex_string = String::from("");
411411

412412
// @TODO separate try_read_string into a parser, so we don't have to read a Value
413-
// and then unwrap it
413+
// and then unwrap it
414414
match regex_string_val {
415-
Value::String(reg_str) => { regex_string = reg_str; },
416-
_ => { panic!("try_read_string returned something that wasn't string"); }
415+
Value::String(reg_str) => {
416+
regex_string = reg_str;
417+
}
418+
_ => {
419+
panic!("try_read_string returned something that wasn't string");
420+
}
417421
}
418422

419423
let regex = regex::Regex::new(regex_string.as_str()).unwrap();
420-
Ok((
421-
rest_input,
422-
Value::Pattern(regex),
423-
))
424+
Ok((rest_input, Value::Pattern(regex)))
424425
}
425426

426427
// @TODO Perhaps generalize this, or even generalize it as a reader macro
@@ -845,60 +846,8 @@ mod tests {
845846
fn try_read_bool_false_test() {
846847
assert_eq!(Value::Boolean(false), try_read("false ").ok().unwrap().1)
847848
}
848-
849-
mod regex_tests {
850-
use crate::reader::try_read;
851-
use crate::value::Value;
852-
853-
#[test]
854-
fn try_read_simple_regex_pattern_test() {
855-
assert_eq!(
856-
Value::Pattern(regex::Regex::new("a").unwrap()),
857-
try_read(r##"#"a" "##).ok().unwrap().1
858-
);
859-
}
860-
861-
#[test]
862-
fn try_read_simple_with_escaped_quote_regex_pattern_test() {
863-
assert_eq!(
864-
Value::Pattern(regex::Regex::new("a").unwrap()),
865-
try_read(r###"#"a\"" "###).ok().unwrap().1
866-
);
867-
}
868-
869-
#[test]
870-
fn try_read_regex_pattern_test() {
871-
assert_eq!(
872-
Value::Pattern(regex::Regex::new("hello").unwrap()),
873-
try_read("#\"hello\" ").ok().unwrap().1
874-
);
875-
}
876-
877-
#[test]
878-
fn try_read_regex_pattern_escaped_quote_test() {
879-
assert_eq!(
880-
Value::Pattern(regex::Regex::new("h\"e\"l\"l\"o").unwrap()),
881-
try_read(r#"#"h\"e\"l\"l\"o\"" something"#).ok().unwrap().1
882-
);
883-
}
884-
885-
#[test]
886-
fn try_read_regex_pattern_escaped_quote_prefixed_by_whitespace_test() {
887-
assert_eq!(
888-
Value::Pattern(regex::Regex::new("h\"e\"l\"l \"o").unwrap()),
889-
try_read("#\"h\"e\"l\"l \"o\" something").ok().unwrap().1
890-
);
891-
}
892-
893-
#[test]
894-
fn try_read_regex_pattern_escaped_quote_suffixed_by_whitespace_test() {
895-
assert_eq!(
896-
Value::Pattern(regex::Regex::new("h\"e\"l\" l \"o").unwrap()),
897-
try_read("#\"h\"e\"l\" l \"o\" something").ok().unwrap().1
898-
);
899-
}
900-
}
901849
}
850+
902851
mod regex_tests {
903852
use crate::reader::try_read;
904853
use crate::value::Value;
@@ -943,6 +892,7 @@ mod tests {
943892
);
944893
}
945894
}
895+
946896
mod consume_clojure_whitespaces_tests {
947897
use crate::reader::consume_clojure_whitespaces_parser;
948898
#[test]

0 commit comments

Comments
 (0)