Skip to content

Commit 5da4251

Browse files
authored
basilisp.data namespace (#555)
* `basilisp.data` namespace * More stuff * Respect equality partition * Docstring and slightly changing the interfaces * Fix set tests * Simplify some logic in the sequence differ * More test cases * Maybe fix * Ok * More sequence tests
1 parent e50da8b commit 5da4251

File tree

2 files changed

+373
-0
lines changed

2 files changed

+373
-0
lines changed

src/basilisp/data.lpy

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
(ns basilisp.data
2+
(:require
3+
[basilisp.set :as set]))
4+
5+
(defprotocol IDiffable
  (diff* [this other]
    "Compute the difference between `this` and `other`.

    Implementations return a three element vector of the form
    `[items-in-a items-in-b items-in-both]`, where `this` takes the role of
    `a` and `other` takes the role of `b`."))
9+
10+
(defn diff
  "Compare `a` and `b`, returning a three element vector containing
  `[items-in-a items-in-b items-in-both]`.

  When `a` and `b` are equal as by `=`, the result is `[nil nil a]` and `a` is
  returned exactly as it was passed in. Otherwise both arguments are passed
  through `py->lisp` and diffed according to the following heuristics:
   - Mapping types (for both Basilisp maps and Python dicts) are compared
     by key and values and are sub-diffed where values differ for the same
     key. Python dicts will be coerced to Basilisp maps before diffing.
   - Sequential types (for Basilisp lists and vectors and Python lists and
     tuples) are compared by value at the corresponding index in the other
     collection. Values at each index are compared according to the rules
     in this list. Sequential types are always returned as vectors.
   - Sets (for both Basilisp and Python set types) are compared by their
     values, which are never sub-diffed. Python sets and frozensets will
     be coerced to Basilisp sets before diffing.
   - All other types are compared for equality as by `=`."
  [a b]
  (if-not (= a b)
    ;; Only unequal values need a structural comparison.
    (diff* (py->lisp a) (py->lisp b))
    [nil nil a]))
30+
31+
;; Fallback for every value without a more specific implementation: the two
;; values are treated as opaque scalars which share nothing. When reached via
;; `diff`, equality has already been ruled out before dispatching here.
(extend-protocol IDiffable
  python/object
  (diff* [x y]
    [x y nil]))
35+
36+
(defn ^:private diff-map
  "Diff the map `this` against `other`, returning a vector of
  `[only-in-this only-in-other in-both]`.

  If `other` is not a map, the two values have nothing in common and
  `[this other nil]` is returned. Otherwise the values of every key of `this`
  which is also present in `other` are sub-diffed with `diff`. Keys present in
  only one of the maps are left untouched in that map's portion of the result.

  Whether a key is retained in each portion is decided with explicit `nil?`
  checks rather than truthiness, so `false` and `nil` values are reported in
  the correct portion instead of being silently dropped."
  [this other]
  (if-not (map? other)
    [this other nil]
    (loop [[[k] :as a-items] (seq this)
           a    this
           b    other
           both nil]
      (if (seq a-items)
        (if (contains? b k)
          (let [va (get a k)
                vb (get b k)
                [in-a in-b in-both] (diff va vb)
                ;; A nil `in-both` normally means the values share nothing,
                ;; unless both values are themselves nil (and thus equal).
                same?   (or (not (nil? in-both))
                            (and (nil? va) (nil? vb)))
                ;; A nil part is still a real value (e.g. `{:a nil}` against
                ;; `{:a 1}`) whenever the two values were not the same.
                keep-a? (or (not (nil? in-a)) (not same?))
                keep-b? (or (not (nil? in-b)) (not same?))]
            (recur (rest a-items)
                   (cond-> (dissoc a k) keep-a? (assoc k in-a))
                   (cond-> (dissoc b k) keep-b? (assoc k in-b))
                   (cond-> both same? (assoc k in-both))))
          (recur (rest a-items) a b both))
        [a b both]))))
53+
54+
(defn ^:private collect-rest
  "Append whatever elements remain in the seq `source` onto the vector
  `target`.

  Returns the combined vector, or nil if every element of it is nil (which is
  also the case when the combined vector is empty)."
  [target source]
  (let [combined (reduce conj target source)]
    (when-not (every? nil? combined)
      combined)))
65+
66+
(defn ^:private include-seq-elem?
  "Return a truthy value if `elem` should be appended to a diff vector.

  A truthy `s` signals that more elements follow, in which case every element
  is kept (including `nil` placeholders) so that later elements stay at their
  original index. Once `s` is falsey, only a non-`nil` `elem` is kept, which
  stops diff vectors from ending in `nil` elements."
  [s elem]
  (or s (not (nil? elem))))
72+
73+
(defn ^:private diff-seq
  "Diff the sequential collection `this` against `other` index by index,
  returning a vector of `[only-in-this only-in-other in-both]`.

  If `other` is not sequential, `[this other nil]` is returned. Otherwise the
  elements at each shared index are sub-diffed with `diff` and collected into
  vectors, with `nil` placeholders keeping later elements at their original
  index. Elements past the end of the shorter collection are appended to the
  portion of the collection they came from. Any portion consisting solely of
  `nil` elements is returned as nil."
  [this other]
  (if (sequential? other)
    (loop [xs     (seq this)
           ys     (seq other)
           only-x []
           only-y []
           shared []]
      (if-not (and xs ys)
        ;; At least one side is exhausted; the leftovers of the other side
        ;; belong exclusively to that side.
        [(collect-rest only-x xs)
         (collect-rest only-y ys)
         (when-not (every? nil? shared)
           shared)]
        (let [[x-part y-part shared-part] (diff (first xs) (first ys))
              more-xs (next xs)
              more-ys (next ys)]
          (recur more-xs
                 more-ys
                 (cond-> only-x
                   (include-seq-elem? more-xs x-part)
                   (conj x-part))
                 (cond-> only-y
                   (include-seq-elem? more-ys y-part)
                   (conj y-part))
                 ;; Shared elements can only continue while both sides do.
                 (cond-> shared
                   (include-seq-elem? (and more-xs more-ys) shared-part)
                   (conj shared-part))))))
    [this other nil]))
98+
99+
(defn ^:private diff-set
  "Diff the set `this` against `other`, returning a vector of
  `[only-in-this only-in-other in-both]`.

  Members are compared as whole values and are never sub-diffed. If `other` is
  not a set, or the two sets have no members in common, `[this other nil]` is
  returned."
  [this other]
  (let [common (when (set? other)
                 (set/intersection this other))]
    (if (seq common)
      [(set/difference this common)
       (set/difference other common)
       common]
      [this other nil])))
109+
110+
;; Register the collection specific diff implementations.

;; Maps are diffed key by key.
(extend basilisp.lang.interfaces/IPersistentMap
  IDiffable
  {:diff* diff-map})

;; Sets are diffed by membership.
(extend basilisp.lang.interfaces/IPersistentSet
  IDiffable
  {:diff* diff-set})

;; Vectors, lists, and seqs all share the index based sequential diff.
(extend basilisp.lang.interfaces/IPersistentVector
  IDiffable
  {:diff* diff-seq})
(extend basilisp.lang.interfaces/IPersistentList
  IDiffable
  {:diff* diff-seq})
(extend basilisp.lang.interfaces/ISeq
  IDiffable
  {:diff* diff-seq})

tests/basilisp/test_data.lpy

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
(ns tests.basilisp.test-data
2+
(:import datetime)
3+
(:require
4+
[basilisp.data :refer [diff]]
5+
[basilisp.test :refer [deftest are testing]]))
6+
7+
(deftest diff-test
8+
(testing "scalar types"
9+
(testing "no diff if equal"
10+
(are [x] (= [nil nil x] (diff x x))
11+
""
12+
"string"
13+
-1
14+
0
15+
1
16+
-1.0
17+
0.0
18+
1.0
19+
true
20+
false
21+
nil
22+
23+
:kw
24+
:ns/kw
25+
'sym
26+
'ns/sym
27+
28+
#inst "2020-06-04T22:32:29.871744"
29+
(datetime/date 2020 6 4)
30+
(datetime/time 22 35 38)
31+
3.1415926535M
32+
22/7
33+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc"))
34+
35+
(testing "completely different if not equal"
36+
(are [x y] (= [x y nil] (diff x y))
37+
"" "not empty"
38+
"string" "not a string"
39+
-1 -9327
40+
0 3
41+
1 1373
42+
-1.0 -71.183
43+
0.0 1.373
44+
1.0 43727272.3
45+
true false
46+
false true
47+
nil :not-nil
48+
49+
:kw :other-kw
50+
:ns/kw :other-ns/kw
51+
'sym 'other-sym
52+
'ns/sym 'other-ns/sym
53+
54+
#inst "2020-06-04T22:32:29.871744" #inst "2019-01-03T11:15:30.871744"
55+
(datetime/date 2020 6 4) (datetime/time 22 35 38)
56+
(datetime/time 22 35 38) (datetime/date 2020 6 4)
57+
3.1415926535M 2.7182818284M
58+
22/7 1/2
59+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc" #uuid "33fef9e5-071c-47e6-886c-f8790bf1d07d")))
60+
61+
(testing "set types"
62+
(testing "set types never equal to non-set type"
63+
(are [x] (= [#{} x nil] (diff #{} x))
64+
""
65+
"string"
66+
-1
67+
0
68+
1
69+
-1.0
70+
0.0
71+
1.0
72+
true
73+
false
74+
nil
75+
76+
:kw
77+
:ns/kw
78+
'sym
79+
'ns/sym
80+
81+
#inst "2020-06-04T22:32:29.871744"
82+
(datetime/date 2020 6 4)
83+
(datetime/time 22 35 38)
84+
3.1415926535M
85+
22/7
86+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc"
87+
88+
{}
89+
[]
90+
'()))
91+
92+
(testing "Basilisp set"
93+
(are [x y z] (= z (diff x y))
94+
#{} #{} [nil nil #{}]
95+
#{:a} #{:a} [nil nil #{:a}]
96+
#{:a} #{:b} [#{:a} #{:b} nil]
97+
#{:a :b} #{:b :c} [#{:a} #{:c} #{:b}]
98+
99+
;; no subdiffs
100+
#{:a [:b :c]} #{:a [:c :d]} [#{[:b :c]} #{[:c :d]} #{:a}]
101+
#{:a '(:b :c)} #{:a '(:c :d)} [#{'(:b :c)} #{'(:c :d)} #{:a}]
102+
#{:a #{:b :c}} #{:a #{:c :d}} [#{#{:b :c}} #{#{:c :d}} #{:a}]
103+
#{:a {:b 2 :c 3}} #{:a {:c 3 :d 4}} [#{{:b 2 :c 3}} #{{:c 3 :d 4}} #{:a}]))
104+
105+
(testing "mutable Python set"
106+
(are [x y z] (= z (diff (python/set x) (python/set y)))
107+
[] [] [nil nil #{}]
108+
[:a] [:a] [nil nil #{:a}]
109+
[:a] [:b] [#{:a} #{:b} nil]
110+
[:a :b] [:b :c] [#{:a} #{:c} #{:b}]
111+
112+
;; no subdiffs
113+
[:a [:b :c]] [:a [:c :d]] [#{[:b :c]} #{[:c :d]} #{:a}]
114+
[:a '(:b :c)] [:a '(:c :d)] [#{'(:b :c)} #{'(:c :d)} #{:a}]
115+
[:a #{:b :c}] [:a #{:c :d}] [#{#{:b :c}} #{#{:c :d}} #{:a}]
116+
[:a {:b 2 :c 3}] [:a {:c 3 :d 4}] [#{{:b 2 :c 3}} #{{:c 3 :d 4}} #{:a}]))
117+
118+
(testing "Python frozenset"
119+
(are [x y z] (= z (diff (python/frozenset x) (python/frozenset y)))
120+
[] [] [nil nil #{}]
121+
[:a] [:a] [nil nil #{:a}]
122+
[:a] [:b] [#{:a} #{:b} nil]
123+
[:a :b] [:b :c] [#{:a} #{:c} #{:b}]
124+
125+
;; no subdiffs
126+
[:a [:b :c]] [:a [:c :d]] [#{[:b :c]} #{[:c :d]} #{:a}]
127+
[:a '(:b :c)] [:a '(:c :d)] [#{'(:b :c)} #{'(:c :d)} #{:a}]
128+
[:a #{:b :c}] [:a #{:c :d}] [#{#{:b :c}} #{#{:c :d}} #{:a}]
129+
[:a {:b 2 :c 3}] [:a {:c 3 :d 4}] [#{{:b 2 :c 3}} #{{:c 3 :d 4}} #{:a}])))
130+
131+
(testing "sequential types"
132+
(testing "vector types never equal to non-sequential type"
133+
(are [x] (= [[] x nil] (diff [] x))
134+
""
135+
"string"
136+
-1
137+
0
138+
1
139+
-1.0
140+
0.0
141+
1.0
142+
true
143+
false
144+
nil
145+
146+
:kw
147+
:ns/kw
148+
'sym
149+
'ns/sym
150+
151+
#inst "2020-06-04T22:32:29.871744"
152+
(datetime/date 2020 6 4)
153+
(datetime/time 22 35 38)
154+
3.1415926535M
155+
22/7
156+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc"
157+
158+
{}
159+
{:a 1}
160+
#{}
161+
#{:a :b :c}))
162+
163+
(testing "list types never equal to non-sequential type"
164+
(are [x] (= ['() x nil] (diff '() x))
165+
""
166+
"string"
167+
-1
168+
0
169+
1
170+
-1.0
171+
0.0
172+
1.0
173+
true
174+
false
175+
nil
176+
177+
:kw
178+
:ns/kw
179+
'sym
180+
'ns/sym
181+
182+
#inst "2020-06-04T22:32:29.871744"
183+
(datetime/date 2020 6 4)
184+
(datetime/time 22 35 38)
185+
3.1415926535M
186+
22/7
187+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc"
188+
189+
{}
190+
{:a 1}
191+
#{}
192+
#{:a :b :c}))
193+
194+
(testing "sequential types equal"
195+
(are [x y z] (= z (diff x y))
196+
[] [] [nil nil []]
197+
[1 2 3] [2 3 4] [[1 2 3] [2 3 4] nil]
198+
[1 2 3] [1 2 4] [[nil nil 3] [nil nil 4] [1 2]]
199+
[1 2 3] [1 2 3] [nil nil [1 2 3]]
200+
[1 2 3] [1 2 3 4] [nil [nil nil nil 4] [1 2 3]]
201+
[1 2 3] [5 9 3 2 3 7] [[1 2] [5 9 nil 2 3 7] [nil nil 3]]
202+
[{:a 1 :b 2} 2 3] [{:a 1 :b 3} 0 3] [[{:b 2} 2] [{:b 3} 0] [{:a 1} nil 3]]
203+
[1 #{:a :b :c} 3] [1 #{:b :c :d} 4 3] [[nil #{:a} 3] [nil #{:d} 4 3] [1 #{:b :c}]]
204+
205+
'() '() [nil nil []]
206+
'(1 2 3) '(2 3 4) [[1 2 3] [2 3 4] nil]
207+
'(1 2 3) '(1 2 4) [[nil nil 3] [nil nil 4] [1 2]]
208+
'(1 2 3) '(1 2 3) [nil nil [1 2 3]]
209+
'(1 2 3) '(1 2 3 4) [nil [nil nil nil 4] [1 2 3]]
210+
'(1 2 3) '(5 9 3 2 3 7) [[1 2] [5 9 nil 2 3 7] [nil nil 3]]
211+
'({:a 1 :b 2} 2 3) '({:a 1 :b 3} 0 3) [[{:b 2} 2] [{:b 3} 0] [{:a 1} nil 3]]
212+
'(1 #{:a :b :c} 3) '(1 #{:b :c :d} 4 3) [[nil #{:a} 3] [nil #{:d} 4 3] [1 #{:b :c}]])))
213+
214+
(testing "map types"
215+
(testing "map types never equal to non-map type"
216+
(are [x] (= [{} x nil] (diff {} x))
217+
""
218+
"string"
219+
-1
220+
0
221+
1
222+
-1.0
223+
0.0
224+
1.0
225+
true
226+
false
227+
nil
228+
229+
:kw
230+
:ns/kw
231+
'sym
232+
'ns/sym
233+
234+
#inst "2020-06-04T22:32:29.871744"
235+
(datetime/date 2020 6 4)
236+
(datetime/time 22 35 38)
237+
3.1415926535M
238+
22/7
239+
#uuid "632ac3d8-fcfd-4d36-a05b-a54277a345bc"
240+
241+
[]
242+
[:a :b :c]
243+
'()
244+
'(:a :b :c)
245+
#{}
246+
#{:a :b :c}))
247+
248+
(are [x y z] (= z (diff x y))
249+
{} {} [nil nil {}]
250+
{:a 1} {:a 1} [nil nil {:a 1}]
251+
{:a 1} {:a 2} [{:a 1} {:a 2} nil]
252+
{:a 1} {:b 2} [{:a 1} {:b 2} nil]
253+
{:a #{1 2 3}} {:a #{2 3 4}} [{:a #{1}} {:a #{4}} {:a #{3 2}}]
254+
{:a 1 :b {:c #{1 2 3}}} {:a 2 :b {:c #{2 3 4}}} [{:a 1 :b {:c #{1}}} {:a 2 :b {:c #{4}}} {:b {:c #{2 3}}}]
255+
{:a [1 2 3]} {:a [5 9 3 2 3 7]} [{:a [1 2]} {:a [5 9 nil 2 3 7]} {:a [nil nil 3]}]
256+
{:a [1 2 3]} {:a '(5 9 3 2 3 7)} [{:a [1 2]} {:a [5 9 nil 2 3 7]} {:a [nil nil 3]}]
257+
{:a 1 :b {:c '(1 2 3)}} {:a 2 :b {:c [5 9 3 2 3 7]}} [{:a 1 :b {:c [1 2]}} {:a 2 :b {:c [5 9 nil 2 3 7]}} {:b {:c [nil nil 3]}}]
258+
{:a 1 :b {:c 2 :d 3}} {:a 1 :b {:c 2 :e 5}} [{:b {:d 3}} {:b {:e 5}} {:a 1 :b {:c 2}}]
259+
{:a {:e :f :g :h} :b :c} {:a {:e :z :g :h} :c :d} [{:a {:e :f} :b :c} {:a {:e :z} :c :d} {:a {:g :h}}])))

0 commit comments

Comments
 (0)