Skip to content

Commit d1fd5a3

Browse files
committed
Fix HashDict so that key comparison uses exact match
This change means that HashDict buckets are nolonger strictly ordered. Previously keys which compared equal (==) but not exactly equal (===) would be considered different for large sizes but equivalent for small sizes. This ambiguity occured because hashes are only equal when terms are exactly equal (===), so both now use exact (===) key comparisons. Small HashDicts are still a single ordered list and large HashDicts contain ordered buckets. Both of these may contain keys that compare equal (==) but are not exactly equal (===), should these keys be present in the same list or bucket the ordering of these keys (relative to each other) is undefined. See github issue #1171 for examples. Note: If assumptions were made about the form of a HashDict they may nolonger be valid. Particularly that HashDicts can nolonger be reliably pattern matched on for equality.
1 parent a3aa1e8 commit d1fd5a3

File tree

4 files changed

+93
-51
lines changed

4 files changed

+93
-51
lines changed

lib/elixir/lib/hash_dict.ex

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -209,24 +209,14 @@ defmodule HashDict do
209209
ordered()
210210
end
211211

212-
def equal?(ordered(bucket: a, size: size), ordered(bucket: b, size: ^size)) do
213-
a == b
214-
end
215-
216-
def equal?(trie(size: size) = a, trie(size: ^size) = b) do
217-
a == b
218-
end
219-
220-
def equal?(ordered() = a, trie() = b) do
221-
equal?(b, a)
222-
end
223-
224-
def equal?(trie(size: size) = a, ordered(bucket: b, size: ^size)) do
225-
:lists.keysort(1, to_list(a)) == :lists.keysort(1, b)
226-
end
227-
228-
def equal?(_, _) do
229-
false
212+
def equal?(dict1, dict2) do
213+
size = elem(dict1, 1)
214+
case elem(dict2, 1) do
215+
^size ->
216+
dict_equal?(dict1, dict1)
217+
_ ->
218+
false
219+
end
230220
end
231221

232222
@doc """
@@ -237,7 +227,7 @@ defmodule HashDict do
237227
end
238228

239229
def to_list(dict) do
240-
dict_fold(dict, [], [&1|&2]) |> :lists.reverse
230+
dict_fold(dict, [], [&1|&2])
241231
end
242232

243233
@doc """
@@ -335,11 +325,11 @@ defmodule HashDict do
335325
## Dict-wide functions
336326

337327
defp dict_get(ordered(bucket: bucket), key) do
338-
:lists.keyfind(key, 1, bucket)
328+
bucket_get(bucket, key)
339329
end
340330

341331
defp dict_get(trie(root: root, depth: depth), key) do
342-
:lists.keyfind(key, 1, node_bucket(root, depth, bucket_hash(key)))
332+
bucket_get(node_bucket(root, depth, bucket_hash(key)), key)
343333
end
344334

345335
defp dict_fold(ordered(bucket: bucket), acc, fun) do
@@ -402,28 +392,64 @@ defmodule HashDict do
402392
end
403393
end
404394

395+
defp dict_equal?(dict1, dict2) do
396+
fun = fn({ key, value }, acc) ->
397+
case fetch(dict2, key) do
398+
{ _ok, ^value } ->
399+
acc
400+
{ _ok, _other} ->
401+
throw(:error)
402+
:error ->
403+
throw(:error)
404+
end
405+
end
406+
try do
407+
reduce(dict1, true, fun)
408+
catch
409+
:error ->
410+
false
411+
end
412+
end
413+
405414
## Bucket helpers
406415

407-
# Puts a value in the bucket
408-
defp bucket_put([{k,_}=e|bucket], key, { :put, value }) when key < k do
409-
{ [{key,value},e|bucket], 1 }
416+
# Get value from the bucket
417+
defp bucket_get([{k,_}|_bucket], key) when k > key do
418+
:false
410419
end
411420

412-
defp bucket_put([{k,_}=e|bucket], key, { :update, initial, _fun }) when key < k do
413-
{ [{key,initial},e|bucket], 1 }
421+
defp bucket_get([{key,_}=e|_bucket], key) do
422+
e
414423
end
415424

416-
defp bucket_put([{k,_}=e|bucket], key, value) when key > k do
417-
{ rest, count } = bucket_put(bucket, key, value)
418-
{ [e|rest], count }
425+
defp bucket_get([_e|bucket], key) do
426+
bucket_get(bucket, key)
419427
end
420428

421-
defp bucket_put([{_,_}|bucket], key, { :put, value }) do
429+
defp bucket_get([], _key) do
430+
:false
431+
end
432+
433+
# Puts a value in the bucket
434+
defp bucket_put([{k,_}|_]=bucket, key, { :put, value }) when k > key do
435+
{ [{key, value}|bucket], 1 }
436+
end
437+
438+
defp bucket_put([{k,_}|_]=bucket, key, { :update, initial, _fun }) when k > key do
439+
{ [{key, initial}|bucket], 1 }
440+
end
441+
442+
defp bucket_put([{key,_}|bucket], key, { :put, value }) do
422443
{ [{key,value}|bucket], 0 }
423444
end
424445

425-
defp bucket_put([{_,value}|bucket], key, { :update, _initial, fun }) do
426-
{ [{key,fun.(value)}|bucket], 0 }
446+
defp bucket_put([{key,value}|bucket], key, { :update, _initial, fun }) do
447+
{ [{key, fun.(value)}|bucket], 0 }
448+
end
449+
450+
defp bucket_put([e|bucket], key, value) do
451+
{ rest, count } = bucket_put(bucket, key, value)
452+
{ [e|rest], count }
427453
end
428454

429455
defp bucket_put([], key, { :put, value }) do
@@ -436,23 +462,23 @@ defmodule HashDict do
436462

437463
# Puts a value in the bucket without returning
438464
# the operation value
439-
defp bucket_put!([{k,_}=e|bucket], key, value) when key < k, do: [{key,value},e|bucket]
440-
defp bucket_put!([{k,_}=e|bucket], key, value) when key > k, do: [e|bucket_put!(bucket, key, value)]
441-
defp bucket_put!([{_,_}|bucket], key, value), do: [{key,value}|bucket]
465+
defp bucket_put!([{k,_}|_]=bucket, key, value) when k > key, do: [{key,value}|bucket]
466+
defp bucket_put!([{key,_}|bucket], key, value), do: [{key,value}|bucket]
467+
defp bucket_put!([{_,_}=e|bucket], key, value), do: [e|bucket_put!(bucket, key, value)]
442468
defp bucket_put!([], key, value), do: [{key,value}]
443469

444470
# Deletes a key from the bucket
445-
defp bucket_delete([{k,_}|_] = bucket, key) when key < k do
471+
defp bucket_delete([{k,_}|_]=bucket, key) when k > key do
446472
{ bucket, nil, 0 }
447473
end
448474

449-
defp bucket_delete([{k,_}=e|bucket], key) when key > k do
450-
{ rest, value, count } = bucket_delete(bucket, key)
451-
{ [e|rest], value, count }
475+
defp bucket_delete([{key,value}|bucket], key) do
476+
{ bucket, value, -1 }
452477
end
453478

454-
defp bucket_delete([{_,value}|bucket], _key) do
455-
{ bucket, value, -1 }
479+
defp bucket_delete([e|bucket], key) do
480+
{ rest, value, count } = bucket_delete(bucket, key)
481+
{ [e|rest], value, count }
456482
end
457483

458484
defp bucket_delete([], _key) do

lib/elixir/test/elixir/hash_dict_test.exs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ defmodule HashDictTest do
6262
dict = HashDict.put_new(dict, 11, 13)
6363
assert HashDict.get(dict, 11) == 13
6464
assert HashDict.size(dict) == 9
65+
66+
dict = HashDict.put_new(dict, 11.0, 15)
67+
assert HashDict.get(dict, 11.0) == 15
68+
assert HashDict.get(dict, 11) == 13
69+
assert HashDict.size(dict) == 10
6570
end
6671

6772
test :update do
@@ -82,6 +87,17 @@ defmodule HashDictTest do
8287
dict = HashDict.update(dict, 11, 13, &1 * 2)
8388
assert HashDict.get(dict, 11) == 13
8489
assert HashDict.size(dict) == 9
90+
91+
assert_raise KeyError, fn->
92+
HashDict.update(dict, 11.0, &1 * 2)
93+
end
94+
dict = HashDict.update(dict, 11.0, 15, &1 * 2)
95+
assert HashDict.get(dict, 11.0) == 15
96+
assert HashDict.size(dict) == 10
97+
dict = HashDict.update(dict, 11.0, 15, &1 * 2)
98+
assert HashDict.get(dict, 11.0) == 30
99+
assert HashDict.get(dict, 11) == 13
100+
assert HashDict.size(dict) == 10
85101
end
86102

87103
test :to_list do
@@ -95,13 +111,13 @@ defmodule HashDictTest do
95111
list = dict |> HashDict.to_list
96112
assert length(list) == 20
97113
assert { 1, 1 } in list
98-
assert list == Enum.to_list(dict)
114+
assert :lists.keysort(1, list) == :lists.keysort(1, Enum.to_list(dict))
99115

100116
dict = filled_dict(120)
101117
list = dict |> HashDict.to_list
102118
assert length(list) == 120
103119
assert { 1, 1 } in list
104-
assert list == Enum.to_list(dict)
120+
assert :lists.keysort(1, list) == :lists.keysort(1, Enum.to_list(dict))
105121
end
106122

107123
test :keys do

lib/elixir/test/elixir/keyword_test.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ defmodule KeywordTest do
2525
list = [{:b,2},{:a,1},{:c,3}]
2626
dict = HashDict.new list
2727
assert Keyword.from_enum(list) == [b: 2, a: 1, c: 3]
28-
assert Keyword.from_enum(dict) == [a: 1, b: 2, c: 3]
28+
assert Keyword.equal?(Keyword.from_enum(dict), [a: 1, b: 2, c: 3])
2929
end
3030

3131
test :keyword? do

lib/elixir/test/elixir/uri_test.exs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@ defmodule URITest do
2323
end
2424

2525
test :decode_query do
26-
assert URI.decode_query("q=search%20query&cookie=ab%26cd&block%20buster=") ==
27-
HashDict.new [{"block buster", ""}, {"cookie", "ab&cd"}, {"q", "search query"}]
28-
assert URI.decode_query("") == HashDict.new
29-
assert URI.decode_query("something=weird%3Dhappening") == HashDict.new [{"something", "weird=happening"}]
26+
assert HashDict.equal?(URI.decode_query("q=search%20query&cookie=ab%26cd&block%20buster="),
27+
HashDict.new [{"block buster", ""}, {"cookie", "ab&cd"}, {"q", "search query"}])
28+
assert HashDict.equal?(URI.decode_query(""), HashDict.new)
29+
assert HashDict.equal?(URI.decode_query("something=weird%3Dhappening"), HashDict.new [{"something", "weird=happening"}])
3030

3131
assert URI.decode_query("", []) == []
3232

33-
assert URI.decode_query("garbage") == HashDict.new [{"garbage", nil}]
34-
assert URI.decode_query("=value") == HashDict.new [{"", "value"}]
35-
assert URI.decode_query("something=weird=happening") == HashDict.new [{"something", "weird=happening"}]
33+
assert HashDict.equal?(URI.decode_query("garbage"), HashDict.new [{"garbage", nil}])
34+
assert HashDict.equal?(URI.decode_query("=value"), HashDict.new [{"", "value"}])
35+
assert HashDict.equal?(URI.decode_query("something=weird=happening"), HashDict.new [{"something", "weird=happening"}])
3636
end
3737

3838
test :decoder do

0 commit comments

Comments
 (0)