Skip to content

Commit 26cf547

Browse files
author
Vanessa K Lee
committed
clean up module docs and tests;
move some code to Util
1 parent cb6fea9 commit 26cf547

File tree

6 files changed

+45
-40
lines changed

6 files changed

+45
-40
lines changed

lib/akin.ex

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
defmodule Akin do
22
@moduledoc """
3-
Compare two strings for similarity. Options accepted in a keyword list (i.e. [ngram_size: 3]).
3+
Akin
4+
=======
5+
6+
Functions for comparing two strings for similarity using a collection of string comparison algorithms for Elixir. Algorithms can be called independently or in total to return a map of metrics.
7+
8+
## Options
9+
10+
Options are accepted in a keyword list (e.g. `[ngram_size: 3]`).
411
512
1. `algorithms`: algorithms to use in comparison. Accepts the name or a keyword list. Default is algorithms/0.
613
1. `metric` - algorithm metric. Default is both
@@ -20,22 +27,18 @@ defmodule Akin do
2027
1. `stem`: boolean representing whether to compare the stemmed version of the strings; uses Stemmer. Default `false`
2128
"""
2229
import Akin.Util,
23-
only: [list_algorithms: 1, modulize: 1, compose: 1, opts: 2]
30+
only: [list_algorithms: 1, modulize: 1, compose: 1, opts: 2, r: 1, default_opts: 0]
2431

2532
alias Akin.Corpus
2633
alias Akin.Names
2734

28-
NimbleCSV.define(CSVParse, separator: "\t")
29-
30-
@opts [ngram_size: 2, level: "normal", short_length: 8, match_at: 0.9]
31-
3235
@spec compare(binary() | %Corpus{}, binary() | %Corpus{}, keyword()) :: float()
3336
@doc """
3437
Compare two strings. Return map of algorithm metrics.
3538
3639
Options accepted as a keyword list. If no options are given, default values will be used.
3740
"""
38-
def compare(left, right, opts \\ @opts)
41+
def compare(left, right, opts \\ default_opts())
3942

4043
def compare(left, right, opts) when is_binary(left) and is_binary(right) do
4144
if opts(opts, :stem) do
@@ -67,7 +70,7 @@ defmodule Akin do
6770
Compare a string against a list of strings. Matches are determined by algorithm metrics equal to or higher than the
6871
`match_at` option. Return a list of strings that are a likely match.
6972
"""
70-
def match_names(left, rights, opts \\ @opts)
73+
def match_names(left, rights, opts \\ default_opts())
7174

7275
def match_names(left, rights, opts) when is_binary(left) and is_list(rights) do
7376
rights = Enum.map(rights, fn right -> compose(right) end)
@@ -90,7 +93,7 @@ defmodule Akin do
9093
Compare a string against a list of strings. Matches are determined by algorithm metrics equal to or higher than the
9194
`match_at` option. Return a list of strings that are a likely match and their algorithm metrics.
9295
"""
93-
def match_names_metrics(left, rights, opts \\ @opts)
96+
def match_names_metrics(left, rights, opts \\ default_opts())
9497

9598
def match_names_metrics(left, rights, opts) when is_binary(left) and is_list(rights) do
9699
Enum.reduce(rights, [], fn right, acc ->
@@ -116,7 +119,7 @@ defmodule Akin do
116119
metrics equal to or higher than the `match_at` option. Return a list of strings that are a likely
117120
match and their algorithm metrics.
118121
"""
119-
def match_name_metrics(left, rights, opts \\ @opts)
122+
def match_name_metrics(left, rights, opts \\ default_opts())
120123

121124
def match_name_metrics(left, right, opts) when is_binary(left) and is_binary(right) do
122125
left = compose(left)
@@ -150,16 +153,4 @@ defmodule Akin do
150153
|> List.flatten()
151154
|> Enum.uniq()
152155
end
153-
154-
@doc """
155-
Return the default option values
156-
"""
157-
def default_opts, do: @opts
158-
159-
@doc """
160-
Round data types that can be rounded to 2 decimal points.
161-
"""
162-
def r(v) when is_float(v), do: Float.round(v, 2)
163-
def r(v) when is_binary(v), do: Float.round(String.to_float(v), 2)
164-
def r(v) when is_integer(v), do: Float.round(v / 1, 2)
165156
end

lib/akin/algorithms/jaccard.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ defmodule Akin.Jaccard do
1717
iex> Akin.Jaccard.compare(%Akin.Corpus{string: "contact"}, %Akin.Corpus{string: "context"}, [ngram_size: 1])
1818
0.5555555555555556
1919
"""
20-
def compare(left, right, opts \\ []) when is_list(opts) do
20+
def compare(left, right, opts \\ []) do
2121
perform(left, right, opts(opts, :ngram_size))
2222
end
2323

lib/akin/algorithms/names.ex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ defmodule Akin.Names do
33
Function specific to the comparison and matching of names. Returns matching names and metrics.
44
"""
55
@behaviour Akin.Task
6-
import Akin.Util, only: [compose: 1, opts: 2, len: 1]
6+
import Akin.Util, only: [compose: 1, opts: 2, len: 1, r: 1]
77
alias Akin.Helpers.InitialsComparison
88
alias Akin.Corpus
99

@@ -35,7 +35,7 @@ defmodule Akin.Names do
3535

3636
score =
3737
calc(metrics, weight, short_length, len(right.string))
38-
|> Enum.map(fn {k, v} -> {k, Akin.r(v)} end)
38+
|> Enum.map(fn {k, v} -> {k, r(v)} end)
3939

4040
%{scores: score}
4141
end

lib/akin/algorithms/substring_double_metaphone.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ defmodule Akin.SubstringDoubleMetaphone do
2626
"""
2727
def compare(left, right, opts \\ [])
2828

29-
def compare(%Corpus{list: left}, %Corpus{list: right}, opts) when is_list(opts) do
29+
def compare(%Corpus{list: left}, %Corpus{list: right}, opts) do
3030
Double.substring_compare(left, right, opts) / 1.0
3131
end
3232
end

lib/akin/util.ex

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
defmodule Akin.Util do
22
@moduledoc """
3-
Module for utilities to handle string preparation, manipulation, and inspection.
3+
Utilities for string preparation, manipulation, comparison, and inspection.
44
"""
55
alias Akin.Corpus
66

7+
@opts [ngram_size: 2, level: "normal", short_length: 8, match_at: 0.9]
8+
79
@algorithms [
810
"bag_distance",
911
"substring_set",
@@ -36,6 +38,11 @@ defmodule Akin.Util do
3638
]
3739
@nontext_codepoints ~r/[\x{203C}\x{2049}\x{2122}\x{2139}\x{2194}-\x{2199}\x{21A9}-\x{21AA}\x{231A}-\x{231B}\x{2328}\x{23CF}\x{23E9}-\x{23F3}\x{23F8}-\x{23FA}\x{24C2}\x{25AA}-\x{25AB}\x{25B6}\x{25C0}\x{25FB}-\x{25FE}\x{2600}-\x{2604}\x{260E}\x{2611}\x{2614}-\x{2615}\x{2618}\x{261D}\x{2620}\x{2622}-\x{2623}\x{2626}\x{262A}\x{262E}-\x{262F}\x{2638}-\x{263A}\x{2640}\x{2642}\x{2648}-\x{2653}\x{2660}\x{2663}\x{2665}-\x{2666}\x{2668}\x{267B}\x{267E}-\x{267F}\x{2692}-\x{2697}\x{2699}\x{269B}-\x{269C}\x{26A0}-\x{26A1}\x{26AA}-\x{26AB}\x{26B0}-\x{26B1}\x{26BD}-\x{26BE}\x{26C4}-\x{26C5}\x{26C8}\x{26CE}\x{26CF}\x{26D1}\x{26D3}-\x{26D4}\x{26E9}-\x{26EA}\x{26F0}-\x{26F5}\x{26F7}-\x{26FA}\x{26FD}\x{2702}\x{2705}\x{2708}-\x{2709}\x{270A}-\x{270B}\x{270C}-\x{270D}\x{270F}\x{2712}\x{2714}\x{2716}\x{271D}\x{2721}\x{2728}\x{2733}-\x{2734}\x{2744}\x{2747}\x{274C}\x{274E}\x{2753}-\x{2755}\x{2757}\x{2763}-\x{2764}\x{2795}-\x{2797}\x{27A1}\x{27B0}\x{27BF}\x{2934}-\x{2935}\x{2B05}-\x{2B07}\x{2B1B}-\x{2B1C}\x{2B50}\x{2B55}\x{3030}\x{303D}\x{3297}\x{3299}\x{1F004}\x{1F0CF}\x{1F170}-\x{1F171}\x{1F17E}\x{1F17F}\x{1F18E}\x{1F191}-\x{1F19A}\x{1F1E6}-\x{1F1FF}\x{1F201}-\x{1F202}\x{1F21A}\x{1F22F}\x{1F232}-\x{1F23A}\x{1F250}-\x{1F251}\x{1F300}-\x{1F320}\x{1F321}\x{1F324}-\x{1F32C}\x{1F32D}-\x{1F32F}\x{1F330}-\x{1F335}\x{1F336}\x{1F337}-\x{1F37C}\x{1F37D}\x{1F37E}-\x{1F37F}\x{1F380}-\x{1F393}\x{1F396}-\x{1F397}\x{1F399}-\x{1F39B}\x{1F39E}-\x{1F39F}\x{1F3A0}-\x{1F3C4}\x{1F3C5}\x{1F3C6}-\x{1F3CA}\x{1F3CB}-\x{1F3CE}\x{1F3CF}-\x{1F3D3}\x{1F3D4}-\x{1F3DF}\x{1F3E0}-\x{1F3F0}\x{1F3F3}-\x{1F3F5}\x{1F3F7}\x{1F3F8}-\x{1F3FF}\x{1F400}-\x{1F43E}\x{1F43F}\x{1F440}\x{1F441}\x{1F442}-\x{1F4F7}\x{1F4F8}\x{1F4F9}-\x{1F4FC}\x{1F4FD}\x{1F4FF}\x{1F500}-\x{1F53D}\x{1F549}-\x{1F54A}\x{1F54B}-\x{1F54E}\x{1F550}-\x{1F567}\x{1F56F}-\x{1F570}\x{1F573}-\x{1F579}\x{1F57A}\x{1F587}\x{1F58A}-\x{1F58D}\x{1F590}\x{1F595}-\x{1F596}\x{1F5A4}\x{1F5A5}\x{1F5A8}\x{1F5B1}-\x{1F5B2}\x{1F5BC}\x{1F5C2}-\x{1F5C4}\x{1F5D1}-\x{1F5D3}\x{1F5DC}
-\x{1F5DE}\x{1F5E1}\x{1F5E3}\x{1F5E8}\x{1F5EF}\x{1F5F3}\x{1F5FA}\x{1F5FB}-\x{1F5FF}\x{1F600}\x{1F601}-\x{1F610}\x{1F611}\x{1F612}-\x{1F614}\x{1F615}\x{1F616}\x{1F617}\x{1F618}\x{1F619}\x{1F61A}\x{1F61B}\x{1F61C}-\x{1F61E}\x{1F61F}\x{1F620}-\x{1F625}\x{1F626}-\x{1F627}\x{1F628}-\x{1F62B}\x{1F62C}\x{1F62D}\x{1F62E}-\x{1F62F}\x{1F630}-\x{1F633}\x{1F634}\x{1F635}-\x{1F640}\x{1F641}-\x{1F642}\x{1F643}-\x{1F644}\x{1F645}-\x{1F64F}\x{1F680}-\x{1F6C5}\x{1F6CB}-\x{1F6CF}\x{1F6D0}\x{1F6D1}-\x{1F6D2}\x{1F6E0}-\x{1F6E5}\x{1F6E9}\x{1F6EB}-\x{1F6EC}\x{1F6F0}\x{1F6F3}\x{1F6F4}-\x{1F6F6}\x{1F6F7}-\x{1F6F8}\x{1F6F9}\x{1F910}-\x{1F918}\x{1F919}-\x{1F91E}\x{1F91F}\x{1F920}-\x{1F927}\x{1F928}-\x{1F92F}\x{1F930}\x{1F931}-\x{1F932}\x{1F933}-\x{1F93A}\x{1F93C}-\x{1F93E}\x{1F940}-\x{1F945}\x{1F947}-\x{1F94B}\x{1F94C}\x{1F94D}-\x{1F94F}\x{1F950}-\x{1F95E}\x{1F95F}-\x{1F96B}\x{1F96C}-\x{1F970}\x{1F973}-\x{1F976}\x{1F97A}\x{1F97C}-\x{1F97F}\x{1F980}-\x{1F984}\x{1F985}-\x{1F991}\x{1F992}-\x{1F997}\x{1F998}-\x{1F9A2}\x{1F9B0}-\x{1F9B9}\x{1F9C0}\x{1F9C1}-\x{1F9C2}\x{1F9D0}-\x{1F9E6}\x{1F9E7}-\x{1F9FF}\x{23E9}-\x{23EC}\x{23F0}\x{23F3}\x{25FD}-\x{25FE}\x{267F}\x{2693}\x{26A1}\x{26D4}\x{26EA}\x{26F2}-\x{26F3}\x{26F5}\x{26FA}\x{1F201}\x{1F232}-\x{1F236}\x{1F238}-\x{1F23A}\x{1F3F4}\x{1F6CC}\x{1F3FB}-\x{1F3FF}\x{26F9}\x{1F385}\x{1F3C2}-\x{1F3C4}\x{1F3C7}\x{1F3CA}\x{1F3CB}-\x{1F3CC}\x{1F442}-\x{1F443}\x{1F446}-\x{1F450}\x{1F466}-\x{1F469}\x{1F46E}\x{1F470}-\x{1F478}\x{1F47C}\x{1F481}-\x{1F483}\x{1F485}-\x{1F487}\x{1F4AA}\x{1F574}-\x{1F575}\x{1F645}-\x{1F647}\x{1F64B}-\x{1F64F}\x{1F6A3}\x{1F6B4}-\x{1F6B6}\x{1F6C0}\x{1F918}\x{1F919}-\x{1F91C}\x{1F91E}\x{1F926}\x{1F933}-\x{1F939}\x{1F93D}-\x{1F93E}\x{1F9B5}-\x{1F9B6}\x{1F9D1}-\x{1F9DD}\x{200D}\x{20E3}\x{FE0F}\x{1F9B0}-\x{1F9B3}\x{E0020}-\x{E007F}\x{2388}\x{2600}-\x{2605}\x{2607}-\x{2612}\x{2616}-\x{2617}\x{2619}\x{261A}-\x{266F}\x{2670}-\x{2671}\x{2672}-\x{267D}\x{2680}-\x{2689}\x{268A}-\x{2691}\x{2692}-\x{269C}\x{269D}\x{269E}-\x{269F}\x{26A2}-\x{26B1}\x
{26B2}\x{26B3}-\x{26BC}\x{26BD}-\x{26BF}\x{26C0}-\x{26C3}\x{26C4}-\x{26CD}\x{26CF}-\x{26E1}\x{26E2}\x{26E3}\x{26E4}-\x{26E7}\x{26E8}-\x{26FF}\x{2700}\x{2701}-\x{2704}\x{270C}-\x{2712}\x{2763}-\x{2767}\x{1F000}-\x{1F02B}\x{1F02C}-\x{1F02F}\x{1F030}-\x{1F093}\x{1F094}-\x{1F09F}\x{1F0A0}-\x{1F0AE}\x{1F0AF}-\x{1F0B0}\x{1F0B1}-\x{1F0BE}\x{1F0BF}\x{1F0C0}\x{1F0C1}-\x{1F0CF}\x{1F0D0}\x{1F0D1}-\x{1F0DF}\x{1F0E0}-\x{1F0F5}\x{1F0F6}-\x{1F0FF}\x{1F10D}-\x{1F10F}\x{1F12F}\x{1F16C}-\x{1F16F}\x{1F1AD}-\x{1F1E5}\x{1F203}-\x{1F20F}\x{1F23C}-\x{1F23F}\x{1F249}-\x{1F24F}\x{1F252}-\x{1F25F}\x{1F260}-\x{1F265}\x{1F266}-\x{1F2FF}\x{1F321}-\x{1F32C}\x{1F394}-\x{1F39F}\x{1F3F1}-\x{1F3F7}\x{1F3F8}-\x{1F3FA}\x{1F4FD}-\x{1F4FE}\x{1F53E}-\x{1F53F}\x{1F540}-\x{1F543}\x{1F544}-\x{1F54A}\x{1F54B}-\x{1F54F}\x{1F568}-\x{1F579}\x{1F57B}-\x{1F5A3}\x{1F5A5}-\x{1F5FA}\x{1F6C6}-\x{1F6CF}\x{1F6D3}-\x{1F6D4}\x{1F6D5}-\x{1F6DF}\x{1F6E0}-\x{1F6EC}\x{1F6ED}-\x{1F6EF}\x{1F6F0}-\x{1F6F3}\x{1F6F9}-\x{1F6FF}\x{1F774}-\x{1F77F}\x{1F7D5}-\x{1F7FF}\x{1F80C}-\x{1F80F}\x{1F848}-\x{1F84F}\x{1F85A}-\x{1F85F}\x{1F888}-\x{1F88F}\x{1F8AE}-\x{1F8FF}\x{1F900}-\x{1F90B}\x{1F90C}-\x{1F90F}\x{1F93F}\x{1F96C}-\x{1F97F}\x{1F998}-\x{1F9BF}\x{1F9C1}-\x{1F9CF}\x{1F9E7}-\x{1FFFD}]/u
3840

41+
@doc """
42+
Return the default option values
43+
"""
44+
def default_opts, do: @opts
45+
3946
@spec compose(binary(), binary()) :: list()
4047
@spec compose(binary()) :: struct() | nil
4148
@doc """
@@ -218,6 +225,8 @@ defmodule Akin.Util do
218225
list_algorithms(metric, unit, algorithms)
219226
end
220227

228+
def list_algorithms(_), do: @algorithms
229+
221230
def list_algorithms(nil, nil, algorithms) do
222231
Enum.map(algorithms, fn {name, _, _} -> name end) |> Enum.sort()
223232
end
@@ -291,9 +300,7 @@ defmodule Akin.Util do
291300
# list_algorithms("phonetic", "whole", []) ++ list_algorithms("phonetic", "partial", [])
292301
# end
293302

294-
295-
296-
@spec ngram_tokenize(binary(), integer()) :: list()
303+
@spec ngram_tokenize(any, any) :: list
297304
@doc """
298305
Tokenizes the input into N-grams (http://en.wikipedia.org/wiki/N-gram).
299306
"""
@@ -321,10 +328,10 @@ defmodule Akin.Util do
321328
options list.
322329
"""
323330
def opts(opts, key) when is_list(opts) and is_atom(key) do
324-
Keyword.get(opts, key) || Keyword.get(Akin.default_opts(), key)
331+
Keyword.get(opts, key) || Keyword.get(default_opts(), key)
325332
end
326333

327-
def opts(_, key) when is_atom(key), do: Keyword.get(Akin.default_opts(), key)
334+
def opts(_, key) when is_atom(key), do: Keyword.get(default_opts(), key)
328335

329336
def opts(_, _), do: nil
330337

@@ -371,4 +378,11 @@ defmodule Akin.Util do
371378
new_right = String.replace(right, char, " ")
372379
{left, new_right}
373380
end
381+
382+
@doc """
383+
Round data types that can be rounded to 2 decimal points.
384+
"""
385+
def r(v) when is_float(v), do: Float.round(v, 2)
386+
def r(v) when is_binary(v), do: Float.round(String.to_float(v), 2)
387+
def r(v) when is_integer(v), do: Float.round(v / 1, 2)
374388
end

test/lib/util_test.exs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,11 @@ defmodule UtilTest do
122122

123123
describe "The ngram_size is returned correctly" do
124124
setup do
125-
%{default: Keyword.get(Akin.default_opts(), :ngram_size)}
125+
%{default: Keyword.get(default_opts(), :ngram_size)}
126126
end
127127

128128
test "The n_gram size is correct when option list contains an ngram_size value", cxt do
129-
assert opts(Akin.default_opts(), :ngram_size) == cxt.default
129+
assert opts(default_opts(), :ngram_size) == cxt.default
130130
assert opts([ngram_size: 3], :ngram_size) == 3
131131
end
132132

@@ -139,12 +139,12 @@ defmodule UtilTest do
139139

140140
describe "The short_length is returned correctly" do
141141
setup do
142-
%{default: Keyword.get(Akin.default_opts(), :short_length)}
142+
%{default: Keyword.get(default_opts(), :short_length)}
143143
end
144144

145145
test "The short_length size is correct when option list contains an short_length value",
146146
cxt do
147-
assert opts(Akin.default_opts(), :short_length) == cxt.default
147+
assert opts(default_opts(), :short_length) == cxt.default
148148
assert opts([short_length: 10], :short_length) == 10
149149
end
150150

@@ -158,11 +158,11 @@ defmodule UtilTest do
158158

159159
describe "The level is returned correctly" do
160160
setup do
161-
%{default: Keyword.get(Akin.default_opts(), :level)}
161+
%{default: Keyword.get(default_opts(), :level)}
162162
end
163163

164164
test "The level size is correct when option list contains an level value", cxt do
165-
assert opts(Akin.default_opts(), :level) == cxt.default
165+
assert opts(default_opts(), :level) == cxt.default
166166
assert opts([level: 10], :level) == 10
167167
end
168168

@@ -175,11 +175,11 @@ defmodule UtilTest do
175175

176176
describe "The match_at is returned correctly" do
177177
setup do
178-
%{default: Keyword.get(Akin.default_opts(), :match_at)}
178+
%{default: Keyword.get(default_opts(), :match_at)}
179179
end
180180

181181
test "The match_at size is correct when option list contains an match_at value", cxt do
182-
assert opts(Akin.default_opts(), :match_at) == cxt.default
182+
assert opts(default_opts(), :match_at) == cxt.default
183183
assert opts([match_at: 10], :match_at) == 10
184184
end
185185

0 commit comments

Comments
 (0)