@@ -12,9 +12,16 @@ representing vertex operations:
1212
1313
1414### Optional Arguments
15- - `insert_cost::Function=v->1.0`
16- - `delete_cost::Function=u->1.0`
17- - `subst_cost::Function=(u,v)->0.5`
15+ - `vertex_insert_cost::Function=v->0.`
16+ - `vertex_delete_cost::Function=u->0.`
17+ - `vertex_subst_cost::Function=(u, v)->0.`
18+ - `edge_insert_cost::Function=e->1.`
19+ - `edge_delete_cost::Function=e->1.`
20+ - `edge_subst_cost::Function=(e1, e2)->0.`
21+
22+ The algorithm will always try to match two edges if it can, so if it is
23+ preferable to delete two edges rather than match them, it should be
24+ reflected in the `edge_subst_cost` function.
1825
1926By default, the algorithm uses constant operation costs. The
2027user can provide classical Minkowski costs computed from vertex
@@ -31,7 +38,7 @@ search in case the default heuristic is not satisfactory.
3138- Given two graphs ``|G₁| < |G₂|``, `edit_distance(G₁, G₂)` is faster to
3239compute than `edit_distance(G₂, G₁)`. Consider swapping the arguments
3340if involved costs are equivalent.
34- - The use of simple Minkowski costs can improve performance considerably.
41+ - The use of a heuristic can improve performance considerably.
3542- Exploit vertex attributes when designing operation costs.
3643
3744### References
@@ -49,51 +56,163 @@ julia> g1 = SimpleDiGraph([0 1 0 0 0; 0 0 1 0 0; 1 0 0 1 0; 0 0 0 0 1; 0 0 0 1 0
4956julia> g2 = SimpleDiGraph([0 1 0; 0 0 1; 1 0 0]);
5057
5158julia> edit_distance(g1, g2)
52- (3.5 , Tuple[(1, 2 ), (2, 1), (3, 0 ), (4, 3 ), (5, 0)])
59+ (3.0 , Tuple[(1, 3 ), (2, 1), (3, 2 ), (4, 0 ), (5, 0)])
5360```
5461"""
5562function edit_distance (
5663 G₁:: AbstractGraph ,
5764 G₂:: AbstractGraph ;
58- insert_cost:: Function = v -> 1.0 ,
59- delete_cost:: Function = u -> 1.0 ,
60- subst_cost:: Function = (u, v) -> 0.5 ,
61- heuristic:: Function = DefaultEditHeuristic,
65+ vertex_insert_cost= nothing ,
66+ vertex_delete_cost= nothing ,
67+ vertex_subst_cost= nothing ,
68+ edge_insert_cost= nothing ,
69+ edge_delete_cost= nothing ,
70+ edge_subst_cost= nothing ,
71+ heuristic= nothing ,
6272)
73+ if isnothing (vertex_insert_cost) &&
74+ isnothing (vertex_delete_cost) &&
75+ isnothing (vertex_subst_cost) &&
76+ isnothing (edge_insert_cost) &&
77+ isnothing (edge_delete_cost) &&
78+ isnothing (edge_subst_cost) &&
79+ isnothing (heuristic)
80+ heuristic = default_edit_heuristic
81+ end
82+ vertex_insert_cost = something (vertex_insert_cost, v -> 0.0 )
83+ vertex_delete_cost = something (vertex_delete_cost, v -> 0.0 )
84+ vertex_subst_cost = something (vertex_subst_cost, (u, v) -> 0.0 )
85+ edge_insert_cost = something (edge_insert_cost, e -> 1.0 )
86+ edge_delete_cost = something (edge_delete_cost, e -> 1.0 )
87+ edge_subst_cost = something (edge_subst_cost, (e1, e2) -> 0.0 )
88+ heuristic = something (heuristic, (λ, G₁, G₂) -> 0.0 )
89+ return _edit_distance (
90+ G₁:: AbstractGraph ,
91+ G₂:: AbstractGraph ,
92+ vertex_insert_cost,
93+ vertex_delete_cost,
94+ vertex_subst_cost,
95+ edge_insert_cost,
96+ edge_delete_cost,
97+ edge_subst_cost,
98+ heuristic,
99+ )
100+ end
101+
102+ function _edit_distance (
103+ G₁:: AbstractGraph{T} ,
104+ G₂:: AbstractGraph{U} ,
105+ vertex_insert_cost:: Function ,
106+ vertex_delete_cost:: Function ,
107+ vertex_subst_cost:: Function ,
108+ edge_insert_cost:: Function ,
109+ edge_delete_cost:: Function ,
110+ edge_subst_cost:: Function ,
111+ heuristic:: Function ,
112+ ) where {T<: Integer ,U<: Integer }
113+ isdirected = is_directed (G₁) || is_directed (G₂)
114+
115+ # compute the edge cost incurred by associating u1 with v1 and u2 with v2
116+ # u2 and v2 may be 0
117+ function association_cost (u1, u2, v1, v2)
118+ cost = 0.0
119+ if has_edge (G₁, u1, u2)
120+ if has_edge (G₂, v1, v2)
121+ cost += edge_subst_cost (Edge (u1, u2), Edge (v1, v2))
122+ else
123+ cost += edge_delete_cost (Edge (u1, u2))
124+ end
125+ else
126+ if has_edge (G₂, v1, v2)
127+ cost += edge_insert_cost (Edge (v1, v2))
128+ end
129+ end
130+ if isdirected && u1 != u2
131+ if has_edge (G₁, u2, u1)
132+ if has_edge (G₂, v2, v1)
133+ cost += edge_subst_cost (Edge (u2, u1), Edge (v2, v1))
134+ else
135+ cost += edge_delete_cost (Edge (u2, u1))
136+ end
137+ else
138+ if has_edge (G₂, v2, v1)
139+ cost += edge_insert_cost (Edge (v2, v1))
140+ end
141+ end
142+ end
143+ return cost
144+ end
63145
64146 # A* search heuristic
65147 h (λ) = heuristic (λ, G₁, G₂)
66148
67149 # initialize open set
68150 OPEN = PriorityQueue {Vector{Tuple},Float64} ()
69- for v in 1 : nv (G₂)
70- enqueue! (OPEN, [(1 , v)], subst_cost (1 , v) + h ([(1 , v)]))
151+ for v in vertices (G₂)
152+ enqueue! (OPEN, [(T ( 1 ) , v)], vertex_subst_cost (1 , v) + h ([(T ( 1 ) , v)]))
71153 end
72- enqueue! (OPEN, [(1 , 0 ) ], delete_cost (1 ) + h ([(1 , 0 )]))
154+ enqueue! (OPEN, [(T ( 1 ), U ( 0 )) ], vertex_delete_cost (1 ) + h ([(T ( 1 ), U ( 0 ) )]))
73155
156+ c = 0
74157 while true
75158 # minimum (partial) edit path
76159 λ, cost = peek (OPEN)
160+ c += 1
77161 dequeue! (OPEN)
78162
79163 if is_complete_path (λ, G₁, G₂)
80164 return cost, λ
81165 else
82- k, _ = λ[end ]
83- vs = setdiff (1 : nv (G₂), [v for (u, v) in λ])
166+ u1, _ = λ[end ]
167+ u1 += T (1 )
168+ vs = setdiff (vertices (G₂), [v for (u, v) in λ])
84169
85- if k < nv (G₁) # there are still vertices to process in G₁?
86- for v in vs
87- λ⁺ = [λ; (k + 1 , v)]
88- enqueue! (OPEN, λ⁺, cost + subst_cost (k + 1 , v) + h (λ⁺) - h (λ))
170+ if u1 <= nv (G₁) # there are still vertices to process in G₁?
171+ # we try every possible assignment of v1
172+ for v1 in vs
173+ λ⁺ = [λ; (u1, v1)]
174+ new_cost = cost + vertex_subst_cost (u1, v1) + h (λ⁺) - h (λ)
175+ for (u2, v2) in λ
176+ new_cost += association_cost (u1, u2, v1, v2)
177+ end
178+ new_cost += association_cost (u1, u1, v1, v1) # handle self-loops
179+
180+ enqueue! (OPEN, λ⁺, new_cost)
181+ end
182+ # we try deleting u1
183+ λ⁺ = [λ; (u1, U (0 ))]
184+ new_cost = cost + vertex_delete_cost (u1) + h (λ⁺) - h (λ)
185+ for u2 in outneighbors (G₁, u1)
186+ # edges to a later vertex u2 are accounted for later, when u2 is processed
187+ u2 > u1 && continue
188+ new_cost += edge_delete_cost (Edge (u1, u2))
89189 end
90- λ⁺ = [λ; (k + 1 , 0 )]
91- enqueue! (OPEN, λ⁺, cost + delete_cost (k + 1 ) + h (λ⁺) - h (λ))
190+ if isdirected
191+ for u2 in inneighbors (G₁, u1)
192+ # edges from a later vertex u2 are accounted for later, when u2 is processed, and we should not count a self loop twice
193+ u2 >= u1 && continue
194+ new_cost += edge_delete_cost (Edge (u2, u1))
195+ end
196+ end
197+ enqueue! (OPEN, λ⁺, new_cost)
92198 else
93- # add remaining vertices of G₂ to the path
94- λ⁺ = [λ; [(0 , v) for v in vs]]
95- total_insert_cost = sum (insert_cost, vs)
96- enqueue! (OPEN, λ⁺, cost + total_insert_cost + h (λ⁺) - h (λ))
199+ # add remaining vertices of G₂ to the path by inserting them
200+ λ⁺ = [λ; [(T (0 ), v) for v in vs]]
201+ new_cost = cost + sum (vertex_insert_cost, vs)
202+ for v1 in vs
203+ for v2 in outneighbors (G₂, v1)
204+ (v2 > v1 && v2 in vs) && continue # these edges will be inserted later
205+ new_cost += edge_insert_cost (Edge (v1, v2))
206+ end
207+ if isdirected
208+ for v2 in inneighbors (G₂, v1)
209+ (v2 > v1 && v2 in vs) && continue # these edges will be inserted later
210+ v1 == v2 && continue # we should not count a self loop twice
211+ new_cost += edge_insert_cost (Edge (v2, v1))
212+ end
213+ end
214+ end
215+ enqueue! (OPEN, λ⁺, new_cost + h (λ⁺) - h (λ))
97216 end
98217 end
99218 end
@@ -112,11 +231,40 @@ function is_complete_path(λ, G₁, G₂)
112231 return length (us) == nv (G₁) && length (vs) == nv (G₂)
113232end
114233
115- function DefaultEditHeuristic (λ, G₁:: AbstractGraph , G₂:: AbstractGraph )
116- vs = Set ([v for (u, v) in λ])
117- delete! (vs, 0 )
234+ # edit_distance(G₁::AbstractGraph, G₂::AbstractGraph) =
235+ # edit_distance(G₁, G₂,
236+ # vertex_insert_cost=v -> 0.,
237+ # vertex_delete_cost=u -> 0.,
238+ # vertex_subst_cost=(u, v) -> 0.,
239+ # edge_insert_cost=e -> 1.,
240+ # edge_delete_cost=e -> 1.,
241+ # edge_subst_cost=(e1, e2) -> 0.,
242+ # heuristic=default_edit_heuristic)
118243
119- return nv (G₂) - length (vs)
244+ """
245+ Compute, for each graph, an upper bound on the number of edges that can still be affected, and return the absolute difference of the two bounds.
246+ """
247+ function default_edit_heuristic (λ, G₁:: AbstractGraph , G₂:: AbstractGraph )
248+ us = setdiff (1 : nv (G₁), [u for (u, v) in λ])
249+ vs = setdiff (1 : nv (G₂), [v for (u, v) in λ])
250+ total_free_edges_g1 = 0
251+ total_free_edges_g2 = 0
252+ if ! isempty (us)
253+ total_free_edges_g1 = sum (u -> outdegree (G₁, u), us)
254+ end
255+ if ! isempty (vs)
256+ total_free_edges_g2 = sum (v -> outdegree (G₂, v), vs)
257+ end
258+ for (u1, v1) in λ
259+ (u1 == 0 || v1 == 0 ) && continue
260+ total_free_edges_g1 += count (u2 -> u2 in us, outneighbors (G₁, u1))
261+ total_free_edges_g2 += count (v2 -> v2 in vs, outneighbors (G₂, v1))
262+ end
263+ if ! is_directed (G₁) && ! is_directed (G₂)
264+ total_free_edges_g1 = total_free_edges_g1 / 2
265+ total_free_edges_g2 = total_free_edges_g2 / 2
266+ end
267+ return abs (total_free_edges_g1 - total_free_edges_g2)
120268end
121269
122270# -------------------------
0 commit comments