@@ -12,9 +12,16 @@ representing vertex operations:
12
12
13
13
14
14
### Optional Arguments
15
- - `insert_cost::Function=v->1.0`
16
- - `delete_cost::Function=u->1.0`
17
- - `subst_cost::Function=(u,v)->0.5`
15
+ - `vertex_insert_cost::Function=v->0.`
16
+ - `vertex_delete_cost::Function=u->0.`
17
+ - `vertex_subst_cost::Function=(u, v)->0.`
18
+ - `edge_insert_cost::Function=e->1.`
19
+ - `edge_delete_cost::Function=e->1.`
20
+ - `edge_subst_cost::Function=(e1, e2)->0.`
21
+
22
+ The algorithm will always try to match two edges if it can, so if it is
23
+ preferable to delete two edges rather than match these, it should be
24
+ reflected in the `edge_subst_cost` function.
18
25
19
26
By default, the algorithm uses constant operation costs. The
20
27
user can provide classical Minkowski costs computed from vertex
@@ -31,7 +38,7 @@ search in case the default heuristic is not satisfactory.
31
38
- Given two graphs ``|G₁| < |G₂|``, `edit_distance(G₁, G₂)` is faster to
32
39
compute than `edit_distance(G₂, G₁)`. Consider swapping the arguments
33
40
if involved costs are equivalent.
34
- - The use of simple Minkowski costs can improve performance considerably.
41
+ - The use of a heuristic can improve performance considerably.
35
42
- Exploit vertex attributes when designing operation costs.
36
43
37
44
### References
@@ -49,51 +56,163 @@ julia> g1 = SimpleDiGraph([0 1 0 0 0; 0 0 1 0 0; 1 0 0 1 0; 0 0 0 0 1; 0 0 0 1 0
49
56
julia> g2 = SimpleDiGraph([0 1 0; 0 0 1; 1 0 0]);
50
57
51
58
julia> edit_distance(g1, g2)
52
- (3.5 , Tuple[(1, 2 ), (2, 1), (3, 0 ), (4, 3 ), (5, 0)])
59
+ (3.0 , Tuple[(1, 3 ), (2, 1), (3, 2 ), (4, 0 ), (5, 0)])
53
60
```
54
61
"""
55
62
function edit_distance(
    G₁::AbstractGraph,
    G₂::AbstractGraph;
    vertex_insert_cost=nothing,
    vertex_delete_cost=nothing,
    vertex_subst_cost=nothing,
    edge_insert_cost=nothing,
    edge_delete_cost=nothing,
    edge_subst_cost=nothing,
    heuristic=nothing,
)
    # Keyword arguments (each defaults to `nothing`, meaning "use the default"):
    #   vertex_insert_cost(v), vertex_delete_cost(u), vertex_subst_cost(u, v)
    #       -> default to a constant 0.0
    #   edge_insert_cost(e), edge_delete_cost(e)
    #       -> default to a constant 1.0
    #   edge_subst_cost(e1, e2)
    #       -> defaults to a constant 0.0
    #   heuristic(λ, G₁, G₂)
    #       -> see below
    costs = (
        vertex_insert_cost,
        vertex_delete_cost,
        vertex_subst_cost,
        edge_insert_cost,
        edge_delete_cost,
        edge_subst_cost,
    )

    # `default_edit_heuristic` is only selected when the caller customised
    # nothing at all. As soon as any cost (or a heuristic) is supplied, a
    # missing heuristic falls back to the constant-zero function instead.
    if isnothing(heuristic) && all(isnothing, costs)
        heuristic = default_edit_heuristic
    end

    vertex_insert_cost = something(vertex_insert_cost, v -> 0.0)
    vertex_delete_cost = something(vertex_delete_cost, u -> 0.0)
    vertex_subst_cost = something(vertex_subst_cost, (u, v) -> 0.0)
    edge_insert_cost = something(edge_insert_cost, e -> 1.0)
    edge_delete_cost = something(edge_delete_cost, e -> 1.0)
    edge_subst_cost = something(edge_subst_cost, (e1, e2) -> 0.0)
    heuristic = something(heuristic, (λ, G₁, G₂) -> 0.0)

    # All defaults are resolved here; the search itself lives in `_edit_distance`.
    return _edit_distance(
        G₁,
        G₂,
        vertex_insert_cost,
        vertex_delete_cost,
        vertex_subst_cost,
        edge_insert_cost,
        edge_delete_cost,
        edge_subst_cost,
        heuristic,
    )
end
101
+
102
+ function _edit_distance (
103
+ G₁:: AbstractGraph{T} ,
104
+ G₂:: AbstractGraph{U} ,
105
+ vertex_insert_cost:: Function ,
106
+ vertex_delete_cost:: Function ,
107
+ vertex_subst_cost:: Function ,
108
+ edge_insert_cost:: Function ,
109
+ edge_delete_cost:: Function ,
110
+ edge_subst_cost:: Function ,
111
+ heuristic:: Function ,
112
+ ) where {T<: Integer ,U<: Integer }
113
+ isdirected = is_directed (G₁) || is_directed (G₂)
114
+
115
+ # compute the cost on edges due to associate u1 to v1 and u2 to v2
116
+ # u2 and v2 can eventually be 0
117
+ function association_cost (u1, u2, v1, v2)
118
+ cost = 0.0
119
+ if has_edge (G₁, u1, u2)
120
+ if has_edge (G₂, v1, v2)
121
+ cost += edge_subst_cost (Edge (u1, u2), Edge (v1, v2))
122
+ else
123
+ cost += edge_delete_cost (Edge (u1, u2))
124
+ end
125
+ else
126
+ if has_edge (G₂, v1, v2)
127
+ cost += edge_insert_cost (Edge (v1, v2))
128
+ end
129
+ end
130
+ if isdirected && u1 != u2
131
+ if has_edge (G₁, u2, u1)
132
+ if has_edge (G₂, v2, v1)
133
+ cost += edge_subst_cost (Edge (u2, u1), Edge (v2, v1))
134
+ else
135
+ cost += edge_delete_cost (Edge (u2, u1))
136
+ end
137
+ else
138
+ if has_edge (G₂, v2, v1)
139
+ cost += edge_insert_cost (Edge (v2, v1))
140
+ end
141
+ end
142
+ end
143
+ return cost
144
+ end
63
145
64
146
# A* search heuristic
65
147
h (λ) = heuristic (λ, G₁, G₂)
66
148
67
149
# initialize open set
68
150
OPEN = PriorityQueue {Vector{Tuple},Float64} ()
69
- for v in 1 : nv (G₂)
70
- enqueue! (OPEN, [(1 , v)], subst_cost (1 , v) + h ([(1 , v)]))
151
+ for v in vertices (G₂)
152
+ enqueue! (OPEN, [(T ( 1 ) , v)], vertex_subst_cost (1 , v) + h ([(T ( 1 ) , v)]))
71
153
end
72
- enqueue! (OPEN, [(1 , 0 ) ], delete_cost (1 ) + h ([(1 , 0 )]))
154
+ enqueue! (OPEN, [(T ( 1 ), U ( 0 )) ], vertex_delete_cost (1 ) + h ([(T ( 1 ), U ( 0 ) )]))
73
155
156
+ c = 0
74
157
while true
75
158
# minimum (partial) edit path
76
159
λ, cost = peek (OPEN)
160
+ c += 1
77
161
dequeue! (OPEN)
78
162
79
163
if is_complete_path (λ, G₁, G₂)
80
164
return cost, λ
81
165
else
82
- k, _ = λ[end ]
83
- vs = setdiff (1 : nv (G₂), [v for (u, v) in λ])
166
+ u1, _ = λ[end ]
167
+ u1 += T (1 )
168
+ vs = setdiff (vertices (G₂), [v for (u, v) in λ])
84
169
85
- if k < nv (G₁) # there are still vertices to process in G₁?
86
- for v in vs
87
- λ⁺ = [λ; (k + 1 , v)]
88
- enqueue! (OPEN, λ⁺, cost + subst_cost (k + 1 , v) + h (λ⁺) - h (λ))
170
+ if u1 <= nv (G₁) # there are still vertices to process in G₁?
171
+ # we try every possible assignment of v1
172
+ for v1 in vs
173
+ λ⁺ = [λ; (u1, v1)]
174
+ new_cost = cost + vertex_subst_cost (u1, v1) + h (λ⁺) - h (λ)
175
+ for (u2, v2) in λ
176
+ new_cost += association_cost (u1, u2, v1, v2)
177
+ end
178
+ new_cost += association_cost (u1, u1, v1, v1) # handle self-loops
179
+
180
+ enqueue! (OPEN, λ⁺, new_cost)
181
+ end
182
+ # we try deleting u1
183
+ λ⁺ = [λ; (u1, U (0 ))]
184
+ new_cost = cost + vertex_delete_cost (u1) + h (λ⁺) - h (λ)
185
+ for u2 in outneighbors (G₁, u1)
186
+ # edges deleted later when assigning v2
187
+ u2 > u1 && continue
188
+ new_cost += edge_delete_cost (Edge (u1, u2))
89
189
end
90
- λ⁺ = [λ; (k + 1 , 0 )]
91
- enqueue! (OPEN, λ⁺, cost + delete_cost (k + 1 ) + h (λ⁺) - h (λ))
190
+ if isdirected
191
+ for u2 in inneighbors (G₁, u1)
192
+ # edges deleted later when assigning v2, and we should not count a self loop twice
193
+ u2 >= u1 && continue
194
+ new_cost += edge_delete_cost (Edge (u2, u1))
195
+ end
196
+ end
197
+ enqueue! (OPEN, λ⁺, new_cost)
92
198
else
93
- # add remaining vertices of G₂ to the path
94
- λ⁺ = [λ; [(0 , v) for v in vs]]
95
- total_insert_cost = sum (insert_cost, vs)
96
- enqueue! (OPEN, λ⁺, cost + total_insert_cost + h (λ⁺) - h (λ))
199
+ # add remaining vertices of G₂ to the path by inserting them
200
+ λ⁺ = [λ; [(T (0 ), v) for v in vs]]
201
+ new_cost = cost + sum (vertex_insert_cost, vs)
202
+ for v1 in vs
203
+ for v2 in outneighbors (G₂, v1)
204
+ (v2 > v1 && v2 in vs) && continue # these edges will be deleted later
205
+ new_cost += edge_insert_cost (Edge (v1, v2))
206
+ end
207
+ if isdirected
208
+ for v2 in inneighbors (G₂, v1)
209
+ (v2 > v1 && v2 in vs) && continue # these edges will be deleted later
210
+ v1 == v2 && continue # we should not count a self loop twice
211
+ new_cost += edge_insert_cost (Edge (v2, v1))
212
+ end
213
+ end
214
+ end
215
+ enqueue! (OPEN, λ⁺, new_cost + h (λ⁺) - h (λ))
97
216
end
98
217
end
99
218
end
@@ -112,11 +231,40 @@ function is_complete_path(λ, G₁, G₂)
112
231
return length (us) == nv (G₁) && length (vs) == nv (G₂)
113
232
end
114
233
115
- function DefaultEditHeuristic (λ, G₁:: AbstractGraph , G₂:: AbstractGraph )
116
- vs = Set ([v for (u, v) in λ])
117
- delete! (vs, 0 )
234
+ # edit_distance(G₁::AbstractGraph, G₂::AbstractGraph) =
235
+ # edit_distance(G₁, G₂,
236
+ # vertex_insert_cost=v -> 0.,
237
+ # vertex_delete_cost=u -> 0.,
238
+ # vertex_subst_cost=(u, v) -> 0.,
239
+ # edge_insert_cost=e -> 1.,
240
+ # edge_delete_cost=e -> 1.,
241
+ # edge_subst_cost=(e1, e2) -> 0.,
242
+ # heuristic=default_edit_heuristic)
118
243
119
- return nv (G₂) - length (vs)
244
+ """
245
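+ Estimate the remaining edit cost as the absolute difference between the number of edges of `G₁` and of `G₂` that still touch a vertex not yet assigned by the partial edit path `λ`.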
246
+ """
247
function default_edit_heuristic(λ, G₁::AbstractGraph, G₂::AbstractGraph)
    # Vertices of each graph that do not appear in the partial edit path `λ`.
    # They are stored as sets so the membership tests in the neighbor scans
    # below do not rescan a vector for every neighbor.
    free_us = Set(setdiff(1:nv(G₁), [u for (u, _) in λ]))
    free_vs = Set(setdiff(1:nv(G₂), [v for (_, v) in λ]))

    # Edges leaving an unassigned vertex (guarded: `sum` over an empty
    # collection with a function argument would throw).
    free_edges_g1 = isempty(free_us) ? 0 : sum(u -> outdegree(G₁, u), free_us)
    free_edges_g2 = isempty(free_vs) ? 0 : sum(v -> outdegree(G₂, v), free_vs)

    # Edges going from an already matched vertex to an unassigned one.
    # Pairs that encode a deletion `(u, 0)` or an insertion `(0, v)` are skipped.
    for (u, v) in λ
        (u == 0 || v == 0) && continue
        free_edges_g1 += count(w -> w in free_us, outneighbors(G₁, u))
        free_edges_g2 += count(w -> w in free_vs, outneighbors(G₂, v))
    end

    # When both graphs are undirected the tallies above are halved before
    # being compared.
    if !is_directed(G₁) && !is_directed(G₂)
        return abs(free_edges_g1 / 2 - free_edges_g2 / 2)
    end
    return abs(free_edges_g1 - free_edges_g2)
end
121
269
122
270
# -------------------------
0 commit comments