Skip to content

Commit 22d8733

Browse files
dschogitster
authored andcommitted
linear-assignment: a function to solve least-cost assignment problems
The problem solved by the code introduced in this commit goes like this: given two sets of items, and a cost matrix which says how much it "costs" to assign any given item of the first set to any given item of the second, assign all items (except when the sets have different size) in the cheapest way. We use the Jonker-Volgenant algorithm to solve the assignment problem to answer questions such as: given two different versions of a topic branch (or iterations of a patch series), what is the best pairing of commits/patches between the different versions? Signed-off-by: Johannes Schindelin <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 1d89318 commit 22d8733

File tree

3 files changed

+224
-0
lines changed

3 files changed

+224
-0
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,7 @@ LIB_OBJS += gpg-interface.o
870870
LIB_OBJS += graph.o
871871
LIB_OBJS += grep.o
872872
LIB_OBJS += hashmap.o
873+
LIB_OBJS += linear-assignment.o
873874
LIB_OBJS += help.o
874875
LIB_OBJS += hex.o
875876
LIB_OBJS += ident.o

linear-assignment.c

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/*
2+
* Based on: Jonker, R., & Volgenant, A. (1987). <i>A shortest augmenting path
3+
* algorithm for dense and sparse linear assignment problems</i>. Computing,
4+
* 38(4), 325-340.
5+
*/
6+
#include "cache.h"
7+
#include "linear-assignment.h"
8+
9+
#define COST(column, row) cost[(column) + column_count * (row)]
10+
11+
/*
12+
* The parameter `cost` is the cost matrix: the cost to assign column j to row
13+
* i is `cost[j + column_count * i].
14+
*/
15+
void compute_assignment(int column_count, int row_count, int *cost,
16+
int *column2row, int *row2column)
17+
{
18+
int *v, *d;
19+
int *free_row, free_count = 0, saved_free_count, *pred, *col;
20+
int i, j, phase;
21+
22+
memset(column2row, -1, sizeof(int) * column_count);
23+
memset(row2column, -1, sizeof(int) * row_count);
24+
ALLOC_ARRAY(v, column_count);
25+
26+
/* column reduction */
27+
for (j = column_count - 1; j >= 0; j--) {
28+
int i1 = 0;
29+
30+
for (i = 1; i < row_count; i++)
31+
if (COST(j, i1) > COST(j, i))
32+
i1 = i;
33+
v[j] = COST(j, i1);
34+
if (row2column[i1] == -1) {
35+
/* row i1 unassigned */
36+
row2column[i1] = j;
37+
column2row[j] = i1;
38+
} else {
39+
if (row2column[i1] >= 0)
40+
row2column[i1] = -2 - row2column[i1];
41+
column2row[j] = -1;
42+
}
43+
}
44+
45+
/* reduction transfer */
46+
ALLOC_ARRAY(free_row, row_count);
47+
for (i = 0; i < row_count; i++) {
48+
int j1 = row2column[i];
49+
if (j1 == -1)
50+
free_row[free_count++] = i;
51+
else if (j1 < -1)
52+
row2column[i] = -2 - j1;
53+
else {
54+
int min = COST(!j1, i) - v[!j1];
55+
for (j = 1; j < column_count; j++)
56+
if (j != j1 && min > COST(j, i) - v[j])
57+
min = COST(j, i) - v[j];
58+
v[j1] -= min;
59+
}
60+
}
61+
62+
if (free_count ==
63+
(column_count < row_count ? row_count - column_count : 0)) {
64+
free(v);
65+
free(free_row);
66+
return;
67+
}
68+
69+
/* augmenting row reduction */
70+
for (phase = 0; phase < 2; phase++) {
71+
int k = 0;
72+
73+
saved_free_count = free_count;
74+
free_count = 0;
75+
while (k < saved_free_count) {
76+
int u1, u2;
77+
int j1 = 0, j2, i0;
78+
79+
i = free_row[k++];
80+
u1 = COST(j1, i) - v[j1];
81+
j2 = -1;
82+
u2 = INT_MAX;
83+
for (j = 1; j < column_count; j++) {
84+
int c = COST(j, i) - v[j];
85+
if (u2 > c) {
86+
if (u1 < c) {
87+
u2 = c;
88+
j2 = j;
89+
} else {
90+
u2 = u1;
91+
u1 = c;
92+
j2 = j1;
93+
j1 = j;
94+
}
95+
}
96+
}
97+
if (j2 < 0) {
98+
j2 = j1;
99+
u2 = u1;
100+
}
101+
102+
i0 = column2row[j1];
103+
if (u1 < u2)
104+
v[j1] -= u2 - u1;
105+
else if (i0 >= 0) {
106+
j1 = j2;
107+
i0 = column2row[j1];
108+
}
109+
110+
if (i0 >= 0) {
111+
if (u1 < u2)
112+
free_row[--k] = i0;
113+
else
114+
free_row[free_count++] = i0;
115+
}
116+
row2column[i] = j1;
117+
column2row[j1] = i;
118+
}
119+
}
120+
121+
/* augmentation */
122+
saved_free_count = free_count;
123+
ALLOC_ARRAY(d, column_count);
124+
ALLOC_ARRAY(pred, column_count);
125+
ALLOC_ARRAY(col, column_count);
126+
for (free_count = 0; free_count < saved_free_count; free_count++) {
127+
int i1 = free_row[free_count], low = 0, up = 0, last, k;
128+
int min, c, u1;
129+
130+
for (j = 0; j < column_count; j++) {
131+
d[j] = COST(j, i1) - v[j];
132+
pred[j] = i1;
133+
col[j] = j;
134+
}
135+
136+
j = -1;
137+
do {
138+
last = low;
139+
min = d[col[up++]];
140+
for (k = up; k < column_count; k++) {
141+
j = col[k];
142+
c = d[j];
143+
if (c <= min) {
144+
if (c < min) {
145+
up = low;
146+
min = c;
147+
}
148+
col[k] = col[up];
149+
col[up++] = j;
150+
}
151+
}
152+
for (k = low; k < up; k++)
153+
if (column2row[col[k]] == -1)
154+
goto update;
155+
156+
/* scan a row */
157+
do {
158+
int j1 = col[low++];
159+
160+
i = column2row[j1];
161+
u1 = COST(j1, i) - v[j1] - min;
162+
for (k = up; k < column_count; k++) {
163+
j = col[k];
164+
c = COST(j, i) - v[j] - u1;
165+
if (c < d[j]) {
166+
d[j] = c;
167+
pred[j] = i;
168+
if (c == min) {
169+
if (column2row[j] == -1)
170+
goto update;
171+
col[k] = col[up];
172+
col[up++] = j;
173+
}
174+
}
175+
}
176+
} while (low != up);
177+
} while (low == up);
178+
179+
update:
180+
/* updating of the column pieces */
181+
for (k = 0; k < last; k++) {
182+
int j1 = col[k];
183+
v[j1] += d[j1] - min;
184+
}
185+
186+
/* augmentation */
187+
do {
188+
if (j < 0)
189+
BUG("negative j: %d", j);
190+
i = pred[j];
191+
column2row[j] = i;
192+
SWAP(j, row2column[i]);
193+
} while (i1 != i);
194+
}
195+
196+
free(col);
197+
free(pred);
198+
free(d);
199+
free(v);
200+
free(free_row);
201+
}

linear-assignment.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef LINEAR_ASSIGNMENT_H
2+
#define LINEAR_ASSIGNMENT_H
3+
4+
/*
5+
* Compute an assignment of columns -> rows (and vice versa) such that every
6+
* column is assigned to at most one row (and vice versa) minimizing the
7+
* overall cost.
8+
*
9+
* The parameter `cost` is the cost matrix: the cost to assign column j to row
10+
* i is `cost[j + column_count * i].
11+
*
12+
* The arrays column2row and row2column will be populated with the respective
13+
* assignments (-1 for unassigned, which can happen only if column_count !=
14+
* row_count).
15+
*/
16+
void compute_assignment(int column_count, int row_count, int *cost,
17+
int *column2row, int *row2column);
18+
19+
/* The maximal cost in the cost matrix (to prevent integer overflows). */
20+
#define COST_MAX (1<<16)
21+
22+
#endif

0 commit comments

Comments
 (0)