Skip to content

Commit 0245c3f

Browse files
committed
scripts: Improve diversity of changes
1 parent 754b379 commit 0245c3f

File tree

1 file changed

+31
-10
lines changed

1 file changed

+31
-10
lines changed

scripts/filter_pr_changes.py

Lines changed: 31 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,31 +2,52 @@
22

33
import subprocess
44
import os
5+
import heapq
56

67
max_diff_per_file = 500
78
max_diff_total = 15000
89
max_file_total = 200
10+
trivial_penalty = 200
11+
diversity_penalty_inc = 30
912

1013
stats = subprocess.check_output(['git', 'diff', '--numstat']).decode().splitlines()
11-
diffs = []
12-
# TODO: maximize diff diversity
13-
diff_pattern = set()
14+
diffs = dict()
1415
for line in stats:
1516
add, sub, file = line.removesuffix('\n').split()
1617
count = int(add)+int(sub)
1718
if count > max_diff_per_file:
1819
continue
19-
key = (add, sub)
20-
if key in diff_pattern:
21-
continue
22-
diff_pattern.add(key)
23-
diffs.append((file, count))
24-
diffs.sort(key=lambda x: x[1])
20+
if add == sub:
21+
count += trivial_penalty
22+
proj = os.path.basename(os.path.dirname(os.path.dirname(file)))
23+
diff_list = diffs.get(proj, list())
24+
diff_list.append((count, file, proj, int(add), int(sub)))
25+
diffs[proj] = diff_list
26+
27+
diff_heap = []
28+
for list in diffs.values():
29+
list.sort(key=lambda x: x[0])
30+
diff_heap.append(list.pop(0))
31+
heapq.heapify(diff_heap)
2532

33+
diversity_penalty = dict()
34+
diff_pattern = set()
2635
file_count = 0
2736
diff_count = 0
37+
while len(diff_heap) != 0:
38+
cnt, file, proj, add, sub = heapq.heappop(diff_heap)
39+
proj_list = diffs[proj]
40+
if len(proj_list) != 0:
41+
diversity_penalty[proj] = diversity_penalty.get(proj, 0) + diversity_penalty_inc
42+
cnt2, file2, proj2, add2, sub2 = proj_list.pop(0)
43+
cnt2 += diversity_penalty[proj]
44+
heapq.heappush(diff_heap, (cnt2, file2, proj2, add2, sub2))
2845

29-
for file, count in diffs:
46+
key = (add, sub)
47+
if key in diff_pattern:
48+
continue
49+
diff_pattern.add(key)
50+
count = add + sub
3051
if file_count < max_file_total and diff_count + count <= max_diff_total:
3152
file_count += 1
3253
diff_count += count

0 commit comments

Comments
 (0)