|
2 | 2 |
|
3 | 3 | import subprocess |
4 | 4 | import os |
| 5 | +import heapq |
5 | 6 |
|
# Files whose added+removed line count exceeds this are skipped outright.
max_diff_per_file = 500
# Upper bound on the summed added+removed lines across all selected files.
max_diff_total = 15000
# Upper bound on the number of selected files.
max_file_total = 200
# Extra cost added to a file whose added and removed line counts are equal.
trivial_penalty = 200
# Extra cost that grows by this amount each time another file is drawn from
# a project's queue.
diversity_penalty_inc = 30
9 | 12 |
|
# Collect candidate files from `git diff --numstat` and group them by project.
# Each entry is a tuple (cost, file, proj, added, removed), where cost is the
# changed-line count plus any penalty applied below.
stats = subprocess.check_output(['git', 'diff', '--numstat']).decode().splitlines()
diffs = dict()
for line in stats:
    # numstat columns are tab-separated. Splitting on arbitrary whitespace
    # would fail to unpack for paths containing spaces and for rename
    # entries of the form "old => new".
    fields = line.split('\t', 2)
    if len(fields) != 3:
        continue
    add, sub, file = fields
    # Binary files are reported as "-" in both count columns; they have no
    # line counts to rank by, so skip them instead of crashing in int().
    if not (add.isdigit() and sub.isdigit()):
        continue
    added, removed = int(add), int(sub)
    count = added + removed
    if count > max_diff_per_file:
        continue
    if added == removed:
        # Equal additions and deletions get a penalty added to their cost.
        count += trivial_penalty
    # The project is the name of the directory two levels above the file.
    proj = os.path.basename(os.path.dirname(os.path.dirname(file)))
    diffs.setdefault(proj, []).append((count, file, proj, added, removed))
| 26 | + |
# Seed the heap with the lowest-cost entry of every project. The rest of
# each project's entries stay in its list in ``diffs``, sorted by ascending
# cost. The loop variable deliberately avoids the name ``list`` so the
# builtin is not rebound at module scope.
diff_heap = []
for proj_diffs in diffs.values():
    proj_diffs.sort(key=lambda entry: entry[0])
    diff_heap.append(proj_diffs.pop(0))
heapq.heapify(diff_heap)
25 | 32 |
|
| 33 | +diversity_penalty = dict() |
| 34 | +diff_pattern = set() |
26 | 35 | file_count = 0 |
27 | 36 | diff_count = 0 |
| 37 | +while len(diff_heap) != 0: |
| 38 | + cnt, file, proj, add, sub = heapq.heappop(diff_heap) |
| 39 | + proj_list = diffs[proj] |
| 40 | + if len(proj_list) != 0: |
| 41 | + diversity_penalty[proj] = diversity_penalty.get(proj, 0) + diversity_penalty_inc |
| 42 | + cnt2, file2, proj2, add2, sub2 = proj_list.pop(0) |
| 43 | + cnt2 += diversity_penalty[proj] |
| 44 | + heapq.heappush(diff_heap, (cnt2, file2, proj2, add2, sub2)) |
28 | 45 |
|
29 | | -for file, count in diffs: |
| 46 | + key = (add, sub) |
| 47 | + if key in diff_pattern: |
| 48 | + continue |
| 49 | + diff_pattern.add(key) |
| 50 | + count = add + sub |
30 | 51 | if file_count < max_file_total and diff_count + count <= max_diff_total: |
31 | 52 | file_count += 1 |
32 | 53 | diff_count += count |
|
0 commit comments