Skip to content

Commit c501aad

Browse files
committed
Script for dumping pair frequency tables
1 parent f69a25b commit c501aad

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

scripts/frequency.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import itertools
2+
import sys
3+
4+
import pefile
5+
6+
7+
def _count_byte_pairs(data, pair_counts):
    """Tally every adjacent byte pair of *data* into *pair_counts*.

    ``pair_counts`` is indexed by ``first * 0x100 + second``. Returns the
    number of pairs tallied, which is 0 for empty or single-byte input.
    """
    # pairwise() walks the buffer once and avoids the copy made by data[1:].
    for first, second in itertools.pairwise(data):
        pair_counts[first * 0x100 + second] += 1
    return max(len(data) - 1, 0)


def main():
    """Print byte-pair frequency tables for the PE files named on the command line.

    Adjacent byte pairs are counted over every section whose
    ``IMAGE_SCN_MEM_EXECUTE`` attribute is truthy. Two tables are printed for
    the 512 most frequent pairs, both ordered by packed pair value
    (``first * 0x100 + second``):

    1. the pairs themselves, as ``p(0xAA, 0xBB), `` entries, 8 per row;
    2. the frequency rank of each pair (0 is the most frequent), 16 per row.

    A final line reports the share of all counted pairs covered by the table.

    Raises:
        SystemExit: if no paths were given, or if the given files contain no
            byte pairs in executable sections (the tables would be meaningless
            and the coverage ratio undefined).
    """
    paths = sys.argv[1:]
    if not paths:
        sys.exit(f'usage: {sys.argv[0]} PE_FILE [PE_FILE ...]')

    pair_counts = [0] * (2 ** 16)
    total_pairs_count = 0

    # Handle one file at a time and release it immediately instead of keeping
    # every input open for the whole run.
    for path in paths:
        pe = pefile.PE(path, fast_load=True)
        try:
            for section in pe.sections:
                if not section.IMAGE_SCN_MEM_EXECUTE:
                    continue
                total_pairs_count += _count_byte_pairs(section.get_data(), pair_counts)
        finally:
            pe.close()

    if total_pairs_count == 0:
        sys.exit('no byte pairs found in executable sections of the given files')

    top_n_pairs = 512
    # Packed pair values ordered from most to least frequent; the position in
    # this list is the pair's rank.
    ranked = sorted(
        range(len(pair_counts)),
        key=lambda packed: pair_counts[packed],
        reverse=True,
    )[:top_n_pairs]
    # (rank, packed) tuples re-ordered by packed value, so both tables below
    # list the pairs in the same order.
    sorted_pairs = sorted(enumerate(ranked), key=lambda item: item[1])

    top_pairs_count = 0
    for batch in itertools.batched(sorted_pairs, 8):
        for _, packed in batch:
            a, b = divmod(packed, 0x100)
            top_pairs_count += pair_counts[packed]
            print(f'p(0x{a:02X}, 0x{b:02X}), ', end='')
        print()

    print()
    for batch in itertools.batched(sorted_pairs, 16):
        for score, _ in batch:
            print(f'0x{score:03X}, ', end='')
        print()

    print(f'% of all: {top_pairs_count / total_pairs_count:0.4%}')
42+
43+
44+
# Run the dump only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

scripts/requirements.txt

40 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)