-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathresumemassdns.py
More file actions
125 lines (96 loc) · 3.84 KB
/
resumemassdns.py
File metadata and controls
125 lines (96 loc) · 3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python3
"""
Script to remove already processed domains from the original domains file.
This helps resume massdns processing from where it left off.
Usage: python resumemassdns.py
"""
import os
from collections import defaultdict
def extract_processed_domains(results_file):
    """
    Collect the domains that already have an A record in a massdns results file.

    A line counts as a result when it contains the token " A " (for example
    "example.com. A 1.2.3.4"). The text before the first " A ", with trailing
    dots removed, is taken as the domain name.

    Args:
        results_file: Path of the massdns output file to scan.

    Returns:
        A set of domain names without a trailing dot. An empty set is
        returned when the file is missing or cannot be read.
    """
    processed_domains = set()
    print(f"reading file from {results_file}...")
    try:
        # errors='replace': a stray non-UTF-8 byte anywhere in the results
        # must not abort the scan and throw away every domain found so far.
        with open(results_file, 'r', encoding='utf-8', errors='replace') as f:
            for line_count, line in enumerate(f, 1):
                # partition() yields an empty marker when " A " is absent,
                # which also covers blank lines.
                name, marker, _ = line.strip().partition(' A ')
                if marker:
                    processed_domains.add(name.rstrip('.'))
                # Progress indicator every 100k lines
                if line_count % 100000 == 0:
                    print(f" read {line_count:,} lines, found {len(processed_domains):,} domains")
    except FileNotFoundError:
        print(f"cant find {results_file}")
        return set()
    except OSError as e:
        print(f"error {results_file} while reading {e}")
        return set()
    print(f"done found {len(processed_domains):,} domains")
    return processed_domains
def filter_domains(original_file, processed_domains, output_file):
    """
    Copy the original domains file to a new file, dropping processed domains.

    Each input line is stripped and looked up in ``processed_domains``. An
    entry is also dropped when it matches after its trailing dots are removed,
    so a fully-qualified entry such as "example.com." is recognised against a
    processed name stored as "example.com". Lines that are kept are written
    unchanged, and a summary is printed at the end.

    Args:
        original_file: Path of the domain list to read, one domain per line.
        processed_domains: Set of domain names that must be removed.
        output_file: Path of the file that receives the remaining domains;
            it is overwritten if it exists.

    Returns:
        None. Read/write failures are reported on stdout instead of raised.
    """
    print(f"正在從 {original_file} 移除已處理的域名...")
    remaining_count = 0
    removed_count = 0
    total_count = 0
    try:
        with open(original_file, 'r', encoding='utf-8') as infile, \
                open(output_file, 'w', encoding='utf-8') as outfile:
            for total_count, line in enumerate(infile, 1):
                domain = line.strip()
                # Try the literal entry first, then its dot-less form, so
                # "example.com." matches a processed "example.com".
                if domain in processed_domains or domain.rstrip('.') in processed_domains:
                    removed_count += 1
                else:
                    outfile.write(line)
                    remaining_count += 1
                # Progress indicator every 100k lines
                if total_count % 100000 == 0:
                    print(f" done reading {total_count:,} lines (remain: {remaining_count:,}, removed: {removed_count:,})")
    except FileNotFoundError:
        print(f"cant find {original_file}")
        return
    except (OSError, UnicodeError) as e:
        print(f"error: {e}")
        return
    print("done")
    print(f"total: {total_count:,}")
    print(f"removed: {removed_count:,}")
    print(f"remain: {remaining_count:,}")
    print(f"saved to {output_file}")
def main(original_domains_file="domains.txt", results_file="results.txt",
         output_file="remaining_domains.txt"):
    """
    Interactively build a domain list with already-resolved domains removed.

    Checks that both input files exist, prints their sizes, asks for
    confirmation, then extracts the processed domains from the results file
    and writes the remaining domains to the output file.

    Args:
        original_domains_file: Path of the full domain list.
        results_file: Path of the massdns results produced so far.
        output_file: Path that receives the domains still to be processed.

    Returns:
        None. Every outcome is reported on stdout.
    """
    if not os.path.exists(original_domains_file):
        print(f"error when find {original_domains_file}")
        return
    if not os.path.exists(results_file):
        print(f"cant find {results_file}")
        return

    gib = 1024 ** 3  # file sizes are reported in GB
    original_size = os.path.getsize(original_domains_file) / gib
    results_size = os.path.getsize(results_file) / gib
    print(f"original file: {original_domains_file} ({original_size:.2f} GB)")
    print(f"result file: {results_file} ({results_size:.2f} GB)")
    print(f"output file: {output_file}")
    print()

    try:
        response = input("start? (y/N): ").strip().lower()
    except EOFError:
        # stdin is closed or empty (non-interactive run): fall back to the
        # default answer "N" instead of crashing with a traceback.
        response = ''
    if response != 'y':
        print("stopped")
        return
    print()

    processed_domains = extract_processed_domains(results_file)
    if not processed_domains:
        print("cant find and domains")
        return
    print()

    filter_domains(original_domains_file, processed_domains, output_file)
    print()
    print(f"now you can use {output_file} to run massdns")
# Start the interactive workflow only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()