1 | 1 | import ipaddress |
2 | 2 | import argparse |
3 | 3 | import os |
| 4 | +from collections import defaultdict |
4 | 5 |
5 | 6 | def extract_unique_hosts_from_file(file_path): |
6 | 7 | unique_hosts = set() |
@@ -34,19 +35,41 @@ def main(): |
34 | 35 | parser = argparse.ArgumentParser(description="Count unique valid hosts from CIDRs or IPs in files.") |
35 | 36 | parser.add_argument("file", nargs='?', help="Path to file containing CIDR ranges or IPs, one per line.") |
36 | 37 | parser.add_argument("-d", "--directory", help="Directory of files to process.") |
| 38 | + parser.add_argument("-D", "--duplicates", action="store_true", |
| 39 | + help="When processing a directory, list hosts that appear in more than one file and which files they appear in.") |
37 | 40 | args = parser.parse_args() |
38 | 41 |
39 | 42 | if args.directory: |
40 | 43 | grand_total_hosts = set() |
| 44 | + file_hosts_map = {} |
41 | 45 | print(f"\nProcessing directory: {args.directory}") |
42 | | - for entry in os.listdir(args.directory): |
| 46 | + for entry in sorted(os.listdir(args.directory)): |
43 | 47 | full_path = os.path.join(args.directory, entry) |
44 | 48 | if os.path.isfile(full_path): |
45 | 49 | file_hosts = extract_unique_hosts_from_file(full_path) |
| 50 | + file_hosts_map[entry] = file_hosts |
46 | 51 | print(f"{entry}: {len(file_hosts)} unique hosts") |
47 | 52 | grand_total_hosts.update(file_hosts) |
48 | 53 | print(f"\nTotal unique hosts across all files: {len(grand_total_hosts)}") |
49 | 54 |
| 55 | + if args.duplicates: |
| 56 | + # Build host -> set(files) mapping |
| 57 | + host_to_files = defaultdict(set) |
| 58 | + for fname, hosts in file_hosts_map.items(): |
| 59 | + for h in hosts: |
| 60 | + host_to_files[h].add(fname) |
| 61 | + |
| 62 | + # Find hosts present in more than one file |
| 63 | +            duplicates = {host: sorted(files) for host, files in host_to_files.items() if len(files) > 1} |
| 64 | + |
| 65 | + if not duplicates: |
| 66 | + print("\nNo duplicate hosts found between files.") |
| 67 | + else: |
| 68 | +                print(f"\nDuplicate hosts found between files: {len(duplicates)}\n") |
| 69 | +                for host in sorted(duplicates, key=lambda h: (ipaddress.ip_address(h).version, ipaddress.ip_address(h))): |
| 70 | + files_list = ", ".join(duplicates[host]) |
| 71 | + print(f"{host}: {files_list}") |
| 72 | + |
50 | 73 | elif args.file: |
51 | 74 | unique_hosts = extract_unique_hosts_from_file(args.file) |
52 | 75 | print(f"\nTotal unique hosts in {args.file} (excluding network and broadcast): {len(unique_hosts)}") |
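
For reference, a minimal self-contained sketch of the inversion that the new -D/--duplicates flag performs. The per-file host sets below are made-up stand-ins for the output of extract_unique_hosts_from_file(), so the snippet runs on its own; the file names are hypothetical.

from collections import defaultdict

# Hypothetical per-file results (file name -> set of host strings).
file_hosts_map = {
    "office.txt": {"10.0.0.1", "10.0.0.2", "10.0.0.3"},
    "vpn.txt":    {"10.0.0.2", "192.168.1.10"},
    "lab.txt":    {"10.0.0.3", "192.168.1.10"},
}

# Invert the mapping: host -> set of files it appears in.
host_to_files = defaultdict(set)
for fname, hosts in file_hosts_map.items():
    for h in hosts:
        host_to_files[h].add(fname)

# Keep only hosts seen in more than one file; sort the file lists for stable output.
duplicates = {h: sorted(files) for h, files in host_to_files.items() if len(files) > 1}

for host, files in sorted(duplicates.items()):
    print(f"{host}: {', '.join(files)}")
# Prints:
# 10.0.0.2: office.txt, vpn.txt
# 10.0.0.3: lab.txt, office.txt
# 192.168.1.10: lab.txt, vpn.txt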