#!/bin/bash
#
# Wifite2 Cracked Database Cleaner
# Advanced duplicate detection and removal for cracked.json
#
# Detects and removes:
# - Duplicate BSSIDs (same MAC address)
# - Duplicate ESSID+Key combinations (same network credentials)
# - ESSID variations (hex encoding, unicode, trailing spaces)
# - Invalid/corrupted entries
#
# Usage: ./clean.sh [cracked.json] [--aggressive]
#

# Abort as soon as any command fails.
set -e
@@ -13,15 +19,41 @@ RED='\033[0;31m'
# ANSI colour escapes; they are stored unexpanded and interpreted later by
# `echo -e` (or printf's %b) at the point of output.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m' # No Color

# Parse arguments
CRACKED_FILE=""
AGGRESSIVE_MODE=false

# Scan the command line.
#   --aggressive        enables ESSID+Key deduplication (sets AGGRESSIVE_MODE)
#   first other word    is taken as the database path (sets CRACKED_FILE);
#                       any further non-flag words are ignored
# Globals written: CRACKED_FILE, AGGRESSIVE_MODE
parse_args() {
    local arg
    # No `shift` here: the for-loop iterates over a snapshot of "$@", so
    # shifting would not skip anything and would only disturb $1.. for
    # code running after the loop.
    for arg in "$@"; do
        case "$arg" in
            --aggressive)
                AGGRESSIVE_MODE=true
                ;;
            *)
                if [ -z "$CRACKED_FILE" ]; then
                    CRACKED_FILE="$arg"
                fi
                ;;
        esac
    done
}

parse_args "$@"

# Default file location (relative to tools directory)
CRACKED_FILE="${CRACKED_FILE:-../cracked.json}"

# Banner
echo -e "${CYAN}╔════════════════════════════════════════════════╗${NC}"
echo -e "${CYAN}║     Wifite2 Cracked Database Cleaner v2.0      ║${NC}"
echo -e "${CYAN}╚════════════════════════════════════════════════╝${NC}"
echo ""

# Check if file exists; diagnostics go to stderr so they are not mixed
# into any captured stdout.
if [ ! -f "$CRACKED_FILE" ]; then
    echo -e "${RED}[!]${NC} Error: File '$CRACKED_FILE' not found" >&2
    echo -e "${BLUE}[?]${NC} Usage: $0 [cracked.json] [--aggressive]" >&2
    exit 1
fi

@@ -46,35 +78,190 @@ echo -e "${GREEN}[+]${NC} Created backup: $BACKUP_FILE"
# Count original entries
ORIGINAL_COUNT=$(jq 'length' "$CRACKED_FILE")
echo -e "${BLUE}[*]${NC} Original entries: $ORIGINAL_COUNT"
echo ""

# ============================================================================
# PHASE 1: Normalize ESSID fields (fix encoding issues)
# ============================================================================
echo -e "${MAGENTA}[Phase 1]${NC} Normalizing ESSID encoding..."

# jq helper shared by the rewrite and by the change counter so both always
# apply exactly the same transformation.
ESSID_NORMALIZER='
def normalize_essid:
    gsub("\\u0011\\u0011\\u0011"; "111111")   # Fix unicode encoding
    | gsub("^\\s+|\\s+$"; "");                # Trim whitespace
'

# Only string ESSIDs are rewritten. gsub raises an error on null/non-string
# input, which would abort the whole script (set -e) before Phase 4 gets the
# chance to discard such entries.
jq "${ESSID_NORMALIZER}"'
map(
    if (.essid | type) == "string"
    then .essid |= normalize_essid
    else .
    end
)' "$CRACKED_FILE" > "${CRACKED_FILE}.phase1"

# Number of entries whose ESSID is altered by normalization
PHASE1_CHANGES=$(jq -r "${ESSID_NORMALIZER}"'
[.[] | select((.essid | type) == "string" and .essid != (.essid | normalize_essid))] | length
' "$CRACKED_FILE")

if [ "$PHASE1_CHANGES" -gt 0 ]; then
    echo -e "  ${YELLOW}→${NC} Normalized $PHASE1_CHANGES ESSID(s)"
else
    echo -e "  ${GREEN}✓${NC} No normalization needed"
fi

# ============================================================================
# PHASE 2: Remove duplicate BSSIDs (keep most recent)
# ============================================================================
echo -e "${MAGENTA}[Phase 2]${NC} Removing duplicate BSSIDs..."

# Number of distinct BSSIDs that occur more than once (before removal)
BSSID_DUPS=$(jq -r '[.[].bssid] | group_by(.) | map(select(length > 1)) | length' "${CRACKED_FILE}.phase1")

# Remove duplicates, keeping most recent: newest entries are placed first,
# then unique_by keeps the first entry of each BSSID group.
jq 'sort_by(.date) | reverse | unique_by(.bssid)' "${CRACKED_FILE}.phase1" > "${CRACKED_FILE}.phase2"

PHASE2_REMOVED=$(( $(jq 'length' "${CRACKED_FILE}.phase1") - $(jq 'length' "${CRACKED_FILE}.phase2") ))

if [ "$PHASE2_REMOVED" -gt 0 ]; then
    echo -e "  ${YELLOW}→${NC} Removed $PHASE2_REMOVED duplicate BSSID(s) from $BSSID_DUPS network(s)"

    # Show which BSSIDs had duplicates (first 10 only)
    jq -r '.[].bssid' "${CRACKED_FILE}.phase1" | sort | uniq -d | head -10 | while read -r bssid; do
        # The BSSID is handed to jq with --arg instead of being spliced into
        # the program text, so quotes/backslashes in the data cannot break
        # or alter the filter.
        ESSID=$(jq -r --arg bssid "$bssid" '.[] | select(.bssid == $bssid) | .essid' "${CRACKED_FILE}.phase2" | head -1)
        COUNT=$(jq -r --arg bssid "$bssid" '[.[] | select(.bssid == $bssid)] | length' "${CRACKED_FILE}.phase1")
        # %b expands the colour escapes; %s prints the data verbatim so a
        # backslash inside an ESSID is not interpreted.
        printf '    %b•%b %s (%s) - had %s entries\n' "$CYAN" "$NC" "$bssid" "$ESSID" "$COUNT"
    done

    # Show if there are more
    TOTAL_DUP_BSSIDS=$(jq -r '.[].bssid' "${CRACKED_FILE}.phase1" | sort | uniq -d | wc -l)
    if [ "$TOTAL_DUP_BSSIDS" -gt 10 ]; then
        echo -e "    ${CYAN}•${NC} ... and $((TOTAL_DUP_BSSIDS - 10)) more"
    fi
else
    echo -e "  ${GREEN}✓${NC} No duplicate BSSIDs found"
fi

# ============================================================================
# PHASE 3: Remove duplicate ESSID+Key combinations (aggressive mode)
# ============================================================================
if [ "$AGGRESSIVE_MODE" = true ]; then
    echo -e "${MAGENTA}[Phase 3]${NC} Removing duplicate ESSID+Key combinations..."

    # Remove duplicates based on ESSID+Key, keeping most recent.
    # The pair is compared as a two-element array rather than as a joined
    # "essid|key" string, so a '|' inside either field (or a key that is
    # literally "null") cannot make two different networks look identical.
    jq 'sort_by(.date) | reverse | unique_by([.essid, .key])' "${CRACKED_FILE}.phase2" > "${CRACKED_FILE}.phase3"

    PHASE3_REMOVED=$(( $(jq 'length' "${CRACKED_FILE}.phase2") - $(jq 'length' "${CRACKED_FILE}.phase3") ))

    if [ "$PHASE3_REMOVED" -gt 0 ]; then
        echo -e "  ${YELLOW}→${NC} Removed $PHASE3_REMOVED duplicate ESSID+Key combination(s)"

        # Show examples (up to 5). Grouping and counting happen inside one
        # jq run, so ESSIDs/keys are never pasted into jq program text or
        # split on a delimiter by the shell.
        jq -r '
            map(select(.key != null))
            | group_by([.essid, .key])
            | map(select(length > 1))
            | .[0:5][]
            | "\(.[0].essid) / \(.[0].key) - had \(length) different BSSIDs"
        ' "${CRACKED_FILE}.phase2" | while IFS= read -r example; do
            # %s keeps backslashes in ESSIDs/keys literal.
            printf '    %b•%b %s\n' "$CYAN" "$NC" "$example"
        done
    else
        echo -e "  ${GREEN}✓${NC} No duplicate ESSID+Key combinations found"
    fi
else
    echo -e "${MAGENTA}[Phase 3]${NC} Skipped (use --aggressive to enable ESSID+Key deduplication)"
    # Later phases always read the .phase3 file, so pass Phase 2 output through.
    cp "${CRACKED_FILE}.phase2" "${CRACKED_FILE}.phase3"
    PHASE3_REMOVED=0
fi

# ============================================================================
# PHASE 4: Remove invalid entries
# ============================================================================
echo -e "${MAGENTA}[Phase 4]${NC} Removing invalid entries..."

# Keep only entries that carry every required field; bssid and essid must
# additionally be non-empty.
jq '[.[] | select(
    .type != null and
    .date != null and
    .essid != null and
    .bssid != null and
    .bssid != "" and
    .essid != ""
)]' "${CRACKED_FILE}.phase3" > "${CRACKED_FILE}.phase4"

PHASE4_REMOVED=$(( $(jq 'length' "${CRACKED_FILE}.phase3") - $(jq 'length' "${CRACKED_FILE}.phase4") ))

if [ "$PHASE4_REMOVED" -gt 0 ]; then
    echo -e "  ${YELLOW}→${NC} Removed $PHASE4_REMOVED invalid/incomplete entry(ies)"
else
    echo -e "  ${GREEN}✓${NC} No invalid entries found"
fi

# ============================================================================
# PHASE 5: Sort by date (newest first) and finalize
# ============================================================================
echo -e "${MAGENTA}[Phase 5]${NC} Sorting and finalizing..."

# sort_by is ascending, so reverse to put the newest entries first.
jq 'sort_by(.date) | reverse' "${CRACKED_FILE}.phase4" > "${CRACKED_FILE}.final"

# ============================================================================
# Summary and Statistics
# ============================================================================
echo ""
echo -e "${CYAN}╔════════════════════════════════════════════════╗${NC}"
echo -e "${CYAN}║                Cleanup Summary                 ║${NC}"
echo -e "${CYAN}╚════════════════════════════════════════════════╝${NC}"

FINAL_COUNT=$(jq 'length' "${CRACKED_FILE}.final")
TOTAL_REMOVED=$((ORIGINAL_COUNT - FINAL_COUNT))

echo -e "${BLUE}[*]${NC} Original entries: $ORIGINAL_COUNT"
echo -e "${BLUE}[*]${NC} Final entries: $FINAL_COUNT"
echo -e "${BLUE}[*]${NC} Total removed: $TOTAL_REMOVED"
echo ""
echo -e "${BLUE}[*]${NC} Breakdown:"
echo -e "    ${CYAN}•${NC} ESSID normalized: $PHASE1_CHANGES"
echo -e "    ${CYAN}•${NC} Duplicate BSSIDs: $PHASE2_REMOVED"
echo -e "    ${CYAN}•${NC} Duplicate ESSID+Key: $PHASE3_REMOVED"
echo -e "    ${CYAN}•${NC} Invalid entries: $PHASE4_REMOVED"
echo ""

# Statistics (all computed from the cleaned .final file)
echo -e "${CYAN}╔════════════════════════════════════════════════╗${NC}"
echo -e "${CYAN}║              Database Statistics               ║${NC}"
echo -e "${CYAN}╚════════════════════════════════════════════════╝${NC}"

# Count by type
echo -e "${BLUE}[*]${NC} Entries by attack type:"
jq -r '[.[] | .type] | group_by(.) | map({type: .[0], count: length}) | .[] | "    \(.type): \(.count)"' "${CRACKED_FILE}.final"

echo ""
echo -e "${BLUE}[*]${NC} Unique networks (ESSID): $(jq -r '[.[].essid] | unique | length' "${CRACKED_FILE}.final")"
echo -e "${BLUE}[*]${NC} Unique access points (BSSID): $(jq -r '[.[].bssid] | unique | length' "${CRACKED_FILE}.final")"
echo -e "${BLUE}[*]${NC} Networks with passwords: $(jq -r '[.[] | select(.key != null)] | length' "${CRACKED_FILE}.final")"

# Top 5 most common passwords
echo ""
echo -e "${BLUE}[*]${NC} Top 5 most common passwords:"
jq -r '[.[] | select(.key != null) | .key] | group_by(.) | map({key: .[0], count: length}) | sort_by(.count) | reverse | .[0:5] | .[] | "    \(.count)x - \(.key)"' "${CRACKED_FILE}.final"

# ============================================================================
# Finalize
# ============================================================================
echo ""

# Replace the database when entries were removed OR when ESSIDs were
# normalized. Checking the removal count alone would throw away Phase 1's
# normalization whenever no entry happened to be deleted, even though the
# summary reports those ESSIDs as normalized.
if [ "$TOTAL_REMOVED" -gt 0 ] || [ "${PHASE1_CHANGES:-0}" -gt 0 ]; then
    mv "${CRACKED_FILE}.final" "$CRACKED_FILE"
    echo -e "${GREEN}[+]${NC} Successfully cleaned $CRACKED_FILE"
    if [ "$TOTAL_REMOVED" -gt 0 ]; then
        echo -e "${GREEN}[+]${NC} Removed $TOTAL_REMOVED duplicate/invalid entries"
    fi
    if [ "${PHASE1_CHANGES:-0}" -gt 0 ]; then
        echo -e "${GREEN}[+]${NC} Normalized $PHASE1_CHANGES ESSID(s)"
    fi
else
    echo -e "${GREEN}[+]${NC} Database is already clean - no changes needed"
    rm "${CRACKED_FILE}.final"
fi

# Cleanup temporary files (.phase1 .. .phase4); errors are deliberately
# silenced because some of them may not exist.
rm -f -- "${CRACKED_FILE}.phase"* 2>/dev/null

echo -e "${BLUE}[*]${NC} Backup saved as: $BACKUP_FILE"
echo -e "${GREEN}[✓]${NC} Done!"
echo ""
0 commit comments