Skip to content

Commit afd321e

Browse files
committed
tools: improve the clean script
1 parent aa10810 commit afd321e

File tree

1 file changed

+211
-24
lines changed

1 file changed

+211
-24
lines changed

tools/clean.sh

Lines changed: 211 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
#!/bin/bash
22
#
33
# Wifite2 Cracked Database Cleaner
4-
# Removes duplicate entries from cracked.json based on BSSID
4+
# Advanced duplicate detection and removal for cracked.json
55
#
6-
# Usage: ./clean.sh [cracked.json]
6+
# Detects and removes:
7+
# - Duplicate BSSIDs (same MAC address)
8+
# - Duplicate ESSID+Key combinations (same network credentials)
9+
# - ESSID variations (hex encoding, unicode, trailing spaces)
10+
# - Invalid/corrupted entries
11+
#
12+
# Usage: ./clean.sh [cracked.json] [--aggressive]
713
#
814

915
set -e
@@ -13,15 +19,41 @@ RED='\033[0;31m'
1319
GREEN='\033[0;32m'
1420
YELLOW='\033[1;33m'
1521
BLUE='\033[0;34m'
22+
CYAN='\033[0;36m'
23+
MAGENTA='\033[0;35m'
1624
NC='\033[0m' # No Color
1725

26+
# Parse arguments
CRACKED_FILE=""
AGGRESSIVE_MODE=false

#######################################
# Parse the command-line arguments.
# Globals:   CRACKED_FILE (written), AGGRESSIVE_MODE (written)
# Arguments: "$@" - [cracked.json] [--aggressive], accepted in any order
# Notes:     Only the first non-option argument is taken as the database
#            path; any further non-option arguments are ignored.
#######################################
parse_args() {
    local arg
    # A for-in loop walks a snapshot of the argument list, so no `shift`
    # is needed (or useful) while iterating.
    for arg in "$@"; do
        case "$arg" in
            --aggressive)
                AGGRESSIVE_MODE=true
                ;;
            *)
                if [ -z "$CRACKED_FILE" ]; then
                    CRACKED_FILE="$arg"
                fi
                ;;
        esac
    done
}

parse_args "$@"

# Default file location (relative to tools directory)
CRACKED_FILE="${CRACKED_FILE:-../cracked.json}"
46+
47+
# Banner: three box-drawing rows printed in cyan, followed by a blank line
for banner_row in \
    "╔════════════════════════════════════════════════╗" \
    "║ Wifite2 Cracked Database Cleaner v2.0 ║" \
    "╚════════════════════════════════════════════════╝"; do
    echo -e "${CYAN}${banner_row}${NC}"
done
echo ""
2052

2153
# Check if file exists; without a regular file at that path there is nothing
# to clean, so report the problem, show the usage line and stop with status 1.
[ -f "$CRACKED_FILE" ] || {
    echo -e "${RED}[!]${NC} Error: File '$CRACKED_FILE' not found"
    echo -e "${BLUE}[?]${NC} Usage: $0 [cracked.json] [--aggressive]"
    exit 1
}
2759

@@ -46,35 +78,190 @@ echo -e "${GREEN}[+]${NC} Created backup: $BACKUP_FILE"
4678
# Count original entries: remember the size of the top-level JSON array before
# any phase runs, so the summary can report how much was removed.
ORIGINAL_COUNT="$(jq 'length' "$CRACKED_FILE")"
echo -e "${BLUE}[*]${NC} Original entries: ${ORIGINAL_COUNT}"
echo
82+
83+
# ============================================================================
# PHASE 1: Normalize ESSID fields (fix encoding issues)
# ============================================================================
echo -e "${MAGENTA}[Phase 1]${NC} Normalizing ESSID encoding..."

# jq helper shared by the rewrite and by the change counter below, so both
# always apply exactly the same rules:
#   1. replace the mis-encoded \u0011\u0011\u0011 sequence with "111111"
#   2. trim leading/trailing whitespace
ESSID_NORMALIZER='
def normalize_essid:
    gsub("\\u0011\\u0011\\u0011"; "111111")
    | gsub("^\\s+|\\s+$"; "");
'

# Only string ESSIDs are normalized. Entries whose essid is missing/null (or
# otherwise not a string) are passed through untouched instead of making jq
# fail on gsub; Phase 4 is the place where incomplete entries get removed.
jq "${ESSID_NORMALIZER}"'
map(
    if (.essid | type) == "string"
    then .essid |= normalize_essid
    else .
    end
)' "$CRACKED_FILE" > "${CRACKED_FILE}.phase1"

# Number of entries whose ESSID is actually altered by the normalization
PHASE1_CHANGES=$(jq -r "${ESSID_NORMALIZER}"'
[.[] | select((.essid | type) == "string" and .essid != (.essid | normalize_essid))] | length
' "$CRACKED_FILE")

if [ "$PHASE1_CHANGES" -gt 0 ]; then
    echo -e "${YELLOW}${NC} Normalized $PHASE1_CHANGES ESSID(s)"
else
    echo -e "${GREEN}${NC} No normalization needed"
fi
49107

50-
# Remove duplicates based on BSSID (keep the most recent entry)
51-
# Sort by date descending, then use unique_by to keep first occurrence (most recent)
52-
jq 'sort_by(.date) | reverse | unique_by(.bssid)' "$CRACKED_FILE" > "${CRACKED_FILE}.tmp"
108+
# ============================================================================
# PHASE 2: Remove duplicate BSSIDs (keep most recent)
# ============================================================================
echo -e "${MAGENTA}[Phase 2]${NC} Removing duplicate BSSIDs..."

# Find duplicates before removal: how many BSSIDs occur more than once
BSSID_DUPS=$(jq -r '[.[].bssid] | group_by(.) | map(select(length > 1)) | length' "${CRACKED_FILE}.phase1")

# Remove duplicates, keeping most recent: entries are ordered newest-first so
# that unique_by is handed the newest entry of each BSSID first.
jq 'sort_by(.date) | reverse | unique_by(.bssid)' "${CRACKED_FILE}.phase1" > "${CRACKED_FILE}.phase2"

PHASE2_REMOVED=$(($(jq 'length' "${CRACKED_FILE}.phase1") - $(jq 'length' "${CRACKED_FILE}.phase2")))

if [ "$PHASE2_REMOVED" -gt 0 ]; then
    echo -e "${YELLOW}${NC} Removed $PHASE2_REMOVED duplicate BSSID(s) from $BSSID_DUPS network(s)"

    # Show which BSSIDs had duplicates (first 10 only).
    # The BSSID is handed to jq with --arg rather than spliced into the filter
    # text, so quotes or backslashes in the data cannot break or alter the jq
    # program. Values read from the file are printed with %s so that
    # backslashes in an ESSID are not interpreted as escape sequences.
    jq -r '.[].bssid' "${CRACKED_FILE}.phase1" | sort | uniq -d | head -10 | while IFS= read -r bssid; do
        ESSID=$(jq -r --arg bssid "$bssid" 'first(.[] | select(.bssid == $bssid) | .essid)' "${CRACKED_FILE}.phase2")
        COUNT=$(jq -r --arg bssid "$bssid" '[.[] | select(.bssid == $bssid)] | length' "${CRACKED_FILE}.phase1")
        printf '%b %s (%s) - had %s entries\n' "${CYAN} ${NC}" "$bssid" "$ESSID" "$COUNT"
    done

    # Show if there are more
    TOTAL_DUP_BSSIDS=$(jq -r '.[].bssid' "${CRACKED_FILE}.phase1" | sort | uniq -d | wc -l)
    if [ "$TOTAL_DUP_BSSIDS" -gt 10 ]; then
        echo -e "${CYAN}${NC} ... and $((TOTAL_DUP_BSSIDS - 10)) more"
    fi
else
    echo -e "${GREEN}${NC} No duplicate BSSIDs found"
fi
139+
140+
# ============================================================================
# PHASE 3: Remove duplicate ESSID+Key combinations (aggressive mode)
# ============================================================================
if [ "$AGGRESSIVE_MODE" = true ]; then
    echo -e "${MAGENTA}[Phase 3]${NC} Removing duplicate ESSID+Key combinations..."

    # Remove duplicates based on ESSID+Key, keeping most recent.
    #  - Entries without a key carry no credentials to compare, so they are
    #    kept as-is rather than being merged with every other key-less entry
    #    that happens to share an ESSID.
    #  - The grouping key is the array [essid, key], not a joined string, so
    #    values containing the separator cannot collide with other pairs.
    # Output order is irrelevant here; Phase 5 re-sorts the result by date.
    jq '
        map(select(.key == null))
        + ( map(select(.key != null))
            | sort_by(.date) | reverse
            | unique_by([.essid, .key]) )
    ' "${CRACKED_FILE}.phase2" > "${CRACKED_FILE}.phase3"

    PHASE3_REMOVED=$(($(jq 'length' "${CRACKED_FILE}.phase2") - $(jq 'length' "${CRACKED_FILE}.phase3")))

    if [ "$PHASE3_REMOVED" -gt 0 ]; then
        echo -e "${YELLOW}${NC} Removed $PHASE3_REMOVED duplicate ESSID+Key combination(s)"

        # Show examples (first 5 duplicated combinations). Grouping, counting
        # and formatting all happen inside one jq call, so ESSIDs and keys are
        # never spliced into a jq program or split on a delimiter by the shell.
        # Lines are printed with %s so backslashes in the data stay literal.
        jq -r '
            map(select(.key != null))
            | group_by([.essid, .key])
            | map(select(length > 1))
            | .[0:5][]
            | "\(.[0].essid) / \(.[0].key) - had \(length) different BSSIDs"
        ' "${CRACKED_FILE}.phase2" | while IFS= read -r example; do
            printf '%b %s\n' "${CYAN}${NC}" "$example"
        done
    else
        echo -e "${GREEN}${NC} No duplicate ESSID+Key combinations found"
    fi
else
    echo -e "${MAGENTA}[Phase 3]${NC} Skipped (use --aggressive to enable ESSID+Key deduplication)"
    # Later phases always read the .phase3 file, so pass Phase 2's result on
    cp "${CRACKED_FILE}.phase2" "${CRACKED_FILE}.phase3"
    PHASE3_REMOVED=0
fi
176+
177+
# ============================================================================
# PHASE 4: Remove invalid entries
# ============================================================================
echo -e "${MAGENTA}[Phase 4]${NC} Removing invalid entries..."

# Keep only entries that carry every required field: type and date must be
# non-null, while essid and bssid must be non-null and non-empty.
jq '
def filled: . != null and . != "";
map(select(
    (.type != null) and
    (.date != null) and
    (.essid | filled) and
    (.bssid | filled)
))' "${CRACKED_FILE}.phase3" > "${CRACKED_FILE}.phase4"

# Entries dropped by this phase = size before minus size after
phase4_before=$(jq 'length' "${CRACKED_FILE}.phase3")
phase4_after=$(jq 'length' "${CRACKED_FILE}.phase4")
PHASE4_REMOVED=$((phase4_before - phase4_after))

if [ "$PHASE4_REMOVED" -le 0 ]; then
    echo -e "${GREEN}${NC} No invalid entries found"
else
    echo -e "${YELLOW}${NC} Removed $PHASE4_REMOVED invalid/incomplete entry(ies)"
fi
199+
200+
# ============================================================================
# PHASE 5: Sort by date (newest first) and finalize
# ============================================================================
echo -e "${MAGENTA}[Phase 5]${NC} Sorting and finalizing..."

# Ascending sort on .date, then reversed, written out as the candidate file
phase5_input="${CRACKED_FILE}.phase4"
phase5_output="${CRACKED_FILE}.final"
jq 'sort_by(.date) | reverse' "$phase5_input" > "$phase5_output"
206+
207+
# ============================================================================
# Summary and Statistics
# ============================================================================
echo ""
for summary_row in \
    "╔════════════════════════════════════════════════╗" \
    "║ Cleanup Summary ║" \
    "╚════════════════════════════════════════════════╝"; do
    echo -e "${CYAN}${summary_row}${NC}"
done

# Totals are taken from the finished file; the per-phase figures printed in
# the breakdown are the counters each phase set earlier in the script.
FINAL_COUNT="$(jq 'length' "${CRACKED_FILE}.final")"
TOTAL_REMOVED=$((ORIGINAL_COUNT - FINAL_COUNT))

summary_tag="${BLUE}[*]${NC}"
summary_bullet=" ${CYAN}${NC}"

echo -e "${summary_tag} Original entries: $ORIGINAL_COUNT"
echo -e "${summary_tag} Final entries: $FINAL_COUNT"
echo -e "${summary_tag} Total removed: $TOTAL_REMOVED"
echo ""
echo -e "${summary_tag} Breakdown:"
echo -e "${summary_bullet} ESSID normalized: $PHASE1_CHANGES"
echo -e "${summary_bullet} Duplicate BSSIDs: $PHASE2_REMOVED"
echo -e "${summary_bullet} Duplicate ESSID+Key: $PHASE3_REMOVED"
echo -e "${summary_bullet} Invalid entries: $PHASE4_REMOVED"
echo ""
228+
229+
# Statistics: read-only reporting over the candidate ".final" file
stats_file="${CRACKED_FILE}.final"

for stats_row in \
    "╔════════════════════════════════════════════════╗" \
    "║ Database Statistics ║" \
    "╚════════════════════════════════════════════════╝"; do
    echo -e "${CYAN}${stats_row}${NC}"
done

# Count by type: one "type: count" line per distinct .type value
echo -e "${BLUE}[*]${NC} Entries by attack type:"
jq -r 'map(.type) | group_by(.)[] | " \(.[0]): \(length)"' "$stats_file"

echo ""
echo -e "${BLUE}[*]${NC} Unique networks (ESSID): $(jq -r 'map(.essid) | unique | length' "$stats_file")"
echo -e "${BLUE}[*]${NC} Unique access points (BSSID): $(jq -r 'map(.bssid) | unique | length' "$stats_file")"
echo -e "${BLUE}[*]${NC} Networks with passwords: $(jq -r 'map(select(.key != null)) | length' "$stats_file")"

# Top 5 most common passwords: group identical keys, order the groups by size
# (largest first) and print the first five as "<count>x - <key>"
echo ""
echo -e "${BLUE}[*]${NC} Top 5 most common passwords:"
jq -r '
    map(.key | select(. != null))
    | group_by(.)
    | map({key: .[0], count: length})
    | sort_by(.count)
    | reverse
    | .[0:5][]
    | " \(.count)x - \(.key)"
' "$stats_file"
247+
248+
# ============================================================================
# Finalize
# ============================================================================

#######################################
# Install the cleaned database (or discard it) and remove temporary files.
# Globals:   CRACKED_FILE, TOTAL_REMOVED, PHASE1_CHANGES, BACKUP_FILE (read)
#            GREEN, BLUE, NC (read)
# Outputs:   status messages on stdout
# Notes:     The cleaned file replaces the original when entries were removed
#            OR when ESSIDs were normalized. Checking only the removal count
#            would throw away a run that normalized ESSIDs but removed nothing,
#            even though Phase 1 reported those changes.
#######################################
finalize_database() {
    local removed="${TOTAL_REMOVED:-0}"
    local normalized="${PHASE1_CHANGES:-0}"

    echo ""

    if [ "$removed" -gt 0 ] || [ "$normalized" -gt 0 ]; then
        mv "${CRACKED_FILE}.final" "$CRACKED_FILE"
        echo -e "${GREEN}[+]${NC} Successfully cleaned $CRACKED_FILE"
        if [ "$removed" -gt 0 ]; then
            echo -e "${GREEN}[+]${NC} Removed $removed duplicate/invalid entries"
        fi
        if [ "$normalized" -gt 0 ]; then
            echo -e "${GREEN}[+]${NC} Normalized $normalized ESSID(s)"
        fi
    else
        echo -e "${GREEN}[+]${NC} Database is already clean - no changes needed"
        rm -f "${CRACKED_FILE}.final"
    fi

    # Cleanup temporary files left behind by the individual phases
    rm -f "${CRACKED_FILE}.phase"* 2>/dev/null

    echo -e "${BLUE}[*]${NC} Backup saved as: $BACKUP_FILE"
    echo -e "${GREEN}[✓]${NC} Done!"
    echo ""
}

finalize_database

0 commit comments

Comments
 (0)