Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,11 @@ usage: sherlock [-h] [--version] [--verbose] [--folderoutput FOLDEROUTPUT]
[--output OUTPUT] [--tor] [--unique-tor] [--csv] [--xlsx]
[--site SITE_NAME] [--proxy PROXY_URL] [--json JSON_FILE]
[--timeout TIMEOUT] [--print-all] [--print-found] [--no-color]
[--browse] [--local] [--nsfw]
[--browse] [--local] [--nsfw] [--no-cache] [--force-check]
Copy link
Member

@ppfeister ppfeister Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bikeshed:

Could we do --skip-cache and --ignore-cache? I feel like that just removes some ambiguity.

Like, what does --force-check even do? Of course I want to check these usernames. Maybe it bypasses username validation? (of course we know what it does)

Open to hearing your thoughts.

[--cache-duration CACHE_DURATION]
USERNAMES [USERNAMES ...]

Sherlock: Find Usernames Across Social Networks (Version 0.14.3)
Sherlock: Find Usernames Across Social Networks (Version 0.16.0)

positional arguments:
USERNAMES One or more usernames to check with social networks.
Expand Down Expand Up @@ -96,6 +97,10 @@ optional arguments:
--browse, -b Browse to all results on default browser.
--local, -l Force the use of the local data.json file.
--nsfw Include checking of NSFW sites from default list.
--no-cache Disable caching of results (don't read or write cache)
--force-check Ignore cached results and force fresh checks for all sites
--cache-duration CACHE_DURATION
Cache duration in seconds (default: 86400 = 24 hours)
```
## Apify Actor Usage [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ defusedxml = "^0.7.1"

[tool.poetry.scripts]
sherlock = 'sherlock_project.sherlock:main'
sherlock-cache = "sherlock_project.cache_cli:main"
181 changes: 181 additions & 0 deletions sherlock_project/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""
Sherlock Cache Module
This module handles SQLite-based caching for username lookup results.
"""

import sqlite3
import time
from pathlib import Path
from typing import Optional, Dict, Any
from sherlock_project.result import QueryStatus


class SherlockCache:
    """Manages SQLite cache for Sherlock results.

    Each row records the outcome of checking one username on one site. Rows
    are keyed by ``(username, site)`` and stamped with the Unix time at which
    they were written; a row older than ``cache_duration`` seconds is
    considered expired.

    Every method opens its own short-lived connection and always closes it,
    even when a statement fails.
    """

    def __init__(self, cache_path: Optional[str] = None, cache_duration: int = 86400):
        """
        Initialize the cache.

        Args:
            cache_path: Path to SQLite database file. Defaults to
                ~/.sherlock/cache.db (the directory is created if missing).
            cache_duration: Time in seconds to cache results. Default: 86400 (24 hours)
        """
        if cache_path is None:
            cache_dir = Path.home() / ".sherlock"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_path = str(cache_dir / "cache.db")

        self.cache_path = cache_path
        self.cache_duration = cache_duration
        self._init_database()

    def _init_database(self):
        """Initialize the SQLite database with required tables."""
        conn = sqlite3.connect(self.cache_path)
        try:
            # ``with conn`` commits on success and rolls back on error; it
            # does NOT close the connection, hence the surrounding finally.
            with conn:
                conn.execute('''
                    CREATE TABLE IF NOT EXISTS results (
                        username TEXT NOT NULL,
                        site TEXT NOT NULL,
                        status TEXT NOT NULL,
                        url TEXT,
                        timestamp INTEGER NOT NULL,
                        PRIMARY KEY (username, site)
                    )
                ''')

                # Index on timestamp, the column the expiry queries filter on.
                conn.execute('''
                    CREATE INDEX IF NOT EXISTS idx_timestamp
                    ON results(timestamp)
                ''')
        finally:
            conn.close()

    def get(self, username: str, site: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve cached result for a username on a specific site.

        Args:
            username: The username to lookup
            site: The site name

        Returns:
            Dictionary with keys ``status`` (a QueryStatus member), ``url``
            and ``timestamp``, or None if the entry is not cached, has
            expired, or stores a status name that QueryStatus no longer
            defines.
        """
        conn = sqlite3.connect(self.cache_path)
        try:
            row = conn.execute('''
                SELECT status, url, timestamp FROM results
                WHERE username = ? AND site = ?
            ''', (username, site)).fetchone()
        finally:
            conn.close()

        if row is None:
            return None

        status, url, timestamp = row

        # Check if cache is expired
        if int(time.time()) - timestamp > self.cache_duration:
            return None

        try:
            cached_status = QueryStatus[status]
        except KeyError:
            # The stored name is not a member of the current enum; treat the
            # row as a miss so the caller performs a fresh check instead of
            # crashing on unreadable cache data.
            return None

        return {
            'status': cached_status,
            'url': url,
            'timestamp': timestamp
        }

    def set(self, username: str, site: str, status: "QueryStatus",
            url: Optional[str] = None):
        """
        Store result in cache, replacing any existing row for the same
        username and site.

        Args:
            username: The username
            site: The site name
            status: Query status; its ``name`` is what gets persisted
            url: URL of the found profile (if applicable)
        """
        conn = sqlite3.connect(self.cache_path)
        try:
            with conn:
                conn.execute('''
                    INSERT OR REPLACE INTO results (username, site, status, url, timestamp)
                    VALUES (?, ?, ?, ?, ?)
                ''', (username, site, status.name, url, int(time.time())))
        finally:
            conn.close()

    def clear(self, username: Optional[str] = None, site: Optional[str] = None):
        """
        Clear cache entries.

        Args:
            username: Clear specific username (if None, clears all)
            site: Clear specific site (if None, clears all)

        When both are given only the matching (username, site) row is
        removed. Empty strings are treated the same as None.
        """
        if username and site:
            statement = 'DELETE FROM results WHERE username = ? AND site = ?'
            params = (username, site)
        elif username:
            statement = 'DELETE FROM results WHERE username = ?'
            params = (username,)
        elif site:
            statement = 'DELETE FROM results WHERE site = ?'
            params = (site,)
        else:
            statement = 'DELETE FROM results'
            params = ()

        conn = sqlite3.connect(self.cache_path)
        try:
            with conn:
                conn.execute(statement, params)
        finally:
            conn.close()

    def cleanup_expired(self):
        """Remove expired entries from cache."""
        expiration_time = int(time.time()) - self.cache_duration

        conn = sqlite3.connect(self.cache_path)
        try:
            with conn:
                conn.execute('DELETE FROM results WHERE timestamp < ?',
                             (expiration_time,))
        finally:
            conn.close()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dictionary with ``total_entries``, ``valid_entries`` (rows not
            yet expired), ``expired_entries`` and ``cache_path``.
        """
        expiration_time = int(time.time()) - self.cache_duration

        conn = sqlite3.connect(self.cache_path)
        try:
            total = conn.execute('SELECT COUNT(*) FROM results').fetchone()[0]
            valid = conn.execute(
                'SELECT COUNT(*) FROM results WHERE timestamp >= ?',
                (expiration_time,)
            ).fetchone()[0]
        finally:
            conn.close()

        return {
            'total_entries': total,
            'valid_entries': valid,
            'expired_entries': total - valid,
            'cache_path': self.cache_path
        }
77 changes: 77 additions & 0 deletions sherlock_project/cache_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""
Sherlock Cache Management CLI
Utility for managing Sherlock's SQLite cache.
"""

import argparse
import sys
from sherlock_project.cache import SherlockCache
from colorama import Fore, Style


def main(argv=None):
    """Main entry point for cache management CLI.

    Parses one of the ``clear``, ``stats`` or ``cleanup`` sub-commands and
    applies it to a SherlockCache created with its default settings.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When None, argparse falls back to ``sys.argv[1:]``, which
            is what the installed console script relies on.

    Raises:
        SystemExit: With code 1 (after printing help) when no sub-command is
            given; argparse itself exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        prog="sherlock-cache",
        description="Manage Sherlock's result cache"
    )

    subparsers = parser.add_subparsers(dest="command", help="Cache management commands")

    # Clear command
    clear_parser = subparsers.add_parser("clear", help="Clear cache entries")
    clear_parser.add_argument(
        "--username",
        help="Clear cache for specific username only"
    )
    clear_parser.add_argument(
        "--site",
        help="Clear cache for specific site only"
    )

    # Stats command
    subparsers.add_parser("stats", help="Show cache statistics")

    # Cleanup command
    subparsers.add_parser("cleanup", help="Remove expired cache entries")

    args = parser.parse_args(argv)

    # Sub-commands are optional to argparse here, so a bare invocation has to
    # be rejected by hand before the cache database is touched.
    if not args.command:
        parser.print_help()
        sys.exit(1)

    cache = SherlockCache()
    prefix = f"{Fore.GREEN}{Style.RESET_ALL}"

    if args.command == "clear":
        username = args.username
        site = args.site

        cache.clear(username=username, site=site)

        # Report the scope using the same truthiness rules cache.clear applies.
        if username and site:
            message = f"Cleared cache for {username} on {site}"
        elif username:
            message = f"Cleared all cache for username: {username}"
        elif site:
            message = f"Cleared all cache for site: {site}"
        else:
            message = "Cleared entire cache"
        print(f"{prefix} {message}")

    elif args.command == "stats":
        stats = cache.get_stats()
        print(f"\n{Style.BRIGHT}Cache Statistics:{Style.RESET_ALL}")
        print(f" Cache Path: {stats['cache_path']}")
        print(f" Total Entries: {stats['total_entries']}")
        print(f" Valid Entries: {Fore.GREEN}{stats['valid_entries']}{Style.RESET_ALL}")
        print(f" Expired Entries: {Fore.YELLOW}{stats['expired_entries']}{Style.RESET_ALL}\n")

    elif args.command == "cleanup":
        cache.cleanup_expired()
        print(f"{prefix} Cleaned up expired cache entries")


if __name__ == "__main__":
    main()
Loading