Skip to content

Commit 0cf110e

Browse files
Merge branch 'master' into fix/remediate-blitztactics
2 parents 876e58b + a88adb0 commit 0cf110e

File tree

9 files changed

+341
-255
lines changed

9 files changed

+341
-255
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
### REPOSITORY
2-
/.github/CODEOWNERS @sdushantha
2+
/.github/CODEOWNERS @sdushantha @ppfeister
33
/.github/FUNDING.yml @sdushantha
44
/LICENSE @sdushantha
55

.github/workflows/regression.yml

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
- '**/*.py'
1212
- '**/*.ini'
1313
- '**/*.toml'
14+
- 'Dockerfile'
1415
push:
1516
branches:
1617
- master
@@ -21,11 +22,13 @@ on:
2122
- '**/*.py'
2223
- '**/*.ini'
2324
- '**/*.toml'
25+
- 'Dockerfile'
2426

2527
jobs:
2628
tox-lint:
27-
# Linting is ran through tox to ensure that the same linter is used by local runners
2829
runs-on: ubuntu-latest
30+
# Linting is run through tox to ensure that the same linter
31+
# is used by local runners
2932
steps:
3033
- uses: actions/checkout@v4
3134
- name: Set up linting environment
@@ -41,7 +44,8 @@ jobs:
4144
tox-matrix:
4245
runs-on: ${{ matrix.os }}
4346
strategy:
44-
fail-fast: false # We want to know what specicic versions it fails on
47+
# We want to know what specific versions it fails on
48+
fail-fast: false
4549
matrix:
4650
os: [
4751
ubuntu-latest,
@@ -67,3 +71,22 @@ jobs:
6771
pip install tox-gh-actions
6872
- name: Run tox
6973
run: tox
74+
docker-build-test:
75+
runs-on: ubuntu-latest
76+
steps:
77+
- name: Checkout code
78+
uses: actions/checkout@v4
79+
- name: Set up Docker Buildx
80+
uses: docker/setup-buildx-action@v3
81+
- name: Get version from pyproject.toml
82+
id: get-version
83+
run: |
84+
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
85+
echo "version=$VERSION" >> $GITHUB_OUTPUT
86+
- name: Build Docker image
87+
run: |
88+
docker build \
89+
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
90+
-t sherlock-test:latest .
91+
- name: Test Docker image runs
92+
run: docker run --rm sherlock-test:latest --version

.github/workflows/validate_modified_targets.yml

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,38 +17,57 @@ jobs:
1717
- name: Checkout repository
1818
uses: actions/checkout@v5
1919
with:
20+
# Check out the base branch but fetch all history to avoid a second fetch call
2021
ref: ${{ github.base_ref }}
21-
fetch-depth: 1
22+
fetch-depth: 0
2223

2324
- name: Set up Python
2425
uses: actions/setup-python@v6
2526
with:
26-
python-version: '3.13'
27+
python-version: "3.13"
2728

2829
- name: Install Poetry
2930
uses: abatilo/actions-poetry@v4
3031
with:
31-
poetry-version: 'latest'
32+
poetry-version: "latest"
3233

3334
- name: Install dependencies
3435
run: |
3536
poetry install --no-interaction --with dev
3637
37-
- name: Drop in place updated manifest from base
38+
- name: Prepare JSON versions for comparison
3839
run: |
39-
cp sherlock_project/resources/data.json data.json.base
40-
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
41-
git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
42-
cp sherlock_project/resources/data.json data.json.head
40+
# Fetch only the PR's branch head (single network call in this step)
41+
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
42+
43+
# Find the merge-base commit between the target branch and the PR branch
44+
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
45+
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
46+
47+
# Safely extract the file from the PR's head and the merge-base commit
48+
git show pr:sherlock_project/resources/data.json > data.json.head
49+
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
50+
51+
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
52+
# This ensures that pytest runs against the new, updated file.
53+
cp data.json.head sherlock_project/resources/data.json
4354
4455
- name: Discover modified targets
4556
id: discover-modified
4657
run: |
4758
CHANGED=$(
4859
python - <<'EOF'
4960
import json
50-
with open("data.json.base") as f: base = json.load(f)
51-
with open("data.json.head") as f: head = json.load(f)
61+
import sys
62+
try:
63+
with open("data.json.base") as f: base = json.load(f)
64+
with open("data.json.head") as f: head = json.load(f)
65+
except FileNotFoundError as e:
66+
print(f"Error: Could not find {e.filename}", file=sys.stderr)
67+
sys.exit(1)
68+
except json.JSONDecodeError as e:
69+
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
70+
sys.exit(1)
5271
5372
changed = []
5473
for k, v in head.items():
@@ -63,6 +82,8 @@ jobs:
6382
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
6483
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
6584
85+
# --- The rest of the steps below are unchanged ---
86+
6687
- name: Validate modified targets
6788
if: steps.discover-modified.outputs.changed_targets != ''
6889
continue-on-error: true

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# 3. Build image with BOTH latest and version tags
55
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
66

7-
FROM python:3.12-slim-bullseye as build
7+
FROM python:3.12-slim-bullseye AS build
88
WORKDIR /sherlock
99

1010
RUN pip3 install --no-cache-dir --upgrade pip

pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,10 @@ PySocks = "^1.7.0"
4646
requests = "^2.22.0"
4747
requests-futures = "^1.0.0"
4848
stem = "^1.8.0"
49-
torrequest = "^0.1.0"
5049
pandas = "^2.2.1"
5150
openpyxl = "^3.0.10"
5251
tomli = "^2.2.1"
5352

54-
[tool.poetry.extras]
55-
tor = ["torrequest"]
56-
5753
[tool.poetry.group.dev.dependencies]
5854
jsonschema = "^4.0.0"
5955
rstr = "^3.2.2"

sherlock_project/resources/data.json

Lines changed: 67 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,13 @@
7979
"username_claimed": "pink"
8080
},
8181
"AllMyLinks": {
82-
"errorMsg": "Not Found",
83-
"errorType": "message",
84-
"regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
85-
"url": "https://allmylinks.com/{}",
86-
"urlMain": "https://allmylinks.com/",
87-
"username_claimed": "blue"
88-
},
82+
"errorMsg": "Page not found",
83+
"errorType": "message",
84+
"regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
85+
"url": "https://allmylinks.com/{}",
86+
"urlMain": "https://allmylinks.com/",
87+
"username_claimed": "blue"
88+
},
8989
"AniWorld": {
9090
"errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
9191
"errorType": "message",
@@ -115,7 +115,7 @@
115115
"username_claimed": "lio24d"
116116
},
117117
"Apple Discussions": {
118-
"errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.",
118+
"errorMsg": "Looking for something in Apple Support Communities?",
119119
"errorType": "message",
120120
"url": "https://discussions.apple.com/profile/{}",
121121
"urlMain": "https://discussions.apple.com",
@@ -279,14 +279,7 @@
279279
"urlMain": "https://bsky.app/",
280280
"username_claimed": "mcuban"
281281
},
282-
"BoardGameGeek": {
283-
"errorType": "status_code",
284-
"regexCheck": "^[a-zA-Z0-9_]*$",
285-
"errorMsg": "User not found",
286-
"url": "https://boardgamegeek.com/user/{}",
287-
"urlMain": "https://boardgamegeek.com",
288-
"username_claimed": "blue"
289-
},
282+
290283
"BongaCams": {
291284
"errorType": "status_code",
292285
"isNSFW": true,
@@ -300,6 +293,14 @@
300293
"urlMain": "https://www.bookcrossing.com/",
301294
"username_claimed": "blue"
302295
},
296+
"BoardGameGeek": {
297+
"errorMsg": "\"isValid\":true",
298+
"errorType": "message",
299+
"url": "https://boardgamegeek.com/user/{}",
300+
"urlMain": "https://boardgamegeek.com/",
301+
"urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}",
302+
"username_claimed": "blue"
303+
},
303304
"BraveCommunity": {
304305
"errorType": "status_code",
305306
"url": "https://community.brave.com/u/{}/",
@@ -573,8 +574,7 @@
573574
"username_claimed": "brown"
574575
},
575576
"CyberDefenders": {
576-
"errorMsg": "<title>Blue Team Training for SOC analysts and DFIR - CyberDefenders</title>",
577-
"errorType": "message",
577+
"errorType": "status_code",
578578
"regexCheck": "^[^\\/:*?\"<>|@]{3,50}$",
579579
"request_method": "GET",
580580
"url": "https://cyberdefenders.org/p/{}",
@@ -601,6 +601,12 @@
601601
"urlMain": "https://www.dailymotion.com/",
602602
"username_claimed": "blue"
603603
},
604+
"dcinside": {
605+
"errorType": "status_code",
606+
"url": "https://gallog.dcinside.com/{}",
607+
"urlMain": "https://www.dcinside.com/",
608+
"username_claimed": "anrbrb"
609+
},
604610
"Dealabs": {
605611
"errorMsg": "La page que vous essayez",
606612
"errorType": "message",
@@ -609,13 +615,14 @@
609615
"urlMain": "https://www.dealabs.com/",
610616
"username_claimed": "blue"
611617
},
612-
"DeviantART": {
613-
"errorType": "status_code",
614-
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
615-
"url": "https://{}.deviantart.com",
616-
"urlMain": "https://deviantart.com",
617-
"username_claimed": "blue"
618-
},
618+
"DeviantArt": {
619+
"errorType": "message",
620+
"errorMsg": "Llama Not Found",
621+
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
622+
"url": "https://www.deviantart.com/{}",
623+
"urlMain": "https://www.deviantart.com/",
624+
"username_claimed": "blue"
625+
},
619626
"DigitalSpy": {
620627
"errorMsg": "The page you were looking for could not be found.",
621628
"errorType": "message",
@@ -1441,12 +1448,12 @@
14411448
"username_claimed": "blue"
14421449
},
14431450
"Mydramalist": {
1444-
"errorMsg": "Sign in - MyDramaList",
1445-
"errorType": "message",
1446-
"url": "https://www.mydramalist.com/profile/{}",
1447-
"urlMain": "https://mydramalist.com",
1448-
"username_claimed": "elhadidy12398"
1449-
},
1451+
"errorMsg": "The requested page was not found",
1452+
"errorType": "message",
1453+
"url": "https://www.mydramalist.com/profile/{}",
1454+
"urlMain": "https://mydramalist.com",
1455+
"username_claimed": "elhadidy12398"
1456+
},
14501457
"Myspace": {
14511458
"errorType": "status_code",
14521459
"url": "https://myspace.com/{}",
@@ -1460,6 +1467,13 @@
14601467
"urlMain": "https://www.native-instruments.com/forum/",
14611468
"username_claimed": "jambert"
14621469
},
1470+
"namuwiki": {
1471+
"__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.",
1472+
"errorType": "status_code",
1473+
"url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}",
1474+
"urlMain": "https://namu.wiki/",
1475+
"username_claimed": "namu"
1476+
},
14631477
"NationStates Nation": {
14641478
"errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!",
14651479
"errorType": "message",
@@ -1810,8 +1824,7 @@
18101824
"username_claimed": "blue"
18111825
},
18121826
"Roblox": {
1813-
"errorMsg": "Page cannot be found or no longer exists",
1814-
"errorType": "message",
1827+
"errorType": "status_code",
18151828
"url": "https://www.roblox.com/user.aspx?username={}",
18161829
"urlMain": "https://www.roblox.com/",
18171830
"username_claimed": "bluewolfekiller"
@@ -1919,7 +1932,7 @@
19191932
},
19201933
"SlideShare": {
19211934
"errorType": "message",
1922-
"errorMsg": "<title>Username available</title>",
1935+
"errorMsg": "<title>Page no longer exists</title>",
19231936
"url": "https://slideshare.net/{}",
19241937
"urlMain": "https://slideshare.net/",
19251938
"username_claimed": "blue"
@@ -1953,6 +1966,13 @@
19531966
"urlMain": "https://www.snapchat.com",
19541967
"username_claimed": "teamsnapchat"
19551968
},
1969+
"SOOP": {
1970+
"errorType": "status_code",
1971+
"url": "https://www.sooplive.co.kr/station/{}",
1972+
"urlMain": "https://www.sooplive.co.kr/",
1973+
"urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station",
1974+
"username_claimed": "udkn"
1975+
},
19561976
"SoundCloud": {
19571977
"errorType": "status_code",
19581978
"url": "https://soundcloud.com/{}",
@@ -2120,6 +2140,12 @@
21202140
"urlMain": "https://themeforest.net/",
21212141
"username_claimed": "user"
21222142
},
2143+
"tistory": {
2144+
"errorType": "status_code",
2145+
"url": "https://{}.tistory.com/",
2146+
"urlMain": "https://www.tistory.com/",
2147+
"username_claimed": "notice"
2148+
},
21232149
"TnAFlix": {
21242150
"errorType": "status_code",
21252151
"isNSFW": true,
@@ -2296,6 +2322,12 @@
22962322
"urlMain": "https://discourse.wicg.io/",
22972323
"username_claimed": "stefano"
22982324
},
2325+
"Wakatime": {
2326+
"errorType": "status_code",
2327+
"url": "https://wakatime.com/@{}",
2328+
"urlMain": "https://wakatime.com/",
2329+
"username_claimed": "blue"
2330+
},
22992331
"Warrior Forum": {
23002332
"errorType": "status_code",
23012333
"url": "https://www.warriorforum.com/members/{}.html",
@@ -2794,7 +2826,7 @@
27942826
"username_claimed": "green"
27952827
},
27962828
"threads": {
2797-
"errorMsg": "<title>Threads</title>",
2829+
"errorMsg": "<title>Threads • Log in</title>",
27982830
"errorType": "message",
27992831
"headers": {
28002832
"Sec-Fetch-Mode": "navigate"
@@ -2841,13 +2873,6 @@
28412873
"urlMain": "https://znanylekarz.pl",
28422874
"username_claimed": "janusz-nowak"
28432875
},
2844-
"Bluesky": {
2845-
"errorType": "status_code",
2846-
"url": "https://bsky.app/profile/{}.bsky.social",
2847-
"urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
2848-
"urlMain": "https://bsky.app/",
2849-
"username_claimed": "mcuban"
2850-
},
28512876
"Platzi": {
28522877
"errorType": "status_code",
28532878
"errorCode": 404,

0 commit comments

Comments
 (0)