Skip to content

Commit 37b3060

Browse files
committed
Merge branch 'master' into Issue#2442
2 parents 7afdee4 + d4d8e01 commit 37b3060

17 files changed

+625
-49
lines changed

.github/workflows/exclusions.yml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
name: Exclusions Updater
2+
3+
on:
4+
schedule:
5+
#- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
6+
- cron: '0 5 * * *' # Runs at 05:00 every day
7+
workflow_dispatch:
8+
9+
jobs:
10+
update-exclusions:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Checkout repository
14+
uses: actions/checkout@v5
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v6
18+
with:
19+
python-version: '3.13'
20+
21+
- name: Install Poetry
22+
uses: abatilo/actions-poetry@v4
23+
with:
24+
poetry-version: 'latest'
25+
26+
- name: Install dependencies
27+
run: |
28+
poetry install --no-interaction --with dev
29+
30+
- name: Run false positive tests
31+
run: |
32+
$(poetry env activate)
33+
pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
34+
deactivate
35+
36+
- name: Parse false positive detections by desired categories
37+
run: |
38+
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
39+
| sort -u > false_positive_exclusions.txt
40+
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
41+
| sort -u > waf_hits.txt
42+
43+
- name: Detect if exclusions list changed
  id: detect_changes
  run: |
    # Best-effort fetch: the exclusions branch may not exist yet
    git fetch origin exclusions || true

    if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
      # The exclusions branch and file exist, so compare their contents.
      # The freshly generated list is not tracked on this checkout, and
      # `git diff <commit> -- <path>` only considers paths known to the
      # index, so compare the committed blob against the file directly.
      if git show origin/exclusions:false_positive_exclusions.txt | cmp -s - false_positive_exclusions.txt; then
        echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
      else
        echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
      fi
    else
      # If the exclusions branch or file do not exist, treat as changed
      echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
    fi
59+
60+
- name: Quantify and display results
61+
run: |
62+
FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
63+
WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
64+
echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
65+
echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
66+
echo ">>> WAF hits:" && cat waf_hits.txt
67+
68+
- name: Commit and push exclusions list
69+
if: steps.detect_changes.outputs.exclusions_changed == 'true'
70+
run: |
71+
git config user.name "Paul Pfeister (automation)"
72+
git config user.email "[email protected]"
73+
74+
mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
75+
76+
git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
77+
git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
78+
79+
git fetch origin exclusions || true # Allows creation of branch if deleted
80+
git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
81+
82+
git stash pop || true
83+
84+
mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
85+
86+
git rm -f false_positive_exclusions.txt.tmp || true
87+
git add false_positive_exclusions.txt
88+
git commit -m "auto: update exclusions list" || echo "No changes to commit"
89+
git push origin exclusions

.github/workflows/regression.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ jobs:
4949
macos-latest,
5050
]
5151
python-version: [
52-
'3.9',
5352
'3.10',
5453
'3.11',
5554
'3.12',
55+
'3.13',
5656
]
5757
steps:
5858
- uses: actions/checkout@v4
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
name: Modified Target Validation
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- master
7+
paths:
8+
- "sherlock_project/resources/data.json"
9+
10+
jobs:
11+
validate-modified-targets:
12+
runs-on: ubuntu-latest
13+
steps:
14+
- name: Checkout repository
15+
uses: actions/checkout@v5
16+
with:
17+
ref: ${{ github.event.pull_request.head.sha }}
18+
fetch-depth: 0
19+
20+
- name: Set up Python
21+
uses: actions/setup-python@v6
22+
with:
23+
python-version: '3.13'
24+
25+
- name: Install Poetry
26+
uses: abatilo/actions-poetry@v4
27+
with:
28+
poetry-version: 'latest'
29+
30+
- name: Install dependencies
31+
run: |
32+
poetry install --no-interaction --with dev
33+
34+
- name: Discover modified targets
35+
id: discover-modified
36+
run: |
37+
# Fetch the upstream branch
38+
git fetch origin ${{ github.base_ref }} --depth=1
39+
40+
# Discover changes
41+
git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base
42+
cp sherlock_project/resources/data.json data.json.head
43+
44+
CHANGED=$(
45+
python - <<'EOF'
46+
import json
47+
with open("data.json.base") as f: base = json.load(f)
48+
with open("data.json.head") as f: head = json.load(f)
49+
50+
changed = []
51+
for k, v in head.items():
52+
if k not in base or base[k] != v:
53+
changed.append(k)
54+
55+
print(",".join(sorted(changed)))
56+
EOF
57+
)
58+
59+
# Preserve changelist
60+
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
61+
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
62+
63+
- name: Validate modified targets
  if: steps.discover-modified.outputs.changed_targets != ''
  continue-on-error: true
  env:
    # Target names originate from the pull request's data.json, so pass them
    # to the shell through the environment instead of expanding them into
    # the script text (avoids shell injection via crafted target names).
    CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
  run: |
    $(poetry env activate)
    pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "$CHANGED_TARGETS" --junitxml=validation_results.xml
    deactivate
70+
71+
- name: Prepare validation summary
72+
if: steps.discover-modified.outputs.changed_targets != ''
73+
id: prepare-summary
74+
run: |
75+
$(poetry env activate)
76+
summary=$(
77+
python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
78+
)
79+
deactivate
80+
echo "$summary" > validation_summary.md
81+
82+
- name: Announce validation results
  if: steps.discover-modified.outputs.changed_targets != ''
  uses: actions/github-script@v8
  with:
    script: |
      // Post the generated Markdown summary as a comment on the pull request.
      const fs = require('fs');
      const body = fs.readFileSync('validation_summary.md', 'utf8');
      // Await the API call so the step does not finish before the comment
      // request has settled.
      await github.rest.issues.createComment({
        issue_number: context.payload.pull_request.number,
        owner: context.repo.owner,
        repo: context.repo.repo,
        body: body,
      });
95+
96+
- name: This step shows as ran when no modifications are found
97+
if: steps.discover-modified.outputs.changed_targets == ''
98+
run: |
99+
echo "No modified targets found"

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
33
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
44
# 3. Build image with BOTH latest and version tags
5-
# i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .`
5+
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
66

77
FROM python:3.12-slim-bullseye as build
88
WORKDIR /sherlock

devel/summarize_site_validation.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python
2+
# This module summarizes the results of site validation tests queued by
3+
# workflow validate_modified_targets for presentation in Issue comments.
4+
5+
from defusedxml import ElementTree as ET
6+
import sys
7+
from pathlib import Path
8+
9+
def summarize_junit_xml(xml_path: Path) -> str:
    """Summarize a JUnit XML report of site validation tests as Markdown.

    Every parametrized test case (``test_name[site]``) is folded into one
    table row per site, with one column per recognized check. Test cases
    without a ``[site]`` suffix do not produce a row.

    Args:
        xml_path: Path to the JUnit XML file to parse.

    Returns:
        Markdown text consisting of a heading, a results table and, when
        applicable, trailing notices about failures and errors.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"
    # Maps each recognized test function to the table column it fills.
    check_columns: dict[str, str] = {
        "test_false_pos": "F+ Check",
        "test_false_neg": "F- Check",
    }

    root = ET.parse(xml_path).getroot()
    # Accept both a <testsuites> wrapper and a bare <testsuite> root element.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')
    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    # site name -> {column name -> pass/fail message}
    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')

        if error is not None:
            errors_detected = True

        # Split on the first '[' only, so site names containing brackets
        # are kept intact.
        test_name, bracket, param = (testcase.get('name') or '').partition('[')
        if not bracket:
            # Not a parametrized per-site test: nothing to show in the table.
            continue
        # Drop only the single ']' that closes the parameter id.
        site_name = param[:-1] if param.endswith(']') else param

        site_results = results.setdefault(site_name, {})
        column = check_columns.get(test_name)
        if column is not None:
            passed = failure is None and error is None
            site_results[column] = pass_message if passed else fail_message

    for site_name, checks in results.items():
        # A check with no recorded outcome is reported as 'Error!'.
        summary_lines.append(
            f"| {site_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |"
        )

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
60+
61+
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the path to a
    # JUnit XML report, and prints its Markdown summary to stdout.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(cli_args[0])
    if not xml_path.is_file():
        # Refuse anything that is not an existing regular file.
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    summary: str = summarize_junit_xml(xml_path)
    print(summary)

docs/README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
<p align=center>
1+
<p align="center">
22
<br>
3-
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
3+
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
44
<br>
55
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
66
<br>
@@ -15,8 +15,7 @@
1515
</p>
1616

1717
<p align="center">
18-
<img width="70%" height="70%" src="images/demo.png"/>
19-
</a>
18+
<img width="70%" height="70%" src="images/demo.png" alt="demo"/>
2019
</p>
2120

2221

@@ -112,17 +111,17 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
112111
"https://www.1337x.to/user/user123/",
113112
...
114113
]
115-
}]s
114+
}]
116115
```
117116

118-
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
117+
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
119118

120119
## Credits
121120

122121
Thank you to everyone who has contributed to Sherlock! ❤️
123122

124123
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
125-
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
124+
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
126125
</a>
127126

128127
## Star history

docs/removed-sites.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,3 +1982,16 @@ __2025-02-16 :__ Unsure if any way to view profiles exists now
19821982
"username_claimed": "t3dotgg"
19831983
}
19841984
```
1985+
1986+
## TorrentGalaxy
1987+
__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
1988+
```json
1989+
"TorrentGalaxy": {
1990+
"errorMsg": "<title>TGx:Can't show details</title>",
1991+
"errorType": "message",
1992+
"regexCheck": "^[A-Za-z0-9]{3,15}$",
1993+
"url": "https://torrentgalaxy.to/profile/{}",
1994+
"urlMain": "https://torrentgalaxy.to/",
1995+
"username_claimed": "GalaxyRG"
1996+
}
1997+
```

pyproject.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ source = "init"
88

99
[tool.poetry]
1010
name = "sherlock-project"
11-
# single source of truth for version is __init__.py
12-
version = "0"
11+
version = "0.16.0"
1312
description = "Hunt down social media accounts by username across social networks"
1413
license = "MIT"
1514
authors = [
@@ -50,12 +49,20 @@ stem = "^1.8.0"
5049
torrequest = "^0.1.0"
5150
pandas = "^2.2.1"
5251
openpyxl = "^3.0.10"
52+
tomli = "^2.2.1"
5353

5454
[tool.poetry.extras]
5555
tor = ["torrequest"]
5656

5757
[tool.poetry.group.dev.dependencies]
5858
jsonschema = "^4.0.0"
59+
rstr = "^3.2.2"
60+
pytest = "^8.4.2"
61+
pytest-xdist = "^3.8.0"
62+
63+
64+
[tool.poetry.group.ci.dependencies]
65+
defusedxml = "^0.7.1"
5966

6067
[tool.poetry.scripts]
6168
sherlock = 'sherlock_project.sherlock:main'

pytest.ini

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
[pytest]
2-
addopts = --strict-markers
2+
addopts = --strict-markers -m "not validate_targets"
33
markers =
44
online: mark tests as requiring internet access.
5+
validate_targets: mark tests for sweeping manifest validation (sends many requests).
6+
validate_targets_fp: validate_targets, false positive tests only.
7+
validate_targets_fn: validate_targets, false negative tests only.

0 commit comments

Comments
 (0)