Skip to content

Commit 5b38e94

Browse files
A
feat: Implement site name aliases
1 parent f60de0d commit 5b38e94

File tree

4 files changed

+169
-153
lines changed

4 files changed

+169
-153
lines changed

.actor/dataset_schema.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
"fields": [
3030
"username",
3131
"links"
32-
],
32+
]
3333
},
3434
"display": {
3535
"component": "table",

sherlock_project/resources/data.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2263,6 +2263,7 @@
22632263
"<div class=\"error-panel\"><span>User ",
22642264
"<title>429 Too Many Requests</title>"
22652265
],
2266+
"aliases": ["X"],
22662267
"errorType": "message",
22672268
"regexCheck": "^[a-zA-Z0-9_]{1,15}$",
22682269
"url": "https://x.com/{}",
Lines changed: 147 additions & 142 deletions
Original file line number | Diff line number | Diff line change
@@ -1,149 +1,154 @@
11
{
2-
"$schema": "https://json-schema.org/draft/2020-12/schema",
3-
"title": "Sherlock Target Manifest",
4-
"description": "Social media targets to probe for the existence of known usernames",
5-
"type": "object",
6-
"properties": {
7-
"$schema": { "type": "string" }
8-
},
9-
"patternProperties": {
10-
"^(?!\\$).*?$": {
11-
"type": "object",
12-
"description": "Target name and associated information (key should be human readable name)",
13-
"required": ["url", "urlMain", "errorType", "username_claimed"],
14-
"properties": {
15-
"url": { "type": "string" },
16-
"urlMain": { "type": "string" },
17-
"urlProbe": { "type": "string" },
18-
"username_claimed": { "type": "string" },
19-
"regexCheck": { "type": "string" },
20-
"isNSFW": { "type": "boolean" },
21-
"headers": { "type": "object" },
22-
"request_payload": { "type": "object" },
23-
"__comment__": {
24-
"type": "string",
25-
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
26-
},
27-
"tags": {
28-
"oneOf": [
29-
{ "$ref": "#/$defs/tag" },
30-
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
31-
]
32-
},
33-
"request_method": {
34-
"type": "string",
35-
"enum": ["GET", "POST", "HEAD", "PUT"]
36-
},
37-
"errorType": {
38-
"oneOf": [
39-
{
40-
"type": "string",
41-
"enum": ["message", "response_url", "status_code"]
42-
},
43-
{
44-
"type": "array",
45-
"items": {
46-
"type": "string",
47-
"enum": ["message", "response_url", "status_code"]
48-
}
49-
}
50-
]
51-
},
52-
"errorMsg": {
53-
"oneOf": [
54-
{ "type": "string" },
55-
{ "type": "array", "items": { "type": "string" } }
56-
]
57-
},
58-
"errorCode": {
59-
"oneOf": [
60-
{ "type": "integer" },
61-
{ "type": "array", "items": { "type": "integer" } }
62-
]
63-
},
64-
"errorUrl": { "type": "string" },
65-
"response_url": { "type": "string" }
66-
},
67-
"dependencies": {
68-
"errorMsg": {
69-
"oneOf": [
70-
{ "properties": { "errorType": { "const": "message" } } },
71-
{
72-
"properties": {
73-
"errorType": {
74-
"type": "array",
75-
"contains": { "const": "message" }
76-
}
77-
}
78-
}
79-
]
80-
},
81-
"errorUrl": {
82-
"oneOf": [
83-
{ "properties": { "errorType": { "const": "response_url" } } },
84-
{
85-
"properties": {
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"title": "Sherlock Target Manifest",
4+
"description": "Social media targets to probe for the existence of known usernames",
5+
"type": "object",
6+
"properties": {
7+
"$schema": { "type": "string" }
8+
},
9+
"patternProperties": {
10+
"^(?!\\$).*?$": {
11+
"type": "object",
12+
"description": "Target name and associated information (key should be human readable name)",
13+
"required": ["url", "urlMain", "errorType", "username_claimed"],
14+
"properties": {
15+
"url": { "type": "string" },
16+
"urlMain": { "type": "string" },
17+
"urlProbe": { "type": "string" },
18+
"username_claimed": { "type": "string" },
19+
"username_unclaimed": { "type": "string" },
20+
"regexCheck": { "type": "string" },
21+
"isNSFW": { "type": "boolean" },
22+
"headers": { "type": "object" },
23+
"request_payload": { "type": "object" },
24+
"__comment__": {
25+
"type": "string",
26+
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
27+
},
28+
"tags": {
29+
"oneOf": [
30+
{ "$ref": "#/$defs/tag" },
31+
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
32+
]
33+
},
34+
"request_method": {
35+
"type": "string",
36+
"enum": ["GET", "POST", "HEAD", "PUT"]
37+
},
8638
"errorType": {
87-
"type": "array",
88-
"contains": { "const": "response_url" }
89-
}
90-
}
91-
}
92-
]
93-
},
94-
"errorCode": {
95-
"oneOf": [
96-
{ "properties": { "errorType": { "const": "status_code" } } },
97-
{
98-
"properties": {
99-
"errorType": {
100-
"type": "array",
101-
"contains": { "const": "status_code" }
102-
}
103-
}
104-
}
105-
]
106-
}
107-
},
108-
"allOf": [
109-
{
110-
"if": {
111-
"anyOf": [
112-
{ "properties": { "errorType": { "const": "message" } } },
113-
{
114-
"properties": {
115-
"errorType": {
39+
"oneOf": [
40+
{
41+
"type": "string",
42+
"enum": ["message", "response_url", "status_code"]
43+
},
44+
{
45+
"type": "array",
46+
"items": {
47+
"type": "string",
48+
"enum": ["message", "response_url", "status_code"]
49+
}
50+
}
51+
]
52+
},
53+
"errorMsg": {
54+
"oneOf": [
55+
{ "type": "string" },
56+
{ "type": "array", "items": { "type": "string" } }
57+
]
58+
},
59+
"errorCode": {
60+
"oneOf": [
61+
{ "type": "integer" },
62+
{ "type": "array", "items": { "type": "integer" } }
63+
]
64+
},
65+
"errorUrl": { "type": "string" },
66+
"response_url": { "type": "string" },
67+
"aliases": {
11668
"type": "array",
117-
"contains": { "const": "message" }
118-
}
69+
"items": { "type": "string" }
11970
}
120-
}
121-
]
122-
},
123-
"then": { "required": ["errorMsg"] }
124-
},
125-
{
126-
"if": {
127-
"anyOf": [
128-
{ "properties": { "errorType": { "const": "response_url" } } },
129-
{
130-
"properties": {
131-
"errorType": {
132-
"type": "array",
133-
"contains": { "const": "response_url" }
134-
}
71+
},
72+
"dependencies": {
73+
"errorMsg": {
74+
"oneOf": [
75+
{ "properties": { "errorType": { "const": "message" } } },
76+
{
77+
"properties": {
78+
"errorType": {
79+
"type": "array",
80+
"contains": { "const": "message" }
81+
}
82+
}
83+
}
84+
]
85+
},
86+
"errorUrl": {
87+
"oneOf": [
88+
{ "properties": { "errorType": { "const": "response_url" } } },
89+
{
90+
"properties": {
91+
"errorType": {
92+
"type": "array",
93+
"contains": { "const": "response_url" }
94+
}
95+
}
96+
}
97+
]
98+
},
99+
"errorCode": {
100+
"oneOf": [
101+
{ "properties": { "errorType": { "const": "status_code" } } },
102+
{
103+
"properties": {
104+
"errorType": {
105+
"type": "array",
106+
"contains": { "const": "status_code" }
107+
}
108+
}
109+
}
110+
]
111+
}
112+
},
113+
"allOf": [
114+
{
115+
"if": {
116+
"anyOf": [
117+
{ "properties": { "errorType": { "const": "message" } } },
118+
{
119+
"properties": {
120+
"errorType": {
121+
"type": "array",
122+
"contains": { "const": "message" }
123+
}
124+
}
125+
}
126+
]
127+
},
128+
"then": { "required": ["errorMsg"] }
129+
},
130+
{
131+
"if": {
132+
"anyOf": [
133+
{ "properties": { "errorType": { "const": "response_url" } } },
134+
{
135+
"properties": {
136+
"errorType": {
137+
"type": "array",
138+
"contains": { "const": "response_url" }
139+
}
140+
}
141+
}
142+
]
143+
},
144+
"then": { "required": ["errorUrl"] }
135145
}
136-
}
137-
]
138-
},
139-
"then": { "required": ["errorUrl"] }
146+
],
147+
"additionalProperties": false
140148
}
141-
],
142-
"additionalProperties": false
149+
},
150+
"additionalProperties": false,
151+
"$defs": {
152+
"tag": { "type": "string", "enum": ["adult", "gaming"] }
143153
}
144-
},
145-
"additionalProperties": false,
146-
"$defs": {
147-
"tag": { "type": "string", "enum": ["adult", "gaming"] }
148-
}
149-
}
154+
}

sherlock_project/sherlock.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -783,23 +783,33 @@ def main():
783783
# Eventually, the rest of the code will be updated to use the new object
784784
# directly, but this will glue the two pieces together.
785785
site_data_all = {site.name: site.information for site in sites}
786-
if args.site_list == []:
786+
if not args.site_list:
787787
# Not desired to look at a sub-set of sites
788788
site_data = site_data_all
789789
else:
790790
# User desires to selectively run queries on a sub-set of the site list.
791791
# Make sure that the sites are supported & build up pruned site database.
792792
site_data = {}
793793
site_missing = []
794-
for site in args.site_list:
795-
counter = 0
796-
for existing_site in site_data_all:
797-
if site.lower() == existing_site.lower():
798-
site_data[existing_site] = site_data_all[existing_site]
799-
counter += 1
800-
if counter == 0:
801-
# Build up list of sites not supported for future error message.
802-
site_missing.append(f"'{site}'")
794+
795+
# Create a mapping from all site names and aliases (in lowercase) to their proper names
796+
site_map = {}
797+
for site_name, site_info in site_data_all.items():
798+
site_map[site_name.lower()] = site_name
799+
if "aliases" in site_info:
800+
for alias in site_info["aliases"]:
801+
site_map[alias.lower()] = site_name
802+
803+
for site_name_from_user in args.site_list:
804+
# Find the proper site name from the user's input (which could be an alias)
805+
proper_site_name = site_map.get(site_name_from_user.lower())
806+
807+
if proper_site_name:
808+
# If a match was found, add the site's data to our list
809+
site_data[proper_site_name] = site_data_all[proper_site_name]
810+
else:
811+
# If no match was found for the name or any alias
812+
site_missing.append(f"'{site_name_from_user}'")
803813

804814
if site_missing:
805815
print(f"Error: Desired sites not found: {', '.join(site_missing)}.")

0 commit comments

Comments
 (0)