Skip to content

Commit cfcc82a

Browse files
authored
Merge pull request #2597 from sherlock-project/feat/multiple-types
Support multiple errorType checks
2 parents 975965a + 0794e02 commit cfcc82a

File tree

2 files changed

+200
-124
lines changed

2 files changed

+200
-124
lines changed
Lines changed: 143 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,80 +1,149 @@
11
{
2-
"$schema": "https://json-schema.org/draft/2020-12/schema",
3-
"title": "Sherlock Target Manifest",
4-
"description": "Social media targets to probe for the existence of known usernames",
5-
"type": "object",
6-
"properties": {
7-
"$schema": { "type": "string" }
8-
},
9-
"patternProperties": {
10-
"^(?!\\$).*?$": {
11-
"type": "object",
12-
"description": "Target name and associated information (key should be human readable name)",
13-
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
14-
"properties": {
15-
"url": { "type": "string" },
16-
"urlMain": { "type": "string" },
17-
"urlProbe": { "type": "string" },
18-
"username_claimed": { "type": "string" },
19-
"regexCheck": { "type": "string" },
20-
"isNSFW": { "type": "boolean" },
21-
"headers": { "type": "object" },
22-
"request_payload": { "type": "object" },
23-
"__comment__": {
24-
"type": "string",
25-
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
26-
},
27-
"tags": {
28-
"oneOf": [
29-
{ "$ref": "#/$defs/tag" },
30-
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
31-
]
32-
},
33-
"request_method": {
34-
"type": "string",
35-
"enum": [ "GET", "POST", "HEAD", "PUT" ]
36-
},
37-
"errorType": {
38-
"type": "string",
39-
"enum": [ "message", "response_url", "status_code" ]
40-
},
41-
"errorMsg": {
42-
"oneOf": [
43-
{ "type": "string" },
44-
{ "type": "array", "items": { "type": "string" } }
45-
]
46-
},
47-
"errorCode": {
48-
"oneOf": [
49-
{ "type": "integer" },
50-
{ "type": "array", "items": { "type": "integer" } }
51-
]
52-
},
53-
"errorUrl": { "type": "string" },
54-
"response_url": { "type": "string" }
2+
"$schema": "https://json-schema.org/draft/2020-12/schema",
3+
"title": "Sherlock Target Manifest",
4+
"description": "Social media targets to probe for the existence of known usernames",
5+
"type": "object",
6+
"properties": {
7+
"$schema": { "type": "string" }
8+
},
9+
"patternProperties": {
10+
"^(?!\\$).*?$": {
11+
"type": "object",
12+
"description": "Target name and associated information (key should be human readable name)",
13+
"required": ["url", "urlMain", "errorType", "username_claimed"],
14+
"properties": {
15+
"url": { "type": "string" },
16+
"urlMain": { "type": "string" },
17+
"urlProbe": { "type": "string" },
18+
"username_claimed": { "type": "string" },
19+
"regexCheck": { "type": "string" },
20+
"isNSFW": { "type": "boolean" },
21+
"headers": { "type": "object" },
22+
"request_payload": { "type": "object" },
23+
"__comment__": {
24+
"type": "string",
25+
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
26+
},
27+
"tags": {
28+
"oneOf": [
29+
{ "$ref": "#/$defs/tag" },
30+
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
31+
]
32+
},
33+
"request_method": {
34+
"type": "string",
35+
"enum": ["GET", "POST", "HEAD", "PUT"]
36+
},
37+
"errorType": {
38+
"oneOf": [
39+
{
40+
"type": "string",
41+
"enum": ["message", "response_url", "status_code"]
5542
},
56-
"dependencies": {
57-
"errorMsg": {
58-
"properties" : { "errorType": { "const": "message" } }
59-
},
60-
"errorUrl": {
61-
"properties": { "errorType": { "const": "response_url" } }
62-
},
63-
"errorCode": {
64-
"properties": { "errorType": { "const": "status_code" } }
43+
{
44+
"type": "array",
45+
"items": {
46+
"type": "string",
47+
"enum": ["message", "response_url", "status_code"]
48+
}
49+
}
50+
]
51+
},
52+
"errorMsg": {
53+
"oneOf": [
54+
{ "type": "string" },
55+
{ "type": "array", "items": { "type": "string" } }
56+
]
57+
},
58+
"errorCode": {
59+
"oneOf": [
60+
{ "type": "integer" },
61+
{ "type": "array", "items": { "type": "integer" } }
62+
]
63+
},
64+
"errorUrl": { "type": "string" },
65+
"response_url": { "type": "string" }
66+
},
67+
"dependencies": {
68+
"errorMsg": {
69+
"oneOf": [
70+
{ "properties": { "errorType": { "const": "message" } } },
71+
{
72+
"properties": {
73+
"errorType": {
74+
"type": "array",
75+
"contains": { "const": "message" }
6576
}
66-
},
67-
"if": { "properties": { "errorType": { "const": "message" } } },
68-
"then": { "required": [ "errorMsg" ] },
69-
"else": {
70-
"if": { "properties": { "errorType": { "const": "response_url" } } },
71-
"then": { "required": [ "errorUrl" ] }
72-
},
73-
"additionalProperties": false
77+
}
78+
}
79+
]
80+
},
81+
"errorUrl": {
82+
"oneOf": [
83+
{ "properties": { "errorType": { "const": "response_url" } } },
84+
{
85+
"properties": {
86+
"errorType": {
87+
"type": "array",
88+
"contains": { "const": "response_url" }
89+
}
90+
}
91+
}
92+
]
93+
},
94+
"errorCode": {
95+
"oneOf": [
96+
{ "properties": { "errorType": { "const": "status_code" } } },
97+
{
98+
"properties": {
99+
"errorType": {
100+
"type": "array",
101+
"contains": { "const": "status_code" }
102+
}
103+
}
104+
}
105+
]
106+
}
107+
},
108+
"allOf": [
109+
{
110+
"if": {
111+
"anyOf": [
112+
{ "properties": { "errorType": { "const": "message" } } },
113+
{
114+
"properties": {
115+
"errorType": {
116+
"type": "array",
117+
"contains": { "const": "message" }
118+
}
119+
}
120+
}
121+
]
122+
},
123+
"then": { "required": ["errorMsg"] }
124+
},
125+
{
126+
"if": {
127+
"anyOf": [
128+
{ "properties": { "errorType": { "const": "response_url" } } },
129+
{
130+
"properties": {
131+
"errorType": {
132+
"type": "array",
133+
"contains": { "const": "response_url" }
134+
}
135+
}
136+
}
137+
]
138+
},
139+
"then": { "required": ["errorUrl"] }
74140
}
75-
},
76-
"additionalProperties": false,
77-
"$defs": {
78-
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
141+
],
142+
"additionalProperties": false
79143
}
144+
},
145+
"additionalProperties": false,
146+
"$defs": {
147+
"tag": { "type": "string", "enum": ["adult", "gaming"] }
148+
}
80149
}

sherlock_project/sherlock.py

Lines changed: 57 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,8 @@ def sherlock(
381381

382382
# Get the expected error type
383383
error_type = net_info["errorType"]
384+
if isinstance(error_type, str):
385+
error_type: list[str] = [error_type]
384386

385387
# Retrieve future and ensure it has finished
386388
future = net_info["request_future"]
@@ -425,58 +427,63 @@ def sherlock(
425427
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
426428
query_status = QueryStatus.WAF
427429

428-
elif error_type == "message":
429-
# error_flag True denotes no error found in the HTML
430-
# error_flag False denotes error found in the HTML
431-
error_flag = True
432-
errors = net_info.get("errorMsg")
433-
# errors will hold the error message
434-
# it can be string or list
435-
# by isinstance method we can detect that
436-
# and handle the case for strings as normal procedure
437-
# and if its list we can iterate the errors
438-
if isinstance(errors, str):
439-
# Checks if the error message is in the HTML
440-
# if error is present we will set flag to False
441-
if errors in r.text:
442-
error_flag = False
443-
else:
444-
# If it's list, it will iterate all the error message
445-
for error in errors:
446-
if error in r.text:
430+
else:
431+
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
432+
# It should be impossible to ever get here...
433+
raise ValueError(
434+
f"Unknown Error Type '{error_type}' for "
435+
f"site '{social_network}'"
436+
)
437+
438+
if "message" in error_type:
439+
# error_flag True denotes no error found in the HTML
440+
# error_flag False denotes error found in the HTML
441+
error_flag = True
442+
errors = net_info.get("errorMsg")
443+
# errors will hold the error message
444+
# it can be string or list
445+
# by isinstance method we can detect that
446+
# and handle the case for strings as normal procedure
447+
# and if its list we can iterate the errors
448+
if isinstance(errors, str):
449+
# Checks if the error message is in the HTML
450+
# if error is present we will set flag to False
451+
if errors in r.text:
447452
error_flag = False
448-
break
449-
if error_flag:
450-
query_status = QueryStatus.CLAIMED
451-
else:
452-
query_status = QueryStatus.AVAILABLE
453-
elif error_type == "status_code":
454-
error_codes = net_info.get("errorCode")
455-
query_status = QueryStatus.CLAIMED
456-
457-
# Type consistency, allowing for both singlets and lists in manifest
458-
if isinstance(error_codes, int):
459-
error_codes = [error_codes]
460-
461-
if error_codes is not None and r.status_code in error_codes:
462-
query_status = QueryStatus.AVAILABLE
463-
elif r.status_code >= 300 or r.status_code < 200:
464-
query_status = QueryStatus.AVAILABLE
465-
elif error_type == "response_url":
466-
# For this detection method, we have turned off the redirect.
467-
# So, there is no need to check the response URL: it will always
468-
# match the request. Instead, we will ensure that the response
469-
# code indicates that the request was successful (i.e. no 404, or
470-
# forward to some odd redirect).
471-
if 200 <= r.status_code < 300:
453+
else:
454+
# If it's list, it will iterate all the error message
455+
for error in errors:
456+
if error in r.text:
457+
error_flag = False
458+
break
459+
if error_flag:
460+
query_status = QueryStatus.CLAIMED
461+
else:
462+
query_status = QueryStatus.AVAILABLE
463+
464+
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
465+
error_codes = net_info.get("errorCode")
472466
query_status = QueryStatus.CLAIMED
473-
else:
474-
query_status = QueryStatus.AVAILABLE
475-
else:
476-
# It should be impossible to ever get here...
477-
raise ValueError(
478-
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
479-
)
467+
468+
# Type consistency, allowing for both singlets and lists in manifest
469+
if isinstance(error_codes, int):
470+
error_codes = [error_codes]
471+
472+
if error_codes is not None and r.status_code in error_codes:
473+
query_status = QueryStatus.AVAILABLE
474+
elif r.status_code >= 300 or r.status_code < 200:
475+
query_status = QueryStatus.AVAILABLE
476+
477+
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
478+
# For this detection method, we have turned off the redirect.
479+
# So, there is no need to check the response URL: it will always
480+
# match the request. Instead, we will ensure that the response
481+
# code indicates that the request was successful (i.e. no 404, or
482+
# forward to some odd redirect).
483+
if 200 <= r.status_code < 300:
484+
query_status = QueryStatus.CLAIMED
485+
else:
486+
query_status = QueryStatus.AVAILABLE
480487

481488
if dump_response:
482489
print("+++++++++++++++++++++")

0 commit comments

Comments
 (0)