Skip to content

Commit e44fe49

Browse files
authored
Merge pull request #2601 from sherlock-project/feat/graceful-skip
feat: gracefully skip sites with invalid errorType
2 parents 947f1ad + 52cd5fd commit e44fe49

File tree

1 file changed

+50
-53
lines changed

1 file changed

+50
-53
lines changed

sherlock_project/sherlock.py

Lines changed: 50 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -429,61 +429,58 @@ def sherlock(
429 429

430 430
else:
431 431
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
432-
# It should be impossible to ever get here...
433-
raise ValueError(
434-
f"Unknown Error Type '{error_type}' for "
435-
f"site '{social_network}'"
436-
)
437-
438-
if "message" in error_type:
439-
# error_flag True denotes no error found in the HTML
440-
# error_flag False denotes error found in the HTML
441-
error_flag = True
442-
errors = net_info.get("errorMsg")
443-
# errors will hold the error message
444-
# it can be string or list
445-
# by isinstance method we can detect that
446-
# and handle the case for strings as normal procedure
447-
# and if its list we can iterate the errors
448-
if isinstance(errors, str):
449-
# Checks if the error message is in the HTML
450-
# if error is present we will set flag to False
451-
if errors in r.text:
452-
error_flag = False
453-
else:
454-
# If it's list, it will iterate all the error message
455-
for error in errors:
456-
if error in r.text:
432+
error_context = f"Unknown error type '{error_type}' for {social_network}"
433+
query_status = QueryStatus.UNKNOWN
434+
else:
435+
if "message" in error_type:
436+
# error_flag True denotes no error found in the HTML
437+
# error_flag False denotes error found in the HTML
438+
error_flag = True
439+
errors = net_info.get("errorMsg")
440+
# errors will hold the error message
441+
# it can be string or list
442+
# by isinstance method we can detect that
443+
# and handle the case for strings as normal procedure
444+
# and if its list we can iterate the errors
445+
if isinstance(errors, str):
446+
# Checks if the error message is in the HTML
447+
# if error is present we will set flag to False
448+
if errors in r.text:
457 449
error_flag = False
458-
break
459-
if error_flag:
450+
else:
451+
# If it's list, it will iterate all the error message
452+
for error in errors:
453+
if error in r.text:
454+
error_flag = False
455+
break
456+
if error_flag:
457+
query_status = QueryStatus.CLAIMED
458+
else:
459+
query_status = QueryStatus.AVAILABLE
460+
461+
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
462+
error_codes = net_info.get("errorCode")
460 463
query_status = QueryStatus.CLAIMED
461-
else:
462-
query_status = QueryStatus.AVAILABLE
463-
464-
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
465-
error_codes = net_info.get("errorCode")
466-
query_status = QueryStatus.CLAIMED
467-
468-
# Type consistency, allowing for both singlets and lists in manifest
469-
if isinstance(error_codes, int):
470-
error_codes = [error_codes]
471-
472-
if error_codes is not None and r.status_code in error_codes:
473-
query_status = QueryStatus.AVAILABLE
474-
elif r.status_code >= 300 or r.status_code < 200:
475-
query_status = QueryStatus.AVAILABLE
476-
477-
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
478-
# For this detection method, we have turned off the redirect.
479-
# So, there is no need to check the response URL: it will always
480-
# match the request. Instead, we will ensure that the response
481-
# code indicates that the request was successful (i.e. no 404, or
482-
# forward to some odd redirect).
483-
if 200 <= r.status_code < 300:
484-
query_status = QueryStatus.CLAIMED
485-
else:
486-
query_status = QueryStatus.AVAILABLE
464+
465+
# Type consistency, allowing for both singlets and lists in manifest
466+
if isinstance(error_codes, int):
467+
error_codes = [error_codes]
468+
469+
if error_codes is not None and r.status_code in error_codes:
470+
query_status = QueryStatus.AVAILABLE
471+
elif r.status_code >= 300 or r.status_code < 200:
472+
query_status = QueryStatus.AVAILABLE
473+
474+
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
475+
# For this detection method, we have turned off the redirect.
476+
# So, there is no need to check the response URL: it will always
477+
# match the request. Instead, we will ensure that the response
478+
# code indicates that the request was successful (i.e. no 404, or
479+
# forward to some odd redirect).
480+
if 200 <= r.status_code < 300:
481+
query_status = QueryStatus.CLAIMED
482+
else:
483+
query_status = QueryStatus.AVAILABLE
487 484

488 485
if dump_response:
489 486
print("+++++++++++++++++++++")

0 commit comments

Comments (0)