Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 19 additions & 24 deletions sherlock_project/sherlock.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,22 @@ def multiple_usernames(username):
return allUsernames


def check_message_query_status(response_text, errors, status_code) -> QueryStatus:
    """Determine the query status for a "message"-type site.

    A "message"-type site signals that a username does not exist by including
    a known "not found" marker in the response body.

    The absence of that marker is only meaningful when the server actually
    answered the request. On a server error (HTTP 5xx) the body cannot be
    trusted, so the status is UNKNOWN rather than a false CLAIMED.

    Args:
        response_text: Body of the HTTP response, as text.
        errors: The "not found" marker(s) to look for: a single string, an
            iterable of strings, or None when no marker is configured.
        status_code: HTTP status code of the response.

    Returns:
        QueryStatus.AVAILABLE if any marker occurs in the body,
        QueryStatus.UNKNOWN if no marker is configured or the marker is
        absent and the server answered with a 5xx status, and
        QueryStatus.CLAIMED otherwise.
    """
    if errors is None:
        # Without a configured marker the body carries no signal either way;
        # iterating None would raise TypeError, and reporting CLAIMED would
        # be a guess, so report UNKNOWN instead.
        return QueryStatus.UNKNOWN

    # Normalise a lone string to a one-element list so a single membership
    # scan handles both configuration shapes (and a bare string is never
    # iterated character by character).
    markers = [errors] if isinstance(errors, str) else errors

    if any(marker in response_text for marker in markers):
        return QueryStatus.AVAILABLE

    # A marker found in the body wins even on a 5xx; only the *absence* of
    # the marker is untrustworthy when the server failed.
    if status_code >= 500:
        return QueryStatus.UNKNOWN

    return QueryStatus.CLAIMED


def sherlock(
username: str,
site_data: dict[str, dict[str, str]],
Expand Down Expand Up @@ -404,30 +420,9 @@ def sherlock(
query_status = QueryStatus.UNKNOWN
else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
query_status = check_message_query_status(
r.text, net_info.get("errorMsg"), r.status_code
)

if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
Expand Down
34 changes: 34 additions & 0 deletions tests/test_message_query_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Offline unit tests for message-type query status detection."""

from sherlock_project.result import QueryStatus
from sherlock_project.sherlock import check_message_query_status


def test_available_when_not_found_message_present():
    """A body containing the "not found" marker is reported as AVAILABLE."""
    body = '{"valid":true}'
    marker = '"valid":true'

    status = check_message_query_status(body, marker, 200)

    assert status == QueryStatus.AVAILABLE


def test_claimed_when_message_absent_and_response_ok():
    """A 200 response without the "not found" marker is reported as CLAIMED."""
    body = '{"status":"success"}'
    marker = '"valid":true'

    status = check_message_query_status(body, marker, 200)

    assert status == QueryStatus.CLAIMED


def test_unknown_on_server_error_instead_of_false_claimed():
    """A 5xx response without the marker is UNKNOWN, never CLAIMED.

    Regression for https://github.com/sherlock-project/sherlock/issues/2950
    """
    # The 502 body lacks the "not found" message, but a failed upstream says
    # nothing about whether the account exists.
    body = "Bad Gateway"
    marker = '"valid":true'

    status = check_message_query_status(body, marker, 502)

    assert status == QueryStatus.UNKNOWN


def test_supports_list_of_error_messages():
    """Any one marker from a list matching the body is enough for AVAILABLE."""
    body = "user not found"
    markers = ["no such user", "not found"]

    status = check_message_query_status(body, markers, 200)

    assert status == QueryStatus.AVAILABLE