def check_message_query_status(
    response_text: str, errors, status_code: int
) -> QueryStatus:
    """Determine the query status for a "message"-type site.

    A "message"-type site signals a missing account by including a known
    "not found" message in the response body. The absence of that message is
    only meaningful when the server actually answered the request and when
    there is a message to look for in the first place.

    Args:
        response_text: Body of the HTTP response.
        errors: The "not found" message(s) to search for. May be a single
            string, an iterable of strings, or None when the site entry
            defines no message.
        status_code: HTTP status code of the response.

    Returns:
        QueryStatus.AVAILABLE if any "not found" message occurs in the body.
        QueryStatus.UNKNOWN if there is no usable message to search for, or
        if the server answered with an error (HTTP 5xx) and the body
        therefore cannot be trusted.
        QueryStatus.CLAIMED otherwise.
    """
    if errors is None:
        errors = []
    elif isinstance(errors, str):
        errors = [errors]

    # An empty string is a substring of every string, so an empty message
    # would report every username as available. Discard such entries.
    patterns = [error for error in errors if error]
    if not patterns:
        # Nothing to search for: the absence of a match proves nothing.
        return QueryStatus.UNKNOWN

    if any(pattern in response_text for pattern in patterns):
        return QueryStatus.AVAILABLE
    if status_code >= 500:
        # Server error: a missing "not found" message is not evidence that
        # the account exists.
        return QueryStatus.UNKNOWN
    return QueryStatus.CLAIMED
"""Offline unit tests for message-type query status detection."""

from sherlock_project.result import QueryStatus
from sherlock_project.sherlock import check_message_query_status


def test_available_when_not_found_message_present():
    """A body containing the "not found" message yields AVAILABLE."""
    status = check_message_query_status('{"valid":true}', '"valid":true', 200)
    assert status == QueryStatus.AVAILABLE


def test_claimed_when_message_absent_and_response_ok():
    """A successful response without the message yields CLAIMED."""
    status = check_message_query_status(
        '{"status":"success"}', '"valid":true', 200
    )
    assert status == QueryStatus.CLAIMED


def test_unknown_on_server_error_instead_of_false_claimed():
    """A 5xx response without the message yields UNKNOWN, not CLAIMED."""
    # 502 body lacks the "not found" message; must not be reported as Claimed.
    # Regression for https://github.com/sherlock-project/sherlock/issues/2950
    status = check_message_query_status("Bad Gateway", '"valid":true', 502)
    assert status == QueryStatus.UNKNOWN


def test_supports_list_of_error_messages():
    """Any entry of a list of messages found in the body yields AVAILABLE."""
    not_found_messages = ["no such user", "not found"]
    status = check_message_query_status(
        "user not found", not_found_messages, 200
    )
    assert status == QueryStatus.AVAILABLE