From 6e531679f4dccf9ac4f5fa2b13378b80e00127d0 Mon Sep 17 00:00:00 2001
From: Poccia <114789517+kjpoccia@users.noreply.github.com>
Date: Tue, 2 Dec 2025 04:17:32 -0500
Subject: [PATCH] fix/adjust web search to properly block domains (#19670)

Co-authored-by: Tim Baek
---
Review notes (this section is ignored by git-am):
- Dropped the leftover debug `print(string, allow_list, block_list)` from
  is_string_allowed(); the diffstat and the second misc.py hunk header are
  adjusted to match (+43,29 -> +43,28). The misc.py `index` line still
  carries the original post-image blob id.
- Line breaks and indentation were restored from a whitespace-collapsed
  copy; blank context lines were placed to satisfy the hunk line counts.
  Confirm with `git apply --check` before use.

 backend/open_webui/retrieval/web/main.py |  2 +-
 backend/open_webui/utils/misc.py         | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py
index 6d2fd1bc5a..1b8df9f8ee 100644
--- a/backend/open_webui/retrieval/web/main.py
+++ b/backend/open_webui/retrieval/web/main.py
@@ -33,7 +33,7 @@ def get_filtered_results(results, filter_list):
         except Exception:
             pass
 
-        if any(is_string_allowed(hostname, filter_list) for hostname in hostnames):
+        if is_string_allowed(hostnames, filter_list):
             filtered_results.append(result)
             continue
 
diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py
index c7ff2a3edd..2f58ab21d4 100644
--- a/backend/open_webui/utils/misc.py
+++ b/backend/open_webui/utils/misc.py
@@ -6,7 +6,7 @@ import uuid
 import logging
 from datetime import timedelta
 from pathlib import Path
-from typing import Callable, Optional
+from typing import Callable, Optional, Sequence, Union
 import json
 import aiohttp
 
@@ -43,25 +43,28 @@ def get_allow_block_lists(filter_list):
     return allow_list, block_list
 
 
-def is_string_allowed(string: str, filter_list: Optional[list[str]] = None) -> bool:
+def is_string_allowed(
+    string: Union[str, Sequence[str]], filter_list: Optional[list[str]] = None
+) -> bool:
     """
     Checks if a string is allowed based on the provided filter list.
-    :param string: The string to check (e.g., domain or hostname).
+    :param string: The string or sequence of strings to check (e.g., domain or hostname).
     :param filter_list: List of allowed/blocked strings. Strings starting with "!" are blocked.
-    :return: True if the string is allowed, False otherwise.
+    :return: True if the string or sequence of strings is allowed, False otherwise.
     """
     if not filter_list:
         return True
 
     allow_list, block_list = get_allow_block_lists(filter_list)
+    strings = [string] if isinstance(string, str) else list(string)
 
     # If allow list is non-empty, require domain to match one of them
     if allow_list:
-        if not any(string.endswith(allowed) for allowed in allow_list):
+        if not any(s.endswith(allowed) for s in strings for allowed in allow_list):
             return False
 
     # Block list always removes matches
-    if any(string.endswith(blocked) for blocked in block_list):
+    if any(s.endswith(blocked) for s in strings for blocked in block_list):
         return False
 
     return True