From ee10f372a0143aad16c62b58e131eafd9f97a9fb Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Sun, 16 Nov 2025 13:52:09 -0500
Subject: [PATCH] refac/enh: web search domain allow/block filter

---
 backend/open_webui/config.py             |  1 +
 backend/open_webui/retrieval/web/main.py | 22 ++++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index 49c4c2e666..c176c64dfd 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -2840,6 +2840,7 @@ WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
         # "wikipedia.com",
         # "wikimedia.org",
         # "wikidata.org",
+        # "!stackoverflow.com",
     ],
 )
 
diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py
index dc1eafb331..8025303d6a 100644
--- a/backend/open_webui/retrieval/web/main.py
+++ b/backend/open_webui/retrieval/web/main.py
@@ -9,14 +9,32 @@ from pydantic import BaseModel
 def get_filtered_results(results, filter_list):
     if not filter_list:
         return results
+
+    # Domains starting without "!" → allowed
+    allow_list = [d for d in filter_list if not d.startswith("!")]
+    # Domains starting with "!" → blocked
+    block_list = [d[1:] for d in filter_list if d.startswith("!")]
+
     filtered_results = []
+
     for result in results:
         url = result.get("url") or result.get("link", "") or result.get("href", "")
         if not validators.url(url):
             continue
+
         domain = urlparse(url).netloc
-        if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
-            filtered_results.append(result)
+
+        # If allow list is non-empty, require domain to match one of them
+        if allow_list:
+            if not any(domain.endswith(allowed) for allowed in allow_list):
+                continue
+
+        # Block list always removes matches
+        if any(domain.endswith(blocked) for blocked in block_list):
+            continue
+
+        filtered_results.append(result)
+
     return filtered_results
 
 