def _host_matches_any(host, domains):
    """Return True if *host* equals, or is a subdomain of, any entry in *domains*."""
    # Match on label boundaries: a bare ``endswith(domain)`` would treat
    # "notexample.com" as matching "example.com".
    return any(host == domain or host.endswith("." + domain) for domain in domains)


def get_filtered_results(results, filter_list):
    """Filter web search results by an allow/block list of domains.

    Each entry of ``filter_list`` is a domain name. Entries prefixed with
    ``"!"`` are blocked; all other entries are allowed. Entries are compared
    case-insensitively, surrounding whitespace and leading/trailing dots are
    ignored, and empty entries are skipped. An entry matches a result when the
    result's host is that domain or one of its subdomains.

    Args:
        results: Iterable of dict-like search results. The URL is read from
            the first non-empty of the ``"url"``, ``"link"`` or ``"href"`` keys.
        filter_list: List of domain strings, optionally prefixed with ``"!"``.
            When empty (or when it holds no usable entries) ``results`` is
            returned unchanged.

    Returns:
        ``results`` itself when no filtering applies; otherwise a new list
        holding the results that have a valid URL, match the allow list (when
        one is given) and do not match the block list.
    """
    if not filter_list:
        return results

    allow_list = []
    block_list = []
    for entry in filter_list:
        entry = entry.strip().lower()
        is_blocked = entry.startswith("!")
        domain = (entry[1:] if is_blocked else entry).strip().strip(".")
        if not domain:
            # An empty pattern would otherwise match (or block) every host.
            continue
        if is_blocked:
            block_list.append(domain)
        else:
            allow_list.append(domain)

    if not allow_list and not block_list:
        # Nothing usable in the filter list: behave like an empty filter.
        return results

    filtered_results = []

    for result in results:
        url = result.get("url") or result.get("link", "") or result.get("href", "")
        if not validators.url(url):
            continue

        # ``hostname`` (unlike ``netloc``) is lower-cased and excludes any
        # userinfo and port, so "https://Example.com:8443/" yields "example.com".
        host = (urlparse(url).hostname or "").rstrip(".")

        # If allow list is non-empty, require the host to match one of them.
        if allow_list and not _host_matches_any(host, allow_list):
            continue

        # Block list always removes matches.
        if _host_matches_any(host, block_list):
            continue

        filtered_results.append(result)

    return filtered_results