# Azure AI Search indexes have user-defined schemas, so there is no single
# guaranteed field for URL, title or body text. Each tuple lists the field
# names that are probed, in priority order; the first truthy value wins.
_AZURE_LINK_FIELDS = ("url", "link", "uri", "metadata_storage_path")
_AZURE_TITLE_FIELDS = ("title", "name", "metadata_title", "metadata_storage_name")
_AZURE_SNIPPET_FIELDS = ("content", "snippet", "description", "summary", "text")

# Snippets longer than this many characters are cut and suffixed with "...".
_AZURE_MAX_SNIPPET_LENGTH = 500


def _first_text(document: dict, field_names: tuple) -> Optional[str]:
    """Return the first truthy value among *field_names* as a string, else None.

    Non-string values (numbers, collections, ...) are converted with ``str()``
    so that callers can safely slice/concatenate the result.
    """
    for field_name in field_names:
        value = document.get(field_name)
        if value:
            return value if isinstance(value, str) else str(value)
    return None


def _truncate_snippet(
    snippet: Optional[str], max_length: int = _AZURE_MAX_SNIPPET_LENGTH
) -> Optional[str]:
    """Shorten *snippet* to at most *max_length* characters, ending in "..."."""
    if snippet and len(snippet) > max_length:
        return snippet[: max_length - 3] + "..."
    return snippet


def search_azure(
    api_key: str,
    endpoint: str,
    index_name: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """
    Search using Azure AI Search.

    Args:
        api_key: Azure Search API key (query key or admin key)
        endpoint: Azure Search service endpoint (e.g., https://myservice.search.windows.net)
        index_name: Name of the search index to query
        query: Search query string
        count: Number of results to return
        filter_list: Optional list of domains to filter results

    Returns:
        List of SearchResult objects with link, title, and snippet.
        Documents for which no link field could be found are dropped.

    Raises:
        ImportError: If the azure-search-documents package is not installed.
        Exception: Any error raised while querying the service or building
            the results is logged and re-raised unchanged.
    """
    try:
        # Imported lazily so the module can be loaded without the Azure SDK.
        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient
    except ImportError as err:
        log.error(
            "azure-search-documents package is not installed. "
            "Install it with: pip install azure-search-documents"
        )
        raise ImportError(
            "azure-search-documents is required for Azure AI Search. "
            "Install it with: pip install azure-search-documents"
        ) from err

    try:
        # Create search client with API key authentication
        credential = AzureKeyCredential(api_key)

        search_results = []
        # The client owns an HTTP session; the context manager closes it even
        # when the query fails. The result pager is lazy, so it has to be
        # fully consumed while the client is still open.
        with SearchClient(
            endpoint=endpoint, index_name=index_name, credential=credential
        ) as search_client:
            for result in search_client.search(search_text=query, top=count):
                document = dict(result)

                link = _first_text(document, _AZURE_LINK_FIELDS)
                if not link:
                    # Only keep documents for which we found a link
                    continue

                search_results.append(
                    {
                        "link": link,
                        "title": _first_text(document, _AZURE_TITLE_FIELDS),
                        "snippet": _truncate_snippet(
                            _first_text(document, _AZURE_SNIPPET_FIELDS)
                        ),
                    }
                )

        # Apply domain filtering if specified
        if filter_list:
            search_results = get_filtered_results(search_results, filter_list)

        # Convert to SearchResult objects
        return [
            SearchResult(
                link=result["link"],
                title=result.get("title"),
                snippet=result.get("snippet"),
            )
            for result in search_results
        ]

    except Exception as ex:
        # log.exception records the traceback; bare raise keeps it intact.
        log.exception("Azure AI Search error: %s", ex)
        raise
request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + ) + else: + raise Exception( + "AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_ENDPOINT, and AZURE_AI_SEARCH_INDEX_NAME are required for Azure AI Search" + ) elif engine == "exa": return search_exa( request.app.state.config.EXA_API_KEY, diff --git a/pyproject.toml b/pyproject.toml index 852a4d25dd..1a77461a80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ dependencies = [ "sentencepiece", "soundfile==0.13.1", "azure-ai-documentintelligence==1.0.2", + "azure-search-documents>=11.4.0", "pillow==11.3.0", "opencv-python-headless==4.11.0.86",