Add Azure Search (#19104)

Co-authored-by: Tim Baek <tim@openwebui.com>
This commit is contained in:
Sang Lê 2025-11-14 10:12:34 +10:00 committed by GitHub
parent 117a33b030
commit 64747f7f79
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 166 additions and 0 deletions

View file

@ -2982,6 +2982,24 @@ BING_SEARCH_V7_SUBSCRIPTION_KEY = PersistentConfig(
os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", ""), os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", ""),
) )
AZURE_AI_SEARCH_API_KEY = PersistentConfig(
"AZURE_AI_SEARCH_API_KEY",
"rag.web.search.azure_ai_search_api_key",
os.environ.get("AZURE_AI_SEARCH_API_KEY", ""),
)
AZURE_AI_SEARCH_ENDPOINT = PersistentConfig(
"AZURE_AI_SEARCH_ENDPOINT",
"rag.web.search.azure_ai_search_endpoint",
os.environ.get("AZURE_AI_SEARCH_ENDPOINT", ""),
)
AZURE_AI_SEARCH_INDEX_NAME = PersistentConfig(
"AZURE_AI_SEARCH_INDEX_NAME",
"rag.web.search.azure_ai_search_index_name",
os.environ.get("AZURE_AI_SEARCH_INDEX_NAME", ""),
)
EXA_API_KEY = PersistentConfig( EXA_API_KEY = PersistentConfig(
"EXA_API_KEY", "EXA_API_KEY",
"rag.web.search.exa_api_key", "rag.web.search.exa_api_key",

View file

@ -0,0 +1,128 @@
import logging
from typing import Optional
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
"""
Azure AI Search integration for Open WebUI.
Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python
Required package: azure-search-documents
Install: pip install azure-search-documents
"""
def search_azure(
api_key: str,
endpoint: str,
index_name: str,
query: str,
count: int,
filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
"""
Search using Azure AI Search.
Args:
api_key: Azure Search API key (query key or admin key)
endpoint: Azure Search service endpoint (e.g., https://myservice.search.windows.net)
index_name: Name of the search index to query
query: Search query string
count: Number of results to return
filter_list: Optional list of domains to filter results
Returns:
List of SearchResult objects with link, title, and snippet
"""
try:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
except ImportError:
log.error(
"azure-search-documents package is not installed. "
"Install it with: pip install azure-search-documents"
)
raise ImportError(
"azure-search-documents is required for Azure AI Search. "
"Install it with: pip install azure-search-documents"
)
try:
# Create search client with API key authentication
credential = AzureKeyCredential(api_key)
search_client = SearchClient(
endpoint=endpoint, index_name=index_name, credential=credential
)
# Perform the search
results = search_client.search(search_text=query, top=count)
# Convert results to list and extract fields
search_results = []
for result in results:
# Azure AI Search returns documents with custom schemas
# We need to extract common fields that might represent URL, title, and content
# Common field names to look for:
result_dict = dict(result)
# Try to find URL field (common names)
link = (
result_dict.get("url")
or result_dict.get("link")
or result_dict.get("uri")
or result_dict.get("metadata_storage_path")
or ""
)
# Try to find title field (common names)
title = (
result_dict.get("title")
or result_dict.get("name")
or result_dict.get("metadata_title")
or result_dict.get("metadata_storage_name")
or None
)
# Try to find content/snippet field (common names)
snippet = (
result_dict.get("content")
or result_dict.get("snippet")
or result_dict.get("description")
or result_dict.get("summary")
or result_dict.get("text")
or None
)
# Truncate snippet if too long
if snippet and len(snippet) > 500:
snippet = snippet[:497] + "..."
if link: # Only add if we found a valid link
search_results.append(
{
"link": link,
"title": title,
"snippet": snippet,
}
)
# Apply domain filtering if specified
if filter_list:
search_results = get_filtered_results(search_results, filter_list)
# Convert to SearchResult objects
return [
SearchResult(
link=result["link"],
title=result.get("title"),
snippet=result.get("snippet"),
)
for result in search_results
]
except Exception as ex:
log.error(f"Azure AI Search error: {ex}")
raise ex

View file

@ -64,6 +64,7 @@ from open_webui.retrieval.web.serply import search_serply
from open_webui.retrieval.web.serpstack import search_serpstack from open_webui.retrieval.web.serpstack import search_serpstack
from open_webui.retrieval.web.tavily import search_tavily from open_webui.retrieval.web.tavily import search_tavily
from open_webui.retrieval.web.bing import search_bing from open_webui.retrieval.web.bing import search_bing
from open_webui.retrieval.web.azure import search_azure
from open_webui.retrieval.web.exa import search_exa from open_webui.retrieval.web.exa import search_exa
from open_webui.retrieval.web.perplexity import search_perplexity from open_webui.retrieval.web.perplexity import search_perplexity
from open_webui.retrieval.web.sougou import search_sougou from open_webui.retrieval.web.sougou import search_sougou
@ -2037,6 +2038,24 @@ def search_web(
request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
) )
elif engine == "azure":
if (
request.app.state.config.AZURE_AI_SEARCH_API_KEY
and request.app.state.config.AZURE_AI_SEARCH_ENDPOINT
and request.app.state.config.AZURE_AI_SEARCH_INDEX_NAME
):
return search_azure(
request.app.state.config.AZURE_AI_SEARCH_API_KEY,
request.app.state.config.AZURE_AI_SEARCH_ENDPOINT,
request.app.state.config.AZURE_AI_SEARCH_INDEX_NAME,
query,
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
)
else:
raise Exception(
"AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_ENDPOINT, and AZURE_AI_SEARCH_INDEX_NAME are required for Azure AI Search"
)
elif engine == "exa": elif engine == "exa":
return search_exa( return search_exa(
request.app.state.config.EXA_API_KEY, request.app.state.config.EXA_API_KEY,

View file

@ -86,6 +86,7 @@ dependencies = [
"sentencepiece", "sentencepiece",
"soundfile==0.13.1", "soundfile==0.13.1",
"azure-ai-documentintelligence==1.0.2", "azure-ai-documentintelligence==1.0.2",
"azure-search-documents>=11.4.0",
"pillow==11.3.0", "pillow==11.3.0",
"opencv-python-headless==4.11.0.86", "opencv-python-headless==4.11.0.86",