mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-11 20:05:19 +00:00
parent
117a33b030
commit
64747f7f79
4 changed files with 166 additions and 0 deletions
|
|
@ -2982,6 +2982,24 @@ BING_SEARCH_V7_SUBSCRIPTION_KEY = PersistentConfig(
|
|||
os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY", ""),
|
||||
)
|
||||
|
||||
# Azure AI Search web-search provider settings.
# Each value is seeded from the environment variable of the same name and
# falls back to an empty string when that variable is not set.

# API key passed to the Azure AI Search provider.
AZURE_AI_SEARCH_API_KEY = PersistentConfig(
    "AZURE_AI_SEARCH_API_KEY",
    "rag.web.search.azure_ai_search_api_key",
    os.getenv("AZURE_AI_SEARCH_API_KEY", ""),
)

# Endpoint URL of the Azure AI Search service.
AZURE_AI_SEARCH_ENDPOINT = PersistentConfig(
    "AZURE_AI_SEARCH_ENDPOINT",
    "rag.web.search.azure_ai_search_endpoint",
    os.getenv("AZURE_AI_SEARCH_ENDPOINT", ""),
)

# Name of the search index that queries are run against.
AZURE_AI_SEARCH_INDEX_NAME = PersistentConfig(
    "AZURE_AI_SEARCH_INDEX_NAME",
    "rag.web.search.azure_ai_search_index_name",
    os.getenv("AZURE_AI_SEARCH_INDEX_NAME", ""),
)
|
||||
|
||||
EXA_API_KEY = PersistentConfig(
|
||||
"EXA_API_KEY",
|
||||
"rag.web.search.exa_api_key",
|
||||
|
|
|
|||
128
backend/open_webui/retrieval/web/azure.py
Normal file
128
backend/open_webui/retrieval/web/azure.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"""
Azure AI Search integration for Open WebUI.

Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python

Required package: azure-search-documents
Install: pip install azure-search-documents
"""

# NOTE: the module docstring must be the first statement in the file;
# placed after the imports it is a no-op expression and ``__doc__`` is None.

import logging
from typing import Optional

from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS

# Module logger, using the level configured for the "RAG" log source.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
|
||||
# Field names probed, in priority order, when mapping an index document onto
# a SearchResult. Index schemas are user-defined, so these are best guesses.
_AZURE_LINK_FIELDS = ("url", "link", "uri", "metadata_storage_path")
_AZURE_TITLE_FIELDS = ("title", "name", "metadata_title", "metadata_storage_name")
_AZURE_SNIPPET_FIELDS = ("content", "snippet", "description", "summary", "text")

# Snippets longer than this are cut down and suffixed with "...".
_AZURE_SNIPPET_MAX_CHARS = 500


def _first_non_empty_field(
    document: dict, field_names: tuple[str, ...]
) -> Optional[str]:
    """Return the first truthy value among ``field_names`` as a string, else None."""
    for field_name in field_names:
        value = document.get(field_name)
        if value:
            # Index fields are not guaranteed to be strings (e.g. collection
            # fields); coerce so slicing/concatenation below cannot fail.
            return value if isinstance(value, str) else str(value)
    return None


def search_azure(
    api_key: str,
    endpoint: str,
    index_name: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """
    Search using Azure AI Search.

    Args:
        api_key: Azure Search API key (query key or admin key)
        endpoint: Azure Search service endpoint (e.g., https://myservice.search.windows.net)
        index_name: Name of the search index to query
        query: Search query string
        count: Number of results to request from the service (passed as ``top``)
        filter_list: Optional list of domains to filter results

    Returns:
        List of SearchResult objects with link, title, and snippet. Documents
        without any recognisable link field are dropped, and domain filtering
        is applied after retrieval, so fewer than ``count`` items may be
        returned.

    Raises:
        ImportError: If the azure-search-documents package is not installed.
        Exception: Any error raised while querying the service or building
            the results is logged and re-raised unchanged.
    """
    # Imported lazily so this module can be loaded without the Azure SDK.
    try:
        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient
    except ImportError as import_error:
        log.error(
            "azure-search-documents package is not installed. "
            "Install it with: pip install azure-search-documents"
        )
        raise ImportError(
            "azure-search-documents is required for Azure AI Search. "
            "Install it with: pip install azure-search-documents"
        ) from import_error

    try:
        # Create search client with API key authentication
        credential = AzureKeyCredential(api_key)

        search_results = []
        # The context manager closes the client's HTTP transport when done.
        # The result pager is lazy, so it must be consumed inside the block.
        with SearchClient(
            endpoint=endpoint, index_name=index_name, credential=credential
        ) as search_client:
            for result in search_client.search(search_text=query, top=count):
                # Azure AI Search returns documents with custom schemas, so
                # probe common field names for URL, title, and content.
                result_dict = dict(result)

                link = _first_non_empty_field(result_dict, _AZURE_LINK_FIELDS)
                if not link:
                    # Only keep documents for which a link was found
                    continue

                title = _first_non_empty_field(result_dict, _AZURE_TITLE_FIELDS)
                snippet = _first_non_empty_field(result_dict, _AZURE_SNIPPET_FIELDS)

                # Truncate snippet if too long (total length stays at the cap)
                if snippet and len(snippet) > _AZURE_SNIPPET_MAX_CHARS:
                    snippet = snippet[: _AZURE_SNIPPET_MAX_CHARS - 3] + "..."

                search_results.append(
                    {
                        "link": link,
                        "title": title,
                        "snippet": snippet,
                    }
                )

        # Apply domain filtering if specified
        if filter_list:
            search_results = get_filtered_results(search_results, filter_list)

        # Convert to SearchResult objects
        return [
            SearchResult(
                link=result["link"],
                title=result.get("title"),
                snippet=result.get("snippet"),
            )
            for result in search_results
        ]

    except Exception as ex:
        log.error(f"Azure AI Search error: {ex}")
        # Bare raise preserves the original traceback.
        raise
|
||||
|
|
@ -64,6 +64,7 @@ from open_webui.retrieval.web.serply import search_serply
|
|||
from open_webui.retrieval.web.serpstack import search_serpstack
|
||||
from open_webui.retrieval.web.tavily import search_tavily
|
||||
from open_webui.retrieval.web.bing import search_bing
|
||||
from open_webui.retrieval.web.azure import search_azure
|
||||
from open_webui.retrieval.web.exa import search_exa
|
||||
from open_webui.retrieval.web.perplexity import search_perplexity
|
||||
from open_webui.retrieval.web.sougou import search_sougou
|
||||
|
|
@ -2037,6 +2038,24 @@ def search_web(
|
|||
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
)
|
||||
elif engine == "azure":
|
||||
if (
|
||||
request.app.state.config.AZURE_AI_SEARCH_API_KEY
|
||||
and request.app.state.config.AZURE_AI_SEARCH_ENDPOINT
|
||||
and request.app.state.config.AZURE_AI_SEARCH_INDEX_NAME
|
||||
):
|
||||
return search_azure(
|
||||
request.app.state.config.AZURE_AI_SEARCH_API_KEY,
|
||||
request.app.state.config.AZURE_AI_SEARCH_ENDPOINT,
|
||||
request.app.state.config.AZURE_AI_SEARCH_INDEX_NAME,
|
||||
query,
|
||||
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"AZURE_AI_SEARCH_API_KEY, AZURE_AI_SEARCH_ENDPOINT, and AZURE_AI_SEARCH_INDEX_NAME are required for Azure AI Search"
|
||||
)
|
||||
elif engine == "exa":
|
||||
return search_exa(
|
||||
request.app.state.config.EXA_API_KEY,
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ dependencies = [
|
|||
"sentencepiece",
|
||||
"soundfile==0.13.1",
|
||||
"azure-ai-documentintelligence==1.0.2",
|
||||
"azure-search-documents>=11.4.0",
|
||||
|
||||
"pillow==11.3.0",
|
||||
"opencv-python-headless==4.11.0.86",
|
||||
|
|
|
|||
Loading…
Reference in a new issue