2024-06-22 14:36:15 +00:00
|
|
|
import logging
|
|
|
|
|
|
2024-08-27 22:10:27 +00:00
|
|
|
import requests
|
2024-09-04 14:54:48 +00:00
|
|
|
from open_webui.env import SRC_LOG_LEVELS
|
2025-12-03 14:42:56 +00:00
|
|
|
from open_webui.retrieval.web.main import SearchResult
|
2024-08-27 22:10:27 +00:00
|
|
|
from yarl import URL
|
2024-06-22 14:36:15 +00:00
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
|
# Apply the log level stored under the "RAG" key of SRC_LOG_LEVELS.
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
|
|
|
|
|
|
|
|
|
2025-12-03 14:42:56 +00:00
|
|
|
def search_jina(
    api_key: str,
    query: str,
    count: int,
    api_url: str = "https://s.jina.ai/",
) -> list[SearchResult]:
    """
    Search using Jina's Search API and return the results as a list of SearchResult objects.

    Args:
        api_key (str): Value sent verbatim as the ``Authorization`` header
        query (str): The query to search for
        count (int): The number of results to return; values above 10 are sent as 10
        api_url (str): Endpoint the search request is POSTed to

    Returns:
        list[SearchResult]: A list of search results

    Raises:
        requests.HTTPError: If the endpoint responds with an error status
        KeyError: If the response JSON has no "data" entry, or a result has no "url"
    """
    # Handle PersistentConfig objects: the values may arrive as config wrappers
    # rather than plain strings. Every Python object defines __str__, so a
    # hasattr(..., "__str__") guard is always true; convert unconditionally.
    api_key = str(api_key)
    api_url = str(api_url)

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": api_key,
        "X-Retain-Images": "none",
    }

    # Never request more than 10 results, whatever the caller asked for.
    payload = {"q": query, "count": min(count, 10)}

    # Round-trip the endpoint through yarl's URL type before using it
    # (presumably to normalise/validate it -- confirm against yarl's docs).
    url = str(URL(api_url))

    # NOTE(review): no timeout is passed here, so this call can block for as
    # long as the server keeps the connection open. Consider adding one.
    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()
    data = response.json()

    # "url" is required on every entry; "title" and "content" may be absent,
    # in which case None is passed through.
    return [
        SearchResult(
            link=result["url"],
            title=result.get("title"),
            snippet=result.get("content"),
        )
        for result in data["data"]
    ]
|