open-webui/backend/open_webui/retrieval/web/firecrawl.py

42 lines
1.2 KiB
Python
Raw Normal View History

2025-04-24 06:57:28 +00:00
import logging
from typing import Optional, List
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
2025-04-24 06:57:28 +00:00
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_firecrawl(
    firecrawl_url: str,
    firecrawl_api_key: str,
    query: str,
    count: int,
    filter_list: Optional[List[str]] = None,
) -> List[SearchResult]:
    """Run a web search through Firecrawl and return ``SearchResult`` items.

    Args:
        firecrawl_url: Firecrawl API URL, passed to ``FirecrawlApp`` as ``api_url``.
        firecrawl_api_key: API key passed to ``FirecrawlApp``.
        query: Search query text.
        count: Maximum number of results to request and to return.
        filter_list: Optional filter entries handed to ``get_filtered_results``;
            filtering is skipped when this is empty or ``None``.

    Returns:
        At most ``count`` results. An empty list is returned when the search
        yields no web results, or when any error occurs (the error is logged
        rather than raised).
    """
    try:
        # Imported inside the try block so that a missing ``firecrawl`` package
        # is handled by the error path below instead of failing at import time.
        from firecrawl import FirecrawlApp

        firecrawl = FirecrawlApp(api_key=firecrawl_api_key, api_url=firecrawl_url)
        # NOTE(review): the timeout scales with the requested result count;
        # confirm which unit the SDK expects for ``timeout``.
        response = firecrawl.search(
            query=query, limit=count, ignore_invalid_urls=True, timeout=count * 3
        )
        # Treat a missing/None ``web`` payload as "no results" so an empty
        # search is not reported as an error by the handler below.
        results = response.web or []
        if filter_list:
            results = get_filtered_results(results, filter_list)
        results = [
            SearchResult(
                link=result.url,
                title=result.title,
                snippet=result.description,
            )
            for result in results[:count]
        ]
        log.info("Firecrawl search results: %s", results)
        return results
    except Exception as e:
        # Best-effort boundary: callers always receive a list, never an exception.
        log.error("Error in Firecrawl search: %s", e)
        return []