From c96549eaa79f66a3506dbf45b947c6b2ad0f1d77 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 21 Dec 2025 18:08:36 +0400 Subject: [PATCH] refac --- backend/open_webui/routers/retrieval.py | 72 ++++++++------ src/lib/apis/retrieval/index.ts | 15 ++- src/lib/components/chat/Chat.svelte | 12 +-- src/lib/components/chat/MessageInput.svelte | 1 + .../MessageInput/AttachWebpageModal.svelte | 4 +- .../workspace/Knowledge/KnowledgeBase.svelte | 94 +++++++++++++++++-- .../Knowledge/KnowledgeBase/Files.svelte | 43 +++++---- 7 files changed, 170 insertions(+), 71 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 89aabf5573..a2c1cc80d5 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -14,6 +14,7 @@ from typing import Iterator, List, Optional, Sequence, Union from fastapi import ( Depends, FastAPI, + Query, File, Form, HTTPException, @@ -155,7 +156,9 @@ def get_rf( ): rf = None # Convert timeout string to int or None (system default) - timeout_value = int(external_reranker_timeout) if external_reranker_timeout else None + timeout_value = ( + int(external_reranker_timeout) if external_reranker_timeout else None + ) if reranking_model: if any(model in reranking_model for model in ["jinaai/jina-colbert-v2"]): try: @@ -1750,44 +1753,53 @@ async def process_text( @router.post("/process/youtube") @router.post("/process/web") async def process_web( - request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user) + request: Request, + form_data: ProcessUrlForm, + process: bool = Query(True, description="Whether to process and save the content"), + user=Depends(get_verified_user), ): try: - collection_name = form_data.collection_name - if not collection_name: - collection_name = calculate_sha256_string(form_data.url)[:63] - content, docs = await run_in_threadpool( get_content_from_url, request, form_data.url ) log.debug(f"text_content: {content}") - if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: - await run_in_threadpool( - save_docs_to_vector_db, - request, - docs, - collection_name, - overwrite=True, - user=user, - ) - else: - collection_name = None + if process: + collection_name = form_data.collection_name + if not collection_name: + collection_name = calculate_sha256_string(form_data.url)[:63] - return { - "status": True, - "collection_name": collection_name, - "filename": form_data.url, - "file": { - "data": { - "content": content, + if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: + await run_in_threadpool( + save_docs_to_vector_db, + request, + docs, + collection_name, + overwrite=True, + user=user, + ) + else: + collection_name = None + + return { + "status": True, + "collection_name": collection_name, + "filename": form_data.url, + "file": { + "data": { + "content": content, + }, + "meta": { + "name": form_data.url, + "source": form_data.url, + }, }, - "meta": { - "name": form_data.url, - "source": form_data.url, - }, - }, - } + } + else: + return { + "status": True, + "content": content, + } except Exception as e: log.exception(e) raise HTTPException( diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 75065910d6..a84e7b6822 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -327,10 +327,21 @@ export const processYoutubeVideo = async (token: string, url: string) => { return res; }; -export const processWeb = async (token: string, collection_name: string, url: string) => { +export const processWeb = async ( + token: string, + collection_name: string, + url: string, + process: boolean = true +) => { let error = null; - const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web`, { + const searchParams = new URLSearchParams(); + + if (!process) { + searchParams.append('process', 'false'); + } + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web?${searchParams.toString()}`, { method: 'POST', headers: { Accept: 'application/json', diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index d3e7641436..54bb4e4fac 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -779,9 +779,6 @@ urls = [urls]; } - // deduplicate URLs - urls = [...new Set(urls)]; - // Create file items first const fileItems = urls.map((url) => ({ type: 'text', @@ -796,7 +793,6 @@ // Display all items at once files = [...files, ...fileItems]; - // Process sequentially (NOT parallel) for (const fileItem of fileItems) { try { const res = isYoutubeUrl(fileItem.url) @@ -811,14 +807,12 @@ ...fileItem.file }; } + + files = [...files]; } catch (e) { - fileItem.status = 'error'; - fileItem.error = String(e); + files = files.filter((f) => f.name !== url); toast.error(`${e}`); } - - // Force UI reactivity after each file finishes - files = [...files]; } }; diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index ae7a8e1059..d5685d503b 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -93,6 +93,7 @@ const i18n = getContext('i18n'); + export let onUpload: Function = (e) => {}; export let onChange: Function = () => {}; export let createMessagePair: Function; diff --git a/src/lib/components/chat/MessageInput/AttachWebpageModal.svelte b/src/lib/components/chat/MessageInput/AttachWebpageModal.svelte index 52640969f7..d0cba1465d 100644 --- a/src/lib/components/chat/MessageInput/AttachWebpageModal.svelte +++ b/src/lib/components/chat/MessageInput/AttachWebpageModal.svelte @@ -15,12 +15,14 @@ let url = ''; const submitHandler = () => { - const urls = url + let urls = url .split('\n') .map((u) => u.trim()) .filter((u) => u !== '') .filter((u) => isValidHttpUrl(u)); + urls = [...new Set(urls)]; + if (urls.length === 0) { toast.error($i18n.t('Please enter a valid URL.')); return; diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 67bfb9d092..23ce7b4fcc 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -33,7 +33,9 @@ updateKnowledgeById, searchKnowledgeFilesById } from '$lib/apis/knowledge'; - import { blobToFile } from '$lib/utils'; + import { processWeb, processYoutubeVideo } from '$lib/apis/retrieval'; + + import { blobToFile, isYoutubeUrl } from '$lib/utils'; import Spinner from '$lib/components/common/Spinner.svelte'; import Files from './KnowledgeBase/Files.svelte'; @@ -169,10 +171,85 @@ return file; }; + const uploadWeb = async (urls) => { + if (!Array.isArray(urls)) { + urls = [urls]; + } + + const newFileItems = urls.map((url) => ({ + type: 'file', + file: '', + id: null, + url: url, + name: url, + size: null, + status: 'uploading', + error: '', + itemId: uuidv4() + })); + + // Display all items at once + fileItems = [...newFileItems, ...(fileItems ?? [])]; + + for (const fileItem of newFileItems) { + try { + console.log(fileItem); + const res = await processWeb(localStorage.token, '', fileItem.url, false).catch((e) => { + console.error('Error processing web URL:', e); + return null; + }); + + if (res) { + console.log(res); + const file = createFileFromText( + // Use URL as filename, sanitized + fileItem.url + .replace(/[^a-z0-9]/gi, '_') + .toLowerCase() + .slice(0, 50), + res.content + ); + + const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => { + toast.error(`${e}`); + return null; + }); + + if (uploadedFile) { + console.log(uploadedFile); + fileItems = fileItems.map((item) => { + if (item.itemId === fileItem.itemId) { + item.id = uploadedFile.id; + } + return item; + }); + + if (uploadedFile.error) { + console.warn('File upload warning:', uploadedFile.error); + toast.warning(uploadedFile.error); + fileItems = fileItems.filter((file) => file.id !== uploadedFile.id); + } else { + await addFileHandler(uploadedFile.id); + } + } else { + toast.error($i18n.t('Failed to upload file.')); + } + } else { + // remove the item from fileItems + fileItems = fileItems.filter((item) => item.itemId !== fileItem.itemId); + toast.error($i18n.t('Failed to process URL: {{url}}', { url: fileItem.url })); + } + } catch (e) { + // remove the item from fileItems + fileItems = fileItems.filter((item) => item.itemId !== fileItem.itemId); + toast.error(`${e}`); + } + } + }; + const uploadFileHandler = async (file) => { console.log(file); - const tempItemId = uuidv4(); const fileItem = { type: 'file', file: '', @@ -182,7 +259,7 @@ size: file.size, status: 'uploading', error: '', - itemId: tempItemId + itemId: uuidv4() }; if (fileItem.size == 0) { @@ -206,7 +283,7 @@ return; } - fileItems = [...(fileItems ?? []), fileItem]; + fileItems = [fileItem, ...(fileItems ?? [])]; try { let metadata = { knowledge_id: knowledge.id, @@ -227,12 +304,9 @@ if (uploadedFile) { console.log(uploadedFile); fileItems = fileItems.map((item) => { - if (item.itemId === tempItemId) { + if (item.itemId === fileItem.itemId) { item.id = uploadedFile.id; } - - // Remove temporary item id - delete item.itemId; return item; }); @@ -701,8 +775,8 @@ { - console.log(data); + onSubmit={async (e) => { + uploadWeb(e.data); }} /> diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase/Files.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase/Files.svelte index 644ab1b78c..9d42130234 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase/Files.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase/Files.svelte @@ -25,7 +25,7 @@
- {#each files as file (file?.id ?? file?.tempId)} + {#each files as file (file?.id ?? file?.itemId ?? file?.tempId)}
- -
- {dayjs(file.updated_at * 1000).fromNow()} -
-
- -
- {$i18n.t('By {{name}}', { - name: capitalizeFirstLetter( - file?.user?.name ?? file?.user?.email ?? $i18n.t('Deleted User') - ) - })} -
-
+ {#if file?.updated_at} + +
+ {dayjs(file.updated_at * 1000).fromNow()} +
+
+ {/if} + + {#if file?.user} + +
+ {$i18n.t('By {{name}}', { + name: capitalizeFirstLetter( + file?.user?.name ?? file?.user?.email ?? $i18n.t('Deleted User') + ) + })} +
+
+ {/if}