This commit is contained in:
Timothy Jaeryang Baek 2025-12-21 18:08:36 +04:00
parent 7746e9f4b8
commit c96549eaa7
7 changed files with 170 additions and 71 deletions

View file

@ -14,6 +14,7 @@ from typing import Iterator, List, Optional, Sequence, Union
from fastapi import (
Depends,
FastAPI,
Query,
File,
Form,
HTTPException,
@ -155,7 +156,9 @@ def get_rf(
):
rf = None
# Convert timeout string to int or None (system default)
timeout_value = int(external_reranker_timeout) if external_reranker_timeout else None
timeout_value = (
int(external_reranker_timeout) if external_reranker_timeout else None
)
if reranking_model:
if any(model in reranking_model for model in ["jinaai/jina-colbert-v2"]):
try:
@ -1750,18 +1753,22 @@ async def process_text(
@router.post("/process/youtube")
@router.post("/process/web")
async def process_web(
request: Request, form_data: ProcessUrlForm, user=Depends(get_verified_user)
request: Request,
form_data: ProcessUrlForm,
process: bool = Query(True, description="Whether to process and save the content"),
user=Depends(get_verified_user),
):
try:
collection_name = form_data.collection_name
if not collection_name:
collection_name = calculate_sha256_string(form_data.url)[:63]
content, docs = await run_in_threadpool(
get_content_from_url, request, form_data.url
)
log.debug(f"text_content: {content}")
if process:
collection_name = form_data.collection_name
if not collection_name:
collection_name = calculate_sha256_string(form_data.url)[:63]
if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
await run_in_threadpool(
save_docs_to_vector_db,
@ -1788,6 +1795,11 @@ async def process_web(
},
},
}
else:
return {
"status": True,
"content": content,
}
except Exception as e:
log.exception(e)
raise HTTPException(

View file

@ -327,10 +327,21 @@ export const processYoutubeVideo = async (token: string, url: string) => {
return res;
};
export const processWeb = async (token: string, collection_name: string, url: string) => {
export const processWeb = async (
token: string,
collection_name: string,
url: string,
process: boolean = true
) => {
let error = null;
const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web`, {
const searchParams = new URLSearchParams();
if (!process) {
searchParams.append('process', 'false');
}
const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web?${searchParams.toString()}`, {
method: 'POST',
headers: {
Accept: 'application/json',

View file

@ -779,9 +779,6 @@
urls = [urls];
}
// deduplicate URLs
urls = [...new Set(urls)];
// Create file items first
const fileItems = urls.map((url) => ({
type: 'text',
@ -796,7 +793,6 @@
// Display all items at once
files = [...files, ...fileItems];
// Process sequentially (NOT parallel)
for (const fileItem of fileItems) {
try {
const res = isYoutubeUrl(fileItem.url)
@ -811,14 +807,12 @@
...fileItem.file
};
}
files = [...files];
} catch (e) {
fileItem.status = 'error';
fileItem.error = String(e);
files = files.filter((f) => f.name !== url);
toast.error(`${e}`);
}
// Force UI reactivity after each file finishes
files = [...files];
}
};

View file

@ -93,6 +93,7 @@
const i18n = getContext('i18n');
export let onUpload: Function = (e) => {};
export let onChange: Function = () => {};
export let createMessagePair: Function;

View file

@ -15,12 +15,14 @@
let url = '';
const submitHandler = () => {
const urls = url
let urls = url
.split('\n')
.map((u) => u.trim())
.filter((u) => u !== '')
.filter((u) => isValidHttpUrl(u));
urls = [...new Set(urls)];
if (urls.length === 0) {
toast.error($i18n.t('Please enter a valid URL.'));
return;

View file

@ -33,7 +33,9 @@
updateKnowledgeById,
searchKnowledgeFilesById
} from '$lib/apis/knowledge';
import { blobToFile } from '$lib/utils';
import { processWeb, processYoutubeVideo } from '$lib/apis/retrieval';
import { blobToFile, isYoutubeUrl } from '$lib/utils';
import Spinner from '$lib/components/common/Spinner.svelte';
import Files from './KnowledgeBase/Files.svelte';
@ -169,10 +171,85 @@
return file;
};
/**
 * Fetches one or more web pages and adds each page's text to this knowledge base as a file.
 *
 * Every URL gets a placeholder row (status 'uploading') at the top of `fileItems` right away.
 * URLs are then handled one at a time: the page content is requested via `processWeb` with its
 * fourth argument set to `false`, wrapped in a text file, uploaded, and finally attached through
 * `addFileHandler`. A URL that fails at any step has its placeholder removed so the list never
 * shows a row stuck in the 'uploading' state.
 *
 * @param urls A single URL or an array of URLs. `null`/`undefined` is ignored.
 */
const uploadWeb = async (urls) => {
	// Nothing to do; avoids creating a bogus row and sending a request without a URL.
	if (urls == null) {
		return;
	}

	if (!Array.isArray(urls)) {
		urls = [urls];
	}

	const newFileItems = urls.map((url) => ({
		type: 'file',
		file: '',
		id: null,
		url: url,
		name: url,
		size: null,
		status: 'uploading',
		error: '',
		itemId: uuidv4()
	}));

	// Display all items at once
	fileItems = [...newFileItems, ...(fileItems ?? [])];

	// Drops the placeholder row of a URL that could not be turned into a file.
	// Reassigning `fileItems` (rather than mutating it) is what makes the list re-render.
	const removePlaceholder = (itemId) => {
		fileItems = (fileItems ?? []).filter((item) => item.itemId !== itemId);
	};

	// Sequential on purpose: each URL is fetched, uploaded and attached before the next starts.
	for (const fileItem of newFileItems) {
		try {
			console.log(fileItem);

			const res = await processWeb(localStorage.token, '', fileItem.url, false).catch((e) => {
				console.error('Error processing web URL:', e);
				return null;
			});

			if (!res) {
				removePlaceholder(fileItem.itemId);
				toast.error($i18n.t('Failed to process URL: {{url}}', { url: fileItem.url }));
				continue;
			}

			console.log(res);
			const file = createFileFromText(
				// Use URL as filename, sanitized
				fileItem.url
					.replace(/[^a-z0-9]/gi, '_')
					.toLowerCase()
					.slice(0, 50),
				res.content
			);

			const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
				toast.error(`${e}`);
				return null;
			});

			if (!uploadedFile) {
				// Previously the placeholder was left behind here, stuck in 'uploading'.
				removePlaceholder(fileItem.itemId);
				toast.error($i18n.t('Failed to upload file.'));
				continue;
			}

			console.log(uploadedFile);

			// Attach the server-side id to the placeholder row.
			fileItems = fileItems.map((item) => {
				if (item.itemId === fileItem.itemId) {
					item.id = uploadedFile.id;
				}
				return item;
			});

			if (uploadedFile.error) {
				console.warn('File upload warning:', uploadedFile.error);
				toast.warning(uploadedFile.error);
				fileItems = fileItems.filter((item) => item.id !== uploadedFile.id);
			} else {
				await addFileHandler(uploadedFile.id);
			}
		} catch (e) {
			// remove the item from fileItems
			removePlaceholder(fileItem.itemId);
			toast.error(`${e}`);
		}
	}
};
const uploadFileHandler = async (file) => {
console.log(file);
const tempItemId = uuidv4();
const fileItem = {
type: 'file',
file: '',
@ -182,7 +259,7 @@
size: file.size,
status: 'uploading',
error: '',
itemId: tempItemId
itemId: uuidv4()
};
if (fileItem.size == 0) {
@ -206,7 +283,7 @@
return;
}
fileItems = [...(fileItems ?? []), fileItem];
fileItems = [fileItem, ...(fileItems ?? [])];
try {
let metadata = {
knowledge_id: knowledge.id,
@ -227,12 +304,9 @@
if (uploadedFile) {
console.log(uploadedFile);
fileItems = fileItems.map((item) => {
if (item.itemId === tempItemId) {
if (item.itemId === fileItem.itemId) {
item.id = uploadedFile.id;
}
// Remove temporary item id
delete item.itemId;
return item;
});
@ -701,8 +775,8 @@
<AttachWebpageModal
bind:show={showAddWebpageModal}
onSubmit={async (data) => {
console.log(data);
onSubmit={async (e) => {
uploadWeb(e.data);
}}
/>

View file

@ -25,7 +25,7 @@
</script>
<div class=" max-h-full flex flex-col w-full gap-[0.5px]">
{#each files as file (file?.id ?? file?.tempId)}
{#each files as file (file?.id ?? file?.itemId ?? file?.tempId)}
<div
class=" flex cursor-pointer w-full px-1.5 py-0.5 bg-transparent dark:hover:bg-gray-850/50 hover:bg-white rounded-xl transition {selectedFileId
? ''
@ -59,11 +59,15 @@
</div>
<div class="flex items-center gap-2 shrink-0">
{#if file?.updated_at}
<Tooltip content={dayjs(file.updated_at * 1000).format('LLLL')}>
<div>
{dayjs(file.updated_at * 1000).fromNow()}
</div>
</Tooltip>
{/if}
{#if file?.user}
<Tooltip
content={file?.user?.email ?? $i18n.t('Deleted User')}
className="flex shrink-0"
@ -77,6 +81,7 @@
})}
</div>
</Tooltip>
{/if}
</div>
</button>