mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-17 14:55:23 +00:00
聊天记录提取器逻辑
modified: src/lib/components/layout/ImportChatsModal.svelte modified: src/lib/utils/index.ts
This commit is contained in:
parent
271af2b73d
commit
340ff50d3a
2 changed files with 159 additions and 39 deletions
|
|
@ -3,7 +3,7 @@
|
||||||
import Modal from '../common/Modal.svelte';
|
import Modal from '../common/Modal.svelte';
|
||||||
import Spinner from '../common/Spinner.svelte';
|
import Spinner from '../common/Spinner.svelte';
|
||||||
import { extractChatsFromFile } from '$lib/utils/chatImport';
|
import { extractChatsFromFile } from '$lib/utils/chatImport';
|
||||||
import { getImportOrigin, convertOpenAIChats } from '$lib/utils';
|
import { getImportOrigin, convertOpenAIChats, convertDeepseekChats } from '$lib/utils';
|
||||||
|
|
||||||
export let show = false;
|
export let show = false;
|
||||||
export let onImport: (chats: any[]) => Promise<void>;
|
export let onImport: (chats: any[]) => Promise<void>;
|
||||||
|
|
@ -41,17 +41,29 @@
|
||||||
.filter((l) => l.length > 0);
|
.filter((l) => l.length > 0);
|
||||||
|
|
||||||
if (lines.length === 0) {
|
if (lines.length === 0) {
|
||||||
throw new Error('文件为空,无法解析');
|
throw new Error('File is empty, nothing to parse');
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return lines.map((line) => JSON.parse(line));
|
return lines.map((line) => JSON.parse(line));
|
||||||
} catch (lineError) {
|
} catch (lineError) {
|
||||||
throw new Error('纯文本/JSONL 文件需包含有效的 JSON 或逐行 JSON 对象');
|
throw new Error('Plain text JSONL must contain one valid JSON object per line');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Coerce a parsed import payload into a flat array of chat records.
 *
 * Two shapes are accepted: a bare array, or an object that wraps the array
 * in a `conversations` property.
 *
 * @param chats - Parsed file content of unknown shape.
 * @returns The array found in the payload (the same instance, not a copy).
 * @throws Error when the payload is neither an array nor an object with an
 *   array-valued `conversations` property.
 */
const normalizeChats = (chats: unknown): any[] => {
	if (Array.isArray(chats)) return chats;

	// `typeof null === 'object'`, so null has to be excluded explicitly
	if (chats !== null && typeof chats === 'object') {
		const { conversations } = chats as { conversations?: unknown };
		if (Array.isArray(conversations)) return conversations;
	}

	throw new Error('File content must be a JSON array of chats');
};
|
||||||
|
|
||||||
const handleFiles = async (files: FileList | File[]) => {
|
const handleFiles = async (files: FileList | File[]) => {
|
||||||
if (!files || files.length === 0) return;
|
if (!files || files.length === 0) return;
|
||||||
const file = files[0];
|
const file = files[0];
|
||||||
|
|
@ -68,12 +80,18 @@
|
||||||
chats = await extractChatsFromFile(file);
|
chats = await extractChatsFromFile(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getImportOrigin(chats) === 'openai') {
|
chats = normalizeChats(chats);
|
||||||
chats = convertOpenAIChats(chats);
|
|
||||||
|
if (chats.length === 0) {
|
||||||
|
throw new Error('File contained zero chat records');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Array.isArray(chats)) {
|
const origin = getImportOrigin(chats);
|
||||||
throw new Error('文件内容需为 JSON 数组');
|
|
||||||
|
if (origin === 'openai') {
|
||||||
|
chats = convertOpenAIChats(chats);
|
||||||
|
} else if (origin === 'deepseek') {
|
||||||
|
chats = convertDeepseekChats(chats);
|
||||||
}
|
}
|
||||||
|
|
||||||
rawChats = chats;
|
rawChats = chats;
|
||||||
|
|
@ -105,14 +123,14 @@
|
||||||
|
|
||||||
const confirmImport = async () => {
|
const confirmImport = async () => {
|
||||||
if (!rawChats.length) {
|
if (!rawChats.length) {
|
||||||
toast.error('请先上传对话记录文件');
|
toast.error('Please upload a chat history file first');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const chatsToImport =
|
const chatsToImport =
|
||||||
selectedIndices.size > 0 ? rawChats.filter((_, idx) => selectedIndices.has(idx)) : rawChats;
|
selectedIndices.size > 0 ? rawChats.filter((_, idx) => selectedIndices.has(idx)) : rawChats;
|
||||||
|
|
||||||
if (!chatsToImport.length) {
|
if (!chatsToImport.length) {
|
||||||
toast.error('未选择任何记录');
|
toast.error('No records selected');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -131,7 +149,7 @@
|
||||||
|
|
||||||
await onImport(chatsToImport);
|
await onImport(chatsToImport);
|
||||||
show = false;
|
show = false;
|
||||||
toast.success('开始导入筛选后的对话记录');
|
toast.success('Starting import for the filtered chats');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error);
|
console.error(error);
|
||||||
toast.error(error instanceof Error ? error.message : `${error}`);
|
toast.error(error instanceof Error ? error.message : `${error}`);
|
||||||
|
|
@ -148,7 +166,7 @@
|
||||||
chat?.title ??
|
chat?.title ??
|
||||||
chat?.chat?.title ??
|
chat?.chat?.title ??
|
||||||
meta?.subject ??
|
meta?.subject ??
|
||||||
'未命名对话';
|
'Untitled chat';
|
||||||
const date =
|
const date =
|
||||||
meta?.inserted_at ??
|
meta?.inserted_at ??
|
||||||
meta?.created_at ??
|
meta?.created_at ??
|
||||||
|
|
@ -178,15 +196,15 @@
|
||||||
<div class="p-6 space-y-6 font-primary">
|
<div class="p-6 space-y-6 font-primary">
|
||||||
<div class="flex items-start justify-between gap-4">
|
<div class="flex items-start justify-between gap-4">
|
||||||
<div>
|
<div>
|
||||||
<div class="text-lg font-semibold text-gray-900 dark:text-white">对话记录导入中心</div>
|
<div class="text-lg font-semibold text-gray-900 dark:text-white">Chat Import Center</div>
|
||||||
<div class="text-sm text-gray-500 dark:text-gray-400 mt-1">
|
<div class="text-sm text-gray-500 dark:text-gray-400 mt-1">
|
||||||
完成准备、筛选后再执行导入,提升成功率与速度。
|
Upload your exported history, filter the records you need, then import.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<button
|
<button
|
||||||
class="text-gray-500 hover:text-gray-900 dark:text-gray-400 dark:hover:text-white"
|
class="text-gray-500 hover:text-gray-900 dark:text-gray-400 dark:hover:text-white"
|
||||||
on:click={() => (show = false)}
|
on:click={() => (show = false)}
|
||||||
aria-label="关闭导入中心"
|
aria-label="Close import modal"
|
||||||
>
|
>
|
||||||
✕
|
✕
|
||||||
</button>
|
</button>
|
||||||
|
|
@ -194,27 +212,25 @@
|
||||||
|
|
||||||
<div class="space-y-4">
|
<div class="space-y-4">
|
||||||
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-gray-50/60 dark:bg-gray-900/60 p-4">
|
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-gray-50/60 dark:bg-gray-900/60 p-4">
|
||||||
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100 mb-2">
|
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100 mb-2">1. Prepare file</div>
|
||||||
1. 准备您的对话记录
|
|
||||||
</div>
|
|
||||||
<div class="text-sm text-gray-600 dark:text-gray-300 leading-relaxed space-y-1">
|
<div class="text-sm text-gray-600 dark:text-gray-300 leading-relaxed space-y-1">
|
||||||
<p>请确保导出文件格式为 <strong>JSON (.json)</strong> 或 <strong>纯文本 (.txt)</strong>。</p>
|
|
||||||
<p>
|
<p>
|
||||||
请前往原平台导出历史对话,如 DeepSeek — 系统设置 — 数据管理 — 导出所有历史对话,
|
Supported formats: <strong>JSON (.json)</strong>, <strong>JSONL (.jsonl/.txt)</strong>, or
|
||||||
ChatGPT — 设置 — 数据管理 — 导出数据。
|
OpenAI ZIP export (auto-converted).
|
||||||
</p>
|
</p>
|
||||||
<p>如果导出的文件内容过多或过大,建议使用下方的筛选功能生成新的导入文件,以加快导入速度。</p>
|
<p>Large exports can be filtered below to speed up the import.</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-white dark:bg-gray-900 p-4 space-y-3">
|
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-white dark:bg-gray-900 p-4 space-y-3">
|
||||||
<div class="flex items-center justify-between">
|
<div class="flex items-center justify-between">
|
||||||
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100">2. 筛选记录</div>
|
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100">2. Filter records</div>
|
||||||
<button
|
<button
|
||||||
class="text-xs px-3 py-1.5 rounded-full border border-gray-200 dark:border-gray-800 hover:bg-gray-100 dark:hover:bg-gray-850"
|
class="text-xs px-3 py-1.5 rounded-full border border-gray-200 dark:border-gray-800 hover:bg-gray-100 dark:hover:bg-gray-850"
|
||||||
on:click={() => (filterOpen = !filterOpen)}
|
on:click={() => (filterOpen = !filterOpen)}
|
||||||
|
type="button"
|
||||||
>
|
>
|
||||||
{filterOpen ? '收起高级筛选器' : '高级筛选器'}
|
{filterOpen ? 'Hide filters' : 'Show filters'}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -233,7 +249,7 @@
|
||||||
>
|
>
|
||||||
<div class="flex flex-col items-center gap-2 text-sm text-gray-600 dark:text-gray-300">
|
<div class="flex flex-col items-center gap-2 text-sm text-gray-600 dark:text-gray-300">
|
||||||
<div class="font-medium text-gray-900 dark:text-white">
|
<div class="font-medium text-gray-900 dark:text-white">
|
||||||
{fileName ? `已选择:${fileName}` : '拖拽文件到此处,或点击上传'}
|
{fileName ? `Selected: ${fileName}` : 'Drag a file here or click to upload'}
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<button
|
<button
|
||||||
|
|
@ -241,16 +257,16 @@
|
||||||
on:click={() => fileInputEl.click()}
|
on:click={() => fileInputEl.click()}
|
||||||
type="button"
|
type="button"
|
||||||
>
|
>
|
||||||
选择文件
|
Choose file
|
||||||
</button>
|
</button>
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">
|
<div class="text-xs text-gray-500 dark:text-gray-400">
|
||||||
支持 .json / .txt,OpenAI 导出支持自动转换
|
Supports .json / .jsonl / .txt / .zip exports
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{#if loading}
|
{#if loading}
|
||||||
<div class="flex items-center gap-2 text-blue-600 dark:text-blue-300">
|
<div class="flex items-center gap-2 text-blue-600 dark:text-blue-300">
|
||||||
<Spinner className="size-4" />
|
<Spinner className="size-4" />
|
||||||
<span>正在解析文件...</span>
|
<span>Parsing file...</span>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
{#if errorMsg}
|
{#if errorMsg}
|
||||||
|
|
@ -270,8 +286,10 @@
|
||||||
<div class="space-y-3">
|
<div class="space-y-3">
|
||||||
<div class="flex items-center justify-between text-xs text-gray-600 dark:text-gray-400">
|
<div class="flex items-center justify-between text-xs text-gray-600 dark:text-gray-400">
|
||||||
<div>
|
<div>
|
||||||
总数:{rawChats.length} | 已选:{selectedIndices.size}
|
Total: {rawChats.length} | Selected: {selectedIndices.size}
|
||||||
{selectedIndices.size === 0 && rawChats.length > 0 ? '(未选则默认导入全部)' : ''}
|
{selectedIndices.size === 0 && rawChats.length > 0
|
||||||
|
? ' (none selected will import all)'
|
||||||
|
: ''}
|
||||||
</div>
|
</div>
|
||||||
<label class="flex items-center gap-2 cursor-pointer select-none">
|
<label class="flex items-center gap-2 cursor-pointer select-none">
|
||||||
<input
|
<input
|
||||||
|
|
@ -281,7 +299,7 @@
|
||||||
indeterminate={selectedIndices.size > 0 && selectedIndices.size < rawChats.length}
|
indeterminate={selectedIndices.size > 0 && selectedIndices.size < rawChats.length}
|
||||||
on:change={handleSelectAllChange}
|
on:change={handleSelectAllChange}
|
||||||
/>
|
/>
|
||||||
<span>全选 / 取消全选</span>
|
<span>Select / Deselect all</span>
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -289,16 +307,16 @@
|
||||||
<table class="w-full text-sm">
|
<table class="w-full text-sm">
|
||||||
<thead class="text-left bg-gray-50 dark:bg-gray-900 text-gray-600 dark:text-gray-300">
|
<thead class="text-left bg-gray-50 dark:bg-gray-900 text-gray-600 dark:text-gray-300">
|
||||||
<tr>
|
<tr>
|
||||||
<th class="w-14 py-2 px-3">选择</th>
|
<th class="w-14 py-2 px-3">Pick</th>
|
||||||
<th class="py-2 px-3">标题 / 摘要</th>
|
<th class="py-2 px-3">Title / Summary</th>
|
||||||
<th class="w-48 py-2 px-3">时间</th>
|
<th class="w-48 py-2 px-3">Timestamp</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{#if !rawChats.length}
|
{#if !rawChats.length}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="3" class="py-4 text-center text-gray-500 dark:text-gray-400">
|
<td colspan="3" class="py-4 text-center text-gray-500 dark:text-gray-400">
|
||||||
请先上传文件以查看可筛选的记录
|
Upload a file to see filterable records
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{:else}
|
{:else}
|
||||||
|
|
@ -330,9 +348,9 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-white dark:bg-gray-900 p-4 space-y-2">
|
<div class="rounded-2xl border border-gray-100 dark:border-gray-800 bg-white dark:bg-gray-900 p-4 space-y-2">
|
||||||
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100">3. 导入记录</div>
|
<div class="text-sm font-semibold text-gray-800 dark:text-gray-100">3. Import</div>
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">
|
<div class="text-xs text-gray-500 dark:text-gray-400">
|
||||||
确认后将按筛选结果导入到当前账户的对话列表中。
|
Confirmed records will be imported into your current account.
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center justify-end gap-3">
|
<div class="flex items-center justify-end gap-3">
|
||||||
<button
|
<button
|
||||||
|
|
@ -340,7 +358,7 @@
|
||||||
on:click={() => (show = false)}
|
on:click={() => (show = false)}
|
||||||
type="button"
|
type="button"
|
||||||
>
|
>
|
||||||
取消
|
Cancel
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
class="px-4 py-2 text-sm rounded-xl bg-blue-600 text-white hover:bg-blue-700 disabled:opacity-60 disabled:cursor-not-allowed flex items-center gap-2"
|
class="px-4 py-2 text-sm rounded-xl bg-blue-600 text-white hover:bg-blue-700 disabled:opacity-60 disabled:cursor-not-allowed flex items-center gap-2"
|
||||||
|
|
@ -351,7 +369,7 @@
|
||||||
{#if importing}
|
{#if importing}
|
||||||
<Spinner className="size-4" />
|
<Spinner className="size-4" />
|
||||||
{/if}
|
{/if}
|
||||||
<span>{importing ? '正在导入...' : '确认导入'}</span>
|
<span>{importing ? 'Importing...' : 'Confirm import'}</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -660,9 +660,19 @@ export const calculateSHA256 = async (file) => {
|
||||||
|
|
||||||
/**
 * Detect which external service a chat export came from.
 *
 * @param _chats - Parsed export; expected to be an array of conversations.
 * @returns `'webui'` when the input is not an array, is empty, or its first
 *   element is not an object with a `mapping` key; `'deepseek'` when any
 *   conversation's mapping holds a message with a `fragments` array;
 *   `'openai'` otherwise.
 */
export const getImportOrigin = (_chats) => {
	// Check what external service chat imports are from
	const first = Array.isArray(_chats) ? _chats[0] : null;
	if (!first || typeof first !== 'object') return 'webui';
	if (!('mapping' in first)) return 'webui';

	// DeepSeek exports use mapping + fragments instead of content.parts/text.
	// Every conversation is inspected, not just the first: a leading
	// conversation without any fragments would otherwise make the whole
	// export look like an OpenAI one.
	const hasFragments = (convo: any) =>
		Object.values(convo?.mapping || {}).some(
			(entry: any) => entry?.message && Array.isArray(entry.message.fragments)
		);

	return _chats.some(hasFragments) ? 'deepseek' : 'openai';
};
|
||||||
|
|
||||||
|
|
@ -747,6 +757,75 @@ const convertOpenAIMessages = (convo) => {
|
||||||
return chat;
|
return chat;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Flatten a message's `fragments` list into a single text string.
 *
 * Only fragments whose `content` is a non-empty string contribute; they are
 * joined with a blank line between them.
 *
 * @param fragments - Value read from `message.fragments`; may be anything.
 * @returns The joined text, or `''` when `fragments` is not an array or no
 *   fragment carries string content.
 */
const fragmentsToContent = (fragments: unknown): string => {
	if (!Array.isArray(fragments)) return '';

	const parts: string[] = [];
	for (const fragment of fragments) {
		const text: unknown = fragment?.content;
		// Non-string and empty contents are dropped so they add no separators
		if (typeof text === 'string' && text !== '') parts.push(text);
	}

	return parts.join('\n\n');
};
|
||||||
|
|
||||||
|
/**
 * Parse one DeepSeek conversation (mapping + fragments) into a chat dictionary.
 *
 * Mapping entries are visited in key order and chained linearly: each kept
 * message's `parentId` is the id of the previously kept message. Leading
 * entries that yield no text are dropped.
 *
 * @param convo - One conversation object from the export; its `mapping`,
 *   `title` and timestamp fields are read.
 * @returns A chat object with `history`, `models`, `messages`, `options`,
 *   `timestamp` and `title`. `history.currentId` is the id of the last
 *   message actually kept, or `''` when none was kept.
 */
const convertDeepseekMessages = (convo) => {
	const mapping = convo['mapping'];
	const messages = [];
	// Id of the most recently kept message; it becomes the next message's parent
	let lastId: string | null = null;

	for (const [messageId, node] of Object.entries<any>(mapping ?? {})) {
		try {
			const fragments = node?.message?.fragments;
			const content = fragmentsToContent(fragments);

			// Until a first message with text exists, skip entries without any
			if (messages.length === 0 && !content) {
				continue;
			}

			// Prefer the type of the first typed fragment; otherwise fall back to
			// the author role, treating everything except 'user' as the assistant.
			const firstType = Array.isArray(fragments)
				? fragments.find((f) => typeof f?.type === 'string')?.type
				: undefined;
			let role;
			if (firstType === 'REQUEST') {
				role = 'user';
			} else if (firstType === 'RESPONSE') {
				role = 'assistant';
			} else {
				role = node?.message?.author?.role !== 'user' ? 'assistant' : 'user';
			}

			messages.push({
				id: messageId,
				parentId: lastId,
				// NOTE(review): children are copied from the export while parentId
				// follows iteration order; these may disagree for branched
				// conversations — confirm against real exports.
				// A null/undefined entry throws here and is logged and skipped below.
				childrenIds: node['children'] || [],
				role,
				content,
				model: node?.message?.model || 'deepseek-chat',
				done: true,
				context: null
			});
			lastId = messageId;
		} catch (error) {
			console.log('Error with DeepSeek message', node, '\nError:', error);
		}
	}

	const history: Record<PropertyKey, (typeof messages)[number]> = {};
	for (const entry of messages) {
		history[entry.id] = entry;
	}

	return {
		history: {
			// Use the last kept message, so the pointer never names an entry
			// that was skipped or failed to convert.
			currentId: lastId ?? '',
			messages: history
		},
		models: [messages[0]?.model || 'deepseek-chat'],
		messages: messages,
		options: {},
		timestamp: convo['inserted_at'] || convo['updated_at'] || convo['create_time'],
		title: convo['title'] ?? 'New Chat'
	};
};
|
||||||
|
|
||||||
const validateChat = (chat) => {
|
const validateChat = (chat) => {
|
||||||
// Because ChatGPT sometimes has features we can't use like DALL-E or might have corrupted messages, need to validate
|
// Because ChatGPT sometimes has features we can't use like DALL-E or might have corrupted messages, need to validate
|
||||||
const messages = chat.messages;
|
const messages = chat.messages;
|
||||||
|
|
@ -801,6 +880,29 @@ export const convertOpenAIChats = (_chats) => {
|
||||||
return chats;
|
return chats;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Convert a DeepSeek export into importable chat records.
 *
 * Each conversation is converted with `convertDeepseekMessages` and kept only
 * if `validateChat` accepts it. Conversations that are rejected, or that throw
 * while being converted, are counted and skipped so one bad record does not
 * abort the whole import.
 *
 * @param _chats - Iterable of conversation objects from the export.
 * @returns Array of records with `id`, `user_id`, `title`, `chat` and `timestamp`.
 */
export const convertDeepseekChats = (_chats) => {
	const chats = [];
	let failed = 0;

	for (const convo of _chats) {
		try {
			const chat = convertDeepseekMessages(convo);

			if (!validateChat(chat)) {
				failed++;
				continue;
			}

			chats.push({
				id: convo['id'],
				user_id: '',
				// Same fallback as the inner chat title, so the two never disagree
				title: convo['title'] ?? 'New Chat',
				chat: chat,
				timestamp: convo['inserted_at'] || convo['updated_at'] || convo['create_time']
			});
		} catch (error) {
			failed++;
			console.error('Error converting DeepSeek conversation', convo, '\nError:', error);
		}
	}

	// Only report when something was actually dropped
	if (failed > 0) {
		console.log(failed, 'DeepSeek conversations could not be imported');
	}
	return chats;
};
|
||||||
|
|
||||||
export const isValidHttpUrl = (string: string) => {
|
export const isValidHttpUrl = (string: string) => {
|
||||||
let url;
|
let url;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue