mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-12 04:15:25 +00:00
enh/refac: temp chat file upload behaviour
client-side content extraction
This commit is contained in:
parent
8fb54b133e
commit
8d84b4c2a4
6 changed files with 350 additions and 38 deletions
|
|
@ -471,6 +471,12 @@ def get_sources_from_files(
|
||||||
"documents": [[doc.get("content") for doc in file.get("docs")]],
|
"documents": [[doc.get("content") for doc in file.get("docs")]],
|
||||||
"metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
|
"metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
|
||||||
}
|
}
|
||||||
|
elif file.get("type") == "text":
|
||||||
|
# Text File
|
||||||
|
query_result = {
|
||||||
|
"documents": [[file.get("content")]],
|
||||||
|
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
|
||||||
|
}
|
||||||
elif file.get("type") == "note":
|
elif file.get("type") == "note":
|
||||||
# Note Attached
|
# Note Attached
|
||||||
note = Notes.get_note_by_id(file.get("id"))
|
note = Notes.get_note_by_id(file.get("id"))
|
||||||
|
|
|
||||||
198
package-lock.json
generated
198
package-lock.json
generated
|
|
@ -67,6 +67,7 @@
|
||||||
"mermaid": "^11.6.0",
|
"mermaid": "^11.6.0",
|
||||||
"paneforge": "^0.0.6",
|
"paneforge": "^0.0.6",
|
||||||
"panzoom": "^9.4.3",
|
"panzoom": "^9.4.3",
|
||||||
|
"pdfjs-dist": "^5.3.93",
|
||||||
"prosemirror-collab": "^1.3.1",
|
"prosemirror-collab": "^1.3.1",
|
||||||
"prosemirror-commands": "^1.6.0",
|
"prosemirror-commands": "^1.6.0",
|
||||||
"prosemirror-example-setup": "^1.2.3",
|
"prosemirror-example-setup": "^1.2.3",
|
||||||
|
|
@ -2083,6 +2084,191 @@
|
||||||
"resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz",
|
||||||
"integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="
|
"integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@napi-rs/canvas": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-9iwPZrNlCK4rG+vWyDvyvGeYjck9MoP0NVQP6N60gqJNFA1GsN0imG05pzNsqfCvFxUxgiTYlR8ff0HC1HXJiw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"workspaces": [
|
||||||
|
"e2e/*"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"@napi-rs/canvas-android-arm64": "0.1.73",
|
||||||
|
"@napi-rs/canvas-darwin-arm64": "0.1.73",
|
||||||
|
"@napi-rs/canvas-darwin-x64": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-arm-gnueabihf": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-arm64-gnu": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-arm64-musl": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-riscv64-gnu": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-x64-gnu": "0.1.73",
|
||||||
|
"@napi-rs/canvas-linux-x64-musl": "0.1.73",
|
||||||
|
"@napi-rs/canvas-win32-x64-msvc": "0.1.73"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-android-arm64": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-s8dMhfYIHVv7gz8BXg3Nb6cFi950Y0xH5R/sotNZzUVvU9EVqHfkqiGJ4UIqu+15UhqguT6mI3Bv1mhpRkmMQw==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"android"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-darwin-arm64": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-bLPCq8Yyq1vMdVdIpQAqmgf6VGUknk8e7NdSZXJJFOA9gxkJ1RGcHOwoXo7h0gzhHxSorg71hIxyxtwXpq10Rw==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"darwin"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-darwin-x64": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-GR1CcehDjdNYXN3bj8PIXcXfYLUUOQANjQpM+KNnmpRo7ojsuqPjT7ZVH+6zoG/aqRJWhiSo+ChQMRazZlRU9g==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"darwin"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-arm-gnueabihf": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-cM7F0kBJVFio0+U2iKSW4fWSfYQ8CPg4/DRZodSum/GcIyfB8+UPJSRM1BvvlcWinKLfX1zUYOwonZX9IFRRcw==",
|
||||||
|
"cpu": [
|
||||||
|
"arm"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-arm64-gnu": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-PMWNrMON9uz9klz1B8ZY/RXepQSC5dxxHQTowfw93Tb3fLtWO5oNX2k9utw7OM4ypT9BUZUWJnDQ5bfuXc/EUQ==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-arm64-musl": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-lX0z2bNmnk1PGZ+0a9OZwI2lPPvWjRYzPqvEitXX7lspyLFrOzh2kcQiLL7bhyODN23QvfriqwYqp5GreSzVvA==",
|
||||||
|
"cpu": [
|
||||||
|
"arm64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-riscv64-gnu": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-QDQgMElwxAoADsSR3UYvdTTQk5XOyD9J5kq15Z8XpGwpZOZsSE0zZ/X1JaOtS2x+HEZL6z1S6MF/1uhZFZb5ig==",
|
||||||
|
"cpu": [
|
||||||
|
"riscv64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-x64-gnu": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-wbzLJrTalQrpyrU1YRrO6w6pdr5vcebbJa+Aut5QfTaW9eEmMb1WFG6l1V+cCa5LdHmRr8bsvl0nJDU/IYDsmw==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-linux-x64-musl": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-xbfhYrUufoTAKvsEx2ZUN4jvACabIF0h1F5Ik1Rk4e/kQq6c+Dwa5QF0bGrfLhceLpzHT0pCMGMDeQKQrcUIyA==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"linux"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@napi-rs/canvas-win32-x64-msvc": {
|
||||||
|
"version": "0.1.73",
|
||||||
|
"resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.73.tgz",
|
||||||
|
"integrity": "sha512-YQmHXBufFBdWqhx+ympeTPkMfs3RNxaOgWm59vyjpsub7Us07BwCcmu1N5kildhO8Fm0syoI2kHnzGkJBLSvsg==",
|
||||||
|
"cpu": [
|
||||||
|
"x64"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"optional": true,
|
||||||
|
"os": [
|
||||||
|
"win32"
|
||||||
|
],
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@nodelib/fs.scandir": {
|
"node_modules/@nodelib/fs.scandir": {
|
||||||
"version": "2.1.5",
|
"version": "2.1.5",
|
||||||
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
"resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
|
||||||
|
|
@ -9504,6 +9690,18 @@
|
||||||
"node": "*"
|
"node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/pdfjs-dist": {
|
||||||
|
"version": "5.3.93",
|
||||||
|
"resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.3.93.tgz",
|
||||||
|
"integrity": "sha512-w3fQKVL1oGn8FRyx5JUG5tnbblggDqyx2XzA5brsJ5hSuS+I0NdnJANhmeWKLjotdbPQucLBug5t0MeWr0AAdg==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=20.16.0 || >=22.3.0"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"@napi-rs/canvas": "^0.1.71"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/pend": {
|
"node_modules/pend": {
|
||||||
"version": "1.2.0",
|
"version": "1.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@
|
||||||
"mermaid": "^11.6.0",
|
"mermaid": "^11.6.0",
|
||||||
"paneforge": "^0.0.6",
|
"paneforge": "^0.0.6",
|
||||||
"panzoom": "^9.4.3",
|
"panzoom": "^9.4.3",
|
||||||
|
"pdfjs-dist": "^5.3.93",
|
||||||
"prosemirror-collab": "^1.3.1",
|
"prosemirror-collab": "^1.3.1",
|
||||||
"prosemirror-commands": "^1.6.0",
|
"prosemirror-commands": "^1.6.0",
|
||||||
"prosemirror-example-setup": "^1.2.3",
|
"prosemirror-example-setup": "^1.2.3",
|
||||||
|
|
|
||||||
|
|
@ -1597,7 +1597,7 @@
|
||||||
let files = JSON.parse(JSON.stringify(chatFiles));
|
let files = JSON.parse(JSON.stringify(chatFiles));
|
||||||
files.push(
|
files.push(
|
||||||
...(userMessage?.files ?? []).filter((item) =>
|
...(userMessage?.files ?? []).filter((item) =>
|
||||||
['doc', 'file', 'note', 'collection'].includes(item.type)
|
['doc', 'text', 'file', 'note', 'collection'].includes(item.type)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
// Remove duplicates
|
// Remove duplicates
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,8 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import * as pdfjs from 'pdfjs-dist';
|
||||||
|
import * as pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs';
|
||||||
|
pdfjs.GlobalWorkerOptions.workerSrc = import.meta.url + 'pdfjs-dist/build/pdf.worker.mjs';
|
||||||
|
|
||||||
import DOMPurify from 'dompurify';
|
import DOMPurify from 'dompurify';
|
||||||
import { marked } from 'marked';
|
import { marked } from 'marked';
|
||||||
import heic2any from 'heic2any';
|
import heic2any from 'heic2any';
|
||||||
|
|
@ -23,13 +27,15 @@
|
||||||
tools,
|
tools,
|
||||||
user as _user,
|
user as _user,
|
||||||
showControls,
|
showControls,
|
||||||
TTSWorker
|
TTSWorker,
|
||||||
|
temporaryChatEnabled
|
||||||
} from '$lib/stores';
|
} from '$lib/stores';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
blobToFile,
|
blobToFile,
|
||||||
compressImage,
|
compressImage,
|
||||||
createMessagesList,
|
createMessagesList,
|
||||||
|
extractContentFromFile,
|
||||||
extractCurlyBraceWords,
|
extractCurlyBraceWords,
|
||||||
extractInputVariables,
|
extractInputVariables,
|
||||||
getCurrentDateTime,
|
getCurrentDateTime,
|
||||||
|
|
@ -529,47 +535,77 @@
|
||||||
|
|
||||||
files = [...files, fileItem];
|
files = [...files, fileItem];
|
||||||
|
|
||||||
try {
|
if (!$temporaryChatEnabled) {
|
||||||
// If the file is an audio file, provide the language for STT.
|
try {
|
||||||
let metadata = null;
|
// If the file is an audio file, provide the language for STT.
|
||||||
if (
|
let metadata = null;
|
||||||
(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
|
if (
|
||||||
$settings?.audio?.stt?.language
|
(file.type.startsWith('audio/') || file.type.startsWith('video/')) &&
|
||||||
) {
|
$settings?.audio?.stt?.language
|
||||||
metadata = {
|
) {
|
||||||
language: $settings?.audio?.stt?.language
|
metadata = {
|
||||||
};
|
language: $settings?.audio?.stt?.language
|
||||||
}
|
};
|
||||||
|
|
||||||
// During the file upload, file content is automatically extracted.
|
|
||||||
const uploadedFile = await uploadFile(localStorage.token, file, metadata);
|
|
||||||
|
|
||||||
if (uploadedFile) {
|
|
||||||
console.log('File upload completed:', {
|
|
||||||
id: uploadedFile.id,
|
|
||||||
name: fileItem.name,
|
|
||||||
collection: uploadedFile?.meta?.collection_name
|
|
||||||
});
|
|
||||||
|
|
||||||
if (uploadedFile.error) {
|
|
||||||
console.warn('File upload warning:', uploadedFile.error);
|
|
||||||
toast.warning(uploadedFile.error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fileItem.status = 'uploaded';
|
// During the file upload, file content is automatically extracted.
|
||||||
fileItem.file = uploadedFile;
|
const uploadedFile = await uploadFile(localStorage.token, file, metadata);
|
||||||
fileItem.id = uploadedFile.id;
|
|
||||||
fileItem.collection_name =
|
|
||||||
uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
|
|
||||||
fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
|
|
||||||
|
|
||||||
files = files;
|
if (uploadedFile) {
|
||||||
} else {
|
console.log('File upload completed:', {
|
||||||
|
id: uploadedFile.id,
|
||||||
|
name: fileItem.name,
|
||||||
|
collection: uploadedFile?.meta?.collection_name
|
||||||
|
});
|
||||||
|
|
||||||
|
if (uploadedFile.error) {
|
||||||
|
console.warn('File upload warning:', uploadedFile.error);
|
||||||
|
toast.warning(uploadedFile.error);
|
||||||
|
}
|
||||||
|
|
||||||
|
fileItem.status = 'uploaded';
|
||||||
|
fileItem.file = uploadedFile;
|
||||||
|
fileItem.id = uploadedFile.id;
|
||||||
|
fileItem.collection_name =
|
||||||
|
uploadedFile?.meta?.collection_name || uploadedFile?.collection_name;
|
||||||
|
fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
|
||||||
|
|
||||||
|
files = files;
|
||||||
|
} else {
|
||||||
|
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
toast.error(`${e}`);
|
||||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} else {
|
||||||
toast.error(`${e}`);
|
// If temporary chat is enabled, we just add the file to the list without uploading it.
|
||||||
files = files.filter((item) => item?.itemId !== tempItemId);
|
|
||||||
|
const content = await extractContentFromFile(file, pdfjsLib).catch((error) => {
|
||||||
|
toast.error(
|
||||||
|
$i18n.t('Failed to extract content from the file: {{error}}', { error: error })
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (content === null) {
|
||||||
|
toast.error($i18n.t('Failed to extract content from the file.'));
|
||||||
|
files = files.filter((item) => item?.itemId !== tempItemId);
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
console.log('Extracted content from file:', {
|
||||||
|
name: file.name,
|
||||||
|
size: file.size,
|
||||||
|
content: content
|
||||||
|
});
|
||||||
|
|
||||||
|
fileItem.status = 'uploaded';
|
||||||
|
fileItem.type = 'text';
|
||||||
|
fileItem.content = content;
|
||||||
|
fileItem.id = uuidv4(); // Temporary ID for the file
|
||||||
|
|
||||||
|
files = files;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1507,3 +1507,74 @@ export const parseJsonValue = (value: string): any => {
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Extract the textual content of a user-selected file entirely on the client.
 *
 * Dispatch order:
 *  1. PDFs (MIME type `application/pdf` or a `.pdf` extension) are parsed with
 *     the supplied pdf.js module; the text items of each page are joined with
 *     spaces and every page is terminated by a newline.
 *  2. Files with a `text/*` MIME type or a well-known text extension are read
 *     with `FileReader.readAsText`.
 *  3. Anything else is accepted only if its bytes decode cleanly as text;
 *     otherwise the file is rejected as unsupported.
 *
 * @param {File|Blob} file - File to read. `name` and `type` may be missing or empty.
 * @param {object|null} [pdfjsLib=null] - pdf.js module exposing `getDocument`;
 *   only needed when `file` is a PDF.
 * @returns {Promise<string>} The extracted text.
 * @throws {Error} If a PDF is passed without `pdfjsLib`, if reading the file
 *   fails, or if an unrecognised file does not contain decodable text.
 */
export const extractContentFromFile = async (file, pdfjsLib = null) => {
	// Extensions treated as text even when the browser reports no usable MIME type.
	const textExtensions = new Set([
		'.txt',
		'.md',
		'.csv',
		'.json',
		'.js',
		'.ts',
		'.css',
		'.html',
		'.xml',
		'.yaml',
		'.yml',
		'.rtf'
	]);

	// Lower-cased extension including the leading dot, or '' when there is none.
	const getExtension = (filename) => {
		const dot = filename.lastIndexOf('.');
		return dot === -1 ? '' : filename.slice(dot).toLowerCase();
	};

	// Pull the text layer out of every page of a PDF using pdf.js.
	const extractPdfText = async (pdfFile) => {
		if (!pdfjsLib) {
			throw new Error('pdfjsLib is required for PDF extraction');
		}

		const data = await pdfFile.arrayBuffer();
		const loadingTask = pdfjsLib.getDocument({ data });
		try {
			const pdf = await loadingTask.promise;
			const pages = [];
			// Pages are fetched one at a time so the output keeps document order.
			for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
				const page = await pdf.getPage(pageNum);
				const { items } = await page.getTextContent();
				pages.push(`${items.map((item) => item.str).join(' ')}\n`);
			}
			return pages.join('');
		} finally {
			// Release the document/worker resources even when parsing fails.
			await loadingTask.destroy?.();
		}
	};

	// Read a file as text with FileReader (lenient: undecodable bytes become U+FFFD).
	const readAsText = (textFile) =>
		new Promise((resolve, reject) => {
			const reader = new FileReader();
			reader.onload = () => resolve(reader.result);
			// Reject with the underlying error rather than the ProgressEvent so
			// callers that display the rejection get a meaningful message.
			reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
			reader.readAsText(textFile);
		});

	// Decode bytes as text, throwing on any malformed sequence. A UTF-16 byte
	// order mark selects UTF-16; everything else is treated as UTF-8.
	const decodeStrict = (buffer) => {
		const bytes = new Uint8Array(buffer);
		let encoding = 'utf-8';
		if (bytes[0] === 0xff && bytes[1] === 0xfe) {
			encoding = 'utf-16le';
		} else if (bytes[0] === 0xfe && bytes[1] === 0xff) {
			encoding = 'utf-16be';
		}
		return new TextDecoder(encoding, { fatal: true }).decode(bytes);
	};

	const type = file.type || '';
	// A bare Blob has no name; treat that as "no extension" instead of crashing.
	const name = file.name || '';
	const ext = getExtension(name);

	// PDF check
	if (type === 'application/pdf' || ext === '.pdf') {
		return await extractPdfText(file);
	}

	// Text check (plain or common text-based)
	if (type.startsWith('text/') || textExtensions.has(ext)) {
		return await readAsText(file);
	}

	// Fallback: accept unknown types only when their bytes really are text.
	// FileReader never fails on binary data, so decode strictly instead.
	try {
		return decodeStrict(await file.arrayBuffer());
	} catch (err) {
		throw new Error(`Unsupported or non-text file type: ${name || type}`, { cause: err });
	}
};
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue