mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-12 12:25:20 +00:00
Merge pull request #17222 from ShirasawaSama/patch-14
feat: dynamically load PDFjs to accelerate initial page loading speed
This commit is contained in:
commit
9764bf3b78
2 changed files with 20 additions and 14 deletions
|
|
@ -1,8 +1,4 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import * as pdfjs from 'pdfjs-dist';
|
|
||||||
import * as pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs';
|
|
||||||
pdfjs.GlobalWorkerOptions.workerSrc = import.meta.url + 'pdfjs-dist/build/pdf.worker.mjs';
|
|
||||||
|
|
||||||
import DOMPurify from 'dompurify';
|
import DOMPurify from 'dompurify';
|
||||||
import { marked } from 'marked';
|
import { marked } from 'marked';
|
||||||
import heic2any from 'heic2any';
|
import heic2any from 'heic2any';
|
||||||
|
|
@ -640,7 +636,7 @@
|
||||||
} else {
|
} else {
|
||||||
// If temporary chat is enabled, we just add the file to the list without uploading it.
|
// If temporary chat is enabled, we just add the file to the list without uploading it.
|
||||||
|
|
||||||
const content = await extractContentFromFile(file, pdfjsLib).catch((error) => {
|
const content = await extractContentFromFile(file).catch((error) => {
|
||||||
toast.error(
|
toast.error(
|
||||||
$i18n.t('Failed to extract content from the file: {{error}}', { error: error })
|
$i18n.t('Failed to extract content from the file: {{error}}', { error: error })
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@ dayjs.extend(localizedFormat);
|
||||||
|
|
||||||
import { TTS_RESPONSE_SPLIT } from '$lib/types';
|
import { TTS_RESPONSE_SPLIT } from '$lib/types';
|
||||||
|
|
||||||
|
import pdfWorkerUrl from 'pdfjs-dist/build/pdf.worker.mjs?url';
|
||||||
|
|
||||||
import { marked } from 'marked';
|
import { marked } from 'marked';
|
||||||
import markedExtension from '$lib/utils/marked/extension';
|
import markedExtension from '$lib/utils/marked/extension';
|
||||||
import markedKatexExtension from '$lib/utils/marked/katex-extension';
|
import markedKatexExtension from '$lib/utils/marked/katex-extension';
|
||||||
|
|
@ -1440,7 +1442,18 @@ export const parseJsonValue = (value: string): any => {
|
||||||
return value;
|
return value;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const extractContentFromFile = async (file, pdfjsLib = null) => {
|
async function ensurePDFjsLoaded() {
|
||||||
|
if (!window.pdfjsLib) {
|
||||||
|
const pdfjs = await import('pdfjs-dist');
|
||||||
|
pdfjs.GlobalWorkerOptions.workerSrc = pdfWorkerUrl;
|
||||||
|
if (!window.pdfjsLib) {
|
||||||
|
throw new Error('pdfjsLib is required for PDF extraction');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return window.pdfjsLib;
|
||||||
|
}
|
||||||
|
|
||||||
|
export const extractContentFromFile = async (file: File) => {
|
||||||
// Known text file extensions for extra fallback
|
// Known text file extensions for extra fallback
|
||||||
const textExtensions = [
|
const textExtensions = [
|
||||||
'.txt',
|
'.txt',
|
||||||
|
|
@ -1457,31 +1470,28 @@ export const extractContentFromFile = async (file, pdfjsLib = null) => {
|
||||||
'.rtf'
|
'.rtf'
|
||||||
];
|
];
|
||||||
|
|
||||||
function getExtension(filename) {
|
function getExtension(filename: string) {
|
||||||
const dot = filename.lastIndexOf('.');
|
const dot = filename.lastIndexOf('.');
|
||||||
return dot === -1 ? '' : filename.substr(dot).toLowerCase();
|
return dot === -1 ? '' : filename.substr(dot).toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Uses pdfjs to extract text from PDF
|
// Uses pdfjs to extract text from PDF
|
||||||
async function extractPdfText(file) {
|
async function extractPdfText(file: File) {
|
||||||
if (!pdfjsLib) {
|
const pdfjsLib = await ensurePDFjsLoaded();
|
||||||
throw new Error('pdfjsLib is required for PDF extraction');
|
|
||||||
}
|
|
||||||
|
|
||||||
const arrayBuffer = await file.arrayBuffer();
|
const arrayBuffer = await file.arrayBuffer();
|
||||||
const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
|
const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise;
|
||||||
let allText = '';
|
let allText = '';
|
||||||
for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
|
for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
|
||||||
const page = await pdf.getPage(pageNum);
|
const page = await pdf.getPage(pageNum);
|
||||||
const content = await page.getTextContent();
|
const content = await page.getTextContent();
|
||||||
const strings = content.items.map((item) => item.str);
|
const strings = content.items.map((item: any) => item.str);
|
||||||
allText += strings.join(' ') + '\n';
|
allText += strings.join(' ') + '\n';
|
||||||
}
|
}
|
||||||
return allText;
|
return allText;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads file as text using FileReader
|
// Reads file as text using FileReader
|
||||||
function readAsText(file) {
|
function readAsText(file: File) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.onload = () => resolve(reader.result);
|
reader.onload = () => resolve(reader.result);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue