From 6993b0b40b10af8cdbe6626702cc94080fff9e22 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 21 Dec 2025 14:37:55 +0400 Subject: [PATCH] enh: temp chat docx file support --- package-lock.json | 160 +++++++++++++++++++++++++++++++++++++---- package.json | 1 + src/lib/utils/index.ts | 15 ++++ 3 files changed, 162 insertions(+), 14 deletions(-) diff --git a/package-lock.json b/package-lock.json index 7bf8f4da90..333d9c2fa1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -67,6 +67,7 @@ "kokoro-js": "^1.1.1", "leaflet": "^1.9.4", "lowlight": "^3.3.0", + "mammoth": "^1.11.0", "marked": "^9.1.0", "mermaid": "^11.10.1", "paneforge": "^0.0.6", @@ -4605,6 +4606,15 @@ "resolved": "https://registry.npmjs.org/@webreflection/fetch/-/fetch-0.1.5.tgz", "integrity": "sha512-zCcqCJoNLvdeF41asAK71XPlwSPieeRDsE09albBunJEksuYPYNillKNQjf8p5BqSoTKTuKrW3lUm3MNodUC4g==" }, + "node_modules/@xmldom/xmldom": { + "version": "0.8.11", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.11.tgz", + "integrity": "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/@xyflow/svelte": { "version": "0.1.19", "resolved": "https://registry.npmjs.org/@xyflow/svelte/-/svelte-0.1.19.tgz", @@ -4951,7 +4961,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -5966,8 +5975,7 @@ "node_modules/core-util-is": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", - "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", - "dev": true + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" }, "node_modules/cose-base": { "version": "1.0.3", @@ -6891,6 +6899,12 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dingbat-to-unicode": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", + "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==", + "license": "BSD-2-Clause" + }, "node_modules/doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -6971,6 +6985,15 @@ "url": "https://github.com/fb55/domutils?sponsor=1" } }, + "node_modules/duck": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz", + "integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==", + "license": "BSD", + "dependencies": { + "underscore": "^1.13.1" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -8537,6 +8560,12 @@ "node": ">= 4" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, "node_modules/immutable": { "version": "5.0.3", "resolved": "https://registry.npmjs.org/immutable/-/immutable-5.0.3.tgz", @@ -8818,8 +8847,7 @@ "node_modules/isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", - "dev": true + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" }, "node_modules/isexe": { "version": "2.0.0", @@ -9018,6 +9046,18 @@ "verror": "1.10.0" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, "node_modules/katex": { "version": "0.16.22", "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.22.tgz", @@ -9175,6 +9215,15 @@ "url": "https://github.com/sponsors/dmonad" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lightningcss": { "version": "1.29.1", "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.29.1.tgz", @@ -9671,6 +9720,17 @@ "integrity": "sha512-qtzLbJE8hq7VabR3mISmVGtoXP8KGc2Z/AT8OuqlYD7JTR3oqrgwdjnk07wpj1twXxYmgDXgoKVWUG/fReSzHg==", "license": "Apache-2.0" }, + "node_modules/lop": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/lop/-/lop-0.4.2.tgz", + "integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==", + "license": "BSD-2-Clause", + "dependencies": { + "duck": "^0.1.12", + "option": "~0.2.1", + "underscore": "^1.13.1" + } + }, "node_modules/loupe": { "version": "2.3.7", "resolved": "https://registry.npmjs.org/loupe/-/loupe-2.3.7.tgz", @@ -9711,6 +9771,51 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/mammoth": { + "version": "1.11.0", + "resolved": "https://registry.npmjs.org/mammoth/-/mammoth-1.11.0.tgz", + "integrity": "sha512-BcEqqY/BOwIcI1iR5tqyVlqc3KIaMRa4egSoK83YAVrBf6+yqdAAbtUcFDCWX8Zef8/fgNZ6rl4VUv+vVX8ddQ==", + "license": "BSD-2-Clause", + "dependencies": { + "@xmldom/xmldom": "^0.8.6", + "argparse": "~1.0.3", + "base64-js": "^1.5.1", + "bluebird": "~3.4.0", + "dingbat-to-unicode": "^1.0.1", + "jszip": "^3.7.1", + "lop": "^0.4.2", + "path-is-absolute": "^1.0.0", + "underscore": "^1.13.1", + "xmlbuilder": "^10.0.0" + }, + "bin": { + "mammoth": "bin/mammoth" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/mammoth/node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/mammoth/node_modules/bluebird": { + "version": "3.4.7", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", + "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==", + "license": "MIT" + }, + "node_modules/mammoth/node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "license": "BSD-3-Clause" + }, "node_modules/markdown-it": { "version": "14.1.0", "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.1.0.tgz", @@ -10222,6 +10327,12 @@ "integrity": "sha512-TwaE51xV9q2y8pM61q73rbywJnusw9ivTEHAJ39GVWNZqxCoDBpe/tQkh/w9S+o/g+zS7YeeL0I/2mEWd+dgyA==", "license": "MIT" }, + "node_modules/option": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz", + "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==", + "license": "BSD-2-Clause" + }, "node_modules/optionator": { "version": "0.9.3", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz", @@ -10310,6 +10421,12 @@ "quansync": "^0.2.7" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, "node_modules/paneforge": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/paneforge/-/paneforge-0.0.6.tgz", @@ -10402,7 +10519,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "dev": true, "engines": { "node": ">=0.10.0" } @@ -10803,8 +10919,7 @@ "node_modules/process-nextick-args": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", - "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", - "dev": true + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" }, "node_modules/promise-map-series": { "version": "0.3.0", @@ -11246,7 +11361,6 @@ "version": "2.3.8", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", - "dev": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -11568,8 +11682,7 @@ "node_modules/safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "dev": true + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" }, "node_modules/safer-buffer": { "version": "2.1.2", @@ -12005,6 +12118,12 @@ "node": ">= 0.4" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/sharp": { "version": "0.33.5", "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz", @@ -12284,7 +12403,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", - "dev": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -13029,6 +13147,12 @@ "integrity": "sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==", "license": "MIT" }, + "node_modules/underscore": { + "version": "1.13.7", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.7.tgz", + "integrity": "sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==", + "license": "MIT" + }, "node_modules/underscore.string": { "version": "3.3.6", "resolved": "https://registry.npmjs.org/underscore.string/-/underscore.string-3.3.6.tgz", @@ -13095,8 +13219,7 @@ "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, "node_modules/utrie": { "version": "1.0.2", @@ -14682,6 +14805,15 @@ "node": ">=18" } }, + "node_modules/xmlbuilder": { + "version": "10.1.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", + "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==", + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, "node_modules/xmlchars": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", diff --git a/package.json b/package.json index aa36a06563..1381638435 100644 --- a/package.json +++ b/package.json @@ -111,6 +111,7 @@ "kokoro-js": "^1.1.1", "leaflet": "^1.9.4", "lowlight": "^3.3.0", + "mammoth": "^1.11.0", "marked": "^9.1.0", "mermaid": "^11.10.1", "paneforge": "^0.0.6", diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 32c56f2733..5343a30552 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -15,6 +15,7 @@ dayjs.extend(localizedFormat); import { TTS_RESPONSE_SPLIT } from '$lib/types'; +import mammoth from 'mammoth'; import pdfWorkerUrl from 'pdfjs-dist/build/pdf.worker.mjs?url'; import { marked } from 'marked'; @@ -1519,6 +1520,12 @@ export const extractContentFromFile = async (file: File) => { }); } + async function extractDocxText(file: File) { + const arrayBuffer = await file.arrayBuffer(); + const result = await mammoth.extractRawText({ arrayBuffer }); + return result.value; // plain text + } + const type = file.type || ''; const ext = getExtension(file.name); @@ -1527,6 +1534,14 @@ export const extractContentFromFile = async (file: File) => { return await extractPdfText(file); } + // DOCX check + if ( + type === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' || + ext === '.docx' + ) { + return await extractDocxText(file); + } + // Text check (plain or common text-based) if (type.startsWith('text/') || textExtensions.includes(ext)) { return await readAsText(file);