// open-webui/src/lib/utils/marked/katex-extension.ts
import katex from 'katex';
/**
 * Math delimiters recognised by this extension.
 *
 * `display: true` entries produce display-mode (block) math; `display: false`
 * entries produce inline math.
 *
 * Order matters: generateRegexRules() turns each entry into one regex
 * alternative, in list order, so `$$` must stay ahead of `$` for the longer
 * delimiter to be tried first.
 */
const DELIMITER_LIST = [
	{ left: '$$', right: '$$', display: true },
	{ left: '$', right: '$', display: false },
	{ left: '\\pu{', right: '}', display: false },
	{ left: '\\ce{', right: '}', display: false },
	{ left: '\\(', right: '\\)', display: false },
	{ left: '\\[', right: '\\]', display: true },
	{ left: '\\begin{equation}', right: '\\end{equation}', display: true }
];
// Characters that are allowed to immediately precede or follow a math delimiter.
// This is the *body* of a regex character class (it is spliced between `[` and `]`)
// and it contains `\p{Script=Han}`, so every RegExp built from it needs the `u` flag.
const ALLOWED_SURROUNDING_CHARS = '\\s?。,!-\\/:-@\\[-`{-~\\p{Script=Han}';
// Legacy single-delimiter rules, kept for reference only (not used at runtime):
// const DELIMITER_LIST = [
// { left: '$$', right: '$$', display: false },
// { left: '$', right: '$', display: false },
// ];
// const inlineRule = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:?!。,:]|$)/;
// const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/;

// Module-level collections of regex source fragments, one entry per delimiter alternative.
const inlinePatterns: string[] = [];
const blockPatterns: string[] = [];
/**
 * Escapes every regex metacharacter in `string` so the result can be embedded
 * in a RegExp source and match the input literally.
 *
 * @param string - Literal text, e.g. a math delimiter such as `$$` or `\(`.
 * @returns The text with each of `- / \ ^ $ * + ? . ( ) | [ ] { }` prefixed by a backslash.
 */
function escapeRegex(string) {
	// `$&` in the replacement stands for the matched character itself.
	return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}
/**
 * Builds the two anchored matchers used by the tokenizers from a delimiter list.
 *
 * Each delimiter contributes one alternative, in list order:
 *  - `display: false` adds an inline alternative only;
 *  - `display: true` adds a block alternative (delimiters on their own lines)
 *    and an inline alternative (opening delimiter not followed by a newline).
 *
 * Both rules are anchored at the start of the input and require the match to be
 * followed by one of ALLOWED_SURROUNDING_CHARS or by the end of the input.
 *
 * The pattern lists are local to each call, so calling this function more than
 * once never accumulates duplicate alternatives.
 *
 * @param delimiters - Entries shaped like `{ left, right, display }`.
 * @returns `{ inlineRule, blockRule }`, both compiled with the `u` flag.
 */
function generateRegexRules(delimiters) {
	const inlineSources: string[] = [];
	const blockSources: string[] = [];

	for (const { left, right, display } of delimiters) {
		// Ensure regex-safe delimiters
		const escapedLeft = escapeRegex(left);
		const escapedRight = escapeRegex(right);

		if (!display) {
			// Inline delimiters: lazily match any content, treating `\x` as one unit
			// so an escaped character is never read as part of the closing delimiter.
			inlineSources.push(`${escapedLeft}((?:\\\\[^]|[^\\\\])+?)${escapedRight}`);
		} else {
			// Block delimiters double as inline delimiters when not followed by a newline.
			// NOTE(review): the second `(?!\n)` sits directly before the closing delimiter,
			// so it only rejects a closing delimiter that itself starts with a newline;
			// a lookbehind may have been intended. Confirm before changing.
			inlineSources.push(`${escapedLeft}(?!\\n)((?:\\\\[^]|[^\\\\])+?)(?!\\n)${escapedRight}`);
			blockSources.push(`${escapedLeft}\\n((?:\\\\[^]|[^\\\\])+?)\\n${escapedRight}`);
		}
	}

	// Math formulas can end in special characters, hence the trailing lookahead.
	const trailingBoundary = `(?=[${ALLOWED_SURROUNDING_CHARS}]|$)`;
	const inlineRule = new RegExp(`^(${inlineSources.join('|')})${trailingBoundary}`, 'u');
	const blockRule = new RegExp(`^(${blockSources.join('|')})${trailingBoundary}`, 'u');

	return { inlineRule, blockRule };
}
// Compile the matchers once at module load; katexStart() and katexTokenizer() share them.
const compiledRules = generateRegexRules(DELIMITER_LIST);
const inlineRule = compiledRules.inlineRule;
const blockRule = compiledRules.blockRule;
/**
 * Creates the marked extension bundle for math: one inline-level and one
 * block-level extension.
 *
 * @param options - Forwarded unchanged to inlineKatex() and blockKatex().
 * @returns An object with an `extensions` array holding both extensions.
 */
export default function (options = {}) {
	const extensions = [inlineKatex(options), blockKatex(options)];
	return { extensions };
}
/**
 * Finds where the next math token of the requested kind may start in `src`.
 *
 * A position qualifies when:
 *  1. an opening delimiter whose `display` flag equals `displayMode` begins there,
 *  2. it is at index 0 or preceded by one of ALLOWED_SURROUNDING_CHARS, and
 *  3. the matching rule (blockRule / inlineRule) matches the text from there.
 *
 * Candidates are visited in text order and the returned index is always
 * relative to `src` itself.
 *
 * @param src - Remaining source text handed over by the lexer.
 * @param displayMode - `true` to look for block math, `false` for inline math.
 * @returns The index of the first qualifying position, or `undefined` if none.
 */
function katexStart(src, displayMode: boolean) {
	if (!src) {
		return undefined;
	}

	const ruleReg = displayMode ? blockRule : inlineRule;
	// Built once per call instead of once per candidate.
	const surroundingChar = new RegExp(`[${ALLOWED_SURROUNDING_CHARS}]`, 'u');
	const openers = DELIMITER_LIST.filter((delimiter) => delimiter.display === displayMode).map(
		(delimiter) => delimiter.left
	);

	let searchFrom = 0;
	while (searchFrom < src.length) {
		// Pick the opener that occurs first in the text, not the last one in list order.
		let index = -1;
		let opener = '';
		for (const left of openers) {
			const found = src.indexOf(left, searchFrom);
			if (found !== -1 && (index === -1 || found < index)) {
				index = found;
				opener = left;
			}
		}

		if (index === -1) {
			return undefined;
		}

		// Check if the delimiter is preceded by a special character.
		// If it is, then it's potentially a math formula.
		const precededOk = index === 0 || surroundingChar.test(src.charAt(index - 1));
		if (precededOk && ruleReg.test(src.substring(index))) {
			return index;
		}

		// Not a formula here: continue scanning right after this opener.
		searchFrom = index + opener.length;
	}

	return undefined;
}
/**
 * Tries to read one math token from the very start of `src`.
 *
 * @param src - Source text; the rules are anchored, so only a formula at index 0 matches.
 * @param tokens - Token list supplied by the caller; not used here.
 * @param displayMode - `true` to use blockRule / 'blockKatex', `false` for inlineRule / 'inlineKatex'.
 * @returns `{ type, raw, text, displayMode }` on a match, otherwise `undefined`.
 *   `raw` is the full matched text including delimiters. `text` is the formula
 *   body, or `undefined` when the body contains only whitespace.
 */
function katexTokenizer(src, tokens, displayMode: boolean) {
	const ruleReg = displayMode ? blockRule : inlineRule;
	const match = src.match(ruleReg);
	if (!match) {
		return undefined;
	}

	// Group 1 wraps the whole alternation; groups 2+ are the per-delimiter bodies,
	// of which only the alternative that matched is populated.
	const text = match.slice(2).find((group) => group && group.trim());

	return {
		type: displayMode ? 'blockKatex' : 'inlineKatex',
		raw: match[0],
		text: text,
		displayMode
	};
}
/**
 * Builds the inline-level extension object ('inlineKatex').
 *
 * @param options - Accepted for API compatibility; not read by this function.
 * @returns An extension with `start`, `tokenizer` and `renderer` hooks.
 */
function inlineKatex(options) {
	return {
		name: 'inlineKatex',
		level: 'inline',
		start(src) {
			return katexStart(src, false);
		},
		tokenizer(src, tokens) {
			return katexTokenizer(src, tokens, false);
		},
		// Emits the formula source as-is (empty string when there is none);
		// no KaTeX rendering happens in this hook.
		renderer(token) {
			return `${token?.text ?? ''}`;
		}
	};
}
/**
 * Builds the block-level extension object ('blockKatex').
 *
 * @param options - Accepted for API compatibility; not read by this function.
 * @returns An extension with `start`, `tokenizer` and `renderer` hooks.
 */
function blockKatex(options) {
	return {
		name: 'blockKatex',
		level: 'block',
		start(src) {
			return katexStart(src, true);
		},
		tokenizer(src, tokens) {
			return katexTokenizer(src, tokens, true);
		},
		// Emits the formula source as-is (empty string when there is none);
		// no KaTeX rendering happens in this hook.
		renderer(token) {
			return `${token?.text ?? ''}`;
		}
	};
}