2024-08-08 22:01:38 +00:00
import katex from 'katex' ;
2024-08-14 14:07:39 +00:00
/**
 * Math delimiter pairs recognised by this extension.
 *
 * Entries with `display: true` feed both the block rule and the inline rule;
 * entries with `display: false` feed the inline rule only.
 */
const DELIMITER_LIST = [
  { left: '$$', right: '$$', display: true },
  { left: '$', right: '$', display: false },
  { left: '\\pu{', right: '}', display: false },
  { left: '\\ce{', right: '}', display: false },
  { left: '\\(', right: '\\)', display: false },
  { left: '\\[', right: '\\]', display: true },
  { left: '\\begin{equation}', right: '\\end{equation}', display: true }
];
2024-08-14 14:07:39 +00:00
2025-05-06 10:38:42 +00:00
// Defines characters that are allowed to immediately precede or follow a math delimiter.
// The value is the *body* of a regex character class: it is spliced between `[` and `]` and
// compiled with the `u` flag. Any literal `]` must therefore be escaped, otherwise it closes
// the class early and the RegExp constructor throws.
const ALLOWED_SURROUNDING_CHARS =
  '\\s。, 、、;;„“‘’“”()「」『』\\[\\]《》【】‹›«»…⋯::?!~⇒?!-\\/:-@\\[-`{-~\\p{Script=Han}\\p{Script=Hiragana}\\p{Script=Katakana}\\p{Script=Hangul}';
// Modified to fit more formats in different languages. Originally: '\\s?。,、;!-\\/:-@\\[-`{-~\\p{Script=Han}\\p{Script=Hiragana}\\p{Script=Katakana}\\p{Script=Hangul}';
2025-05-06 10:38:42 +00:00
2024-08-14 14:07:39 +00:00
// const DELIMITER_LIST = [
// { left: '$$', right: '$$', display: false },
// { left: '$', right: '$', display: false },
// ];
// const inlineRule = /^(\${1,2})(?!\$)((?:\\.|[^\\\n])*?(?:\\.|[^\\\n\$]))\1(?=[\s?!\.,:?!。,:]|$)/;
// const blockRule = /^(\${1,2})\n((?:\\[^]|[^\\])+?)\n\1(?:\n|$)/;
// Regex source fragments, one alternative per delimiter pair, filled in by generateRegexRules().
let inlinePatterns: string[] = [];
let blockPatterns: string[] = [];
/**
 * Escapes regex metacharacters so that `string` can be embedded in a pattern and matched
 * literally.
 *
 * `-` is emitted as `\x2d` rather than `\-`: the latter is only a legal escape inside a
 * character class when the pattern is compiled with the `u` flag, whereas `\x2d` is valid
 * everywhere.
 *
 * @param string Literal text to embed in a regular expression.
 * @returns The text with every metacharacter escaped.
 */
function escapeRegex(string: string): string {
  return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, (char) =>
    char === '-' ? '\\x2d' : `\\${char}`
  );
}
/**
 * Builds the anchored regular expressions used to recognise inline and block math.
 *
 * Every delimiter contributes an alternative to the inline rule; delimiters with
 * `display: true` additionally contribute an alternative to the block rule, which requires a
 * newline right after the opening delimiter and right before the closing one.
 *
 * Both rules have the shape `^(alt1|alt2|...)(?=[allowed chars]|$)`: capture group 1 is the
 * whole formula including its delimiters, and each alternative owns one further capture group
 * holding the content between the delimiters.
 *
 * The generated alternatives are also stored in the module-level `inlinePatterns` and
 * `blockPatterns` arrays. Those arrays are reset first, so calling this function again
 * replaces their contents instead of appending duplicates.
 *
 * @param delimiters Delimiter pairs to compile.
 * @returns The compiled `inlineRule` and `blockRule`, both using the `u` flag.
 */
function generateRegexRules(
  delimiters: ReadonlyArray<{ left: string; right: string; display: boolean }>
): { inlineRule: RegExp; blockRule: RegExp } {
  inlinePatterns.length = 0;
  blockPatterns.length = 0;

  // Lazily matches the formula body: either a backslash followed by any character (so an
  // escaped delimiter does not end the formula) or any single non-backslash character.
  const content = '((?:\\\\[^]|[^\\\\])+?)';

  for (const { left, right, display } of delimiters) {
    // Ensure regex-safe delimiters
    const escapedLeft = escapeRegex(left);
    const escapedRight = escapeRegex(right);

    if (!display) {
      // For inline delimiters, we match everything
      inlinePatterns.push(`${escapedLeft}${content}${escapedRight}`);
    } else {
      // Block delimiters double as inline delimiters when not followed by a newline.
      // NOTE(review): the second `(?!\n)` sits directly in front of the closing delimiter, so it
      // can only fail for a closing delimiter that itself starts with a newline; confirm
      // whether a lookbehind was intended before changing it.
      inlinePatterns.push(`${escapedLeft}(?!\\n)${content}(?!\\n)${escapedRight}`);
      blockPatterns.push(`${escapedLeft}\\n${content}\\n${escapedRight}`);
    }
  }

  // Math formulas can end in special characters
  const trailer = `(?=[${ALLOWED_SURROUNDING_CHARS}]|$)`;
  const inlineRule = new RegExp(`^(${inlinePatterns.join('|')})${trailer}`, 'u');
  const blockRule = new RegExp(`^(${blockPatterns.join('|')})${trailer}`, 'u');

  return { inlineRule, blockRule };
}
// Compile the inline and block matchers once, at module load, from DELIMITER_LIST.
const { inlineRule , blockRule } = generateRegexRules ( DELIMITER_LIST ) ;
2024-08-14 14:39:02 +00:00
/**
 * Entry point of the module: bundles the inline and the block KaTeX extensions.
 *
 * @param options Forwarded unchanged to both extension factories.
 * @returns An object whose `extensions` array holds the inline extension followed by the
 *   block extension.
 */
export default function (options = {}) {
  return {
    extensions: [inlineKatex(options), blockKatex(options)]
  };
}
2024-09-24 07:58:15 +00:00
/**
 * Finds the offset in `src` at which the next math formula of the requested kind starts.
 *
 * A position qualifies when
 *   1. one of the opening delimiters for `displayMode` occurs there,
 *   2. it is at the very start of `src` or preceded by one of ALLOWED_SURROUNDING_CHARS, and
 *   3. the text from that position onwards matches the block / inline rule.
 *
 * When a candidate is rejected, the search resumes right after its opening delimiter and the
 * next occurrence of its closing delimiter is treated as already paired, so it is not
 * mistaken for a new opener (relevant for `$`, where opener and closer are identical).
 *
 * Compared with the previous implementation:
 *   - the earliest opener in the text is examined first, not whichever delimiter comes last
 *     in DELIMITER_LIST;
 *   - the returned offset always refers to `src` itself, never to a shortened working copy;
 *   - the "allowed preceding character" RegExp is built once per call, not once per candidate.
 *
 * @param src Text to scan.
 * @param displayMode `true` to look for block math, `false` for inline math.
 * @returns The offset of the formula's opening delimiter in `src`, or `undefined` if there
 *   is none.
 */
function katexStart(src, displayMode: boolean) {
  if (!src) {
    return undefined;
  }

  const ruleReg = displayMode ? blockRule : inlineRule;
  const delimiters = DELIMITER_LIST.filter((delimiter) => delimiter.display === displayMode);
  const allowedBefore = new RegExp(`[${ALLOWED_SURROUNDING_CHARS}]`, 'u');

  // Closing delimiters already paired with a rejected opener, as [start, end) offsets in src.
  const pairedClosers: Array<[number, number]> = [];
  const isPaired = (from: number, to: number) =>
    pairedClosers.some(([start, end]) => from < end && to > start);

  // indexOf that skips occurrences overlapping an already paired closing delimiter.
  const findFrom = (needle: string, from: number): number => {
    let at = src.indexOf(needle, from);
    while (at !== -1 && isPaired(at, at + needle.length)) {
      at = src.indexOf(needle, at + 1);
    }
    return at;
  };

  let cursor = 0;
  while (cursor < src.length) {
    // Pick the opener that occurs first in the remaining text.
    let index = -1;
    let opener: (typeof delimiters)[number] | undefined;
    for (const delimiter of delimiters) {
      const found = findFrom(delimiter.left, cursor);
      if (found !== -1 && (index === -1 || found < index)) {
        index = found;
        opener = delimiter;
      }
    }
    if (index === -1 || opener === undefined) {
      return undefined;
    }

    // Check if the delimiter is preceded by a special character.
    // If it is, then it's potentially a math formula.
    const precededByAllowedChar = index === 0 || allowedBefore.test(src.charAt(index - 1));
    if (precededByAllowedChar && ruleReg.test(src.substring(index))) {
      return index;
    }

    // Not a formula: step over the opener and retire the closer that belongs to it.
    cursor = index + opener.left.length;
    const closerAt = findFrom(opener.right, cursor);
    if (closerAt !== -1) {
      pairedClosers.push([closerAt, closerAt + opener.right.length]);
    }
  }

  return undefined;
}
/**
 * Tries to read a math formula from the very start of `src`.
 *
 * @param src Remaining source text; the formula must begin at offset 0.
 * @param tokens Unused; accepted so the signature matches the extension `tokenizer` hook.
 * @param displayMode `true` to apply the block rule, `false` to apply the inline rule.
 * @returns A token with `type` (`'blockKatex'` or `'inlineKatex'`), `raw` (the matched text),
 *   `text` (the formula body, or `undefined` when it is empty or whitespace-only) and
 *   `displayMode`; `undefined` when `src` does not start with a formula.
 */
function katexTokenizer(src, tokens, displayMode: boolean) {
  const ruleReg = displayMode ? blockRule : inlineRule;
  const match = src.match(ruleReg);
  if (!match) {
    return undefined;
  }

  // Group 1 is the whole formula including its delimiters. Groups 2 and up are the content
  // captures of the individual delimiter alternatives; take the first one that is non-blank.
  const text = match.slice(2).find((item) => item && item.trim());

  return {
    type: displayMode ? 'blockKatex' : 'inlineKatex',
    raw: match[0],
    text: text,
    displayMode
  };
}
2024-09-24 07:58:15 +00:00
/**
 * Creates the inline-level math extension.
 *
 * @param options Currently unused.
 * @returns An extension object named `inlineKatex` with `start`, `tokenizer` and `renderer`
 *   hooks; the renderer returns the token's formula text as-is, or `''` if there is none.
 */
function inlineKatex(options) {
  return {
    name: 'inlineKatex',
    level: 'inline',
    start(src) {
      return katexStart(src, false);
    },
    tokenizer(src, tokens) {
      return katexTokenizer(src, tokens, false);
    },
    renderer(token) {
      return `${token?.text ?? ''}`;
    }
  };
}
2024-09-24 07:58:15 +00:00
/**
 * Creates the block-level math extension.
 *
 * @param options Currently unused.
 * @returns An extension object named `blockKatex` with `start`, `tokenizer` and `renderer`
 *   hooks; the renderer returns the token's formula text as-is, or `''` if there is none.
 */
function blockKatex(options) {
  return {
    name: 'blockKatex',
    level: 'block',
    start(src) {
      return katexStart(src, true);
    },
    tokenizer(src, tokens) {
      return katexTokenizer(src, tokens, true);
    },
    renderer(token) {
      return `${token?.text ?? ''}`;
    }
  };
}