refac: sources and citations

This commit is contained in:
Timothy Jaeryang Baek 2025-11-23 18:27:57 -05:00
parent b0491886bc
commit ec45d77ce9
11 changed files with 201 additions and 62 deletions

View file

@ -169,7 +169,7 @@
></iframe>
{:else}
<pre class="text-sm dark:text-gray-400 whitespace-pre-line">
{document.document}
{document.document.trim()}
</pre>
{/if}
</div>

View file

@ -9,6 +9,8 @@
import { mentionExtension } from '$lib/utils/marked/mention-extension';
import MarkdownTokens from './Markdown/MarkdownTokens.svelte';
import footnoteExtension from '$lib/utils/marked/footnote-extension';
import citationExtension from '$lib/utils/marked/citation-extension';
export let id = '';
export let content;
@ -39,6 +41,8 @@
marked.use(markedKatexExtension(options));
marked.use(markedExtension(options));
marked.use(citationExtension(options));
marked.use(footnoteExtension(options));
marked.use(disableSingleTilde);
marked.use({
extensions: [mentionExtension({ triggerChar: '@' }), mentionExtension({ triggerChar: '#' })]
@ -47,7 +51,7 @@
$: (async () => {
if (content) {
tokens = marked.lexer(
replaceTokens(processResponseContent(content), sourceIds, model?.name, $user?.name)
replaceTokens(processResponseContent(content), model?.name, $user?.name)
);
}
})();
@ -61,6 +65,7 @@
{save}
{preview}
{editCodeBlock}
{sourceIds}
{topPadding}
{onTaskClick}
{onSourceClick}

View file

@ -3,14 +3,11 @@
import type { Token } from 'marked';
import { WEBUI_BASE_URL } from '$lib/constants';
import Source from './Source.svelte';
import { settings } from '$lib/stores';
export let id: string;
export let token: Token;
export let onSourceClick: Function = () => {};
let html: string | null = null;
$: if (token.type === 'html' && token?.text) {
@ -129,8 +126,6 @@
}}
></iframe>
{/if}
{:else if token.text.includes(`<source_id`)}
<Source {id} {token} onClick={onSourceClick} />
{:else if token.text.trim().match(/^<br\s*\/?>$/i)}
<br />
{:else}

View file

@ -17,10 +17,12 @@
import TextToken from './MarkdownInlineTokens/TextToken.svelte';
import CodespanToken from './MarkdownInlineTokens/CodespanToken.svelte';
import MentionToken from './MarkdownInlineTokens/MentionToken.svelte';
import SourceToken from './SourceToken.svelte';
export let id: string;
export let done = true;
export let tokens: Token[];
export let sourceIds = [];
export let onSourceClick: Function = () => {};
</script>
@ -68,6 +70,17 @@
></iframe>
{:else if token.type === 'mention'}
<MentionToken {token} />
{:else if token.type === 'footnote'}
{@html DOMPurify.sanitize(
`<sup class="footnote-ref footnote-ref-text">${token.escapedText}</sup>`
) || ''}
{:else if token.type === 'citation'}
<SourceToken {id} {token} {sourceIds} onClick={onSourceClick} />
<!-- {#if token.ids && token.ids.length > 0}
{#each token.ids as sourceId}
<Source id={sourceId - 1} title={sourceIds[sourceId - 1]} onClick={onSourceClick} />
{/each}
{/if} -->
{:else if token.type === 'text'}
<TextToken {token} {done} />
{/if}

View file

@ -21,7 +21,6 @@
import Tooltip from '$lib/components/common/Tooltip.svelte';
import Download from '$lib/components/icons/Download.svelte';
import Source from './Source.svelte';
import HtmlToken from './HTMLToken.svelte';
import Clipboard from '$lib/components/icons/Clipboard.svelte';
@ -29,6 +28,7 @@
export let tokens: Token[];
export let top = true;
export let attributes = {};
export let sourceIds = [];
export let done = true;
@ -96,6 +96,7 @@
id={`${id}-${tokenIdx}-h`}
tokens={token.tokens}
{done}
{sourceIds}
{onSourceClick}
/>
</svelte:element>
@ -147,6 +148,7 @@
id={`${id}-${tokenIdx}-header-${headerIdx}`}
tokens={header.tokens}
{done}
{sourceIds}
{onSourceClick}
/>
</div>
@ -172,6 +174,7 @@
id={`${id}-${tokenIdx}-row-${rowIdx}-${cellIdx}`}
tokens={cell.tokens}
{done}
{sourceIds}
{onSourceClick}
/>
</div>
@ -348,6 +351,7 @@
id={`${id}-${tokenIdx}-p`}
tokens={token.tokens ?? []}
{done}
{sourceIds}
{onSourceClick}
/>
</p>
@ -359,6 +363,7 @@
id={`${id}-${tokenIdx}-t`}
tokens={token.tokens}
{done}
{sourceIds}
{onSourceClick}
/>
{:else}
@ -370,6 +375,7 @@
id={`${id}-${tokenIdx}-p`}
tokens={token.tokens ?? []}
{done}
{sourceIds}
{onSourceClick}
/>
{:else}

View file

@ -1,23 +1,10 @@
<script lang="ts">
export let id;
export let token;
export let title: string = 'N/A';
export let onClick: Function = () => {};
let attributes: Record<string, string | undefined> = {};
/**
 * Collects every `name="value"` pair found in `input` into a plain object.
 * Names are runs of word characters; values are whatever sits between the
 * double quotes (possibly empty). A repeated name keeps its last value.
 */
function extractAttributes(input: string): Record<string, string> {
	const collected: Record<string, string> = {};
	for (const [, key, value] of input.matchAll(/(\w+)="([^"]*)"/g)) {
		collected[key] = value;
	}
	return collected;
}
// Helper function to return only the domain from a URL
function getDomain(url: string): string {
const domain = url.replace('http://', '').replace('https://', '').split(/[/?#]/)[0];
@ -44,23 +31,17 @@
}
return title;
};
$: attributes = extractAttributes(token.text);
</script>
{#if attributes.title !== 'N/A'}
{#if title !== 'N/A'}
<button
class="text-xs font-medium w-fit translate-y-[2px] px-2 py-0.5 dark:bg-white/5 dark:text-white/60 dark:hover:text-white bg-gray-50 text-black/60 hover:text-black transition rounded-lg"
class="text-[10px] w-fit translate-y-[2px] px-2 py-0.5 dark:bg-white/5 dark:text-white/80 dark:hover:text-white bg-gray-50 text-black/80 hover:text-black transition rounded-xl"
on:click={() => {
onClick(id, attributes.data);
onClick(id);
}}
>
<span class="line-clamp-1">
{getDisplayTitle(
decodeURIComponent(attributes.title)
? formattedTitle(decodeURIComponent(attributes.title))
: ''
)}
{getDisplayTitle(formattedTitle(decodeURIComponent(title)))}
</span>
</button>
{/if}

View file

@ -0,0 +1,70 @@
<script lang="ts">
	// Renders one `citation` token: a single source chip when the token
	// references one source, or a "first source +N" chip with a hover preview
	// listing every referenced source when it references several.
	import { LinkPreview } from 'bits-ui';
	import Source from './Source.svelte';

	export let id;
	export let token;
	export let sourceIds = [];
	export let onClick: Function = () => {};

	let containerElement;

	// Citation numbers carried by the token (1-based); empty when the token has none.
	$: ids = token?.ids ?? [];

	// Decodes a percent-encoded title, falling back to the raw text when it is
	// not valid percent-encoding (decodeURIComponent throws URIError on e.g. "100% done").
	function safeDecode(value: string): string {
		try {
			return decodeURIComponent(value);
		} catch {
			return value;
		}
	}

	// Helper function to return only the domain from a URL
	function getDomain(url: string): string {
		const domain = url.replace('http://', '').replace('https://', '').split(/[/?#]/)[0];

		if (domain.startsWith('www.')) {
			return domain.slice(4);
		}
		return domain;
	}

	// Helper function to check if text is a URL and return the domain
	function formattedTitle(title: string): string {
		if (title.startsWith('http')) {
			return getDomain(title);
		}

		return title;
	}

	// Shortens long titles to "first 15 chars...last 10 chars"; empty titles become 'N/A'
	const getDisplayTitle = (title: string) => {
		if (!title) return 'N/A';
		if (title.length > 30) {
			return title.slice(0, 15) + '...' + title.slice(-10);
		}
		return title;
	};
</script>

{#if ids.length === 1}
	<Source id={ids[0] - 1} title={sourceIds[ids[0] - 1]} {onClick} />
{:else if ids.length > 1}
	<!-- Nothing is rendered for a token without ids -->
	<LinkPreview.Root openDelay={0}>
		<LinkPreview.Trigger>
			<button
				class="text-[10px] w-fit translate-y-[2px] px-2 py-0.5 dark:bg-white/5 dark:text-white/80 dark:hover:text-white bg-gray-50 text-black/80 hover:text-black transition rounded-xl"
			>
				<span class="line-clamp-1">
					{getDisplayTitle(formattedTitle(safeDecode(sourceIds[ids[0] - 1] ?? '')))}
					<span class="dark:text-white/50 text-black/50">+{ids.length - 1}</span>
				</span>
			</button>
		</LinkPreview.Trigger>
		<LinkPreview.Content
			class="z-[999]"
			align="start"
			strategy="fixed"
			sideOffset={6}
			el={containerElement}
		>
			<div class="bg-gray-50 dark:bg-gray-850 rounded-xl p-1 cursor-pointer">
				{#each ids as sourceId}
					<div class="">
						<Source id={sourceId - 1} title={sourceIds[sourceId - 1]} {onClick} />
					</div>
				{/each}
			</div>
		</LinkPreview.Content>
	</LinkPreview.Root>
{/if}

View file

@ -797,11 +797,11 @@
onTaskClick={async (e) => {
console.log(e);
}}
onSourceClick={async (id, idx) => {
console.log(id, idx);
onSourceClick={async (id) => {
console.log(id);
if (citationsElement) {
citationsElement?.showSourceModal(idx - 1);
citationsElement?.showSourceModal(id);
}
}}
onAddMessages={({ modelId, parentId, messages }) => {

View file

@ -32,7 +32,7 @@ function escapeRegExp(string: string): string {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
export const replaceTokens = (content, sourceIds, char, user) => {
export const replaceTokens = (content, char, user) => {
const tokens = [
{ regex: /{{char}}/gi, replacement: char },
{ regex: /{{user}}/gi, replacement: user },
@ -67,30 +67,6 @@ export const replaceTokens = (content, sourceIds, char, user) => {
}
});
if (Array.isArray(sourceIds)) {
// Match both [1], [2], and [1,2,3] forms
const multiRefRegex = /\[([\d,\s]+)\]/g;
segment = segment.replace(multiRefRegex, (match, group) => {
// Extract numbers like 1,2,3
const indices = group
.split(',')
.map((n) => parseInt(n.trim(), 10))
.filter((n) => !isNaN(n));
// Replace each index with a <source_id> tag
const sources = indices
.map((idx) => {
const sourceId = sourceIds[idx - 1];
return sourceId
? `<source_id data="${idx}" title="${encodeURIComponent(sourceId)}" />`
: `[${idx}]`;
})
.join('');
return sources;
});
}
return segment;
});

View file

@ -0,0 +1,55 @@
/**
 * Builds the `marked` inline extension that recognises numeric citation
 * markers such as `[1]`, `[1,2]`, or a run of adjacent markers like `[1][2,3]`.
 *
 * A run of adjacent markers becomes ONE `citation` token whose `ids` array
 * lists every referenced number in order of appearance.
 *
 * A bracket group that is immediately followed by `(` is not treated as a
 * citation: that is the text part of an inline markdown link
 * (`[1](https://example.com)`), and consuming it here would break the link.
 *
 * @returns The extension object (`name`, `level`, `start`, `tokenizer`, `renderer`).
 */
export function citationExtension() {
	return {
		name: 'citation',
		level: 'inline' as const,

		/** Index of the first `[number…]` candidate in `src`, or -1 when there is none. */
		start(src: string) {
			return src.search(/\[(\d[\d,\s]*)\]/);
		},

		/**
		 * Tries to read a citation run at the very beginning of `src`.
		 *
		 * @returns A `citation` token, or `undefined` when `src` does not start with one.
		 */
		tokenizer(src: string) {
			// Sticky: every exec() must match exactly where the previous group ended,
			// so only directly adjacent groups are collected. The first character
			// after `[` must be a digit, which also rules out footnotes (`[^1]`).
			const groupRule = /\[(\d[\d,\s]*)\]/y;
			const groups: { end: number; ids: number[] }[] = [];

			let m: RegExpExecArray | null;
			while ((m = groupRule.exec(src)) !== null) {
				const ids = m[1]
					.split(',')
					.map((part) => Number.parseInt(part.trim(), 10))
					.filter((n) => !Number.isNaN(n));
				groups.push({ end: groupRule.lastIndex, ids });
			}

			// "[2](url)" is a markdown link: hand the last group back to marked.
			if (groups.length > 0 && src.charAt(groups[groups.length - 1].end) === '(') {
				groups.pop();
			}

			if (groups.length === 0) return;

			return {
				type: 'citation',
				raw: src.slice(0, groups[groups.length - 1].end),
				ids: groups.flatMap((group) => group.ids) // merged list
			};
		},

		/** Plain-text fallback rendering, e.g. "1,2,3". */
		renderer(token: any) {
			return token.ids.join(',');
		}
	};
}
/**
 * Default export: a `marked` configuration object that registers the
 * citation extension, suitable for passing to `marked.use(...)`.
 */
export default function () {
	const extensions = [citationExtension()];
	return { extensions };
}

View file

@ -0,0 +1,38 @@
// footnote-extension.ts
// Simple extension for marked to support footnote references like [^1], [^note]
/**
 * Returns `s` with the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`)
 * replaced by their entity equivalents; every other character is copied through.
 */
function escapeHtml(s: string) {
	let escaped = '';
	for (const ch of s) {
		switch (ch) {
			case '&':
				escaped += '&amp;';
				break;
			case '<':
				escaped += '&lt;';
				break;
			case '>':
				escaped += '&gt;';
				break;
			case '"':
				escaped += '&quot;';
				break;
			case "'":
				escaped += '&#39;';
				break;
			default:
				escaped += ch;
		}
	}
	return escaped;
}
/**
 * Builds the `marked` inline extension for footnote references such as
 * `[^1]` or `[^note]` (letters, digits, `_` and `-`, with optional
 * whitespace around the label).
 *
 * The produced `footnote` token carries the label both as-is (`text`) and
 * HTML-escaped (`escapedText`).
 */
export function footnoteExtension() {
	const anywhere = /\[\^\s*[a-zA-Z0-9_-]+\s*\]/;
	const atStart = /^\[\^\s*([a-zA-Z0-9_-]+)\s*\]/;

	return {
		name: 'footnote',
		level: 'inline' as const,

		// Position of the next footnote reference in `src` (-1 when absent)
		start(src: string) {
			return src.search(anywhere);
		},

		// Produces a token only when `src` begins with a footnote reference
		tokenizer(src: string) {
			const found = atStart.exec(src);
			if (!found) return;

			const [raw, label] = found;
			const escapedText = escapeHtml(label);
			return {
				type: 'footnote',
				raw,
				text: label,
				escapedText
			};
		}
	};
}
/**
 * Default export: a `marked` configuration object that registers the
 * footnote extension, suitable for passing to `marked.use(...)`.
 */
export default function () {
	const extensions = [footnoteExtension()];
	return { extensions };
}