From c0ec04935b4eea3d334bfdec2fc41278f1085a49 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Fri, 26 Dec 2025 02:05:03 +0400 Subject: [PATCH] refac: citation --- .../chat/ChatControls/Embeds.svelte | 9 +++- .../components/chat/Messages/Citations.svelte | 35 ++++++++++----- .../chat/Messages/Markdown/SourceToken.svelte | 10 +++-- src/lib/utils/marked/citation-extension.ts | 45 ++++++++++++------- 4 files changed, 69 insertions(+), 30 deletions(-) diff --git a/src/lib/components/chat/ChatControls/Embeds.svelte b/src/lib/components/chat/ChatControls/Embeds.svelte index e15c86c8bd..126124bc69 100644 --- a/src/lib/components/chat/ChatControls/Embeds.svelte +++ b/src/lib/components/chat/ChatControls/Embeds.svelte @@ -6,7 +6,7 @@ export let overlay = false; - const getSrcUrl = (url: string, chatId?: string, messageId?: string) => { + const getSrcUrl = (url: string, chatId?: string, messageId?: string, sourceId: string) => { try { const parsed = new URL(url); @@ -18,6 +18,10 @@ parsed.searchParams.set('message_id', messageId); } + if (sourceId) { + parsed.searchParams.set('source_id', sourceId); + } + return parsed.toString(); } catch { // Fallback for relative URLs or invalid input @@ -26,6 +30,7 @@ if (chatId) parts.push(`chat_id=${encodeURIComponent(chatId)}`); if (messageId) parts.push(`message_id=${encodeURIComponent(messageId)}`); + if (sourceId) parts.push(`source_id=${encodeURIComponent(sourceId)}`); if (parts.length === 0) return url; @@ -68,7 +73,7 @@ {/if} diff --git a/src/lib/components/chat/Messages/Citations.svelte b/src/lib/components/chat/Messages/Citations.svelte index 2799059b07..2db74581bd 100644 --- a/src/lib/components/chat/Messages/Citations.svelte +++ b/src/lib/components/chat/Messages/Citations.svelte @@ -23,12 +23,26 @@ let selectedCitation: any = null; - export const showSourceModal = (sourceIdx) => { - if (citations[sourceIdx]) { - console.log('Showing citation modal for:', citations[sourceIdx]); + export const showSourceModal = (sourceId) => { + let index; + let suffix = null; - if (citations[sourceIdx]?.source?.embed_url) { - const embedUrl = citations[sourceIdx].source.embed_url; + if (typeof sourceId === 'string') { + const output = sourceId.split('#'); + index = parseInt(output[0]) - 1; + + if (output.length > 1) { + suffix = output[1]; + } + } else { + index = sourceId - 1; + } + + if (citations[index]) { + console.log('Showing citation modal for:', citations[index]); + + if (citations[index]?.source?.embed_url) { + const embedUrl = citations[index].source.embed_url; if (embedUrl) { if (readOnly) { // Open in new tab if readOnly @@ -39,18 +53,19 @@ showEmbeds.set(true); embed.set({ url: embedUrl, - title: citations[sourceIdx]?.source?.name || 'Embedded Content', - source: citations[sourceIdx], + title: citations[index]?.source?.name || 'Embedded Content', + source: citations[index], chatId: chatId, - messageId: id + messageId: id, + sourceId: sourceId }); } } else { - selectedCitation = citations[sourceIdx]; + selectedCitation = citations[index]; showCitationModal = true; } } else { - selectedCitation = citations[sourceIdx]; + selectedCitation = citations[index]; showCitationModal = true; } } diff --git a/src/lib/components/chat/Messages/Markdown/SourceToken.svelte b/src/lib/components/chat/Messages/Markdown/SourceToken.svelte index 7da6d8f89f..ac2b84cdcd 100644 --- a/src/lib/components/chat/Messages/Markdown/SourceToken.svelte +++ b/src/lib/components/chat/Messages/Markdown/SourceToken.svelte @@ -41,7 +41,9 @@ {#if sourceIds} {#if (token?.ids ?? []).length == 1} - + {@const id = token.ids[0]} + {@const identifier = token.citationIdentifiers ? token.citationIdentifiers[0] : id - 1} + {:else} @@ -65,9 +67,11 @@ el={containerElement} >
- {#each token.ids as sourceId} + {#each token.citationIdentifiers ?? token.ids as identifier} + {@const id = + typeof identifier === 'string' ? parseInt(identifier.split('#')[0]) : identifier}
- +
{/each}
diff --git a/src/lib/utils/marked/citation-extension.ts b/src/lib/utils/marked/citation-extension.ts index dad266c9f0..ca8325fe11 100644 --- a/src/lib/utils/marked/citation-extension.ts +++ b/src/lib/utils/marked/citation-extension.ts @@ -4,46 +4,61 @@ export function citationExtension() { level: 'inline' as const, start(src: string) { - // Trigger on any [number] - return src.search(/\[(\d[\d,\s]*)\]/); + // Trigger on any [number] or [number#suffix] + // We check for a digit immediately after [ to avoid matching arbitrary links + return src.search(/\[\d/); }, tokenizer(src: string) { // Avoid matching footnotes if (/^\[\^/.test(src)) return; - // Match ONE OR MORE adjacent [1] or [1,2] blocks - // Example matched: "[1][2,3][4]" - const rule = /^(\[(?:\d[\d,\s]*)\])+/; + // Match ONE OR MORE adjacent [1], [1,2], or [1#foo] blocks + // Example matched: "[1][2,3][4#bar]" + // We allow: digits, commas, spaces, and # followed by non-control chars (excluding ] and ,) + const rule = /^(\[(?:\d+(?:#[^,\]\s]+)?(?:,\s*\d+(?:#[^,\]\s]+)?)*)\])+/; const match = rule.exec(src); if (!match) return; const raw = match[0]; // Extract ALL bracket groups inside the big match - const groupRegex = /\[([\d,\s]+)\]/g; + const groupRegex = /\[([^\]]+)\]/g; const ids: number[] = []; + const citationIdentifiers: string[] = []; let m: RegExpExecArray | null; while ((m = groupRegex.exec(raw))) { - const parsed = m[1] - .split(',') - .map((n) => parseInt(n.trim(), 10)) - .filter((n) => !isNaN(n)); - - ids.push(...parsed); + // m[1] is the content inside brackets, e.g. "1, 2#foo" + const parts = m[1].split(',').map((p) => p.trim()); + + parts.forEach((part) => { + // Check if it starts with digit + const match = /^(\d+)(?:#(.+))?$/.exec(part); + if (match) { + const index = parseInt(match[1], 10); + if (!isNaN(index)) { + ids.push(index); + // Store the full identifier ("1#foo" or "1") + citationIdentifiers.push(part); + } + } + }); } + + if (ids.length === 0) return; return { type: 'citation', raw, - ids // merged list + ids, // merged list of integers for legacy title lookup + citationIdentifiers // merged list of full identifiers for granular targeting }; }, renderer(token: any) { - // e.g. "1,2,3" - return token.ids.join(','); + // fallback text + return token.raw; } }; }