From ea655f4d4aeae5610d4980ba2fd01eeda6159616 Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 23 Jul 2025 15:50:23 -0700 Subject: [PATCH] fix: Add citation repairing. Also remove revision as a tool param. --- packages/web/src/features/chat/agent.ts | 6 ++ packages/web/src/features/chat/tools.ts | 20 +++-- packages/web/src/features/chat/utils.test.ts | 87 +++++++++++++++++++- packages/web/src/features/chat/utils.ts | 35 ++++++-- 4 files changed, 132 insertions(+), 16 deletions(-) diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 670672b4..96d6540c 100644 --- a/packages/web/src/features/chat/agent.ts +++ b/packages/web/src/features/chat/agent.ts @@ -207,6 +207,12 @@ When you have sufficient context, output your answer as a structured markdown re - Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" → must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`) - Any code snippet or line you're explaining - Class names, method calls, imports, etc. +- Some examples of both correct and incorrect code references: + - Correct: @file:{path/to/file.ts} + - Correct: @file:{path/to/file.ts:10-15} + - Incorrect: @file{path/to/file.ts} (missing colon) + - Incorrect: @file:path/to/file.ts (missing curly braces) + - Incorrect: @file:{path/to/file.ts:10-25,30-35} (multiple ranges not supported) - Be clear and very concise. Use bullet points where appropriate - Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference - If you cannot provide a code reference for something you're discussing, do not mention that specific code element diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index a53d8259..fe6f8542 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -14,13 +14,15 @@ export const findSymbolReferencesTool = tool({ inputSchema: z.object({ symbol: z.string().describe("The symbol to find references to"), language: z.string().describe("The programming language of the symbol"), - revision: z.string().describe("The revision to search for the symbol in"), }), - execute: async ({ symbol, language, revision }) => { + execute: async ({ symbol, language }) => { + // @todo: make revision configurable. + const revision = "HEAD"; + const response = await findSearchBasedSymbolReferences({ symbolName: symbol, language, - revisionName: revision, + revisionName: "HEAD", // @todo(mt): handle multi-tenancy. }, SINGLE_TENANT_ORG_DOMAIN); @@ -50,9 +52,11 @@ export const findSymbolDefinitionsTool = tool({ inputSchema: z.object({ symbol: z.string().describe("The symbol to find definitions of"), language: z.string().describe("The programming language of the symbol"), - revision: z.string().describe("The revision to search for the symbol in"), }), - execute: async ({ symbol, language, revision }) => { + execute: async ({ symbol, language }) => { + // @todo: make revision configurable. + const revision = "HEAD"; + const response = await findSearchBasedSymbolDefinitions({ symbolName: symbol, language, @@ -86,9 +90,11 @@ export const readFilesTool = tool({ inputSchema: z.object({ paths: z.array(z.string()).describe("The paths to the files to read"), repository: z.string().describe("The repository to read the files from"), - revision: z.string().describe("The revision to read the files from"), }), - execute: async ({ paths, repository, revision }) => { + execute: async ({ paths, repository }) => { + // @todo: make revision configurable. + const revision = "HEAD"; + const responses = await Promise.all(paths.map(async (path) => { return getFileSource({ fileName: path, diff --git a/packages/web/src/features/chat/utils.test.ts b/packages/web/src/features/chat/utils.test.ts index cea6e79f..00d5c55b 100644 --- a/packages/web/src/features/chat/utils.test.ts +++ b/packages/web/src/features/chat/utils.test.ts @@ -1,5 +1,5 @@ import { expect, test, vi } from 'vitest' -import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps } from './utils' +import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairCitations } from './utils' import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants'; import { SBChatMessage, SBChatMessagePart } from './types'; @@ -238,3 +238,88 @@ test('getAnswerPartFromAssistantMessage returns undefined when streaming and no expect(result).toBeUndefined(); }); + +test('repairCitations fixes missing colon after @file', () => { + const input = 'See the function in @file{auth.ts} for details.'; + const expected = 'See the function in @file:{auth.ts} for details.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations fixes missing colon with range', () => { + const input = 'Check @file{config.ts:15-20} for the configuration.'; + const expected = 'Check @file:{config.ts:15-20} for the configuration.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations fixes missing braces around filename', () => { + const input = 'The logic is in @file:utils.js and handles validation.'; + const expected = 'The logic is in @file:{utils.js} and handles validation.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations fixes missing braces with path', () => { + const input = 'Look at @file:src/components/Button.tsx for the component.'; + const expected = 'Look at @file:{src/components/Button.tsx} for the component.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations removes multiple ranges keeping only first', () => { + const input = 'See @file:{service.ts:10-15,20-25,30-35} for implementation.'; + const expected = 'See @file:{service.ts:10-15} for implementation.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations fixes malformed triple number ranges', () => { + const input = 'Check @file:{handler.ts:5-10-15} for the logic.'; + const expected = 'Check @file:{handler.ts:5-10} for the logic.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations handles multiple citations in same text', () => { + const input = 'See @file{auth.ts} and @file:config.js for setup details.'; + const expected = 'See @file:{auth.ts} and @file:{config.js} for setup details.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations leaves correctly formatted citations unchanged', () => { + const input = 'The function @file:{utils.ts:42-50} handles validation correctly.'; + expect(repairCitations(input)).toBe(input); +}); + +test('repairCitations handles edge cases with spaces and punctuation', () => { + const input = 'Functions like @file:helper.ts, @file{main.js}, and @file:{app.ts:1-5,10-15} work.'; + const expected = 'Functions like @file:{helper.ts}, @file:{main.js}, and @file:{app.ts:1-5} work.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations returns empty string unchanged', () => { + expect(repairCitations('')).toBe(''); +}); + +test('repairCitations returns text without citations unchanged', () => { + const input = 'This is just regular text without any file references.'; + expect(repairCitations(input)).toBe(input); +}); + +test('repairCitations handles complex file paths correctly', () => { + const input = 'Check @file:src/components/ui/Button/index.tsx for implementation.'; + const expected = 'Check @file:{src/components/ui/Button/index.tsx} for implementation.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations handles files with numbers and special characters', () => { + const input = 'See @file{utils-v2.0.1.ts} and @file:config_2024.json for setup.'; + const expected = 'See @file:{utils-v2.0.1.ts} and @file:{config_2024.json} for setup.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations handles citation at end of sentence', () => { + const input = 'The implementation is in @file:helper.ts.'; + const expected = 'The implementation is in @file:{helper.ts}.'; + expect(repairCitations(input)).toBe(expected); +}); + +test('repairCitations preserves already correct citations with ranges', () => { + const input = 'The function @file:{utils.ts:10-20} and variable @file:{config.js:5} work correctly.'; + expect(repairCitations(input)).toBe(input); +}); diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index f2bb42ab..28c14228 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -243,7 +243,7 @@ export const createFileReference = ({ fileName, startLine, endLine }: { fileName export const convertLLMOutputToPortableMarkdown = (text: string): string => { return text.replace(FILE_REFERENCE_REGEX, (_, fileName, startLine, endLine) => { const displayName = fileName.split('/').pop() || fileName; - + let linkText = displayName; if (startLine) { if (endLine && startLine !== endLine) { @@ -252,7 +252,7 @@ export const convertLLMOutputToPortableMarkdown = (text: string): string => { linkText += `:${startLine}`; } } - + return `[${linkText}](${fileName})`; }); } @@ -265,10 +265,10 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => { const steps: SBChatMessagePart[][] = []; let currentStep: SBChatMessagePart[] = []; - + for (let i = 0; i < parts.length; i++) { const part = parts[i]; - + if (part.type === 'step-start') { if (currentStep.length > 0) { steps.push([...currentStep]); @@ -278,14 +278,27 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => { currentStep.push(part); } } - + if (currentStep.length > 0) { steps.push(currentStep); } - + return steps; } +// LLMs like to not follow instructions... this takes care of some common mistakes they tend to make. +export const repairCitations = (text: string): string => { + return text + // Fix missing colon: @file{...} -> @file:{...} + .replace(/@file\{([^}]+)\}/g, '@file:{$1}') + // Fix missing braces: @file:filename -> @file:{filename} + .replace(/@file:([^\s{]\S*?)(\s|[,;!?](?:\s|$)|\.(?:\s|$)|$)/g, '@file:{$1}$2') + // Fix multiple ranges: keep only first range + .replace(/@file:\{([^:}]+):(\d+-\d+),[\d,-]+\}/g, '@file:{$1:$2}') + // Fix malformed ranges + .replace(/@file:\{([^:}]+):(\d+)-(\d+)-(\d+)\}/g, '@file:{$1:$2-$3}'); +}; + // Attempts to find the part of the assistant's message // that contains the answer. export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => { @@ -293,13 +306,19 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre .findLast((part) => part.type === 'text') if (lastTextPart?.text.startsWith(ANSWER_TAG)) { - return lastTextPart; + return { + ...lastTextPart, + text: repairCitations(lastTextPart.text), + }; } // If the agent did not include the answer tag, then fallback to using the last text part. // Only do this when we are no longer streaming since the agent may still be thinking. if (!isStreaming && lastTextPart) { - return lastTextPart; + return { + ...lastTextPart, + text: repairCitations(lastTextPart.text), + }; } return undefined;