fix: Add citation repairing. Also remove revision as a tool param.

This commit is contained in:
bkellam 2025-07-23 15:50:23 -07:00
parent 11099695da
commit ea655f4d4a
4 changed files with 132 additions and 16 deletions

View file

@ -207,6 +207,12 @@ When you have sufficient context, output your answer as a structured markdown re
- Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`) - Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`)
- Any code snippet or line you're explaining - Any code snippet or line you're explaining
- Class names, method calls, imports, etc. - Class names, method calls, imports, etc.
- Some examples of both correct and incorrect code references:
- Correct: @file:{path/to/file.ts}
- Correct: @file:{path/to/file.ts:10-15}
- Incorrect: @file{path/to/file.ts} (missing colon)
- Incorrect: @file:path/to/file.ts (missing curly braces)
- Incorrect: @file:{path/to/file.ts:10-25,30-35} (multiple ranges not supported)
- Be clear and very concise. Use bullet points where appropriate - Be clear and very concise. Use bullet points where appropriate
- Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference - Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference
- If you cannot provide a code reference for something you're discussing, do not mention that specific code element - If you cannot provide a code reference for something you're discussing, do not mention that specific code element

View file

@ -14,13 +14,15 @@ export const findSymbolReferencesTool = tool({
inputSchema: z.object({ inputSchema: z.object({
symbol: z.string().describe("The symbol to find references to"), symbol: z.string().describe("The symbol to find references to"),
language: z.string().describe("The programming language of the symbol"), language: z.string().describe("The programming language of the symbol"),
revision: z.string().describe("The revision to search for the symbol in"),
}), }),
execute: async ({ symbol, language, revision }) => { execute: async ({ symbol, language }) => {
// @todo: make revision configurable.
const revision = "HEAD";
const response = await findSearchBasedSymbolReferences({ const response = await findSearchBasedSymbolReferences({
symbolName: symbol, symbolName: symbol,
language, language,
revisionName: revision, revisionName: "HEAD",
// @todo(mt): handle multi-tenancy. // @todo(mt): handle multi-tenancy.
}, SINGLE_TENANT_ORG_DOMAIN); }, SINGLE_TENANT_ORG_DOMAIN);
@ -50,9 +52,11 @@ export const findSymbolDefinitionsTool = tool({
inputSchema: z.object({ inputSchema: z.object({
symbol: z.string().describe("The symbol to find definitions of"), symbol: z.string().describe("The symbol to find definitions of"),
language: z.string().describe("The programming language of the symbol"), language: z.string().describe("The programming language of the symbol"),
revision: z.string().describe("The revision to search for the symbol in"),
}), }),
execute: async ({ symbol, language, revision }) => { execute: async ({ symbol, language }) => {
// @todo: make revision configurable.
const revision = "HEAD";
const response = await findSearchBasedSymbolDefinitions({ const response = await findSearchBasedSymbolDefinitions({
symbolName: symbol, symbolName: symbol,
language, language,
@ -86,9 +90,11 @@ export const readFilesTool = tool({
inputSchema: z.object({ inputSchema: z.object({
paths: z.array(z.string()).describe("The paths to the files to read"), paths: z.array(z.string()).describe("The paths to the files to read"),
repository: z.string().describe("The repository to read the files from"), repository: z.string().describe("The repository to read the files from"),
revision: z.string().describe("The revision to read the files from"),
}), }),
execute: async ({ paths, repository, revision }) => { execute: async ({ paths, repository }) => {
// @todo: make revision configurable.
const revision = "HEAD";
const responses = await Promise.all(paths.map(async (path) => { const responses = await Promise.all(paths.map(async (path) => {
return getFileSource({ return getFileSource({
fileName: path, fileName: path,

View file

@ -1,5 +1,5 @@
import { expect, test, vi } from 'vitest' import { expect, test, vi } from 'vitest'
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps } from './utils' import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairCitations } from './utils'
import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants'; import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants';
import { SBChatMessage, SBChatMessagePart } from './types'; import { SBChatMessage, SBChatMessagePart } from './types';
@ -238,3 +238,88 @@ test('getAnswerPartFromAssistantMessage returns undefined when streaming and no
expect(result).toBeUndefined(); expect(result).toBeUndefined();
}); });
test('repairCitations fixes missing colon after @file', () => {
const input = 'See the function in @file{auth.ts} for details.';
const expected = 'See the function in @file:{auth.ts} for details.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations fixes missing colon with range', () => {
const input = 'Check @file{config.ts:15-20} for the configuration.';
const expected = 'Check @file:{config.ts:15-20} for the configuration.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations fixes missing braces around filename', () => {
const input = 'The logic is in @file:utils.js and handles validation.';
const expected = 'The logic is in @file:{utils.js} and handles validation.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations fixes missing braces with path', () => {
const input = 'Look at @file:src/components/Button.tsx for the component.';
const expected = 'Look at @file:{src/components/Button.tsx} for the component.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations removes multiple ranges keeping only first', () => {
const input = 'See @file:{service.ts:10-15,20-25,30-35} for implementation.';
const expected = 'See @file:{service.ts:10-15} for implementation.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations fixes malformed triple number ranges', () => {
const input = 'Check @file:{handler.ts:5-10-15} for the logic.';
const expected = 'Check @file:{handler.ts:5-10} for the logic.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations handles multiple citations in same text', () => {
const input = 'See @file{auth.ts} and @file:config.js for setup details.';
const expected = 'See @file:{auth.ts} and @file:{config.js} for setup details.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations leaves correctly formatted citations unchanged', () => {
const input = 'The function @file:{utils.ts:42-50} handles validation correctly.';
expect(repairCitations(input)).toBe(input);
});
test('repairCitations handles edge cases with spaces and punctuation', () => {
const input = 'Functions like @file:helper.ts, @file{main.js}, and @file:{app.ts:1-5,10-15} work.';
const expected = 'Functions like @file:{helper.ts}, @file:{main.js}, and @file:{app.ts:1-5} work.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations returns empty string unchanged', () => {
expect(repairCitations('')).toBe('');
});
test('repairCitations returns text without citations unchanged', () => {
const input = 'This is just regular text without any file references.';
expect(repairCitations(input)).toBe(input);
});
test('repairCitations handles complex file paths correctly', () => {
const input = 'Check @file:src/components/ui/Button/index.tsx for implementation.';
const expected = 'Check @file:{src/components/ui/Button/index.tsx} for implementation.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations handles files with numbers and special characters', () => {
const input = 'See @file{utils-v2.0.1.ts} and @file:config_2024.json for setup.';
const expected = 'See @file:{utils-v2.0.1.ts} and @file:{config_2024.json} for setup.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations handles citation at end of sentence', () => {
const input = 'The implementation is in @file:helper.ts.';
const expected = 'The implementation is in @file:{helper.ts}.';
expect(repairCitations(input)).toBe(expected);
});
test('repairCitations preserves already correct citations with ranges', () => {
const input = 'The function @file:{utils.ts:10-20} and variable @file:{config.js:5} work correctly.';
expect(repairCitations(input)).toBe(input);
});

View file

@ -286,6 +286,19 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
return steps; return steps;
} }
// LLMs like to not follow instructions... this takes care of some common mistakes they tend to make.
export const repairCitations = (text: string): string => {
return text
// Fix missing colon: @file{...} -> @file:{...}
.replace(/@file\{([^}]+)\}/g, '@file:{$1}')
// Fix missing braces: @file:filename -> @file:{filename}
.replace(/@file:([^\s{]\S*?)(\s|[,;!?](?:\s|$)|\.(?:\s|$)|$)/g, '@file:{$1}$2')
// Fix multiple ranges: keep only first range
.replace(/@file:\{([^:}]+):(\d+-\d+),[\d,-]+\}/g, '@file:{$1:$2}')
// Fix malformed ranges
.replace(/@file:\{([^:}]+):(\d+)-(\d+)-(\d+)\}/g, '@file:{$1:$2-$3}');
};
// Attempts to find the part of the assistant's message // Attempts to find the part of the assistant's message
// that contains the answer. // that contains the answer.
export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => { export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => {
@ -293,13 +306,19 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre
.findLast((part) => part.type === 'text') .findLast((part) => part.type === 'text')
if (lastTextPart?.text.startsWith(ANSWER_TAG)) { if (lastTextPart?.text.startsWith(ANSWER_TAG)) {
return lastTextPart; return {
...lastTextPart,
text: repairCitations(lastTextPart.text),
};
} }
// If the agent did not include the answer tag, then fallback to using the last text part. // If the agent did not include the answer tag, then fallback to using the last text part.
// Only do this when we are no longer streaming since the agent may still be thinking. // Only do this when we are no longer streaming since the agent may still be thinking.
if (!isStreaming && lastTextPart) { if (!isStreaming && lastTextPart) {
return lastTextPart; return {
...lastTextPart,
text: repairCitations(lastTextPart.text),
};
} }
return undefined; return undefined;