mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-11 20:05:25 +00:00
fix: Add citation repairing. Also remove revision as a tool param.
This commit is contained in:
parent
11099695da
commit
ea655f4d4a
4 changed files with 132 additions and 16 deletions
|
|
@ -207,6 +207,12 @@ When you have sufficient context, output your answer as a structured markdown re
|
|||
- Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" → must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`)
|
||||
- Any code snippet or line you're explaining
|
||||
- Class names, method calls, imports, etc.
|
||||
- Some examples of both correct and incorrect code references:
|
||||
- Correct: @file:{path/to/file.ts}
|
||||
- Correct: @file:{path/to/file.ts:10-15}
|
||||
- Incorrect: @file{path/to/file.ts} (missing colon)
|
||||
- Incorrect: @file:path/to/file.ts (missing curly braces)
|
||||
- Incorrect: @file:{path/to/file.ts:10-25,30-35} (multiple ranges not supported)
|
||||
- Be clear and very concise. Use bullet points where appropriate
|
||||
- Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference
|
||||
- If you cannot provide a code reference for something you're discussing, do not mention that specific code element
|
||||
|
|
|
|||
|
|
@ -14,13 +14,15 @@ export const findSymbolReferencesTool = tool({
|
|||
inputSchema: z.object({
|
||||
symbol: z.string().describe("The symbol to find references to"),
|
||||
language: z.string().describe("The programming language of the symbol"),
|
||||
revision: z.string().describe("The revision to search for the symbol in"),
|
||||
}),
|
||||
execute: async ({ symbol, language, revision }) => {
|
||||
execute: async ({ symbol, language }) => {
|
||||
// @todo: make revision configurable.
|
||||
const revision = "HEAD";
|
||||
|
||||
const response = await findSearchBasedSymbolReferences({
|
||||
symbolName: symbol,
|
||||
language,
|
||||
revisionName: revision,
|
||||
revisionName: "HEAD",
|
||||
// @todo(mt): handle multi-tenancy.
|
||||
}, SINGLE_TENANT_ORG_DOMAIN);
|
||||
|
||||
|
|
@ -50,9 +52,11 @@ export const findSymbolDefinitionsTool = tool({
|
|||
inputSchema: z.object({
|
||||
symbol: z.string().describe("The symbol to find definitions of"),
|
||||
language: z.string().describe("The programming language of the symbol"),
|
||||
revision: z.string().describe("The revision to search for the symbol in"),
|
||||
}),
|
||||
execute: async ({ symbol, language, revision }) => {
|
||||
execute: async ({ symbol, language }) => {
|
||||
// @todo: make revision configurable.
|
||||
const revision = "HEAD";
|
||||
|
||||
const response = await findSearchBasedSymbolDefinitions({
|
||||
symbolName: symbol,
|
||||
language,
|
||||
|
|
@ -86,9 +90,11 @@ export const readFilesTool = tool({
|
|||
inputSchema: z.object({
|
||||
paths: z.array(z.string()).describe("The paths to the files to read"),
|
||||
repository: z.string().describe("The repository to read the files from"),
|
||||
revision: z.string().describe("The revision to read the files from"),
|
||||
}),
|
||||
execute: async ({ paths, repository, revision }) => {
|
||||
execute: async ({ paths, repository }) => {
|
||||
// @todo: make revision configurable.
|
||||
const revision = "HEAD";
|
||||
|
||||
const responses = await Promise.all(paths.map(async (path) => {
|
||||
return getFileSource({
|
||||
fileName: path,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { expect, test, vi } from 'vitest'
|
||||
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps } from './utils'
|
||||
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairCitations } from './utils'
|
||||
import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants';
|
||||
import { SBChatMessage, SBChatMessagePart } from './types';
|
||||
|
||||
|
|
@ -238,3 +238,88 @@ test('getAnswerPartFromAssistantMessage returns undefined when streaming and no
|
|||
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
test('repairCitations fixes missing colon after @file', () => {
|
||||
const input = 'See the function in @file{auth.ts} for details.';
|
||||
const expected = 'See the function in @file:{auth.ts} for details.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations fixes missing colon with range', () => {
|
||||
const input = 'Check @file{config.ts:15-20} for the configuration.';
|
||||
const expected = 'Check @file:{config.ts:15-20} for the configuration.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations fixes missing braces around filename', () => {
|
||||
const input = 'The logic is in @file:utils.js and handles validation.';
|
||||
const expected = 'The logic is in @file:{utils.js} and handles validation.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations fixes missing braces with path', () => {
|
||||
const input = 'Look at @file:src/components/Button.tsx for the component.';
|
||||
const expected = 'Look at @file:{src/components/Button.tsx} for the component.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations removes multiple ranges keeping only first', () => {
|
||||
const input = 'See @file:{service.ts:10-15,20-25,30-35} for implementation.';
|
||||
const expected = 'See @file:{service.ts:10-15} for implementation.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations fixes malformed triple number ranges', () => {
|
||||
const input = 'Check @file:{handler.ts:5-10-15} for the logic.';
|
||||
const expected = 'Check @file:{handler.ts:5-10} for the logic.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations handles multiple citations in same text', () => {
|
||||
const input = 'See @file{auth.ts} and @file:config.js for setup details.';
|
||||
const expected = 'See @file:{auth.ts} and @file:{config.js} for setup details.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations leaves correctly formatted citations unchanged', () => {
|
||||
const input = 'The function @file:{utils.ts:42-50} handles validation correctly.';
|
||||
expect(repairCitations(input)).toBe(input);
|
||||
});
|
||||
|
||||
test('repairCitations handles edge cases with spaces and punctuation', () => {
|
||||
const input = 'Functions like @file:helper.ts, @file{main.js}, and @file:{app.ts:1-5,10-15} work.';
|
||||
const expected = 'Functions like @file:{helper.ts}, @file:{main.js}, and @file:{app.ts:1-5} work.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations returns empty string unchanged', () => {
|
||||
expect(repairCitations('')).toBe('');
|
||||
});
|
||||
|
||||
test('repairCitations returns text without citations unchanged', () => {
|
||||
const input = 'This is just regular text without any file references.';
|
||||
expect(repairCitations(input)).toBe(input);
|
||||
});
|
||||
|
||||
test('repairCitations handles complex file paths correctly', () => {
|
||||
const input = 'Check @file:src/components/ui/Button/index.tsx for implementation.';
|
||||
const expected = 'Check @file:{src/components/ui/Button/index.tsx} for implementation.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations handles files with numbers and special characters', () => {
|
||||
const input = 'See @file{utils-v2.0.1.ts} and @file:config_2024.json for setup.';
|
||||
const expected = 'See @file:{utils-v2.0.1.ts} and @file:{config_2024.json} for setup.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations handles citation at end of sentence', () => {
|
||||
const input = 'The implementation is in @file:helper.ts.';
|
||||
const expected = 'The implementation is in @file:{helper.ts}.';
|
||||
expect(repairCitations(input)).toBe(expected);
|
||||
});
|
||||
|
||||
test('repairCitations preserves already correct citations with ranges', () => {
|
||||
const input = 'The function @file:{utils.ts:10-20} and variable @file:{config.js:5} work correctly.';
|
||||
expect(repairCitations(input)).toBe(input);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ export const createFileReference = ({ fileName, startLine, endLine }: { fileName
|
|||
export const convertLLMOutputToPortableMarkdown = (text: string): string => {
|
||||
return text.replace(FILE_REFERENCE_REGEX, (_, fileName, startLine, endLine) => {
|
||||
const displayName = fileName.split('/').pop() || fileName;
|
||||
|
||||
|
||||
let linkText = displayName;
|
||||
if (startLine) {
|
||||
if (endLine && startLine !== endLine) {
|
||||
|
|
@ -252,7 +252,7 @@ export const convertLLMOutputToPortableMarkdown = (text: string): string => {
|
|||
linkText += `:${startLine}`;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return `[${linkText}](${fileName})`;
|
||||
});
|
||||
}
|
||||
|
|
@ -265,10 +265,10 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
|
|||
|
||||
const steps: SBChatMessagePart[][] = [];
|
||||
let currentStep: SBChatMessagePart[] = [];
|
||||
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const part = parts[i];
|
||||
|
||||
|
||||
if (part.type === 'step-start') {
|
||||
if (currentStep.length > 0) {
|
||||
steps.push([...currentStep]);
|
||||
|
|
@ -278,14 +278,27 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
|
|||
currentStep.push(part);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (currentStep.length > 0) {
|
||||
steps.push(currentStep);
|
||||
}
|
||||
|
||||
|
||||
return steps;
|
||||
}
|
||||
|
||||
// LLMs like to not follow instructions... this takes care of some common mistakes they tend to make.
|
||||
export const repairCitations = (text: string): string => {
|
||||
return text
|
||||
// Fix missing colon: @file{...} -> @file:{...}
|
||||
.replace(/@file\{([^}]+)\}/g, '@file:{$1}')
|
||||
// Fix missing braces: @file:filename -> @file:{filename}
|
||||
.replace(/@file:([^\s{]\S*?)(\s|[,;!?](?:\s|$)|\.(?:\s|$)|$)/g, '@file:{$1}$2')
|
||||
// Fix multiple ranges: keep only first range
|
||||
.replace(/@file:\{([^:}]+):(\d+-\d+),[\d,-]+\}/g, '@file:{$1:$2}')
|
||||
// Fix malformed ranges
|
||||
.replace(/@file:\{([^:}]+):(\d+)-(\d+)-(\d+)\}/g, '@file:{$1:$2-$3}');
|
||||
};
|
||||
|
||||
// Attempts to find the part of the assistant's message
|
||||
// that contains the answer.
|
||||
export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => {
|
||||
|
|
@ -293,13 +306,19 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre
|
|||
.findLast((part) => part.type === 'text')
|
||||
|
||||
if (lastTextPart?.text.startsWith(ANSWER_TAG)) {
|
||||
return lastTextPart;
|
||||
return {
|
||||
...lastTextPart,
|
||||
text: repairCitations(lastTextPart.text),
|
||||
};
|
||||
}
|
||||
|
||||
// If the agent did not include the answer tag, then fallback to using the last text part.
|
||||
// Only do this when we are no longer streaming since the agent may still be thinking.
|
||||
if (!isStreaming && lastTextPart) {
|
||||
return lastTextPart;
|
||||
return {
|
||||
...lastTextPart,
|
||||
text: repairCitations(lastTextPart.text),
|
||||
};
|
||||
}
|
||||
|
||||
return undefined;
|
||||
|
|
|
|||
Loading…
Reference in a new issue