mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 04:15:30 +00:00
fix: Add citation repairing. Also remove revision as a tool param.
This commit is contained in:
parent
11099695da
commit
ea655f4d4a
4 changed files with 132 additions and 16 deletions
|
|
@ -207,6 +207,12 @@ When you have sufficient context, output your answer as a structured markdown re
|
||||||
- Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" → must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`)
|
- Code patterns (e.g., "using \`file:\${suggestionQuery}\` pattern" → must include \`${fileReferenceToString({ fileName: 'search.ts', range: { startLine: 10, endLine: 15 } })}\`)
|
||||||
- Any code snippet or line you're explaining
|
- Any code snippet or line you're explaining
|
||||||
- Class names, method calls, imports, etc.
|
- Class names, method calls, imports, etc.
|
||||||
|
- Some examples of both correct and incorrect code references:
|
||||||
|
- Correct: @file:{path/to/file.ts}
|
||||||
|
- Correct: @file:{path/to/file.ts:10-15}
|
||||||
|
- Incorrect: @file{path/to/file.ts} (missing colon)
|
||||||
|
- Incorrect: @file:path/to/file.ts (missing curly braces)
|
||||||
|
- Incorrect: @file:{path/to/file.ts:10-25,30-35} (multiple ranges not supported)
|
||||||
- Be clear and very concise. Use bullet points where appropriate
|
- Be clear and very concise. Use bullet points where appropriate
|
||||||
- Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference
|
- Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference
|
||||||
- If you cannot provide a code reference for something you're discussing, do not mention that specific code element
|
- If you cannot provide a code reference for something you're discussing, do not mention that specific code element
|
||||||
|
|
|
||||||
|
|
@ -14,13 +14,15 @@ export const findSymbolReferencesTool = tool({
|
||||||
inputSchema: z.object({
|
inputSchema: z.object({
|
||||||
symbol: z.string().describe("The symbol to find references to"),
|
symbol: z.string().describe("The symbol to find references to"),
|
||||||
language: z.string().describe("The programming language of the symbol"),
|
language: z.string().describe("The programming language of the symbol"),
|
||||||
revision: z.string().describe("The revision to search for the symbol in"),
|
|
||||||
}),
|
}),
|
||||||
execute: async ({ symbol, language, revision }) => {
|
execute: async ({ symbol, language }) => {
|
||||||
|
// @todo: make revision configurable.
|
||||||
|
const revision = "HEAD";
|
||||||
|
|
||||||
const response = await findSearchBasedSymbolReferences({
|
const response = await findSearchBasedSymbolReferences({
|
||||||
symbolName: symbol,
|
symbolName: symbol,
|
||||||
language,
|
language,
|
||||||
revisionName: revision,
|
revisionName: "HEAD",
|
||||||
// @todo(mt): handle multi-tenancy.
|
// @todo(mt): handle multi-tenancy.
|
||||||
}, SINGLE_TENANT_ORG_DOMAIN);
|
}, SINGLE_TENANT_ORG_DOMAIN);
|
||||||
|
|
||||||
|
|
@ -50,9 +52,11 @@ export const findSymbolDefinitionsTool = tool({
|
||||||
inputSchema: z.object({
|
inputSchema: z.object({
|
||||||
symbol: z.string().describe("The symbol to find definitions of"),
|
symbol: z.string().describe("The symbol to find definitions of"),
|
||||||
language: z.string().describe("The programming language of the symbol"),
|
language: z.string().describe("The programming language of the symbol"),
|
||||||
revision: z.string().describe("The revision to search for the symbol in"),
|
|
||||||
}),
|
}),
|
||||||
execute: async ({ symbol, language, revision }) => {
|
execute: async ({ symbol, language }) => {
|
||||||
|
// @todo: make revision configurable.
|
||||||
|
const revision = "HEAD";
|
||||||
|
|
||||||
const response = await findSearchBasedSymbolDefinitions({
|
const response = await findSearchBasedSymbolDefinitions({
|
||||||
symbolName: symbol,
|
symbolName: symbol,
|
||||||
language,
|
language,
|
||||||
|
|
@ -86,9 +90,11 @@ export const readFilesTool = tool({
|
||||||
inputSchema: z.object({
|
inputSchema: z.object({
|
||||||
paths: z.array(z.string()).describe("The paths to the files to read"),
|
paths: z.array(z.string()).describe("The paths to the files to read"),
|
||||||
repository: z.string().describe("The repository to read the files from"),
|
repository: z.string().describe("The repository to read the files from"),
|
||||||
revision: z.string().describe("The revision to read the files from"),
|
|
||||||
}),
|
}),
|
||||||
execute: async ({ paths, repository, revision }) => {
|
execute: async ({ paths, repository }) => {
|
||||||
|
// @todo: make revision configurable.
|
||||||
|
const revision = "HEAD";
|
||||||
|
|
||||||
const responses = await Promise.all(paths.map(async (path) => {
|
const responses = await Promise.all(paths.map(async (path) => {
|
||||||
return getFileSource({
|
return getFileSource({
|
||||||
fileName: path,
|
fileName: path,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import { expect, test, vi } from 'vitest'
|
import { expect, test, vi } from 'vitest'
|
||||||
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps } from './utils'
|
import { fileReferenceToString, getAnswerPartFromAssistantMessage, groupMessageIntoSteps, repairCitations } from './utils'
|
||||||
import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants';
|
import { FILE_REFERENCE_REGEX, ANSWER_TAG } from './constants';
|
||||||
import { SBChatMessage, SBChatMessagePart } from './types';
|
import { SBChatMessage, SBChatMessagePart } from './types';
|
||||||
|
|
||||||
|
|
@ -238,3 +238,88 @@ test('getAnswerPartFromAssistantMessage returns undefined when streaming and no
|
||||||
|
|
||||||
expect(result).toBeUndefined();
|
expect(result).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('repairCitations fixes missing colon after @file', () => {
|
||||||
|
const input = 'See the function in @file{auth.ts} for details.';
|
||||||
|
const expected = 'See the function in @file:{auth.ts} for details.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations fixes missing colon with range', () => {
|
||||||
|
const input = 'Check @file{config.ts:15-20} for the configuration.';
|
||||||
|
const expected = 'Check @file:{config.ts:15-20} for the configuration.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations fixes missing braces around filename', () => {
|
||||||
|
const input = 'The logic is in @file:utils.js and handles validation.';
|
||||||
|
const expected = 'The logic is in @file:{utils.js} and handles validation.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations fixes missing braces with path', () => {
|
||||||
|
const input = 'Look at @file:src/components/Button.tsx for the component.';
|
||||||
|
const expected = 'Look at @file:{src/components/Button.tsx} for the component.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations removes multiple ranges keeping only first', () => {
|
||||||
|
const input = 'See @file:{service.ts:10-15,20-25,30-35} for implementation.';
|
||||||
|
const expected = 'See @file:{service.ts:10-15} for implementation.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations fixes malformed triple number ranges', () => {
|
||||||
|
const input = 'Check @file:{handler.ts:5-10-15} for the logic.';
|
||||||
|
const expected = 'Check @file:{handler.ts:5-10} for the logic.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations handles multiple citations in same text', () => {
|
||||||
|
const input = 'See @file{auth.ts} and @file:config.js for setup details.';
|
||||||
|
const expected = 'See @file:{auth.ts} and @file:{config.js} for setup details.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations leaves correctly formatted citations unchanged', () => {
|
||||||
|
const input = 'The function @file:{utils.ts:42-50} handles validation correctly.';
|
||||||
|
expect(repairCitations(input)).toBe(input);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations handles edge cases with spaces and punctuation', () => {
|
||||||
|
const input = 'Functions like @file:helper.ts, @file{main.js}, and @file:{app.ts:1-5,10-15} work.';
|
||||||
|
const expected = 'Functions like @file:{helper.ts}, @file:{main.js}, and @file:{app.ts:1-5} work.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations returns empty string unchanged', () => {
|
||||||
|
expect(repairCitations('')).toBe('');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations returns text without citations unchanged', () => {
|
||||||
|
const input = 'This is just regular text without any file references.';
|
||||||
|
expect(repairCitations(input)).toBe(input);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations handles complex file paths correctly', () => {
|
||||||
|
const input = 'Check @file:src/components/ui/Button/index.tsx for implementation.';
|
||||||
|
const expected = 'Check @file:{src/components/ui/Button/index.tsx} for implementation.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations handles files with numbers and special characters', () => {
|
||||||
|
const input = 'See @file{utils-v2.0.1.ts} and @file:config_2024.json for setup.';
|
||||||
|
const expected = 'See @file:{utils-v2.0.1.ts} and @file:{config_2024.json} for setup.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations handles citation at end of sentence', () => {
|
||||||
|
const input = 'The implementation is in @file:helper.ts.';
|
||||||
|
const expected = 'The implementation is in @file:{helper.ts}.';
|
||||||
|
expect(repairCitations(input)).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('repairCitations preserves already correct citations with ranges', () => {
|
||||||
|
const input = 'The function @file:{utils.ts:10-20} and variable @file:{config.js:5} work correctly.';
|
||||||
|
expect(repairCitations(input)).toBe(input);
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -286,6 +286,19 @@ export const groupMessageIntoSteps = (parts: SBChatMessagePart[]) => {
|
||||||
return steps;
|
return steps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LLMs like to not follow instructions... this takes care of some common mistakes they tend to make.
|
||||||
|
export const repairCitations = (text: string): string => {
|
||||||
|
return text
|
||||||
|
// Fix missing colon: @file{...} -> @file:{...}
|
||||||
|
.replace(/@file\{([^}]+)\}/g, '@file:{$1}')
|
||||||
|
// Fix missing braces: @file:filename -> @file:{filename}
|
||||||
|
.replace(/@file:([^\s{]\S*?)(\s|[,;!?](?:\s|$)|\.(?:\s|$)|$)/g, '@file:{$1}$2')
|
||||||
|
// Fix multiple ranges: keep only first range
|
||||||
|
.replace(/@file:\{([^:}]+):(\d+-\d+),[\d,-]+\}/g, '@file:{$1:$2}')
|
||||||
|
// Fix malformed ranges
|
||||||
|
.replace(/@file:\{([^:}]+):(\d+)-(\d+)-(\d+)\}/g, '@file:{$1:$2-$3}');
|
||||||
|
};
|
||||||
|
|
||||||
// Attempts to find the part of the assistant's message
|
// Attempts to find the part of the assistant's message
|
||||||
// that contains the answer.
|
// that contains the answer.
|
||||||
export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => {
|
export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStreaming: boolean): TextUIPart | undefined => {
|
||||||
|
|
@ -293,13 +306,19 @@ export const getAnswerPartFromAssistantMessage = (message: SBChatMessage, isStre
|
||||||
.findLast((part) => part.type === 'text')
|
.findLast((part) => part.type === 'text')
|
||||||
|
|
||||||
if (lastTextPart?.text.startsWith(ANSWER_TAG)) {
|
if (lastTextPart?.text.startsWith(ANSWER_TAG)) {
|
||||||
return lastTextPart;
|
return {
|
||||||
|
...lastTextPart,
|
||||||
|
text: repairCitations(lastTextPart.text),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the agent did not include the answer tag, then fallback to using the last text part.
|
// If the agent did not include the answer tag, then fallback to using the last text part.
|
||||||
// Only do this when we are no longer streaming since the agent may still be thinking.
|
// Only do this when we are no longer streaming since the agent may still be thinking.
|
||||||
if (!isStreaming && lastTextPart) {
|
if (!isStreaming && lastTextPart) {
|
||||||
return lastTextPart;
|
return {
|
||||||
|
...lastTextPart,
|
||||||
|
text: repairCitations(lastTextPart.text),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return undefined;
|
return undefined;
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue