import { env } from "@/env.mjs"; import { getFileSource } from "@/features/search/fileSourceApi"; import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; import { isServiceError } from "@/lib/utils"; import { ProviderOptions } from "@ai-sdk/provider-utils"; import { createLogger } from "@sourcebot/logger"; import { LanguageModel, ModelMessage, StopCondition, streamText } from "ai"; import { ANSWER_TAG, FILE_REFERENCE_PREFIX, toolNames } from "./constants"; import { createCodeSearchTool, findSymbolDefinitionsTool, findSymbolReferencesTool, readFilesTool } from "./tools"; import { FileSource, Source } from "./types"; import { addLineNumbers, fileReferenceToString } from "./utils"; const logger = createLogger('chat-agent'); interface AgentOptions { model: LanguageModel; providerOptions?: ProviderOptions; headers?: Record; searchScopeRepoNames: string[]; inputMessages: ModelMessage[]; inputSources: Source[]; onWriteSource: (source: Source) => void; traceId: string; } // If the agent exceeds the step count, then we will stop. // eslint-disable-next-line @typescript-eslint/no-explicit-any const stepCountIsGTE = (stepCount: number): StopCondition => { return ({ steps }) => steps.length >= stepCount; } export const createAgentStream = async ({ model, providerOptions, headers, inputMessages, inputSources, searchScopeRepoNames, onWriteSource, traceId, }: AgentOptions) => { const baseSystemPrompt = createBaseSystemPrompt({ searchScopeRepoNames, }); const stream = streamText({ model, providerOptions, headers, system: baseSystemPrompt, messages: inputMessages, tools: { [toolNames.searchCode]: createCodeSearchTool(searchScopeRepoNames), [toolNames.readFiles]: readFilesTool, [toolNames.findSymbolReferences]: findSymbolReferencesTool, [toolNames.findSymbolDefinitions]: findSymbolDefinitionsTool, }, prepareStep: async ({ stepNumber }) => { // The first step attaches any mentioned sources to the system prompt. if (stepNumber === 0 && inputSources.length > 0) { const fileSources = inputSources.filter((source) => source.type === 'file'); const resolvedFileSources = ( await Promise.all(fileSources.map(resolveFileSource))) .filter((source) => source !== undefined) const fileSourcesSystemPrompt = await createFileSourcesSystemPrompt({ files: resolvedFileSources }); return { system: `${baseSystemPrompt}\n\n${fileSourcesSystemPrompt}` } } if (stepNumber === env.SOURCEBOT_CHAT_MAX_STEP_COUNT - 1) { return { system: `**CRITICAL**: You have reached the maximum number of steps!! YOU MUST PROVIDE YOUR FINAL ANSWER NOW. DO NOT KEEP RESEARCHING.\n\n${answerInstructions}`, activeTools: [], } } return undefined; }, temperature: env.SOURCEBOT_CHAT_MODEL_TEMPERATURE, stopWhen: [ stepCountIsGTE(env.SOURCEBOT_CHAT_MAX_STEP_COUNT), ], toolChoice: "auto", // Let the model decide when to use tools onStepFinish: ({ toolResults }) => { // This takes care of extracting any sources that the LLM has seen as part of // the tool calls it made. toolResults.forEach(({ toolName, output, dynamic }) => { // we don't care about dynamic tool results here. if (dynamic) { return; } if (isServiceError(output)) { // is there something we want to do here? return; } if (toolName === toolNames.readFiles) { output.forEach((file) => { onWriteSource({ type: 'file', language: file.language, repo: file.repository, path: file.path, revision: file.revision, name: file.path.split('/').pop() ?? file.path, }) }) } else if (toolName === toolNames.searchCode) { output.files.forEach((file) => { onWriteSource({ type: 'file', language: file.language, repo: file.repository, path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, }) }) } else if (toolName === toolNames.findSymbolDefinitions || toolName === toolNames.findSymbolReferences) { output.forEach((file) => { onWriteSource({ type: 'file', language: file.language, repo: file.repository, path: file.fileName, revision: file.revision, name: file.fileName.split('/').pop() ?? file.fileName, }) }) } }) }, // Only enable langfuse traces in cloud environments. experimental_telemetry: { isEnabled: env.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT !== undefined, metadata: { langfuseTraceId: traceId, }, }, onError: (error) => { logger.error(error); }, }); return stream; } interface BaseSystemPromptOptions { searchScopeRepoNames: string[]; } export const createBaseSystemPrompt = ({ searchScopeRepoNames, }: BaseSystemPromptOptions) => { return ` You are a powerful agentic AI code assistant built into Sourcebot, the world's best code-intelligence platform. Your job is to help developers understand and navigate their large codebases. Your workflow has two distinct phases: **Phase 1: Research & Analysis** - Analyze the user's question and determine what context you need - Use available tools to gather code, search repositories, find references, etc. - Think through the problem and collect all relevant information - Do NOT provide partial answers or explanations during this phase **Phase 2: Structured Response** - **MANDATORY**: You MUST always enter this phase and provide a structured markdown response, regardless of whether phase 1 was completed or interrupted - Provide your final response based on whatever context you have available - Always format your response according to the required response format below The user has selected the following repositories for analysis: ${searchScopeRepoNames.map(repo => `- ${repo}`).join('\n')} During the research phase, you have these tools available: - \`${toolNames.searchCode}\`: Search for code patterns, functions, or text across repositories - \`${toolNames.readFiles}\`: Read the contents of specific files - \`${toolNames.findSymbolReferences}\`: Find where symbols are referenced - \`${toolNames.findSymbolDefinitions}\`: Find where symbols are defined Use these tools to gather comprehensive context before answering. Always explain why you're using each tool. ${answerInstructions} `; } const answerInstructions = ` When you have sufficient context, output your answer as a structured markdown response. **Required Response Format:** - **CRITICAL**: You MUST always prefix your answer with a \`${ANSWER_TAG}\` tag at the very top of your response - **CRITICAL**: You MUST provide your complete response in markdown format with embedded code references - **CODE REFERENCE REQUIREMENT**: Whenever you mention, discuss, or refer to ANY specific part of the code (files, functions, variables, methods, classes, imports, etc.), you MUST immediately follow with a code reference using the format \`${fileReferenceToString({ repo: 'repository', path: 'filename'})}\` or \`${fileReferenceToString({ repo: 'repository', path: 'filename', range: { startLine: 1, endLine: 10 } })}\` (where the numbers are the start and end line numbers of the code snippet). This includes: - Files (e.g., "The \`auth.ts\` file" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts' })}\`) - Function names (e.g., "The \`getRepos()\` function" → must include \`${fileReferenceToString({ repo: 'repository', path: 'auth.ts', range: { startLine: 15, endLine: 20 } })}\`) - Variable names (e.g., "The \`suggestionQuery\` variable" → must include \`${fileReferenceToString({ repo: 'repository', path: 'search.ts', range: { startLine: 42, endLine: 42 } })}\`) - Any code snippet or line you're explaining - Class names, method calls, imports, etc. - Some examples of both correct and incorrect code references: - Correct: @file:{repository::path/to/file.ts} - Correct: @file:{repository::path/to/file.ts:10-15} - Incorrect: @file{repository::path/to/file.ts} (missing colon) - Incorrect: @file:repository::path/to/file.ts (missing curly braces) - Incorrect: @file:{repository::path/to/file.ts:10-25,30-35} (multiple ranges not supported) - Incorrect: @file:{path/to/file.ts} (missing repository) - Be clear and very concise. Use bullet points where appropriate - Do NOT explain code without providing the exact location reference. Every code mention requires a corresponding \`${FILE_REFERENCE_PREFIX}\` reference - If you cannot provide a code reference for something you're discussing, do not mention that specific code element - Always prefer to use \`${FILE_REFERENCE_PREFIX}\` over \`\`\`code\`\`\` blocks. **Example answer structure:** \`\`\`markdown ${ANSWER_TAG} Authentication in Sourcebot is built on NextAuth.js with a session-based approach using JWT tokens and Prisma as the database adapter ${fileReferenceToString({ repo: 'github.com/sourcebot-dev/sourcebot', path: 'auth.ts', range: { startLine: 135, endLine: 140 } })}. The system supports multiple authentication providers and implements organization-based authorization with role-defined permissions. \`\`\` `; interface FileSourcesSystemPromptOptions { files: { path: string; source: string; repo: string; language: string; revision: string; }[]; } const createFileSourcesSystemPrompt = async ({ files }: FileSourcesSystemPromptOptions) => { return ` The user has mentioned the following files, which are automatically included for analysis. ${files.map(file => ` ${addLineNumbers(file.source)} `).join('\n\n')} `.trim(); } const resolveFileSource = async ({ path, repo, revision }: FileSource) => { const fileSource = await getFileSource({ fileName: path, repository: repo, branch: revision, // @todo: handle multi-tenancy. }, SINGLE_TENANT_ORG_DOMAIN); if (isServiceError(fileSource)) { // @todo: handle this logger.error("Error fetching file source:", fileSource) return undefined; } return { path, source: fileSource.source, repo, language: fileSource.language, revision, } }