mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 20:35:24 +00:00
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
* generate protobuf types * stream poc over SSE * wip: make stream search api follow existing schema. Modify UI to support streaming * fix scrolling issue * Dockerfile * wip on lezer parser grammar for query language * add lezer tree -> grpc transformer * remove spammy log message * fix syntax highlighting by adding a module resolution for @lezer/common * further wip on query language * Add case sensitivity and regexp toggles * Improved type safety / cleanup for query lang * support search contexts * update Dockerfile with query langauge package * fix filter * Add skeletons to filter panel when search is streaming * add client side caching * improved cancelation handling * add isSearchExausted flag for flagging when a search captured all results * Add back posthog search_finished event * remove zoekt tenant enforcement * migrate blocking search over to grpc. Centralize everything in searchApi * branch handling * plumb file weburl * add repo_sets filter for repositories a user has access to * refactor a bunch of stuff + add support for passing in Query IR to search api * refactor * dev README * wip on better error handling * error handling for stream path * update mcp * changelog wip * type fix * style * Support rev:* wildcard * changelog * changelog nit * feedback * fix build * update docs and remove uneeded test file
270 lines
10 KiB
JavaScript
270 lines
10 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
// Entry point for the MCP server
|
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
import escapeStringRegexp from 'escape-string-regexp';
|
|
import { z } from 'zod';
|
|
import { listRepos, search, getFileSource } from './client.js';
|
|
import { env, numberSchema } from './env.js';
|
|
import { listReposRequestSchema } from './schemas.js';
|
|
import { TextContent } from './types.js';
|
|
import { isServiceError } from './utils.js';
|
|
|
|
// The MCP server instance that every tool in this file is registered on.
// It identifies itself to clients with the name and version given here.
const server = new McpServer({ name: 'sourcebot-mcp-server', version: '0.1.0' });
|
|
|
|
|
|
/**
 * Registers the `search_code` tool.
 *
 * The handler appends optional `repo:` / `lang:` filters to the caller's regex
 * query, runs it through `search`, and renders one text entry per matching
 * file (optionally followed by fenced code snippets). Output is capped at
 * roughly `maxTokens` tokens, estimated as characters / 4; when the cap is hit
 * the last entry may be cut short and a trailing notice is appended.
 */
server.tool(
    "search_code",
    `Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search.
Results are returned as an array of matching files, with the file's URL, repository, and language.
If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.
If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used).
When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
**ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.`,
    {
        query: z
            .string()
            // The backslashes below are doubled on purpose: inside a template
            // literal a lone `\)`, `\.` or `\ ` is an escape sequence that
            // evaluates to just the following character, which would strip the
            // backslash from the text shown to the model.
            .describe(`The regex pattern to search for. RULES:
1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\\) (e.g., 'console\\.log')
2. **ALWAYS** escape spaces with a single backslash (\\) (e.g., 'console\\ log')
`),
        filterByRepoIds: z
            .array(z.string())
            .describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. **DO NOT** use this property if you want to search all repositories. **YOU MUST** call 'list_repos' first to obtain the exact repository ID.`)
            .optional(),
        filterByLanguages: z
            .array(z.string())
            .describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`)
            .optional(),
        caseSensitive: z
            .boolean()
            .describe(`Whether the search should be case sensitive (default: false).`)
            .optional(),
        includeCodeSnippets: z
            .boolean()
            .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
            .optional(),
        maxTokens: numberSchema
            .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
            // Requests below the configured minimum are raised to that minimum.
            .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
            .optional(),
    },
    async ({
        query,
        filterByRepoIds: repoIds = [],
        filterByLanguages: languages = [],
        maxTokens = env.DEFAULT_MINIMUM_TOKENS,
        includeCodeSnippets = false,
        caseSensitive = false,
    }) => {
        // Repository IDs are regex-escaped because the search below runs with
        // regex matching enabled.
        if (repoIds.length > 0) {
            query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`;
        }

        if (languages.length > 0) {
            query += ` ( lang:${languages.join(' or lang:')} )`;
        }

        const response = await search({
            query,
            matches: env.DEFAULT_MATCHES,
            contextLines: env.DEFAULT_CONTEXT_LINES,
            isRegexEnabled: true,
            isCaseSensitivityEnabled: caseSensitive,
        });

        // Service errors are reported as tool output rather than thrown.
        if (isServiceError(response)) {
            return {
                content: [{
                    type: "text",
                    text: `Error searching code: ${response.message}`,
                }],
            };
        }

        if (response.files.length === 0) {
            return {
                content: [{
                    type: "text",
                    // Reports the full query, including any appended filters.
                    text: `No results found for the query: ${query}`,
                }],
            };
        }

        const content: TextContent[] = [];
        let totalTokens = 0;
        let isResponseTruncated = false;

        for (const file of response.files) {
            const numMatches = file.chunks.reduce(
                (acc, chunk) => acc + chunk.matchRanges.length,
                0,
            );
            let text = `file: ${file.webUrl}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}`;

            if (includeCodeSnippets) {
                const snippets = file.chunks.map(chunk => {
                    return `\`\`\`\n${chunk.content}\n\`\`\``
                }).join('\n');
                text += `\n\n${snippets}`;
            }

            // Rough estimate of the number of tokens in the text
            // @see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
            const tokens = text.length / 4;

            if ((totalTokens + tokens) > maxTokens) {
                // Calculate remaining token budget
                const remainingTokens = maxTokens - totalTokens;

                if (remainingTokens > 100) { // Only truncate if meaningful space left
                    // Truncate text to fit remaining tokens (tokens ≈ chars/4)
                    const maxLength = Math.floor(remainingTokens * 4);
                    const truncatedText = text.substring(0, maxLength) + "\n\n...[content truncated due to token limit]";

                    content.push({
                        type: "text",
                        text: truncatedText,
                    });

                    totalTokens += remainingTokens;
                }

                // Stop at the first file that does not fit; later files are dropped.
                isResponseTruncated = true;
                break;
            }

            totalTokens += tokens;
            content.push({
                type: "text",
                text,
            });
        }

        if (isResponseTruncated) {
            content.push({
                type: "text",
                text: `The response was truncated because the number of tokens exceeded the maximum limit of ${maxTokens}.`,
            });
        }

        return {
            content,
        }
    }
);
|
|
|
|
/**
 * Registers the `list_repos` tool.
 *
 * Fetches all repositories via `listRepos`, optionally keeps only those whose
 * name or display name contains `query` (case-insensitive), sorts them by
 * name, and returns the requested page as one text entry per repository.
 * A trailing entry describes pagination state when the page is empty or more
 * pages remain; an explicit message is returned when nothing matches at all.
 */
server.tool(
    "list_repos",
    "Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.",
    listReposRequestSchema.shape,
    async ({ query, pageNumber = 1, limit = 50 }: {
        query?: string;
        pageNumber?: number;
        limit?: number;
    }) => {
        const response = await listRepos();
        // Service errors are reported as tool output rather than thrown.
        if (isServiceError(response)) {
            return {
                content: [{
                    type: "text",
                    text: `Error listing repositories: ${response.message}`,
                }],
            };
        }

        // Apply query filter if provided
        let filtered = response;
        if (query) {
            const lowerQuery = query.toLowerCase();
            filtered = response.filter(repo =>
                repo.repoName.toLowerCase().includes(lowerQuery) ||
                repo.repoDisplayName?.toLowerCase().includes(lowerQuery)
            );
        }

        // Nothing to page through: say so explicitly instead of returning an
        // empty content array, mirroring what `search_code` does for no results.
        if (filtered.length === 0) {
            return {
                content: [{
                    type: "text",
                    text: query
                        ? `No repositories found matching the query: ${query}`
                        : `No repositories found.`,
                }],
            };
        }

        // Sort alphabetically for consistent pagination. Sort a copy: without a
        // query, `filtered` is the very array returned by `listRepos()`, and
        // `Array.prototype.sort` would reorder it in place.
        const sorted = [...filtered].sort((a, b) => a.repoName.localeCompare(b.repoName));

        // Apply pagination
        // NOTE(review): assumes the request schema restricts pageNumber and
        // limit to positive values — not visible from here, confirm.
        const startIndex = (pageNumber - 1) * limit;
        const endIndex = startIndex + limit;
        const paginated = sorted.slice(startIndex, endIndex);

        // Format output
        const content: TextContent[] = paginated.map(repo => {
            return {
                type: "text",
                text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`,
            }
        });

        // Add pagination info
        if (content.length === 0) {
            // The requested page lies beyond the last result.
            content.push({
                type: "text",
                text: `No results on page ${pageNumber}. Total matching repositories: ${sorted.length}`,
            });
        } else if (sorted.length > endIndex) {
            content.push({
                type: "text",
                text: `Showing ${paginated.length} repositories (page ${pageNumber}). Total matching: ${sorted.length}. Use pageNumber ${pageNumber + 1} to see more.`,
            });
        }

        return {
            content,
        };
    }
);
|
|
|
|
/**
 * Registers the `get_file_source` tool.
 *
 * Looks up a single file through `getFileSource` and returns one text entry:
 * either the file's name, repository, language and source, or an error
 * message when the lookup yields a service error.
 */
server.tool(
    "get_file_source",
    "Fetches the source code for a given file. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.",
    {
        fileName: z.string().describe("The file to fetch the source code for."),
        repoId: z.string().describe("The repository to fetch the source code for. This is the Sourcebot compatible repository ID."),
    },
    async ({ fileName, repoId }) => {
        const result = await getFileSource({ fileName, repository: repoId });

        // Both outcomes produce exactly one text entry; only the text differs.
        const text = isServiceError(result)
            ? `Error fetching file source: ${result.message}`
            : `file: ${fileName}\nrepository: ${repoId}\nlanguage: ${result.language}\nsource:\n${result.source}`;

        const content: TextContent[] = [{ type: "text", text }];
        return { content };
    }
);
|
|
|
|
|
|
|
|
/**
 * Connects the MCP server to a freshly created stdio transport.
 * The returned promise settles once `server.connect` does.
 */
const runServer = async (): Promise<void> => {
    await server.connect(new StdioServerTransport());
};
|
|
|
|
// Start the server. If startup rejects, log the failure to stderr and exit
// with a non-zero status code.
runServer().catch((startupError) => {
    console.error('Failed to start MCP server:', startupError);
    process.exit(1);
});
|