sourcebot/packages/backend/src/zoekt.ts
Michael Sukkarieh 3b36ffa17e
Add support for structured logs (#323)
* wip on refactoring docs

* wip

* initial structured logs impl

* structured log docs

* create logger package

* add news entry for structured logging

* add logger package to dockerfile and cleanup

* add gh workflow for catching broken links

* further wip

* fix

* further wip on docs

* review feedback

* remove logger dep from mcp package

* fix build errors

* add back auth_url warning

* fix sidebar title consistency

---------

Co-authored-by: bkellam <bshizzle1234@gmail.com>
2025-06-02 11:16:01 -07:00

85 lines
2.6 KiB
TypeScript

import { exec, execFile } from "child_process";
import { AppContext, repoMetadataSchema, Settings } from "./types.js";
import { Repo } from "@sourcebot/db";
import { getRepoPath } from "./utils.js";
import { getShardPrefix } from "./utils.js";
import { getBranches, getTags } from "./git.js";
import micromatch from "micromatch";
import { createLogger } from "@sourcebot/logger";
import { captureEvent } from "./posthog.js";
// Module-level logger shared by this file; 'zoekt' is the label handed to createLogger.
const logger = createLogger('zoekt');
/**
 * Indexes a git repository by running `zoekt-git-index` against its on-disk path.
 *
 * `HEAD` is always indexed. When the repo metadata lists branch and/or tag
 * globs, every branch/tag matching those globs is indexed as well. The total
 * number of revisions is capped at 64; anything beyond that is dropped after
 * a warning is logged and a `backend_revisions_truncated` event is captured.
 *
 * The indexer is spawned with an argument vector (no shell), so ref names and
 * paths containing spaces or shell metacharacters are passed through verbatim
 * instead of being interpreted by a shell.
 *
 * @param repo - Repository record; its `metadata` is validated with `repoMetadataSchema`.
 * @param settings - Supplies `maxTrigramCount` and `maxFileSize` for the indexer.
 * @param ctx - Application context; supplies `indexPath` and is used to resolve the repo path.
 * @returns A promise resolving to the indexer's captured `stdout` and `stderr`.
 * @throws Rejects with the child-process error if the indexer cannot be started or reports an error.
 */
export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: AppContext) => {
    // zoekt has a limit of 64 branches/tags to index.
    const maxRevisions = 64;

    let revisions = ['HEAD'];

    const { path: repoPath } = getRepoPath(repo, ctx);
    const shardPrefix = getShardPrefix(repo.orgId, repo.id);
    const metadata = repoMetadataSchema.parse(repo.metadata);

    if (metadata.branches) {
        const branchGlobs = metadata.branches;
        const allBranches = await getBranches(repoPath);
        const matchingBranches = allBranches
            .filter((branch) => micromatch.isMatch(branch, branchGlobs))
            .map((branch) => `refs/heads/${branch}`);

        revisions = [...revisions, ...matchingBranches];
    }

    if (metadata.tags) {
        const tagGlobs = metadata.tags;
        const allTags = await getTags(repoPath);
        const matchingTags = allTags
            .filter((tag) => micromatch.isMatch(tag, tagGlobs))
            .map((tag) => `refs/tags/${tag}`);

        revisions = [...revisions, ...matchingTags];
    }

    if (revisions.length > maxRevisions) {
        logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
        captureEvent('backend_revisions_truncated', {
            repoId: repo.id,
            revisionCount: revisions.length,
        });
        // HEAD is first in the list, so it always survives the truncation.
        revisions = revisions.slice(0, maxRevisions);
    }

    // Each flag and its value are separate argv entries. Nothing here is parsed
    // by a shell, so a ref name such as `$(cmd)` or a path with spaces is safe.
    const args = [
        '-allow_missing_branches',
        '-index', String(ctx.indexPath),
        '-max_trigram_count', String(settings.maxTrigramCount),
        '-file_limit', String(settings.maxFileSize),
        '-branches', revisions.join(','),
        '-tenant_id', String(repo.orgId),
        '-repo_id', String(repo.id),
        '-shard_prefix', String(shardPrefix),
        repoPath,
    ];

    return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
        execFile('zoekt-git-index', args, (error, stdout, stderr) => {
            if (error) {
                reject(error);
                return;
            }

            resolve({ stdout, stderr });
        });
    });
}