Add repo index validation (#339)

* add repo index validation

* add entry to changelog
This commit is contained in:
Michael Sukkarieh 2025-06-06 13:15:27 -07:00 committed by GitHub
parent 27fb5ad294
commit 0f3cdb7dd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 62 additions and 4 deletions

View file

@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added separate page for signup. [#311](https://github.com/sourcebot-dev/sourcebot/pull/331)
- Fix repo images in authed instance case and add manifest json. [#332](https://github.com/sourcebot-dev/sourcebot/pull/332)
- Added encryption logic for license keys. [#335](https://github.com/sourcebot-dev/sourcebot/pull/335)
- Added repo shard validation on startup. [#339](https://github.com/sourcebot-dev/sourcebot/pull/339)
- Added support for a file explorer when browsing files. [#336](https://github.com/sourcebot-dev/sourcebot/pull/336)
## [4.1.1] - 2025-06-03

View file

@ -68,5 +68,6 @@ export const main = async (db: PrismaClient, context: AppContext) => {
connectionManager.registerPollingCallback();
const repoManager = new RepoManager(db, settings, redis, promClient, context);
await repoManager.validateIndexedReposHaveShards();
await repoManager.blockingPollLoop();
}

View file

@ -12,6 +12,7 @@ import { PromClient } from './promClient.js';
import * as Sentry from "@sentry/node";
interface IRepoManager {
validateIndexedReposHaveShards: () => Promise<void>;
blockingPollLoop: () => void;
dispose: () => void;
}
@ -526,6 +527,61 @@ export class RepoManager implements IRepoManager {
}
}
///////////////////////////
// Repo index validation
///////////////////////////
/**
 * Cross-checks the DB against the on-disk index: every repo whose status is
 * INDEXED is expected to have at least one file in the index directory whose
 * name starts with that repo's shard prefix. Repos with no matching file have
 * their status reset to NEW (i.e. they are marked for reindexing).
 *
 * The index directory is listed once and reused for every repo. If the
 * directory cannot be read, the failure is logged once and validation is
 * skipped without modifying any repo.
 *
 * @returns A promise that resolves when validation (and any status update) is complete.
 */
public async validateIndexedReposHaveShards(): Promise<void> {
    logger.info('Validating indexed repos have shards...');

    const indexedRepos = await this.db.repo.findMany({
        where: {
            repoIndexingStatus: RepoIndexingStatus.INDEXED
        }
    });
    logger.info(`Found ${indexedRepos.length} repos in the DB marked as INDEXED`);

    if (indexedRepos.length === 0) {
        return;
    }

    // The directory listing does not depend on the repo being checked, so read it
    // a single time instead of issuing one synchronous readdir per repo.
    let indexFiles: string[];
    try {
        indexFiles = readdirSync(this.ctx.indexPath);
    } catch (error) {
        // Without a listing we cannot tell which repos are missing shards; leave
        // every repo untouched rather than guessing.
        logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
        return;
    }

    // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
    // would need to know how many total shards are expected for this repo
    const reposToReindex: number[] = [];
    for (const repo of indexedRepos) {
        const shardPrefix = getShardPrefix(repo.orgId, repo.id);
        const hasShards = indexFiles.some(file => file.startsWith(shardPrefix));

        if (!hasShards) {
            logger.info(`Repo ${repo.displayName} (id: ${repo.id}) is marked as INDEXED but has no shards on disk. Marking for reindexing.`);
            reposToReindex.push(repo.id);
        }
    }

    if (reposToReindex.length > 0) {
        // Single bulk update for all repos that lost their shards.
        await this.db.repo.updateMany({
            where: {
                id: { in: reposToReindex }
            },
            data: {
                repoIndexingStatus: RepoIndexingStatus.NEW
            }
        });
        logger.info(`Marked ${reposToReindex.length} repos for reindexing due to missing shards`);
    }

    logger.info('Done validating indexed repos have shards');
}
private async fetchAndScheduleRepoTimeouts() {
const repos = await this.db.repo.findMany({
where: {

View file

@ -214,10 +214,8 @@ const initSingleTenancy = async () => {
// Load any connections defined declaratively in the config file.
const configPath = env.CONFIG_PATH;
if (configPath) {
await syncDeclarativeConfig(configPath);
// If we're given a config file, mark the org as onboarded so we don't go through
// the UI conneciton onboarding flow
// the UI connection onboarding flow
await prisma.org.update({
where: {
id: SINGLE_TENANT_ORG_ID,
@ -226,7 +224,9 @@ const initSingleTenancy = async () => {
isOnboarded: true,
}
});
await syncDeclarativeConfig(configPath);
// watch for changes assuming it is a local file
if (!isRemotePath(configPath)) {
watch(configPath, () => {