diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f593c1c..080f6489 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added seperate page for signup. [#311](https://github.com/sourcebot-dev/sourcebot/pull/331)
 - Fix repo images in authed instance case and add manifest json. [#332](https://github.com/sourcebot-dev/sourcebot/pull/332)
 - Added encryption logic for license keys. [#335](https://github.com/sourcebot-dev/sourcebot/pull/335)
+- Added repo shard validation on startup. [#339](https://github.com/sourcebot-dev/sourcebot/pull/339)
 - Added support for a file explorer when browsing files. [#336](https://github.com/sourcebot-dev/sourcebot/pull/336)
 
 ## [4.1.1] - 2025-06-03
diff --git a/packages/backend/src/main.ts b/packages/backend/src/main.ts
index 7a1aaad6..6806a4e3 100644
--- a/packages/backend/src/main.ts
+++ b/packages/backend/src/main.ts
@@ -68,5 +68,6 @@ export const main = async (db: PrismaClient, context: AppContext) => {
     connectionManager.registerPollingCallback();
 
     const repoManager = new RepoManager(db, settings, redis, promClient, context);
+    await repoManager.validateIndexedReposHaveShards();
     await repoManager.blockingPollLoop();
 }
diff --git a/packages/backend/src/repoManager.ts b/packages/backend/src/repoManager.ts
index d2ae0503..8f16a9d0 100644
--- a/packages/backend/src/repoManager.ts
+++ b/packages/backend/src/repoManager.ts
@@ -12,6 +12,7 @@ import { PromClient } from './promClient.js';
 import * as Sentry from "@sentry/node";
 
 interface IRepoManager {
+    validateIndexedReposHaveShards: () => Promise<void>;
     blockingPollLoop: () => void;
     dispose: () => void;
 }
@@ -526,6 +527,77 @@ export class RepoManager implements IRepoManager {
         }
     }
 
+    ///////////////////////////
+    // Repo index validation
+    ///////////////////////////
+
+    /**
+     * Startup consistency check between the database and the on-disk index.
+     *
+     * Looks up every repo whose status is INDEXED and checks that the index
+     * directory (`ctx.indexPath`) contains at least one file whose name starts
+     * with that repo's shard prefix. Repos with no matching file are set back to
+     * NEW in a single bulk update.
+     *
+     * If the index directory cannot be read, the error is logged and no repo is
+     * modified.
+     */
+    public async validateIndexedReposHaveShards(): Promise<void> {
+        logger.info('Validating indexed repos have shards...');
+
+        const indexedRepos = await this.db.repo.findMany({
+            where: {
+                repoIndexingStatus: RepoIndexingStatus.INDEXED
+            }
+        });
+        logger.info(`Found ${indexedRepos.length} repos in the DB marked as INDEXED`);
+
+        if (indexedRepos.length === 0) {
+            return;
+        }
+
+        // The directory listing does not depend on the repo being checked, so read it
+        // once up front rather than once per repo.
+        let indexFiles: string[];
+        try {
+            indexFiles = readdirSync(this.ctx.indexPath);
+        } catch (error) {
+            logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
+            return;
+        }
+
+        const reposToReindex: number[] = [];
+
+        for (const repo of indexedRepos) {
+            const shardPrefix = getShardPrefix(repo.orgId, repo.id);
+
+            // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
+            // would need to know how many total shards are expected for this repo
+            // NOTE(review): a bare prefix match can also hit another repo's shards if one prefix is a leading
+            // substring of another (e.g. ids 1 and 10) - confirm against getShardPrefix's output format.
+            const hasShards = indexFiles.some(file => file.startsWith(shardPrefix));
+
+            if (!hasShards) {
+                logger.info(`Repo ${repo.displayName} (id: ${repo.id}) is marked as INDEXED but has no shards on disk. Marking for reindexing.`);
+                reposToReindex.push(repo.id);
+            }
+        }
+
+        if (reposToReindex.length > 0) {
+            await this.db.repo.updateMany({
+                where: {
+                    id: { in: reposToReindex }
+                },
+                data: {
+                    repoIndexingStatus: RepoIndexingStatus.NEW
+                }
+            });
+            logger.info(`Marked ${reposToReindex.length} repos for reindexing due to missing shards`);
+        }
+
+        logger.info('Done validating indexed repos have shards');
+    }
+
     private async fetchAndScheduleRepoTimeouts() {
         const repos = await this.db.repo.findMany({
             where: {
diff --git a/packages/web/src/initialize.ts b/packages/web/src/initialize.ts
index 5a9df2d8..bd120ab0 100644
--- a/packages/web/src/initialize.ts
+++ b/packages/web/src/initialize.ts
@@ -214,10 +214,8 @@ const initSingleTenancy = async () => {
     // Load any connections defined declaratively in the config file.
     const configPath = env.CONFIG_PATH;
     if (configPath) {
-        await syncDeclarativeConfig(configPath);
-
         // If we're given a config file, mark the org as onboarded so we don't go through
-        // the UI conneciton onboarding flow
+        // the UI connection onboarding flow
         await prisma.org.update({
             where: {
                 id: SINGLE_TENANT_ORG_ID,
@@ -226,7 +224,9 @@ const initSingleTenancy = async () => {
                 isOnboarded: true,
             }
         });
 
+        await syncDeclarativeConfig(configPath);
+
         // watch for changes assuming it is a local file
         if (!isRemotePath(configPath)) {
             watch(configPath, () => {