mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 04:15:30 +00:00
wip
This commit is contained in:
parent
775b87a06c
commit
32c68e7b72
2 changed files with 67 additions and 59 deletions
|
|
@ -1,17 +1,15 @@
|
||||||
import { createBullBoard } from '@bull-board/api';
|
|
||||||
import { ExpressAdapter } from '@bull-board/express';
|
|
||||||
import * as Sentry from '@sentry/node';
|
import * as Sentry from '@sentry/node';
|
||||||
import { PrismaClient, Repo, RepoJobStatus, RepoJobType } from "@sourcebot/db";
|
import { PrismaClient, Repo, RepoJobStatus, RepoJobType } from "@sourcebot/db";
|
||||||
import { createLogger, Logger } from "@sourcebot/logger";
|
import { createLogger, Logger } from "@sourcebot/logger";
|
||||||
import express from 'express';
|
|
||||||
import { BullBoardGroupMQAdapter, Job, Queue, ReservedJob, Worker } from "groupmq";
|
|
||||||
import { Redis } from 'ioredis';
|
|
||||||
import { AppContext, repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
|
|
||||||
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, measure } from './utils.js';
|
|
||||||
import { existsSync } from 'fs';
|
import { existsSync } from 'fs';
|
||||||
|
import { readdir, rm } from 'fs/promises';
|
||||||
|
import { Job, Queue, ReservedJob, Worker } from "groupmq";
|
||||||
|
import { Redis } from 'ioredis';
|
||||||
|
import { env } from './env.js';
|
||||||
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
|
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
|
||||||
|
import { AppContext, repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
|
||||||
|
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
|
||||||
import { indexGitRepository } from './zoekt.js';
|
import { indexGitRepository } from './zoekt.js';
|
||||||
import { rm, readdir } from 'fs/promises';
|
|
||||||
|
|
||||||
const LOG_TAG = 'index-syncer';
|
const LOG_TAG = 'index-syncer';
|
||||||
const logger = createLogger(LOG_TAG);
|
const logger = createLogger(LOG_TAG);
|
||||||
|
|
@ -26,17 +24,6 @@ type JobPayload = {
|
||||||
|
|
||||||
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 6; // 6 hour indexing timeout
|
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 6; // 6 hour indexing timeout
|
||||||
|
|
||||||
|
|
||||||
const groupmqLifecycleExceptionWrapper = async (name: string, fn: () => Promise<void>) => {
|
|
||||||
try {
|
|
||||||
await fn();
|
|
||||||
} catch (error) {
|
|
||||||
Sentry.captureException(error);
|
|
||||||
logger.error(`Exception thrown while executing lifecycle function \`${name}\`.`, error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
export class IndexSyncer {
|
export class IndexSyncer {
|
||||||
private interval?: NodeJS.Timeout;
|
private interval?: NodeJS.Timeout;
|
||||||
private queue: Queue<JobPayload>;
|
private queue: Queue<JobPayload>;
|
||||||
|
|
@ -52,8 +39,10 @@ export class IndexSyncer {
|
||||||
redis,
|
redis,
|
||||||
namespace: 'index-sync-queue',
|
namespace: 'index-sync-queue',
|
||||||
jobTimeoutMs: JOB_TIMEOUT_MS,
|
jobTimeoutMs: JOB_TIMEOUT_MS,
|
||||||
logger,
|
maxAttempts: 3,
|
||||||
maxAttempts: 1,
|
...(env.SOURCEBOT_LOG_LEVEL === 'debug' ? {
|
||||||
|
logger,
|
||||||
|
}: {}),
|
||||||
});
|
});
|
||||||
|
|
||||||
this.worker = new Worker<JobPayload>({
|
this.worker = new Worker<JobPayload>({
|
||||||
|
|
@ -61,25 +50,15 @@ export class IndexSyncer {
|
||||||
maxStalledCount: 1,
|
maxStalledCount: 1,
|
||||||
handler: this.runJob.bind(this),
|
handler: this.runJob.bind(this),
|
||||||
concurrency: this.settings.maxRepoIndexingJobConcurrency,
|
concurrency: this.settings.maxRepoIndexingJobConcurrency,
|
||||||
logger,
|
...(env.SOURCEBOT_LOG_LEVEL === 'debug' ? {
|
||||||
|
logger,
|
||||||
|
}: {}),
|
||||||
});
|
});
|
||||||
|
|
||||||
this.worker.on('completed', this.onJobCompleted.bind(this));
|
this.worker.on('completed', this.onJobCompleted.bind(this));
|
||||||
this.worker.on('failed', this.onJobFailed.bind(this));
|
this.worker.on('failed', this.onJobFailed.bind(this));
|
||||||
this.worker.on('stalled', this.onJobStalled.bind(this));
|
this.worker.on('stalled', this.onJobStalled.bind(this));
|
||||||
this.worker.on('error', this.onWorkerError.bind(this));
|
this.worker.on('error', this.onWorkerError.bind(this));
|
||||||
|
|
||||||
// @nocheckin
|
|
||||||
const app = express();
|
|
||||||
const serverAdapter = new ExpressAdapter();
|
|
||||||
|
|
||||||
createBullBoard({
|
|
||||||
queues: [new BullBoardGroupMQAdapter(this.queue, { displayName: 'Index Sync' })],
|
|
||||||
serverAdapter,
|
|
||||||
});
|
|
||||||
|
|
||||||
app.use('/', serverAdapter.getRouter());
|
|
||||||
app.listen(3070);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async startScheduler() {
|
public async startScheduler() {
|
||||||
|
|
@ -215,7 +194,7 @@ export class IndexSyncer {
|
||||||
|
|
||||||
for (const job of jobs) {
|
for (const job of jobs) {
|
||||||
await this.queue.add({
|
await this.queue.add({
|
||||||
groupId: `repo:${job.repoId}`,
|
groupId: `repo:${job.repoId}_${job.repo.name}`,
|
||||||
data: {
|
data: {
|
||||||
jobId: job.id,
|
jobId: job.id,
|
||||||
type,
|
type,
|
||||||
|
|
@ -230,7 +209,7 @@ export class IndexSyncer {
|
||||||
private async runJob(job: ReservedJob<JobPayload>) {
|
private async runJob(job: ReservedJob<JobPayload>) {
|
||||||
const id = job.data.jobId;
|
const id = job.data.jobId;
|
||||||
const logger = createJobLogger(id);
|
const logger = createJobLogger(id);
|
||||||
logger.info(`Running job ${id} for repo ${job.data.repoName}`);
|
logger.info(`Running ${job.data.type} job ${id} for repo ${job.data.repoName} (id: ${job.data.repoId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
|
||||||
|
|
||||||
const { repo, type: jobType } = await this.db.repoJob.update({
|
const { repo, type: jobType } = await this.db.repoJob.update({
|
||||||
where: {
|
where: {
|
||||||
|
|
@ -286,35 +265,35 @@ export class IndexSyncer {
|
||||||
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
|
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
|
||||||
await unsetGitConfig(repoPath, ["remote.origin.url"]);
|
await unsetGitConfig(repoPath, ["remote.origin.url"]);
|
||||||
|
|
||||||
logger.info(`Fetching ${repo.displayName}...`);
|
logger.info(`Fetching ${repo.name} (id: ${repo.id})...`);
|
||||||
const { durationMs } = await measure(() => fetchRepository({
|
const { durationMs } = await measure(() => fetchRepository({
|
||||||
cloneUrl: cloneUrlMaybeWithToken,
|
cloneUrl: cloneUrlMaybeWithToken,
|
||||||
authHeader,
|
authHeader,
|
||||||
path: repoPath,
|
path: repoPath,
|
||||||
onProgress: ({ method, stage, progress }) => {
|
onProgress: ({ method, stage, progress }) => {
|
||||||
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
|
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
const fetchDuration_s = durationMs / 1000;
|
const fetchDuration_s = durationMs / 1000;
|
||||||
|
|
||||||
process.stdout.write('\n');
|
process.stdout.write('\n');
|
||||||
logger.info(`Fetched ${repo.displayName} in ${fetchDuration_s}s`);
|
logger.info(`Fetched ${repo.name} (id: ${repo.id}) in ${fetchDuration_s}s`);
|
||||||
|
|
||||||
} else if (!isReadOnly) {
|
} else if (!isReadOnly) {
|
||||||
logger.info(`Cloning ${repo.displayName}...`);
|
logger.info(`Cloning ${repo.name} (id: ${repo.id})...`);
|
||||||
|
|
||||||
const { durationMs } = await measure(() => cloneRepository({
|
const { durationMs } = await measure(() => cloneRepository({
|
||||||
cloneUrl: cloneUrlMaybeWithToken,
|
cloneUrl: cloneUrlMaybeWithToken,
|
||||||
authHeader,
|
authHeader,
|
||||||
path: repoPath,
|
path: repoPath,
|
||||||
onProgress: ({ method, stage, progress }) => {
|
onProgress: ({ method, stage, progress }) => {
|
||||||
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
|
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
const cloneDuration_s = durationMs / 1000;
|
const cloneDuration_s = durationMs / 1000;
|
||||||
|
|
||||||
process.stdout.write('\n');
|
process.stdout.write('\n');
|
||||||
logger.info(`Cloned ${repo.displayName} in ${cloneDuration_s}s`);
|
logger.info(`Cloned ${repo.name} (id: ${repo.id}) in ${cloneDuration_s}s`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regardless of clone or fetch, always upsert the git config for the repo.
|
// Regardless of clone or fetch, always upsert the git config for the repo.
|
||||||
|
|
@ -324,10 +303,10 @@ export class IndexSyncer {
|
||||||
await upsertGitConfig(repoPath, metadata.gitConfig);
|
await upsertGitConfig(repoPath, metadata.gitConfig);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(`Indexing ${repo.displayName}...`);
|
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
|
||||||
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
|
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
|
||||||
const indexDuration_s = durationMs / 1000;
|
const indexDuration_s = durationMs / 1000;
|
||||||
logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`);
|
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async cleanupRepository(repo: Repo, logger: Logger) {
|
private async cleanupRepository(repo: Repo, logger: Logger) {
|
||||||
|
|
@ -347,7 +326,7 @@ export class IndexSyncer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private onJobCompleted = async (job: Job<JobPayload>) =>
|
private onJobCompleted = async (job: Job<JobPayload>) =>
|
||||||
groupmqLifecycleExceptionWrapper('onJobCompleted', async () => {
|
groupmqLifecycleExceptionWrapper('onJobCompleted', logger, async () => {
|
||||||
const logger = createJobLogger(job.data.jobId);
|
const logger = createJobLogger(job.data.jobId);
|
||||||
const jobData = await this.db.repoJob.update({
|
const jobData = await this.db.repoJob.update({
|
||||||
where: { id: job.data.jobId },
|
where: { id: job.data.jobId },
|
||||||
|
|
@ -365,35 +344,47 @@ export class IndexSyncer {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.info(`Completed index job ${job.data.jobId} for repo ${repo.name}`);
|
logger.info(`Completed index job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id})`);
|
||||||
}
|
}
|
||||||
else if (jobData.type === RepoJobType.CLEANUP) {
|
else if (jobData.type === RepoJobType.CLEANUP) {
|
||||||
const repo = await this.db.repo.delete({
|
const repo = await this.db.repo.delete({
|
||||||
where: { id: jobData.repoId },
|
where: { id: jobData.repoId },
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.info(`Completed cleanup job ${job.data.jobId} for repo ${repo.name}`);
|
logger.info(`Completed cleanup job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id})`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
private onJobFailed = async (job: Job<JobPayload>) =>
|
private onJobFailed = async (job: Job<JobPayload>) =>
|
||||||
groupmqLifecycleExceptionWrapper('onJobFailed', async () => {
|
groupmqLifecycleExceptionWrapper('onJobFailed', logger, async () => {
|
||||||
const logger = createJobLogger(job.data.jobId);
|
const logger = createJobLogger(job.data.jobId);
|
||||||
|
|
||||||
const { repo } = await this.db.repoJob.update({
|
const attempt = job.attemptsMade + 1;
|
||||||
where: { id: job.data.jobId },
|
const wasLastAttempt = attempt >= job.opts.attempts;
|
||||||
data: {
|
|
||||||
completedAt: new Date(),
|
|
||||||
errorMessage: job.failedReason,
|
|
||||||
},
|
|
||||||
select: { repo: true }
|
|
||||||
});
|
|
||||||
|
|
||||||
logger.error(`Failed job ${job.data.jobId} for repo ${repo.name}`);
|
if (wasLastAttempt) {
|
||||||
|
const { repo } = await this.db.repoJob.update({
|
||||||
|
where: { id: job.data.jobId },
|
||||||
|
data: {
|
||||||
|
status: RepoJobStatus.FAILED,
|
||||||
|
completedAt: new Date(),
|
||||||
|
errorMessage: job.failedReason,
|
||||||
|
},
|
||||||
|
select: { repo: true }
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.error(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Failing job.`);
|
||||||
|
} else {
|
||||||
|
const repo = await this.db.repo.findUniqueOrThrow({
|
||||||
|
where: { id: job.data.repoId },
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.warn(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Retrying.`);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
private onJobStalled = async (jobId: string) =>
|
private onJobStalled = async (jobId: string) =>
|
||||||
groupmqLifecycleExceptionWrapper('onJobStalled', async () => {
|
groupmqLifecycleExceptionWrapper('onJobStalled', logger, async () => {
|
||||||
const logger = createJobLogger(jobId);
|
const logger = createJobLogger(jobId);
|
||||||
const { repo } = await this.db.repoJob.update({
|
const { repo } = await this.db.repoJob.update({
|
||||||
where: { id: jobId },
|
where: { id: jobId },
|
||||||
|
|
@ -405,7 +396,7 @@ export class IndexSyncer {
|
||||||
select: { repo: true }
|
select: { repo: true }
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.error(`Job ${jobId} stalled for repo ${repo.name}`);
|
logger.error(`Job ${jobId} stalled for repo ${repo.name} (id: ${repo.id})`);
|
||||||
});
|
});
|
||||||
|
|
||||||
private async onWorkerError(error: Error) {
|
private async onWorkerError(error: Error) {
|
||||||
|
|
|
||||||
|
|
@ -241,3 +241,20 @@ const createGitCloneUrlWithToken = (cloneUrl: string, credentials: { username?:
|
||||||
}
|
}
|
||||||
return url.toString();
|
return url.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps groupmq worker lifecycle callbacks with exception handling. This prevents
|
||||||
|
* uncaught exceptions (e.g., like a RepoJob not existing in the DB) from crashing
|
||||||
|
* the app.
|
||||||
|
* @see: https://openpanel-dev.github.io/groupmq/api-worker/#events
|
||||||
|
*/
|
||||||
|
export const groupmqLifecycleExceptionWrapper = async (name: string, logger: Logger, fn: () => Promise<void>) => {
|
||||||
|
try {
|
||||||
|
await fn();
|
||||||
|
} catch (error) {
|
||||||
|
Sentry.captureException(error);
|
||||||
|
logger.error(`Exception thrown while executing lifecycle function \`${name}\`.`, error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue