fix(backend): Limit concurrent git operations to prevent resource exhaustion (#590)

When syncing generic-git-host connections with thousands of repositories,
an unbounded Promise.all spawned too many concurrent git processes at once,
exhausting system resources (EAGAIN errors). As a result, valid repositories
were incorrectly skipped during sync.

- Add p-limit to cap concurrent git operations (max 100)
- Follow the existing pattern from github.ts for consistency
- Prevent file descriptor and process limit exhaustion
- Use rolling concurrency to avoid head-of-line blocking

Fixes #590
This commit is contained in:
Derek Miller 2025-10-31 20:42:12 -05:00
parent d1655d4587
commit 0ea0b156a5
2 changed files with 9 additions and 2 deletions

View file

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed "The account is already associated with another user" errors with GitLab oauth provider. [#584](https://github.com/sourcebot-dev/sourcebot/pull/584) - Fixed "The account is already associated with another user" errors with GitLab oauth provider. [#584](https://github.com/sourcebot-dev/sourcebot/pull/584)
- Fixed error when viewing a generic git connection in `/settings/connections`. [#588](https://github.com/sourcebot-dev/sourcebot/pull/588) - Fixed error when viewing a generic git connection in `/settings/connections`. [#588](https://github.com/sourcebot-dev/sourcebot/pull/588)
- Fixed issue with an unbounded `Promise.allSettled(...)` when retrieving details from the GitHub API about a large number of repositories (or orgs or users). [#591](https://github.com/sourcebot-dev/sourcebot/pull/591) - Fixed issue with an unbounded `Promise.allSettled(...)` when retrieving details from the GitHub API about a large number of repositories (or orgs or users). [#591](https://github.com/sourcebot-dev/sourcebot/pull/591)
- Fixed resource exhaustion (EAGAIN errors) when syncing generic-git-host connections with thousands of repositories. [#593](https://github.com/sourcebot-dev/sourcebot/pull/593)
## Removed ## Removed
- Removed built-in secret manager. [#592](https://github.com/sourcebot-dev/sourcebot/pull/592) - Removed built-in secret manager. [#592](https://github.com/sourcebot-dev/sourcebot/pull/592)

View file

@ -20,11 +20,17 @@ import assert from 'assert';
import GitUrlParse from 'git-url-parse'; import GitUrlParse from 'git-url-parse';
import { RepoMetadata } from '@sourcebot/shared'; import { RepoMetadata } from '@sourcebot/shared';
import { SINGLE_TENANT_ORG_ID } from './constants.js'; import { SINGLE_TENANT_ORG_ID } from './constants.js';
import pLimit from 'p-limit';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>; export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
const logger = createLogger('repo-compile-utils'); const logger = createLogger('repo-compile-utils');
// Upper bound on the number of git operations allowed to run at the same time.
// Without a cap, processing thousands of repositories simultaneously exhausts
// system resources (EAGAIN errors).
const MAX_CONCURRENT_GIT_OPERATIONS = 100;
// Limiter built from the cap above; work is submitted by wrapping it as
// `gitOperationLimit(async () => { ... })`.
// NOTE(review): declared at module scope, so the cap is presumably shared by
// every caller in this module rather than applied per invocation — confirm.
const gitOperationLimit = pLimit(MAX_CONCURRENT_GIT_OPERATIONS);
type CompileResult = { type CompileResult = {
repoData: RepoData[], repoData: RepoData[],
warnings: string[], warnings: string[],
@ -472,7 +478,7 @@ export const compileGenericGitHostConfig_file = async (
const repos: RepoData[] = []; const repos: RepoData[] = [];
const warnings: string[] = []; const warnings: string[] = [];
await Promise.all(repoPaths.map(async (repoPath) => { await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
const isGitRepo = await isPathAValidGitRepoRoot({ const isGitRepo = await isPathAValidGitRepoRoot({
path: repoPath, path: repoPath,
}); });
@ -526,7 +532,7 @@ export const compileGenericGitHostConfig_file = async (
} }
repos.push(repo); repos.push(repo);
})); })));
return { return {
repoData: repos, repoData: repos,