From 2b423ba7e96cadd33999cde201eed5319e6f0a35 Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Sun, 31 Aug 2025 13:52:51 -0400 Subject: [PATCH] fix(worker): Remove setting `remote.origin.url` for remote git repositories (#483) --- CHANGELOG.md | 3 + packages/backend/src/env.ts | 1 + packages/backend/src/git.ts | 109 ++++++++++++------ packages/backend/src/repoManager.ts | 67 ++++++----- .../[...path]/components/codePreviewPanel.tsx | 1 + .../[...path]/components/treePreviewPanel.tsx | 1 + 6 files changed, 122 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3d3d717..40f3eaf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Remove setting `remote.origin.url` for remote git repositories. [#483](https://github.com/sourcebot-dev/sourcebot/pull/483) + ### Changed - Updated NextJS to version 15. [#477](https://github.com/sourcebot-dev/sourcebot/pull/477) - Add `sessionToken` as optional Bedrock configuration parameter. [#478](https://github.com/sourcebot-dev/sourcebot/pull/478) diff --git a/packages/backend/src/env.ts b/packages/backend/src/env.ts index 735361a8..0a533db0 100644 --- a/packages/backend/src/env.ts +++ b/packages/backend/src/env.ts @@ -43,6 +43,7 @@ export const env = createEnv({ LOGTAIL_TOKEN: z.string().optional(), LOGTAIL_HOST: z.string().url().optional(), + SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"), DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"), CONFIG_PATH: z.string().optional(), diff --git a/packages/backend/src/git.ts b/packages/backend/src/git.ts index d3dfc76f..d35fd2a2 100644 --- a/packages/backend/src/git.ts +++ b/packages/backend/src/git.ts @@ -1,53 +1,72 @@ import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git'; +import { mkdir } from 'node:fs/promises'; +import { env } from './env.js'; type onProgressFn = (event: SimpleGitProgressEvent) => void; -export const cloneRepository = async (cloneURL: string, path: string, onProgress?: onProgressFn) => { - const git = simpleGit({ - progress: onProgress, - }); +export const cloneRepository = async ( + remoteUrl: URL, + path: string, + onProgress?: onProgressFn +) => { try { - await git.clone( - cloneURL, - path, - [ - "--bare", - ] - ); + await mkdir(path, { recursive: true }); - await git.cwd({ + const git = simpleGit({ + progress: onProgress, + }).cwd({ path, - }).addConfig("remote.origin.fetch", "+refs/heads/*:refs/heads/*"); + }) + + await git.init(/*bare = */ true); + + await git.fetch([ + remoteUrl.toString(), + // See https://git-scm.com/book/en/v2/Git-Internals-The-Refspec + "+refs/heads/*:refs/heads/*", + "--progress", + ]); } catch (error: unknown) { - if (error instanceof Error) { - throw new Error(`Failed to clone repository: ${error.message}`); + const baseLog = `Failed to clone repository: ${path}`; + + if (env.SOURCEBOT_LOG_LEVEL !== "debug") { + // Avoid printing the remote URL (that may contain credentials) to logs by default. + throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`); + } else if (error instanceof Error) { + throw new Error(`${baseLog}. Reason: ${error.message}`); } else { - throw new Error(`Failed to clone repository: ${error}`); + throw new Error(`${baseLog}. Error: ${error}`); } } -} - - -export const fetchRepository = async (path: string, onProgress?: onProgressFn) => { - const git = simpleGit({ - progress: onProgress, - }); +}; +export const fetchRepository = async ( + remoteUrl: URL, + path: string, + onProgress?: onProgressFn +) => { try { - await git.cwd({ + const git = simpleGit({ + progress: onProgress, + }).cwd({ path: path, - }).fetch( - "origin", - [ - "--prune", - "--progress" - ] - ); + }) + + await git.fetch([ + remoteUrl.toString(), + "+refs/heads/*:refs/heads/*", + "--prune", + "--progress" + ]); } catch (error: unknown) { - if (error instanceof Error) { - throw new Error(`Failed to fetch repository ${path}: ${error.message}`); + const baseLog = `Failed to fetch repository: ${path}`; + if (env.SOURCEBOT_LOG_LEVEL !== "debug") { + // Avoid printing the remote URL (that may contain credentials) to logs by default. + throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`); + } else if (error instanceof Error) { + throw new Error(`${baseLog}. Reason: ${error.message}`); } else { - throw new Error(`Failed to fetch repository ${path}: ${error}`); + throw new Error(`${baseLog}. Error: ${error}`); } } } @@ -76,6 +95,28 @@ export const upsertGitConfig = async (path: string, gitConfig: Record { + const git = simpleGit({ + progress: onProgress, + }).cwd(path); + + try { + for (const key of keys) { + await git.raw(['config', '--unset', key]); + } + } catch (error: unknown) { + if (error instanceof Error) { + throw new Error(`Failed to unset git config ${path}: ${error.message}`); + } else { + throw new Error(`Failed to unset git config ${path}: ${error}`); + } + } +} + /** * Returns true if `path` is the _root_ of a git repository. */ diff --git a/packages/backend/src/repoManager.ts b/packages/backend/src/repoManager.ts index 87c1f345..d8e091ec 100644 --- a/packages/backend/src/repoManager.ts +++ b/packages/backend/src/repoManager.ts @@ -5,7 +5,7 @@ import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, S import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { AppContext, Settings, repoMetadataSchema } from "./types.js"; import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js"; -import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js"; +import { cloneRepository, fetchRepository, unsetGitConfig, upsertGitConfig } from "./git.js"; import { existsSync, readdirSync, promises } from 'fs'; import { indexGitRepository } from "./zoekt.js"; import { PromClient } from './promClient.js'; @@ -237,12 +237,39 @@ export class RepoManager implements IRepoManager { await promises.rm(repoPath, { recursive: true, force: true }); } - if (existsSync(repoPath) && !isReadOnly) { - logger.info(`Fetching ${repo.displayName}...`); + const credentials = await this.getCloneCredentialsForRepo(repo, this.db); + const remoteUrl = new URL(repo.cloneUrl); + if (credentials) { + // @note: URL has a weird behavior where if you set the password but + // _not_ the username, the ":" delimiter will still be present in the + // URL (e.g., https://:password@example.com). To get around this, if + // we only have a password, we set the username to the password. + // @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA + if (!credentials.username) { + remoteUrl.username = credentials.password; + } else { + remoteUrl.username = credentials.username; + remoteUrl.password = credentials.password; + } + } - const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => { - logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) - })); + if (existsSync(repoPath) && !isReadOnly) { + // @NOTE: in #483, we changed the cloning method s.t., we _no longer_ + // write the clone URL (which could contain a auth token) to the + // `remote.origin.url` entry. For the upgrade scenario, we want + // to unset this key since it is no longer needed, hence this line. + // This will no-op if the key is already unset. + // @see: https://github.com/sourcebot-dev/sourcebot/pull/483 + await unsetGitConfig(repoPath, ["remote.origin.url"]); + + logger.info(`Fetching ${repo.displayName}...`); + const { durationMs } = await measure(() => fetchRepository( + remoteUrl, + repoPath, + ({ method, stage, progress }) => { + logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) + } + )); const fetchDuration_s = durationMs / 1000; process.stdout.write('\n'); @@ -251,25 +278,13 @@ export class RepoManager implements IRepoManager { } else if (!isReadOnly) { logger.info(`Cloning ${repo.displayName}...`); - const auth = await this.getCloneCredentialsForRepo(repo, this.db); - const cloneUrl = new URL(repo.cloneUrl); - if (auth) { - // @note: URL has a weird behavior where if you set the password but - // _not_ the username, the ":" delimiter will still be present in the - // URL (e.g., https://:password@example.com). To get around this, if - // we only have a password, we set the username to the password. - // @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA - if (!auth.username) { - cloneUrl.username = auth.password; - } else { - cloneUrl.username = auth.username; - cloneUrl.password = auth.password; + const { durationMs } = await measure(() => cloneRepository( + remoteUrl, + repoPath, + ({ method, stage, progress }) => { + logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) } - } - - const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => { - logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) - })); + )); const cloneDuration_s = durationMs / 1000; process.stdout.write('\n'); @@ -540,7 +555,7 @@ export class RepoManager implements IRepoManager { public async validateIndexedReposHaveShards() { logger.info('Validating indexed repos have shards...'); - + const indexedRepos = await this.db.repo.findMany({ where: { repoIndexingStatus: RepoIndexingStatus.INDEXED @@ -556,7 +571,7 @@ export class RepoManager implements IRepoManager { const reposToReindex: number[] = []; for (const repo of indexedRepos) { const shardPrefix = getShardPrefix(repo.orgId, repo.id); - + // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic // would need to know how many total shards are expected for this repo let hasShards = false; diff --git a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx index c25f9a33..3330514f 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx +++ b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx @@ -45,6 +45,7 @@ export const CodePreviewPanel = async ({ path, repoName, revisionName, domain }: displayName: repoInfoResponse.displayName, webUrl: repoInfoResponse.webUrl, }} + branchDisplayName={revisionName} /> {(fileSourceResponse.webUrl && codeHostInfo) && (