fix(worker): Remove setting remote.origin.url for remote git repositories (#483)

This commit is contained in:
Brendan Kellam 2025-08-31 13:52:51 -04:00 committed by GitHub
parent ca9069e0fa
commit 2b423ba7e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 122 additions and 60 deletions

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Fixed
- Remove setting `remote.origin.url` for remote git repositories. [#483](https://github.com/sourcebot-dev/sourcebot/pull/483)
### Changed ### Changed
- Updated NextJS to version 15. [#477](https://github.com/sourcebot-dev/sourcebot/pull/477) - Updated NextJS to version 15. [#477](https://github.com/sourcebot-dev/sourcebot/pull/477)
- Add `sessionToken` as optional Bedrock configuration parameter. [#478](https://github.com/sourcebot-dev/sourcebot/pull/478) - Add `sessionToken` as optional Bedrock configuration parameter. [#478](https://github.com/sourcebot-dev/sourcebot/pull/478)

View file

@ -43,6 +43,7 @@ export const env = createEnv({
LOGTAIL_TOKEN: z.string().optional(), LOGTAIL_TOKEN: z.string().optional(),
LOGTAIL_HOST: z.string().url().optional(), LOGTAIL_HOST: z.string().url().optional(),
SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"),
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"), DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
CONFIG_PATH: z.string().optional(), CONFIG_PATH: z.string().optional(),

View file

@ -1,53 +1,72 @@
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git'; import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
import { mkdir } from 'node:fs/promises';
import { env } from './env.js';
type onProgressFn = (event: SimpleGitProgressEvent) => void; type onProgressFn = (event: SimpleGitProgressEvent) => void;
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: onProgressFn) => { export const cloneRepository = async (
const git = simpleGit({ remoteUrl: URL,
progress: onProgress, path: string,
}); onProgress?: onProgressFn
) => {
try { try {
await git.clone( await mkdir(path, { recursive: true });
cloneURL,
path,
[
"--bare",
]
);
await git.cwd({ const git = simpleGit({
progress: onProgress,
}).cwd({
path, path,
}).addConfig("remote.origin.fetch", "+refs/heads/*:refs/heads/*"); })
await git.init(/*bare = */ true);
await git.fetch([
remoteUrl.toString(),
// See https://git-scm.com/book/en/v2/Git-Internals-The-Refspec
"+refs/heads/*:refs/heads/*",
"--progress",
]);
} catch (error: unknown) { } catch (error: unknown) {
if (error instanceof Error) { const baseLog = `Failed to clone repository: ${path}`;
throw new Error(`Failed to clone repository: ${error.message}`);
if (env.SOURCEBOT_LOG_LEVEL !== "debug") {
// Avoid printing the remote URL (that may contain credentials) to logs by default.
throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`);
} else if (error instanceof Error) {
throw new Error(`${baseLog}. Reason: ${error.message}`);
} else { } else {
throw new Error(`Failed to clone repository: ${error}`); throw new Error(`${baseLog}. Error: ${error}`);
} }
} }
} };
export const fetchRepository = async (path: string, onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
});
export const fetchRepository = async (
remoteUrl: URL,
path: string,
onProgress?: onProgressFn
) => {
try { try {
await git.cwd({ const git = simpleGit({
progress: onProgress,
}).cwd({
path: path, path: path,
}).fetch( })
"origin",
[ await git.fetch([
"--prune", remoteUrl.toString(),
"--progress" "+refs/heads/*:refs/heads/*",
] "--prune",
); "--progress"
]);
} catch (error: unknown) { } catch (error: unknown) {
if (error instanceof Error) { const baseLog = `Failed to fetch repository: ${path}`;
throw new Error(`Failed to fetch repository ${path}: ${error.message}`); if (env.SOURCEBOT_LOG_LEVEL !== "debug") {
// Avoid printing the remote URL (that may contain credentials) to logs by default.
throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`);
} else if (error instanceof Error) {
throw new Error(`${baseLog}. Reason: ${error.message}`);
} else { } else {
throw new Error(`Failed to fetch repository ${path}: ${error}`); throw new Error(`${baseLog}. Error: ${error}`);
} }
} }
} }
@ -76,6 +95,28 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
} }
} }
/**
* Unsets the specified keys in the git config for the repo at the given path.
* If a key is not set, this is a no-op.
*/
export const unsetGitConfig = async (path: string, keys: string[], onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
}).cwd(path);
try {
for (const key of keys) {
await git.raw(['config', '--unset', key]);
}
} catch (error: unknown) {
if (error instanceof Error) {
throw new Error(`Failed to unset git config ${path}: ${error.message}`);
} else {
throw new Error(`Failed to unset git config ${path}: ${error}`);
}
}
}
/** /**
* Returns true if `path` is the _root_ of a git repository. * Returns true if `path` is the _root_ of a git repository.
*/ */

View file

@ -5,7 +5,7 @@ import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, S
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { AppContext, Settings, repoMetadataSchema } from "./types.js"; import { AppContext, Settings, repoMetadataSchema } from "./types.js";
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js"; import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js"; import { cloneRepository, fetchRepository, unsetGitConfig, upsertGitConfig } from "./git.js";
import { existsSync, readdirSync, promises } from 'fs'; import { existsSync, readdirSync, promises } from 'fs';
import { indexGitRepository } from "./zoekt.js"; import { indexGitRepository } from "./zoekt.js";
import { PromClient } from './promClient.js'; import { PromClient } from './promClient.js';
@ -237,12 +237,39 @@ export class RepoManager implements IRepoManager {
await promises.rm(repoPath, { recursive: true, force: true }); await promises.rm(repoPath, { recursive: true, force: true });
} }
if (existsSync(repoPath) && !isReadOnly) { const credentials = await this.getCloneCredentialsForRepo(repo, this.db);
logger.info(`Fetching ${repo.displayName}...`); const remoteUrl = new URL(repo.cloneUrl);
if (credentials) {
// @note: URL has a weird behavior where if you set the password but
// _not_ the username, the ":" delimiter will still be present in the
// URL (e.g., https://:password@example.com). To get around this, if
// we only have a password, we set the username to the password.
// @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA
if (!credentials.username) {
remoteUrl.username = credentials.password;
} else {
remoteUrl.username = credentials.username;
remoteUrl.password = credentials.password;
}
}
const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => { if (existsSync(repoPath) && !isReadOnly) {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) // @NOTE: in #483, we changed the cloning method s.t., we _no longer_
})); // write the clone URL (which could contain a auth token) to the
// `remote.origin.url` entry. For the upgrade scenario, we want
// to unset this key since it is no longer needed, hence this line.
// This will no-op if the key is already unset.
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
await unsetGitConfig(repoPath, ["remote.origin.url"]);
logger.info(`Fetching ${repo.displayName}...`);
const { durationMs } = await measure(() => fetchRepository(
remoteUrl,
repoPath,
({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}
));
const fetchDuration_s = durationMs / 1000; const fetchDuration_s = durationMs / 1000;
process.stdout.write('\n'); process.stdout.write('\n');
@ -251,25 +278,13 @@ export class RepoManager implements IRepoManager {
} else if (!isReadOnly) { } else if (!isReadOnly) {
logger.info(`Cloning ${repo.displayName}...`); logger.info(`Cloning ${repo.displayName}...`);
const auth = await this.getCloneCredentialsForRepo(repo, this.db); const { durationMs } = await measure(() => cloneRepository(
const cloneUrl = new URL(repo.cloneUrl); remoteUrl,
if (auth) { repoPath,
// @note: URL has a weird behavior where if you set the password but ({ method, stage, progress }) => {
// _not_ the username, the ":" delimiter will still be present in the logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
// URL (e.g., https://:password@example.com). To get around this, if
// we only have a password, we set the username to the password.
// @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA
if (!auth.username) {
cloneUrl.username = auth.password;
} else {
cloneUrl.username = auth.username;
cloneUrl.password = auth.password;
} }
} ));
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}));
const cloneDuration_s = durationMs / 1000; const cloneDuration_s = durationMs / 1000;
process.stdout.write('\n'); process.stdout.write('\n');
@ -540,7 +555,7 @@ export class RepoManager implements IRepoManager {
public async validateIndexedReposHaveShards() { public async validateIndexedReposHaveShards() {
logger.info('Validating indexed repos have shards...'); logger.info('Validating indexed repos have shards...');
const indexedRepos = await this.db.repo.findMany({ const indexedRepos = await this.db.repo.findMany({
where: { where: {
repoIndexingStatus: RepoIndexingStatus.INDEXED repoIndexingStatus: RepoIndexingStatus.INDEXED
@ -556,7 +571,7 @@ export class RepoManager implements IRepoManager {
const reposToReindex: number[] = []; const reposToReindex: number[] = [];
for (const repo of indexedRepos) { for (const repo of indexedRepos) {
const shardPrefix = getShardPrefix(repo.orgId, repo.id); const shardPrefix = getShardPrefix(repo.orgId, repo.id);
// TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
// would need to know how many total shards are expected for this repo // would need to know how many total shards are expected for this repo
let hasShards = false; let hasShards = false;

View file

@ -45,6 +45,7 @@ export const CodePreviewPanel = async ({ path, repoName, revisionName, domain }:
displayName: repoInfoResponse.displayName, displayName: repoInfoResponse.displayName,
webUrl: repoInfoResponse.webUrl, webUrl: repoInfoResponse.webUrl,
}} }}
branchDisplayName={revisionName}
/> />
{(fileSourceResponse.webUrl && codeHostInfo) && ( {(fileSourceResponse.webUrl && codeHostInfo) && (
<a <a

View file

@ -40,6 +40,7 @@ export const TreePreviewPanel = async ({ path, repoName, revisionName, domain }:
}} }}
pathType="tree" pathType="tree"
isFileIconVisible={false} isFileIconVisible={false}
branchDisplayName={revisionName}
/> />
</div> </div>
<Separator /> <Separator />