Display name improvements (#259)

This commit is contained in:
Brendan Kellam 2025-04-02 17:50:48 -07:00 committed by GitHub
parent d55bf83ac1
commit bbd8b221d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 140 additions and 63 deletions

16
.vscode/sourcebot.code-workspace vendored Normal file
View file

@ -0,0 +1,16 @@
{
"folders": [
{
"path": ".."
},
{
"path": "../vendor/zoekt"
}
],
"settings": {
"files.associations": {
"*.json": "jsonc",
"index.json": "json"
}
}
}

View file

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixes
- Change connection manager upsert timeout to 5 minutes
- Fix issue with repo display names being poorly formatted, especially for Gerrit. ([#259](https://github.com/sourcebot-dev/sourcebot/pull/259))
## [3.0.1] - 2025-04-01

View file

@ -1,21 +1,15 @@
import { simpleGit, SimpleGitProgressEvent } from 'simple-git';
export const cloneRepository = async (cloneURL: string, path: string, gitConfig?: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => {
const git = simpleGit({
progress: onProgress,
});
const configParams = Object.entries(gitConfig ?? {}).flatMap(
([key, value]) => ['--config', `${key}=${value}`]
);
try {
await git.clone(
cloneURL,
path,
[
"--bare",
...configParams
]
);
@ -48,6 +42,26 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
}
}
/**
 * Applies the gitConfig to the repo at the given path. Note that this will
 * override the values for any existing keys, and append new values for keys
 * that do not exist yet. It will _not_ remove any existing keys that are not
 * present in gitConfig.
 *
 * Entries are written one at a time, in the iteration order of
 * `Object.entries(gitConfig)`. The first failure aborts the loop; entries
 * written before the failure are not rolled back.
 *
 * @param path - Directory of the git repository; used as the working
 * directory for the git client.
 * @param gitConfig - Key/value pairs to write, one `addConfig` call per entry.
 * @param onProgress - Optional progress callback handed to simple-git.
 * @throws Error when a key cannot be written. The message names the repo
 * path, the offending key, and the underlying failure reason.
 */
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
    const git = simpleGit({
        progress: onProgress,
    }).cwd(path);

    for (const [key, value] of Object.entries(gitConfig)) {
        try {
            await git.addConfig(key, value);
        } catch (error) {
            // Surface the failing key and the original reason instead of
            // discarding the caught error; otherwise a failed config write
            // cannot be diagnosed from the logs.
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to set git config ${path} (key: ${key}): ${reason}`);
        }
    }
}
export const getBranches = async (path: string) => {
const git = simpleGit();
const branches = await git.cwd({

View file

@ -8,6 +8,7 @@ import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js";
import { GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { RepoMetadata } from './types.js';
import path from 'path';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
@ -29,10 +30,13 @@ export const compileGithubConfig = async (
const notFound = gitHubReposResult.notFound;
const hostUrl = config.url ?? 'https://github.com';
const hostname = new URL(hostUrl).hostname;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');
const repos = gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`;
const repoDisplayName = repo.full_name;
const repoName = path.join(repoNameRoot, repoDisplayName);
const cloneUrl = new URL(repo.clone_url!);
const record: RepoData = {
@ -42,6 +46,7 @@ export const compileGithubConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: repo.html_url,
name: repoName,
displayName: repoDisplayName,
imageUrl: repo.owner.avatar_url,
isFork: repo.fork,
isArchived: !!repo.archived,
@ -67,6 +72,7 @@ export const compileGithubConfig = async (
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork),
'zoekt.public': marshalBool(repo.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
@ -93,13 +99,16 @@ export const compileGitlabConfig = async (
const notFound = gitlabReposResult.notFound;
const hostUrl = config.url ?? 'https://gitlab.com';
const hostname = new URL(hostUrl).hostname;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');
const repos = gitlabRepos.map((project) => {
const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
const cloneUrl = new URL(project.http_url_to_repo);
const isFork = project.forked_from_project !== undefined;
const repoName = `${hostname}/${project.path_with_namespace}`;
const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName);
const record: RepoData = {
external_id: project.id.toString(),
@ -108,6 +117,7 @@ export const compileGitlabConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: projectUrl,
name: repoName,
displayName: repoDisplayName,
imageUrl: project.avatar_url,
isFork: isFork,
isArchived: !!project.archived,
@ -130,7 +140,8 @@ export const compileGitlabConfig = async (
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(project.archived),
'zoekt.fork': marshalBool(isFork),
'zoekt.public': marshalBool(project.private === false)
'zoekt.public': marshalBool(project.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
@ -157,11 +168,14 @@ export const compileGiteaConfig = async (
const notFound = giteaReposResult.notFound;
const hostUrl = config.url ?? 'https://gitea.com';
const hostname = new URL(hostUrl).hostname;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');
const repos = giteaRepos.map((repo) => {
const cloneUrl = new URL(repo.clone_url!);
const repoName = `${hostname}/${repo.full_name!}`;
const repoDisplayName = repo.full_name!;
const repoName = path.join(repoNameRoot, repoDisplayName);
const record: RepoData = {
external_id: repo.id!.toString(),
@ -170,6 +184,7 @@ export const compileGiteaConfig = async (
cloneUrl: cloneUrl.toString(),
webUrl: repo.html_url,
name: repoName,
displayName: repoDisplayName,
imageUrl: repo.owner?.avatar_url,
isFork: repo.fork!,
isArchived: !!repo.archived,
@ -191,6 +206,7 @@ export const compileGiteaConfig = async (
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork!),
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
@ -212,27 +228,32 @@ export const compileGerritConfig = async (
orgId: number) => {
const gerritRepos = await getGerritReposFromConfig(config);
const hostUrl = (config.url ?? 'https://gerritcodereview.com').replace(/\/$/, ''); // Remove trailing slash
const hostname = new URL(hostUrl).hostname;
const hostUrl = config.url;
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');
const repos = gerritRepos.map((project) => {
const repoId = `${hostname}/${project.name}`;
const cloneUrl = new URL(`${config.url}/${encodeURIComponent(project.name)}`);
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
const repoDisplayName = project.name;
const repoName = path.join(repoNameRoot, repoDisplayName);
let webUrl = "https://www.gerritcodereview.com/";
// Gerrit projects can have multiple web links; use the first one
if (project.web_links) {
const webLink = project.web_links[0];
if (webLink) {
webUrl = webLink.url;
const webUrl = (() => {
if (!project.web_links || project.web_links.length === 0) {
return null;
}
}
// Handle case where webUrl is just a gitiles path
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
if (webUrl.startsWith('/plugins/gitiles/')) {
webUrl = `${hostUrl}${webUrl}`;
}
const webLink = project.web_links[0];
const webUrl = webLink.url;
// Handle case where webUrl is just a gitiles path
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
if (webUrl.startsWith('/plugins/gitiles/')) {
return path.join(hostUrl, webUrl);
} else {
return webUrl;
}
})();
const record: RepoData = {
external_id: project.id.toString(),
@ -240,7 +261,8 @@ export const compileGerritConfig = async (
external_codeHostUrl: hostUrl,
cloneUrl: cloneUrl.toString(),
webUrl: webUrl,
name: project.name,
name: repoName,
displayName: repoDisplayName,
isFork: false,
isArchived: false,
org: {
@ -256,11 +278,12 @@ export const compileGerritConfig = async (
metadata: {
gitConfig: {
'zoekt.web-url-type': 'gitiles',
'zoekt.web-url': webUrl,
'zoekt.name': repoId,
'zoekt.web-url': webUrl ?? '',
'zoekt.name': repoName,
'zoekt.archived': marshalBool(false),
'zoekt.fork': marshalBool(false),
'zoekt.public': marshalBool(true),
'zoekt.display-name': repoDisplayName,
},
} satisfies RepoMetadata,
};

View file

@ -3,9 +3,9 @@ import { Redis } from 'ioredis';
import { createLogger } from "./logger.js";
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { AppContext, Settings, RepoMetadata } from "./types.js";
import { AppContext, Settings, repoMetadataSchema } from "./types.js";
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
import { cloneRepository, fetchRepository } from "./git.js";
import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js";
import { existsSync, readdirSync, promises } from 'fs';
import { indexGitRepository } from "./zoekt.js";
import { PromClient } from './promClient.js';
@ -200,8 +200,7 @@ export class RepoManager implements IRepoManager {
let cloneDuration_s: number | undefined = undefined;
const repoPath = getRepoPath(repo, this.ctx);
const metadata = repo.metadata as RepoMetadata;
const metadata = repoMetadataSchema.parse(repo.metadata);
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
@ -240,7 +239,7 @@ export class RepoManager implements IRepoManager {
}
}
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;
@ -249,6 +248,13 @@ export class RepoManager implements IRepoManager {
this.logger.info(`Cloned ${repo.id} in ${cloneDuration_s}s`);
}
// Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we
// have in the DB.
if (metadata.gitConfig) {
await upsertGitConfig(repoPath, metadata.gitConfig);
}
this.logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000;

View file

@ -1,4 +1,5 @@
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
import { z } from "zod";
export type AppContext = {
/**
@ -16,28 +17,32 @@ export type AppContext = {
export type Settings = Required<SettingsSchema>;
/**
* Structure of the `metadata` field in the `Repo` table.
*/
export type RepoMetadata = {
// Structure of the `metadata` field in the `Repo` table.
//
// @WARNING: If you modify this schema, please make sure it is backwards
// compatible with any prior versions of the schema!!
// @NOTE: If you move this schema, please update the comment in schema.prisma
// to point to the new location.
export const repoMetadataSchema = z.object({
/**
* A set of key-value pairs that will be used as git config
* variables when cloning the repo.
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
*/
gitConfig?: Record<string, string>;
gitConfig: z.record(z.string(), z.string()).optional(),
/**
* A list of branches to index. Glob patterns are supported.
*/
branches?: string[];
branches: z.array(z.string()).optional(),
/**
* A list of tags to index. Glob patterns are supported.
*/
tags?: string[];
}
tags: z.array(z.string()).optional(),
});
export type RepoMetadata = z.infer<typeof repoMetadataSchema>;
// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {

View file

@ -1,5 +1,5 @@
import { exec } from "child_process";
import { AppContext, RepoMetadata, Settings } from "./types.js";
import { AppContext, repoMetadataSchema, Settings } from "./types.js";
import { Repo } from "@sourcebot/db";
import { getRepoPath } from "./utils.js";
import { getShardPrefix } from "./utils.js";
@ -17,7 +17,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
const repoPath = getRepoPath(repo, ctx);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repo.metadata as RepoMetadata;
const metadata = repoMetadataSchema.parse(repo.metadata);
if (metadata.branches) {
const branchGlobs = metadata.branches
@ -57,7 +57,17 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
revisions = revisions.slice(0, 64);
}
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -max_trigram_count ${settings.maxTrigramCount} -file_limit ${settings.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;
const command = [
'zoekt-git-index',
'-allow_missing_branches',
`-index ${ctx.indexPath}`,
`-max_trigram_count ${settings.maxTrigramCount}`,
`-file_limit ${settings.maxFileSize}`,
`-branches ${revisions.join(',')}`,
`-tenant_id ${repo.orgId}`,
`-shard_prefix ${shardPrefix}`,
repoPath
].join(' ');
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
exec(command, (error, stdout, stderr) => {

View file

@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "Repo" ADD COLUMN "displayName" TEXT;

View file

@ -38,12 +38,13 @@ enum StripeSubscriptionStatus {
model Repo {
id Int @id @default(autoincrement())
name String
displayName String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
indexedAt DateTime?
isFork Boolean
isArchived Boolean
metadata Json
metadata Json // For schema see repoMetadataSchema in packages/backend/src/types.ts
cloneUrl String
webUrl String?
connections RepoToConnection[]

View file

@ -429,6 +429,7 @@ export const getRepos = async (domain: string, filter: { status?: RepoIndexingSt
codeHostType: repo.external_codeHostType,
repoId: repo.id,
repoName: repo.name,
repoDisplayName: repo.displayName ?? undefined,
repoCloneUrl: repo.cloneUrl,
webUrl: repo.webUrl ?? undefined,
linkedConnections: repo.connections.map(({ connection }) => ({

View file

@ -73,7 +73,7 @@ const RepositoryBadge = ({
return {
repoIcon: <FileIcon className="w-4 h-4" />,
displayName: repo.repoName.split('/').slice(-2).join('/'),
displayName: repo.repoName,
repoLink: undefined,
}
})();

View file

@ -39,7 +39,7 @@ export const RepositoryTable = ({ isAddNewRepoButtonVisible }: RepositoryTablePr
if (!repos) return [];
return repos.map((repo): RepositoryColumnInfo => ({
name: repo.repoName.split('/').length > 2 ? repo.repoName.split('/').slice(-2).join('/') : repo.repoName,
name: repo.repoDisplayName ?? repo.repoName,
imageUrl: repo.imageUrl,
connections: repo.linkedConnections,
repoIndexingStatus: repo.repoIndexingStatus as RepoIndexingStatus,

View file

@ -168,6 +168,7 @@ export const repositoryQuerySchema = z.object({
codeHostType: z.string(),
repoId: z.number(),
repoName: z.string(),
repoDisplayName: z.string().optional(),
repoCloneUrl: z.string(),
webUrl: z.string().optional(),
linkedConnections: z.array(z.object({

View file

@ -47,22 +47,19 @@ export const getRepoCodeHostInfo = (repo?: Repository): CodeHostInfo | undefined
return undefined;
}
const webUrlType = repo.RawConfig ? repo.RawConfig['web-url-type'] : undefined;
if (!webUrlType) {
if (!repo.RawConfig) {
return undefined;
}
const url = new URL(repo.URL);
const displayName = url.pathname.slice(1);
// @todo : use zod to validate config schema
const webUrlType = repo.RawConfig['web-url-type']!;
const displayName = repo.RawConfig['display-name'] ?? repo.RawConfig['name']!;
return _getCodeHostInfoInternal(webUrlType, displayName, repo.URL);
}
export const getRepoQueryCodeHostInfo = (repo?: RepositoryQuery): CodeHostInfo | undefined => {
if (!repo) {
return undefined;
}
const displayName = repo.repoName.split('/').slice(-2).join('/');
/**
 * Builds the code host info for a repository query result by delegating to
 * `_getCodeHostInfoInternal`.
 *
 * The name passed along is `repoDisplayName` when present, otherwise
 * `repoName`. The URL passed along is `webUrl` when present, otherwise
 * `repoCloneUrl`.
 */
export const getRepoQueryCodeHostInfo = (repo: RepositoryQuery): CodeHostInfo | undefined => {
    const { codeHostType, repoDisplayName, repoName, webUrl, repoCloneUrl } = repo;

    return _getCodeHostInfoInternal(
        codeHostType,
        repoDisplayName ?? repoName,
        webUrl ?? repoCloneUrl,
    );
}