From b452fd2983f0e92b8ba5ba7e41d40692af5aef9b Mon Sep 17 00:00:00 2001 From: Konrad Staniszewski Date: Mon, 2 Dec 2024 16:07:02 -0800 Subject: [PATCH] Gerrit sync (#104) * Basic gerrit sync with working gitiles web-links functionality This adds basic support for gerrit repo code host syncing. Gerrit uses gitiles plugin for code browsing (in most cases). It may be useful to allow users to provide their own web code-browsing url templates in the future. * Add gerrit readme update * Remove config arg from gerrit fetchAllProjects * Remove example urls * Resolve comments --- README.md | 12 +++- packages/backend/src/gerrit.ts | 109 +++++++++++++++++++++++++++++ packages/backend/src/main.ts | 6 ++ packages/backend/src/schemas/v2.ts | 23 +++++- packages/backend/src/utils.ts | 10 +++ packages/web/public/gerrit.svg | 8 +++ packages/web/src/lib/utils.ts | 20 ++++-- schemas/v2/index.json | 58 +++++++++++++++ 8 files changed, 236 insertions(+), 10 deletions(-) create mode 100644 packages/backend/src/gerrit.ts create mode 100644 packages/web/public/gerrit.svg diff --git a/README.md b/README.md index c26e9b86..2161fcf1 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ https://github.com/user-attachments/assets/98d46192-5469-430f-ad9e-5c042adbb10d ## Features - 💻 **One-command deployment**: Get started instantly using Docker on your own machine. -- 🔍 **Multi-repo search**: Effortlessly index and search through multiple public and private repositories in GitHub, GitLab, or Gitea. +- 🔍 **Multi-repo search**: Effortlessly index and search through multiple public and private repositories in GitHub, GitLab, Gitea, or Gerrit. - ⚡**Lightning fast performance**: Built on top of the powerful [Zoekt](https://github.com/sourcegraph/zoekt) search engine. - 📂 **Full file visualization**: Instantly view the entire file when selecting any search result. 
- 🎨 **Modern web app**: Enjoy a sleek interface with features like syntax highlighting, light/dark mode, and vim-style navigation @@ -62,7 +62,7 @@ Sourcebot supports indexing and searching through public and private repositorie GitHub icon - GitHub, GitLab and Gitea. This section will guide you through configuring the repositories that Sourcebot indexes. + GitHub, GitLab, Gitea, and Gerrit. This section will guide you through configuring the repositories that Sourcebot indexes. 1. Create a new folder on your machine that stores your configs and `.sourcebot` cache, and navigate into it: ```sh @@ -261,6 +261,12 @@ docker run -e GITEA_TOKEN=my-secret-token /* additional args */ ghcr.io/s +
+ Gerrit +Gerrit authentication is not yet supported. +
+ + ## Using a self-hosted GitLab / GitHub instance @@ -397,4 +403,4 @@ NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED=1 Sourcebot makes use of the following libraries: -- [@vscode/codicons](https://github.com/microsoft/vscode-codicons) under the [CC BY 4.0 License](https://github.com/microsoft/vscode-codicons/blob/main/LICENSE). \ No newline at end of file +- [@vscode/codicons](https://github.com/microsoft/vscode-codicons) under the [CC BY 4.0 License](https://github.com/microsoft/vscode-codicons/blob/main/LICENSE). diff --git a/packages/backend/src/gerrit.ts b/packages/backend/src/gerrit.ts new file mode 100644 index 00000000..376c662c --- /dev/null +++ b/packages/backend/src/gerrit.ts @@ -0,0 +1,109 @@ +import fetch from 'cross-fetch'; +import { GerritConfig } from './schemas/v2.js'; +import { AppContext, GitRepository } from './types.js'; +import { createLogger } from './logger.js'; +import path from 'path'; +import { measure, marshalBool, excludeReposByName, includeReposByName } from './utils.js'; + +// https://gerrit-review.googlesource.com/Documentation/rest-api.html +interface GerritProjects { + [projectName: string]: GerritProjectInfo; +} + +interface GerritProjectInfo { + id: string; + state?: string; + web_links?: GerritWebLink[]; +} + +interface GerritWebLink { + name: string; + url: string; +} + +const logger = createLogger('Gerrit'); + +export const getGerritReposFromConfig = async (config: GerritConfig, ctx: AppContext) => { + + const url = config.url.endsWith('/') ? 
config.url : `${config.url}/`; + const hostname = new URL(config.url).hostname; + + const { durationMs, data: projects } = await measure(() => + fetchAllProjects(url) + ); + + // exclude "All-Projects" and "All-Users" projects + delete projects['All-Projects']; + delete projects['All-Users']; + + logger.debug(`Fetched ${Object.keys(projects).length} projects in ${durationMs}ms.`); + + let repos: GitRepository[] = Object.keys(projects).map((projectName) => { + const project = projects[projectName]; + let webUrl = "https://www.gerritcodereview.com/"; + // Gerrit projects can have multiple web links; use the first one + if (project.web_links) { + const webLink = project.web_links[0]; + if (webLink) { + webUrl = webLink.url; + } + } + const repoId = `${hostname}/${projectName}`; + const repoPath = path.resolve(path.join(ctx.reposPath, `${repoId}.git`)); + + const cloneUrl = `${url}${encodeURIComponent(projectName)}`; + + return { + vcs: 'git', + codeHost: 'gerrit', + name: projectName, + id: repoId, + cloneUrl: cloneUrl, + path: repoPath, + isStale: false, // Gerrit projects are typically not stale + isFork: false, // Gerrit doesn't have forks in the same way as GitHub + isArchived: false, + gitConfigMetadata: { + // Gerrit uses Gitiles for web UI. 
This can sometimes be "browse" type in zoekt + 'zoekt.web-url-type': 'gitiles', + 'zoekt.web-url': webUrl, + 'zoekt.name': repoId, + 'zoekt.archived': marshalBool(false), + 'zoekt.fork': marshalBool(false), + 'zoekt.public': marshalBool(true), // Assuming projects are public; adjust as needed + }, + branches: [], + tags: [] + } satisfies GitRepository; + }); + + // include repos by glob if specified in config + if (config.projects) { + repos = includeReposByName(repos, config.projects); + } + + if (config.exclude && config.exclude.projects) { + repos = excludeReposByName(repos, config.exclude.projects); + } + + return repos; +}; + +const fetchAllProjects = async (url: string): Promise => { + + const projectsEndpoint = `${url}projects/`; + logger.debug(`Fetching projects from Gerrit at ${projectsEndpoint}...`); + const response = await fetch(projectsEndpoint); + + if (!response.ok) { + throw new Error(`Failed to fetch projects from Gerrit: ${response.statusText}`); + } + + const text = await response.text(); + + // Gerrit prepends ")]}'\n" to prevent XSSI attacks; remove it + // https://gerrit-review.googlesource.com/Documentation/rest-api.html + const jsonText = text.replace(")]}'\n", ''); + const data = JSON.parse(jsonText); + return data; +}; diff --git a/packages/backend/src/main.ts b/packages/backend/src/main.ts index 53d7c37e..8459a355 100644 --- a/packages/backend/src/main.ts +++ b/packages/backend/src/main.ts @@ -4,6 +4,7 @@ import { SourcebotConfigurationSchema } from "./schemas/v2.js"; import { getGitHubReposFromConfig } from "./github.js"; import { getGitLabReposFromConfig } from "./gitlab.js"; import { getGiteaReposFromConfig } from "./gitea.js"; +import { getGerritReposFromConfig } from "./gerrit.js"; import { AppContext, LocalRepository, GitRepository, Repository } from "./types.js"; import { cloneRepository, fetchRepository } from "./git.js"; import { createLogger } from "./logger.js"; @@ -139,6 +140,11 @@ const syncConfig = async (configPath: string, 
db: Database, signal: AbortSignal, configRepos.push(...giteaRepos); break; } + case 'gerrit': { + const gerritRepos = await getGerritReposFromConfig(repoConfig, ctx); + configRepos.push(...gerritRepos); + break; + } case 'local': { const repo = getLocalRepoFromConfig(repoConfig, ctx); configRepos.push(repo); diff --git a/packages/backend/src/schemas/v2.ts b/packages/backend/src/schemas/v2.ts index 2de8bf1b..56cd0f72 100644 --- a/packages/backend/src/schemas/v2.ts +++ b/packages/backend/src/schemas/v2.ts @@ -1,6 +1,6 @@ // THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! -export type Repos = GitHubConfig | GitLabConfig | GiteaConfig | LocalConfig; +export type Repos = GitHubConfig | GitLabConfig | GiteaConfig | GerritConfig | LocalConfig; /** * A Sourcebot configuration file outlines which repositories Sourcebot should sync and index. @@ -173,6 +173,27 @@ export interface GiteaConfig { }; revisions?: GitRevisions; } +export interface GerritConfig { + /** + * Gerrit Configuration + */ + type: "gerrit"; + /** + * The URL of the Gerrit host. + */ + url: string; + /** + * List of specific projects to sync. If not specified, all projects will be synced. Glob patterns are supported + */ + projects?: string[]; + exclude?: { + /** + * List of specific projects to exclude from syncing. + */ + projects?: string[]; + }; + revisions?: GitRevisions; +} export interface LocalConfig { /** * Local Configuration diff --git a/packages/backend/src/utils.ts b/packages/backend/src/utils.ts index 8996dd1e..676ca1a3 100644 --- a/packages/backend/src/utils.ts +++ b/packages/backend/src/utils.ts @@ -48,6 +48,16 @@ export const excludeReposByName = (repos: T[], excludedRep }); } +export const includeReposByName = (repos: T[], includedRepoNames: string[], logger?: Logger) => { + return repos.filter((repo) => { + if (micromatch.isMatch(repo.name, includedRepoNames)) { + logger?.debug(`Including repo ${repo.id}. 
Reason: repos contain ${repo.name}`); + return true; + } + return false; + }); +} + export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => { if (typeof token === 'string') { return token; diff --git a/packages/web/public/gerrit.svg b/packages/web/public/gerrit.svg new file mode 100644 index 00000000..d644a0b3 --- /dev/null +++ b/packages/web/public/gerrit.svg @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/packages/web/src/lib/utils.ts b/packages/web/src/lib/utils.ts index 3e6211bb..b63b784a 100644 --- a/packages/web/src/lib/utils.ts +++ b/packages/web/src/lib/utils.ts @@ -3,6 +3,7 @@ import { twMerge } from "tailwind-merge" import githubLogo from "../../public/github.svg"; import gitlabLogo from "../../public/gitlab.svg"; import giteaLogo from "../../public/gitea.svg"; +import gerritLogo from "../../public/gerrit.svg"; import { ServiceError } from "./serviceError"; import { Repository } from "./types"; @@ -31,7 +32,7 @@ export const createPathWithQueryParams = (path: string, ...queryParams: [string, } type CodeHostInfo = { - type: "github" | "gitlab" | "gitea"; + type: "github" | "gitlab" | "gitea" | "gerrit"; displayName: string; costHostName: string; repoLink: string; @@ -44,15 +45,14 @@ export const getRepoCodeHostInfo = (repo?: Repository): CodeHostInfo | undefined return undefined; } - const hostType = repo.RawConfig ? repo.RawConfig['web-url-type'] : undefined; - if (!hostType) { + const webUrlType = repo.RawConfig ? 
repo.RawConfig['web-url-type'] : undefined; + if (!webUrlType) { return undefined; } const url = new URL(repo.URL); const displayName = url.pathname.slice(1); - - switch (hostType) { + switch (webUrlType) { case 'github': return { type: "github", @@ -78,6 +78,14 @@ export const getRepoCodeHostInfo = (repo?: Repository): CodeHostInfo | undefined repoLink: repo.URL, icon: giteaLogo, } + case 'gitiles': + return { + type: "gerrit", + displayName: displayName, + costHostName: "Gerrit", + repoLink: repo.URL, + icon: gerritLogo, + } } } @@ -113,4 +121,4 @@ export const base64Decode = (base64: string): string => { // @see: https://stackoverflow.com/a/65959350/23221295 export const isDefined = (arg: T | null | undefined): arg is T extends null | undefined ? never : T => { return arg !== null && arg !== undefined; -} \ No newline at end of file +} diff --git a/schemas/v2/index.json b/schemas/v2/index.json index 4c2c3275..89967668 100644 --- a/schemas/v2/index.json +++ b/schemas/v2/index.json @@ -356,6 +356,61 @@ ], "additionalProperties": false }, + "GerritConfig": { + "type": "object", + "properties": { + "type": { + "const": "gerrit", + "description": "Gerrit Configuration" + }, + "url": { + "type": "string", + "format": "url", + "description": "The URL of the Gerrit host.", + "examples": [ + "https://gerrit.example.com" + ], + "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$" + }, + "projects": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of specific projects to sync. If not specified, all projects will be synced. Glob patterns are supported", + "examples": [ + [ + "project1/repo1", + "project2/**" + ] + ] + }, + "exclude": { + "type": "object", + "properties": { + "projects": { + "type": "array", + "items": { + "type": "string" + }, + "examples": [ + [ + "project1/repo1", + "project2/**" + ] + ], + "description": "List of specific projects to exclude from syncing." 
+ } + }, + "additionalProperties": false + } + }, + "required": [ + "type", + "url" + ], + "additionalProperties": false + }, "LocalConfig": { "type": "object", "properties": { @@ -415,6 +470,9 @@ { "$ref": "#/definitions/GiteaConfig" }, + { + "$ref": "#/definitions/GerritConfig" + }, { "$ref": "#/definitions/LocalConfig" }