diff --git a/.github/images/gitea-pat-creation.png b/.github/images/gitea-pat-creation.png new file mode 100644 index 00000000..00d6ab94 Binary files /dev/null and b/.github/images/gitea-pat-creation.png differ diff --git a/CHANGELOG.md b/CHANGELOG.md index 545e4e93..dca1ed01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Gitea support ([#45](https://github.com/sourcebot-dev/sourcebot/pull/45)) + ## [2.0.2] - 2024-10-18 ### Added diff --git a/README.md b/README.md index b744cc15..e23c0920 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ https://github.com/user-attachments/assets/98d46192-5469-430f-ad9e-5c042adbb10d ## Features - 💻 **One-command deployment**: Get started instantly using Docker on your own machine. -- 🔍 **Multi-repo search**: Effortlessly index and search through multiple public and private repositories in GitHub or GitLab. +- 🔍 **Multi-repo search**: Effortlessly index and search through multiple public and private repositories in GitHub, GitLab, or Gitea. - ⚡**Lightning fast performance**: Built on top of the powerful [Zoekt](https://github.com/sourcegraph/zoekt) search engine. - 📂 **Full file visualization**: Instantly view the entire file when selecting any search result. - 🎨 **Modern web app**: Enjoy a sleek interface with features like syntax highlighting, light/dark mode, and vim-style navigation @@ -62,7 +62,7 @@ Sourcebot supports indexing and searching through public and private repositorie GitHub icon - GitHub and GitLab. This section will guide you through configuring the repositories that Sourcebot indexes. + GitHub, GitLab and Gitea. This section will guide you through configuring the repositories that Sourcebot indexes. 1. 
Create a new folder on your machine that stores your configs and `.sourcebot` cache, and navigate into it: ```sh @@ -214,6 +214,53 @@ docker run -e GITLAB_TOKEN=glpat-mytoken /* additional args */ ghcr.io/so +
+ Gitea + +Generate a Gitea access token [here](https://gitea.com/user/settings/applications). At minimum, you'll need to select the `read:repository` scope, but `read:user` and `read:organization` are required for the `users` and `orgs` fields of your config file, respectively: + +![Gitea Access token creation](.github/images/gitea-pat-creation.png) + +Next, update your configuration with the `token` field: +```json +{ + "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json", + "repos": [ + { + "type": "gitea", + "token": "my-secret-token", + ... + } + ] +} +``` + +You can also pass tokens as environment variables: +```json +{ + "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json", + "repos": [ + { + "type": "gitea", + "token": { + // note: this env var can be named anything. It + // doesn't need to be `GITEA_TOKEN`. + "env": "GITEA_TOKEN" + }, + ... + } + ] +} +``` + +You'll need to pass this environment variable each time you run Sourcebot: + +
+docker run -e GITEA_TOKEN=my-secret-token /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
+
+ +
+ ## Using a self-hosted GitLab / GitHub instance @@ -226,7 +273,7 @@ If you're using a self-hosted GitLab or GitHub instance with a custom domain, yo 1. Install go and NodeJS. Note that a NodeJS version of at least `21.1.0` is required. -2. Install [ctags](https://github.com/universal-ctags/ctags) (required by zoekt-indexserver) +2. Install [ctags](https://github.com/universal-ctags/ctags) (required by zoekt) ```sh // macOS: brew install universal-ctags diff --git a/packages/backend/package.json b/packages/backend/package.json index 18c69d67..932e43bd 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -22,6 +22,8 @@ "@gitbeaker/rest": "^40.5.1", "@octokit/rest": "^21.0.2", "argparse": "^2.0.1", + "cross-fetch": "^4.0.0", + "gitea-js": "^1.22.0", "lowdb": "^7.0.1", "simple-git": "^3.27.0", "strip-json-comments": "^5.0.1", diff --git a/packages/backend/src/gitea.ts b/packages/backend/src/gitea.ts new file mode 100644 index 00000000..e26d22b8 --- /dev/null +++ b/packages/backend/src/gitea.ts @@ -0,0 +1,150 @@ +import { Api, giteaApi, HttpResponse, Repository as GiteaRepository } from 'gitea-js'; +import { GiteaConfig } from './schemas/v2.js'; +import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from './utils.js'; +import { AppContext, Repository } from './types.js'; +import fetch from 'cross-fetch'; +import { createLogger } from './logger.js'; +import path from 'path'; + +const logger = createLogger('Gitea'); + +export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppContext) => { + const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined; + + const api = giteaApi(config.url ?? 
'https://gitea.com', { + token, + customFetch: fetch, + }); + + let allRepos: GiteaRepository[] = []; + + if (config.orgs) { + const _repos = await getReposForOrgs(config.orgs, api); + allRepos = allRepos.concat(_repos); + } + + if (config.repos) { + const _repos = await getRepos(config.repos, api); + allRepos = allRepos.concat(_repos); + } + + if (config.users) { + const _repos = await getReposOwnedByUsers(config.users, api); + allRepos = allRepos.concat(_repos); + } + + let repos: Repository[] = allRepos + .map((repo) => { + const hostname = config.url ? new URL(config.url).hostname : 'gitea.com'; + const repoId = `${hostname}/${repo.full_name!}`; + const repoPath = path.resolve(path.join(ctx.reposPath, `${repoId}.git`)); + + const cloneUrl = new URL(repo.clone_url!); + if (token) { + cloneUrl.username = token; + } + + return { + name: repo.full_name!, + id: repoId, + cloneUrl: cloneUrl.toString(), + path: repoPath, + isStale: false, + isFork: repo.fork!, + isArchived: !!repo.archived, + gitConfigMetadata: { + 'zoekt.web-url-type': 'gitea', + 'zoekt.web-url': repo.html_url!, + 'zoekt.name': repoId, + 'zoekt.archived': marshalBool(repo.archived), + 'zoekt.fork': marshalBool(repo.fork!), + 'zoekt.public': marshalBool(repo.internal === false && repo.private === false), + } + } satisfies Repository; + }); + + if (config.exclude) { + if (!!config.exclude.forks) { + repos = excludeForkedRepos(repos, logger); + } + + if (!!config.exclude.archived) { + repos = excludeArchivedRepos(repos, logger); + } + + if (config.exclude.repos) { + repos = excludeReposByName(repos, config.exclude.repos, logger); + } + } + + return repos; +} + +const getReposOwnedByUsers = async (users: string[], api: Api) => { + const repos = (await Promise.all(users.map(async (user) => { + logger.debug(`Fetching repos for user ${user}...`); + + const { durationMs, data } = await measure(() => + paginate((page) => api.users.userListRepos(user, { + page, + })) + ); + + logger.debug(`Found ${data.length} 
repos owned by user ${user} in ${durationMs}ms.`); + return data; + }))).flat(); + + return repos; +} + +const getReposForOrgs = async (orgs: string[], api: Api) => { + return (await Promise.all(orgs.map(async (org) => { + logger.debug(`Fetching repos for org ${org}...`); + + const { durationMs, data } = await measure(() => + paginate((page) => api.orgs.orgListRepos(org, { + limit: 100, + page, + })) + ); + + logger.debug(`Found ${data.length} repos for org ${org} in ${durationMs}ms.`); + return data; + }))).flat(); +} + +const getRepos = async (repos: string[], api: Api) => { + return Promise.all(repos.map(async (repo) => { + logger.debug(`Fetching repository info for ${repo}...`); + + const [owner, repoName] = repo.split('/'); + const { durationMs, data: response } = await measure(() => + api.repos.repoGet(owner, repoName), + ); + + logger.debug(`Found repo ${repo} in ${durationMs}ms.`); + + return response.data; + })); +} + +// @see : https://docs.gitea.com/development/api-usage#pagination +const paginate = async (request: (page: number) => Promise>) => { + let page = 1; + const result = await request(page); + const output: T[] = result.data; + + const totalCountString = result.headers.get('x-total-count'); + if (!totalCountString) { + throw new Error("Header 'x-total-count' not found"); + } + const totalCount = parseInt(totalCountString); + + while (output.length < totalCount) { + page++; + const result = await request(page); + output.push(...result.data); + } + + return output; +} \ No newline at end of file diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 356732e8..871dbdbb 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -6,6 +6,7 @@ import path from 'path'; import { SourcebotConfigurationSchema } from "./schemas/v2.js"; import { getGitHubReposFromConfig } from "./github.js"; import { getGitLabReposFromConfig } from "./gitlab.js"; +import { getGiteaReposFromConfig } from "./gitea.js"; import { 
AppContext, Repository } from "./types.js"; import { cloneRepository, fetchRepository } from "./git.js"; import { createLogger } from "./logger.js"; @@ -75,6 +76,11 @@ const syncConfig = async (configPath: string, db: Database, signal: AbortSignal, configRepos.push(...gitLabRepos); break; } + case 'gitea': { + const giteaRepos = await getGiteaReposFromConfig(repoConfig, ctx); + configRepos.push(...giteaRepos); + break; + } } } @@ -180,7 +186,8 @@ const syncConfig = async (configPath: string, db: Database, signal: AbortSignal, // since it implies another sync is in progress. } else { isSyncing = false; - logger.error(`Failed to sync configuration file ${args.configPath} with error:\n`, err); + logger.error(`Failed to sync configuration file ${args.configPath} with error:`); + console.log(err); } }); } diff --git a/packages/backend/src/schemas/v2.ts b/packages/backend/src/schemas/v2.ts index 0da2d047..a5dcd6f1 100644 --- a/packages/backend/src/schemas/v2.ts +++ b/packages/backend/src/schemas/v2.ts @@ -1,6 +1,6 @@ // THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! -export type Repos = GitHubConfig | GitLabConfig; +export type Repos = GitHubConfig | GitLabConfig | GiteaConfig; /** * A Sourcebot configuration file outlines which repositories Sourcebot should sync and index. @@ -106,3 +106,50 @@ export interface GitLabConfig { projects?: string[]; }; } +export interface GiteaConfig { + /** + * Gitea Configuration + */ + type: "gitea"; + /** + * An access token. + */ + token?: + | string + | { + /** + * The name of the environment variable that contains the token. + */ + env: string; + }; + /** + * The URL of the Gitea host. Defaults to https://gitea.com + */ + url?: string; + /** + * List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:organization scope. 
+ */ + orgs?: string[]; + /** + * List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'. + */ + repos?: string[]; + /** + * List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:user scope. + */ + users?: string[]; + exclude?: { + /** + * Exclude forked repositories from syncing. + */ + forks?: boolean; + /** + * Exclude archived repositories from syncing. + */ + archived?: boolean; + /** + * List of individual repositories to exclude from syncing. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'. + */ + repos?: string[]; + }; +} diff --git a/packages/web/public/gitea.svg b/packages/web/public/gitea.svg new file mode 100644 index 00000000..7ed00121 --- /dev/null +++ b/packages/web/public/gitea.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/web/public/gitlab.svg b/packages/web/public/gitlab.svg index e4a29630..ab63afd2 100644 --- a/packages/web/public/gitlab.svg +++ b/packages/web/public/gitlab.svg @@ -1 +1 @@ -GitLab \ No newline at end of file + \ No newline at end of file diff --git a/packages/web/src/app/repositoryCarousel.tsx b/packages/web/src/app/repositoryCarousel.tsx index 7b5dcc1d..f67aea0f 100644 --- a/packages/web/src/app/repositoryCarousel.tsx +++ b/packages/web/src/app/repositoryCarousel.tsx @@ -64,7 +64,7 @@ const RepositoryBadge = ({ repoIcon: {info.costHostName}, repoName: info.repoName, repoLink: info.repoLink, diff --git a/packages/web/src/app/search/components/searchResultsPanel/fileMatchContainer.tsx b/packages/web/src/app/search/components/searchResultsPanel/fileMatchContainer.tsx index e2c3084b..05cb3dff 100644 --- a/packages/web/src/app/search/components/searchResultsPanel/fileMatchContainer.tsx +++ b/packages/web/src/app/search/components/searchResultsPanel/fileMatchContainer.tsx @@ -63,7 +63,7 @@ 
export const FileMatchContainer = ({ repoIcon: {info.costHostName} } } diff --git a/packages/web/src/lib/utils.ts b/packages/web/src/lib/utils.ts index 15ed59fd..c04def13 100644 --- a/packages/web/src/lib/utils.ts +++ b/packages/web/src/lib/utils.ts @@ -2,6 +2,7 @@ import { type ClassValue, clsx } from "clsx" import { twMerge } from "tailwind-merge" import githubLogo from "../../public/github.svg"; import gitlabLogo from "../../public/gitlab.svg"; +import giteaLogo from "../../public/gitea.svg"; import { ServiceError } from "./serviceError"; export function cn(...inputs: ClassValue[]) { @@ -29,11 +30,12 @@ export const createPathWithQueryParams = (path: string, ...queryParams: [string, } type CodeHostInfo = { - type: "github" | "gitlab"; + type: "github" | "gitlab" | "gitea"; repoName: string; costHostName: string; repoLink: string; icon: string; + iconClassname?: string; } export const getRepoCodeHostInfo = (repoName: string): CodeHostInfo | undefined => { @@ -44,6 +46,7 @@ export const getRepoCodeHostInfo = (repoName: string): CodeHostInfo | undefined costHostName: "GitHub", repoLink: `https://${repoName}`, icon: githubLogo, + iconClassname: "dark:invert", } } @@ -57,6 +60,16 @@ export const getRepoCodeHostInfo = (repoName: string): CodeHostInfo | undefined } } + if (repoName.startsWith("gitea.com")) { + return { + type: "gitea", + repoName: repoName.substring("gitea.com/".length), + costHostName: "Gitea", + repoLink: `https://${repoName}`, + icon: giteaLogo, + } + } + return undefined; } @@ -71,6 +84,10 @@ export const getCodeHostFilePreviewLink = (repoName: string, filePath: string): return `${info.repoLink}/-/blob/HEAD/${filePath}`; } + if (info?.type === "gitea") { + return `${info.repoLink}/src/branch/HEAD/${filePath}`; + } + return undefined; } diff --git a/schemas/v2/index.json b/schemas/v2/index.json index 292fd0fd..ae02774f 100644 --- a/schemas/v2/index.json +++ b/schemas/v2/index.json @@ -214,6 +214,96 @@ ], "additionalProperties": false }, + 
"GiteaConfig": { + "type": "object", + "properties": { + "type": { + "const": "gitea", + "description": "Gitea Configuration" + }, + "token": { + "$ref": "#/definitions/Token", + "description": "An access token.", + "examples": [ + "secret-token", + { "env": "ENV_VAR_CONTAINING_TOKEN" } + ] + }, + "url": { + "type": "string", + "format": "url", + "default": "https://gitea.com", + "description": "The URL of the Gitea host. Defaults to https://gitea.com", + "examples": [ + "https://gitea.com", + "https://gitea.example.com" + ], + "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$" + }, + "orgs": { + "type": "array", + "items": { + "type": "string" + }, + "examples": [ + [ + "my-org-name" + ] + ], + "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:organization scope." + }, + "repos": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[\\w.-]+\\/[\\w.-]+$" + }, + "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'." + }, + "users": { + "type": "array", + "items": { + "type": "string" + }, + "examples": [ + [ + "username-1", + "username-2" + ] + ], + "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property. If a `token` is provided, it must have the read:user scope." + }, + "exclude": { + "type": "object", + "properties": { + "forks": { + "type": "boolean", + "default": false, + "description": "Exclude forked repositories from syncing." + }, + "archived": { + "type": "boolean", + "default": false, + "description": "Exclude archived repositories from syncing." 
+ }, + "repos": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[\\w.-]+\\/[\\w.-]+$" + }, + "default": [], + "description": "List of individual repositories to exclude from syncing. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'." + } + }, + "additionalProperties": false + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, "Repos": { "anyOf": [ { @@ -221,6 +311,9 @@ }, { "$ref": "#/definitions/GitLabConfig" + }, + { + "$ref": "#/definitions/GiteaConfig" } ] } diff --git a/yarn.lock b/yarn.lock index 51b7b9a4..df394cff 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1961,6 +1961,13 @@ crelt@^1.0.5: resolved "https://registry.yarnpkg.com/crelt/-/crelt-1.0.6.tgz#7cc898ea74e190fb6ef9dae57f8f81cf7302df72" integrity sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g== +cross-fetch@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-4.0.0.tgz#f037aef1580bb3a1a35164ea2a848ba81b445983" + integrity sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g== + dependencies: + node-fetch "^2.6.12" + cross-spawn@^6.0.5: version "6.0.5" resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4" @@ -2764,6 +2771,11 @@ get-tsconfig@^4.7.5: dependencies: resolve-pkg-maps "^1.0.0" +gitea-js@^1.22.0: + version "1.22.0" + resolved "https://registry.yarnpkg.com/gitea-js/-/gitea-js-1.22.0.tgz#bf081fd69eff102d5a00660b6d5f5e8f8fd34f3a" + integrity sha512-vG3yNU2NKX7vbsqHH5U3q0u3OmWWh3c4nvyWtx022jQEDJDZP47EoGurXCmOhzvD5AwgUV6r+lVAz+Fa1dazgg== + glob-parent@^5.1.2, glob-parent@~5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" @@ -3516,6 +3528,13 @@ node-cleanup@^2.1.2: resolved 
"https://registry.yarnpkg.com/node-cleanup/-/node-cleanup-2.1.2.tgz#7ac19abd297e09a7f72a71545d951b517e4dde2c" integrity sha512-qN8v/s2PAJwGUtr1/hYTpNKlD6Y9rc4p8KSmJXyGdYGZsDGKXrGThikLFP9OCHFeLeEpQzPwiAtdIvBLqm//Hw== +node-fetch@^2.6.12: + version "2.7.0" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" + integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A== + dependencies: + whatwg-url "^5.0.0" + normalize-package-data@^2.3.2: version "2.5.0" resolved "https://registry.yarnpkg.com/normalize-package-data/-/normalize-package-data-2.5.0.tgz#e66db1838b200c1dfc233225d12cb36520e234a8" @@ -4582,6 +4601,11 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== + triple-beam@^1.3.0: version "1.4.1" resolved "https://registry.yarnpkg.com/triple-beam/-/triple-beam-1.4.1.tgz#6fde70271dc6e5d73ca0c3b24e2d92afb7441984" @@ -4765,6 +4789,19 @@ web-vitals@^4.0.1: resolved "https://registry.yarnpkg.com/web-vitals/-/web-vitals-4.2.3.tgz#270c4baecfbc6ec6fc15da1989e465e5f9b94fb7" integrity sha512-/CFAm1mNxSmOj6i0Co+iGFJ58OS4NRGVP+AWS/l509uIK5a1bSoIVaHz/ZumpHTfHSZBpgrJ+wjfpAOrTHok5Q== +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== + +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== 
+ dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6"