diff --git a/packages/backend/src/git.ts b/packages/backend/src/git.ts index d19e1572..80d34cef 100644 --- a/packages/backend/src/git.ts +++ b/packages/backend/src/git.ts @@ -278,6 +278,16 @@ export const getCommitHashForRefName = async ({ refName: string, }) => { const git = createGitClientForPath(path); - const rev = await git.revparse(refName); - return rev; + + try { + // The `^{commit}` suffix is used to fully dereference the ref to a commit hash. + const rev = await git.revparse(`${refName}^{commit}`); + return rev; + + // @note: Was hitting errors when the repository is empty, + // so we're catching the error and returning undefined. + } catch (error: unknown) { + console.error(error); + return undefined; + } } \ No newline at end of file diff --git a/packages/backend/src/repoCompileUtils.ts b/packages/backend/src/repoCompileUtils.ts index 80d80ebe..f5edb4f6 100644 --- a/packages/backend/src/repoCompileUtils.ts +++ b/packages/backend/src/repoCompileUtils.ts @@ -13,12 +13,12 @@ import { marshalBool } from "./utils.js"; import { createLogger } from '@sourcebot/logger'; import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js"; -import { RepoMetadata } from './types.js'; import path from 'path'; import { glob } from 'glob'; import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js'; import assert from 'assert'; import GitUrlParse from 'git-url-parse'; +import { RepoMetadata } from '@sourcebot/shared'; export type RepoData = WithRequired; diff --git a/packages/backend/src/repoIndexManager.ts b/packages/backend/src/repoIndexManager.ts index 6db3e280..98258b0b 100644 --- a/packages/backend/src/repoIndexManager.ts +++ b/packages/backend/src/repoIndexManager.ts @@ -1,15 +1,18 @@ import * as Sentry 
from '@sentry/node'; import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db"; import { createLogger, Logger } from "@sourcebot/logger"; +import { repoMetadataSchema, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata } from '@sourcebot/shared'; import { existsSync } from 'fs'; import { readdir, rm } from 'fs/promises'; import { Job, Queue, ReservedJob, Worker } from "groupmq"; import { Redis } from 'ioredis'; +import micromatch from 'micromatch'; import { INDEX_CACHE_DIR } from './constants.js'; import { env } from './env.js'; -import { cloneRepository, fetchRepository, getCommitHashForRefName, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js'; +import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js'; +import { captureEvent } from './posthog.js'; import { PromClient } from './promClient.js'; -import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js"; +import { RepoWithConnections, Settings } from "./types.js"; import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js'; import { indexGitRepository } from './zoekt.js'; @@ -61,7 +64,7 @@ export class RepoIndexManager { concurrency: this.settings.maxRepoIndexingJobConcurrency, ...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? 
{ logger: true, - }: {}), + } : {}), }); this.worker.on('completed', this.onJobCompleted.bind(this)); @@ -263,7 +266,16 @@ export class RepoIndexManager { try { if (jobType === RepoIndexingJobType.INDEX) { - await this.indexRepository(repo, logger, abortController.signal); + const revisions = await this.indexRepository(repo, logger, abortController.signal); + + await this.db.repoIndexingJob.update({ + where: { id }, + data: { + metadata: { + indexedRevisions: revisions, + } satisfies RepoIndexingJobMetadata, + }, + }); } else if (jobType === RepoIndexingJobType.CLEANUP) { await this.cleanupRepository(repo, logger); } @@ -285,7 +297,7 @@ export class RepoIndexManager { // If the repo path exists but it is not a valid git repository root, this indicates // that the repository is in a bad state. To fix, we remove the directory and perform // a fresh clone. - if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot( { path: repoPath } ))) { + if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot({ path: repoPath }))) { const isValidGitRepo = await isPathAValidGitRepoRoot({ path: repoPath, signal, @@ -354,10 +366,54 @@ export class RepoIndexManager { }); } + let revisions = [ + 'HEAD' + ]; + + if (metadata.branches) { + const branchGlobs = metadata.branches + const allBranches = await getBranches(repoPath); + const matchingBranches = + allBranches + .filter((branch) => micromatch.isMatch(branch, branchGlobs)) + .map((branch) => `refs/heads/${branch}`); + + revisions = [ + ...revisions, + ...matchingBranches + ]; + } + + if (metadata.tags) { + const tagGlobs = metadata.tags; + const allTags = await getTags(repoPath); + const matchingTags = + allTags + .filter((tag) => micromatch.isMatch(tag, tagGlobs)) + .map((tag) => `refs/tags/${tag}`); + + revisions = [ + ...revisions, + ...matchingTags + ]; + } + + // zoekt has a limit of 64 branches/tags to index. 
+ if (revisions.length > 64) { + logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`); + captureEvent('backend_revisions_truncated', { + repoId: repo.id, + revisionCount: revisions.length, + }); + revisions = revisions.slice(0, 64); + } + logger.info(`Indexing ${repo.name} (id: ${repo.id})...`); - const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, signal)); + const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal)); const indexDuration_s = durationMs / 1000; logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`); + + return revisions; } private async cleanupRepository(repo: Repo, logger: Logger) { @@ -398,12 +454,18 @@ export class RepoIndexManager { path: repoPath, refName: 'HEAD', }); - + + const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata); + const repo = await this.db.repo.update({ where: { id: jobData.repoId }, data: { indexedAt: new Date(), indexedCommitHash: commitHash, + metadata: { + ...(jobData.repo.metadata as RepoMetadata), + indexedRevisions: jobMetadata.indexedRevisions, + } satisfies RepoMetadata, } }); diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index 8e27867d..70e16c05 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -1,36 +1,8 @@ import { Connection, Repo, RepoToConnection } from "@sourcebot/db"; import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type"; -import { z } from "zod"; export type Settings = Required; -// Structure of the `metadata` field in the `Repo` table. -// -// @WARNING: If you modify this schema, please make sure it is backwards -// compatible with any prior versions of the schema!! -// @NOTE: If you move this schema, please update the comment in schema.prisma -// to point to the new location. 
-export const repoMetadataSchema = z.object({ - /** - * A set of key-value pairs that will be used as git config - * variables when cloning the repo. - * @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode - */ - gitConfig: z.record(z.string(), z.string()).optional(), - - /** - * A list of branches to index. Glob patterns are supported. - */ - branches: z.array(z.string()).optional(), - - /** - * A list of tags to index. Glob patterns are supported. - */ - tags: z.array(z.string()).optional(), -}); - -export type RepoMetadata = z.infer; - // @see : https://stackoverflow.com/a/61132308 export type DeepPartial = T extends object ? { [P in keyof T]?: DeepPartial; diff --git a/packages/backend/src/zoekt.ts b/packages/backend/src/zoekt.ts index ad75927a..54ae615e 100644 --- a/packages/backend/src/zoekt.ts +++ b/packages/backend/src/zoekt.ts @@ -1,62 +1,16 @@ import { Repo } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; import { exec } from "child_process"; -import micromatch from "micromatch"; import { INDEX_CACHE_DIR } from "./constants.js"; -import { getBranches, getTags } from "./git.js"; -import { captureEvent } from "./posthog.js"; -import { repoMetadataSchema, Settings } from "./types.js"; +import { Settings } from "./types.js"; import { getRepoPath, getShardPrefix } from "./utils.js"; const logger = createLogger('zoekt'); -export const indexGitRepository = async (repo: Repo, settings: Settings, signal?: AbortSignal) => { - let revisions = [ - 'HEAD' - ]; - +export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => { const { path: repoPath } = getRepoPath(repo); const shardPrefix = getShardPrefix(repo.orgId, repo.id); - const metadata = repoMetadataSchema.parse(repo.metadata); - if (metadata.branches) { - const branchGlobs = metadata.branches - const allBranches = await getBranches(repoPath); - const matchingBranches = - 
allBranches - .filter((branch) => micromatch.isMatch(branch, branchGlobs)) - .map((branch) => `refs/heads/${branch}`); - - revisions = [ - ...revisions, - ...matchingBranches - ]; - } - - if (metadata.tags) { - const tagGlobs = metadata.tags; - const allTags = await getTags(repoPath); - const matchingTags = - allTags - .filter((tag) => micromatch.isMatch(tag, tagGlobs)) - .map((tag) => `refs/tags/${tag}`); - - revisions = [ - ...revisions, - ...matchingTags - ]; - } - - // zoekt has a limit of 64 branches/tags to index. - if (revisions.length > 64) { - logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`); - captureEvent('backend_revisions_truncated', { - repoId: repo.id, - revisionCount: revisions.length, - }); - revisions = revisions.slice(0, 64); - } - const command = [ 'zoekt-git-index', '-allow_missing_branches', @@ -76,7 +30,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal? reject(error); return; } - + if (stdout) { stdout.split('\n').filter(line => line.trim()).forEach(line => { logger.info(line); @@ -89,7 +43,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal? 
logger.info(line); }); } - + resolve({ stdout, stderr diff --git a/packages/db/prisma/migrations/20251024214005_add_metadata_field_to_repo_job_table/migration.sql b/packages/db/prisma/migrations/20251024214005_add_metadata_field_to_repo_job_table/migration.sql new file mode 100644 index 00000000..3b05f19d --- /dev/null +++ b/packages/db/prisma/migrations/20251024214005_add_metadata_field_to_repo_job_table/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "RepoIndexingJob" ADD COLUMN "metadata" JSONB; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index c2a7c32a..f04d293c 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -38,7 +38,7 @@ model Repo { isFork Boolean isArchived Boolean isPublic Boolean @default(false) - metadata Json /// For schema see repoMetadataSchema in packages/backend/src/types.ts + metadata Json /// For schema see repoMetadataSchema in packages/shared/src/types.ts cloneUrl String webUrl String? connections RepoToConnection[] @@ -84,6 +84,7 @@ model RepoIndexingJob { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt completedAt DateTime? + metadata Json? /// For schema see repoIndexingJobMetadataSchema in packages/shared/src/types.ts errorMessage String? 
diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index 566e6718..bdaea067 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -9,6 +9,14 @@ export type { Plan, Entitlement, } from "./entitlements.js"; +export type { + RepoMetadata, + RepoIndexingJobMetadata, +} from "./types.js"; +export { + repoMetadataSchema, + repoIndexingJobMetadataSchema, +} from "./types.js"; export { base64Decode, loadConfig, diff --git a/packages/shared/src/types.ts b/packages/shared/src/types.ts index b6f6e159..a03b2e9d 100644 --- a/packages/shared/src/types.ts +++ b/packages/shared/src/types.ts @@ -1,3 +1,45 @@ import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type"; +import { z } from "zod"; -export type ConfigSettings = Required; \ No newline at end of file +export type ConfigSettings = Required; + +// Structure of the `metadata` field in the `Repo` table. +// +// @WARNING: If you modify this schema, please make sure it is backwards +// compatible with any prior versions of the schema!! +// @NOTE: If you move this schema, please update the comment in schema.prisma +// to point to the new location. +export const repoMetadataSchema = z.object({ + /** + * A set of key-value pairs that will be used as git config + * variables when cloning the repo. + * @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode + */ + gitConfig: z.record(z.string(), z.string()).optional(), + + /** + * A list of branches to index. Glob patterns are supported. + */ + branches: z.array(z.string()).optional(), + + /** + * A list of tags to index. Glob patterns are supported. + */ + tags: z.array(z.string()).optional(), + + /** + * A list of revisions that were indexed for the repo. 
+ */ + indexedRevisions: z.array(z.string()).optional(), +}); + +export type RepoMetadata = z.infer; + +export const repoIndexingJobMetadataSchema = z.object({ + /** + * A list of revisions that were indexed for the repo. + */ + indexedRevisions: z.array(z.string()).optional(), +}); + +export type RepoIndexingJobMetadata = z.infer; diff --git a/packages/web/src/app/[domain]/repos/[id]/page.tsx b/packages/web/src/app/[domain]/repos/[id]/page.tsx index 0fb4a864..715afa5c 100644 --- a/packages/web/src/app/[domain]/repos/[id]/page.tsx +++ b/packages/web/src/app/[domain]/repos/[id]/page.tsx @@ -17,6 +17,8 @@ import { RepoJobsTable } from "../components/repoJobsTable" import { getConfigSettings } from "@sourcebot/shared" import { env } from "@/env.mjs" import { DisplayDate } from "../../components/DisplayDate" +import { RepoBranchesTable } from "../components/repoBranchesTable" +import { repoMetadataSchema } from "@sourcebot/shared" export default async function RepoDetailPage({ params }: { params: Promise<{ id: string }> }) { const { id } = await params @@ -47,6 +49,8 @@ export default async function RepoDetailPage({ params }: { params: Promise<{ id: return undefined; })(); + const repoMetadata = repoMetadataSchema.parse(repo.metadata); + return (
@@ -99,7 +103,7 @@ export default async function RepoDetailPage({ params }: { params: Promise<{ id: - + @@ -118,7 +122,7 @@ export default async function RepoDetailPage({ params }: { params: Promise<{ id: - {repo.indexedAt ? : "Never" } + {repo.indexedAt ? : "Never"} @@ -137,15 +141,35 @@ export default async function RepoDetailPage({ params }: { params: Promise<{ id: - {nextIndexAttempt ? : "-" } + {nextIndexAttempt ? : "-"}
+ {repoMetadata.indexedRevisions && ( + + +
+ Indexed Branches +
+ Branches that have been indexed for this repository. Docs +
+ + }> + + + +
+ )} + Indexing Jobs - History of all indexing and cleanup jobs for this repository + History of all indexing and cleanup jobs for this repository. }> diff --git a/packages/web/src/app/[domain]/repos/components/repoBranchesTable.tsx b/packages/web/src/app/[domain]/repos/components/repoBranchesTable.tsx new file mode 100644 index 00000000..bc186acd --- /dev/null +++ b/packages/web/src/app/[domain]/repos/components/repoBranchesTable.tsx @@ -0,0 +1,141 @@ +"use client" + +import * as React from "react" +import { + type ColumnDef, + type ColumnFiltersState, + type SortingState, + flexRender, + getCoreRowModel, + getFilteredRowModel, + getPaginationRowModel, + getSortedRowModel, + useReactTable, +} from "@tanstack/react-table" +import { Button } from "@/components/ui/button" +import { Input } from "@/components/ui/input" +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" +import { CodeHostType, getCodeHostBrowseAtBranchUrl } from "@/lib/utils" +import Link from "next/link" + +type RepoBranchesTableProps = { + indexRevisions: string[]; + repoWebUrl: string | null; + repoCodeHostType: string; +} + +export const RepoBranchesTable = ({ indexRevisions, repoWebUrl, repoCodeHostType }: RepoBranchesTableProps) => { + const [sorting, setSorting] = React.useState([]) + const [columnFilters, setColumnFilters] = React.useState([]) + + const columns = React.useMemo[]>(() => { + return [ + { + accessorKey: "refName", + header: "Revision", + cell: ({ row }) => { + const refName = row.original; + const shortRefName = refName.replace(/^refs\/(heads|tags)\//, ""); + + const branchUrl = getCodeHostBrowseAtBranchUrl({ + webUrl: repoWebUrl, + codeHostType: repoCodeHostType as CodeHostType, + branchName: refName, + }); + + return branchUrl ? 
( + + {shortRefName} + + ) : ( + + {shortRefName} + + ) + }, + } + ] + }, [repoCodeHostType, repoWebUrl]); + + const table = useReactTable({ + data: indexRevisions, + columns, + getCoreRowModel: getCoreRowModel(), + getPaginationRowModel: getPaginationRowModel(), + getSortedRowModel: getSortedRowModel(), + getFilteredRowModel: getFilteredRowModel(), + onSortingChange: setSorting, + onColumnFiltersChange: setColumnFilters, + state: { + sorting, + columnFilters, + }, + initialState: { + pagination: { + pageSize: 5, + }, + }, + }) + + return ( +
+
+ table.getColumn("refName")?.setFilterValue(event.target.value)} + className="max-w-sm" + /> +
+ +
+ + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => ( + + {header.isPlaceholder ? null : flexRender(header.column.columnDef.header, header.getContext())} + + ))} + + ))} + + + {table.getRowModel().rows?.length ? ( + table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => ( + {flexRender(cell.column.columnDef.cell, cell.getContext())} + ))} + + )) + ) : ( + + + No branches found. + + + )} + +
+
+ +
+ + +
+
/**
 * Builds a link for browsing a repository at a given branch (or other ref)
 * on its code host's web UI.
 *
 * @param webUrl - Base web URL of the repository. When `null`, `undefined`
 * or empty, no link can be built.
 * @param codeHostType - Selects which code host URL scheme is used.
 * @param branchName - Name of the branch/ref to browse at. May contain `/`
 * separators (e.g. `feature/foo`); special characters are escaped.
 * @returns The browse URL, or `undefined` when `webUrl` is missing or when
 * the code host (`gerrit`, `generic-git-host`) has no browse URL scheme here.
 */
export const getCodeHostBrowseAtBranchUrl = ({
    webUrl,
    codeHostType,
    branchName,
}: {
    webUrl?: string | null,
    codeHostType: CodeHostType,
    branchName: string,
}) => {
    if (!webUrl) {
        return undefined;
    }

    // Path-style hosts: escape every segment individually so that characters
    // such as `#`, `?`, `%` or spaces cannot truncate or corrupt the URL,
    // while the `/` separators of hierarchical ref names are preserved.
    const refAsPath = branchName
        .split('/')
        .map((segment) => encodeURIComponent(segment))
        .join('/');

    // Query-style hosts: the whole ref is a single parameter value, so it is
    // escaped as one component (`/` becomes `%2F`, `&` becomes `%26`, ...).
    const refAsQueryValue = encodeURIComponent(branchName);

    switch (codeHostType) {
        case 'github':
            return `${webUrl}/tree/${refAsPath}`;
        case 'gitlab':
            return `${webUrl}/-/tree/${refAsPath}`;
        case 'gitea':
            return `${webUrl}/src/branch/${refAsPath}`;
        case 'azuredevops':
            // NOTE(review): the Azure DevOps web UI appears to select branches
            // via `version=GB<branch>` - confirm that `branch=` is honoured.
            return `${webUrl}?branch=${refAsQueryValue}`;
        case 'bitbucket-cloud':
            return `${webUrl}?at=${refAsQueryValue}`;
        case 'bitbucket-server':
            return `${webUrl}?at=${refAsQueryValue}`;
        case 'gerrit':
        case 'generic-git-host':
            return undefined;
    }
}