From b36de3412d945da82a71028ed09f5f9ad95cf042 Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Mon, 18 Aug 2025 15:24:40 -0400 Subject: [PATCH] experiment: Self-serve repository indexing for public GitHub repositories (#468) --- packages/web/src/actions.ts | 141 +++++++++++++++++- .../src/app/[domain]/repos/addRepoButton.tsx | 64 -------- .../web/src/app/[domain]/repos/columns.tsx | 8 +- .../repos/components/addRepositoryDialog.tsx | 128 ++++++++++++++++ packages/web/src/app/[domain]/repos/page.tsx | 5 +- .../app/[domain]/repos/repositoryTable.tsx | 65 +++++++- packages/web/src/components/ui/data-table.tsx | 20 +-- packages/web/src/env.mjs | 4 + .../components/chatThread/detailsCard.tsx | 2 +- packages/web/src/lib/utils.ts | 10 +- 10 files changed, 350 insertions(+), 97 deletions(-) delete mode 100644 packages/web/src/app/[domain]/repos/addRepoButton.tsx create mode 100644 packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 117d2a2e..92c5db84 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -3,7 +3,7 @@ import { env } from "@/env.mjs"; import { ErrorCode } from "@/lib/errorCodes"; import { notAuthenticated, notFound, orgNotFound, secretAlreadyExists, ServiceError, ServiceErrorException, unexpectedError } from "@/lib/serviceError"; -import { CodeHostType, isServiceError } from "@/lib/utils"; +import { CodeHostType, isHttpError, isServiceError } from "@/lib/utils"; import { prisma } from "@/prisma"; import { render } from "@react-email/components"; import * as Sentry from '@sentry/nextjs'; @@ -22,6 +22,7 @@ import { StatusCodes } from "http-status-codes"; import { cookies, headers } from "next/headers"; import { createTransport } from "nodemailer"; import { auth } from "./auth"; +import { Octokit } from "octokit"; import { getConnection } from "./data/connection"; import { IS_BILLING_ENABLED } from "./ee/features/billing/stripe"; import InviteUserEmail from "./emails/inviteUserEmail"; @@ -790,6 +791,144 @@ export const createConnection = async (name: string, type: CodeHostType, connect }, OrgRole.OWNER) )); +export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: string, domain: string): Promise<{ connectionId: number } | ServiceError> => sew(() => + withAuth((userId) => + withOrgMembership(userId, domain, async ({ org }) => { + if (env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_ENABLED !== 'true') { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "This feature is not enabled.", + } satisfies ServiceError; + } + + // Parse repository URL to extract owner/repo + const repoInfo = (() => { + const url = repositoryUrl.trim(); + + // Handle various GitHub URL formats + const patterns = [ + // https://github.com/owner/repo or https://github.com/owner/repo.git + /^https?:\/\/github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, + // github.com/owner/repo + /^github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, + // owner/repo + /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/ + ]; + + for (const pattern of patterns) { + const match = url.match(pattern); + if (match) { + return { + owner: match[1], + repo: match[2] + }; + } + } + + return null; + })(); + + if (!repoInfo) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "Invalid repository URL format. Please use 'owner/repo' or 'https://github.com/owner/repo' format.", + } satisfies ServiceError; + } + + const { owner, repo } = repoInfo; + + // Use GitHub API to fetch repository information and get the external_id + const octokit = new Octokit({ + auth: env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN + }); + + let githubRepo; + try { + const response = await octokit.rest.repos.get({ + owner, + repo, + }); + githubRepo = response.data; + } catch (error) { + if (isHttpError(error, 404)) { + return { + statusCode: StatusCodes.NOT_FOUND, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Repository '${owner}/${repo}' not found or is private. Only public repositories can be added.`, + } satisfies ServiceError; + } + + if (isHttpError(error, 403)) { + return { + statusCode: StatusCodes.FORBIDDEN, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Access to repository '${owner}/${repo}' is forbidden. Only public repositories can be added.`, + } satisfies ServiceError; + } + + return { + statusCode: StatusCodes.INTERNAL_SERVER_ERROR, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Failed to fetch repository information: ${error instanceof Error ? error.message : 'Unknown error'}`, + } satisfies ServiceError; + } + + if (githubRepo.private) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "Only public repositories can be added.", + } satisfies ServiceError; + } + + // Check if this repository is already connected using the external_id + const existingRepo = await prisma.repo.findFirst({ + where: { + orgId: org.id, + external_id: githubRepo.id.toString(), + external_codeHostType: 'github', + external_codeHostUrl: 'https://github.com', + } + }); + + if (existingRepo) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.CONNECTION_ALREADY_EXISTS, + message: "This repository already exists.", + } satisfies ServiceError; + } + + const connectionName = `${owner}-${repo}-${Date.now()}`; + + // Create GitHub connection config + const connectionConfig: GithubConnectionConfig = { + type: "github" as const, + repos: [`${owner}/${repo}`], + ...(env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN ? { + token: { + env: 'EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN' + } + } : {}) + }; + + const connection = await prisma.connection.create({ + data: { + orgId: org.id, + name: connectionName, + config: connectionConfig as unknown as Prisma.InputJsonValue, + connectionType: 'github', + } + }); + + return { + connectionId: connection.id, + } + }, OrgRole.GUEST), /* allowAnonymousAccess = */ true + )); + export const updateConnectionDisplayName = async (connectionId: number, name: string, domain: string): Promise<{ success: boolean } | ServiceError> => sew(() => withAuth((userId) => withOrgMembership(userId, domain, async ({ org }) => { diff --git a/packages/web/src/app/[domain]/repos/addRepoButton.tsx b/packages/web/src/app/[domain]/repos/addRepoButton.tsx deleted file mode 100644 index 739f4703..00000000 --- a/packages/web/src/app/[domain]/repos/addRepoButton.tsx +++ /dev/null @@ -1,64 +0,0 @@ -"use client" - -import { Button } from "@/components/ui/button" -import { PlusCircle } from "lucide-react" -import { - Dialog, - DialogContent, - DialogHeader, - DialogTitle, - DialogDescription, - DialogClose, - DialogFooter, -} from "@/components/ui/dialog" -import { useState } from "react" -import { ConnectionList } from "../connections/components/connectionList" -import { useDomain } from "@/hooks/useDomain" -import Link from "next/link"; -import { useSession } from "next-auth/react" - -export function AddRepoButton() { - const [isOpen, setIsOpen] = useState(false) - const domain = useDomain() - const { data: session } = useSession(); - - return ( - <> - {session?.user && ( - <> - - - - - - Add a New Repository - - Repositories are added to Sourcebot using connections. To add a new repo, add it to an existing connection or create a new one. - - -
- -
- - - - - - -
-
- - ) - } - - ) -} \ No newline at end of file diff --git a/packages/web/src/app/[domain]/repos/columns.tsx b/packages/web/src/app/[domain]/repos/columns.tsx index 4d985cba..e27a8816 100644 --- a/packages/web/src/app/[domain]/repos/columns.tsx +++ b/packages/web/src/app/[domain]/repos/columns.tsx @@ -9,7 +9,6 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/comp import { cn, getRepoImageSrc } from "@/lib/utils" import { RepoIndexingStatus } from "@sourcebot/db"; import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, DropdownMenuTrigger } from "@/components/ui/dropdown-menu" -import { AddRepoButton } from "./addRepoButton" export type RepositoryColumnInfo = { repoId: number @@ -97,12 +96,7 @@ const StatusIndicator = ({ status }: { status: RepoIndexingStatus }) => { export const columns = (domain: string): ColumnDef[] => [ { accessorKey: "name", - header: () => ( -
- Repository - -
- ), + header: 'Repository', cell: ({ row }) => { const repo = row.original const url = repo.url diff --git a/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx b/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx new file mode 100644 index 00000000..79bfc1c8 --- /dev/null +++ b/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx @@ -0,0 +1,128 @@ +'use client'; + +import { Button } from "@/components/ui/button"; +import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from "@/components/ui/dialog"; +import { Form, FormControl, FormField, FormItem, FormLabel, FormMessage } from "@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { useForm } from "react-hook-form"; +import { z } from "zod"; +import { experimental_addGithubRepositoryByUrl } from "@/actions"; +import { useDomain } from "@/hooks/useDomain"; +import { isServiceError } from "@/lib/utils"; +import { useToast } from "@/components/hooks/use-toast"; +import { useRouter } from "next/navigation"; + +interface AddRepositoryDialogProps { + isOpen: boolean; + onOpenChange: (open: boolean) => void; +} + +// Validation schema for repository URLs +const formSchema = z.object({ + repositoryUrl: z.string() + .min(1, "Repository URL is required") + .refine((url) => { + // Allow various GitHub URL formats: + // - https://github.com/owner/repo + // - github.com/owner/repo + // - owner/repo + const patterns = [ + /^https?:\/\/github\.com\/[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_.-]+\/?$/, + /^github\.com\/[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_.-]+\/?$/, + /^[a-zA-Z0-9_.-]+\/[a-zA-Z0-9_.-]+$/ + ]; + return patterns.some(pattern => pattern.test(url.trim())); + }, "Please enter a valid GitHub repository URL (e.g., owner/repo or https://github.com/owner/repo)"), +}); + +export const AddRepositoryDialog = ({ isOpen, onOpenChange }: AddRepositoryDialogProps) => { + const domain = useDomain(); + const { toast } = useToast(); + const router = useRouter(); + + const form = useForm>({ + resolver: zodResolver(formSchema), + defaultValues: { + repositoryUrl: "", + }, + }); + + const { isSubmitting } = form.formState; + + const onSubmit = async (data: z.infer) => { + + const result = await experimental_addGithubRepositoryByUrl(data.repositoryUrl.trim(), domain); + if (isServiceError(result)) { + toast({ + title: "Error adding repository", + description: result.message, + variant: "destructive", + }); + } else { + toast({ + title: "Repository added successfully!", + description: "It will be indexed shortly.", + }); + form.reset(); + onOpenChange(false); + router.refresh(); + } + }; + + const handleCancel = () => { + form.reset(); + onOpenChange(false); + }; + + return ( + + + + Add a public repository from GitHub + + Paste the repo URL - the code will be indexed and available in search. + + + +
+ + ( + + Repository URL + + + + + + )} + /> + + + + + + + +
+
+ ); +}; diff --git a/packages/web/src/app/[domain]/repos/page.tsx b/packages/web/src/app/[domain]/repos/page.tsx index f0ffa1e8..f456afa0 100644 --- a/packages/web/src/app/[domain]/repos/page.tsx +++ b/packages/web/src/app/[domain]/repos/page.tsx @@ -2,6 +2,7 @@ import { RepositoryTable } from "./repositoryTable"; import { getOrgFromDomain } from "@/data/org"; import { PageNotFound } from "../components/pageNotFound"; import { Header } from "../components/header"; +import { env } from "@/env.mjs"; export default async function ReposPage({ params: { domain } }: { params: { domain: string } }) { const org = await getOrgFromDomain(domain); @@ -16,7 +17,9 @@ export default async function ReposPage({ params: { domain } }: { params: { doma
- +
diff --git a/packages/web/src/app/[domain]/repos/repositoryTable.tsx b/packages/web/src/app/[domain]/repos/repositoryTable.tsx index 056c0843..9ccf7298 100644 --- a/packages/web/src/app/[domain]/repos/repositoryTable.tsx +++ b/packages/web/src/app/[domain]/repos/repositoryTable.tsx @@ -10,9 +10,20 @@ import { RepoIndexingStatus } from "@sourcebot/db"; import { useMemo } from "react"; import { Skeleton } from "@/components/ui/skeleton"; import { env } from "@/env.mjs"; +import { Button } from "@/components/ui/button"; +import { PlusIcon } from "lucide-react"; +import { AddRepositoryDialog } from "./components/addRepositoryDialog"; +import { useState } from "react"; -export const RepositoryTable = () => { +interface RepositoryTableProps { + isAddReposButtonVisible: boolean +} + +export const RepositoryTable = ({ + isAddReposButtonVisible, +}: RepositoryTableProps) => { const domain = useDomain(); + const [isAddDialogOpen, setIsAddDialogOpen] = useState(false); const { data: repos, isLoading: reposLoading, error: reposError } = useQuery({ queryKey: ['repos', domain], @@ -44,6 +55,29 @@ export const RepositoryTable = () => { lastIndexed: repo.indexedAt?.toISOString() ?? "", url: repo.webUrl ?? repo.repoCloneUrl, })).sort((a, b) => { + const getPriorityFromStatus = (status: RepoIndexingStatus) => { + switch (status) { + case RepoIndexingStatus.IN_INDEX_QUEUE: + case RepoIndexingStatus.INDEXING: + return 0 // Highest priority - currently indexing + case RepoIndexingStatus.FAILED: + return 1 // Second priority - failed repos need attention + case RepoIndexingStatus.INDEXED: + return 2 // Third priority - successfully indexed + default: + return 3 // Lowest priority - other statuses (NEW, etc.) + } + } + + // Sort by priority first + const aPriority = getPriorityFromStatus(a.repoIndexingStatus); + const bPriority = getPriorityFromStatus(b.repoIndexingStatus); + + if (aPriority !== bPriority) { + return aPriority - bPriority; // Lower priority number = higher precedence + } + + // If same priority, sort by last indexed date (most recent first) return new Date(b.lastIndexed).getTime() - new Date(a.lastIndexed).getTime(); }); }, [repos, reposLoading]); @@ -83,11 +117,28 @@ export const RepositoryTable = () => { } return ( - + <> + setIsAddDialogOpen(true)} + > + + Add repository + + )} + /> + + + ); } \ No newline at end of file diff --git a/packages/web/src/components/ui/data-table.tsx b/packages/web/src/components/ui/data-table.tsx index ac02210a..ce99592c 100644 --- a/packages/web/src/components/ui/data-table.tsx +++ b/packages/web/src/components/ui/data-table.tsx @@ -22,14 +22,13 @@ import { import { Button } from "@/components/ui/button" import { Input } from "@/components/ui/input" import * as React from "react" -import { PlusIcon } from "lucide-react" -import { env } from "@/env.mjs" interface DataTableProps { columns: ColumnDef[] data: TData[] searchKey: string - searchPlaceholder?: string + searchPlaceholder?: string, + headerActions?: React.ReactNode, } export function DataTable({ @@ -37,6 +36,8 @@ export function DataTable({ data, searchKey, searchPlaceholder, + headerActions, + }: DataTableProps) { const [sorting, setSorting] = React.useState([]) const [columnFilters, setColumnFilters] = React.useState( @@ -75,18 +76,7 @@ export function DataTable({ Show a button on the demo site that allows users to add new repositories by updating the demo-site-config.json file and opening a PR. */} - {env.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT === "demo" && ( - - )} + {headerActions}
diff --git a/packages/web/src/env.mjs b/packages/web/src/env.mjs index 6dcfaef0..808395a7 100644 --- a/packages/web/src/env.mjs +++ b/packages/web/src/env.mjs @@ -131,6 +131,10 @@ export const env = createEnv({ LANGFUSE_SECRET_KEY: z.string().optional(), SOURCEBOT_DEMO_EXAMPLES_PATH: z.string().optional(), + + EXPERIMENT_SELF_SERVE_REPO_INDEXING_ENABLED: booleanSchema.default('false'), + // @NOTE: Take care to update actions.ts when changing the name of this. + EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN: z.string().optional(), }, // @NOTE: Please make sure of the following: // - Make sure you destructure all client variables in diff --git a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx index d7b9966a..0fe18a64 100644 --- a/packages/web/src/features/chat/components/chatThread/detailsCard.tsx +++ b/packages/web/src/features/chat/components/chatThread/detailsCard.tsx @@ -6,7 +6,7 @@ import { Separator } from '@/components/ui/separator'; import { Skeleton } from '@/components/ui/skeleton'; import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; import { cn } from '@/lib/utils'; -import { Brain, ChevronDown, ChevronRight, Clock, Cpu, InfoIcon, Loader2, List, ScanSearchIcon, Zap } from 'lucide-react'; +import { Brain, ChevronDown, ChevronRight, Clock, InfoIcon, Loader2, List, ScanSearchIcon, Zap } from 'lucide-react'; import { MarkdownRenderer } from './markdownRenderer'; import { FindSymbolDefinitionsToolComponent } from './tools/findSymbolDefinitionsToolComponent'; import { FindSymbolReferencesToolComponent } from './tools/findSymbolReferencesToolComponent'; diff --git a/packages/web/src/lib/utils.ts b/packages/web/src/lib/utils.ts index 973a5b5d..12c486cf 100644 --- a/packages/web/src/lib/utils.ts +++ b/packages/web/src/lib/utils.ts @@ -460,4 +460,12 @@ export const getOrgMetadata = (org: Org): OrgMetadata | null => { return currentMetadata.success ? currentMetadata.data : null; } -export const IS_MAC = typeof navigator !== 'undefined' && /Mac OS X/.test(navigator.userAgent); \ No newline at end of file +export const IS_MAC = typeof navigator !== 'undefined' && /Mac OS X/.test(navigator.userAgent); + + +export const isHttpError = (error: unknown, status: number): boolean => { + return error !== null + && typeof error === 'object' + && 'status' in error + && error.status === status; +} \ No newline at end of file