// sourcebot/packages/web/src/features/search/searchApi.ts
//
// 386 lines, 17 KiB, TypeScript
// (Extracted from the repository web view: Raw / Normal View / History.)

'use server';
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { isServiceError } from "../../lib/utils";
import { zoektFetch } from "./zoektClient";
import { ErrorCode } from "../../lib/errorCodes";
import { StatusCodes } from "http-status-codes";
import { zoektSearchResponseSchema } from "./zoektSchema";
// Blame annotation (2025-05-28 23:08:42 +00:00) — V4 (#311):
// Sourcebot V4 introduces authentication, performance improvements and code navigation.
// Check out the migration guide (https://docs.sourcebot.dev/self-hosting/upgrade/v3-to-v4-guide)
// for information on upgrading your instance to v4.
//
// Changed
// - [Breaking Change] Authentication is now required by default. Notes:
//   - When setting up your instance, email / password login will be the default authentication provider.
//   - The first user that logs into the instance is given the `owner` role
//     (https://docs.sourcebot.dev/docs/more/roles-and-permissions).
//   - Subsequent users can request to join the instance. The `owner` can approve / deny requests to join
//     the instance via `Settings` > `Members` > `Pending Requests`.
//   - If a user is approved to join the instance, they are given the `member` role.
//   - Additional login providers, including email links and SSO, can be configured with additional
//     environment variables (https://docs.sourcebot.dev/self-hosting/configuration/authentication).
// - Clicking on a search result now takes you to the `/browse` view. Files can still be previewed by
//   clicking the "Preview" button or holding `Cmd` / `Ctrl` when clicking on a search result.
//   #315 (https://github.com/sourcebot-dev/sourcebot/pull/315)
//
// Added
// - [Sourcebot EE] Added search-based code navigation, allowing you to jump between symbol definition
//   and references when viewing source files. Documentation:
//   https://docs.sourcebot.dev/docs/search/code-navigation.
//   #315 (https://github.com/sourcebot-dev/sourcebot/pull/315)
// - Added collapsible filter panel. #315 (https://github.com/sourcebot-dev/sourcebot/pull/315)
//
// Fixed
// - Improved scroll performance for large numbers of search results.
//   #315 (https://github.com/sourcebot-dev/sourcebot/pull/315)
import { SearchRequest, SearchResponse, SourceRange } from "./types";
import { PrismaClient, Repo } from "@sourcebot/db";
import { sew } from "@/actions";
import { base64Decode } from "@sourcebot/shared";
import { withOptionalAuthV2 } from "@/withAuthV2";
/**
 * List of supported query prefixes in zoekt.
 *
 * Each member's value is the literal prefix text, including the trailing
 * colon, so it can be concatenated directly with a value when building a
 * query atom (e.g. `${zoektPrefixes.branch}main`).
 *
 * Members named `*Short` hold the abbreviated spelling of the member that
 * precedes or follows them (e.g. `b:` alongside `branch:`).
 *
 * @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
 */
enum zoektPrefixes {
archived = "archived:",
branchShort = "b:",
branch = "branch:",
caseShort = "c:",
case = "case:",
content = "content:",
fileShort = "f:",
file = "file:",
fork = "fork:",
public = "public:",
repoShort = "r:",
repo = "repo:",
regex = "regex:",
lang = "lang:",
sym = "sym:",
typeShort = "t:",
type = "type:",
// Used in this file as the expansion target for `context:` atoms.
reposet = "reposet:",
}
/**
 * Rewrites Sourcebot-specific query atoms into atoms that zoekt understands.
 *
 * The query is split on single spaces and every part is handled on its own:
 *  - `rev:<name>` / `revision:<name>` become `branch:<name>`.
 *  - `context:<name>` becomes `reposet:<comma separated repository names>`,
 *    using the search context with that name in the org `orgId`.
 *  - every other part is passed through unchanged.
 * A leading `-` (negation) on a rewritten part is carried over.
 *
 * @param query  The raw query string.
 * @param orgId  Id of the org whose search contexts are looked up.
 * @param prisma Database client used to resolve search contexts.
 * @returns The rewritten query, or a `SEARCH_CONTEXT_NOT_FOUND` service error
 *          when a referenced search context does not exist.
 */
const transformZoektQuery = async (query: string, orgId: number, prisma: PrismaClient): Promise<string | ServiceError> => {
    const revisionAtom = /^-?(rev|revision):.+$/;
    const contextAtom = /^-?context:.+$/;

    const rewritten: string[] = [];

    for (const token of query.split(" ")) {
        const isRevision = revisionAtom.test(token);
        const isContext = !isRevision && contextAtom.test(token);

        // no-op: keep tokens that are neither a revision nor a context atom.
        if (!isRevision && !isContext) {
            rewritten.push(token);
            continue;
        }

        const negation = token.startsWith("-") ? "-" : "";
        const value = token.slice(token.indexOf(":") + 1);

        if (isRevision) {
            // Special case: `*` -> search all revisions.
            // In zoekt, providing a blank string will match all branches.
            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/eval.go#L560-L562
            const branchName = value === "*" ? "" : value;
            rewritten.push(`${negation}${zoektPrefixes.branch}${branchName}`);
            continue;
        }

        // Expand `context:` into a `reposet:` atom.
        const searchContext = await prisma.searchContext.findUnique({
            where: {
                name_orgId: {
                    name: value,
                    orgId,
                }
            },
            include: {
                repos: true,
            }
        });

        // A missing context is reported to the caller instead of being ignored.
        if (!searchContext) {
            return {
                errorCode: ErrorCode.SEARCH_CONTEXT_NOT_FOUND,
                message: `Search context "${value}" not found`,
                statusCode: StatusCodes.NOT_FOUND,
            } satisfies ServiceError;
        }

        const repoNames = searchContext.repos.map((repo) => repo.name).join(",");
        rewritten.push(`${negation}${zoektPrefixes.reposet}${repoNames}`);
    }

    return rewritten.join(" ");
}
// Blame annotation: 2025-05-07 23:21:05 +00:00
/**
 * Extracts a repository file URL from a zoekt template, branch, and file name.
 *
 * This is a hacky parser for templates generated by the go text/template
 * package. Example template:
 *
 *   {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
 *
 * The template may be followed by a query string that references
 * `{{.Version}}` and/or `{{.Path}}`.
 *
 * @param template The URL template.
 * @param branch   Value substituted for `.Version`.
 * @param fileName Value substituted for `.Path`.
 * @returns The percent-encoded URL, or `undefined` when `template` does not
 *          have the expected `{{URLJoinPath ...}}` form.
 */
const getFileWebUrl = (template: string, branch: string, fileName: string): string | undefined => {
    if (!/^{{URLJoinPath\s.*}}(\?.+)?$/.test(template)) {
        return undefined;
    }

    const templateEnd = template.indexOf("}}");

    // Encodes a substituted value like `encodeURI` does, and additionally
    // escapes the characters in `reserved`, which `encodeURI` leaves alone
    // but which would change the meaning of the URL (e.g. a `#` in a file
    // name would otherwise start a fragment).
    const encodeValue = (value: string, reserved: RegExp): string =>
        encodeURI(value).replace(reserved, (char) => encodeURIComponent(char));

    const url = template
        .substring("{{URLJoinPath ".length, templateEnd)
        .split(" ")
        .map((part) => {
            // remove wrapping quotes
            if (part.startsWith("\"")) part = part.substring(1);
            if (part.endsWith("\"")) part = part.substring(0, part.length - 1);
            // Replace variable references
            if (part === ".Version") return encodeValue(branch, /[?#]/g);
            if (part === ".Path") return encodeValue(fileName, /[?#]/g);
            return encodeURI(part);
        })
        .join("/");

    // Splitting on the placeholders (kept via the capture group) substitutes
    // every occurrence and inserts the values verbatim; a string passed as the
    // replacement to `String.replace` would expand `$$`, `$&`, ... in them.
    const optionalQueryParams = template
        .substring(templateEnd + 2)
        .split(/(\{\{\.Version\}\}|\{\{\.Path\}\})/)
        .map((piece) => {
            if (piece === "{{.Version}}") return encodeValue(branch, /[#&]/g);
            if (piece === "{{.Path}}") return encodeValue(fileName, /[#&]/g);
            return encodeURI(piece);
        })
        .join("");

    return url + optionalQueryParams;
}
// Matches a `branch:` / `b:` atom at the start of a query token, optionally
// negated and/or directly after an opening parenthesis. A plain substring
// check would also fire on unrelated text that merely contains `b:`
// (e.g. `lib:foo`), which would silently disable the default-branch filter.
const branchFilterPattern = new RegExp(
    `(?:^|[\\s(])-?(?:${zoektPrefixes.branch}|${zoektPrefixes.branchShort})`
);

// Shape of the zoekt location / range objects consumed below.
// NOTE(review): assumed to match the location fields of `zoektSearchResponseSchema`.
type ZoektLocation = { ByteOffset: number; Column: number; LineNumber: number };
type ZoektRange = { Start: ZoektLocation; End: ZoektLocation };

// Converts a zoekt match range into the `SourceRange` shape used in the search response.
const toSourceRange = (range: ZoektRange): SourceRange => ({
    start: {
        byteOffset: range.Start.ByteOffset,
        column: range.Start.Column,
        lineNumber: range.Start.LineNumber,
    },
    end: {
        byteOffset: range.End.ByteOffset,
        column: range.End.Column,
        lineNumber: range.End.LineNumber,
    },
});

/**
 * Runs a search against zoekt on behalf of the current org and maps the raw
 * zoekt response into a `SearchResponse`.
 *
 * Steps:
 *  1. Rewrite Sourcebot-specific atoms via `transformZoektQuery`.
 *  2. Restrict the search to `branch:HEAD` unless the query filters by branch.
 *  3. POST the query to zoekt's `/api/search` endpoint.
 *  4. Resolve the repositories referenced by the results (scoped to the org)
 *     and drop file matches whose repository could not be resolved.
 *
 * @param query        The search query.
 * @param matches      Maximum number of matches to return.
 * @param contextLines Number of context lines requested around each match.
 * @param whole        When truthy, zoekt is asked for whole file contents.
 * @returns The parsed search response, or a service error when the query
 *          cannot be transformed or zoekt responds with a non-OK status.
 */
export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() =>
    withOptionalAuthV2(async ({ org, prisma }) => {
        const transformedQuery = await transformZoektQuery(query, org.id, prisma);
        if (isServiceError(transformedQuery)) {
            return transformedQuery;
        }
        query = transformedQuery;

        const isBranchFilteringEnabled = branchFilterPattern.test(query);

        // We only want to show matches for the default branch when
        // the user isn't explicitly filtering by branch.
        if (!isBranchFilteringEnabled) {
            query = query.concat(` branch:HEAD`);
        }

        const body = JSON.stringify({
            q: query,
            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
            opts: {
                ChunkMatches: true,
                // @note: Zoekt has several different ways to limit a given search. The two that
                // we care about are `MaxMatchDisplayCount` and `TotalMaxMatchCount`:
                // - `MaxMatchDisplayCount` truncates the number of matches AFTER performing
                //   a search (specifically, after collating and sorting the results). The number of
                //   results returned by the API will be less than or equal to this value.
                //
                // - `TotalMaxMatchCount` truncates the number of matches DURING a search. The results
                //   returned by the API can be less than, equal to, or greater than this value.
                //   Why greater? Because this value is compared _after_ a given shard has finished
                //   being processed, the number of matches returned by the last shard may have exceeded
                //   this value.
                //
                // Let's define two variables:
                // - `actualMatchCount` : The number of matches that are returned by the API. This is
                //   always less than or equal to `MaxMatchDisplayCount`.
                // - `totalMatchCount` : The number of matches that zoekt found before it either
                //   1) found all matches or 2) hit the `TotalMaxMatchCount` limit. This number is
                //   not bounded and can be less than, equal to, or greater than both `TotalMaxMatchCount`
                //   and `MaxMatchDisplayCount`.
                //
                // Our challenge is to determine whether or not the search returned all possible matches
                // (it was exhaustive) or if it was truncated. By setting `TotalMaxMatchCount` to
                // `MaxMatchDisplayCount + 1`, we can determine which of these occurred by comparing
                // `totalMatchCount` to `actualMatchCount`.
                //
                // if (totalMatchCount ≤ actualMatchCount):
                //     Search is EXHAUSTIVE (found all possible matches)
                //     Proof: totalMatchCount ≤ MaxMatchDisplayCount < TotalMaxMatchCount
                //         Therefore Zoekt stopped naturally, not due to the limit
                //
                // if (totalMatchCount > actualMatchCount):
                //     Search is TRUNCATED (more matches exist)
                //     Zoekt found matches that were not returned, i.e. the display
                //     limit (and possibly the total limit) was hit.
                //
                MaxMatchDisplayCount: matches,
                TotalMaxMatchCount: matches + 1,
                NumContextLines: contextLines,
                Whole: !!whole,
                ShardMaxMatchCount: -1,
                MaxWallTime: 0, // zoekt expects a duration in nanoseconds
            }
        });

        const header: Record<string, string> = {
            "X-Tenant-ID": org.id.toString()
        };

        const searchResponse = await zoektFetch({
            path: "/api/search",
            body,
            header,
            method: "POST",
        });

        if (!searchResponse.ok) {
            return invalidZoektResponse(searchResponse);
        }

        const searchBody = await searchResponse.json();

        const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
            // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
            // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
            // metadata when transforming (potentially thousands) of file matches, we aggregate a unique
            // set of repository ids* and map them to their corresponding Repo record.
            //
            // *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`?
            //  A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was
            //     always undefined. To make this a non-breaking change, we fallback to using the repository's name
            //     (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in
            //     practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo',
            //     'gitea.com/org/repo', etc.).
            //
            //     Note: When a repository is re-indexed (every hour) this ID will be populated.
            //     @see: https://github.com/sourcebot-dev/zoekt/pull/6
            const repoIdentifiers = Array.from(
                new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? [])
            );

            // The two lookups are independent of each other, so run them concurrently.
            const [reposById, reposByName] = await Promise.all([
                prisma.repo.findMany({
                    where: {
                        id: {
                            in: repoIdentifiers.filter((id) => typeof id === "number"),
                        },
                        orgId: org.id,
                    }
                }),
                prisma.repo.findMany({
                    where: {
                        name: {
                            in: repoIdentifiers.filter((id) => typeof id === "string"),
                        },
                        orgId: org.id,
                    }
                }),
            ]);

            const repos = new Map<string | number, Repo>();
            reposById.forEach((repo) => repos.set(repo.id, repo));
            reposByName.forEach((repo) => repos.set(repo.name, repo));

            const files = Result.Files?.map((file) => {
                const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);

                const webUrl = (() => {
                    const template: string | undefined = Result.RepoURLs[file.Repository];
                    if (!template) {
                        return undefined;
                    }

                    // If there are multiple branches pointing to the same revision of this file, it doesn't
                    // matter which branch we use here, so use the first one.
                    const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
                    return getFileWebUrl(template, branch, file.FileName);
                })();

                const identifier = file.RepositoryID ?? file.Repository;
                const repo = repos.get(identifier);

                // This can happen if the user doesn't have access to the repository.
                if (!repo) {
                    return undefined;
                }

                return {
                    fileName: {
                        text: file.FileName,
                        // File name matches are only reported when exactly one file name chunk exists.
                        matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map(toSourceRange) : [],
                    },
                    repository: repo.name,
                    repositoryId: repo.id,
                    webUrl: webUrl,
                    language: file.Language,
                    chunks: file.ChunkMatches
                        .filter((chunk) => !chunk.FileName) // Filter out filename chunks.
                        .map((chunk) => {
                            return {
                                content: base64Decode(chunk.Content),
                                matchRanges: chunk.Ranges.map(toSourceRange),
                                contentStart: {
                                    byteOffset: chunk.ContentStart.ByteOffset,
                                    column: chunk.ContentStart.Column,
                                    lineNumber: chunk.ContentStart.LineNumber,
                                },
                                symbols: chunk.SymbolInfo?.map((symbol) => {
                                    return {
                                        symbol: symbol.Sym,
                                        kind: symbol.Kind,
                                        parent: symbol.Parent.length > 0 ? {
                                            symbol: symbol.Parent,
                                            kind: symbol.ParentKind,
                                        } : undefined,
                                    }
                                }) ?? undefined,
                            }
                        }),
                    branches: file.Branches,
                    content: file.Content ? base64Decode(file.Content) : undefined,
                }
            }).filter((file) => file !== undefined) ?? [];

            const actualMatchCount = files.reduce(
                (acc, file) =>
                    // Match count is the sum of the number of chunk matches and file name matches.
                    acc + file.chunks.reduce(
                        (acc, chunk) => acc + chunk.matchRanges.length,
                        0,
                    ) + file.fileName.matchRanges.length,
                0,
            );

            const totalMatchCount = Result.MatchCount;
            const isSearchExhaustive = totalMatchCount <= actualMatchCount;

            return {
                files,
                repositoryInfo: Array.from(repos.values()).map((repo) => ({
                    id: repo.id,
                    codeHostType: repo.external_codeHostType,
                    name: repo.name,
                    displayName: repo.displayName ?? undefined,
                    webUrl: repo.webUrl ?? undefined,
                })),
                isBranchFilteringEnabled,
                isSearchExhaustive,
                stats: {
                    actualMatchCount,
                    totalMatchCount,
                    duration: Result.Duration,
                    fileCount: Result.FileCount,
                    filesSkipped: Result.FilesSkipped,
                    contentBytesLoaded: Result.ContentBytesLoaded,
                    indexBytesLoaded: Result.IndexBytesLoaded,
                    crashes: Result.Crashes,
                    shardFilesConsidered: Result.ShardFilesConsidered,
                    filesConsidered: Result.FilesConsidered,
                    filesLoaded: Result.FilesLoaded,
                    shardsScanned: Result.ShardsScanned,
                    shardsSkipped: Result.ShardsSkipped,
                    shardsSkippedFilter: Result.ShardsSkippedFilter,
                    ngramMatches: Result.NgramMatches,
                    ngramLookups: Result.NgramLookups,
                    wait: Result.Wait,
                    matchTreeConstruction: Result.MatchTreeConstruction,
                    matchTreeSearch: Result.MatchTreeSearch,
                    regexpsConsidered: Result.RegexpsConsidered,
                    flushReason: Result.FlushReason,
                }
            } satisfies SearchResponse;
        });

        return parser.parseAsync(searchBody);
    }));