// sourcebot/packages/web/src/features/search/searchApi.ts
// (270 lines, 10 KiB, TypeScript)


import { env } from "@/env.mjs";
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { isServiceError } from "../../lib/utils";
import { zoektFetch } from "./zoektClient";
import { prisma } from "@/prisma";
import { ErrorCode } from "../../lib/errorCodes";
import { StatusCodes } from "http-status-codes";
import { zoektSearchResponseSchema } from "./zoektSchema";
import { SearchRequest, SearchResponse, SearchResultRange } from "./types";
// [extraction residue] 2025-05-07 23:21:05 +00:00
import assert from "assert";
/**
 * Query prefixes supported by zoekt.
 *
 * Each value includes the trailing `:` so it can be concatenated directly
 * with the atom's argument (e.g. `${zoektPrefixes.branch}main`).
 * Members suffixed with `Short` are the abbreviated spelling of the member
 * that follows them.
 *
 * @see https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
 */
enum zoektPrefixes {
    archived = "archived:",
    // branch
    branchShort = "b:",
    branch = "branch:",
    // case
    caseShort = "c:",
    case = "case:",
    content = "content:",
    // file
    fileShort = "f:",
    file = "file:",
    fork = "fork:",
    public = "public:",
    // repo
    repoShort = "r:",
    repo = "repo:",
    regex = "regex:",
    lang = "lang:",
    sym = "sym:",
    // type
    typeShort = "t:",
    type = "type:",
    reposet = "reposet:",
}
/**
 * Rewrites the space-separated terms of a user query into a query string for zoekt.
 *
 * Two kinds of terms are rewritten (both may be negated with a leading `-`);
 * every other term is passed through untouched:
 *  - `rev:<name>` / `revision:<name>` becomes `branch:<name>`, where `*` is
 *    mapped to an empty branch name.
 *  - `context:<name>` becomes `reposet:<comma separated repo names>`, using the
 *    search context with that name in the given org.
 *
 * @param query The raw query string; terms are split on single spaces.
 * @param orgId Org used to look up search contexts.
 * @returns The rewritten query, or a `SEARCH_CONTEXT_NOT_FOUND` service error
 * when a referenced search context does not exist.
 */
const transformZoektQuery = async (query: string, orgId: number): Promise<string | ServiceError> => {
    const revisionTerm = /^-?(rev|revision):.+$/;
    const contextTerm = /^-?context:.+$/;

    const rewritten = [];

    for (const term of query.split(" ")) {
        const isRevision = revisionTerm.test(term);
        const isContext = !isRevision && contextTerm.test(term);

        // no-op: keep terms that are neither a revision nor a context atom.
        if (!isRevision && !isContext) {
            rewritten.push(term);
            continue;
        }

        const negation = term.startsWith("-") ? "-" : "";
        const argument = term.slice(term.indexOf(":") + 1);

        // Map `rev:` and `revision:` onto `branch:`.
        if (isRevision) {
            // Special case: `*` -> search all revisions.
            // In zoekt, providing a blank string will match all branches.
            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/eval.go#L560-L562
            const branchName = argument === "*" ? "" : argument;
            rewritten.push(`${negation}${zoektPrefixes.branch}${branchName}`);
            continue;
        }

        // Expand `context:` into a `reposet:` atom.
        const searchContext = await prisma.searchContext.findUnique({
            where: {
                name_orgId: {
                    name: argument,
                    orgId,
                }
            },
            include: {
                repos: true,
            }
        });

        // An unknown context aborts the whole transformation with an error.
        if (!searchContext) {
            return {
                errorCode: ErrorCode.SEARCH_CONTEXT_NOT_FOUND,
                message: `Search context "${argument}" not found`,
                statusCode: StatusCodes.NOT_FOUND,
            } satisfies ServiceError;
        }

        const repoNames = searchContext.repos.map((repo) => repo.name);
        rewritten.push(`${negation}${zoektPrefixes.reposet}${repoNames.join(",")}`);
    }

    return rewritten.join(" ");
}
// [extraction residue] 2025-05-07 23:21:05 +00:00
/**
 * Builds the URL of a file from a zoekt URL template, a branch, and a file name.
 *
 * This is a hacky parser for templates generated by the go text/template
 * package. Example template:
 *
 *   {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
 *
 * optionally followed by a query string that may contain `{{.Version}}` and
 * `{{.Path}}` placeholders.
 *
 * @param template The URL template.
 * @param branch Value substituted for `.Version`.
 * @param fileName Value substituted for `.Path`.
 * @returns The joined URL, passed through `encodeURI`.
 * @throws AssertionError when the template does not have the expected shape.
 */
function getRepositoryUrl(template: string, branch: string, fileName: string): string {
    // The template should always match this regex, so let's assert that.
    assert(template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/), "Invalid template");

    const actionEnd = template.indexOf("}}");

    // Split the template arguments BEFORE substituting the variables. Substituting
    // first would cause a file name containing a space to be split into several
    // path segments, and would strip quotes that belong to the file name.
    const url = template
        .substring("{{URLJoinPath ".length, actionEnd)
        .split(" ")
        .map((argument) => {
            // Only bare tokens are variable references; a quoted ".Version" is a literal.
            if (argument === ".Version") return branch;
            if (argument === ".Path") return fileName;

            // remove wrapping quotes
            let literal = argument;
            if (literal.startsWith("\"")) literal = literal.substring(1);
            if (literal.endsWith("\"")) literal = literal.substring(0, literal.length - 1);
            return literal;
        })
        .join("/");

    // Function replacers are used so that `$` sequences in the branch or file name
    // (`$$`, `$&`, ...) are inserted verbatim instead of being interpreted as
    // replacement patterns.
    const optionalQueryParams = template
        .substring(actionEnd + 2)
        .replace("{{.Version}}", () => branch)
        .replace("{{.Path}}", () => fileName);

    return encodeURI(url + optionalQueryParams);
}
/**
 * Copies the offset fields of a zoekt location into the camelCase shape used
 * by the search response.
 */
const toSearchLocation = <B, C, L>(location: { ByteOffset: B; Column: C; LineNumber: L }) => ({
    byteOffset: location.ByteOffset,
    column: location.Column,
    lineNumber: location.LineNumber,
});

/**
 * Runs a search against zoekt on behalf of an org.
 *
 * The query is first rewritten by `transformZoektQuery`. Unless it contains an
 * explicit branch atom, it is restricted to `branch:HEAD`. The request is then
 * POSTed to zoekt's `/api/search` endpoint and the response is validated and
 * reshaped with `zoektSearchResponseSchema`.
 *
 * @param request The search request: `query`, `matches` (sent as
 * `MaxMatchDisplayCount`), `contextLines` (sent as `NumContextLines`) and
 * `whole` (sent as `Whole`).
 * @param orgId Org the search is performed for; sent as the `X-Tenant-ID` header.
 * @returns The parsed search response, the service error produced by the query
 * transformation, or the result of `invalidZoektResponse` when zoekt responds
 * with a non-OK status.
 * @throws If the zoekt response does not match the schema, or (via `assert`)
 * if a file's repository has no URL template or the template is malformed.
 */
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, orgId: number) => {
    const transformedQuery = await transformZoektQuery(query, orgId);
    if (isServiceError(transformedQuery)) {
        return transformedQuery;
    }

    // Only treat `b:` / `branch:` as a branch filter when it starts a query term
    // (optionally negated, or directly after an opening parenthesis). A plain
    // substring check would also fire on unrelated terms that merely end in
    // "b:" (e.g. `sub:`), which would wrongly drop the default-branch restriction.
    const branchAtomPattern = new RegExp(
        `(?:^|[\\s(])-?(?:${zoektPrefixes.branchShort}|${zoektPrefixes.branch})`
    );
    const isBranchFilteringEnabled = branchAtomPattern.test(transformedQuery);

    // We only want to show matches for the default branch when
    // the user isn't explicitly filtering by branch.
    const zoektQuery = isBranchFilteringEnabled
        ? transformedQuery
        : transformedQuery.concat(` branch:HEAD`);

    const body = JSON.stringify({
        q: zoektQuery,
        // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
        opts: {
            ChunkMatches: true,
            MaxMatchDisplayCount: matches,
            NumContextLines: contextLines,
            Whole: !!whole,
            TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
            ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
            MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
        }
    });

    const header: Record<string, string> = {
        "X-Tenant-ID": orgId.toString()
    };

    const searchResponse = await zoektFetch({
        path: "/api/search",
        body,
        header,
        method: "POST",
    });

    if (!searchResponse.ok) {
        return invalidZoektResponse(searchResponse);
    }

    const searchBody = await searchResponse.json();

    const parser = zoektSearchResponseSchema.transform(({ Result }) => ({
        zoektStats: {
            duration: Result.Duration,
            fileCount: Result.FileCount,
            matchCount: Result.MatchCount,
            filesSkipped: Result.FilesSkipped,
            contentBytesLoaded: Result.ContentBytesLoaded,
            indexBytesLoaded: Result.IndexBytesLoaded,
            crashes: Result.Crashes,
            shardFilesConsidered: Result.ShardFilesConsidered,
            filesConsidered: Result.FilesConsidered,
            filesLoaded: Result.FilesLoaded,
            shardsScanned: Result.ShardsScanned,
            shardsSkipped: Result.ShardsSkipped,
            shardsSkippedFilter: Result.ShardsSkippedFilter,
            ngramMatches: Result.NgramMatches,
            ngramLookups: Result.NgramLookups,
            wait: Result.Wait,
            matchTreeConstruction: Result.MatchTreeConstruction,
            matchTreeSearch: Result.MatchTreeSearch,
            regexpsConsidered: Result.RegexpsConsidered,
            flushReason: Result.FlushReason,
        },
        files: Result.Files?.map((file) => {
            // Chunks flagged with `FileName` describe matches in the file name itself.
            const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);

            const template = Result.RepoURLs[file.Repository];
            assert(template, `Template not found for repository ${file.Repository}`);

            // If there are multiple branches pointing to the same revision of this file, it doesn't
            // matter which branch we use here, so use the first one.
            const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
            const url = getRepositoryUrl(template, branch, file.FileName);

            return {
                fileName: {
                    text: file.FileName,
                    // File name ranges are only reported when there is exactly one file name chunk.
                    matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
                        start: toSearchLocation(range.Start),
                        end: toSearchLocation(range.End),
                    })) : [],
                },
                repository: file.Repository,
                url: url,
                language: file.Language,
                chunks: file.ChunkMatches
                    .filter((chunk) => !chunk.FileName) // Filter out filename chunks.
                    .map((chunk) => {
                        return {
                            content: chunk.Content,
                            matchRanges: chunk.Ranges.map((range) => ({
                                start: toSearchLocation(range.Start),
                                end: toSearchLocation(range.End),
                            }) satisfies SearchResultRange),
                            contentStart: toSearchLocation(chunk.ContentStart),
                            symbols: chunk.SymbolInfo?.map((symbol) => {
                                return {
                                    symbol: symbol.Sym,
                                    kind: symbol.Kind,
                                    // A parent is only reported when zoekt provides a non-empty name.
                                    parent: symbol.Parent.length > 0 ? {
                                        symbol: symbol.Parent,
                                        kind: symbol.ParentKind,
                                    } : undefined,
                                }
                            }) ?? undefined,
                        }
                    }),
                branches: file.Branches,
                content: file.Content,
            }
        }) ?? [],
        isBranchFilteringEnabled: isBranchFilteringEnabled,
    } satisfies SearchResponse));

    return parser.parse(searchBody);
}