2025-05-03 18:33:58 +00:00
|
|
|
import { env } from "@/env.mjs";
|
|
|
|
|
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
|
|
|
|
import { isServiceError } from "../../lib/utils";
|
|
|
|
|
import { zoektFetch } from "./zoektClient";
|
|
|
|
|
import { prisma } from "@/prisma";
|
|
|
|
|
import { ErrorCode } from "../../lib/errorCodes";
|
|
|
|
|
import { StatusCodes } from "http-status-codes";
|
|
|
|
|
import { zoektSearchResponseSchema } from "./zoektSchema";
|
|
|
|
|
import { SearchRequest, SearchResponse, SearchResultRange } from "./types";
|
2025-05-07 23:21:05 +00:00
|
|
|
import assert from "assert";
|
2025-05-03 18:33:58 +00:00
|
|
|
|
|
|
|
|
// List of supported query prefixes in zoekt.
|
|
|
|
|
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
|
|
|
|
|
enum zoektPrefixes {
|
|
|
|
|
archived = "archived:",
|
|
|
|
|
branchShort = "b:",
|
|
|
|
|
branch = "branch:",
|
|
|
|
|
caseShort = "c:",
|
|
|
|
|
case = "case:",
|
|
|
|
|
content = "content:",
|
|
|
|
|
fileShort = "f:",
|
|
|
|
|
file = "file:",
|
|
|
|
|
fork = "fork:",
|
|
|
|
|
public = "public:",
|
|
|
|
|
repoShort = "r:",
|
|
|
|
|
repo = "repo:",
|
|
|
|
|
regex = "regex:",
|
|
|
|
|
lang = "lang:",
|
|
|
|
|
sym = "sym:",
|
|
|
|
|
typeShort = "t:",
|
|
|
|
|
type = "type:",
|
|
|
|
|
reposet = "reposet:",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const transformZoektQuery = async (query: string, orgId: number): Promise<string | ServiceError> => {
|
|
|
|
|
const prevQueryParts = query.split(" ");
|
|
|
|
|
const newQueryParts = [];
|
|
|
|
|
|
|
|
|
|
for (const part of prevQueryParts) {
|
|
|
|
|
|
|
|
|
|
// Handle mapping `rev:` and `revision:` to `branch:`
|
|
|
|
|
if (part.match(/^-?(rev|revision):.+$/)) {
|
|
|
|
|
const isNegated = part.startsWith("-");
|
|
|
|
|
let revisionName = part.slice(part.indexOf(":") + 1);
|
|
|
|
|
|
|
|
|
|
// Special case: `*` -> search all revisions.
|
|
|
|
|
// In zoekt, providing a blank string will match all branches.
|
|
|
|
|
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/eval.go#L560-L562
|
|
|
|
|
if (revisionName === "*") {
|
|
|
|
|
revisionName = "";
|
|
|
|
|
}
|
|
|
|
|
newQueryParts.push(`${isNegated ? "-" : ""}${zoektPrefixes.branch}${revisionName}`);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Expand `context:` into `reposet:` atom.
|
|
|
|
|
else if (part.match(/^-?context:.+$/)) {
|
|
|
|
|
const isNegated = part.startsWith("-");
|
|
|
|
|
const contextName = part.slice(part.indexOf(":") + 1);
|
|
|
|
|
|
|
|
|
|
const context = await prisma.searchContext.findUnique({
|
|
|
|
|
where: {
|
|
|
|
|
name_orgId: {
|
|
|
|
|
name: contextName,
|
|
|
|
|
orgId,
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
include: {
|
|
|
|
|
repos: true,
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// If the context doesn't exist, return an error.
|
|
|
|
|
if (!context) {
|
|
|
|
|
return {
|
|
|
|
|
errorCode: ErrorCode.SEARCH_CONTEXT_NOT_FOUND,
|
|
|
|
|
message: `Search context "${contextName}" not found`,
|
|
|
|
|
statusCode: StatusCodes.NOT_FOUND,
|
|
|
|
|
} satisfies ServiceError;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const names = context.repos.map((repo) => repo.name);
|
|
|
|
|
newQueryParts.push(`${isNegated ? "-" : ""}${zoektPrefixes.reposet}${names.join(",")}`);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// no-op: add the original part to the new query parts.
|
|
|
|
|
else {
|
|
|
|
|
newQueryParts.push(part);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return newQueryParts.join(" ");
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-07 23:21:05 +00:00
|
|
|
// Extracts a repository file URL from a zoekt template, branch, and file name.
|
|
|
|
|
function getRepositoryUrl(template: string, branch: string, fileName: string): string {
|
|
|
|
|
// This is a hacky parser for templates generated by
|
|
|
|
|
// the go text/template package. Example template:
|
|
|
|
|
// {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
|
|
|
|
|
|
|
|
|
|
// The template should always match this regex, so let's assert that.
|
|
|
|
|
assert(template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/), "Invalid template");
|
|
|
|
|
|
|
|
|
|
const url =
|
|
|
|
|
template.substring("{{URLJoinPath ".length, template.indexOf("}}"))
|
|
|
|
|
.replace(".Version", branch)
|
|
|
|
|
.replace(".Path", fileName)
|
|
|
|
|
.split(" ")
|
|
|
|
|
.map((part) => {
|
|
|
|
|
// remove wrapping quotes
|
|
|
|
|
if (part.startsWith("\"")) part = part.substring(1);
|
|
|
|
|
if (part.endsWith("\"")) part = part.substring(0, part.length - 1);
|
|
|
|
|
return part;
|
|
|
|
|
})
|
|
|
|
|
.join("/");
|
|
|
|
|
|
|
|
|
|
const optionalQueryParams =
|
|
|
|
|
template.substring(template.indexOf("}}") + 2)
|
|
|
|
|
.replace("{{.Version}}", branch)
|
|
|
|
|
.replace("{{.Path}}", fileName);
|
|
|
|
|
|
|
|
|
|
return encodeURI(url + optionalQueryParams);
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-03 18:33:58 +00:00
|
|
|
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, orgId: number) => {
|
|
|
|
|
const transformedQuery = await transformZoektQuery(query, orgId);
|
|
|
|
|
if (isServiceError(transformedQuery)) {
|
|
|
|
|
return transformedQuery;
|
|
|
|
|
}
|
|
|
|
|
query = transformedQuery;
|
|
|
|
|
|
|
|
|
|
const isBranchFilteringEnabled = (
|
|
|
|
|
query.includes(zoektPrefixes.branch) ||
|
|
|
|
|
query.includes(zoektPrefixes.branchShort)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// We only want to show matches for the default branch when
|
|
|
|
|
// the user isn't explicitly filtering by branch.
|
|
|
|
|
if (!isBranchFilteringEnabled) {
|
|
|
|
|
query = query.concat(` branch:HEAD`);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const body = JSON.stringify({
|
|
|
|
|
q: query,
|
|
|
|
|
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
|
|
|
|
|
opts: {
|
|
|
|
|
ChunkMatches: true,
|
|
|
|
|
MaxMatchDisplayCount: matches,
|
|
|
|
|
NumContextLines: contextLines,
|
|
|
|
|
Whole: !!whole,
|
|
|
|
|
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
|
|
|
|
|
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
|
|
|
|
|
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let header: Record<string, string> = {};
|
|
|
|
|
header = {
|
|
|
|
|
"X-Tenant-ID": orgId.toString()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const searchResponse = await zoektFetch({
|
|
|
|
|
path: "/api/search",
|
|
|
|
|
body,
|
|
|
|
|
header,
|
|
|
|
|
method: "POST",
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (!searchResponse.ok) {
|
|
|
|
|
return invalidZoektResponse(searchResponse);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const searchBody = await searchResponse.json();
|
|
|
|
|
|
|
|
|
|
const parser = zoektSearchResponseSchema.transform(({ Result }) => ({
|
|
|
|
|
zoektStats: {
|
|
|
|
|
duration: Result.Duration,
|
|
|
|
|
fileCount: Result.FileCount,
|
|
|
|
|
matchCount: Result.MatchCount,
|
|
|
|
|
filesSkipped: Result.FilesSkipped,
|
|
|
|
|
contentBytesLoaded: Result.ContentBytesLoaded,
|
|
|
|
|
indexBytesLoaded: Result.IndexBytesLoaded,
|
|
|
|
|
crashes: Result.Crashes,
|
|
|
|
|
shardFilesConsidered: Result.ShardFilesConsidered,
|
|
|
|
|
filesConsidered: Result.FilesConsidered,
|
|
|
|
|
filesLoaded: Result.FilesLoaded,
|
|
|
|
|
shardsScanned: Result.ShardsScanned,
|
|
|
|
|
shardsSkipped: Result.ShardsSkipped,
|
|
|
|
|
shardsSkippedFilter: Result.ShardsSkippedFilter,
|
|
|
|
|
ngramMatches: Result.NgramMatches,
|
|
|
|
|
ngramLookups: Result.NgramLookups,
|
|
|
|
|
wait: Result.Wait,
|
|
|
|
|
matchTreeConstruction: Result.MatchTreeConstruction,
|
|
|
|
|
matchTreeSearch: Result.MatchTreeSearch,
|
|
|
|
|
regexpsConsidered: Result.RegexpsConsidered,
|
|
|
|
|
flushReason: Result.FlushReason,
|
|
|
|
|
},
|
|
|
|
|
files: Result.Files?.map((file) => {
|
|
|
|
|
const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
|
2025-05-07 23:21:05 +00:00
|
|
|
|
|
|
|
|
const template = Result.RepoURLs[file.Repository];
|
|
|
|
|
assert(template, `Template not found for repository ${file.Repository}`);
|
|
|
|
|
|
|
|
|
|
// If there are multiple branches pointing to the same revision of this file, it doesn't
|
|
|
|
|
// matter which branch we use here, so use the first one.
|
|
|
|
|
const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
|
|
|
|
|
const url = getRepositoryUrl(template, branch, file.FileName);
|
|
|
|
|
|
2025-05-03 18:33:58 +00:00
|
|
|
return {
|
|
|
|
|
fileName: {
|
|
|
|
|
text: file.FileName,
|
|
|
|
|
matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
|
|
|
|
|
start: {
|
|
|
|
|
byteOffset: range.Start.ByteOffset,
|
|
|
|
|
column: range.Start.Column,
|
|
|
|
|
lineNumber: range.Start.LineNumber,
|
|
|
|
|
},
|
|
|
|
|
end: {
|
|
|
|
|
byteOffset: range.End.ByteOffset,
|
|
|
|
|
column: range.End.Column,
|
|
|
|
|
lineNumber: range.End.LineNumber,
|
|
|
|
|
}
|
|
|
|
|
})) : [],
|
|
|
|
|
},
|
|
|
|
|
repository: file.Repository,
|
2025-05-07 23:21:05 +00:00
|
|
|
url: url,
|
2025-05-03 18:33:58 +00:00
|
|
|
language: file.Language,
|
|
|
|
|
chunks: file.ChunkMatches
|
|
|
|
|
.filter((chunk) => !chunk.FileName) // Filter out filename chunks.
|
|
|
|
|
.map((chunk) => {
|
|
|
|
|
return {
|
|
|
|
|
content: chunk.Content,
|
|
|
|
|
matchRanges: chunk.Ranges.map((range) => ({
|
|
|
|
|
start: {
|
|
|
|
|
byteOffset: range.Start.ByteOffset,
|
|
|
|
|
column: range.Start.Column,
|
|
|
|
|
lineNumber: range.Start.LineNumber,
|
|
|
|
|
},
|
|
|
|
|
end: {
|
|
|
|
|
byteOffset: range.End.ByteOffset,
|
|
|
|
|
column: range.End.Column,
|
|
|
|
|
lineNumber: range.End.LineNumber,
|
|
|
|
|
}
|
|
|
|
|
}) satisfies SearchResultRange),
|
|
|
|
|
contentStart: {
|
|
|
|
|
byteOffset: chunk.ContentStart.ByteOffset,
|
|
|
|
|
column: chunk.ContentStart.Column,
|
|
|
|
|
lineNumber: chunk.ContentStart.LineNumber,
|
|
|
|
|
},
|
|
|
|
|
symbols: chunk.SymbolInfo?.map((symbol) => {
|
|
|
|
|
return {
|
|
|
|
|
symbol: symbol.Sym,
|
|
|
|
|
kind: symbol.Kind,
|
|
|
|
|
parent: symbol.Parent.length > 0 ? {
|
|
|
|
|
symbol: symbol.Parent,
|
|
|
|
|
kind: symbol.ParentKind,
|
|
|
|
|
} : undefined,
|
|
|
|
|
}
|
|
|
|
|
}) ?? undefined,
|
|
|
|
|
}
|
|
|
|
|
}),
|
|
|
|
|
branches: file.Branches,
|
|
|
|
|
content: file.Content,
|
|
|
|
|
}
|
2025-05-07 23:21:05 +00:00
|
|
|
}) ?? [],
|
2025-05-03 18:33:58 +00:00
|
|
|
isBranchFilteringEnabled: isBranchFilteringEnabled,
|
|
|
|
|
} satisfies SearchResponse));
|
|
|
|
|
|
|
|
|
|
return parser.parse(searchBody);
|
|
|
|
|
}
|