2025-07-23 18:25:15 +00:00
|
|
|
'use server';
|
|
|
|
|
|
2025-05-03 18:33:58 +00:00
|
|
|
import { env } from "@/env.mjs";
|
|
|
|
|
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
|
|
|
|
import { isServiceError } from "../../lib/utils";
|
|
|
|
|
import { zoektFetch } from "./zoektClient";
|
|
|
|
|
import { prisma } from "@/prisma";
|
|
|
|
|
import { ErrorCode } from "../../lib/errorCodes";
|
|
|
|
|
import { StatusCodes } from "http-status-codes";
|
|
|
|
|
import { zoektSearchResponseSchema } from "./zoektSchema";
|
2025-05-28 23:08:42 +00:00
|
|
|
import { SearchRequest, SearchResponse, SourceRange } from "./types";
|
|
|
|
|
import { OrgRole, Repo } from "@sourcebot/db";
|
2025-05-15 20:42:58 +00:00
|
|
|
import * as Sentry from "@sentry/nextjs";
|
|
|
|
|
import { sew, withAuth, withOrgMembership } from "@/actions";
|
2025-06-17 22:58:04 +00:00
|
|
|
import { base64Decode } from "@sourcebot/shared";
|
2025-05-03 18:33:58 +00:00
|
|
|
|
|
|
|
|
/**
 * Query prefixes understood by zoekt.
 *
 * Each value includes the trailing colon so it can be concatenated directly
 * with the atom's argument (e.g. `${zoektPrefixes.branch}main`). Members named
 * `*Short` hold the abbreviated spelling of the member that follows them.
 *
 * @see https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
 */
enum zoektPrefixes {
    archived = "archived:",

    // Branch filters.
    branchShort = "b:",
    branch = "branch:",

    // Case sensitivity.
    caseShort = "c:",
    case = "case:",

    content = "content:",

    // File filters.
    fileShort = "f:",
    file = "file:",

    fork = "fork:",
    public = "public:",

    // Repository filters.
    repoShort = "r:",
    repo = "repo:",

    regex = "regex:",
    lang = "lang:",
    sym = "sym:",

    // Result type.
    typeShort = "t:",
    type = "type:",

    reposet = "reposet:",
}
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites the Sourcebot-specific atoms of a search query into atoms that
 * zoekt understands.
 *
 * The query is split on single spaces and each token is handled independently:
 *  - `rev:<name>` / `revision:<name>` become `branch:<name>` (`*` becomes an
 *    empty branch name).
 *  - `context:<name>` is looked up in the `searchContext` table for `orgId`
 *    and expanded into a `reposet:` atom listing the context's repo names.
 *  - Every other token is passed through unchanged.
 * A leading `-` (negation) on a rewritten token is carried over.
 *
 * @param query The raw query string.
 * @param orgId The organization whose search contexts are consulted.
 * @returns The rewritten query, or a `SEARCH_CONTEXT_NOT_FOUND` service error
 *          when a referenced context does not exist.
 */
const transformZoektQuery = async (query: string, orgId: number): Promise<string | ServiceError> => {
    const revisionAtom = /^-?(rev|revision):.+$/;
    const contextAtom = /^-?context:.+$/;

    const rewritten: string[] = [];

    for (const token of query.split(" ")) {
        const negation = token.startsWith("-") ? "-" : "";
        // Everything after the first colon is the atom's argument.
        const argument = token.slice(token.indexOf(":") + 1);

        // Handle mapping `rev:` and `revision:` to `branch:`
        if (revisionAtom.test(token)) {
            // Special case: `*` -> search all revisions.
            // In zoekt, providing a blank string will match all branches.
            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/eval.go#L560-L562
            const branchName = argument === "*" ? "" : argument;
            rewritten.push(`${negation}${zoektPrefixes.branch}${branchName}`);
            continue;
        }

        // Expand `context:` into `reposet:` atom.
        if (contextAtom.test(token)) {
            const searchContext = await prisma.searchContext.findUnique({
                where: {
                    name_orgId: {
                        name: argument,
                        orgId,
                    }
                },
                include: {
                    repos: true,
                }
            });

            // If the context doesn't exist, return an error.
            if (!searchContext) {
                return {
                    errorCode: ErrorCode.SEARCH_CONTEXT_NOT_FOUND,
                    message: `Search context "${argument}" not found`,
                    statusCode: StatusCodes.NOT_FOUND,
                } satisfies ServiceError;
            }

            const repoNames = searchContext.repos.map((repo) => repo.name).join(",");
            rewritten.push(`${negation}${zoektPrefixes.reposet}${repoNames}`);
            continue;
        }

        // no-op: keep the original token.
        rewritten.push(token);
    }

    return rewritten.join(" ");
}
|
|
|
|
|
|
2025-05-07 23:21:05 +00:00
|
|
|
/**
 * Builds the web URL of a file from a zoekt repository URL template.
 *
 * Supported templates have the shape
 * `{{URLJoinPath <arg> <arg> ...}}` optionally followed by a `?query` suffix.
 * Inside the action, `.Version` is replaced with `branch` and `.Path` with
 * `fileName`; inside the suffix, `{{.Version}}` and `{{.Path}}` are replaced
 * the same way. The assembled string is passed through `encodeURI`.
 *
 * @param template The URL template reported by zoekt for the repository.
 * @param branch The branch (revision) to link to.
 * @param fileName The path of the file within the repository.
 * @returns The encoded URL, or `undefined` when the template does not have the
 *          supported shape.
 */
const getFileWebUrl = (template: string, branch: string, fileName: string): string | undefined => {
    // This is a hacky parser for templates generated by
    // the go text/template package. Example template:
    // {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
    if (!template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/)) {
        return undefined;
    }

    const actionEnd = template.indexOf("}}");

    const url = template
        .substring("{{URLJoinPath ".length, actionEnd)
        .split(" ")
        .map((part) => {
            // remove wrapping quotes
            if (part.startsWith("\"")) part = part.substring(1);
            if (part.endsWith("\"")) part = part.substring(0, part.length - 1);

            // Replace variable references. Returning immediately ensures a
            // substituted value is never re-interpreted as another variable
            // (e.g. a branch that is literally named ".Path").
            if (part === ".Version") return branch;
            if (part === ".Path") return fileName;
            return part;
        })
        .join("/");

    // Substitute every placeholder in a single pass. A replacer *function* is
    // used on purpose: with a replacement *string*, sequences such as `$$` or
    // `$&` inside the branch / file name would be interpreted as replacement
    // patterns and corrupt the URL.
    const optionalQueryParams = template
        .substring(actionEnd + 2)
        .replace(
            /\{\{\.(Version|Path)\}\}/g,
            (_match: string, variable: string) => (variable === "Version" ? branch : fileName),
        );

    return encodeURI(url + optionalQueryParams);
}
|
|
|
|
|
|
2025-05-28 23:08:42 +00:00
|
|
|
/**
 * Executes a search against zoekt on behalf of the caller and converts the
 * raw zoekt response into a `SearchResponse`.
 *
 * Steps:
 *  1. Rewrite the query with `transformZoektQuery` (returning its service
 *     error, if any).
 *  2. Append `branch:HEAD` unless the query already contains a branch prefix.
 *  3. POST the query to zoekt's `/api/search`, tagged with the org id.
 *  4. Validate the response with `zoektSearchResponseSchema` and map it,
 *     resolving each file's repository against the `Repo` table of the org.
 *
 * The call requires at least `OrgRole.GUEST` membership and permits anonymous
 * access; when `apiKey` is provided it is forwarded to `withAuth` together
 * with `domain`.
 */
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, domain: string, apiKey: string | undefined = undefined) => sew(() =>
    withAuth((userId, _apiKeyHash) =>
        withOrgMembership(userId, domain, async ({ org }) => {
            const transformedQuery = await transformZoektQuery(query, org.id);
            if (isServiceError(transformedQuery)) {
                return transformedQuery;
            }

            const isBranchFilteringEnabled =
                transformedQuery.includes(zoektPrefixes.branch) ||
                transformedQuery.includes(zoektPrefixes.branchShort);

            // We only want to show matches for the default branch when
            // the user isn't explicitly filtering by branch.
            const zoektQuery = isBranchFilteringEnabled
                ? transformedQuery
                : `${transformedQuery} branch:HEAD`;

            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
            const opts = {
                ChunkMatches: true,
                MaxMatchDisplayCount: matches,
                NumContextLines: contextLines,
                Whole: Boolean(whole),
                TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
                ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
                MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
            };
            const body = JSON.stringify({ q: zoektQuery, opts });

            const header: Record<string, string> = {
                "X-Tenant-ID": org.id.toString(),
            };

            const searchResponse = await zoektFetch({
                path: "/api/search",
                body,
                header,
                method: "POST",
            });

            if (!searchResponse.ok) {
                return invalidZoektResponse(searchResponse);
            }

            const searchBody = await searchResponse.json();

            const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
                // Converts a zoekt location (PascalCase keys) into our camelCase shape.
                const toLocation = <L extends { ByteOffset: unknown; Column: unknown; LineNumber: unknown }>(location: L) => ({
                    byteOffset: location.ByteOffset,
                    column: location.Column,
                    lineNumber: location.LineNumber,
                });

                // Converts a zoekt range into a { start, end } pair of locations.
                const toRange = <L extends { ByteOffset: unknown; Column: unknown; LineNumber: unknown }>(range: { Start: L; End: L }) => ({
                    start: toLocation(range.Start),
                    end: toLocation(range.End),
                });

                // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
                // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
                // metadata when transforming (potentially thousands) of file matches, we aggregate a unique
                // set of repository ids* and map them to their corresponding Repo record.
                //
                // *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`?
                //  A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was
                // always undefined. To make this a non-breaking change, we fallback to using the repository's name
                // (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in
                // practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo',
                // 'gitea.com/org/repo', etc.).
                //
                // Note: When a repository is re-indexed (every hour) this ID will be populated.
                // @see: https://github.com/sourcebot-dev/zoekt/pull/6
                const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []);
                const repos = new Map<string | number, Repo>();

                // Numeric identifiers are looked up by primary key...
                const reposById = await prisma.repo.findMany({
                    where: {
                        id: {
                            in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"),
                        },
                        orgId: org.id,
                    }
                });
                for (const repo of reposById) {
                    repos.set(repo.id, repo);
                }

                // ...and string identifiers (the legacy fallback) by name.
                const reposByName = await prisma.repo.findMany({
                    where: {
                        name: {
                            in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"),
                        },
                        orgId: org.id,
                    }
                });
                for (const repo of reposByName) {
                    repos.set(repo.name, repo);
                }

                const files = Result.Files?.map((file) => {
                    const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);

                    const template: string | undefined = Result.RepoURLs[file.Repository];
                    // If there are multiple branches pointing to the same revision of this file, it doesn't
                    // matter which branch we use here, so use the first one.
                    const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
                    const webUrl = template ? getFileWebUrl(template, branch, file.FileName) : undefined;

                    const identifier = file.RepositoryID ?? file.Repository;
                    const repo = repos.get(identifier);

                    // This should never happen... but if it does, we skip the file.
                    if (!repo) {
                        Sentry.captureMessage(
                            `Repository not found for identifier: ${identifier}; skipping file "${file.FileName}"`,
                            'warning'
                        );
                        return undefined;
                    }

                    // Filename matches are reported as highlight ranges on the file name;
                    // all remaining chunks are content matches.
                    const fileNameMatchRanges = fileNameChunks.length === 1
                        ? fileNameChunks[0].Ranges.map((range) => toRange(range))
                        : [];

                    const contentChunks = file.ChunkMatches.filter((chunk) => !chunk.FileName);

                    return {
                        fileName: {
                            text: file.FileName,
                            matchRanges: fileNameMatchRanges,
                        },
                        repository: repo.name,
                        repositoryId: repo.id,
                        webUrl: webUrl,
                        language: file.Language,
                        chunks: contentChunks.map((chunk) => ({
                            content: base64Decode(chunk.Content),
                            matchRanges: chunk.Ranges.map((range) => toRange(range) satisfies SourceRange),
                            contentStart: toLocation(chunk.ContentStart),
                            symbols: chunk.SymbolInfo?.map((symbol) => ({
                                symbol: symbol.Sym,
                                kind: symbol.Kind,
                                parent: symbol.Parent.length > 0 ? {
                                    symbol: symbol.Parent,
                                    kind: symbol.ParentKind,
                                } : undefined,
                            })) ?? undefined,
                        })),
                        branches: file.Branches,
                        content: file.Content ? base64Decode(file.Content) : undefined,
                    }
                }).filter((file) => file !== undefined) ?? [];

                // Total number of match ranges across every content chunk of every returned file.
                let matchCount = 0;
                for (const file of files) {
                    for (const chunk of file.chunks) {
                        matchCount += chunk.matchRanges.length;
                    }
                }

                return {
                    zoektStats: {
                        duration: Result.Duration,
                        fileCount: Result.FileCount,
                        matchCount: Result.MatchCount,
                        filesSkipped: Result.FilesSkipped,
                        contentBytesLoaded: Result.ContentBytesLoaded,
                        indexBytesLoaded: Result.IndexBytesLoaded,
                        crashes: Result.Crashes,
                        shardFilesConsidered: Result.ShardFilesConsidered,
                        filesConsidered: Result.FilesConsidered,
                        filesLoaded: Result.FilesLoaded,
                        shardsScanned: Result.ShardsScanned,
                        shardsSkipped: Result.ShardsSkipped,
                        shardsSkippedFilter: Result.ShardsSkippedFilter,
                        ngramMatches: Result.NgramMatches,
                        ngramLookups: Result.NgramLookups,
                        wait: Result.Wait,
                        matchTreeConstruction: Result.MatchTreeConstruction,
                        matchTreeSearch: Result.MatchTreeSearch,
                        regexpsConsidered: Result.RegexpsConsidered,
                        flushReason: Result.FlushReason,
                    },
                    files,
                    repositoryInfo: Array.from(repos.values()).map((repo) => ({
                        id: repo.id,
                        codeHostType: repo.external_codeHostType,
                        name: repo.name,
                        displayName: repo.displayName ?? undefined,
                        webUrl: repo.webUrl ?? undefined,
                    })),
                    isBranchFilteringEnabled: isBranchFilteringEnabled,
                    stats: {
                        matchCount,
                    }
                } satisfies SearchResponse;
            });

            return parser.parseAsync(searchBody);
        }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true, apiKey ? { apiKey, domain } : undefined)
);
|