fix(web): Search performance improvements (#615)

This commit is contained in:
Brendan Kellam 2025-11-12 23:20:26 -08:00 committed by GitHub
parent 06c84f0bf5
commit a814bd6f7e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 79 additions and 32 deletions

View file

@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- Bumped the default requested search result count from 5k to 100k after an optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Fixed
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
- Fixed issue where connections would always sync on startup, regardless of whether they had changed. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
- Fixed performance bottleneck in the search API. The result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Added
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)

View file

@ -35,7 +35,7 @@ import { FilterPanel } from "./filterPanel";
import { useFilteredMatches } from "./filterPanel/useFilterMatches";
import { SearchResultsPanel } from "./searchResultsPanel";
const DEFAULT_MAX_MATCH_COUNT = 5000;
const DEFAULT_MAX_MATCH_COUNT = 100_000;
interface SearchResultsPageProps {
searchQuery: string;

View file

@ -1,6 +1,5 @@
'use client';
import { getVersionResponseSchema, getReposResponseSchema } from "@/lib/schemas";
import { ServiceError } from "@/lib/serviceError";
import { GetVersionResponse, GetReposResponse } from "@/lib/types";
import { isServiceError } from "@/lib/utils";
@ -10,10 +9,6 @@ import {
SearchRequest,
SearchResponse,
} from "@/features/search/types";
import {
fileSourceResponseSchema,
searchResponseSchema,
} from "@/features/search/schemas";
export const search = async (body: SearchRequest, domain: string): Promise<SearchResponse | ServiceError> => {
const result = await fetch("/api/search", {
@ -29,10 +24,10 @@ export const search = async (body: SearchRequest, domain: string): Promise<Searc
return result;
}
return searchResponseSchema.parse(result);
return result as SearchResponse | ServiceError;
}
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse> => {
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse | ServiceError> => {
const result = await fetch("/api/source", {
method: "POST",
headers: {
@ -42,7 +37,7 @@ export const fetchFileSource = async (body: FileSourceRequest, domain: string):
body: JSON.stringify(body),
}).then(response => response.json());
return fileSourceResponseSchema.parse(result);
return result as FileSourceResponse | ServiceError;
}
export const getRepos = async (): Promise<GetReposResponse> => {
@ -53,7 +48,7 @@ export const getRepos = async (): Promise<GetReposResponse> => {
},
}).then(response => response.json());
return getReposResponseSchema.parse(result);
return result as GetReposResponse | ServiceError;
}
export const getVersion = async (): Promise<GetVersionResponse> => {
@ -63,5 +58,5 @@ export const getVersion = async (): Promise<GetVersionResponse> => {
"Content-Type": "application/json",
},
}).then(response => response.json());
return getVersionResponseSchema.parse(result);
return result as GetVersionResponse;
}

View file

@ -141,6 +141,7 @@ export const searchResponseSchema = z.object({
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
__debug_timings: z.record(z.string(), z.number()).optional(),
});
export const fileSourceRequestSchema = z.object({

View file

@ -1,16 +1,18 @@
'use server';
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { isServiceError } from "../../lib/utils";
import { zoektFetch } from "./zoektClient";
import { ErrorCode } from "../../lib/errorCodes";
import { StatusCodes } from "http-status-codes";
import { zoektSearchResponseSchema } from "./zoektSchema";
import { SearchRequest, SearchResponse, SourceRange } from "./types";
import { PrismaClient, Repo } from "@sourcebot/db";
import { sew } from "@/actions";
import { base64Decode } from "@sourcebot/shared";
import { withOptionalAuthV2 } from "@/withAuthV2";
import { PrismaClient, Repo } from "@sourcebot/db";
import { base64Decode, createLogger } from "@sourcebot/shared";
import { StatusCodes } from "http-status-codes";
import { ErrorCode } from "../../lib/errorCodes";
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { isServiceError, measure } from "../../lib/utils";
import { SearchRequest, SearchResponse, SourceRange } from "./types";
import { zoektFetch } from "./zoektClient";
import { ZoektSearchResponse } from "./zoektSchema";
const logger = createLogger("searchApi");
// List of supported query prefixes in zoekt.
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
@ -126,7 +128,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri
return encodeURI(url + optionalQueryParams);
}
export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() =>
export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise<SearchResponse | ServiceError> => sew(() =>
withOptionalAuthV2(async ({ org, prisma }) => {
const transformedQuery = await transformZoektQuery(query, org.id, prisma);
if (isServiceError(transformedQuery)) {
@ -200,20 +202,22 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
"X-Tenant-ID": org.id.toString()
};
const searchResponse = await zoektFetch({
const { data: searchResponse, durationMs: fetchDurationMs } = await measure(
() => zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});
}),
"zoekt_fetch",
false
);
if (!searchResponse.ok) {
return invalidZoektResponse(searchResponse);
}
const searchBody = await searchResponse.json();
const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
const transformZoektSearchResponse = async ({ Result }: ZoektSearchResponse) => {
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
// metadata when transforming (potentially thousands) of file matches, we aggregate a unique
@ -379,7 +383,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
flushReason: Result.FlushReason,
}
} satisfies SearchResponse;
});
}
return parser.parseAsync(searchBody);
const { data: rawZoektResponse, durationMs: parseJsonDurationMs } = await measure(
() => searchResponse.json(),
"parse_json",
false
);
// @note: We do not use zod parseAsync here since in cases where the
// response is large (> 40MB), there can be significant performance issues.
const zoektResponse = rawZoektResponse as ZoektSearchResponse;
const { data: response, durationMs: transformZoektResponseDurationMs } = await measure(
() => transformZoektSearchResponse(zoektResponse),
"transform_zoekt_response",
false
);
const totalDurationMs = fetchDurationMs + parseJsonDurationMs + transformZoektResponseDurationMs;
// Debug log: timing breakdown
const timings = [
{ name: "zoekt_fetch", duration: fetchDurationMs },
{ name: "parse_json", duration: parseJsonDurationMs },
{ name: "transform_zoekt_response", duration: transformZoektResponseDurationMs },
];
logger.debug(`Search timing breakdown (query: "${query}"):`);
timings.forEach(({ name, duration }) => {
const percentage = ((duration / totalDurationMs) * 100).toFixed(1);
const durationStr = duration.toFixed(2).padStart(8);
const percentageStr = percentage.padStart(5);
logger.debug(` ${name.padEnd(25)} ${durationStr}ms (${percentageStr}%)`);
});
logger.debug(` ${"TOTAL".padEnd(25)} ${totalDurationMs.toFixed(2).padStart(8)}ms (100.0%)`);
return {
...response,
__debug_timings: {
zoekt_fetch: fetchDurationMs,
parse_json: parseJsonDurationMs,
transform_zoekt_response: transformZoektResponseDurationMs,
}
} satisfies SearchResponse;
}));

View file

@ -75,6 +75,8 @@ export const zoektSearchResponseSchema = z.object({
}),
});
export type ZoektSearchResponse = z.infer<typeof zoektSearchResponseSchema>;
// @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728
const zoektRepoStatsSchema = z.object({
Repos: z.number(),