mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-11 20:05:25 +00:00
fix(web): Search performance improvements (#615)
This commit is contained in:
parent
06c84f0bf5
commit
a814bd6f7e
6 changed files with 79 additions and 32 deletions
|
|
@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Changed
|
||||
- Bumped the default requested search result count from 5k to 100k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
|
||||
|
||||
### Fixed
|
||||
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
|
||||
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
|
||||
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
|
||||
- Fixed issue where connections would always sync on startup, regardless of whether they had changed. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
|
||||
- Fixed performance bottleneck in search API. Result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
|
||||
|
||||
### Added
|
||||
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import { FilterPanel } from "./filterPanel";
|
|||
import { useFilteredMatches } from "./filterPanel/useFilterMatches";
|
||||
import { SearchResultsPanel } from "./searchResultsPanel";
|
||||
|
||||
const DEFAULT_MAX_MATCH_COUNT = 5000;
|
||||
const DEFAULT_MAX_MATCH_COUNT = 100_000;
|
||||
|
||||
interface SearchResultsPageProps {
|
||||
searchQuery: string;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
'use client';
|
||||
|
||||
import { getVersionResponseSchema, getReposResponseSchema } from "@/lib/schemas";
|
||||
import { ServiceError } from "@/lib/serviceError";
|
||||
import { GetVersionResponse, GetReposResponse } from "@/lib/types";
|
||||
import { isServiceError } from "@/lib/utils";
|
||||
|
|
@ -10,10 +9,6 @@ import {
|
|||
SearchRequest,
|
||||
SearchResponse,
|
||||
} from "@/features/search/types";
|
||||
import {
|
||||
fileSourceResponseSchema,
|
||||
searchResponseSchema,
|
||||
} from "@/features/search/schemas";
|
||||
|
||||
export const search = async (body: SearchRequest, domain: string): Promise<SearchResponse | ServiceError> => {
|
||||
const result = await fetch("/api/search", {
|
||||
|
|
@ -29,10 +24,10 @@ export const search = async (body: SearchRequest, domain: string): Promise<Searc
|
|||
return result;
|
||||
}
|
||||
|
||||
return searchResponseSchema.parse(result);
|
||||
return result as SearchResponse | ServiceError;
|
||||
}
|
||||
|
||||
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse> => {
|
||||
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse | ServiceError> => {
|
||||
const result = await fetch("/api/source", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
|
|
@ -42,7 +37,7 @@ export const fetchFileSource = async (body: FileSourceRequest, domain: string):
|
|||
body: JSON.stringify(body),
|
||||
}).then(response => response.json());
|
||||
|
||||
return fileSourceResponseSchema.parse(result);
|
||||
return result as FileSourceResponse | ServiceError;
|
||||
}
|
||||
|
||||
export const getRepos = async (): Promise<GetReposResponse> => {
|
||||
|
|
@ -53,7 +48,7 @@ export const getRepos = async (): Promise<GetReposResponse> => {
|
|||
},
|
||||
}).then(response => response.json());
|
||||
|
||||
return getReposResponseSchema.parse(result);
|
||||
return result as GetReposResponse | ServiceError;
|
||||
}
|
||||
|
||||
export const getVersion = async (): Promise<GetVersionResponse> => {
|
||||
|
|
@ -63,5 +58,5 @@ export const getVersion = async (): Promise<GetVersionResponse> => {
|
|||
"Content-Type": "application/json",
|
||||
},
|
||||
}).then(response => response.json());
|
||||
return getVersionResponseSchema.parse(result);
|
||||
return result as GetVersionResponse;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,6 +141,7 @@ export const searchResponseSchema = z.object({
|
|||
repositoryInfo: z.array(repositoryInfoSchema),
|
||||
isBranchFilteringEnabled: z.boolean(),
|
||||
isSearchExhaustive: z.boolean(),
|
||||
__debug_timings: z.record(z.string(), z.number()).optional(),
|
||||
});
|
||||
|
||||
export const fileSourceRequestSchema = z.object({
|
||||
|
|
|
|||
|
|
@ -1,16 +1,18 @@
|
|||
'use server';
|
||||
|
||||
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
||||
import { isServiceError } from "../../lib/utils";
|
||||
import { zoektFetch } from "./zoektClient";
|
||||
import { ErrorCode } from "../../lib/errorCodes";
|
||||
import { StatusCodes } from "http-status-codes";
|
||||
import { zoektSearchResponseSchema } from "./zoektSchema";
|
||||
import { SearchRequest, SearchResponse, SourceRange } from "./types";
|
||||
import { PrismaClient, Repo } from "@sourcebot/db";
|
||||
import { sew } from "@/actions";
|
||||
import { base64Decode } from "@sourcebot/shared";
|
||||
import { withOptionalAuthV2 } from "@/withAuthV2";
|
||||
import { PrismaClient, Repo } from "@sourcebot/db";
|
||||
import { base64Decode, createLogger } from "@sourcebot/shared";
|
||||
import { StatusCodes } from "http-status-codes";
|
||||
import { ErrorCode } from "../../lib/errorCodes";
|
||||
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
||||
import { isServiceError, measure } from "../../lib/utils";
|
||||
import { SearchRequest, SearchResponse, SourceRange } from "./types";
|
||||
import { zoektFetch } from "./zoektClient";
|
||||
import { ZoektSearchResponse } from "./zoektSchema";
|
||||
|
||||
const logger = createLogger("searchApi");
|
||||
|
||||
// List of supported query prefixes in zoekt.
|
||||
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
|
||||
|
|
@ -126,7 +128,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri
|
|||
return encodeURI(url + optionalQueryParams);
|
||||
}
|
||||
|
||||
export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() =>
|
||||
export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise<SearchResponse | ServiceError> => sew(() =>
|
||||
withOptionalAuthV2(async ({ org, prisma }) => {
|
||||
const transformedQuery = await transformZoektQuery(query, org.id, prisma);
|
||||
if (isServiceError(transformedQuery)) {
|
||||
|
|
@ -200,20 +202,22 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
|
|||
"X-Tenant-ID": org.id.toString()
|
||||
};
|
||||
|
||||
const searchResponse = await zoektFetch({
|
||||
path: "/api/search",
|
||||
body,
|
||||
header,
|
||||
method: "POST",
|
||||
});
|
||||
const { data: searchResponse, durationMs: fetchDurationMs } = await measure(
|
||||
() => zoektFetch({
|
||||
path: "/api/search",
|
||||
body,
|
||||
header,
|
||||
method: "POST",
|
||||
}),
|
||||
"zoekt_fetch",
|
||||
false
|
||||
);
|
||||
|
||||
if (!searchResponse.ok) {
|
||||
return invalidZoektResponse(searchResponse);
|
||||
}
|
||||
|
||||
const searchBody = await searchResponse.json();
|
||||
|
||||
const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
|
||||
const transformZoektSearchResponse = async ({ Result }: ZoektSearchResponse) => {
|
||||
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
|
||||
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
|
||||
// metadata when transforming (potentially thousands) of file matches, we aggregate a unique
|
||||
|
|
@ -379,7 +383,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
|
|||
flushReason: Result.FlushReason,
|
||||
}
|
||||
} satisfies SearchResponse;
|
||||
});
|
||||
}
|
||||
|
||||
return parser.parseAsync(searchBody);
|
||||
const { data: rawZoektResponse, durationMs: parseJsonDurationMs } = await measure(
|
||||
() => searchResponse.json(),
|
||||
"parse_json",
|
||||
false
|
||||
);
|
||||
|
||||
// @note: We do not use zod parseAsync here since in cases where the
|
||||
// response is large (> 40MB), there can be significant performance issues.
|
||||
const zoektResponse = rawZoektResponse as ZoektSearchResponse;
|
||||
|
||||
const { data: response, durationMs: transformZoektResponseDurationMs } = await measure(
|
||||
() => transformZoektSearchResponse(zoektResponse),
|
||||
"transform_zoekt_response",
|
||||
false
|
||||
);
|
||||
|
||||
const totalDurationMs = fetchDurationMs + parseJsonDurationMs + transformZoektResponseDurationMs;
|
||||
|
||||
// Debug log: timing breakdown
|
||||
const timings = [
|
||||
{ name: "zoekt_fetch", duration: fetchDurationMs },
|
||||
{ name: "parse_json", duration: parseJsonDurationMs },
|
||||
{ name: "transform_zoekt_response", duration: transformZoektResponseDurationMs },
|
||||
];
|
||||
|
||||
logger.debug(`Search timing breakdown (query: "${query}"):`);
|
||||
timings.forEach(({ name, duration }) => {
|
||||
const percentage = ((duration / totalDurationMs) * 100).toFixed(1);
|
||||
const durationStr = duration.toFixed(2).padStart(8);
|
||||
const percentageStr = percentage.padStart(5);
|
||||
logger.debug(` ${name.padEnd(25)} ${durationStr}ms (${percentageStr}%)`);
|
||||
});
|
||||
logger.debug(` ${"TOTAL".padEnd(25)} ${totalDurationMs.toFixed(2).padStart(8)}ms (100.0%)`);
|
||||
|
||||
return {
|
||||
...response,
|
||||
__debug_timings: {
|
||||
zoekt_fetch: fetchDurationMs,
|
||||
parse_json: parseJsonDurationMs,
|
||||
transform_zoekt_response: transformZoektResponseDurationMs,
|
||||
}
|
||||
} satisfies SearchResponse;
|
||||
}));
|
||||
|
|
|
|||
|
|
@ -75,6 +75,8 @@ export const zoektSearchResponseSchema = z.object({
|
|||
}),
|
||||
});
|
||||
|
||||
export type ZoektSearchResponse = z.infer<typeof zoektSearchResponseSchema>;
|
||||
|
||||
// @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728
|
||||
const zoektRepoStatsSchema = z.object({
|
||||
Repos: z.number(),
|
||||
|
|
|
|||
Loading…
Reference in a new issue