mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-11 20:05:25 +00:00
fix(web): Search performance improvements (#615)
This commit is contained in:
parent
06c84f0bf5
commit
a814bd6f7e
6 changed files with 79 additions and 32 deletions
|
|
@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- Bumped the default requested search result count from 5k to 100k after an optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
|
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
|
||||||
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
|
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
|
||||||
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
|
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
|
||||||
- Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
|
- Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
|
||||||
|
- Fixed performance bottleneck in the search API. The result is an order-of-magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
|
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ import { FilterPanel } from "./filterPanel";
|
||||||
import { useFilteredMatches } from "./filterPanel/useFilterMatches";
|
import { useFilteredMatches } from "./filterPanel/useFilterMatches";
|
||||||
import { SearchResultsPanel } from "./searchResultsPanel";
|
import { SearchResultsPanel } from "./searchResultsPanel";
|
||||||
|
|
||||||
const DEFAULT_MAX_MATCH_COUNT = 5000;
|
const DEFAULT_MAX_MATCH_COUNT = 100_000;
|
||||||
|
|
||||||
interface SearchResultsPageProps {
|
interface SearchResultsPageProps {
|
||||||
searchQuery: string;
|
searchQuery: string;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { getVersionResponseSchema, getReposResponseSchema } from "@/lib/schemas";
|
|
||||||
import { ServiceError } from "@/lib/serviceError";
|
import { ServiceError } from "@/lib/serviceError";
|
||||||
import { GetVersionResponse, GetReposResponse } from "@/lib/types";
|
import { GetVersionResponse, GetReposResponse } from "@/lib/types";
|
||||||
import { isServiceError } from "@/lib/utils";
|
import { isServiceError } from "@/lib/utils";
|
||||||
|
|
@ -10,10 +9,6 @@ import {
|
||||||
SearchRequest,
|
SearchRequest,
|
||||||
SearchResponse,
|
SearchResponse,
|
||||||
} from "@/features/search/types";
|
} from "@/features/search/types";
|
||||||
import {
|
|
||||||
fileSourceResponseSchema,
|
|
||||||
searchResponseSchema,
|
|
||||||
} from "@/features/search/schemas";
|
|
||||||
|
|
||||||
export const search = async (body: SearchRequest, domain: string): Promise<SearchResponse | ServiceError> => {
|
export const search = async (body: SearchRequest, domain: string): Promise<SearchResponse | ServiceError> => {
|
||||||
const result = await fetch("/api/search", {
|
const result = await fetch("/api/search", {
|
||||||
|
|
@ -29,10 +24,10 @@ export const search = async (body: SearchRequest, domain: string): Promise<Searc
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
return searchResponseSchema.parse(result);
|
return result as SearchResponse | ServiceError;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse> => {
|
export const fetchFileSource = async (body: FileSourceRequest, domain: string): Promise<FileSourceResponse | ServiceError> => {
|
||||||
const result = await fetch("/api/source", {
|
const result = await fetch("/api/source", {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
|
|
@ -42,7 +37,7 @@ export const fetchFileSource = async (body: FileSourceRequest, domain: string):
|
||||||
body: JSON.stringify(body),
|
body: JSON.stringify(body),
|
||||||
}).then(response => response.json());
|
}).then(response => response.json());
|
||||||
|
|
||||||
return fileSourceResponseSchema.parse(result);
|
return result as FileSourceResponse | ServiceError;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getRepos = async (): Promise<GetReposResponse> => {
|
export const getRepos = async (): Promise<GetReposResponse> => {
|
||||||
|
|
@ -53,7 +48,7 @@ export const getRepos = async (): Promise<GetReposResponse> => {
|
||||||
},
|
},
|
||||||
}).then(response => response.json());
|
}).then(response => response.json());
|
||||||
|
|
||||||
return getReposResponseSchema.parse(result);
|
return result as GetReposResponse | ServiceError;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getVersion = async (): Promise<GetVersionResponse> => {
|
export const getVersion = async (): Promise<GetVersionResponse> => {
|
||||||
|
|
@ -63,5 +58,5 @@ export const getVersion = async (): Promise<GetVersionResponse> => {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
},
|
},
|
||||||
}).then(response => response.json());
|
}).then(response => response.json());
|
||||||
return getVersionResponseSchema.parse(result);
|
return result as GetVersionResponse;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -141,6 +141,7 @@ export const searchResponseSchema = z.object({
|
||||||
repositoryInfo: z.array(repositoryInfoSchema),
|
repositoryInfo: z.array(repositoryInfoSchema),
|
||||||
isBranchFilteringEnabled: z.boolean(),
|
isBranchFilteringEnabled: z.boolean(),
|
||||||
isSearchExhaustive: z.boolean(),
|
isSearchExhaustive: z.boolean(),
|
||||||
|
__debug_timings: z.record(z.string(), z.number()).optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
export const fileSourceRequestSchema = z.object({
|
export const fileSourceRequestSchema = z.object({
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,18 @@
|
||||||
'use server';
|
'use server';
|
||||||
|
|
||||||
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
|
||||||
import { isServiceError } from "../../lib/utils";
|
|
||||||
import { zoektFetch } from "./zoektClient";
|
|
||||||
import { ErrorCode } from "../../lib/errorCodes";
|
|
||||||
import { StatusCodes } from "http-status-codes";
|
|
||||||
import { zoektSearchResponseSchema } from "./zoektSchema";
|
|
||||||
import { SearchRequest, SearchResponse, SourceRange } from "./types";
|
|
||||||
import { PrismaClient, Repo } from "@sourcebot/db";
|
|
||||||
import { sew } from "@/actions";
|
import { sew } from "@/actions";
|
||||||
import { base64Decode } from "@sourcebot/shared";
|
|
||||||
import { withOptionalAuthV2 } from "@/withAuthV2";
|
import { withOptionalAuthV2 } from "@/withAuthV2";
|
||||||
|
import { PrismaClient, Repo } from "@sourcebot/db";
|
||||||
|
import { base64Decode, createLogger } from "@sourcebot/shared";
|
||||||
|
import { StatusCodes } from "http-status-codes";
|
||||||
|
import { ErrorCode } from "../../lib/errorCodes";
|
||||||
|
import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
|
||||||
|
import { isServiceError, measure } from "../../lib/utils";
|
||||||
|
import { SearchRequest, SearchResponse, SourceRange } from "./types";
|
||||||
|
import { zoektFetch } from "./zoektClient";
|
||||||
|
import { ZoektSearchResponse } from "./zoektSchema";
|
||||||
|
|
||||||
|
const logger = createLogger("searchApi");
|
||||||
|
|
||||||
// List of supported query prefixes in zoekt.
|
// List of supported query prefixes in zoekt.
|
||||||
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
|
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
|
||||||
|
|
@ -126,7 +128,7 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri
|
||||||
return encodeURI(url + optionalQueryParams);
|
return encodeURI(url + optionalQueryParams);
|
||||||
}
|
}
|
||||||
|
|
||||||
export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() =>
|
export const search = async ({ query, matches, contextLines, whole }: SearchRequest): Promise<SearchResponse | ServiceError> => sew(() =>
|
||||||
withOptionalAuthV2(async ({ org, prisma }) => {
|
withOptionalAuthV2(async ({ org, prisma }) => {
|
||||||
const transformedQuery = await transformZoektQuery(query, org.id, prisma);
|
const transformedQuery = await transformZoektQuery(query, org.id, prisma);
|
||||||
if (isServiceError(transformedQuery)) {
|
if (isServiceError(transformedQuery)) {
|
||||||
|
|
@ -200,20 +202,22 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
|
||||||
"X-Tenant-ID": org.id.toString()
|
"X-Tenant-ID": org.id.toString()
|
||||||
};
|
};
|
||||||
|
|
||||||
const searchResponse = await zoektFetch({
|
const { data: searchResponse, durationMs: fetchDurationMs } = await measure(
|
||||||
path: "/api/search",
|
() => zoektFetch({
|
||||||
body,
|
path: "/api/search",
|
||||||
header,
|
body,
|
||||||
method: "POST",
|
header,
|
||||||
});
|
method: "POST",
|
||||||
|
}),
|
||||||
|
"zoekt_fetch",
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
if (!searchResponse.ok) {
|
if (!searchResponse.ok) {
|
||||||
return invalidZoektResponse(searchResponse);
|
return invalidZoektResponse(searchResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchBody = await searchResponse.json();
|
const transformZoektSearchResponse = async ({ Result }: ZoektSearchResponse) => {
|
||||||
|
|
||||||
const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
|
|
||||||
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
|
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
|
||||||
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
|
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
|
||||||
// metadata when transforming (potentially thousands) of file matches, we aggregate a unique
|
// metadata when transforming (potentially thousands) of file matches, we aggregate a unique
|
||||||
|
|
@ -379,7 +383,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
|
||||||
flushReason: Result.FlushReason,
|
flushReason: Result.FlushReason,
|
||||||
}
|
}
|
||||||
} satisfies SearchResponse;
|
} satisfies SearchResponse;
|
||||||
});
|
}
|
||||||
|
|
||||||
return parser.parseAsync(searchBody);
|
const { data: rawZoektResponse, durationMs: parseJsonDurationMs } = await measure(
|
||||||
|
() => searchResponse.json(),
|
||||||
|
"parse_json",
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
// @note: We do not use zod parseAsync here since in cases where the
|
||||||
|
// response is large (> 40MB), there can be significant performance issues.
|
||||||
|
const zoektResponse = rawZoektResponse as ZoektSearchResponse;
|
||||||
|
|
||||||
|
const { data: response, durationMs: transformZoektResponseDurationMs } = await measure(
|
||||||
|
() => transformZoektSearchResponse(zoektResponse),
|
||||||
|
"transform_zoekt_response",
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
const totalDurationMs = fetchDurationMs + parseJsonDurationMs + transformZoektResponseDurationMs;
|
||||||
|
|
||||||
|
// Debug log: timing breakdown
|
||||||
|
const timings = [
|
||||||
|
{ name: "zoekt_fetch", duration: fetchDurationMs },
|
||||||
|
{ name: "parse_json", duration: parseJsonDurationMs },
|
||||||
|
{ name: "transform_zoekt_response", duration: transformZoektResponseDurationMs },
|
||||||
|
];
|
||||||
|
|
||||||
|
logger.debug(`Search timing breakdown (query: "${query}"):`);
|
||||||
|
timings.forEach(({ name, duration }) => {
|
||||||
|
const percentage = ((duration / totalDurationMs) * 100).toFixed(1);
|
||||||
|
const durationStr = duration.toFixed(2).padStart(8);
|
||||||
|
const percentageStr = percentage.padStart(5);
|
||||||
|
logger.debug(` ${name.padEnd(25)} ${durationStr}ms (${percentageStr}%)`);
|
||||||
|
});
|
||||||
|
logger.debug(` ${"TOTAL".padEnd(25)} ${totalDurationMs.toFixed(2).padStart(8)}ms (100.0%)`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
...response,
|
||||||
|
__debug_timings: {
|
||||||
|
zoekt_fetch: fetchDurationMs,
|
||||||
|
parse_json: parseJsonDurationMs,
|
||||||
|
transform_zoekt_response: transformZoektResponseDurationMs,
|
||||||
|
}
|
||||||
|
} satisfies SearchResponse;
|
||||||
}));
|
}));
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,8 @@ export const zoektSearchResponseSchema = z.object({
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
export type ZoektSearchResponse = z.infer<typeof zoektSearchResponseSchema>;
|
||||||
|
|
||||||
// @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728
|
// @see : https://github.com/sourcebot-dev/zoekt/blob/3780e68cdb537d5a7ed2c84d9b3784f80c7c5d04/api.go#L728
|
||||||
const zoektRepoStatsSchema = z.object({
|
const zoektRepoStatsSchema = z.object({
|
||||||
Repos: z.number(),
|
Repos: z.number(),
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue