add isSearchExhaustive flag to indicate when a search captured all results

bkellam 2025-11-19 12:33:57 -08:00
parent 5ab585c021
commit 9c9b6b9578
10 changed files with 360 additions and 281 deletions

View file

@@ -62,6 +62,8 @@ export const SearchResultsPage = ({
durationMs,
isStreaming,
numMatches,
isExhaustive,
stats,
} = useStreamedSearch({
query: searchQuery,
matches: maxMatchCount,
@@ -170,10 +172,8 @@ export const SearchResultsPage = ({
repoInfo={repoInfo}
searchDurationMs={durationMs}
isStreaming={isStreaming}
// @todo: handle search stats
searchStats={undefined}
// @todo: detect when more results are available
isMoreResultsButtonVisible={false}
searchStats={stats}
isMoreResultsButtonVisible={!isExhaustive}
// @todo: handle branch filtering
isBranchFilteringEnabled={false}
/>
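As a sketch of how the new `isExhaustive` flag can drive the "more results" affordance, a click handler might simply raise the match budget and let `useStreamedSearch` re-run; `setMaxMatchCount` is an assumed state setter, not part of this commit:

// Hypothetical handler: when the search is not exhaustive, "more results"
// re-runs the streamed search with a doubled match budget.
const onMoreResultsClicked = () => {
    setMaxMatchCount((prev) => prev * 2);
};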

View file

@@ -1,6 +1,6 @@
'use client';
import { RepositoryInfo, SearchRequest, SearchResponse, SearchResultFile } from '@/features/search/types';
import { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search/types';
import { useState, useCallback, useRef, useEffect } from 'react';
import * as Sentry from '@sentry/nextjs';
@@ -10,6 +10,7 @@ interface CacheEntry {
numMatches: number;
durationMs: number;
timestamp: number;
isExhaustive: boolean;
}
const searchCache = new Map<string, CacheEntry>();
@@ -34,18 +35,22 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
const [state, setState] = useState<{
isStreaming: boolean,
isExhaustive: boolean,
error: Error | null,
files: SearchResultFile[],
repoInfo: Record<number, RepositoryInfo>,
durationMs: number,
numMatches: number,
stats?: SearchStats,
}>({
isStreaming: false,
isExhaustive: false,
error: null,
files: [],
repoInfo: {},
durationMs: 0,
numMatches: 0,
stats: undefined,
});
const abortControllerRef = useRef<AbortController | null>(null);
@@ -85,6 +90,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
console.debug('Using cached search results');
setState({
isStreaming: false,
isExhaustive: cachedEntry.isExhaustive,
error: null,
files: cachedEntry.files,
repoInfo: cachedEntry.repoInfo,
@@ -96,6 +102,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
setState({
isStreaming: true,
isExhaustive: false,
error: null,
files: [],
repoInfo: {},
@@ -167,22 +174,33 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
break;
}
const chunk: SearchResponse = JSON.parse(data);
const response: StreamedSearchResponse = JSON.parse(data);
switch (response.type) {
case 'chunk':
setState(prev => ({
...prev,
files: [
...prev.files,
...chunk.files
...response.files
],
repoInfo: {
...prev.repoInfo,
...chunk.repositoryInfo.reduce((acc, repo) => {
...response.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
},
numMatches: prev.numMatches + chunk.stats.actualMatchCount,
numMatches: prev.numMatches + response.stats.actualMatchCount,
}));
break;
case 'final':
setState(prev => ({
...prev,
isExhaustive: response.isSearchExhaustive,
stats: response.accumulatedStats,
}));
break;
}
}
}
@@ -192,6 +210,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
searchCache.set(cacheKey, {
files: prev.files,
repoInfo: prev.repoInfo,
isExhaustive: prev.isExhaustive,
numMatches: prev.numMatches,
durationMs,
timestamp: Date.now(),
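For reference, a condensed sketch of the SSE read loop this hook implements, assuming a `fetch` Response in scope and `data: <json>` frames terminated by `data: [DONE]` (as the route handler emits):

const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const frames = buffer.split('\n\n');
    buffer = frames.pop() ?? ''; // keep any partial frame for the next read
    for (const frame of frames) {
        if (!frame.startsWith('data: ')) continue;
        const payload = frame.slice('data: '.length);
        if (payload === '[DONE]') continue;
        const message: StreamedSearchResponse = JSON.parse(payload);
        if (message.type === 'chunk') {
            // merge message.files / message.repositoryInfo / message.stats into state
        } else {
            // 'final': capture message.isSearchExhaustive and message.accumulatedStats
        }
    }
}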

View file

@@ -4,7 +4,7 @@ import { search } from "@/features/search/searchApi";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { searchRequestSchema } from "@/features/search/schemas";
import { searchRequestSchema } from "@/features/search/types";
export const POST = async (request: NextRequest) => {
const body = await request.json();

View file

@@ -4,7 +4,7 @@ import { getFileSource } from "@/features/search/fileSourceApi";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { fileSourceRequestSchema } from "@/features/search/schemas";
import { fileSourceRequestSchema } from "@/features/search/types";
export const POST = async (request: NextRequest) => {
const body = await request.json();

View file

@@ -1,13 +1,14 @@
'use server';
import { searchRequestSchema } from '@/features/search/schemas';
import { SearchResponse, SourceRange } from '@/features/search/types';
import { searchRequestSchema, SearchStats, SourceRange, StreamedSearchResponse } from '@/features/search/types';
import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
import { schemaValidationError, serviceErrorResponse } from '@/lib/serviceError';
import { prisma } from '@/prisma';
import type { ProtoGrpcType } from '@/proto/webserver';
import { FileMatch__Output } from '@/proto/zoekt/webserver/v1/FileMatch';
import { Range__Output } from '@/proto/zoekt/webserver/v1/Range';
import type { SearchRequest } from '@/proto/zoekt/webserver/v1/SearchRequest';
import { SearchResponse__Output } from '@/proto/zoekt/webserver/v1/SearchResponse';
import type { StreamSearchRequest } from '@/proto/zoekt/webserver/v1/StreamSearchRequest';
import type { StreamSearchResponse__Output } from '@/proto/zoekt/webserver/v1/StreamSearchResponse';
import type { WebserverServiceClient } from '@/proto/zoekt/webserver/v1/WebserverService';
@@ -109,8 +110,22 @@ export const POST = async (request: NextRequest) => {
},
});
console.log(JSON.stringify(zoektQuery, null, 2));
const searchRequest: SearchRequest = {
query: zoektQuery,
query: {
and: {
children: [
zoektQuery,
{
branch: {
pattern: 'HEAD',
exact: true,
}
}
]
}
},
opts: {
chunk_matches: true,
max_match_display_count: matches,
@@ -158,11 +173,41 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
let grpcStream: ReturnType<WebserverServiceClient['StreamSearch']> | null = null;
let isStreamActive = true;
let pendingChunks = 0;
let accumulatedStats: SearchStats = {
actualMatchCount: 0,
totalMatchCount: 0,
duration: 0,
fileCount: 0,
filesSkipped: 0,
contentBytesLoaded: 0,
indexBytesLoaded: 0,
crashes: 0,
shardFilesConsidered: 0,
filesConsidered: 0,
filesLoaded: 0,
shardsScanned: 0,
shardsSkipped: 0,
shardsSkippedFilter: 0,
ngramMatches: 0,
ngramLookups: 0,
wait: 0,
matchTreeConstruction: 0,
matchTreeSearch: 0,
regexpsConsidered: 0,
flushReason: 0,
};
return new ReadableStream({
async start(controller) {
const tryCloseController = () => {
if (!isStreamActive && pendingChunks === 0) {
const finalResponse: StreamedSearchResponse = {
type: 'final',
accumulatedStats,
isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount,
}
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finalResponse)}\n\n`));
controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
controller.close();
client.close();
@@ -195,7 +240,56 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
//
// Note: When a repository is re-indexed (every hour) this ID will be populated.
// @see: https://github.com/sourcebot-dev/zoekt/pull/6
const repos = new Map<string | number, Repo>();
const getRepoIdForFile = (file: FileMatch__Output): string | number => {
return file.repository_id ?? file.repository;
}
// `_reposMapCache` is used to cache repository metadata across all chunks.
// This reduces the number of database queries required to transform file matches.
const _reposMapCache = new Map<string | number, Repo>();
// Creates a mapping of all repository ids in a given response chunk
// to their database records, allowing us to efficiently look up repository metadata.
const createReposMapForChunk = async (chunk: SearchResponse__Output): Promise<Map<string | number, Repo>> => {
const reposMap = new Map<string | number, Repo>();
await Promise.all(chunk.files.map(async (file) => {
const id = getRepoIdForFile(file);
const repo = await (async () => {
// If it's in the cache, return the cached value.
if (_reposMapCache.has(id)) {
return _reposMapCache.get(id);
}
// Otherwise, query the database for the record.
const repo = typeof id === 'number' ?
await prisma.repo.findUnique({
where: {
id: id,
},
}) :
await prisma.repo.findFirst({
where: {
name: id,
},
});
// If a repository is found, cache it for future lookups.
if (repo) {
_reposMapCache.set(id, repo);
}
return repo;
})();
// Only add the repository to the map if it was found.
if (repo) {
reposMap.set(id, repo);
}
}));
return reposMap;
}
// Handle incoming data chunks
grpcStream.on('data', async (chunk: StreamSearchResponse__Output) => {
@@ -218,32 +312,12 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
return;
}
const files = (await Promise.all(chunk.response_chunk.files.map(async (file) => {
const repoIdToRepoDBRecordMap = await createReposMapForChunk(chunk.response_chunk);
const files = chunk.response_chunk.files.map((file) => {
const fileNameChunks = file.chunk_matches.filter((chunk) => chunk.file_name);
const identifier = file.repository_id ?? file.repository;
// If the repository is not in the map, fetch it from the database.
if (!repos.has(identifier)) {
const repo = typeof identifier === 'number' ?
await prisma.repo.findUnique({
where: {
id: identifier,
},
}) :
await prisma.repo.findFirst({
where: {
name: identifier,
},
});
if (repo) {
repos.set(identifier, repo);
}
}
const repo = repos.get(identifier);
const repoId = getRepoIdForFile(file);
const repo = repoIdToRepoDBRecordMap.get(repoId);
// This can happen if the user doesn't have access to the repository.
if (!repo) {
@@ -307,7 +381,7 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
branches: file.branches,
content: file.content ? file.content.toString('utf-8') : undefined,
}
}))).filter(file => file !== undefined);
}).filter(file => file !== undefined);
const actualMatchCount = files.reduce(
(acc, file) =>
@@ -319,43 +393,45 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
0,
);
const response: SearchResponse = {
const stats: SearchStats = {
actualMatchCount,
totalMatchCount: chunk.response_chunk.stats?.match_count ?? 0,
duration: chunk.response_chunk.stats?.duration?.nanos ?? 0,
fileCount: chunk.response_chunk.stats?.file_count ?? 0,
filesSkipped: chunk.response_chunk.stats?.files_skipped ?? 0,
contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded ?? 0,
indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded ?? 0,
crashes: chunk.response_chunk.stats?.crashes ?? 0,
shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered ?? 0,
filesConsidered: chunk.response_chunk.stats?.files_considered ?? 0,
filesLoaded: chunk.response_chunk.stats?.files_loaded ?? 0,
shardsScanned: chunk.response_chunk.stats?.shards_scanned ?? 0,
shardsSkipped: chunk.response_chunk.stats?.shards_skipped ?? 0,
shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter ?? 0,
ngramMatches: chunk.response_chunk.stats?.ngram_matches ?? 0,
ngramLookups: chunk.response_chunk.stats?.ngram_lookups ?? 0,
wait: chunk.response_chunk.stats?.wait?.nanos ?? 0,
matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0,
matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0,
regexpsConsidered: chunk.response_chunk.stats?.regexps_considered ?? 0,
// @todo: handle this.
// flushReason: chunk.response_chunk.stats?.flush_reason ?? 0,
flushReason: 0
}
accumulatedStats = accumulateStats(accumulatedStats, stats);
const response: StreamedSearchResponse = {
type: 'chunk',
files,
repositoryInfo: Array.from(repos.values()).map((repo) => ({
repositoryInfo: Array.from(repoIdToRepoDBRecordMap.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled: false,
// @todo: we will need to figure out how to handle if a search is exhaustive or not
isSearchExhaustive: false,
stats: {
actualMatchCount,
// @todo: todo -
totalMatchCount: 0,
duration: chunk.response_chunk.stats?.duration?.nanos ?? 0,
fileCount: chunk.response_chunk.stats?.file_count.valueOf() ?? 0,
filesSkipped: chunk.response_chunk.stats?.files_skipped.valueOf() ?? 0,
contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded.valueOf() ?? 0,
indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded.valueOf() ?? 0,
crashes: chunk.response_chunk.stats?.crashes.valueOf() ?? 0,
shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered.valueOf() ?? 0,
filesConsidered: chunk.response_chunk.stats?.files_considered.valueOf() ?? 0,
filesLoaded: chunk.response_chunk.stats?.files_loaded.valueOf() ?? 0,
shardsScanned: chunk.response_chunk.stats?.shards_scanned.valueOf() ?? 0,
shardsSkipped: chunk.response_chunk.stats?.shards_skipped.valueOf() ?? 0,
shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter.valueOf() ?? 0,
ngramMatches: chunk.response_chunk.stats?.ngram_matches.valueOf() ?? 0,
ngramLookups: chunk.response_chunk.stats?.ngram_lookups.valueOf() ?? 0,
wait: chunk.response_chunk.stats?.wait?.nanos ?? 0,
matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0,
matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0,
regexpsConsidered: chunk.response_chunk.stats?.regexps_considered.valueOf() ?? 0,
// @todo: handle this.
flushReason: 0,
}
stats
}
const sseData = `data: ${JSON.stringify(response)}\n\n`;
@@ -435,3 +511,33 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
}
});
}
const accumulateStats = (a: SearchStats, b: SearchStats): SearchStats => {
return {
actualMatchCount: a.actualMatchCount + b.actualMatchCount,
totalMatchCount: a.totalMatchCount + b.totalMatchCount,
duration: a.duration + b.duration,
fileCount: a.fileCount + b.fileCount,
filesSkipped: a.filesSkipped + b.filesSkipped,
contentBytesLoaded: a.contentBytesLoaded + b.contentBytesLoaded,
indexBytesLoaded: a.indexBytesLoaded + b.indexBytesLoaded,
crashes: a.crashes + b.crashes,
shardFilesConsidered: a.shardFilesConsidered + b.shardFilesConsidered,
filesConsidered: a.filesConsidered + b.filesConsidered,
filesLoaded: a.filesLoaded + b.filesLoaded,
shardsScanned: a.shardsScanned + b.shardsScanned,
shardsSkipped: a.shardsSkipped + b.shardsSkipped,
shardsSkippedFilter: a.shardsSkippedFilter + b.shardsSkippedFilter,
ngramMatches: a.ngramMatches + b.ngramMatches,
ngramLookups: a.ngramLookups + b.ngramLookups,
wait: a.wait + b.wait,
matchTreeConstruction: a.matchTreeConstruction + b.matchTreeConstruction,
matchTreeSearch: a.matchTreeSearch + b.matchTreeSearch,
regexpsConsidered: a.regexpsConsidered + b.regexpsConsidered,
...(a.flushReason === 0 ? {
flushReason: b.flushReason
} : {
flushReason: a.flushReason,
}),
}
}
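The exhaustiveness check falls out of the accumulated totals; as a usage sketch (with `zeroStats` standing in for the zero-initialized `SearchStats` literal above):

// The search is exhaustive when every match zoekt counted was
// actually returned to the client.
const totals = perChunkStats.reduce(accumulateStats, zeroStats);
const isSearchExhaustive = totals.totalMatchCount <= totals.actualMatchCount;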

View file

@@ -1,6 +1,6 @@
import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types";
import { getFileSource } from "@/features/search/fileSourceApi";
import { fileSourceResponseSchema } from "@/features/search/schemas";
import { fileSourceResponseSchema } from "@/features/search/types";
import { isServiceError } from "@/lib/utils";
import { createLogger } from "@sourcebot/shared";

View file

@@ -1,7 +1,6 @@
import 'server-only';
import { sew } from "@/actions";
import { searchResponseSchema } from "@/features/search/schemas";
import { search } from "@/features/search/searchApi";
import { ServiceError } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils";
@@ -59,12 +58,12 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols
return parseRelatedSymbolsSearchResponse(searchResult);
}));
const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
const parser = searchResponseSchema.transform(async ({ files }) => ({
const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRelatedSymbolsResponse => {
return {
stats: {
matchCount: searchResult.stats.actualMatchCount,
},
files: files.flatMap((file) => {
files: searchResult.files.flatMap((file) => {
const chunks = file.chunks;
return {
@@ -82,9 +81,7 @@ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
}
}).filter((file) => file.matches.length > 0),
repositoryInfo: searchResult.repositoryInfo
}));
return parser.parseAsync(searchResult);
};
}
// Expands the language filter to include all variants of the language.

View file

@@ -1,5 +1,5 @@
import { z } from "zod";
import { rangeSchema, repositoryInfoSchema } from "../search/schemas";
import { rangeSchema, repositoryInfoSchema } from "../search/types";
export const findRelatedSymbolsRequestSchema = z.object({
symbolName: z.string(),

View file

@@ -1,167 +0,0 @@
// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/schemas.ts
import { CodeHostType } from "@sourcebot/db";
import { z } from "zod";
export const locationSchema = z.object({
// 0-based byte offset from the beginning of the file
byteOffset: z.number(),
// 1-based line number from the beginning of the file
lineNumber: z.number(),
// 1-based column number (in runes) from the beginning of line
column: z.number(),
});
export const rangeSchema = z.object({
start: locationSchema,
end: locationSchema,
});
export const symbolSchema = z.object({
symbol: z.string(),
kind: z.string(),
});
export const searchRequestSchema = z.object({
// The zoekt query to execute.
query: z.string(),
// The number of matches to return.
matches: z.number(),
// The number of context lines to return.
contextLines: z.number().optional(),
// Whether to return the whole file as part of the response.
whole: z.boolean().optional(),
// Whether to enable regular expression search.
isRegexEnabled: z.boolean().optional(),
// Whether to enable case sensitivity.
isCaseSensitivityEnabled: z.boolean().optional(),
});
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.nativeEnum(CodeHostType),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
});
// Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
// The actual number of matches returned by the search.
// This will always be less than or equal to `totalMatchCount`.
actualMatchCount: z.number(),
// The total number of matches found during the search.
totalMatchCount: z.number(),
// The duration (in nanoseconds) of the search.
duration: z.number(),
// Number of files containing a match.
fileCount: z.number(),
// Candidate files whose contents weren't examined because we
// gathered enough matches.
filesSkipped: z.number(),
// Amount of I/O for reading contents.
contentBytesLoaded: z.number(),
// Amount of I/O for reading from index.
indexBytesLoaded: z.number(),
// Number of search shards that had a crash.
crashes: z.number(),
// Number of files in shards that we considered.
shardFilesConsidered: z.number(),
// Files that we evaluated. Equivalent to files for which all
// atom matches (including negations) evaluated to true.
filesConsidered: z.number(),
// Files for which we loaded file content to verify substring matches
filesLoaded: z.number(),
// Shards that we scanned to find matches.
shardsScanned: z.number(),
// Shards that we did not process because a query was canceled.
shardsSkipped: z.number(),
// Shards that we did not process because the query was rejected by the
// ngram filter indicating it had no matches.
shardsSkippedFilter: z.number(),
// Number of candidate matches as a result of searching ngrams.
ngramMatches: z.number(),
// NgramLookups is the number of times we accessed an ngram in the index.
ngramLookups: z.number(),
// Wall clock time for queued search.
wait: z.number(),
// Aggregate wall clock time spent constructing and pruning the match tree.
// This accounts for time such as lookups in the trigram index.
matchTreeConstruction: z.number(),
// Aggregate wall clock time spent searching the match tree. This accounts
// for the bulk of search work done looking for matches.
matchTreeSearch: z.number(),
// Number of times regexp was called on files that we evaluated.
regexpsConsidered: z.number(),
// FlushReason explains why results were flushed.
flushReason: z.number(),
});
export const searchResponseSchema = z.object({
stats: searchStatsSchema,
files: z.array(z.object({
fileName: z.object({
// The name of the file
text: z.string(),
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
chunks: z.array(z.object({
content: z.string(),
matchRanges: z.array(rangeSchema),
contentStart: locationSchema,
symbols: z.array(z.object({
...symbolSchema.shape,
parent: symbolSchema.optional(),
})).optional(),
})),
branches: z.array(z.string()).optional(),
// Set if `whole` is true.
content: z.string().optional(),
})),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
__debug_timings: z.record(z.string(), z.number()).optional(),
});
export const fileSourceRequestSchema = z.object({
fileName: z.string(),
repository: z.string(),
branch: z.string().optional(),
});
export const fileSourceResponseSchema = z.object({
source: z.string(),
language: z.string(),
path: z.string(),
repository: z.string(),
repositoryCodeHostType: z.nativeEnum(CodeHostType),
repositoryDisplayName: z.string().optional(),
repositoryWebUrl: z.string().optional(),
branch: z.string().optional(),
webUrl: z.string().optional(),
});

View file

@@ -1,27 +1,151 @@
// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/types.ts
import {
fileSourceResponseSchema,
locationSchema,
searchRequestSchema,
searchResponseSchema,
rangeSchema,
fileSourceRequestSchema,
symbolSchema,
repositoryInfoSchema,
searchStatsSchema,
} from "./schemas";
import { CodeHostType } from "@sourcebot/db";
import { z } from "zod";
export type SearchRequest = z.infer<typeof searchRequestSchema>;
export type SearchResponse = z.infer<typeof searchResponseSchema>;
export type SearchResultLocation = z.infer<typeof locationSchema>;
export type SearchResultFile = SearchResponse["files"][number];
export type SearchResultChunk = SearchResultFile["chunks"][number];
export const locationSchema = z.object({
byteOffset: z.number(), // 0-based byte offset from the beginning of the file
lineNumber: z.number(), // 1-based line number from the beginning of the file
column: z.number(), // 1-based column number (in runes) from the beginning of line
});
export type SourceLocation = z.infer<typeof locationSchema>;
export const rangeSchema = z.object({
start: locationSchema,
end: locationSchema,
});
export type SourceRange = z.infer<typeof rangeSchema>;
export const symbolSchema = z.object({
symbol: z.string(),
kind: z.string(),
});
export type SearchSymbol = z.infer<typeof symbolSchema>;
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.nativeEnum(CodeHostType),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
});
export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;
export type SourceRange = z.infer<typeof rangeSchema>;
// @note: Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
actualMatchCount: z.number(), // The actual number of matches returned by the search. This will always be less than or equal to `totalMatchCount`.
totalMatchCount: z.number(), // The total number of matches found during the search.
duration: z.number(), // The duration (in nanoseconds) of the search.
fileCount: z.number(), // Number of files containing a match.
filesSkipped: z.number(), // Candidate files whose contents weren't examined because we gathered enough matches.
contentBytesLoaded: z.number(), // Amount of I/O for reading contents.
indexBytesLoaded: z.number(), // Amount of I/O for reading from index.
crashes: z.number(), // Number of search shards that had a crash.
shardFilesConsidered: z.number(), // Number of files in shards that we considered.
filesConsidered: z.number(), // Files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true.
filesLoaded: z.number(), // Files for which we loaded file content to verify substring matches
shardsScanned: z.number(), // Shards that we scanned to find matches.
shardsSkipped: z.number(), // Shards that we did not process because a query was canceled.
shardsSkippedFilter: z.number(), // Shards that we did not process because the query was rejected by the ngram filter indicating it had no matches.
ngramMatches: z.number(), // Number of candidate matches as a result of searching ngrams.
ngramLookups: z.number(), // NgramLookups is the number of times we accessed an ngram in the index.
wait: z.number(), // Wall clock time for queued search.
matchTreeConstruction: z.number(), // Aggregate wall clock time spent constructing and pruning the match tree. This accounts for time such as lookups in the trigram index.
matchTreeSearch: z.number(), // Aggregate wall clock time spent searching the match tree. This accounts for the bulk of search work done looking for matches.
regexpsConsidered: z.number(), // Number of times regexp was called on files that we evaluated.
flushReason: z.number(), // FlushReason explains why results were flushed.
});
export type SearchStats = z.infer<typeof searchStatsSchema>;
export const searchFileSchema = z.object({
fileName: z.object({
// The name of the file
text: z.string(),
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
chunks: z.array(z.object({
content: z.string(),
matchRanges: z.array(rangeSchema),
contentStart: locationSchema,
symbols: z.array(z.object({
...symbolSchema.shape,
parent: symbolSchema.optional(),
})).optional(),
})),
branches: z.array(z.string()).optional(),
// Set if `whole` is true.
content: z.string().optional(),
});
export type SearchResultFile = z.infer<typeof searchFileSchema>;
export type SearchResultChunk = SearchResultFile["chunks"][number];
export const searchRequestSchema = z.object({
query: z.string(), // The zoekt query to execute.
matches: z.number(), // The number of matches to return.
contextLines: z.number().optional(), // The number of context lines to return.
whole: z.boolean().optional(), // Whether to return the whole file as part of the response.
isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search.
isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity.
});
export type SearchRequest = z.infer<typeof searchRequestSchema>;
export const searchResponseSchema = z.object({
stats: searchStatsSchema,
files: z.array(searchFileSchema),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
__debug_timings: z.record(z.string(), z.number()).optional(),
});
export type SearchResponse = z.infer<typeof searchResponseSchema>;
/**
* Sent after each chunk of results is processed.
*/
export const streamedSearchChunkResponseSchema = z.object({
type: z.literal('chunk'),
stats: searchStatsSchema,
files: z.array(searchFileSchema),
repositoryInfo: z.array(repositoryInfoSchema),
});
export type StreamedSearchChunkResponse = z.infer<typeof streamedSearchChunkResponseSchema>;
/**
* Sent after the search is complete.
*/
export const streamedSearchFinalResponseSchema = z.object({
type: z.literal('final'),
accumulatedStats: searchStatsSchema,
isSearchExhaustive: z.boolean(),
});
export type StreamedSearchFinalResponse = z.infer<typeof streamedSearchFinalResponseSchema>;
export const streamedSearchResponseSchema = z.discriminatedUnion('type', [
streamedSearchChunkResponseSchema,
streamedSearchFinalResponseSchema,
]);
export type StreamedSearchResponse = z.infer<typeof streamedSearchResponseSchema>;
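Because `streamedSearchResponseSchema` is a discriminated union, consumers that want runtime validation (the hook above trusts the payload and casts) could parse each frame before narrowing, as in this sketch:

// Sketch: runtime-validate an SSE payload before switching on `type`.
const message = streamedSearchResponseSchema.parse(JSON.parse(payload));
switch (message.type) {
    case 'chunk':
        console.log(`received ${message.files.length} file(s)`);
        break;
    case 'final':
        console.log(`exhaustive: ${message.isSearchExhaustive}`);
        break;
}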
export const fileSourceRequestSchema = z.object({
fileName: z.string(),
repository: z.string(),
branch: z.string().optional(),
});
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export const fileSourceResponseSchema = z.object({
source: z.string(),
language: z.string(),
path: z.string(),
repository: z.string(),
repositoryCodeHostType: z.nativeEnum(CodeHostType),
repositoryDisplayName: z.string().optional(),
repositoryWebUrl: z.string().optional(),
branch: z.string().optional(),
webUrl: z.string().optional(),
});
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;