diff --git a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx index e1f58423..68c85f24 100644 --- a/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/[domain]/search/components/searchResultsPage.tsx @@ -62,6 +62,8 @@ export const SearchResultsPage = ({ durationMs, isStreaming, numMatches, + isExhaustive, + stats, } = useStreamedSearch({ query: searchQuery, matches: maxMatchCount, @@ -170,10 +172,8 @@ export const SearchResultsPage = ({ repoInfo={repoInfo} searchDurationMs={durationMs} isStreaming={isStreaming} - // @todo: handle search stats - searchStats={undefined} - // @todo: detect when more results are available - isMoreResultsButtonVisible={false} + searchStats={stats} + isMoreResultsButtonVisible={!isExhaustive} // @todo: handle branch filtering isBranchFilteringEnabled={false} /> diff --git a/packages/web/src/app/[domain]/search/useStreamedSearch.ts b/packages/web/src/app/[domain]/search/useStreamedSearch.ts index 85a6fd64..04514329 100644 --- a/packages/web/src/app/[domain]/search/useStreamedSearch.ts +++ b/packages/web/src/app/[domain]/search/useStreamedSearch.ts @@ -1,6 +1,6 @@ 'use client'; -import { RepositoryInfo, SearchRequest, SearchResponse, SearchResultFile } from '@/features/search/types'; +import { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search/types'; import { useState, useCallback, useRef, useEffect } from 'react'; import * as Sentry from '@sentry/nextjs'; @@ -10,6 +10,7 @@ interface CacheEntry { numMatches: number; durationMs: number; timestamp: number; + isExhaustive: boolean; } const searchCache = new Map(); @@ -34,18 +35,22 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex const [state, setState] = useState<{ isStreaming: boolean, + isExhaustive: boolean, error: Error | null, files: SearchResultFile[], repoInfo: Record, durationMs: number, numMatches: number, + stats?: SearchStats, }>({ isStreaming: false, + isExhaustive: false, error: null, files: [], repoInfo: {}, durationMs: 0, numMatches: 0, + stats: undefined, }); const abortControllerRef = useRef(null); @@ -85,6 +90,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex console.debug('Using cached search results'); setState({ isStreaming: false, + isExhaustive: cachedEntry.isExhaustive, error: null, files: cachedEntry.files, repoInfo: cachedEntry.repoInfo, @@ -96,6 +102,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex setState({ isStreaming: true, + isExhaustive: false, error: null, files: [], repoInfo: {}, @@ -167,22 +174,33 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex break; } - const chunk: SearchResponse = JSON.parse(data); - setState(prev => ({ - ...prev, - files: [ - ...prev.files, - ...chunk.files - ], - repoInfo: { - ...prev.repoInfo, - ...chunk.repositoryInfo.reduce((acc, repo) => { - acc[repo.id] = repo; - return acc; - }, {} as Record), - }, - numMatches: prev.numMatches + chunk.stats.actualMatchCount, - })); + const response: StreamedSearchResponse = JSON.parse(data); + switch (response.type) { + case 'chunk': + setState(prev => ({ + ...prev, + files: [ + ...prev.files, + ...response.files + ], + repoInfo: { + ...prev.repoInfo, + ...response.repositoryInfo.reduce((acc, repo) => { + acc[repo.id] = repo; + return acc; + }, {} as Record), + }, + numMatches: prev.numMatches + response.stats.actualMatchCount, + })); + break; + case 'final': + setState(prev => ({ + ...prev, + isExhaustive: response.isSearchExhaustive, + stats: response.accumulatedStats, + })); + break; + } } } @@ -192,6 +210,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex searchCache.set(cacheKey, { files: prev.files, repoInfo: prev.repoInfo, + isExhaustive: prev.isExhaustive, numMatches: prev.numMatches, durationMs, timestamp: Date.now(), diff --git a/packages/web/src/app/api/(server)/search/route.ts b/packages/web/src/app/api/(server)/search/route.ts index 83a5e6a0..027096f0 100644 --- a/packages/web/src/app/api/(server)/search/route.ts +++ b/packages/web/src/app/api/(server)/search/route.ts @@ -4,7 +4,7 @@ import { search } from "@/features/search/searchApi"; import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; -import { searchRequestSchema } from "@/features/search/schemas"; +import { searchRequestSchema } from "@/features/search/types"; export const POST = async (request: NextRequest) => { const body = await request.json(); diff --git a/packages/web/src/app/api/(server)/source/route.ts b/packages/web/src/app/api/(server)/source/route.ts index d64d701d..2fb785a8 100644 --- a/packages/web/src/app/api/(server)/source/route.ts +++ b/packages/web/src/app/api/(server)/source/route.ts @@ -4,7 +4,7 @@ import { getFileSource } from "@/features/search/fileSourceApi"; import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; -import { fileSourceRequestSchema } from "@/features/search/schemas"; +import { fileSourceRequestSchema } from "@/features/search/types"; export const POST = async (request: NextRequest) => { const body = await request.json(); diff --git a/packages/web/src/app/api/(server)/stream_search/route.ts b/packages/web/src/app/api/(server)/stream_search/route.ts index 11905ee7..231ff712 100644 --- a/packages/web/src/app/api/(server)/stream_search/route.ts +++ b/packages/web/src/app/api/(server)/stream_search/route.ts @@ -1,13 +1,14 @@ 'use server'; -import { searchRequestSchema } from '@/features/search/schemas'; -import { SearchResponse, SourceRange } from '@/features/search/types'; +import { searchRequestSchema, SearchStats, SourceRange, StreamedSearchResponse } from '@/features/search/types'; import { SINGLE_TENANT_ORG_ID } from '@/lib/constants'; import { schemaValidationError, serviceErrorResponse } from '@/lib/serviceError'; import { prisma } from '@/prisma'; import type { ProtoGrpcType } from '@/proto/webserver'; +import { FileMatch__Output } from '@/proto/zoekt/webserver/v1/FileMatch'; import { Range__Output } from '@/proto/zoekt/webserver/v1/Range'; import type { SearchRequest } from '@/proto/zoekt/webserver/v1/SearchRequest'; +import { SearchResponse__Output } from '@/proto/zoekt/webserver/v1/SearchResponse'; import type { StreamSearchRequest } from '@/proto/zoekt/webserver/v1/StreamSearchRequest'; import type { StreamSearchResponse__Output } from '@/proto/zoekt/webserver/v1/StreamSearchResponse'; import type { WebserverServiceClient } from '@/proto/zoekt/webserver/v1/WebserverService'; @@ -109,8 +110,22 @@ export const POST = async (request: NextRequest) => { }, }); + console.log(JSON.stringify(zoektQuery, null, 2)); + const searchRequest: SearchRequest = { - query: zoektQuery, + query: { + and: { + children: [ + zoektQuery, + { + branch: { + pattern: 'HEAD', + exact: true, + } + } + ] + } + }, opts: { chunk_matches: true, max_match_display_count: matches, @@ -158,11 +173,41 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism let grpcStream: ReturnType | null = null; let isStreamActive = true; let pendingChunks = 0; + let accumulatedStats: SearchStats = { + actualMatchCount: 0, + totalMatchCount: 0, + duration: 0, + fileCount: 0, + filesSkipped: 0, + contentBytesLoaded: 0, + indexBytesLoaded: 0, + crashes: 0, + shardFilesConsidered: 0, + filesConsidered: 0, + filesLoaded: 0, + shardsScanned: 0, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 0, + ngramLookups: 0, + wait: 0, + matchTreeConstruction: 0, + matchTreeSearch: 0, + regexpsConsidered: 0, + flushReason: 0, + }; return new ReadableStream({ async start(controller) { const tryCloseController = () => { if (!isStreamActive && pendingChunks === 0) { + const finalResponse: StreamedSearchResponse = { + type: 'final', + accumulatedStats, + isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount, + } + + controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finalResponse)}\n\n`)); controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n')); controller.close(); client.close(); @@ -195,7 +240,56 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism // // Note: When a repository is re-indexed (every hour) this ID will be populated. // @see: https://github.com/sourcebot-dev/zoekt/pull/6 - const repos = new Map(); + const getRepoIdForFile = (file: FileMatch__Output): string | number => { + return file.repository_id ?? file.repository; + } + + // `_reposMapCache` is used to cache repository metadata across all chunks. + // This reduces the number of database queries required to transform file matches. + const _reposMapCache = new Map(); + + // Creates a mapping between all repository ids in a given response + // chunk. The mapping allows us to efficiently lookup repository metadata. + const createReposMapForChunk = async (chunk: SearchResponse__Output): Promise> => { + const reposMap = new Map(); + await Promise.all(chunk.files.map(async (file) => { + const id = getRepoIdForFile(file); + + const repo = await (async () => { + // If it's in the cache, return the cached value. + if (_reposMapCache.has(id)) { + return _reposMapCache.get(id); + } + + // Otherwise, query the database for the record. + const repo = typeof id === 'number' ? + await prisma.repo.findUnique({ + where: { + id: id, + }, + }) : + await prisma.repo.findFirst({ + where: { + name: id, + }, + }); + + // If a repository is found, cache it for future lookups. + if (repo) { + _reposMapCache.set(id, repo); + } + + return repo; + })(); + + // Only add the repository to the map if it was found. + if (repo) { + reposMap.set(id, repo); + } + })); + + return reposMap; + } // Handle incoming data chunks grpcStream.on('data', async (chunk: StreamSearchResponse__Output) => { @@ -218,32 +312,12 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism return; } - const files = (await Promise.all(chunk.response_chunk.files.map(async (file) => { + const repoIdToRepoDBRecordMap = await createReposMapForChunk(chunk.response_chunk); + + const files = chunk.response_chunk.files.map((file) => { const fileNameChunks = file.chunk_matches.filter((chunk) => chunk.file_name); - - const identifier = file.repository_id ?? file.repository; - - // If the repository is not in the map, fetch it from the database. - if (!repos.has(identifier)) { - const repo = typeof identifier === 'number' ? - await prisma.repo.findUnique({ - where: { - id: identifier, - }, - }) : - await prisma.repo.findFirst({ - where: { - name: identifier, - }, - }); - - if (repo) { - repos.set(identifier, repo); - } - } - - - const repo = repos.get(identifier); + const repoId = getRepoIdForFile(file); + const repo = repoIdToRepoDBRecordMap.get(repoId); // This can happen if the user doesn't have access to the repository. if (!repo) { @@ -307,7 +381,7 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism branches: file.branches, content: file.content ? file.content.toString('utf-8') : undefined, } - }))).filter(file => file !== undefined); + }).filter(file => file !== undefined); const actualMatchCount = files.reduce( (acc, file) => @@ -319,43 +393,45 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism 0, ); - const response: SearchResponse = { + const stats: SearchStats = { + actualMatchCount, + totalMatchCount: chunk.response_chunk.stats?.match_count ?? 0, + duration: chunk.response_chunk.stats?.duration?.nanos ?? 0, + fileCount: chunk.response_chunk.stats?.file_count ?? 0, + filesSkipped: chunk.response_chunk.stats?.files_skipped ?? 0, + contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded ?? 0, + indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded ?? 0, + crashes: chunk.response_chunk.stats?.crashes ?? 0, + shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered ?? 0, + filesConsidered: chunk.response_chunk.stats?.files_considered ?? 0, + filesLoaded: chunk.response_chunk.stats?.files_loaded ?? 0, + shardsScanned: chunk.response_chunk.stats?.shards_scanned ?? 0, + shardsSkipped: chunk.response_chunk.stats?.shards_skipped ?? 0, + shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter ?? 0, + ngramMatches: chunk.response_chunk.stats?.ngram_matches ?? 0, + ngramLookups: chunk.response_chunk.stats?.ngram_lookups ?? 0, + wait: chunk.response_chunk.stats?.wait?.nanos ?? 0, + matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0, + matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0, + regexpsConsidered: chunk.response_chunk.stats?.regexps_considered ?? 0, + // @todo: handle this. + // flushReason: chunk.response_chunk.stats?.flush_reason ?? 0, + flushReason: 0 + } + + accumulatedStats = accumulateStats(accumulatedStats, stats); + + const response: StreamedSearchResponse = { + type: 'chunk', files, - repositoryInfo: Array.from(repos.values()).map((repo) => ({ + repositoryInfo: Array.from(repoIdToRepoDBRecordMap.values()).map((repo) => ({ id: repo.id, codeHostType: repo.external_codeHostType, name: repo.name, displayName: repo.displayName ?? undefined, webUrl: repo.webUrl ?? undefined, })), - isBranchFilteringEnabled: false, - // @todo: we will need to figure out how to handle if a search is exhaustive or not - isSearchExhaustive: false, - stats: { - actualMatchCount, - // @todo: todo - - totalMatchCount: 0, - duration: chunk.response_chunk.stats?.duration?.nanos ?? 0, - fileCount: chunk.response_chunk.stats?.file_count.valueOf() ?? 0, - filesSkipped: chunk.response_chunk.stats?.files_skipped.valueOf() ?? 0, - contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded.valueOf() ?? 0, - indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded.valueOf() ?? 0, - crashes: chunk.response_chunk.stats?.crashes.valueOf() ?? 0, - shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered.valueOf() ?? 0, - filesConsidered: chunk.response_chunk.stats?.files_considered.valueOf() ?? 0, - filesLoaded: chunk.response_chunk.stats?.files_loaded.valueOf() ?? 0, - shardsScanned: chunk.response_chunk.stats?.shards_scanned.valueOf() ?? 0, - shardsSkipped: chunk.response_chunk.stats?.shards_skipped.valueOf() ?? 0, - shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter.valueOf() ?? 0, - ngramMatches: chunk.response_chunk.stats?.ngram_matches.valueOf() ?? 0, - ngramLookups: chunk.response_chunk.stats?.ngram_lookups.valueOf() ?? 0, - wait: chunk.response_chunk.stats?.wait?.nanos ?? 0, - matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0, - matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0, - regexpsConsidered: chunk.response_chunk.stats?.regexps_considered.valueOf() ?? 0, - // @todo: handle this. - flushReason: 0, - } + stats } const sseData = `data: ${JSON.stringify(response)}\n\n`; @@ -434,4 +510,34 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism client.close(); } }); +} + +const accumulateStats = (a: SearchStats, b: SearchStats): SearchStats => { + return { + actualMatchCount: a.actualMatchCount + b.actualMatchCount, + totalMatchCount: a.totalMatchCount + b.totalMatchCount, + duration: a.duration + b.duration, + fileCount: a.fileCount + b.fileCount, + filesSkipped: a.filesSkipped + b.filesSkipped, + contentBytesLoaded: a.contentBytesLoaded + b.contentBytesLoaded, + indexBytesLoaded: a.indexBytesLoaded + b.indexBytesLoaded, + crashes: a.crashes + b.crashes, + shardFilesConsidered: a.shardFilesConsidered + b.shardFilesConsidered, + filesConsidered: a.filesConsidered + b.filesConsidered, + filesLoaded: a.filesLoaded + b.filesLoaded, + shardsScanned: a.shardsScanned + b.shardsScanned, + shardsSkipped: a.shardsSkipped + b.shardsSkipped, + shardsSkippedFilter: a.shardsSkippedFilter + b.shardsSkippedFilter, + ngramMatches: a.ngramMatches + b.ngramMatches, + ngramLookups: a.ngramLookups + b.ngramLookups, + wait: a.wait + b.wait, + matchTreeConstruction: a.matchTreeConstruction + b.matchTreeConstruction, + matchTreeSearch: a.matchTreeSearch + b.matchTreeSearch, + regexpsConsidered: a.regexpsConsidered + b.regexpsConsidered, + ...(a.flushReason === 0 ? { + flushReason: b.flushReason + } : { + flushReason: a.flushReason, + }), + } } \ No newline at end of file diff --git a/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts b/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts index d9903b72..b7c38c3c 100644 --- a/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts +++ b/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts @@ -1,6 +1,6 @@ import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types"; import { getFileSource } from "@/features/search/fileSourceApi"; -import { fileSourceResponseSchema } from "@/features/search/schemas"; +import { fileSourceResponseSchema } from "@/features/search/types"; import { isServiceError } from "@/lib/utils"; import { createLogger } from "@sourcebot/shared"; diff --git a/packages/web/src/features/codeNav/api.ts b/packages/web/src/features/codeNav/api.ts index 1865ee53..d2912f82 100644 --- a/packages/web/src/features/codeNav/api.ts +++ b/packages/web/src/features/codeNav/api.ts @@ -1,7 +1,6 @@ import 'server-only'; import { sew } from "@/actions"; -import { searchResponseSchema } from "@/features/search/schemas"; import { search } from "@/features/search/searchApi"; import { ServiceError } from "@/lib/serviceError"; import { isServiceError } from "@/lib/utils"; @@ -59,12 +58,12 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols return parseRelatedSymbolsSearchResponse(searchResult); })); -const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => { - const parser = searchResponseSchema.transform(async ({ files }) => ({ +const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRelatedSymbolsResponse => { + return { stats: { matchCount: searchResult.stats.actualMatchCount, }, - files: files.flatMap((file) => { + files: searchResult.files.flatMap((file) => { const chunks = file.chunks; return { @@ -82,9 +81,7 @@ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => { } }).filter((file) => file.matches.length > 0), repositoryInfo: searchResult.repositoryInfo - })); - - return parser.parseAsync(searchResult); + }; } // Expands the language filter to include all variants of the language. diff --git a/packages/web/src/features/codeNav/types.ts b/packages/web/src/features/codeNav/types.ts index 07f3cefd..b1dace76 100644 --- a/packages/web/src/features/codeNav/types.ts +++ b/packages/web/src/features/codeNav/types.ts @@ -1,5 +1,5 @@ import { z } from "zod"; -import { rangeSchema, repositoryInfoSchema } from "../search/schemas"; +import { rangeSchema, repositoryInfoSchema } from "../search/types"; export const findRelatedSymbolsRequestSchema = z.object({ symbolName: z.string(), diff --git a/packages/web/src/features/search/schemas.ts b/packages/web/src/features/search/schemas.ts deleted file mode 100644 index 6fefe0de..00000000 --- a/packages/web/src/features/search/schemas.ts +++ /dev/null @@ -1,167 +0,0 @@ -// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/schemas.ts -import { CodeHostType } from "@sourcebot/db"; -import { z } from "zod"; - -export const locationSchema = z.object({ - // 0-based byte offset from the beginning of the file - byteOffset: z.number(), - // 1-based line number from the beginning of the file - lineNumber: z.number(), - // 1-based column number (in runes) from the beginning of line - column: z.number(), -}); - -export const rangeSchema = z.object({ - start: locationSchema, - end: locationSchema, -}); - -export const symbolSchema = z.object({ - symbol: z.string(), - kind: z.string(), -}); - -export const searchRequestSchema = z.object({ - // The zoekt query to execute. - query: z.string(), - // The number of matches to return. - matches: z.number(), - // The number of context lines to return. - contextLines: z.number().optional(), - // Whether to return the whole file as part of the response. - whole: z.boolean().optional(), - // Whether to enable regular expression search. - isRegexEnabled: z.boolean().optional(), - // Whether to enable case sensitivity. - isCaseSensitivityEnabled: z.boolean().optional(), -}); - -export const repositoryInfoSchema = z.object({ - id: z.number(), - codeHostType: z.nativeEnum(CodeHostType), - name: z.string(), - displayName: z.string().optional(), - webUrl: z.string().optional(), -}); - -// Many of these fields are defined in zoekt/api.go. -export const searchStatsSchema = z.object({ - // The actual number of matches returned by the search. - // This will always be less than or equal to `totalMatchCount`. - actualMatchCount: z.number(), - - // The total number of matches found during the search. - totalMatchCount: z.number(), - - // The duration (in nanoseconds) of the search. - duration: z.number(), - - // Number of files containing a match. - fileCount: z.number(), - - // Candidate files whose contents weren't examined because we - // gathered enough matches. - filesSkipped: z.number(), - - // Amount of I/O for reading contents. - contentBytesLoaded: z.number(), - - // Amount of I/O for reading from index. - indexBytesLoaded: z.number(), - - // Number of search shards that had a crash. - crashes: z.number(), - - // Number of files in shards that we considered. - shardFilesConsidered: z.number(), - - // Files that we evaluated. Equivalent to files for which all - // atom matches (including negations) evaluated to true. - filesConsidered: z.number(), - - // Files for which we loaded file content to verify substring matches - filesLoaded: z.number(), - - // Shards that we scanned to find matches. - shardsScanned: z.number(), - - // Shards that we did not process because a query was canceled. - shardsSkipped: z.number(), - - // Shards that we did not process because the query was rejected by the - // ngram filter indicating it had no matches. - shardsSkippedFilter: z.number(), - - // Number of candidate matches as a result of searching ngrams. - ngramMatches: z.number(), - - // NgramLookups is the number of times we accessed an ngram in the index. - ngramLookups: z.number(), - - // Wall clock time for queued search. - wait: z.number(), - - // Aggregate wall clock time spent constructing and pruning the match tree. - // This accounts for time such as lookups in the trigram index. - matchTreeConstruction: z.number(), - - // Aggregate wall clock time spent searching the match tree. This accounts - // for the bulk of search work done looking for matches. - matchTreeSearch: z.number(), - - // Number of times regexp was called on files that we evaluated. - regexpsConsidered: z.number(), - - // FlushReason explains why results were flushed. - flushReason: z.number(), -}); - -export const searchResponseSchema = z.object({ - stats: searchStatsSchema, - files: z.array(z.object({ - fileName: z.object({ - // The name of the file - text: z.string(), - // Any matching ranges - matchRanges: z.array(rangeSchema), - }), - webUrl: z.string().optional(), - repository: z.string(), - repositoryId: z.number(), - language: z.string(), - chunks: z.array(z.object({ - content: z.string(), - matchRanges: z.array(rangeSchema), - contentStart: locationSchema, - symbols: z.array(z.object({ - ...symbolSchema.shape, - parent: symbolSchema.optional(), - })).optional(), - })), - branches: z.array(z.string()).optional(), - // Set if `whole` is true. - content: z.string().optional(), - })), - repositoryInfo: z.array(repositoryInfoSchema), - isBranchFilteringEnabled: z.boolean(), - isSearchExhaustive: z.boolean(), - __debug_timings: z.record(z.string(), z.number()).optional(), -}); - -export const fileSourceRequestSchema = z.object({ - fileName: z.string(), - repository: z.string(), - branch: z.string().optional(), -}); - -export const fileSourceResponseSchema = z.object({ - source: z.string(), - language: z.string(), - path: z.string(), - repository: z.string(), - repositoryCodeHostType: z.nativeEnum(CodeHostType), - repositoryDisplayName: z.string().optional(), - repositoryWebUrl: z.string().optional(), - branch: z.string().optional(), - webUrl: z.string().optional(), -}); \ No newline at end of file diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts index 2a238857..b46320dc 100644 --- a/packages/web/src/features/search/types.ts +++ b/packages/web/src/features/search/types.ts @@ -1,27 +1,151 @@ -// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/types.ts -import { - fileSourceResponseSchema, - locationSchema, - searchRequestSchema, - searchResponseSchema, - rangeSchema, - fileSourceRequestSchema, - symbolSchema, - repositoryInfoSchema, - searchStatsSchema, -} from "./schemas"; +import { CodeHostType } from "@sourcebot/db"; import { z } from "zod"; -export type SearchRequest = z.infer; -export type SearchResponse = z.infer; -export type SearchResultLocation = z.infer; -export type SearchResultFile = SearchResponse["files"][number]; -export type SearchResultChunk = SearchResultFile["chunks"][number]; +export const locationSchema = z.object({ + byteOffset: z.number(), // 0-based byte offset from the beginning of the file + lineNumber: z.number(), // 1-based line number from the beginning of the file + column: z.number(), // 1-based column number (in runes) from the beginning of line +}); +export type SourceLocation = z.infer; + +export const rangeSchema = z.object({ + start: locationSchema, + end: locationSchema, +}); +export type SourceRange = z.infer; + +export const symbolSchema = z.object({ + symbol: z.string(), + kind: z.string(), +}); export type SearchSymbol = z.infer; -export type FileSourceRequest = z.infer; -export type FileSourceResponse = z.infer; - +export const repositoryInfoSchema = z.object({ + id: z.number(), + codeHostType: z.nativeEnum(CodeHostType), + name: z.string(), + displayName: z.string().optional(), + webUrl: z.string().optional(), +}); export type RepositoryInfo = z.infer; -export type SourceRange = z.infer; -export type SearchStats = z.infer; \ No newline at end of file + +// @note: Many of these fields are defined in zoekt/api.go. +export const searchStatsSchema = z.object({ + actualMatchCount: z.number(), // The actual number of matches returned by the search. This will always be less than or equal to `totalMatchCount`. + totalMatchCount: z.number(), // The total number of matches found during the search. + duration: z.number(), // The duration (in nanoseconds) of the search. + fileCount: z.number(), // Number of files containing a match. + filesSkipped: z.number(), // Candidate files whose contents weren't examined because we gathered enough matches. + contentBytesLoaded: z.number(), // Amount of I/O for reading contents. + indexBytesLoaded: z.number(), // Amount of I/O for reading from index. + crashes: z.number(), // Number of search shards that had a crash. + shardFilesConsidered: z.number(), // Number of files in shards that we considered. + filesConsidered: z.number(), // Files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true. + filesLoaded: z.number(), // Files for which we loaded file content to verify substring matches + shardsScanned: z.number(), // Shards that we scanned to find matches. + shardsSkipped: z.number(), // Shards that we did not process because a query was canceled. + shardsSkippedFilter: z.number(), // Shards that we did not process because the query was rejected by the ngram filter indicating it had no matches. + ngramMatches: z.number(), // Number of candidate matches as a result of searching ngrams. + ngramLookups: z.number(), // NgramLookups is the number of times we accessed an ngram in the index. + wait: z.number(), // Wall clock time for queued search. + matchTreeConstruction: z.number(), // Aggregate wall clock time spent constructing and pruning the match tree. This accounts for time such as lookups in the trigram index. + matchTreeSearch: z.number(), // Aggregate wall clock time spent searching the match tree. This accounts for the bulk of search work done looking for matches. + regexpsConsidered: z.number(), // Number of times regexp was called on files that we evaluated. + flushReason: z.number(), // FlushReason explains why results were flushed. +}); +export type SearchStats = z.infer; + +export const searchFileSchema = z.object({ + fileName: z.object({ + // The name of the file + text: z.string(), + // Any matching ranges + matchRanges: z.array(rangeSchema), + }), + webUrl: z.string().optional(), + repository: z.string(), + repositoryId: z.number(), + language: z.string(), + chunks: z.array(z.object({ + content: z.string(), + matchRanges: z.array(rangeSchema), + contentStart: locationSchema, + symbols: z.array(z.object({ + ...symbolSchema.shape, + parent: symbolSchema.optional(), + })).optional(), + })), + branches: z.array(z.string()).optional(), + // Set if `whole` is true. + content: z.string().optional(), +}); +export type SearchResultFile = z.infer; +export type SearchResultChunk = SearchResultFile["chunks"][number]; + +export const searchRequestSchema = z.object({ + query: z.string(), // The zoekt query to execute. + matches: z.number(), // The number of matches to return. + contextLines: z.number().optional(), // The number of context lines to return. + whole: z.boolean().optional(), // Whether to return the whole file as part of the response. + isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search. + isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity. +}); +export type SearchRequest = z.infer; + +export const searchResponseSchema = z.object({ + stats: searchStatsSchema, + files: z.array(searchFileSchema), + repositoryInfo: z.array(repositoryInfoSchema), + isBranchFilteringEnabled: z.boolean(), + isSearchExhaustive: z.boolean(), + __debug_timings: z.record(z.string(), z.number()).optional(), +}); +export type SearchResponse = z.infer; + +/** + * Sent after each chunk of results is processed. + */ +export const streamedSearchChunkResponseSchema = z.object({ + type: z.literal('chunk'), + stats: searchStatsSchema, + files: z.array(searchFileSchema), + repositoryInfo: z.array(repositoryInfoSchema), +}); +export type StreamedSearchChunkResponse = z.infer; + +/** + * Sent after the search is complete. + */ +export const streamedSearchFinalResponseSchema = z.object({ + type: z.literal('final'), + accumulatedStats: searchStatsSchema, + isSearchExhaustive: z.boolean(), +}); +export type StreamedSearchFinalResponse = z.infer; + + +export const streamedSearchResponseSchema = z.discriminatedUnion('type', [ + streamedSearchChunkResponseSchema, + streamedSearchFinalResponseSchema, +]); +export type StreamedSearchResponse = z.infer; + +export const fileSourceRequestSchema = z.object({ + fileName: z.string(), + repository: z.string(), + branch: z.string().optional(), +}); +export type FileSourceRequest = z.infer; + +export const fileSourceResponseSchema = z.object({ + source: z.string(), + language: z.string(), + path: z.string(), + repository: z.string(), + repositoryCodeHostType: z.nativeEnum(CodeHostType), + repositoryDisplayName: z.string().optional(), + repositoryWebUrl: z.string().optional(), + branch: z.string().optional(), + webUrl: z.string().optional(), +}); +export type FileSourceResponse = z.infer;