mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-14 05:15:19 +00:00
add isSearchExhaustive flag for flagging when a search captured all results
parent 5ab585c021
commit 9c9b6b9578
10 changed files with 360 additions and 281 deletions
@@ -62,6 +62,8 @@ export const SearchResultsPage = ({
durationMs,
isStreaming,
numMatches,
isExhaustive,
stats,
} = useStreamedSearch({
query: searchQuery,
matches: maxMatchCount,

@@ -170,10 +172,8 @@ export const SearchResultsPage = ({
repoInfo={repoInfo}
searchDurationMs={durationMs}
isStreaming={isStreaming}
// @todo: handle search stats
searchStats={undefined}
// @todo: detect when more results are available
isMoreResultsButtonVisible={false}
searchStats={stats}
isMoreResultsButtonVisible={!isExhaustive}
// @todo: handle branch filtering
isBranchFilteringEnabled={false}
/>

@@ -1,6 +1,6 @@
'use client';
import { RepositoryInfo, SearchRequest, SearchResponse, SearchResultFile } from '@/features/search/types';
import { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search/types';
import { useState, useCallback, useRef, useEffect } from 'react';
import * as Sentry from '@sentry/nextjs';

@@ -10,6 +10,7 @@ interface CacheEntry {
numMatches: number;
durationMs: number;
timestamp: number;
isExhaustive: boolean;
}
const searchCache = new Map<string, CacheEntry>();

@@ -34,18 +35,22 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
const [state, setState] = useState<{
isStreaming: boolean,
isExhaustive: boolean,
error: Error | null,
files: SearchResultFile[],
repoInfo: Record<number, RepositoryInfo>,
durationMs: number,
numMatches: number,
stats?: SearchStats,
}>({
isStreaming: false,
isExhaustive: false,
error: null,
files: [],
repoInfo: {},
durationMs: 0,
numMatches: 0,
stats: undefined,
});
const abortControllerRef = useRef<AbortController | null>(null);

@@ -85,6 +90,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
console.debug('Using cached search results');
setState({
isStreaming: false,
isExhaustive: cachedEntry.isExhaustive,
error: null,
files: cachedEntry.files,
repoInfo: cachedEntry.repoInfo,

@@ -96,6 +102,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
setState({
isStreaming: true,
isExhaustive: false,
error: null,
files: [],
repoInfo: {},

@@ -167,22 +174,33 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
break;
}
const chunk: SearchResponse = JSON.parse(data);
const response: StreamedSearchResponse = JSON.parse(data);
switch (response.type) {
case 'chunk':
setState(prev => ({
...prev,
files: [
...prev.files,
...chunk.files
...response.files
],
repoInfo: {
...prev.repoInfo,
...chunk.repositoryInfo.reduce((acc, repo) => {
...response.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
},
numMatches: prev.numMatches + chunk.stats.actualMatchCount,
numMatches: prev.numMatches + response.stats.actualMatchCount,
}));
break;
case 'final':
setState(prev => ({
...prev,
isExhaustive: response.isSearchExhaustive,
stats: response.accumulatedStats,
}));
break;
}
}
}

@@ -192,6 +210,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex
searchCache.set(cacheKey, {
files: prev.files,
repoInfo: prev.repoInfo,
isExhaustive: prev.isExhaustive,
numMatches: prev.numMatches,
durationMs,
timestamp: Date.now(),

@@ -4,7 +4,7 @@ import { search } from "@/features/search/searchApi";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { searchRequestSchema } from "@/features/search/schemas";
import { searchRequestSchema } from "@/features/search/types";
export const POST = async (request: NextRequest) => {
const body = await request.json();

@@ -4,7 +4,7 @@ import { getFileSource } from "@/features/search/fileSourceApi";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { fileSourceRequestSchema } from "@/features/search/schemas";
import { fileSourceRequestSchema } from "@/features/search/types";
export const POST = async (request: NextRequest) => {
const body = await request.json();

@@ -1,13 +1,14 @@
'use server';
import { searchRequestSchema } from '@/features/search/schemas';
import { SearchResponse, SourceRange } from '@/features/search/types';
import { searchRequestSchema, SearchStats, SourceRange, StreamedSearchResponse } from '@/features/search/types';
import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
import { schemaValidationError, serviceErrorResponse } from '@/lib/serviceError';
import { prisma } from '@/prisma';
import type { ProtoGrpcType } from '@/proto/webserver';
import { FileMatch__Output } from '@/proto/zoekt/webserver/v1/FileMatch';
import { Range__Output } from '@/proto/zoekt/webserver/v1/Range';
import type { SearchRequest } from '@/proto/zoekt/webserver/v1/SearchRequest';
import { SearchResponse__Output } from '@/proto/zoekt/webserver/v1/SearchResponse';
import type { StreamSearchRequest } from '@/proto/zoekt/webserver/v1/StreamSearchRequest';
import type { StreamSearchResponse__Output } from '@/proto/zoekt/webserver/v1/StreamSearchResponse';
import type { WebserverServiceClient } from '@/proto/zoekt/webserver/v1/WebserverService';

@@ -109,8 +110,22 @@ export const POST = async (request: NextRequest) => {
},
});
console.log(JSON.stringify(zoektQuery, null, 2));
const searchRequest: SearchRequest = {
query: zoektQuery,
query: {
and: {
children: [
zoektQuery,
{
branch: {
pattern: 'HEAD',
exact: true,
}
}
]
}
},
opts: {
chunk_matches: true,
max_match_display_count: matches,

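Worth spelling out for the hunk above: the parsed user query is no longer passed through as-is; it is AND-ed with an exact branch filter so that, until branch filtering is supported, only the indexed HEAD revision is searched. A rough sketch of the composed request shape (illustrative only; it restates the structure added above, assuming `zoektQuery` and `matches` are in scope):

const exampleSearchRequest: SearchRequest = {
    query: {
        and: {
            children: [
                zoektQuery,                                   // the user's parsed zoekt query
                { branch: { pattern: 'HEAD', exact: true } }, // restrict matches to the indexed HEAD revision
            ],
        },
    },
    opts: {
        chunk_matches: true,
        max_match_display_count: matches,
    },
};
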
@@ -158,11 +173,41 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
let grpcStream: ReturnType<WebserverServiceClient['StreamSearch']> | null = null;
let isStreamActive = true;
let pendingChunks = 0;
let accumulatedStats: SearchStats = {
actualMatchCount: 0,
totalMatchCount: 0,
duration: 0,
fileCount: 0,
filesSkipped: 0,
contentBytesLoaded: 0,
indexBytesLoaded: 0,
crashes: 0,
shardFilesConsidered: 0,
filesConsidered: 0,
filesLoaded: 0,
shardsScanned: 0,
shardsSkipped: 0,
shardsSkippedFilter: 0,
ngramMatches: 0,
ngramLookups: 0,
wait: 0,
matchTreeConstruction: 0,
matchTreeSearch: 0,
regexpsConsidered: 0,
flushReason: 0,
};
return new ReadableStream({
async start(controller) {
const tryCloseController = () => {
if (!isStreamActive && pendingChunks === 0) {
const finalResponse: StreamedSearchResponse = {
type: 'final',
accumulatedStats,
isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount,
}
controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finalResponse)}\n\n`));
controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
controller.close();
client.close();

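The stream above emits plain server-sent-event frames: each message is a single `data: <json>` line followed by a blank line, and the stream ends with a `data: [DONE]` sentinel after the 'final' message. A minimal consumer sketch (illustrative only; the endpoint path is an assumption, and the real client lives in the useStreamedSearch hook shown earlier):

import { SearchRequest, StreamedSearchResponse } from '@/features/search/types';

// Reads the SSE stream and dispatches on the discriminated `type` field.
const consumeSearchStream = async (request: SearchRequest) => {
    const response = await fetch('/api/search/stream', { // assumed endpoint path
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(request),
    });

    const reader = response.body!.pipeThrough(new TextDecoderStream()).getReader();
    let buffer = '';

    while (true) {
        const { value, done } = await reader.read();
        if (done) break;
        buffer += value;

        // Each SSE frame is terminated by a blank line ("\n\n").
        let boundary: number;
        while ((boundary = buffer.indexOf('\n\n')) !== -1) {
            const frame = buffer.slice(0, boundary);
            buffer = buffer.slice(boundary + 2);

            const data = frame.replace(/^data: /, '');
            if (data === '[DONE]') return;

            const message: StreamedSearchResponse = JSON.parse(data);
            if (message.type === 'chunk') {
                // accumulate message.files, message.repositoryInfo, message.stats
            } else {
                // 'final': message.accumulatedStats, message.isSearchExhaustive
            }
        }
    }
};
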
@@ -195,7 +240,56 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
//
// Note: When a repository is re-indexed (every hour) this ID will be populated.
// @see: https://github.com/sourcebot-dev/zoekt/pull/6
const repos = new Map<string | number, Repo>();
const getRepoIdForFile = (file: FileMatch__Output): string | number => {
return file.repository_id ?? file.repository;
}
// `_reposMapCache` is used to cache repository metadata across all chunks.
// This reduces the number of database queries required to transform file matches.
const _reposMapCache = new Map<string | number, Repo>();
// Creates a mapping between all repository ids in a given response
// chunk. The mapping allows us to efficiently lookup repository metadata.
const createReposMapForChunk = async (chunk: SearchResponse__Output): Promise<Map<string | number, Repo>> => {
const reposMap = new Map<string | number, Repo>();
await Promise.all(chunk.files.map(async (file) => {
const id = getRepoIdForFile(file);
const repo = await (async () => {
// If it's in the cache, return the cached value.
if (_reposMapCache.has(id)) {
return _reposMapCache.get(id);
}
// Otherwise, query the database for the record.
const repo = typeof id === 'number' ?
await prisma.repo.findUnique({
where: {
id: id,
},
}) :
await prisma.repo.findFirst({
where: {
name: id,
},
});
// If a repository is found, cache it for future lookups.
if (repo) {
_reposMapCache.set(id, repo);
}
return repo;
})();
// Only add the repository to the map if it was found.
if (repo) {
reposMap.set(id, repo);
}
}));
return reposMap;
}
// Handle incoming data chunks
grpcStream.on('data', async (chunk: StreamSearchResponse__Output) => {

@@ -218,32 +312,12 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
return;
}
const files = (await Promise.all(chunk.response_chunk.files.map(async (file) => {
const repoIdToRepoDBRecordMap = await createReposMapForChunk(chunk.response_chunk);
const files = chunk.response_chunk.files.map((file) => {
const fileNameChunks = file.chunk_matches.filter((chunk) => chunk.file_name);
const identifier = file.repository_id ?? file.repository;
// If the repository is not in the map, fetch it from the database.
if (!repos.has(identifier)) {
const repo = typeof identifier === 'number' ?
await prisma.repo.findUnique({
where: {
id: identifier,
},
}) :
await prisma.repo.findFirst({
where: {
name: identifier,
},
});
if (repo) {
repos.set(identifier, repo);
}
}
const repo = repos.get(identifier);
const repoId = getRepoIdForFile(file);
const repo = repoIdToRepoDBRecordMap.get(repoId);
// This can happen if the user doesn't have access to the repository.
if (!repo) {

@@ -307,7 +381,7 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
branches: file.branches,
content: file.content ? file.content.toString('utf-8') : undefined,
}
}))).filter(file => file !== undefined);
}).filter(file => file !== undefined);
const actualMatchCount = files.reduce(
(acc, file) =>

@@ -319,43 +393,45 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
0,
);
const response: SearchResponse = {
const stats: SearchStats = {
actualMatchCount,
totalMatchCount: chunk.response_chunk.stats?.match_count ?? 0,
duration: chunk.response_chunk.stats?.duration?.nanos ?? 0,
fileCount: chunk.response_chunk.stats?.file_count ?? 0,
filesSkipped: chunk.response_chunk.stats?.files_skipped ?? 0,
contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded ?? 0,
indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded ?? 0,
crashes: chunk.response_chunk.stats?.crashes ?? 0,
shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered ?? 0,
filesConsidered: chunk.response_chunk.stats?.files_considered ?? 0,
filesLoaded: chunk.response_chunk.stats?.files_loaded ?? 0,
shardsScanned: chunk.response_chunk.stats?.shards_scanned ?? 0,
shardsSkipped: chunk.response_chunk.stats?.shards_skipped ?? 0,
shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter ?? 0,
ngramMatches: chunk.response_chunk.stats?.ngram_matches ?? 0,
ngramLookups: chunk.response_chunk.stats?.ngram_lookups ?? 0,
wait: chunk.response_chunk.stats?.wait?.nanos ?? 0,
matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0,
matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0,
regexpsConsidered: chunk.response_chunk.stats?.regexps_considered ?? 0,
// @todo: handle this.
// flushReason: chunk.response_chunk.stats?.flush_reason ?? 0,
flushReason: 0
}
accumulatedStats = accumulateStats(accumulatedStats, stats);
const response: StreamedSearchResponse = {
type: 'chunk',
files,
repositoryInfo: Array.from(repos.values()).map((repo) => ({
repositoryInfo: Array.from(repoIdToRepoDBRecordMap.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled: false,
// @todo: we will need to figure out how to handle if a search is exhaustive or not
isSearchExhaustive: false,
stats: {
actualMatchCount,
// @todo: todo -
totalMatchCount: 0,
duration: chunk.response_chunk.stats?.duration?.nanos ?? 0,
fileCount: chunk.response_chunk.stats?.file_count.valueOf() ?? 0,
filesSkipped: chunk.response_chunk.stats?.files_skipped.valueOf() ?? 0,
contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded.valueOf() ?? 0,
indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded.valueOf() ?? 0,
crashes: chunk.response_chunk.stats?.crashes.valueOf() ?? 0,
shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered.valueOf() ?? 0,
filesConsidered: chunk.response_chunk.stats?.files_considered.valueOf() ?? 0,
filesLoaded: chunk.response_chunk.stats?.files_loaded.valueOf() ?? 0,
shardsScanned: chunk.response_chunk.stats?.shards_scanned.valueOf() ?? 0,
shardsSkipped: chunk.response_chunk.stats?.shards_skipped.valueOf() ?? 0,
shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter.valueOf() ?? 0,
ngramMatches: chunk.response_chunk.stats?.ngram_matches.valueOf() ?? 0,
ngramLookups: chunk.response_chunk.stats?.ngram_lookups.valueOf() ?? 0,
wait: chunk.response_chunk.stats?.wait?.nanos ?? 0,
matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0,
matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0,
regexpsConsidered: chunk.response_chunk.stats?.regexps_considered.valueOf() ?? 0,
// @todo: handle this.
flushReason: 0,
}
stats
}
const sseData = `data: ${JSON.stringify(response)}\n\n`;

@@ -435,3 +511,33 @@ const createSSESearchStream = async (searchRequest: SearchRequest, prisma: Prism
}
});
}
const accumulateStats = (a: SearchStats, b: SearchStats): SearchStats => {
return {
actualMatchCount: a.actualMatchCount + b.actualMatchCount,
totalMatchCount: a.totalMatchCount + b.totalMatchCount,
duration: a.duration + b.duration,
fileCount: a.fileCount + b.fileCount,
filesSkipped: a.filesSkipped + b.filesSkipped,
contentBytesLoaded: a.contentBytesLoaded + b.contentBytesLoaded,
indexBytesLoaded: a.indexBytesLoaded + b.indexBytesLoaded,
crashes: a.crashes + b.crashes,
shardFilesConsidered: a.shardFilesConsidered + b.shardFilesConsidered,
filesConsidered: a.filesConsidered + b.filesConsidered,
filesLoaded: a.filesLoaded + b.filesLoaded,
shardsScanned: a.shardsScanned + b.shardsScanned,
shardsSkipped: a.shardsSkipped + b.shardsSkipped,
shardsSkippedFilter: a.shardsSkippedFilter + b.shardsSkippedFilter,
ngramMatches: a.ngramMatches + b.ngramMatches,
ngramLookups: a.ngramLookups + b.ngramLookups,
wait: a.wait + b.wait,
matchTreeConstruction: a.matchTreeConstruction + b.matchTreeConstruction,
matchTreeSearch: a.matchTreeSearch + b.matchTreeSearch,
regexpsConsidered: a.regexpsConsidered + b.regexpsConsidered,
...(a.flushReason === 0 ? {
flushReason: b.flushReason
} : {
flushReason: a.flushReason,
}),
}
}

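For context on the new flag computed in tryCloseController above: per-chunk stats are folded together with accumulateStats, and the search is reported as exhaustive only when the matches actually returned cover every match zoekt counted. A small illustration with made-up numbers:

// Hypothetical totals after accumulating all chunks of a search capped at 100 display matches.
const actualMatchCount = 100; // matches actually returned to the client
const totalMatchCount = 250;  // matches zoekt found overall
const isSearchExhaustive = totalMatchCount <= actualMatchCount; // false → the client keeps its "more results" button visible
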
@@ -1,6 +1,6 @@
import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types";
import { getFileSource } from "@/features/search/fileSourceApi";
import { fileSourceResponseSchema } from "@/features/search/schemas";
import { fileSourceResponseSchema } from "@/features/search/types";
import { isServiceError } from "@/lib/utils";
import { createLogger } from "@sourcebot/shared";

@@ -1,7 +1,6 @@
import 'server-only';
import { sew } from "@/actions";
import { searchResponseSchema } from "@/features/search/schemas";
import { search } from "@/features/search/searchApi";
import { ServiceError } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils";

@@ -59,12 +58,12 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols
return parseRelatedSymbolsSearchResponse(searchResult);
}));
const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
const parser = searchResponseSchema.transform(async ({ files }) => ({
const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRelatedSymbolsResponse => {
return {
stats: {
matchCount: searchResult.stats.actualMatchCount,
},
files: files.flatMap((file) => {
files: searchResult.files.flatMap((file) => {
const chunks = file.chunks;
return {

@@ -82,9 +81,7 @@ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
}
}).filter((file) => file.matches.length > 0),
repositoryInfo: searchResult.repositoryInfo
}));
return parser.parseAsync(searchResult);
};
}
// Expands the language filter to include all variants of the language.

@@ -1,5 +1,5 @@
import { z } from "zod";
import { rangeSchema, repositoryInfoSchema } from "../search/schemas";
import { rangeSchema, repositoryInfoSchema } from "../search/types";
export const findRelatedSymbolsRequestSchema = z.object({
symbolName: z.string(),

@@ -1,167 +0,0 @@
// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/schemas.ts
import { CodeHostType } from "@sourcebot/db";
import { z } from "zod";
export const locationSchema = z.object({
// 0-based byte offset from the beginning of the file
byteOffset: z.number(),
// 1-based line number from the beginning of the file
lineNumber: z.number(),
// 1-based column number (in runes) from the beginning of line
column: z.number(),
});
export const rangeSchema = z.object({
start: locationSchema,
end: locationSchema,
});
export const symbolSchema = z.object({
symbol: z.string(),
kind: z.string(),
});
export const searchRequestSchema = z.object({
// The zoekt query to execute.
query: z.string(),
// The number of matches to return.
matches: z.number(),
// The number of context lines to return.
contextLines: z.number().optional(),
// Whether to return the whole file as part of the response.
whole: z.boolean().optional(),
// Whether to enable regular expression search.
isRegexEnabled: z.boolean().optional(),
// Whether to enable case sensitivity.
isCaseSensitivityEnabled: z.boolean().optional(),
});
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.nativeEnum(CodeHostType),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
});
// Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
// The actual number of matches returned by the search.
// This will always be less than or equal to `totalMatchCount`.
actualMatchCount: z.number(),
// The total number of matches found during the search.
totalMatchCount: z.number(),
// The duration (in nanoseconds) of the search.
duration: z.number(),
// Number of files containing a match.
fileCount: z.number(),
// Candidate files whose contents weren't examined because we
// gathered enough matches.
filesSkipped: z.number(),
// Amount of I/O for reading contents.
contentBytesLoaded: z.number(),
// Amount of I/O for reading from index.
indexBytesLoaded: z.number(),
// Number of search shards that had a crash.
crashes: z.number(),
// Number of files in shards that we considered.
shardFilesConsidered: z.number(),
// Files that we evaluated. Equivalent to files for which all
// atom matches (including negations) evaluated to true.
filesConsidered: z.number(),
// Files for which we loaded file content to verify substring matches
filesLoaded: z.number(),
// Shards that we scanned to find matches.
shardsScanned: z.number(),
// Shards that we did not process because a query was canceled.
shardsSkipped: z.number(),
// Shards that we did not process because the query was rejected by the
// ngram filter indicating it had no matches.
shardsSkippedFilter: z.number(),
// Number of candidate matches as a result of searching ngrams.
ngramMatches: z.number(),
// NgramLookups is the number of times we accessed an ngram in the index.
ngramLookups: z.number(),
// Wall clock time for queued search.
wait: z.number(),
// Aggregate wall clock time spent constructing and pruning the match tree.
// This accounts for time such as lookups in the trigram index.
matchTreeConstruction: z.number(),
// Aggregate wall clock time spent searching the match tree. This accounts
// for the bulk of search work done looking for matches.
matchTreeSearch: z.number(),
// Number of times regexp was called on files that we evaluated.
regexpsConsidered: z.number(),
// FlushReason explains why results were flushed.
flushReason: z.number(),
});
export const searchResponseSchema = z.object({
stats: searchStatsSchema,
files: z.array(z.object({
fileName: z.object({
// The name of the file
text: z.string(),
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
chunks: z.array(z.object({
content: z.string(),
matchRanges: z.array(rangeSchema),
contentStart: locationSchema,
symbols: z.array(z.object({
...symbolSchema.shape,
parent: symbolSchema.optional(),
})).optional(),
})),
branches: z.array(z.string()).optional(),
// Set if `whole` is true.
content: z.string().optional(),
})),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
__debug_timings: z.record(z.string(), z.number()).optional(),
});
export const fileSourceRequestSchema = z.object({
fileName: z.string(),
repository: z.string(),
branch: z.string().optional(),
});
export const fileSourceResponseSchema = z.object({
source: z.string(),
language: z.string(),
path: z.string(),
repository: z.string(),
repositoryCodeHostType: z.nativeEnum(CodeHostType),
repositoryDisplayName: z.string().optional(),
repositoryWebUrl: z.string().optional(),
branch: z.string().optional(),
webUrl: z.string().optional(),
});

@@ -1,27 +1,151 @@
// @NOTE : Please keep this file in sync with @sourcebot/mcp/src/types.ts
import {
fileSourceResponseSchema,
locationSchema,
searchRequestSchema,
searchResponseSchema,
rangeSchema,
fileSourceRequestSchema,
symbolSchema,
repositoryInfoSchema,
searchStatsSchema,
} from "./schemas";
import { CodeHostType } from "@sourcebot/db";
import { z } from "zod";
export type SearchRequest = z.infer<typeof searchRequestSchema>;
export type SearchResponse = z.infer<typeof searchResponseSchema>;
export type SearchResultLocation = z.infer<typeof locationSchema>;
export type SearchResultFile = SearchResponse["files"][number];
export type SearchResultChunk = SearchResultFile["chunks"][number];
export const locationSchema = z.object({
byteOffset: z.number(), // 0-based byte offset from the beginning of the file
lineNumber: z.number(), // 1-based line number from the beginning of the file
column: z.number(), // 1-based column number (in runes) from the beginning of line
});
export type SourceLocation = z.infer<typeof locationSchema>;
export const rangeSchema = z.object({
start: locationSchema,
end: locationSchema,
});
export type SourceRange = z.infer<typeof rangeSchema>;
export const symbolSchema = z.object({
symbol: z.string(),
kind: z.string(),
});
export type SearchSymbol = z.infer<typeof symbolSchema>;
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.nativeEnum(CodeHostType),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
});
export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;
export type SourceRange = z.infer<typeof rangeSchema>;
// @note: Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
actualMatchCount: z.number(), // The actual number of matches returned by the search. This will always be less than or equal to `totalMatchCount`.
totalMatchCount: z.number(), // The total number of matches found during the search.
duration: z.number(), // The duration (in nanoseconds) of the search.
fileCount: z.number(), // Number of files containing a match.
filesSkipped: z.number(), // Candidate files whose contents weren't examined because we gathered enough matches.
contentBytesLoaded: z.number(), // Amount of I/O for reading contents.
indexBytesLoaded: z.number(), // Amount of I/O for reading from index.
crashes: z.number(), // Number of search shards that had a crash.
shardFilesConsidered: z.number(), // Number of files in shards that we considered.
filesConsidered: z.number(), // Files that we evaluated. Equivalent to files for which all atom matches (including negations) evaluated to true.
filesLoaded: z.number(), // Files for which we loaded file content to verify substring matches
shardsScanned: z.number(), // Shards that we scanned to find matches.
shardsSkipped: z.number(), // Shards that we did not process because a query was canceled.
shardsSkippedFilter: z.number(), // Shards that we did not process because the query was rejected by the ngram filter indicating it had no matches.
ngramMatches: z.number(), // Number of candidate matches as a result of searching ngrams.
ngramLookups: z.number(), // NgramLookups is the number of times we accessed an ngram in the index.
wait: z.number(), // Wall clock time for queued search.
matchTreeConstruction: z.number(), // Aggregate wall clock time spent constructing and pruning the match tree. This accounts for time such as lookups in the trigram index.
matchTreeSearch: z.number(), // Aggregate wall clock time spent searching the match tree. This accounts for the bulk of search work done looking for matches.
regexpsConsidered: z.number(), // Number of times regexp was called on files that we evaluated.
flushReason: z.number(), // FlushReason explains why results were flushed.
});
export type SearchStats = z.infer<typeof searchStatsSchema>;
export const searchFileSchema = z.object({
fileName: z.object({
// The name of the file
text: z.string(),
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
chunks: z.array(z.object({
content: z.string(),
matchRanges: z.array(rangeSchema),
contentStart: locationSchema,
symbols: z.array(z.object({
...symbolSchema.shape,
parent: symbolSchema.optional(),
})).optional(),
})),
branches: z.array(z.string()).optional(),
// Set if `whole` is true.
content: z.string().optional(),
});
export type SearchResultFile = z.infer<typeof searchFileSchema>;
export type SearchResultChunk = SearchResultFile["chunks"][number];
export const searchRequestSchema = z.object({
query: z.string(), // The zoekt query to execute.
matches: z.number(), // The number of matches to return.
contextLines: z.number().optional(), // The number of context lines to return.
whole: z.boolean().optional(), // Whether to return the whole file as part of the response.
isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search.
isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity.
});
export type SearchRequest = z.infer<typeof searchRequestSchema>;
export const searchResponseSchema = z.object({
stats: searchStatsSchema,
files: z.array(searchFileSchema),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
__debug_timings: z.record(z.string(), z.number()).optional(),
});
export type SearchResponse = z.infer<typeof searchResponseSchema>;
/**
 * Sent after each chunk of results is processed.
 */
export const streamedSearchChunkResponseSchema = z.object({
type: z.literal('chunk'),
stats: searchStatsSchema,
files: z.array(searchFileSchema),
repositoryInfo: z.array(repositoryInfoSchema),
});
export type StreamedSearchChunkResponse = z.infer<typeof streamedSearchChunkResponseSchema>;
/**
 * Sent after the search is complete.
 */
export const streamedSearchFinalResponseSchema = z.object({
type: z.literal('final'),
accumulatedStats: searchStatsSchema,
isSearchExhaustive: z.boolean(),
});
export type StreamedSearchFinalResponse = z.infer<typeof streamedSearchFinalResponseSchema>;
export const streamedSearchResponseSchema = z.discriminatedUnion('type', [
streamedSearchChunkResponseSchema,
streamedSearchFinalResponseSchema,
]);
export type StreamedSearchResponse = z.infer<typeof streamedSearchResponseSchema>;
export const fileSourceRequestSchema = z.object({
fileName: z.string(),
repository: z.string(),
branch: z.string().optional(),
});
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export const fileSourceResponseSchema = z.object({
source: z.string(),
language: z.string(),
path: z.string(),
repository: z.string(),
repositoryCodeHostType: z.nativeEnum(CodeHostType),
repositoryDisplayName: z.string().optional(),
repositoryWebUrl: z.string().optional(),
branch: z.string().optional(),
webUrl: z.string().optional(),
});
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;

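Since the streamed payloads now have zod schemas, a consumer that wants runtime validation (the hook above simply casts the parsed JSON) could lean on the discriminated union. A small optional sketch, not something this commit adds:

import { streamedSearchResponseSchema, StreamedSearchResponse } from '@/features/search/types';

// Validates a single SSE `data:` payload and narrows it by its `type` field.
const parseStreamedSearchMessage = (data: string): StreamedSearchResponse => {
    const message = streamedSearchResponseSchema.parse(JSON.parse(data));
    if (message.type === 'final') {
        console.debug(`exhaustive=${message.isSearchExhaustive}, total=${message.accumulatedStats.totalMatchCount}`);
    }
    return message;
};
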