'use server';

import { searchRequestSchema, SearchStats, SourceRange, StreamedSearchResponse } from '@/features/search/types';
import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
import { schemaValidationError, serviceErrorResponse } from '@/lib/serviceError';
import { prisma } from '@/prisma';
import type { ProtoGrpcType } from '@/proto/webserver';
import { FileMatch__Output } from '@/proto/zoekt/webserver/v1/FileMatch';
import { Range__Output } from '@/proto/zoekt/webserver/v1/Range';
import type { SearchRequest } from '@/proto/zoekt/webserver/v1/SearchRequest';
import { SearchResponse__Output } from '@/proto/zoekt/webserver/v1/SearchResponse';
import type { StreamSearchRequest } from '@/proto/zoekt/webserver/v1/StreamSearchRequest';
import type { StreamSearchResponse__Output } from '@/proto/zoekt/webserver/v1/StreamSearchResponse';
import type { WebserverServiceClient } from '@/proto/zoekt/webserver/v1/WebserverService';
import * as grpc from '@grpc/grpc-js';
import * as protoLoader from '@grpc/proto-loader';
import * as Sentry from '@sentry/nextjs';
import { PrismaClient, Repo } from '@sourcebot/db';
import { parser as _parser } from '@sourcebot/query-language';
import { createLogger, env } from '@sourcebot/shared';
import { NextRequest } from 'next/server';
import * as path from 'path';
import { transformToZoektQuery } from './transformer';

const logger = createLogger('streamSearchApi');
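
// Request flow: the POST handler validates the request body, parses the query with the
// @sourcebot/query-language parser, transforms it into a Zoekt query, and streams results
// back to the client as Server-Sent Events (see `createSSESearchStream`).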

/**
 * Create a gRPC client for the Zoekt webserver.
 */
function createGrpcClient(): WebserverServiceClient {
    // Path to proto files - these should match your monorepo structure.
    const protoBasePath = path.join(process.cwd(), '../../vendor/zoekt/grpc/protos');
    const protoPath = path.join(protoBasePath, 'zoekt/webserver/v1/webserver.proto');

    const packageDefinition = protoLoader.loadSync(protoPath, {
        keepCase: true,
        longs: Number,
        enums: String,
        defaults: true,
        oneofs: true,
        includeDirs: [protoBasePath],
    });

    const proto = grpc.loadPackageDefinition(packageDefinition) as unknown as ProtoGrpcType;

    // Extract host and port from ZOEKT_WEBSERVER_URL.
    const zoektUrl = new URL(env.ZOEKT_WEBSERVER_URL);
    const grpcAddress = `${zoektUrl.hostname}:${zoektUrl.port}`;

    return new proto.zoekt.webserver.v1.WebserverService(
        grpcAddress,
        grpc.credentials.createInsecure(),
        {
            'grpc.max_receive_message_length': 500 * 1024 * 1024, // 500MB
            'grpc.max_send_message_length': 500 * 1024 * 1024, // 500MB
        }
    );
}
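
// Note: the client uses insecure channel credentials and a 500MB message cap; this assumes
// the Zoekt webserver is only reachable over a trusted internal network.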

/**
 * POST handler for streaming search via SSE.
 */
export const POST = async (request: NextRequest) => {
    try {
        // Parse and validate request body.
        const body = await request.json();
        const parsed = await searchRequestSchema.safeParseAsync(body);
        if (!parsed.success) {
            return serviceErrorResponse(schemaValidationError(parsed.error));
        }

        const {
            query,
            matches,
            contextLines,
            whole,
            isRegexEnabled = false,
            isCaseSensitivityEnabled = false,
        } = parsed.data;

        const parser = _parser.configure({
            strict: true,
        });

        const tree = parser.parse(query);
        const zoektQuery = await transformToZoektQuery({
            tree,
            input: query,
            isCaseSensitivityEnabled,
            isRegexEnabled,
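            // Resolves a named search context into the list of repository names it contains,
            // looked up from the database for the single-tenant org.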
            onExpandSearchContext: async (contextName: string) => {
                const context = await prisma.searchContext.findUnique({
                    where: {
                        name_orgId: {
                            name: contextName,
                            orgId: SINGLE_TENANT_ORG_ID,
                        }
                    },
                    include: {
                        repos: true,
                    }
                });

                if (!context) {
                    throw new Error(`Search context "${contextName}" not found`);
                }

                return context.repos.map((repo) => repo.name);
            },
        });

        logger.debug(JSON.stringify(zoektQuery, null, 2));

        const searchRequest: SearchRequest = {
            query: {
                and: {
                    children: [
                        zoektQuery,
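                        // Restrict matches to the HEAD branch.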
                        {
                            branch: {
                                pattern: 'HEAD',
                                exact: true,
                            }
                        }
                    ]
                }
            },
            opts: {
                chunk_matches: true,
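                // Ask Zoekt for one more match than will be displayed; this (presumably) is how
                // the final SSE frame can tell whether the search was exhaustive.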
                max_match_display_count: matches,
                total_max_match_count: matches + 1,
                num_context_lines: contextLines,
                whole: !!whole,
                shard_max_match_count: -1,
                max_wall_time: {
                    seconds: 0,
                }
            },
        };

        // @nocheckin: this should be using the `prisma` instance from the auth context.
        const stream = await createSSESearchStream(searchRequest, prisma);

        // Return streaming response with SSE headers.
        return new Response(stream, {
            headers: {
                'Content-Type': 'text/event-stream',
                'Cache-Control': 'no-cache, no-transform',
                'Connection': 'keep-alive',
                'X-Accel-Buffering': 'no', // Disable nginx buffering if applicable.
            },
        });
    } catch (error) {
        logger.error('Request handling error:', error);
        return new Response(
            JSON.stringify({
                error: {
                    message: error instanceof Error ? error.message : 'Internal server error'
                }
            }),
            {
                status: 500,
                headers: { 'Content-Type': 'application/json' },
            }
        );
    }
};
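
/**
 * Bridges Zoekt's gRPC `StreamSearch` stream into a web `ReadableStream` of Server-Sent
 * Events. Each gRPC response chunk is transformed into a `StreamedSearchResponse` and
 * written as a `data: <json>\n\n` frame; once the gRPC stream ends, a final frame with
 * `type: 'final'` (carrying the accumulated stats) is sent, followed by `data: [DONE]\n\n`.
 */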
const createSSESearchStream = async (searchRequest: SearchRequest, prisma: PrismaClient): Promise<ReadableStream> => {
    const client = createGrpcClient();
    let grpcStream: ReturnType<WebserverServiceClient['StreamSearch']> | null = null;
    let isStreamActive = true;
    let pendingChunks = 0;
    let accumulatedStats: SearchStats = {
        actualMatchCount: 0,
        totalMatchCount: 0,
        duration: 0,
        fileCount: 0,
        filesSkipped: 0,
        contentBytesLoaded: 0,
        indexBytesLoaded: 0,
        crashes: 0,
        shardFilesConsidered: 0,
        filesConsidered: 0,
        filesLoaded: 0,
        shardsScanned: 0,
        shardsSkipped: 0,
        shardsSkippedFilter: 0,
        ngramMatches: 0,
        ngramLookups: 0,
        wait: 0,
        matchTreeConstruction: 0,
        matchTreeSearch: 0,
        regexpsConsidered: 0,
        flushReason: 0,
    };
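
    // `isStreamActive` and `pendingChunks` coordinate shutdown between the gRPC event
    // handlers and the ReadableStream controller: the controller is only closed once the
    // gRPC stream has ended and every in-flight chunk has been processed (see
    // `tryCloseController` below).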
    return new ReadableStream({
        async start(controller) {
            const tryCloseController = () => {
                if (!isStreamActive && pendingChunks === 0) {
                    const finalResponse: StreamedSearchResponse = {
                        type: 'final',
                        accumulatedStats,
                        isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount,
                    };

                    controller.enqueue(new TextEncoder().encode(`data: ${JSON.stringify(finalResponse)}\n\n`));
                    controller.enqueue(new TextEncoder().encode('data: [DONE]\n\n'));
                    controller.close();
                    client.close();
                    logger.debug('SSE stream closed');
                }
            };

            try {
                const metadata = new grpc.Metadata();

                const streamRequest: StreamSearchRequest = {
                    request: searchRequest,
                };

                grpcStream = client.StreamSearch(streamRequest, metadata);

                // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
                // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
                // metadata when transforming potentially thousands of file matches, we aggregate a unique
                // set of repository ids* and map them to their corresponding Repo record.
                //
                // *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`?
                // A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was
                // always undefined. To make this a non-breaking change, we fall back to using the repository's
                // name (`Repository`) as the identifier in these cases. This is not guaranteed to be unique,
                // but in practice it is, since the repository name includes the host and path (e.g.,
                // 'github.com/org/repo', 'gitea.com/org/repo', etc.).
                //
                // Note: When a repository is re-indexed (every hour) this ID will be populated.
                // @see: https://github.com/sourcebot-dev/zoekt/pull/6
                const getRepoIdForFile = (file: FileMatch__Output): string | number => {
                    return file.repository_id ?? file.repository;
                };

                // `_reposMapCache` is used to cache repository metadata across all chunks.
                // This reduces the number of database queries required to transform file matches.
                const _reposMapCache = new Map<string | number, Repo>();

                // Creates a mapping between all repository ids in a given response chunk.
                // The mapping allows us to efficiently look up repository metadata.
                const createReposMapForChunk = async (chunk: SearchResponse__Output): Promise<Map<string | number, Repo>> => {
                    const reposMap = new Map<string | number, Repo>();
                    await Promise.all(chunk.files.map(async (file) => {
                        const id = getRepoIdForFile(file);

                        const repo = await (async () => {
                            // If it's in the cache, return the cached value.
                            if (_reposMapCache.has(id)) {
                                return _reposMapCache.get(id);
                            }

                            // Otherwise, query the database for the record.
                            const repo = typeof id === 'number' ?
                                await prisma.repo.findUnique({
                                    where: {
                                        id: id,
                                    },
                                }) :
                                await prisma.repo.findFirst({
                                    where: {
                                        name: id,
                                    },
                                });

                            // If a repository is found, cache it for future lookups.
                            if (repo) {
                                _reposMapCache.set(id, repo);
                            }

                            return repo;
                        })();

                        // Only add the repository to the map if it was found.
                        if (repo) {
                            reposMap.set(id, repo);
                        }
                    }));

                    return reposMap;
                };

                // Handle incoming data chunks.
                grpcStream.on('data', async (chunk: StreamSearchResponse__Output) => {
                    if (!isStreamActive) {
                        logger.debug('SSE stream closed, skipping chunk');
                        return;
                    }

                    // Track that we're processing a chunk.
                    pendingChunks++;

                    // grpcStream.on() doesn't await our async handler, so we explicitly pause the
                    // stream here to prevent it from completing before our asynchronous work has
                    // finished.
                    grpcStream?.pause();

                    try {
                        if (!chunk.response_chunk) {
                            logger.warn('No response chunk received');
                            return;
                        }

                        const repoIdToRepoDBRecordMap = await createReposMapForChunk(chunk.response_chunk);

                        const files = chunk.response_chunk.files.map((file) => {
                            const fileNameChunks = file.chunk_matches.filter((chunk) => chunk.file_name);
                            const repoId = getRepoIdForFile(file);
                            const repo = repoIdToRepoDBRecordMap.get(repoId);

                            // This can happen if the user doesn't have access to the repository.
                            if (!repo) {
                                return undefined;
                            }

                            // @todo: address "file_name might not be a valid UTF-8 string" warning.
                            const fileName = file.file_name.toString('utf-8');

                            const convertRange = (range: Range__Output): SourceRange => ({
                                start: {
                                    byteOffset: range.start?.byte_offset ?? 0,
                                    column: range.start?.column ?? 0,
                                    lineNumber: range.start?.line_number ?? 0,
                                },
                                end: {
                                    byteOffset: range.end?.byte_offset ?? 0,
                                    column: range.end?.column ?? 0,
                                    lineNumber: range.end?.line_number ?? 0,
                                }
                            });

                            return {
                                fileName: {
                                    text: fileName,
                                    matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].ranges.map(convertRange) : [],
                                },
                                repository: repo.name,
                                repositoryId: repo.id,
                                language: file.language,
                                // @todo: we will need to have a mechanism of forming the file's web url.
                                webUrl: '',
                                chunks: file.chunk_matches
                                    .filter((chunk) => !chunk.file_name) // filter out filename chunks.
                                    .map((chunk) => {
                                        return {
                                            content: chunk.content.toString('utf-8'),
                                            matchRanges: chunk.ranges.map(convertRange),
                                            contentStart: chunk.content_start ? {
                                                byteOffset: chunk.content_start.byte_offset,
                                                column: chunk.content_start.column,
                                                lineNumber: chunk.content_start.line_number,
                                            } : {
                                                byteOffset: 0,
                                                column: 0,
                                                lineNumber: 0,
                                            },
                                            symbols: chunk.symbol_info.map((symbol) => {
                                                return {
                                                    symbol: symbol.sym,
                                                    kind: symbol.kind,
                                                    parent: symbol.parent ? {
                                                        symbol: symbol.parent,
                                                        kind: symbol.parent_kind,
                                                    } : undefined,
                                                }
                                            })
                                        }
                                    }),
                                branches: file.branches,
                                content: file.content ? file.content.toString('utf-8') : undefined,
                            }
                        }).filter(file => file !== undefined);

                        const actualMatchCount = files.reduce(
                            (acc, file) =>
                                // Match count is the sum of the number of chunk matches and file name matches.
                                acc + file.chunks.reduce(
                                    (acc, chunk) => acc + chunk.matchRanges.length,
                                    0,
                                ) + file.fileName.matchRanges.length,
                            0,
                        );
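
                        // Per-chunk stats from Zoekt, mapped onto the camelCase `SearchStats` shape.
                        // Note: durations only read the `nanos` component of the proto duration.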
                        const stats: SearchStats = {
                            actualMatchCount,
                            totalMatchCount: chunk.response_chunk.stats?.match_count ?? 0,
                            duration: chunk.response_chunk.stats?.duration?.nanos ?? 0,
                            fileCount: chunk.response_chunk.stats?.file_count ?? 0,
                            filesSkipped: chunk.response_chunk.stats?.files_skipped ?? 0,
                            contentBytesLoaded: chunk.response_chunk.stats?.content_bytes_loaded ?? 0,
                            indexBytesLoaded: chunk.response_chunk.stats?.index_bytes_loaded ?? 0,
                            crashes: chunk.response_chunk.stats?.crashes ?? 0,
                            shardFilesConsidered: chunk.response_chunk.stats?.shard_files_considered ?? 0,
                            filesConsidered: chunk.response_chunk.stats?.files_considered ?? 0,
                            filesLoaded: chunk.response_chunk.stats?.files_loaded ?? 0,
                            shardsScanned: chunk.response_chunk.stats?.shards_scanned ?? 0,
                            shardsSkipped: chunk.response_chunk.stats?.shards_skipped ?? 0,
                            shardsSkippedFilter: chunk.response_chunk.stats?.shards_skipped_filter ?? 0,
                            ngramMatches: chunk.response_chunk.stats?.ngram_matches ?? 0,
                            ngramLookups: chunk.response_chunk.stats?.ngram_lookups ?? 0,
                            wait: chunk.response_chunk.stats?.wait?.nanos ?? 0,
                            matchTreeConstruction: chunk.response_chunk.stats?.match_tree_construction?.nanos ?? 0,
                            matchTreeSearch: chunk.response_chunk.stats?.match_tree_search?.nanos ?? 0,
                            regexpsConsidered: chunk.response_chunk.stats?.regexps_considered ?? 0,
                            // @todo: handle this.
                            // flushReason: chunk.response_chunk.stats?.flush_reason ?? 0,
                            flushReason: 0,
                        };

                        accumulatedStats = accumulateStats(accumulatedStats, stats);

                        const response: StreamedSearchResponse = {
                            type: 'chunk',
                            files,
                            repositoryInfo: Array.from(repoIdToRepoDBRecordMap.values()).map((repo) => ({
                                id: repo.id,
                                codeHostType: repo.external_codeHostType,
                                name: repo.name,
                                displayName: repo.displayName ?? undefined,
                                webUrl: repo.webUrl ?? undefined,
                            })),
                            stats,
                        };

                        const sseData = `data: ${JSON.stringify(response)}\n\n`;
                        controller.enqueue(new TextEncoder().encode(sseData));
                    } catch (error) {
                        logger.error('Error encoding chunk:', error);
                    } finally {
                        pendingChunks--;
                        grpcStream?.resume();

                        // @note: we were hitting "Controller is already closed" errors when calling
                        // `controller.enqueue` above for the last chunk. The cause was that the 'end'
                        // event handler could be invoked before the last chunk finished processing,
                        // closing the controller prematurely. The workaround is to keep track of the
                        // number of pending chunks and only close the controller once there are no
                        // more chunks to process. We need to explicitly call `tryCloseController` here
                        // since there _seems_ to be no ordering guarantee between the 'end' event
                        // handler and this callback.
                        tryCloseController();
                    }
                });

                // Handle stream completion.
                grpcStream.on('end', () => {
                    if (!isStreamActive) {
                        return;
                    }
                    isStreamActive = false;
                    tryCloseController();
                });

                // Handle errors.
                grpcStream.on('error', (error: grpc.ServiceError) => {
                    logger.error('gRPC stream error:', error);
                    Sentry.captureException(error);

                    if (!isStreamActive) {
                        return;
                    }
                    isStreamActive = false;

                    // Send error as SSE event.
                    const errorData = `data: ${JSON.stringify({
                        error: {
                            code: error.code,
                            message: error.details || error.message,
                        }
                    })}\n\n`;
                    controller.enqueue(new TextEncoder().encode(errorData));

                    controller.close();
                    client.close();
                });
            } catch (error) {
                logger.error('Stream initialization error:', error);

                const errorMessage = error instanceof Error ? error.message : 'Unknown error';
                const errorData = `data: ${JSON.stringify({
                    error: { message: errorMessage }
                })}\n\n`;
                controller.enqueue(new TextEncoder().encode(errorData));

                controller.close();
                client.close();
            }
        },
        cancel() {
            logger.warn('SSE stream cancelled by client');
            isStreamActive = false;

            // Cancel the gRPC stream to stop receiving data.
            if (grpcStream) {
                grpcStream.cancel();
            }

            client.close();
        }
    });
};
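
/**
 * Merges the per-chunk stats reported by Zoekt into the running totals sent with the
 * final SSE frame. All counters are additive; `flushReason` keeps the first non-zero
 * value seen.
 */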
const accumulateStats = (a: SearchStats, b: SearchStats): SearchStats => {
    return {
        actualMatchCount: a.actualMatchCount + b.actualMatchCount,
        totalMatchCount: a.totalMatchCount + b.totalMatchCount,
        duration: a.duration + b.duration,
        fileCount: a.fileCount + b.fileCount,
        filesSkipped: a.filesSkipped + b.filesSkipped,
        contentBytesLoaded: a.contentBytesLoaded + b.contentBytesLoaded,
        indexBytesLoaded: a.indexBytesLoaded + b.indexBytesLoaded,
        crashes: a.crashes + b.crashes,
        shardFilesConsidered: a.shardFilesConsidered + b.shardFilesConsidered,
        filesConsidered: a.filesConsidered + b.filesConsidered,
        filesLoaded: a.filesLoaded + b.filesLoaded,
        shardsScanned: a.shardsScanned + b.shardsScanned,
        shardsSkipped: a.shardsSkipped + b.shardsSkipped,
        shardsSkippedFilter: a.shardsSkippedFilter + b.shardsSkippedFilter,
        ngramMatches: a.ngramMatches + b.ngramMatches,
        ngramLookups: a.ngramLookups + b.ngramLookups,
        wait: a.wait + b.wait,
        matchTreeConstruction: a.matchTreeConstruction + b.matchTreeConstruction,
        matchTreeSearch: a.matchTreeSearch + b.matchTreeSearch,
        regexpsConsidered: a.regexpsConsidered + b.regexpsConsidered,
        ...(a.flushReason === 0 ? {
            flushReason: b.flushReason
        } : {
            flushReason: a.flushReason,
        }),
    };
};