Compare commits

...

16 commits

Author SHA1 Message Date
Brendan Kellam
dc6bb954ef
Merge 7341a49407 into c3fae1aaab 2025-10-13 14:47:56 -05:00
Brendan Kellam
c3fae1aaab
feat(web): Improved search performance on unbounded searches (#555) 2025-10-07 23:55:36 -07:00
Brendan Kellam
18ba1d2492
update demo deploy cadence (#556) 2025-10-07 23:51:52 -07:00
bkellam
8d7babc8d2 chore(worker): Change log message to debug 2025-10-07 16:38:56 -07:00
bkellam
595abc12be use blacksmith arm machine for arm builds 2025-10-07 10:21:19 -07:00
blacksmith-sh[bot]
0e8fdf0f97
Migrate workflows to Blacksmith (#554)
Co-authored-by: blacksmith-sh[bot] <157653362+blacksmith-sh[bot]@users.noreply.github.com>
2025-10-07 10:05:27 -07:00
Brendan Kellam
83c6704b01
fix: Fix git dubious ownership errors (#553) 2025-10-06 19:54:17 -07:00
Brendan Kellam
5e3e4f000a
chore(web): Remove spam "login page loaded" log (#552) 2025-10-06 15:04:41 -07:00
msukkari
623c794a75 update description in docs 2025-10-04 10:03:29 -07:00
Brendan Kellam
425a816fb6
Update README.md 2025-10-03 21:38:49 -07:00
bkellam
6a4c9220bd chore: try including platform pair in cache key 2025-10-03 21:24:14 -07:00
Brendan Kellam
eeb6b73a64
chore: Move helm chart to seperate repo (#549) 2025-10-03 15:45:36 -07:00
Andre Nogueira
9c8224e39f
Add Sourcebot Helm Chart (#370)
* feat: add helm chart

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* feat: add sts support to use internal DB and improve values docs

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: include postgresql extra dependency

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: remove autoscaler

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: remove sts

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add more suggestive env var example

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add chart dependency lock

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add host infer to the chart docs

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

---------

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>
2025-10-03 15:39:26 -07:00
Brendan Kellam
c10010eb99
feat(db): Support passing db connection as separate env vars (#545) 2025-10-02 12:51:39 -07:00
bkellam
d24de793f2 Add roadmap link to docs 2025-10-01 14:47:26 -07:00
bkellam
7341a49407 fix 2025-08-19 11:13:30 -07:00
29 changed files with 373 additions and 180 deletions

View file

@ -4,8 +4,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"
# Zoekt # Zoekt
ZOEKT_WEBSERVER_URL="http://localhost:6070" ZOEKT_WEBSERVER_URL="http://localhost:6070"
# SHARD_MAX_MATCH_COUNT=10000
# TOTAL_MAX_MATCH_COUNT=100000
# The command to use for generating ctags. # The command to use for generating ctags.
CTAGS_COMMAND=ctags CTAGS_COMMAND=ctags
# logging, strict # logging, strict

View file

@ -2,7 +2,7 @@ name: Deploy Demo
on: on:
push: push:
branches: ["main"] tags: ["v*.*.*"]
workflow_dispatch: workflow_dispatch:
jobs: jobs:

View file

@ -27,9 +27,9 @@ jobs:
platform: [linux/amd64, linux/arm64] platform: [linux/amd64, linux/arm64]
include: include:
- platform: linux/amd64 - platform: linux/amd64
runs-on: ubuntu-latest runs-on: blacksmith-4vcpu-ubuntu-2404
- platform: linux/arm64 - platform: linux/arm64
runs-on: ubuntu-24.04-arm runs-on: blacksmith-8vcpu-ubuntu-2204-arm
steps: steps:
- name: Prepare - name: Prepare
@ -57,8 +57,8 @@ jobs:
with: with:
cosign-release: "v2.2.4" cosign-release: "v2.2.4"
- name: Set up Docker Buildx - name: Setup Blacksmith Builder
uses: docker/setup-buildx-action@v3 uses: useblacksmith/setup-docker-builder@v1
- name: Login to GitHub Packages Docker Registry - name: Login to GitHub Packages Docker Registry
uses: docker/login-action@v3 uses: docker/login-action@v3
@ -69,12 +69,10 @@ jobs:
- name: Build Docker image - name: Build Docker image
id: build id: build
uses: docker/build-push-action@v6 uses: useblacksmith/build-push-action@v2
with: with:
context: . context: .
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
platforms: ${{ matrix.platform }} platforms: ${{ matrix.platform }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search
build-args: | build-args: |
@ -110,7 +108,7 @@ jobs:
run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
merge: merge:
runs-on: ubuntu-latest runs-on: blacksmith-4vcpu-ubuntu-2404
permissions: permissions:
packages: write packages: write
needs: needs:
@ -123,8 +121,8 @@ jobs:
pattern: digests-* pattern: digests-*
merge-multiple: true merge-multiple: true
- name: Set up Docker Buildx - name: Setup Blacksmith Builder
uses: docker/setup-buildx-action@v3 uses: useblacksmith/setup-docker-builder@v1
- name: Extract Docker metadata - name: Extract Docker metadata
id: meta id: meta

View file

@ -8,7 +8,7 @@ on:
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: blacksmith-4vcpu-ubuntu-2404
permissions: permissions:
contents: read contents: read
steps: steps:
@ -19,6 +19,6 @@ jobs:
- name: Build Docker image - name: Build Docker image
id: build id: build
uses: docker/build-push-action@v6 uses: useblacksmith/build-push-action@v2
with: with:
context: . context: .

View file

@ -7,7 +7,7 @@ on:
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: blacksmith-4vcpu-ubuntu-2404
permissions: permissions:
contents: read contents: read
steps: steps:

View file

@ -7,7 +7,7 @@ on:
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: blacksmith-4vcpu-ubuntu-2404
permissions: permissions:
contents: read contents: read
steps: steps:

View file

@ -5,6 +5,9 @@
}, },
{ {
"path": "../vendor/zoekt" "path": "../vendor/zoekt"
},
{
"path": "../../sourcebot-helm-chart"
} }
], ],
"settings": { "settings": {

View file

@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
<!-- @NOTE: On next release, please bump the MCP package as there are breaking changes in this! -->
### Fixed
- Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553)
### Changed
- Remove spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555)
### Added
- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)
## [4.7.3] - 2025-09-29 ## [4.7.3] - 2025-09-29
### Fixed ### Fixed

View file

@ -185,7 +185,6 @@ ENV DATA_DIR=/data
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
ENV REDIS_URL="redis://localhost:6379" ENV REDIS_URL="redis://localhost:6379"
ENV SRC_TENANT_ENFORCEMENT_MODE=strict ENV SRC_TENANT_ENFORCEMENT_MODE=strict
ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem
@ -234,6 +233,9 @@ COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
# Configure dependencies # Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip
# Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container.
RUN git config --global safe.directory "*"
# Configure the database # Configure the database
RUN mkdir -p /run/postgresql && \ RUN mkdir -p /run/postgresql && \
chown -R postgres:postgres /run/postgresql && \ chown -R postgres:postgres /run/postgresql && \

View file

@ -128,4 +128,3 @@ To configure Sourcebot (index your own repos, connect your LLMs, etc), check out
> Building from source is only required if you'd like to contribute. If you'd just like to use Sourcebot, we recommend checking out our self-hosting [docs](https://docs.sourcebot.dev/self-hosting/overview). > Building from source is only required if you'd like to contribute. If you'd just like to use Sourcebot, we recommend checking out our self-hosting [docs](https://docs.sourcebot.dev/self-hosting/overview).
If you'd like to build from source, please checkout the `CONTRIBUTING.md` file for more information. If you'd like to build from source, please checkout the `CONTRIBUTING.md` file for more information.

View file

@ -110,6 +110,11 @@
"href": "https://sourcebot.dev/changelog", "href": "https://sourcebot.dev/changelog",
"icon": "list-check" "icon": "list-check"
}, },
{
"anchor": "Roadmap",
"href": "https://github.com/sourcebot-dev/sourcebot/issues/459",
"icon": "map"
},
{ {
"anchor": "Support", "anchor": "Support",
"href": "https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md", "href": "https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md",

View file

@ -19,7 +19,7 @@ The following environment variables allow you to configure your Sourcebot deploy
| `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> | | `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> |
| `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> | | `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> |
| `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> | | `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> |
| `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url </p> | | `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url.</p><p>You can also use `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` to construct the database url.</p> |
| `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> | | `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p> | `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p>
| `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> | | `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> |
@ -28,7 +28,6 @@ The following environment variables allow you to configure your Sourcebot deploy
| `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> | | `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> |
| `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> | | `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> |
| `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> | | `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> |
| `SHARD_MAX_MATCH_COUNT` | `10000` | <p>The maximum shard count per query</p> |
| `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> | | `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> | | `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> |
| `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> | | `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> |
@ -36,8 +35,6 @@ The following environment variables allow you to configure your Sourcebot deploy
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> | | `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> | | `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> | | `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
| `TOTAL_MAX_MATCH_COUNT` | `100000` | <p>The maximum number of matches per query</p> |
| `ZOEKT_MAX_WALL_TIME_MS` | `10000` | <p>The maximum real world duration (in milliseconds) per zoekt query</p> |
### Enterprise Environment Variables ### Enterprise Environment Variables
| Variable | Default | Description | | Variable | Default | Description |

View file

@ -2,10 +2,11 @@
title: "Overview" title: "Overview"
--- ---
[Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a self-hosted tool that helps you understand your codebase. [Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a platform that helps humans and agents understand your codebase:
- [Code search](/docs/features/search/overview): Search and navigate across all your repos and branches, no matter where they're hosted - [Code search](/docs/features/search/overview): Search and navigate across all your repos and branches, no matter where they're hosted
- [Ask Sourcebot](/docs/features/ask): Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations - [Ask Sourcebot](/docs/features/ask): Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations
- [MCP](/docs/features/mcp-server): Enrich agent context windows with code across your organization
<CardGroup> <CardGroup>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true"> <Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">

View file

@ -1,6 +1,24 @@
#!/bin/sh #!/bin/sh
set -e set -e
# Check if DATABASE_URL is not set
if [ -z "$DATABASE_URL" ]; then
# Check if the individual database variables are set and construct the URL
if [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ] && [ -n "$DATABASE_NAME" ]; then
DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}"
if [ -n "$DATABASE_ARGS" ]; then
DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS"
fi
export DATABASE_URL
else
# Otherwise, fallback to a default value
DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
export DATABASE_URL
fi
fi
if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then
DATABASE_EMBEDDED="true" DATABASE_EMBEDDED="true"
fi fi

View file

@ -250,7 +250,7 @@ export class ConnectionManager {
create: repo, create: repo,
}) })
const upsertDuration = performance.now() - upsertStart; const upsertDuration = performance.now() - upsertStart;
this.logger.info(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`); this.logger.debug(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`);
} }
const totalUpsertDuration = performance.now() - totalUpsertStart; const totalUpsertDuration = performance.now() - totalUpsertStart;
this.logger.info(`Upserted ${repoData.length} repos for connection ${connectionName} (id: ${job.data.connectionId}) in ${totalUpsertDuration}ms`); this.logger.info(`Upserted ${repoData.length} repos for connection ${connectionName} (id: ${job.data.connectionId}) in ${totalUpsertDuration}ms`);

View file

@ -38,32 +38,82 @@ export const repositoryInfoSchema = z.object({
name: z.string(), name: z.string(),
displayName: z.string().optional(), displayName: z.string().optional(),
webUrl: z.string().optional(), webUrl: z.string().optional(),
}) });
// Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
// The actual number of matches returned by the search.
// This will always be less than or equal to `totalMatchCount`.
actualMatchCount: z.number(),
// The total number of matches found during the search.
totalMatchCount: z.number(),
export const searchResponseSchema = z.object({
zoektStats: z.object({
// The duration (in nanoseconds) of the search. // The duration (in nanoseconds) of the search.
duration: z.number(), duration: z.number(),
// Number of files containing a match.
fileCount: z.number(), fileCount: z.number(),
matchCount: z.number(),
// Candidate files whose contents weren't examined because we
// gathered enough matches.
filesSkipped: z.number(), filesSkipped: z.number(),
// Amount of I/O for reading contents.
contentBytesLoaded: z.number(), contentBytesLoaded: z.number(),
// Amount of I/O for reading from index.
indexBytesLoaded: z.number(), indexBytesLoaded: z.number(),
// Number of search shards that had a crash.
crashes: z.number(), crashes: z.number(),
// Number of files in shards that we considered.
shardFilesConsidered: z.number(), shardFilesConsidered: z.number(),
// Files that we evaluated. Equivalent to files for which all
// atom matches (including negations) evaluated to true.
filesConsidered: z.number(), filesConsidered: z.number(),
// Files for which we loaded file content to verify substring matches
filesLoaded: z.number(), filesLoaded: z.number(),
// Shards that we scanned to find matches.
shardsScanned: z.number(), shardsScanned: z.number(),
// Shards that we did not process because a query was canceled.
shardsSkipped: z.number(), shardsSkipped: z.number(),
// Shards that we did not process because the query was rejected by the
// ngram filter indicating it had no matches.
shardsSkippedFilter: z.number(), shardsSkippedFilter: z.number(),
// Number of candidate matches as a result of searching ngrams.
ngramMatches: z.number(), ngramMatches: z.number(),
// NgramLookups is the number of times we accessed an ngram in the index.
ngramLookups: z.number(), ngramLookups: z.number(),
// Wall clock time for queued search.
wait: z.number(), wait: z.number(),
// Aggregate wall clock time spent constructing and pruning the match tree.
// This accounts for time such as lookups in the trigram index.
matchTreeConstruction: z.number(), matchTreeConstruction: z.number(),
// Aggregate wall clock time spent searching the match tree. This accounts
// for the bulk of search work done looking for matches.
matchTreeSearch: z.number(), matchTreeSearch: z.number(),
// Number of times regexp was called on files that we evaluated.
regexpsConsidered: z.number(), regexpsConsidered: z.number(),
// FlushReason explains why results were flushed.
flushReason: z.number(), flushReason: z.number(),
}), });
export const searchResponseSchema = z.object({
stats: searchStatsSchema,
files: z.array(z.object({ files: z.array(z.object({
fileName: z.object({ fileName: z.object({
// The name of the file // The name of the file
@ -90,6 +140,7 @@ export const searchResponseSchema = z.object({
})), })),
repositoryInfo: z.array(repositoryInfoSchema), repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(), isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
}); });
enum RepoIndexingStatus { enum RepoIndexingStatus {

View file

@ -221,7 +221,7 @@ export const CodePreview = ({
}} }}
title={file.filepath} title={file.filepath}
> >
{file.filepath} <span>{file.filepath}</span>
</span> </span>
</div> </div>

View file

@ -52,7 +52,7 @@ export const Entry = ({
<div className="overflow-hidden flex-1 min-w-0"> <div className="overflow-hidden flex-1 min-w-0">
<Tooltip> <Tooltip>
<TooltipTrigger asChild> <TooltipTrigger asChild>
<p className="overflow-hidden text-ellipsis whitespace-nowrap truncate-start">{displayName}</p> <p className="overflow-hidden text-ellipsis whitespace-nowrap truncate-start"><span>{displayName}</span></p>
</TooltipTrigger> </TooltipTrigger>
<TooltipContent side="right" className="max-w-sm"> <TooltipContent side="right" className="max-w-sm">
<p className="font-mono text-sm break-all whitespace-pre-wrap">{displayName}</p> <p className="font-mono text-sm break-all whitespace-pre-wrap">{displayName}</p>

View file

@ -21,19 +21,21 @@ import { FilterPanel } from "./components/filterPanel";
import { SearchResultsPanel } from "./components/searchResultsPanel"; import { SearchResultsPanel } from "./components/searchResultsPanel";
import { useDomain } from "@/hooks/useDomain"; import { useDomain } from "@/hooks/useDomain";
import { useToast } from "@/components/hooks/use-toast"; import { useToast } from "@/components/hooks/use-toast";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types"; import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types";
import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle"; import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle";
import { useFilteredMatches } from "./components/filterPanel/useFilterMatches"; import { useFilteredMatches } from "./components/filterPanel/useFilterMatches";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { ImperativePanelHandle } from "react-resizable-panels"; import { ImperativePanelHandle } from "react-resizable-panels";
import { FilterIcon } from "lucide-react"; import { AlertTriangleIcon, BugIcon, FilterIcon } from "lucide-react";
import { useHotkeys } from "react-hotkeys-hook"; import { useHotkeys } from "react-hotkeys-hook";
import { useLocalStorage } from "@uidotdev/usehooks"; import { useLocalStorage } from "@uidotdev/usehooks";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint"; import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint";
import { SearchBar } from "../components/searchBar"; import { SearchBar } from "../components/searchBar";
import { CodeSnippet } from "@/app/components/codeSnippet";
import { CopyIconButton } from "../components/copyIconButton";
const DEFAULT_MAX_MATCH_COUNT = 10000; const DEFAULT_MAX_MATCH_COUNT = 500;
export default function SearchPage() { export default function SearchPage() {
// We need a suspense boundary here since we are accessing query params // We need a suspense boundary here since we are accessing query params
@ -58,7 +60,12 @@ const SearchPageInternal = () => {
const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`); const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`);
const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount; const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount;
const { data: searchResponse, isLoading: isSearchLoading, error } = useQuery({ const {
data: searchResponse,
isPending: isSearchPending,
isFetching: isFetching,
error
} = useQuery({
queryKey: ["search", searchQuery, maxMatchCount], queryKey: ["search", searchQuery, maxMatchCount],
queryFn: () => measure(() => unwrapServiceError(search({ queryFn: () => measure(() => unwrapServiceError(search({
query: searchQuery, query: searchQuery,
@ -68,12 +75,12 @@ const SearchPageInternal = () => {
}, domain)), "client.search"), }, domain)), "client.search"),
select: ({ data, durationMs }) => ({ select: ({ data, durationMs }) => ({
...data, ...data,
durationMs, totalClientSearchDurationMs: durationMs,
}), }),
enabled: searchQuery.length > 0, enabled: searchQuery.length > 0,
refetchOnWindowFocus: false, refetchOnWindowFocus: false,
retry: false, retry: false,
staleTime: Infinity, staleTime: 0,
}); });
useEffect(() => { useEffect(() => {
@ -109,58 +116,31 @@ const SearchPageInternal = () => {
const fileLanguages = searchResponse.files?.map(file => file.language) || []; const fileLanguages = searchResponse.files?.map(file => file.language) || [];
captureEvent("search_finished", { captureEvent("search_finished", {
durationMs: searchResponse.durationMs, durationMs: searchResponse.totalClientSearchDurationMs,
fileCount: searchResponse.zoektStats.fileCount, fileCount: searchResponse.stats.fileCount,
matchCount: searchResponse.zoektStats.matchCount, matchCount: searchResponse.stats.totalMatchCount,
filesSkipped: searchResponse.zoektStats.filesSkipped, actualMatchCount: searchResponse.stats.actualMatchCount,
contentBytesLoaded: searchResponse.zoektStats.contentBytesLoaded, filesSkipped: searchResponse.stats.filesSkipped,
indexBytesLoaded: searchResponse.zoektStats.indexBytesLoaded, contentBytesLoaded: searchResponse.stats.contentBytesLoaded,
crashes: searchResponse.zoektStats.crashes, indexBytesLoaded: searchResponse.stats.indexBytesLoaded,
shardFilesConsidered: searchResponse.zoektStats.shardFilesConsidered, crashes: searchResponse.stats.crashes,
filesConsidered: searchResponse.zoektStats.filesConsidered, shardFilesConsidered: searchResponse.stats.shardFilesConsidered,
filesLoaded: searchResponse.zoektStats.filesLoaded, filesConsidered: searchResponse.stats.filesConsidered,
shardsScanned: searchResponse.zoektStats.shardsScanned, filesLoaded: searchResponse.stats.filesLoaded,
shardsSkipped: searchResponse.zoektStats.shardsSkipped, shardsScanned: searchResponse.stats.shardsScanned,
shardsSkippedFilter: searchResponse.zoektStats.shardsSkippedFilter, shardsSkipped: searchResponse.stats.shardsSkipped,
ngramMatches: searchResponse.zoektStats.ngramMatches, shardsSkippedFilter: searchResponse.stats.shardsSkippedFilter,
ngramLookups: searchResponse.zoektStats.ngramLookups, ngramMatches: searchResponse.stats.ngramMatches,
wait: searchResponse.zoektStats.wait, ngramLookups: searchResponse.stats.ngramLookups,
matchTreeConstruction: searchResponse.zoektStats.matchTreeConstruction, wait: searchResponse.stats.wait,
matchTreeSearch: searchResponse.zoektStats.matchTreeSearch, matchTreeConstruction: searchResponse.stats.matchTreeConstruction,
regexpsConsidered: searchResponse.zoektStats.regexpsConsidered, matchTreeSearch: searchResponse.stats.matchTreeSearch,
flushReason: searchResponse.zoektStats.flushReason, regexpsConsidered: searchResponse.stats.regexpsConsidered,
flushReason: searchResponse.stats.flushReason,
fileLanguages, fileLanguages,
}); });
}, [captureEvent, searchQuery, searchResponse]); }, [captureEvent, searchQuery, searchResponse]);
const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo, matchCount } = useMemo(() => {
if (!searchResponse) {
return {
fileMatches: [],
searchDurationMs: 0,
totalMatchCount: 0,
isBranchFilteringEnabled: false,
repositoryInfo: {},
matchCount: 0,
};
}
return {
fileMatches: searchResponse.files ?? [],
searchDurationMs: Math.round(searchResponse.durationMs),
totalMatchCount: searchResponse.zoektStats.matchCount,
isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
matchCount: searchResponse.stats.matchCount,
}
}, [searchResponse]);
const isMoreResultsButtonVisible = useMemo(() => {
return totalMatchCount > maxMatchCount;
}, [totalMatchCount, maxMatchCount]);
const onLoadMoreResults = useCallback(() => { const onLoadMoreResults = useCallback(() => {
const url = createPathWithQueryParams(`/${domain}/search`, const url = createPathWithQueryParams(`/${domain}/search`,
@ -183,20 +163,27 @@ const SearchPageInternal = () => {
/> />
</TopBar> </TopBar>
{(isSearchLoading) ? ( {(isSearchPending || isFetching) ? (
<div className="flex flex-col items-center justify-center h-full gap-2"> <div className="flex flex-col items-center justify-center h-full gap-2">
<SymbolIcon className="h-6 w-6 animate-spin" /> <SymbolIcon className="h-6 w-6 animate-spin" />
<p className="font-semibold text-center">Searching...</p> <p className="font-semibold text-center">Searching...</p>
</div> </div>
) : error ? (
<div className="flex flex-col items-center justify-center h-full gap-2">
<AlertTriangleIcon className="h-6 w-6" />
<p className="font-semibold text-center">Failed to search</p>
<p className="text-sm text-center">{error.message}</p>
</div>
) : ( ) : (
<PanelGroup <PanelGroup
fileMatches={fileMatches} fileMatches={searchResponse.files}
isMoreResultsButtonVisible={isMoreResultsButtonVisible} isMoreResultsButtonVisible={searchResponse.isSearchExhaustive === false}
onLoadMoreResults={onLoadMoreResults} onLoadMoreResults={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled} isBranchFilteringEnabled={searchResponse.isBranchFilteringEnabled}
repoInfo={repositoryInfo} repoInfo={searchResponse.repositoryInfo}
searchDurationMs={searchDurationMs} searchDurationMs={searchResponse.totalClientSearchDurationMs}
numMatches={matchCount} numMatches={searchResponse.stats.actualMatchCount}
searchStats={searchResponse.stats}
/> />
)} )}
</div> </div>
@ -208,9 +195,10 @@ interface PanelGroupProps {
isMoreResultsButtonVisible?: boolean; isMoreResultsButtonVisible?: boolean;
onLoadMoreResults: () => void; onLoadMoreResults: () => void;
isBranchFilteringEnabled: boolean; isBranchFilteringEnabled: boolean;
repoInfo: Record<number, RepositoryInfo>; repoInfo: RepositoryInfo[];
searchDurationMs: number; searchDurationMs: number;
numMatches: number; numMatches: number;
searchStats?: SearchStats;
} }
const PanelGroup = ({ const PanelGroup = ({
@ -218,9 +206,10 @@ const PanelGroup = ({
isMoreResultsButtonVisible, isMoreResultsButtonVisible,
onLoadMoreResults, onLoadMoreResults,
isBranchFilteringEnabled, isBranchFilteringEnabled,
repoInfo, repoInfo: _repoInfo,
searchDurationMs, searchDurationMs: _searchDurationMs,
numMatches, numMatches,
searchStats,
}: PanelGroupProps) => { }: PanelGroupProps) => {
const [previewedFile, setPreviewedFile] = useState<SearchResultFile | undefined>(undefined); const [previewedFile, setPreviewedFile] = useState<SearchResultFile | undefined>(undefined);
const filteredFileMatches = useFilteredMatches(fileMatches); const filteredFileMatches = useFilteredMatches(fileMatches);
@ -241,6 +230,17 @@ const PanelGroup = ({
description: "Toggle filter panel", description: "Toggle filter panel",
}); });
const searchDurationMs = useMemo(() => {
return Math.round(_searchDurationMs);
}, [_searchDurationMs]);
const repoInfo = useMemo(() => {
return _repoInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>);
}, [_repoInfo]);
return ( return (
<ResizablePanelGroup <ResizablePanelGroup
direction="horizontal" direction="horizontal"
@ -297,7 +297,27 @@ const PanelGroup = ({
order={2} order={2}
> >
<div className="py-1 px-2 flex flex-row items-center"> <div className="py-1 px-2 flex flex-row items-center">
<Tooltip>
<TooltipTrigger asChild>
<InfoCircledIcon className="w-4 h-4 mr-2" /> <InfoCircledIcon className="w-4 h-4 mr-2" />
</TooltipTrigger>
<TooltipContent side="right" className="flex flex-col items-start gap-2 p-4">
<div className="flex flex-row items-center w-full">
<BugIcon className="w-4 h-4 mr-1.5" />
<p className="text-md font-medium">Search stats for nerds</p>
<CopyIconButton
onCopy={() => {
navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2));
return true;
}}
className="ml-auto"
/>
</div>
<CodeSnippet renderNewlines>
{JSON.stringify(searchStats, null, 2)}
</CodeSnippet>
</TooltipContent>
</Tooltip>
{ {
fileMatches.length > 0 ? ( fileMatches.length > 0 ? (
<p className="text-sm font-medium">{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}</p> <p className="text-sm font-medium">{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}</p>

View file

@ -1,12 +1,12 @@
import { cn } from "@/lib/utils" import { cn } from "@/lib/utils"
export const CodeSnippet = ({ children, className, title }: { children: React.ReactNode, className?: string, title?: string }) => { export const CodeSnippet = ({ children, className, title, renderNewlines = false }: { children: React.ReactNode, className?: string, title?: string, renderNewlines?: boolean }) => {
return ( return (
<code <code
className={cn("bg-gray-100 dark:bg-gray-700 w-fit rounded-md px-2 py-0.5 font-medium font-mono", className)} className={cn("bg-gray-100 dark:bg-gray-700 w-fit rounded-md px-2 py-0.5 font-medium font-mono", className)}
title={title} title={title}
> >
{children} {renderNewlines ? <pre>{children}</pre> : children}
</code> </code>
) )
} }

View file

@ -311,6 +311,11 @@
text-overflow: ellipsis; text-overflow: ellipsis;
} }
.truncate-start > * {
direction: ltr;
unicode-bidi: embed;
}
@layer base { @layer base {
* { * {
@apply border-border; @apply border-border;

View file

@ -2,13 +2,10 @@ import { auth } from "@/auth";
import { LoginForm } from "./components/loginForm"; import { LoginForm } from "./components/loginForm";
import { redirect } from "next/navigation"; import { redirect } from "next/navigation";
import { Footer } from "@/app/components/footer"; import { Footer } from "@/app/components/footer";
import { createLogger } from "@sourcebot/logger";
import { getAuthProviders } from "@/lib/authProviders"; import { getAuthProviders } from "@/lib/authProviders";
import { getOrgFromDomain } from "@/data/org"; import { getOrgFromDomain } from "@/data/org";
import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants";
const logger = createLogger('login-page');
interface LoginProps { interface LoginProps {
searchParams: Promise<{ searchParams: Promise<{
callbackUrl?: string; callbackUrl?: string;
@ -18,10 +15,8 @@ interface LoginProps {
export default async function Login(props: LoginProps) { export default async function Login(props: LoginProps) {
const searchParams = await props.searchParams; const searchParams = await props.searchParams;
logger.info("Login page loaded");
const session = await auth(); const session = await auth();
if (session) { if (session) {
logger.info("Session found in login page, redirecting to home");
return redirect("/"); return redirect("/");
} }

View file

@ -15,9 +15,6 @@ export const env = createEnv({
server: { server: {
// Zoekt // Zoekt
ZOEKT_WEBSERVER_URL: z.string().url().default("http://localhost:6070"), ZOEKT_WEBSERVER_URL: z.string().url().default("http://localhost:6070"),
SHARD_MAX_MATCH_COUNT: numberSchema.default(10000),
TOTAL_MAX_MATCH_COUNT: numberSchema.default(100000),
ZOEKT_MAX_WALL_TIME_MS: numberSchema.default(10000),
// Auth // Auth
FORCE_ENABLE_ANONYMOUS_ACCESS: booleanSchema.default('false'), FORCE_ENABLE_ANONYMOUS_ACCESS: booleanSchema.default('false'),

View file

@ -32,7 +32,7 @@ export const FileListItem = ({
pathType: 'blob', pathType: 'blob',
})} })}
> >
{path} <span>{path}</span>
</Link> </Link>
</div> </div>
) )

View file

@ -80,7 +80,7 @@ export const findSearchBasedSymbolDefinitions = async (
const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => { const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
const parser = searchResponseSchema.transform(async ({ files }) => ({ const parser = searchResponseSchema.transform(async ({ files }) => ({
stats: { stats: {
matchCount: searchResult.stats.matchCount, matchCount: searchResult.stats.actualMatchCount,
}, },
files: files.flatMap((file) => { files: files.flatMap((file) => {
const chunks = file.chunks; const chunks = file.chunks;

View file

@ -37,35 +37,82 @@ export const repositoryInfoSchema = z.object({
name: z.string(), name: z.string(),
displayName: z.string().optional(), displayName: z.string().optional(),
webUrl: z.string().optional(), webUrl: z.string().optional(),
}) });
// Many of these fields are defined in zoekt/api.go.
export const searchStatsSchema = z.object({
// The actual number of matches returned by the search.
// This will always be less than or equal to `totalMatchCount`.
actualMatchCount: z.number(),
// The total number of matches found during the search.
totalMatchCount: z.number(),
export const searchResponseSchema = z.object({
zoektStats: z.object({
// The duration (in nanoseconds) of the search. // The duration (in nanoseconds) of the search.
duration: z.number(), duration: z.number(),
// Number of files containing a match.
fileCount: z.number(), fileCount: z.number(),
matchCount: z.number(),
// Candidate files whose contents weren't examined because we
// gathered enough matches.
filesSkipped: z.number(), filesSkipped: z.number(),
// Amount of I/O for reading contents.
contentBytesLoaded: z.number(), contentBytesLoaded: z.number(),
// Amount of I/O for reading from index.
indexBytesLoaded: z.number(), indexBytesLoaded: z.number(),
// Number of search shards that had a crash.
crashes: z.number(), crashes: z.number(),
// Number of files in shards that we considered.
shardFilesConsidered: z.number(), shardFilesConsidered: z.number(),
// Files that we evaluated. Equivalent to files for which all
// atom matches (including negations) evaluated to true.
filesConsidered: z.number(), filesConsidered: z.number(),
// Files for which we loaded file content to verify substring matches
filesLoaded: z.number(), filesLoaded: z.number(),
// Shards that we scanned to find matches.
shardsScanned: z.number(), shardsScanned: z.number(),
// Shards that we did not process because a query was canceled.
shardsSkipped: z.number(), shardsSkipped: z.number(),
// Shards that we did not process because the query was rejected by the
// ngram filter indicating it had no matches.
shardsSkippedFilter: z.number(), shardsSkippedFilter: z.number(),
// Number of candidate matches as a result of searching ngrams.
ngramMatches: z.number(), ngramMatches: z.number(),
// NgramLookups is the number of times we accessed an ngram in the index.
ngramLookups: z.number(), ngramLookups: z.number(),
// Wall clock time for queued search.
wait: z.number(), wait: z.number(),
// Aggregate wall clock time spent constructing and pruning the match tree.
// This accounts for time such as lookups in the trigram index.
matchTreeConstruction: z.number(), matchTreeConstruction: z.number(),
// Aggregate wall clock time spent searching the match tree. This accounts
// for the bulk of search work done looking for matches.
matchTreeSearch: z.number(), matchTreeSearch: z.number(),
// Number of times regexp was called on files that we evaluated.
regexpsConsidered: z.number(), regexpsConsidered: z.number(),
// FlushReason explains why results were flushed.
flushReason: z.number(), flushReason: z.number(),
}), });
stats: z.object({
matchCount: z.number(), export const searchResponseSchema = z.object({
}), stats: searchStatsSchema,
files: z.array(z.object({ files: z.array(z.object({
fileName: z.object({ fileName: z.object({
// The name of the file // The name of the file
@ -92,6 +139,7 @@ export const searchResponseSchema = z.object({
})), })),
repositoryInfo: z.array(repositoryInfoSchema), repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(), isBranchFilteringEnabled: z.boolean(),
isSearchExhaustive: z.boolean(),
}); });
export const fileSourceRequestSchema = z.object({ export const fileSourceRequestSchema = z.object({

View file

@ -151,12 +151,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892 // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
opts: { opts: {
ChunkMatches: true, ChunkMatches: true,
// @note: Zoekt has several different ways to limit a given search. The two that
// we care about are `MaxMatchDisplayCount` and `TotalMaxMatchCount`:
// - `MaxMatchDisplayCount` truncates the number of matches AFTER performing
// a search (specifically, after collating and sorting the results). The number of
// results returned by the API will be less than or equal to this value.
//
// - `TotalMaxMatchCount` truncates the number of matches DURING a search. The results
// returned by the API can be less than, equal to, or greater than this value.
// Why greater? Because this value is compared _after_ a given shard has finished
// being processed, the number of matches returned by the last shard may have exceeded
// this value.
//
// Let's define two variables:
// - `actualMatchCount` : The number of matches that are returned by the API. This is
// always less than or equal to `MaxMatchDisplayCount`.
// - `totalMatchCount` : The number of matches that zoekt found before it either
// 1) found all matches or 2) hit the `TotalMaxMatchCount` limit. This number is
// not bounded and can be less than, equal to, or greater than both `TotalMaxMatchCount`
// and `MaxMatchDisplayCount`.
//
//
// Our challenge is to determine whether or not the search returned all possible matches
// (it was exhaustive) or if it was truncated. By setting the `TotalMaxMatchCount` to
// `MaxMatchDisplayCount + 1`, we can determine which of these occurred by comparing
// `totalMatchCount` to `MaxMatchDisplayCount`.
//
// if (totalMatchCount ≤ actualMatchCount):
// Search is EXHAUSTIVE (found all possible matches)
// Proof: totalMatchCount ≤ MaxMatchDisplayCount < TotalMaxMatchCount
// Therefore Zoekt stopped naturally, not due to limit
//
// if (totalMatchCount > actualMatchCount):
// Search is TRUNCATED (more matches exist)
// Proof: totalMatchCount ≥ MaxMatchDisplayCount + 1 = TotalMaxMatchCount
// Therefore Zoekt hit the limit and stopped searching
//
MaxMatchDisplayCount: matches, MaxMatchDisplayCount: matches,
TotalMaxMatchCount: matches + 1,
NumContextLines: contextLines, NumContextLines: contextLines,
Whole: !!whole, Whole: !!whole,
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT, ShardMaxMatchCount: -1,
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT, MaxWallTime: 0, // zoekt expects a duration in nanoseconds
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
} }
}); });
@ -296,11 +332,35 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
} }
}).filter((file) => file !== undefined) ?? []; }).filter((file) => file !== undefined) ?? [];
const actualMatchCount = files.reduce(
(acc, file) =>
// Match count is the sum of the number of chunk matches and file name matches.
acc + file.chunks.reduce(
(acc, chunk) => acc + chunk.matchRanges.length,
0,
) + file.fileName.matchRanges.length,
0,
);
const totalMatchCount = Result.MatchCount;
const isSearchExhaustive = totalMatchCount <= actualMatchCount;
return { return {
zoektStats: { files,
repositoryInfo: Array.from(repos.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled,
isSearchExhaustive,
stats: {
actualMatchCount,
totalMatchCount,
duration: Result.Duration, duration: Result.Duration,
fileCount: Result.FileCount, fileCount: Result.FileCount,
matchCount: Result.MatchCount,
filesSkipped: Result.FilesSkipped, filesSkipped: Result.FilesSkipped,
contentBytesLoaded: Result.ContentBytesLoaded, contentBytesLoaded: Result.ContentBytesLoaded,
indexBytesLoaded: Result.IndexBytesLoaded, indexBytesLoaded: Result.IndexBytesLoaded,
@ -318,25 +378,6 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
matchTreeSearch: Result.MatchTreeSearch, matchTreeSearch: Result.MatchTreeSearch,
regexpsConsidered: Result.RegexpsConsidered, regexpsConsidered: Result.RegexpsConsidered,
flushReason: Result.FlushReason, flushReason: Result.FlushReason,
},
files,
repositoryInfo: Array.from(repos.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled: isBranchFilteringEnabled,
stats: {
matchCount: files.reduce(
(acc, file) =>
acc + file.chunks.reduce(
(acc, chunk) => acc + chunk.matchRanges.length,
0,
),
0,
)
} }
} satisfies SearchResponse; } satisfies SearchResponse;
}); });

View file

@ -8,6 +8,7 @@ import {
fileSourceRequestSchema, fileSourceRequestSchema,
symbolSchema, symbolSchema,
repositoryInfoSchema, repositoryInfoSchema,
searchStatsSchema,
} from "./schemas"; } from "./schemas";
import { z } from "zod"; import { z } from "zod";
@ -23,3 +24,4 @@ export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>; export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;
export type SourceRange = z.infer<typeof rangeSchema>; export type SourceRange = z.infer<typeof rangeSchema>;
export type SearchStats = z.infer<typeof searchStatsSchema>;

View file

@ -15,6 +15,7 @@ export type PosthogEventMap = {
shardsSkipped: number, shardsSkipped: number,
shardsSkippedFilter: number, shardsSkippedFilter: number,
matchCount: number, matchCount: number,
actualMatchCount: number,
ngramMatches: number, ngramMatches: number,
ngramLookups: number, ngramLookups: number,
wait: number, wait: number,