Merge 7341a49407 into c3fae1aaab

feat(web): Improved search performance on unbounded searches (#555)
update demo deploy cadence (#556)
2025-12-11 20:05:25 +00:00 · 2025-10-13 14:47:56 -05:00 · 2025-10-07 23:55:36 -07:00 · 2025-10-07 23:51:52 -07:00 · 2025-10-07 16:38:56 -07:00 · 2025-10-07 10:21:19 -07:00
29 changed files with 373 additions and 180 deletions
--- a/.env.development
+++ b/.env.development
@ -4,8 +4,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"

 # Zoekt
 ZOEKT_WEBSERVER_URL="http://localhost:6070"
-# SHARD_MAX_MATCH_COUNT=10000
-# TOTAL_MAX_MATCH_COUNT=100000
 # The command to use for generating ctags.
 CTAGS_COMMAND=ctags
 # logging, strict
--- a/.github/workflows/deploy-demo.yml
+++ b/.github/workflows/deploy-demo.yml
@ -2,7 +2,7 @@ name: Deploy Demo

 on:
  push:
-    branches: ["main"]
+    tags: ["v*.*.*"]
  workflow_dispatch:

 jobs:
--- a/.github/workflows/ghcr-publish.yml
+++ b/.github/workflows/ghcr-publish.yml
@ -27,9 +27,9 @@ jobs:
        platform: [linux/amd64, linux/arm64]
        include:
          - platform: linux/amd64
-            runs-on: ubuntu-latest
+            runs-on: blacksmith-4vcpu-ubuntu-2404
          - platform: linux/arm64
-            runs-on: ubuntu-24.04-arm
+            runs-on: blacksmith-8vcpu-ubuntu-2204-arm

    steps:
      - name: Prepare
@ -57,8 +57,8 @@ jobs:
        with:
          cosign-release: "v2.2.4"

-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+      - name: Setup Blacksmith Builder
+        uses: useblacksmith/setup-docker-builder@v1

      - name: Login to GitHub Packages Docker Registry
        uses: docker/login-action@v3
@ -69,12 +69,10 @@ jobs:

      - name: Build Docker image
        id: build
-        uses: docker/build-push-action@v6
+        uses: useblacksmith/build-push-action@v2
        with:
          context: .
          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
          platforms: ${{ matrix.platform }}
          outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search
          build-args: |
@ -110,7 +108,7 @@ jobs:
        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
  
  merge:
-    runs-on: ubuntu-latest
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    permissions:
      packages: write
    needs:
@ -123,8 +121,8 @@ jobs:
          pattern: digests-*
          merge-multiple: true
      
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+      - name: Setup Blacksmith Builder
+        uses: useblacksmith/setup-docker-builder@v1
      
      - name: Extract Docker metadata
        id: meta
--- a/.github/workflows/pr-gate.yml
+++ b/.github/workflows/pr-gate.yml
@ -8,7 +8,7 @@ on:

 jobs:
  build:
-    runs-on: ubuntu-latest
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    permissions:
      contents: read
    steps:
@ -19,6 +19,6 @@ jobs:

      - name: Build Docker image
        id: build
-        uses: docker/build-push-action@v6
+        uses: useblacksmith/build-push-action@v2
        with:
          context: .
--- a/.github/workflows/test-backend.yml
+++ b/.github/workflows/test-backend.yml
@ -7,7 +7,7 @@ on:

 jobs:
  build:
-    runs-on: ubuntu-latest
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    permissions:
      contents: read
    steps:
--- a/.github/workflows/test-web.yml
+++ b/.github/workflows/test-web.yml
@ -7,7 +7,7 @@ on:

 jobs:
  build:
-    runs-on: ubuntu-latest
+    runs-on: blacksmith-4vcpu-ubuntu-2404
    permissions:
      contents: read
    steps:
--- a/.vscode/sourcebot.code-workspace
+++ b/.vscode/sourcebot.code-workspace
@ -5,6 +5,9 @@
 		},
 		{
 			"path": "../vendor/zoekt"
+		},
+		{
+			"path": "../../sourcebot-helm-chart"
 		}
 	],
 	"settings": {
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+<!-- @NOTE: On next release, please bump the MCP package as there are breaking changes in this! -->
+
+### Fixed
+- Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553)
+
+### Changed
+- Remove spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
+- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555)
+
+### Added
+- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)
+
 ## [4.7.3] - 2025-09-29

 ### Fixed
--- a/Dockerfile
+++ b/Dockerfile
@ -185,7 +185,6 @@ ENV DATA_DIR=/data
 ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
 ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
 ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
-ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
 ENV REDIS_URL="redis://localhost:6379"
 ENV SRC_TENANT_ENFORCEMENT_MODE=strict
 ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem
@ -234,6 +233,9 @@ COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
 # Configure dependencies
 RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip

+# Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container.
+RUN git config --global safe.directory "*"
+
 # Configure the database
 RUN mkdir -p /run/postgresql && \
    chown -R postgres:postgres /run/postgresql && \
--- a/README.md
+++ b/README.md
@ -128,4 +128,3 @@ To configure Sourcebot (index your own repos, connect your LLMs, etc), check out
 > Building from source is only required if you'd like to contribute. If you'd just like to use Sourcebot, we recommend checking out our self-hosting [docs](https://docs.sourcebot.dev/self-hosting/overview).

 If you'd like to build from source, please checkout the `CONTRIBUTING.md` file for more information.
-
--- a/docs/docs.json
+++ b/docs/docs.json
@ -110,6 +110,11 @@
                "href": "https://sourcebot.dev/changelog",
                "icon": "list-check"
            },
+            {
+                "anchor": "Roadmap",
+                "href": "https://github.com/sourcebot-dev/sourcebot/issues/459",
+                "icon": "map"
+            },
            {
                "anchor": "Support",
                "href": "https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md",
--- a/docs/docs/configuration/environment-variables.mdx
+++ b/docs/docs/configuration/environment-variables.mdx
@ -19,7 +19,7 @@ The following environment variables allow you to configure your Sourcebot deploy
 | `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> |
 | `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> |
 | `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> |
-| `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url </p> |
+| `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url.</p><p>You can also use `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` to construct the database url.</p> |
 | `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> | 
 | `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p>
 | `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> |
@ -28,7 +28,6 @@ The following environment variables allow you to configure your Sourcebot deploy
 | `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> |
 | `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> |
 | `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> |
-| `SHARD_MAX_MATCH_COUNT` | `10000` | <p>The maximum shard count per query</p> |
 | `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
 | `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> |
 | `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> |
@ -36,8 +35,6 @@ The following environment variables allow you to configure your Sourcebot deploy
 | `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
 | `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> | 
 | `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
-| `TOTAL_MAX_MATCH_COUNT` | `100000` | <p>The maximum number of matches per query</p> |
-| `ZOEKT_MAX_WALL_TIME_MS` | `10000` | <p>The maximum real world duration (in milliseconds) per zoekt query</p> | 

 ### Enterprise Environment Variables
 | Variable | Default | Description |
--- a/docs/docs/overview.mdx
+++ b/docs/docs/overview.mdx
@ -2,10 +2,11 @@
 title: "Overview"
 ---

-[Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a self-hosted tool that helps you understand your codebase. 
+[Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a platform that helps humans and agents understand your codebase:

 - [Code search](/docs/features/search/overview): Search and navigate across all your repos and branches, no matter where they’re hosted
 - [Ask Sourcebot](/docs/features/ask): Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations
+- [MCP](/docs/features/mcp-server): Enrich agent context windows with code across your organization

 <CardGroup>
 <Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -1,6 +1,24 @@
 #!/bin/sh
 set -e

+# Check if DATABASE_URL is not set
+if [ -z "$DATABASE_URL" ]; then
+    # Check if the individual database variables are set and construct the URL
+    if [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ]  && [ -n "$DATABASE_NAME" ]; then
+        DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}"
+
+        if [ -n "$DATABASE_ARGS" ]; then
+            DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS"
+        fi
+
+        export DATABASE_URL
+    else
+        # Otherwise, fallback to a default value
+        DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
+        export DATABASE_URL
+    fi
+fi
+
 if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then
    DATABASE_EMBEDDED="true"
 fi
--- a/packages/backend/src/connectionManager.ts
+++ b/packages/backend/src/connectionManager.ts
@ -250,7 +250,7 @@ export class ConnectionManager {
                    create: repo,
                })
                const upsertDuration = performance.now() - upsertStart;
-                this.logger.info(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`);
+                this.logger.debug(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`);
            }
            const totalUpsertDuration = performance.now() - totalUpsertStart;
            this.logger.info(`Upserted ${repoData.length} repos for connection ${connectionName} (id: ${job.data.connectionId}) in ${totalUpsertDuration}ms`);
--- a/packages/mcp/src/schemas.ts
+++ b/packages/mcp/src/schemas.ts
@ -38,32 +38,82 @@ export const repositoryInfoSchema = z.object({
    name: z.string(),
    displayName: z.string().optional(),
    webUrl: z.string().optional(),
-})
+});
+
+// Many of these fields are defined in zoekt/api.go.
+export const searchStatsSchema = z.object({
+    // The actual number of matches returned by the search.
+    // This will always be less than or equal to `totalMatchCount`.
+    actualMatchCount: z.number(),
+
+    // The total number of matches found during the search.
+    totalMatchCount: z.number(),

-export const searchResponseSchema = z.object({
-    zoektStats: z.object({
    // The duration (in nanoseconds) of the search.
    duration: z.number(),
+
+    // Number of files containing a match.
    fileCount: z.number(),
-        matchCount: z.number(),
+
+    // Candidate files whose contents weren't examined because we
+    // gathered enough matches.
    filesSkipped: z.number(),
+
+    // Amount of I/O for reading contents.
    contentBytesLoaded: z.number(),
+
+    // Amount of I/O for reading from index.
    indexBytesLoaded: z.number(),
+
+    // Number of search shards that had a crash.
    crashes: z.number(),
+
+    // Number of files in shards that we considered.
    shardFilesConsidered: z.number(),
+
+    // Files that we evaluated. Equivalent to files for which all
+    // atom matches (including negations) evaluated to true.
    filesConsidered: z.number(),
+
+    // Files for which we loaded file content to verify substring matches
    filesLoaded: z.number(),
+
+    // Shards that we scanned to find matches.
    shardsScanned: z.number(),
+
+    // Shards that we did not process because a query was canceled.
    shardsSkipped: z.number(),
+
+    // Shards that we did not process because the query was rejected by the
+    // ngram filter indicating it had no matches.
    shardsSkippedFilter: z.number(),
+
+    // Number of candidate matches as a result of searching ngrams.
    ngramMatches: z.number(),
+
+    // NgramLookups is the number of times we accessed an ngram in the index.
    ngramLookups: z.number(),
+
+    // Wall clock time for queued search.
    wait: z.number(),
+
+    // Aggregate wall clock time spent constructing and pruning the match tree.
+    // This accounts for time such as lookups in the trigram index.
    matchTreeConstruction: z.number(),
+
+    // Aggregate wall clock time spent searching the match tree. This accounts
+    // for the bulk of search work done looking for matches.
    matchTreeSearch: z.number(),
+
+    // Number of times regexp was called on files that we evaluated.
    regexpsConsidered: z.number(),
+
+    // FlushReason explains why results were flushed.
    flushReason: z.number(),
-    }),
+});
+
+export const searchResponseSchema = z.object({
+    stats: searchStatsSchema,
    files: z.array(z.object({
        fileName: z.object({
            // The name of the file
@ -90,6 +140,7 @@ export const searchResponseSchema = z.object({
    })),
    repositoryInfo: z.array(repositoryInfoSchema),
    isBranchFilteringEnabled: z.boolean(),
+    isSearchExhaustive: z.boolean(),
 });

 enum RepoIndexingStatus {
--- a/packages/web/src/app/[domain]/search/components/codePreviewPanel/codePreview.tsx
+++ b/packages/web/src/app/[domain]/search/components/codePreviewPanel/codePreview.tsx
@ -221,7 +221,7 @@ export const CodePreview = ({
                        }}
                        title={file.filepath}
                    >
-                        {file.filepath}
+                        <span>{file.filepath}</span>
                    </span>
                </div>

--- a/packages/web/src/app/[domain]/search/components/filterPanel/entry.tsx
+++ b/packages/web/src/app/[domain]/search/components/filterPanel/entry.tsx
@ -52,7 +52,7 @@ export const Entry = ({
                <div className="overflow-hidden flex-1 min-w-0">
                    <Tooltip>
                        <TooltipTrigger asChild>
-                            <p className="overflow-hidden text-ellipsis whitespace-nowrap truncate-start">{displayName}</p>
+                            <p className="overflow-hidden text-ellipsis whitespace-nowrap truncate-start"><span>{displayName}</span></p>
                        </TooltipTrigger>
                        <TooltipContent side="right" className="max-w-sm">
                            <p className="font-mono text-sm break-all whitespace-pre-wrap">{displayName}</p>
--- a/packages/web/src/app/[domain]/search/page.tsx
+++ b/packages/web/src/app/[domain]/search/page.tsx
@ -21,19 +21,21 @@ import { FilterPanel } from "./components/filterPanel";
 import { SearchResultsPanel } from "./components/searchResultsPanel";
 import { useDomain } from "@/hooks/useDomain";
 import { useToast } from "@/components/hooks/use-toast";
-import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
+import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types";
 import { AnimatedResizableHandle } from "@/components/ui/animatedResizableHandle";
 import { useFilteredMatches } from "./components/filterPanel/useFilterMatches";
 import { Button } from "@/components/ui/button";
 import { ImperativePanelHandle } from "react-resizable-panels";
-import { FilterIcon } from "lucide-react";
+import { AlertTriangleIcon, BugIcon, FilterIcon } from "lucide-react";
 import { useHotkeys } from "react-hotkeys-hook";
 import { useLocalStorage } from "@uidotdev/usehooks";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { KeyboardShortcutHint } from "@/app/components/keyboardShortcutHint";
 import { SearchBar } from "../components/searchBar";
+import { CodeSnippet } from "@/app/components/codeSnippet";
+import { CopyIconButton } from "../components/copyIconButton";

-const DEFAULT_MAX_MATCH_COUNT = 10000;
+const DEFAULT_MAX_MATCH_COUNT = 500;

 export default function SearchPage() {
    // We need a suspense boundary here since we are accessing query params
@ -58,7 +60,12 @@ const SearchPageInternal = () => {
    const _maxMatchCount = parseInt(useNonEmptyQueryParam(SearchQueryParams.matches) ?? `${DEFAULT_MAX_MATCH_COUNT}`);
    const maxMatchCount = isNaN(_maxMatchCount) ? DEFAULT_MAX_MATCH_COUNT : _maxMatchCount;

-    const { data: searchResponse, isLoading: isSearchLoading, error } = useQuery({
+    const {
+        data: searchResponse,
+        isPending: isSearchPending,
+        isFetching: isFetching,
+        error
+    } = useQuery({
        queryKey: ["search", searchQuery, maxMatchCount],
        queryFn: () => measure(() => unwrapServiceError(search({
            query: searchQuery,
@ -68,12 +75,12 @@ const SearchPageInternal = () => {
        }, domain)), "client.search"),
        select: ({ data, durationMs }) => ({
            ...data,
-            durationMs,
+            totalClientSearchDurationMs: durationMs,
        }),
        enabled: searchQuery.length > 0,
        refetchOnWindowFocus: false,
        retry: false,
-        staleTime: Infinity,
+        staleTime: 0,
    });

    useEffect(() => {
@ -109,58 +116,31 @@ const SearchPageInternal = () => {
        const fileLanguages = searchResponse.files?.map(file => file.language) || [];

        captureEvent("search_finished", {
-            durationMs: searchResponse.durationMs,
-            fileCount: searchResponse.zoektStats.fileCount,
-            matchCount: searchResponse.zoektStats.matchCount,
-            filesSkipped: searchResponse.zoektStats.filesSkipped,
-            contentBytesLoaded: searchResponse.zoektStats.contentBytesLoaded,
-            indexBytesLoaded: searchResponse.zoektStats.indexBytesLoaded,
-            crashes: searchResponse.zoektStats.crashes,
-            shardFilesConsidered: searchResponse.zoektStats.shardFilesConsidered,
-            filesConsidered: searchResponse.zoektStats.filesConsidered,
-            filesLoaded: searchResponse.zoektStats.filesLoaded,
-            shardsScanned: searchResponse.zoektStats.shardsScanned,
-            shardsSkipped: searchResponse.zoektStats.shardsSkipped,
-            shardsSkippedFilter: searchResponse.zoektStats.shardsSkippedFilter,
-            ngramMatches: searchResponse.zoektStats.ngramMatches,
-            ngramLookups: searchResponse.zoektStats.ngramLookups,
-            wait: searchResponse.zoektStats.wait,
-            matchTreeConstruction: searchResponse.zoektStats.matchTreeConstruction,
-            matchTreeSearch: searchResponse.zoektStats.matchTreeSearch,
-            regexpsConsidered: searchResponse.zoektStats.regexpsConsidered,
-            flushReason: searchResponse.zoektStats.flushReason,
+            durationMs: searchResponse.totalClientSearchDurationMs,
+            fileCount: searchResponse.stats.fileCount,
+            matchCount: searchResponse.stats.totalMatchCount,
+            actualMatchCount: searchResponse.stats.actualMatchCount,
+            filesSkipped: searchResponse.stats.filesSkipped,
+            contentBytesLoaded: searchResponse.stats.contentBytesLoaded,
+            indexBytesLoaded: searchResponse.stats.indexBytesLoaded,
+            crashes: searchResponse.stats.crashes,
+            shardFilesConsidered: searchResponse.stats.shardFilesConsidered,
+            filesConsidered: searchResponse.stats.filesConsidered,
+            filesLoaded: searchResponse.stats.filesLoaded,
+            shardsScanned: searchResponse.stats.shardsScanned,
+            shardsSkipped: searchResponse.stats.shardsSkipped,
+            shardsSkippedFilter: searchResponse.stats.shardsSkippedFilter,
+            ngramMatches: searchResponse.stats.ngramMatches,
+            ngramLookups: searchResponse.stats.ngramLookups,
+            wait: searchResponse.stats.wait,
+            matchTreeConstruction: searchResponse.stats.matchTreeConstruction,
+            matchTreeSearch: searchResponse.stats.matchTreeSearch,
+            regexpsConsidered: searchResponse.stats.regexpsConsidered,
+            flushReason: searchResponse.stats.flushReason,
            fileLanguages,
        });
    }, [captureEvent, searchQuery, searchResponse]);

-    const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo, matchCount } = useMemo(() => {
-        if (!searchResponse) {
-            return {
-                fileMatches: [],
-                searchDurationMs: 0,
-                totalMatchCount: 0,
-                isBranchFilteringEnabled: false,
-                repositoryInfo: {},
-                matchCount: 0,
-            };
-        }
-
-        return {
-            fileMatches: searchResponse.files ?? [],
-            searchDurationMs: Math.round(searchResponse.durationMs),
-            totalMatchCount: searchResponse.zoektStats.matchCount,
-            isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
-            repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
-                acc[repo.id] = repo;
-                return acc;
-            }, {} as Record<number, RepositoryInfo>),
-            matchCount: searchResponse.stats.matchCount,
-        }
-    }, [searchResponse]);
-
-    const isMoreResultsButtonVisible = useMemo(() => {
-        return totalMatchCount > maxMatchCount;
-    }, [totalMatchCount, maxMatchCount]);

    const onLoadMoreResults = useCallback(() => {
        const url = createPathWithQueryParams(`/${domain}/search`,
@ -183,20 +163,27 @@ const SearchPageInternal = () => {
                />
            </TopBar>

-            {(isSearchLoading) ? (
+            {(isSearchPending || isFetching) ? (
                <div className="flex flex-col items-center justify-center h-full gap-2">
                    <SymbolIcon className="h-6 w-6 animate-spin" />
                    <p className="font-semibold text-center">Searching...</p>
                </div>
+            ) : error ? (
+                <div className="flex flex-col items-center justify-center h-full gap-2">
+                    <AlertTriangleIcon className="h-6 w-6" />
+                    <p className="font-semibold text-center">Failed to search</p>
+                    <p className="text-sm text-center">{error.message}</p>
+                </div>
            ) : (
                <PanelGroup
-                    fileMatches={fileMatches}
-                    isMoreResultsButtonVisible={isMoreResultsButtonVisible}
+                    fileMatches={searchResponse.files}
+                    isMoreResultsButtonVisible={searchResponse.isSearchExhaustive === false}
                    onLoadMoreResults={onLoadMoreResults}
-                    isBranchFilteringEnabled={isBranchFilteringEnabled}
-                    repoInfo={repositoryInfo}
-                    searchDurationMs={searchDurationMs}
-                    numMatches={matchCount}
+                    isBranchFilteringEnabled={searchResponse.isBranchFilteringEnabled}
+                    repoInfo={searchResponse.repositoryInfo}
+                    searchDurationMs={searchResponse.totalClientSearchDurationMs}
+                    numMatches={searchResponse.stats.actualMatchCount}
+                    searchStats={searchResponse.stats}
                />
            )}
        </div>
@ -208,9 +195,10 @@ interface PanelGroupProps {
    isMoreResultsButtonVisible?: boolean;
    onLoadMoreResults: () => void;
    isBranchFilteringEnabled: boolean;
-    repoInfo: Record<number, RepositoryInfo>;
+    repoInfo: RepositoryInfo[];
    searchDurationMs: number;
    numMatches: number;
+    searchStats?: SearchStats;
 }

 const PanelGroup = ({
@ -218,9 +206,10 @@ const PanelGroup = ({
    isMoreResultsButtonVisible,
    onLoadMoreResults,
    isBranchFilteringEnabled,
-    repoInfo,
-    searchDurationMs,
+    repoInfo: _repoInfo,
+    searchDurationMs: _searchDurationMs,
    numMatches,
+    searchStats,
 }: PanelGroupProps) => {
    const [previewedFile, setPreviewedFile] = useState<SearchResultFile | undefined>(undefined);
    const filteredFileMatches = useFilteredMatches(fileMatches);
@ -241,6 +230,17 @@ const PanelGroup = ({
        description: "Toggle filter panel",
    });

+    const searchDurationMs = useMemo(() => {
+        return Math.round(_searchDurationMs);
+    }, [_searchDurationMs]);
+
+    const repoInfo = useMemo(() => {
+        return _repoInfo.reduce((acc, repo) => {
+            acc[repo.id] = repo;
+            return acc;
+        }, {} as Record<number, RepositoryInfo>);
+    }, [_repoInfo]);
+
    return (
        <ResizablePanelGroup
            direction="horizontal"
@ -297,7 +297,27 @@ const PanelGroup = ({
                order={2}
            >
                <div className="py-1 px-2 flex flex-row items-center">
+                    <Tooltip>
+                        <TooltipTrigger asChild>
                            <InfoCircledIcon className="w-4 h-4 mr-2" />
+                        </TooltipTrigger>
+                        <TooltipContent side="right" className="flex flex-col items-start gap-2 p-4">
+                            <div className="flex flex-row items-center w-full">
+                                <BugIcon className="w-4 h-4 mr-1.5" />
+                                <p className="text-md font-medium">Search stats for nerds</p>
+                                <CopyIconButton
+                                    onCopy={() => {
+                                        navigator.clipboard.writeText(JSON.stringify(searchStats, null, 2));
+                                        return true;
+                                    }}
+                                    className="ml-auto"
+                                />
+                            </div>
+                            <CodeSnippet renderNewlines>
+                                {JSON.stringify(searchStats, null, 2)}
+                            </CodeSnippet>
+                        </TooltipContent>
+                    </Tooltip>
                    {
                        fileMatches.length > 0 ? (
                            <p className="text-sm font-medium">{`[${searchDurationMs} ms] Found ${numMatches} matches in ${fileMatches.length} ${fileMatches.length > 1 ? 'files' : 'file'}`}</p>
--- a/packages/web/src/app/components/codeSnippet.tsx
+++ b/packages/web/src/app/components/codeSnippet.tsx
@ -1,12 +1,12 @@
 import { cn } from "@/lib/utils"

-export const CodeSnippet = ({ children, className, title }: { children: React.ReactNode, className?: string, title?: string }) => {
+export const CodeSnippet = ({ children, className, title, renderNewlines = false }: { children: React.ReactNode, className?: string, title?: string, renderNewlines?: boolean }) => {
    return (
        <code
            className={cn("bg-gray-100 dark:bg-gray-700 w-fit rounded-md px-2 py-0.5 font-medium font-mono", className)}
            title={title}
        >
-            {children}
+            {renderNewlines ? <pre>{children}</pre> : children}
        </code>
    )
 }
--- a/packages/web/src/app/globals.css
+++ b/packages/web/src/app/globals.css
@ -311,6 +311,11 @@
  text-overflow: ellipsis;
 }

+.truncate-start > * {
+  direction: ltr;
+  unicode-bidi: embed;
+}
+
@layer base {
  * {
    @apply border-border;
--- a/packages/web/src/app/login/page.tsx
+++ b/packages/web/src/app/login/page.tsx
@ -2,13 +2,10 @@ import { auth } from "@/auth";
 import { LoginForm } from "./components/loginForm";
 import { redirect } from "next/navigation";
 import { Footer } from "@/app/components/footer";
-import { createLogger } from "@sourcebot/logger";
 import { getAuthProviders } from "@/lib/authProviders";
 import { getOrgFromDomain } from "@/data/org";
 import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants";

-const logger = createLogger('login-page');
-
 interface LoginProps {
    searchParams: Promise<{
        callbackUrl?: string;
@ -18,10 +15,8 @@ interface LoginProps {

 export default async function Login(props: LoginProps) {
    const searchParams = await props.searchParams;
-    logger.info("Login page loaded");
    const session = await auth();
    if (session) {
-        logger.info("Session found in login page, redirecting to home");
        return redirect("/");
    }

--- a/packages/web/src/env.mjs
+++ b/packages/web/src/env.mjs
@ -15,9 +15,6 @@ export const env = createEnv({
    server: {
        // Zoekt
        ZOEKT_WEBSERVER_URL: z.string().url().default("http://localhost:6070"),
-        SHARD_MAX_MATCH_COUNT: numberSchema.default(10000),
-        TOTAL_MAX_MATCH_COUNT: numberSchema.default(100000),
-        ZOEKT_MAX_WALL_TIME_MS: numberSchema.default(10000),
        
        // Auth
        FORCE_ENABLE_ANONYMOUS_ACCESS: booleanSchema.default('false'),
--- a/packages/web/src/features/chat/components/chatThread/tools/shared.tsx
+++ b/packages/web/src/features/chat/components/chatThread/tools/shared.tsx
@ -32,7 +32,7 @@ export const FileListItem = ({
                    pathType: 'blob',
                })}
            >
-                {path}
+                <span>{path}</span>
            </Link>
        </div>
    )
--- a/packages/web/src/features/codeNav/actions.ts
+++ b/packages/web/src/features/codeNav/actions.ts
@ -80,7 +80,7 @@ export const findSearchBasedSymbolDefinitions = async (
 const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse) => {
    const parser = searchResponseSchema.transform(async ({ files }) => ({
        stats: {
-            matchCount: searchResult.stats.matchCount,
+            matchCount: searchResult.stats.actualMatchCount,
        },
        files: files.flatMap((file) => {
            const chunks = file.chunks;
--- a/packages/web/src/features/search/schemas.ts
+++ b/packages/web/src/features/search/schemas.ts
@ -37,35 +37,82 @@ export const repositoryInfoSchema = z.object({
    name: z.string(),
    displayName: z.string().optional(),
    webUrl: z.string().optional(),
-})
+});
+
+// Many of these fields are defined in zoekt/api.go.
+export const searchStatsSchema = z.object({
+    // The actual number of matches returned by the search.
+    // This will always be less than or equal to `totalMatchCount`.
+    actualMatchCount: z.number(),
+
+    // The total number of matches found during the search.
+    totalMatchCount: z.number(),

-export const searchResponseSchema = z.object({
-    zoektStats: z.object({
    // The duration (in nanoseconds) of the search.
    duration: z.number(),
+
+    // Number of files containing a match.
    fileCount: z.number(),
-        matchCount: z.number(),
+
+    // Candidate files whose contents weren't examined because we
+    // gathered enough matches.
    filesSkipped: z.number(),
+
+    // Amount of I/O for reading contents.
    contentBytesLoaded: z.number(),
+
+    // Amount of I/O for reading from index.
    indexBytesLoaded: z.number(),
+
+    // Number of search shards that had a crash.
    crashes: z.number(),
+
+    // Number of files in shards that we considered.
    shardFilesConsidered: z.number(),
+
+    // Files that we evaluated. Equivalent to files for which all
+    // atom matches (including negations) evaluated to true.
    filesConsidered: z.number(),
+
+    // Files for which we loaded file content to verify substring matches
    filesLoaded: z.number(),
+
+    // Shards that we scanned to find matches.
    shardsScanned: z.number(),
+
+    // Shards that we did not process because a query was canceled.
    shardsSkipped: z.number(),
+
+    // Shards that we did not process because the query was rejected by the
+    // ngram filter indicating it had no matches.
    shardsSkippedFilter: z.number(),
+
+    // Number of candidate matches as a result of searching ngrams.
    ngramMatches: z.number(),
+
+    // NgramLookups is the number of times we accessed an ngram in the index.
    ngramLookups: z.number(),
+
+    // Wall clock time for queued search.
    wait: z.number(),
+
+    // Aggregate wall clock time spent constructing and pruning the match tree.
+    // This accounts for time such as lookups in the trigram index.
    matchTreeConstruction: z.number(),
+
+    // Aggregate wall clock time spent searching the match tree. This accounts
+    // for the bulk of search work done looking for matches.
    matchTreeSearch: z.number(),
+
+    // Number of times regexp was called on files that we evaluated.
    regexpsConsidered: z.number(),
+
+    // FlushReason explains why results were flushed.
    flushReason: z.number(),
-    }),
-    stats: z.object({
-        matchCount: z.number(),
-    }),
+});
+
+export const searchResponseSchema = z.object({
+    stats: searchStatsSchema,
    files: z.array(z.object({
        fileName: z.object({
            // The name of the file
@ -92,6 +139,7 @@ export const searchResponseSchema = z.object({
    })),
    repositoryInfo: z.array(repositoryInfoSchema),
    isBranchFilteringEnabled: z.boolean(),
+    isSearchExhaustive: z.boolean(),
 });

 export const fileSourceRequestSchema = z.object({
--- a/packages/web/src/features/search/searchApi.ts
+++ b/packages/web/src/features/search/searchApi.ts
@ -151,12 +151,48 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
            // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
            opts: {
                ChunkMatches: true,
+                // @note: Zoekt has several different ways to limit a given search. The two that
+                // we care about are `MaxMatchDisplayCount` and `TotalMaxMatchCount`:
+                // - `MaxMatchDisplayCount` truncates the number of matches AFTER performing
+                //   a search (specifically, after collating and sorting the results). The number of
+                //   results returned by the API will be less than or equal to this value.
+                //
+                // - `TotalMaxMatchCount` truncates the number of matches DURING a search. The results
+                //   returned by the API can be less than, equal to, or greater than this value.
+                //   Why greater? Because this value is compared _after_ a given shard has finished
+                //   being processed, the number of matches returned by the last shard may have exceeded
+                //   this value.
+                //
+                // Let's define two variables:
+                // - `actualMatchCount` : The number of matches that are returned by the API. This is
+                //   always less than or equal to `MaxMatchDisplayCount`.
+                // - `totalMatchCount` : The number of matches that zoekt found before it either
+                //   1) found all matches or 2) hit the `TotalMaxMatchCount` limit. This number is
+                //   not bounded and can be less than, equal to, or greater than both `TotalMaxMatchCount`
+                //   and `MaxMatchDisplayCount`.
+                //
+                //
+                // Our challenge is to determine whether or not the search returned all possible matches
+                // (it was exhaustive) or if it was truncated. By setting the `TotalMaxMatchCount` to
+                // `MaxMatchDisplayCount + 1`, we can determine which of these occurred by comparing
+                // `totalMatchCount` to `MaxMatchDisplayCount`.
+                //
+                // if (totalMatchCount ≤ actualMatchCount):
+                //     Search is EXHAUSTIVE (found all possible matches)
+                //     Proof: totalMatchCount ≤ MaxMatchDisplayCount < TotalMaxMatchCount
+                //         Therefore Zoekt stopped naturally, not due to limit
+                //     
+                // if (totalMatchCount > actualMatchCount):
+                //     Search is TRUNCATED (more matches exist)
+                //     Proof: totalMatchCount ≥ MaxMatchDisplayCount + 1 = TotalMaxMatchCount
+                //         Therefore Zoekt hit the limit and stopped searching
+                //
                MaxMatchDisplayCount: matches,
+                TotalMaxMatchCount: matches + 1,
                NumContextLines: contextLines,
                Whole: !!whole,
-                TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
-                ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
-                MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
+                ShardMaxMatchCount: -1,
+                MaxWallTime: 0, // zoekt expects a duration in nanoseconds
            }
        });

@ -296,11 +332,35 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
                }
            }).filter((file) => file !== undefined) ?? [];

+            const actualMatchCount = files.reduce(
+                (acc, file) =>
+                    // Match count is the sum of the number of chunk matches and file name matches.
+                    acc + file.chunks.reduce(
+                        (acc, chunk) => acc + chunk.matchRanges.length,
+                        0,
+                    ) + file.fileName.matchRanges.length,
+                0,
+            );
+
+            const totalMatchCount = Result.MatchCount;
+            const isSearchExhaustive = totalMatchCount <= actualMatchCount;
+
            return {
-                zoektStats: {
+                files,
+                repositoryInfo: Array.from(repos.values()).map((repo) => ({
+                    id: repo.id,
+                    codeHostType: repo.external_codeHostType,
+                    name: repo.name,
+                    displayName: repo.displayName ?? undefined,
+                    webUrl: repo.webUrl ?? undefined,
+                })),
+                isBranchFilteringEnabled,
+                isSearchExhaustive,
+                stats: {
+                    actualMatchCount,
+                    totalMatchCount,
                    duration: Result.Duration,
                    fileCount: Result.FileCount,
-                    matchCount: Result.MatchCount,
                    filesSkipped: Result.FilesSkipped,
                    contentBytesLoaded: Result.ContentBytesLoaded,
                    indexBytesLoaded: Result.IndexBytesLoaded,
@ -318,25 +378,6 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ
                    matchTreeSearch: Result.MatchTreeSearch,
                    regexpsConsidered: Result.RegexpsConsidered,
                    flushReason: Result.FlushReason,
-                },
-                files,
-                repositoryInfo: Array.from(repos.values()).map((repo) => ({
-                    id: repo.id,
-                    codeHostType: repo.external_codeHostType,
-                    name: repo.name,
-                    displayName: repo.displayName ?? undefined,
-                    webUrl: repo.webUrl ?? undefined,
-                })),
-                isBranchFilteringEnabled: isBranchFilteringEnabled,
-                stats: {
-                    matchCount: files.reduce(
-                        (acc, file) =>
-                            acc + file.chunks.reduce(
-                                (acc, chunk) => acc + chunk.matchRanges.length,
-                                0,
-                            ),
-                        0,
-                    )
                }
            } satisfies SearchResponse;
        });
--- a/packages/web/src/features/search/types.ts
+++ b/packages/web/src/features/search/types.ts
@ -8,6 +8,7 @@ import {
    fileSourceRequestSchema,
    symbolSchema,
    repositoryInfoSchema,
+    searchStatsSchema,
 } from "./schemas";
 import { z } from "zod";

@ -23,3 +24,4 @@ export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;

 export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;
 export type SourceRange = z.infer<typeof rangeSchema>;
+export type SearchStats = z.infer<typeof searchStatsSchema>;
--- a/packages/web/src/lib/posthogEvents.ts
+++ b/packages/web/src/lib/posthogEvents.ts
@ -15,6 +15,7 @@ export type PosthogEventMap = {
        shardsSkipped: number,
        shardsSkippedFilter: number,
        matchCount: number,
+        actualMatchCount: number,
        ngramMatches: number,
        ngramLookups: number,
        wait: number,
Author	SHA1	Message	Date
Brendan Kellam	dc6bb954ef	Merge `7341a49407` into `c3fae1aaab`	2025-10-13 14:47:56 -05:00
Brendan Kellam	c3fae1aaab	feat(web): Improved search performance on unbounded searches (#555 )	2025-10-07 23:55:36 -07:00
Brendan Kellam	18ba1d2492	update demo deploy cadence (#556 )	2025-10-07 23:51:52 -07:00
bkellam	8d7babc8d2	chore(worker): Change log message to debug	2025-10-07 16:38:56 -07:00
bkellam	595abc12be	use blacksmith arm machine for arm builds	2025-10-07 10:21:19 -07:00
blacksmith-sh[bot]	0e8fdf0f97	Migrate workflows to Blacksmith (#554 ) Co-authored-by: blacksmith-sh[bot] <157653362+blacksmith-sh[bot]@users.noreply.github.com>	2025-10-07 10:05:27 -07:00
Brendan Kellam	83c6704b01	fix: Fix git dubious ownership errors (#553 )	2025-10-06 19:54:17 -07:00
Brendan Kellam	5e3e4f000a	chore(web): Remove spam "login page loaded" log (#552 )	2025-10-06 15:04:41 -07:00
msukkari	623c794a75	update description in docs	2025-10-04 10:03:29 -07:00
Brendan Kellam	425a816fb6	Update README.md	2025-10-03 21:38:49 -07:00
bkellam	6a4c9220bd	chore: try including platform pair in cache key	2025-10-03 21:24:14 -07:00
Brendan Kellam	eeb6b73a64	chore: Move helm chart to seperate repo (#549 )	2025-10-03 15:45:36 -07:00
Andre Nogueira	9c8224e39f	Add Sourcebot Helm Chart (#370 ) * feat: add helm chart Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * feat: add sts support to use internal DB and improve values docs Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: include postgresql extra dependency Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: remove autoscaler Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: remove sts Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: add more suggestive env var example Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: add chart dependency lock Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> * fix: add host infer to the chart docs Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com> --------- Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>	2025-10-03 15:39:26 -07:00
Brendan Kellam	c10010eb99	feat(db): Support passing db connection as separate env vars (#545 )	2025-10-02 12:51:39 -07:00
bkellam	d24de793f2	Add roadmap link to docs	2025-10-01 14:47:26 -07:00
bkellam	7341a49407	fix	2025-08-19 11:13:30 -07:00