diff --git a/.env.development b/.env.development index ddb981af..57d75115 100644 --- a/.env.development +++ b/.env.development @@ -6,8 +6,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres" ZOEKT_WEBSERVER_URL="http://localhost:6070" # The command to use for generating ctags. CTAGS_COMMAND=ctags -# logging, strict -SRC_TENANT_ENFORCEMENT_MODE=strict # Auth.JS # You can generate a new secret with: @@ -23,7 +21,7 @@ AUTH_URL="http://localhost:3000" DATA_CACHE_DIR=${PWD}/.sourcebot # Path to the sourcebot cache dir (ex. ~/sourcebot/.sourcebot) SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem -# CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists) +CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists) # Email # EMAIL_FROM_ADDRESS="" # The from address for transactional emails. @@ -31,7 +29,6 @@ SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem # PostHog # POSTHOG_PAPIK="" -# NEXT_PUBLIC_POSTHOG_PAPIK="" # Sentry # SENTRY_BACKEND_DSN="" diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index a129e5ce..f43a46bd 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ contact_links: - name: 👾 Discord - url: https://discord.gg/GbXMEM5H + url: https://discord.gg/HDScTs3ptP about: Something else? Join the Discord! 
diff --git a/.github/workflows/_gcp-deploy.yml b/.github/workflows/_gcp-deploy.yml index 15fde89b..0454e5b6 100644 --- a/.github/workflows/_gcp-deploy.yml +++ b/.github/workflows/_gcp-deploy.yml @@ -55,7 +55,6 @@ jobs: ${{ env.IMAGE_PATH }}:latest build-args: | NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }} - NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }} NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT }} NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }} NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }} diff --git a/.github/workflows/ghcr-publish.yml b/.github/workflows/ghcr-publish.yml index cf96bea7..67bb9071 100644 --- a/.github/workflows/ghcr-publish.yml +++ b/.github/workflows/ghcr-publish.yml @@ -77,7 +77,6 @@ jobs: outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search build-args: | NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }} - NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }} - name: Export digest run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index b2d34c67..0d9acdf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611) + +## [4.10.1] - 2025-12-03 + +### Added +- Added `ALWAYS_INDEX_FILE_PATTERNS` environment variable to allow specifying a comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or # of trigrams. [#631](https://github.com/sourcebot-dev/sourcebot/pull/631) +- Added button to explore menu to toggle cross-repository search. 
[#647](https://github.com/sourcebot-dev/sourcebot/pull/647) +- Added server side telemetry for search metrics. [#652](https://github.com/sourcebot-dev/sourcebot/pull/652) + +### Fixed +- Fixed issue where single quotes could not be used in search queries. [#629](https://github.com/sourcebot-dev/sourcebot/pull/629) +- Fixed issue where files with special characters would fail to load. [#636](https://github.com/sourcebot-dev/sourcebot/issues/636) +- Fixed Ask performance issues. [#632](https://github.com/sourcebot-dev/sourcebot/pull/632) +- Fixed regression where creating a new Ask thread when unauthenticated would result in a 404. [#641](https://github.com/sourcebot-dev/sourcebot/pull/641) +- Updated react and next package versions to fix CVE-2025-55182. [#654](https://github.com/sourcebot-dev/sourcebot/pull/654) + +### Changed +- Changed the default behaviour for code nav to scope references & definitions search to the current repository. [#647](https://github.com/sourcebot-dev/sourcebot/pull/647) + +## [4.10.0] - 2025-11-24 + +### Added +- Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Added counts to members, requests, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621) +- [Sourcebot EE] Add support for Authentik as an identity provider. [#627](https://github.com/sourcebot-dev/sourcebot/pull/627) + +### Changed +- Changed the default search behaviour to match patterns as substrings and **not** regular expressions. Regular expressions can be used by toggling the regex button in search bar. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Renamed `public` query prefix to `visibility`. Allowed values for `visibility` are `public`, `private`, and `any`. 
[#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Changed `archived` query prefix to accept values `yes`, `no`, and `only`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) + +### Removed +- Removed `case` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Removed `branch` and `b` query prefixes. Please use `rev:` instead. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Removed `regex` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) + ### Fixed - Fixed spurious infinite loads with explore panel, file tree, and file search command. [#617](https://github.com/sourcebot-dev/sourcebot/pull/617) - Wipe search context on init if entitlement no longer exists [#618](https://github.com/sourcebot-dev/sourcebot/pull/618) - Fixed Bitbucket repository exclusions not supporting glob patterns. [#620](https://github.com/sourcebot-dev/sourcebot/pull/620) -- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611) +- Fixed issue where the repo driven permission syncer was attempting to sync public repositories. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624) +- Fixed issue where worker would not shutdown while a permission sync job (repo or user) was in progress. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624) ## [4.9.2] - 2025-11-13 diff --git a/Dockerfile b/Dockerfile index 41c67712..9a75ade0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1 # ------ Global scope variables ------ # Set of global build arguments. @@ -8,11 +9,6 @@ # @see: https://docs.docker.com/build/building/variables/#scoping ARG NEXT_PUBLIC_SOURCEBOT_VERSION -# PAPIK = Project API Key -# Note that this key does not need to be kept secret, so it's not -# necessary to use Docker build secrets here. 
-# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key -ARG NEXT_PUBLIC_POSTHOG_PAPIK ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN @@ -43,10 +39,12 @@ COPY .yarn ./.yarn COPY ./packages/db ./packages/db COPY ./packages/schemas ./packages/schemas COPY ./packages/shared ./packages/shared +COPY ./packages/queryLanguage ./packages/queryLanguage RUN yarn workspace @sourcebot/db install RUN yarn workspace @sourcebot/schemas install RUN yarn workspace @sourcebot/shared install +RUN yarn workspace @sourcebot/query-language install # ------------------------------------ # ------ Build Web ------ @@ -55,8 +53,6 @@ ENV SKIP_ENV_VALIDATION=1 # ----------- ARG NEXT_PUBLIC_SOURCEBOT_VERSION ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION -ARG NEXT_PUBLIC_POSTHOG_PAPIK -ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT @@ -92,6 +88,7 @@ COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage # Fixes arm64 timeouts RUN yarn workspace @sourcebot/web install @@ -130,6 +127,7 @@ COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage RUN yarn workspace @sourcebot/backend install RUN yarn workspace @sourcebot/backend 
build @@ -144,14 +142,12 @@ fi ENV SKIP_ENV_VALIDATION=0 # ------------------------------ - + # ------ Runner ------ FROM node-alpine AS runner # ----------- ARG NEXT_PUBLIC_SOURCEBOT_VERSION ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION -ARG NEXT_PUBLIC_POSTHOG_PAPIK -ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN @@ -173,8 +169,13 @@ ENV DATA_DIR=/data ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis -ENV SRC_TENANT_ENFORCEMENT_MODE=strict ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem +# PAPIK = Project API Key +# Note that this key does not need to be kept secret, so it's not +# necessary to use Docker build secrets here. +# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key +# @note: this is also declared in the shared env.server.ts file. 
+ENV POSTHOG_PAPIK=phc_lLPuFFi5LH6c94eFJcqvYVFwiJffVcV6HD8U4a1OnRW # Valid values are: debug, info, warn, error ENV SOURCEBOT_LOG_LEVEL=info @@ -217,18 +218,23 @@ COPY --from=zoekt-builder \ /cmd/zoekt-index \ /usr/local/bin/ +RUN chown -R sourcebot:sourcebot /app + +# Copy zoekt proto files (needed for gRPC client at runtime) +COPY --chown=sourcebot:sourcebot vendor/zoekt/grpc/protos /app/vendor/zoekt/grpc/protos + # Copy all of the things -COPY --from=web-builder /app/packages/web/public ./packages/web/public -COPY --from=web-builder /app/packages/web/.next/standalone ./ -COPY --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static +COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/public ./packages/web/public +COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/standalone ./ +COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static -COPY --from=backend-builder /app/node_modules ./node_modules -COPY --from=backend-builder /app/packages/backend ./packages/backend +COPY --chown=sourcebot:sourcebot --from=backend-builder /app/node_modules ./node_modules +COPY --chown=sourcebot:sourcebot --from=backend-builder /app/packages/backend ./packages/backend -COPY --from=shared-libs-builder /app/node_modules ./node_modules -COPY --from=shared-libs-builder /app/packages/db ./packages/db -COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas -COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/db ./packages/db +COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/schemas ./packages/schemas +COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage # Fixes git "dubious 
ownership" issues when the volume is mounted with different permissions to the container. RUN git config --global safe.directory "*" @@ -238,9 +244,6 @@ RUN mkdir -p /run/postgresql && \ chown -R postgres:postgres /run/postgresql && \ chmod 775 /run/postgresql -# Make app directory accessible to both root and sourcebot user -RUN chown -R sourcebot:sourcebot /app -# Make data directory accessible to both root and sourcebot user RUN chown -R sourcebot:sourcebot /data COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf diff --git a/docs/docs.json b/docs/docs.json index fccfe7f1..4237169e 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -144,7 +144,7 @@ "socials": { "github": "https://github.com/sourcebot-dev/sourcebot", "twitter": "https://x.com/sourcebot_dev", - "discord": "https://discord.gg/GbXMEM5H", + "discord": "https://discord.gg/HDScTs3ptP", "linkedin": "https://www.linkedin.com/company/sourcebot" } }, diff --git a/docs/docs/configuration/auth/overview.mdx b/docs/docs/configuration/auth/overview.mdx index a71de5fe..117eed4f 100644 --- a/docs/docs/configuration/auth/overview.mdx +++ b/docs/docs/configuration/auth/overview.mdx @@ -25,4 +25,4 @@ Sourcebot's built-in authentication system gates your deployment, and allows adm # Troubleshooting - If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers). -- Still not working? Reach out to us on our [discord](https://discord.gg/GbXMEM5H) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) \ No newline at end of file +- Still not working? 
Reach out to us on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) \ No newline at end of file diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index 87167858..e29fb88f 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -35,6 +35,7 @@ The following environment variables allow you to configure your Sourcebot deploy | `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - |

Optional file to log to if structured logging is enabled

| | `SOURCEBOT_TELEMETRY_DISABLED` | `false` |

Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.

| | `DEFAULT_MAX_MATCH_COUNT` | `10000` |

The default maximum number of search results to return when using search in the web app.

| +| `ALWAYS_INDEX_FILE_PATTERNS` | - |

A comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or number of trigrams.

| ### Enterprise Environment Variables | Variable | Default | Description | diff --git a/docs/docs/configuration/idp.mdx b/docs/docs/configuration/idp.mdx index 21ae756d..6d2475b5 100644 --- a/docs/docs/configuration/idp.mdx +++ b/docs/docs/configuration/idp.mdx @@ -366,3 +366,53 @@ A Microsoft Entra ID connection can be used for [authentication](/docs/configura +### Authentik + +[Auth.js Authentik Provider Docs](https://authjs.dev/getting-started/providers/authentik) + +An Authentik connection can be used for [authentication](/docs/configuration/auth). + + + + + To begin, you must create an OAuth2/OpenID Connect application in Authentik. For more information, see the [Authentik documentation](https://docs.goauthentik.io/add-secure-apps/applications/manage_apps/#create-an-application-and-provider-pair). + + When configuring your application: + - Set the provider type to "OAuth2/OpenID Connect" + - Set the client type to "Confidential" + - Add `/api/auth/callback/authentik` to the redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/authentik) + + After creating the application, open the application details to obtain the client id, client secret, and issuer URL (typically in the format `https://<authentik-domain>/application/o/<application-slug>/`). + + + The client id, secret, and issuer URL are provided to Sourcebot via environment variables. These can be named whatever you like + (ex. 
`AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID`, `AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET`, and `AUTHENTIK_IDENTITY_PROVIDER_ISSUER`) + + + Create an `identityProvider` object in the [config file](/docs/configuration/config-file) with the following fields: + + ```json wrap icon="code" + { + "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json", + "identityProviders": [ + { + "provider": "authentik", + "purpose": "sso", + "clientId": { + "env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID" + }, + "clientSecret": { + "env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET" + }, + "issuer": { + "env": "AUTHENTIK_IDENTITY_PROVIDER_ISSUER" + } + } + ] + } + ``` + + + + + diff --git a/docs/docs/connections/overview.mdx b/docs/docs/connections/overview.mdx index ab9f8ffc..cb3b1432 100644 --- a/docs/docs/connections/overview.mdx +++ b/docs/docs/connections/overview.mdx @@ -69,6 +69,26 @@ To learn more about how to create a connection for a specific code host, check o Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md). +## Indexing Large Files + +By default, Sourcebot will skip indexing files that are larger than 2MB or have more than 20,000 trigrams. You can configure this by setting the `maxFileSize` and `maxTrigramCount` [settings](/docs/configuration/config-file#settings). + +These limits can be ignored for specific files by passing in a comma separated list of glob patterns matching file paths to the `ALWAYS_INDEX_FILE_PATTERNS` environment variable. For example: + +```bash +# Always index all .sum and .lock files +ALWAYS_INDEX_FILE_PATTERNS=**/*.sum,**/*.lock +``` + +Files that have been skipped are assigned the `skipped` language. You can view a list of all skipped files by using the following query: +``` +lang:skipped +``` + +## Indexing Binary Files + +Binary files cannot be indexed by Sourcebot. 
See [#575](https://github.com/sourcebot-dev/sourcebot/issues/575) for more information. + ## Schema reference --- diff --git a/docs/docs/features/code-navigation.mdx b/docs/docs/features/code-navigation.mdx index 6720556d..9e9f7db6 100644 --- a/docs/docs/features/code-navigation.mdx +++ b/docs/docs/features/code-navigation.mdx @@ -21,6 +21,7 @@ import LicenseKeyRequired from '/snippets/license-key-required.mdx' | **Go to definition** | Clicking the "go to definition" button in the popover or clicking the symbol name navigates to the symbol's definition. | | **Find references** | Clicking the "find all references" button in the popover lists all references in the explore panel. | | **Explore panel** | Lists all references and definitions for the symbol selected in the popover. | +| **Cross-repository navigation** | You can search across all repositories by clicking the globe icon in the explore panel. By default, references and definitions are scoped to the repository where the symbol is being resolved. | ## How does it work? diff --git a/docs/docs/features/search/syntax-reference.mdx b/docs/docs/features/search/syntax-reference.mdx index cde52d0e..f5760002 100644 --- a/docs/docs/features/search/syntax-reference.mdx +++ b/docs/docs/features/search/syntax-reference.mdx @@ -4,32 +4,51 @@ title: Writing search queries Sourcebot uses a powerful regex-based query language that enabled precise code search within large codebases. - ## Syntax reference guide -Queries consist of space-separated regular expressions. Wrapping expressions in `""` combines them. By default, a file must have at least one match for each expression to be included. +Queries consist of space-separated search patterns that are matched against file contents. A file must have at least one match for each expression to be included. Queries can optionally contain search filters to further refine the search results. 
+ +## Keyword search (default) + +Keyword search matches search patterns exactly in file contents. Wrapping search patterns in `""` combines them as a single expression. + +| Example | Explanation | +| :--- | :--- | +| `foo` | Match files containing the keyword `foo` | +| `foo bar` | Match files containing both `foo` **and** `bar` | +| `"foo bar"` | Match files containing the phrase `foo bar` | +| `"foo \"bar\""` | Match files containing `foo "bar"` exactly (escaped quotes) | + +## Regex search + +Toggle the regex button (`.*`) in the search bar to interpret search patterns as regular expressions. | Example | Explanation | | :--- | :--- | | `foo` | Match files with regex `/foo/` | -| `foo bar` | Match files with regex `/foo/` **and** `/bar/` | -| `"foo bar"` | Match files with regex `/foo bar/` | +| `foo.*bar` | Match files with regex `/foo.*bar/` (foo followed by any characters, then bar) | +| `^function\s+\w+` | Match files with regex `/^function\s+\w+/` (function at start of line, followed by whitespace and word characters) | +| `"foo bar"` | Match files with regex `/foo bar/`. Quotes are not matched. | -Multiple expressions can be or'd together with `or`, negated with `-`, or grouped with `()`. +## Search filters -| Example | Explanation | -| :--- | :--- | -| `foo or bar` | Match files with regex `/foo/` **or** `/bar/` | -| `foo -bar` | Match files with regex `/foo/` but **not** `/bar/` | -| `foo (bar or baz)` | Match files with regex `/foo/` **and** either `/bar/` **or** `/baz/` | - -Expressions can be prefixed with certain keywords to modify search behavior. Some keywords can be negated using the `-` prefix. +Search queries (keyword or regex) can include multiple search filters to further refine the search results. Some filters can be negated using the `-` prefix. | Prefix | Description | Example | | :--- | :--- | :--- | | `file:` | Filter results from filepaths that match the regex. By default all files are searched. 
| `file:README` - Filter results to filepaths that match regex `/README/`
`file:"my file"` - Filter results to filepaths that match regex `/my file/`
`-file:test\.ts$` - Ignore results from filepaths match regex `/test\.ts$/` | -| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`
`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*` | +| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`
`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*/` | | `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` | | `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files
`-lang:YAML` - Ignore results from YAML files | | `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` | -| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context
`-context:pipelines` - Ignore results from the pipelines context | \ No newline at end of file +| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context
`-context:pipelines` - Ignore results from the pipelines context | + +## Boolean operators & grouping + +By default, space-separated expressions are and'd together. The `or` keyword and parentheses `()` can be used to create more complex boolean logic. Parentheses can be negated using the `-` prefix. + +| Example | Explanation | +| :--- | :--- | +| `foo or bar` | Match files containing `foo` **or** `bar` | +| `foo (bar or baz)` | Match files containing `foo` **and** either `bar` **or** `baz`. | +| `-(foo) bar` | Match files containing `bar` **and not** `foo`. | diff --git a/docs/docs/upgrade/v2-to-v3-guide.mdx b/docs/docs/upgrade/v2-to-v3-guide.mdx index bd9f03d9..3d8828e9 100644 --- a/docs/docs/upgrade/v2-to-v3-guide.mdx +++ b/docs/docs/upgrade/v2-to-v3-guide.mdx @@ -78,7 +78,7 @@ If your deployment is dependent on these features, please [reach out](https://gi After updating your configuration file, restart your Sourcebot deployment to pick up the new changes. - Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/GbXMEM5H) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose). + Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose). @@ -90,4 +90,4 @@ Some things to check: - Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores - Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`) -Having troubles migrating from v2 to v3? 
Reach out to us on [discord](https://discord.gg/GbXMEM5H) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help \ No newline at end of file +Having troubles migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help \ No newline at end of file diff --git a/docs/docs/upgrade/v3-to-v4-guide.mdx b/docs/docs/upgrade/v3-to-v4-guide.mdx index 1dc3ef2b..a29d12a1 100644 --- a/docs/docs/upgrade/v3-to-v4-guide.mdx +++ b/docs/docs/upgrade/v3-to-v4-guide.mdx @@ -40,7 +40,7 @@ Please note that the following features are no longer supported in v4: - Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/GbXMEM5H) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) + Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) @@ -58,4 +58,4 @@ to finish upgrading to v4 in single-tenant mode. - If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`) -Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/GbXMEM5H) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help \ No newline at end of file +Having troubles migrating from v3 to v4? 
Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help \ No newline at end of file diff --git a/docs/snippets/schemas/v3/identityProvider.schema.mdx b/docs/snippets/schemas/v3/identityProvider.schema.mdx index 30c172be..da75427d 100644 --- a/docs/snippets/schemas/v3/identityProvider.schema.mdx +++ b/docs/snippets/schemas/v3/identityProvider.schema.mdx @@ -647,6 +647,115 @@ "purpose", "audience" ] + }, + "AuthentikIdentityProviderConfig": { + "type": "object", + "additionalProperties": false, + "properties": { + "provider": { + "const": "authentik" + }, + "purpose": { + "const": "sso" + }, + "clientId": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "clientSecret": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. 
See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "issuer": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + } + }, + "required": [ + "provider", + "purpose", + "clientId", + "clientSecret", + "issuer" + ] } }, "oneOf": [ @@ -1293,6 +1402,115 @@ "purpose", "audience" ] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "provider": { + "const": "authentik" + }, + "purpose": { + "const": "sso" + }, + "clientId": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "clientSecret": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." 
+ } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "issuer": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + } + }, + "required": [ + "provider", + "purpose", + "clientId", + "clientSecret", + "issuer" + ] } ] } diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 413e51bd..f3da7ed4 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -5163,6 +5163,115 @@ "purpose", "audience" ] + }, + "AuthentikIdentityProviderConfig": { + "type": "object", + "additionalProperties": false, + "properties": { + "provider": { + "const": "authentik" + }, + "purpose": { + "const": "sso" + }, + "clientId": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." 
+ } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "clientSecret": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "issuer": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. 
See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + } + }, + "required": [ + "provider", + "purpose", + "clientId", + "clientSecret", + "issuer" + ] } }, "oneOf": [ @@ -5809,6 +5918,115 @@ "purpose", "audience" ] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "provider": { + "const": "authentik" + }, + "purpose": { + "const": "sso" + }, + "clientId": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "clientSecret": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." + } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + }, + "issuer": { + "anyOf": [ + { + "type": "object", + "properties": { + "env": { + "type": "string", + "description": "The name of the environment variable that contains the token." 
+ } + }, + "required": [ + "env" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "googleCloudSecret": { + "type": "string", + "description": "The resource name of a Google Cloud secret. Must be in the format `projects//secrets//versions/`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets" + } + }, + "required": [ + "googleCloudSecret" + ], + "additionalProperties": false + } + ] + } + }, + "required": [ + "provider", + "purpose", + "clientId", + "clientSecret", + "issuer" + ] } ] } diff --git a/entrypoint.sh b/entrypoint.sh index 22a733e3..4a5f05a3 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -66,12 +66,6 @@ fi echo -e "\e[34m[Info] Sourcebot version: $NEXT_PUBLIC_SOURCEBOT_VERSION\e[0m" -# If we don't have a PostHog key, then we need to disable telemetry. -if [ -z "$NEXT_PUBLIC_POSTHOG_PAPIK" ]; then - echo -e "\e[33m[Warning] NEXT_PUBLIC_POSTHOG_PAPIK was not set. Setting SOURCEBOT_TELEMETRY_DISABLED.\e[0m" - export SOURCEBOT_TELEMETRY_DISABLED=true -fi - if [ -n "$SOURCEBOT_TELEMETRY_DISABLED" ]; then # Validate that SOURCEBOT_TELEMETRY_DISABLED is either "true" or "false" if [ "$SOURCEBOT_TELEMETRY_DISABLED" != "true" ] && [ "$SOURCEBOT_TELEMETRY_DISABLED" != "false" ]; then @@ -159,7 +153,7 @@ if [ ! -f "$FIRST_RUN_FILE" ]; then # (if telemetry is enabled) if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{ - "api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'", + "api_key": "'"$POSTHOG_PAPIK"'", "event": "install", "distinct_id": "'"$SOURCEBOT_INSTALL_ID"'", "properties": { @@ -179,7 +173,7 @@ else if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then if ! 
( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{ - "api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'", + "api_key": "'"$POSTHOG_PAPIK"'", "event": "upgrade", "distinct_id": "'"$SOURCEBOT_INSTALL_ID"'", "properties": { diff --git a/package.json b/package.json index a70bab99..c6621f53 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio", "dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset", "dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push", - "build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared}' run build" + "build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build" }, "devDependencies": { "concurrently": "^9.2.1", @@ -27,6 +27,7 @@ }, "packageManager": "yarn@4.7.0", "resolutions": { - "prettier": "3.5.3" + "prettier": "3.5.3", + "@lezer/common": "1.3.0" } } diff --git a/packages/backend/src/ee/accountPermissionSyncer.ts b/packages/backend/src/ee/accountPermissionSyncer.ts index 81a1a135..a1c879a2 100644 --- a/packages/backend/src/ee/accountPermissionSyncer.ts +++ b/packages/backend/src/ee/accountPermissionSyncer.ts @@ -102,7 +102,7 @@ export class AccountPermissionSyncer { if (this.interval) { clearInterval(this.interval); } - await this.worker.close(); + await this.worker.close(/* force = */ true); await this.queue.close(); } diff --git a/packages/backend/src/ee/repoPermissionSyncer.ts b/packages/backend/src/ee/repoPermissionSyncer.ts index d48f510e..802da032 100644 --- a/packages/backend/src/ee/repoPermissionSyncer.ts +++ b/packages/backend/src/ee/repoPermissionSyncer.ts @@ -55,19 +55,27 @@ export class RepoPermissionSyncer { const repos = await this.db.repo.findMany({ // Repos need their 
permissions to be synced against the code host when... where: { - // They belong to a code host that supports permissions syncing AND: [ + // They are not public. Public repositories are always visible to all users, therefore we don't + // need to explicitly perform permission syncing for them. + // @see: packages/web/src/prisma.ts + { + isPublic: false + }, + // They belong to a code host that supports permissions syncing { external_codeHostType: { in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES, } }, + // They have not been synced within the threshold date. { OR: [ { permissionSyncedAt: null }, { permissionSyncedAt: { lt: thresholdDate } }, ], }, + // There aren't any active or recently failed jobs. { NOT: { permissionSyncJobs: { @@ -106,7 +114,7 @@ export class RepoPermissionSyncer { if (this.interval) { clearInterval(this.interval); } - await this.worker.close(); + await this.worker.close(/* force = */ true); await this.queue.close(); } diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index c3674834..2acf72fd 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -94,7 +94,6 @@ const listenToShutdownSignals = () => { const cleanup = async (signal: string) => { try { if (receivedSignal) { - logger.debug(`Recieved repeat signal ${signal}, ignoring.`); return; } receivedSignal = true; @@ -111,20 +110,21 @@ const listenToShutdownSignals = () => { await redis.quit(); await api.dispose(); await shutdownPosthog(); - logger.info('All workers shut down gracefully'); signals.forEach(sig => process.removeListener(sig, cleanup)); + return 0; } catch (error) { Sentry.captureException(error); logger.error('Error shutting down worker:', error); + return 1; } } signals.forEach(signal => { process.on(signal, (err) => { - cleanup(err).finally(() => { - process.kill(process.pid, signal); + cleanup(err).then(code => { + process.exit(code); }); }); }); @@ -132,14 +132,14 @@ const listenToShutdownSignals = () => { // Register 
handlers for uncaught exceptions and unhandled rejections process.on('uncaughtException', (err) => { logger.error(`Uncaught exception: ${err.message}`); - cleanup('uncaughtException').finally(() => { + cleanup('uncaughtException').then(() => { process.exit(1); }); }); process.on('unhandledRejection', (reason, promise) => { logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`); - cleanup('unhandledRejection').finally(() => { + cleanup('unhandledRejection').then(() => { process.exit(1); }); }); diff --git a/packages/backend/src/posthog.ts b/packages/backend/src/posthog.ts index 54b99e43..7034283f 100644 --- a/packages/backend/src/posthog.ts +++ b/packages/backend/src/posthog.ts @@ -5,9 +5,9 @@ import { PosthogEvent, PosthogEventMap } from './posthogEvents.js'; let posthog: PostHog | undefined = undefined; -if (clientEnv.NEXT_PUBLIC_POSTHOG_PAPIK) { +if (env.POSTHOG_PAPIK) { posthog = new PostHog( - clientEnv.NEXT_PUBLIC_POSTHOG_PAPIK, + env.POSTHOG_PAPIK, { host: "https://us.i.posthog.com", } diff --git a/packages/backend/src/zoekt.ts b/packages/backend/src/zoekt.ts index 9f65f473..68af1160 100644 --- a/packages/backend/src/zoekt.ts +++ b/packages/backend/src/zoekt.ts @@ -1,5 +1,5 @@ import { Repo } from "@sourcebot/db"; -import { createLogger } from "@sourcebot/shared"; +import { createLogger, env } from "@sourcebot/shared"; import { exec } from "child_process"; import { INDEX_CACHE_DIR } from "./constants.js"; import { Settings } from "./types.js"; @@ -11,6 +11,8 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio const { path: repoPath } = getRepoPath(repo); const shardPrefix = getShardPrefix(repo.orgId, repo.id); + const largeFileGlobPatterns = env.ALWAYS_INDEX_FILE_PATTERNS?.split(',').map(pattern => pattern.trim()) ?? 
[]; + const command = [ 'zoekt-git-index', '-allow_missing_branches', @@ -21,6 +23,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio `-tenant_id ${repo.orgId}`, `-repo_id ${repo.id}`, `-shard_prefix ${shardPrefix}`, + ...largeFileGlobPatterns.map((pattern) => `-large_file ${pattern}`), repoPath ].join(' '); diff --git a/packages/db/prisma/migrations/20251129063148_change_chat_created_by_to_optional/migration.sql b/packages/db/prisma/migrations/20251129063148_change_chat_created_by_to_optional/migration.sql new file mode 100644 index 00000000..d0fd2ba0 --- /dev/null +++ b/packages/db/prisma/migrations/20251129063148_change_chat_created_by_to_optional/migration.sql @@ -0,0 +1,5 @@ +-- First, remove the NOT NULL constraint on the createdById column. +ALTER TABLE "Chat" ALTER COLUMN "createdById" DROP NOT NULL; + +-- Then, set all chats created by the guest user (id: 1) to have a NULL createdById. +UPDATE "Chat" SET "createdById" = NULL WHERE "createdById" = '1'; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 2e87ad4f..95460852 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -437,8 +437,8 @@ model Chat { name String? - createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) - createdById String + createdBy User? @relation(fields: [createdById], references: [id], onDelete: Cascade) + createdById String? 
createdAt DateTime @default(now()) updatedAt DateTime @updatedAt diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index e7cb7554..245206d9 100644 --- a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -1 +1,3 @@ +import type { User, Account } from ".prisma/client"; +export type UserWithAccounts = User & { accounts: Account[] }; export * from ".prisma/client"; \ No newline at end of file diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 94d37a42..e79ae284 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.0.11] - 2025-12-03 + +### Changed +- Updated API client to match the latest Sourcebot release. [#652](https://github.com/sourcebot-dev/sourcebot/pull/652) + +## [1.0.10] - 2025-11-24 + +### Changed +- Updated API client to match the latest Sourcebot release. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555) + +## [1.0.9] - 2025-11-17 + +### Added +- Added pagination and filtering to `list_repos` tool to handle large repository lists efficiently and prevent oversized responses that waste token context. [#614](https://github.com/sourcebot-dev/sourcebot/pull/614) + ## [1.0.8] - 2025-11-10 ### Fixed diff --git a/packages/mcp/README.md b/packages/mcp/README.md index 0c64bdc3..a0a875a0 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -182,7 +182,18 @@ Fetches code that matches the provided regex pattern in `query`. ### list_repos -Lists all repositories indexed by Sourcebot. +Lists repositories indexed by Sourcebot with optional filtering and pagination. + +
+Parameters + +| Name | Required | Description | +|:-------------|:---------|:--------------------------------------------------------------------| +| `query` | no | Filter repositories by name or display name (case-insensitive). | +| `pageNumber` | no | Page number (1-indexed, default: 1). | +| `limit` | no | Number of repositories per page (default: 50). | + +
### get_file_source diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 2ebe8ff2..ca6ee314 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -1,6 +1,6 @@ { "name": "@sourcebot/mcp", - "version": "1.0.8", + "version": "1.0.11", "type": "module", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 3754c605..fdb3440e 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -8,7 +8,6 @@ export const search = async (request: SearchRequest): Promise response.json()); @@ -43,7 +41,6 @@ export const getFileSource = async (request: FileSourceRequest): Promise { + "Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.", + listReposRequestSchema.shape, + async ({ query, pageNumber = 1, limit = 50 }: { + query?: string; + pageNumber?: number; + limit?: number; + }) => { const response = await listRepos(); if (isServiceError(response)) { return { @@ -177,13 +180,45 @@ server.tool( }; } - const content: TextContent[] = response.map(repo => { + // Apply query filter if provided + let filtered = response; + if (query) { + const lowerQuery = query.toLowerCase(); + filtered = response.filter(repo => + repo.repoName.toLowerCase().includes(lowerQuery) || + repo.repoDisplayName?.toLowerCase().includes(lowerQuery) + ); + } + + // Sort alphabetically for consistent pagination + filtered.sort((a, b) => a.repoName.localeCompare(b.repoName)); + + // Apply pagination + const startIndex = (pageNumber - 1) * limit; + const endIndex = startIndex + limit; + const paginated = filtered.slice(startIndex, endIndex); + + // Format output + const content: TextContent[] = paginated.map(repo => { return { type: "text", text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`, } }); + // Add pagination info 
+ if (content.length === 0 && filtered.length > 0) { + content.push({ + type: "text", + text: `No results on page ${pageNumber}. Total matching repositories: ${filtered.length}`, + }); + } else if (filtered.length > endIndex) { + content.push({ + type: "text", + text: `Showing ${paginated.length} repositories (page ${pageNumber}). Total matching: ${filtered.length}. Use pageNumber ${pageNumber + 1} to see more.`, + }); + } + return { content, }; diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index b477e8f1..51063579 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -21,15 +21,18 @@ export const symbolSchema = z.object({ kind: z.string(), }); +export const searchOptionsSchema = z.object({ + matches: z.number(), // The number of matches to return. + contextLines: z.number().optional(), // The number of context lines to return. + whole: z.boolean().optional(), // Whether to return the whole file as part of the response. + isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search. + isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity. +}); + export const searchRequestSchema = z.object({ - // The zoekt query to execute. - query: z.string(), - // The number of matches to return. - matches: z.number(), - // The number of context lines to return. - contextLines: z.number().optional(), - // Whether to return the whole file as part of the response. - whole: z.boolean().optional(), + query: z.string(), // The zoekt query to execute. + source: z.string().optional(), // The source of the search request. + ...searchOptionsSchema.shape, }); export const repositoryInfoSchema = z.object({ @@ -109,7 +112,7 @@ export const searchStatsSchema = z.object({ regexpsConsidered: z.number(), // FlushReason explains why results were flushed. 
- flushReason: z.number(), + flushReason: z.string(), }); export const searchResponseSchema = z.object({ @@ -139,7 +142,6 @@ export const searchResponseSchema = z.object({ content: z.string().optional(), })), repositoryInfo: z.array(repositoryInfoSchema), - isBranchFilteringEnabled: z.boolean(), isSearchExhaustive: z.boolean(), }); @@ -156,6 +158,25 @@ export const repositoryQuerySchema = z.object({ export const listRepositoriesResponseSchema = repositoryQuerySchema.array(); +export const listReposRequestSchema = z.object({ + query: z + .string() + .describe("Filter repositories by name or displayName (case-insensitive)") + .optional(), + pageNumber: z + .number() + .int() + .positive() + .describe("Page number (1-indexed, default: 1)") + .default(1), + limit: z + .number() + .int() + .positive() + .describe("Number of repositories per page (default: 50)") + .default(50), +}); + export const fileSourceRequestSchema = z.object({ fileName: z.string(), repository: z.string(), diff --git a/packages/queryLanguage/.gitignore b/packages/queryLanguage/.gitignore new file mode 100644 index 00000000..81d9910b --- /dev/null +++ b/packages/queryLanguage/.gitignore @@ -0,0 +1,2 @@ +/node_modules/ +/dist diff --git a/packages/queryLanguage/package.json b/packages/queryLanguage/package.json new file mode 100644 index 00000000..b4ef8f42 --- /dev/null +++ b/packages/queryLanguage/package.json @@ -0,0 +1,20 @@ +{ + "name": "@sourcebot/query-language", + "private": true, + "main": "dist/index.js", + "scripts": { + "build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc", + "test": "vitest", + "postinstall": "yarn build" + }, + "devDependencies": { + "@lezer/generator": "^1.8.0", + "tsx": "^4.19.1", + "typescript": "^5.7.3", + "vitest": "^2.1.9" + }, + "dependencies": { + "@lezer/common": "^1.3.0", + "@lezer/lr": "^1.4.3" + } +} diff --git a/packages/queryLanguage/src/index.ts b/packages/queryLanguage/src/index.ts new file mode 100644 index 
00000000..00cfbaad --- /dev/null +++ b/packages/queryLanguage/src/index.ts @@ -0,0 +1,7 @@ +import { parser } from "./parser"; + +type Tree = ReturnType<typeof parser.parse>; +type SyntaxNode = Tree['topNode']; +export type { Tree, SyntaxNode }; +export * from "./parser"; +export * from "./parser.terms"; \ No newline at end of file diff --git a/packages/queryLanguage/src/parser.terms.ts b/packages/queryLanguage/src/parser.terms.ts new file mode 100644 index 00000000..c093620e --- /dev/null +++ b/packages/queryLanguage/src/parser.terms.ts @@ -0,0 +1,22 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it. +export const + negate = 23, + Program = 1, + OrExpr = 2, + AndExpr = 3, + NegateExpr = 4, + PrefixExpr = 5, + ArchivedExpr = 6, + RevisionExpr = 7, + ContentExpr = 8, + ContextExpr = 9, + FileExpr = 10, + ForkExpr = 11, + VisibilityExpr = 12, + RepoExpr = 13, + LangExpr = 14, + SymExpr = 15, + RepoSetExpr = 16, + ParenExpr = 17, + QuotedTerm = 18, + Term = 19 diff --git a/packages/queryLanguage/src/parser.ts b/packages/queryLanguage/src/parser.ts new file mode 100644 index 00000000..a254524c --- /dev/null +++ b/packages/queryLanguage/src/parser.ts @@ -0,0 +1,18 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it.
+import {LRParser} from "@lezer/lr" +import {negateToken} from "./tokens" +export const parser = LRParser.deserialize({ + version: 14, + states: "'hOVQROOO!WQQO'#CcO!WQQO'#CdO!WQQO'#CeO!WQQO'#CfO!`QSO'#CgO!kQSO'#ChO!WQQO'#CiO!WQQO'#CjO!WQQO'#CkO!WQQO'#ClOOQP'#Ca'#CaO!vQRO'#CmO!}QQO'#C`OOQP'#Cn'#CnOOQP'#Co'#CoOOQP'#Cx'#CxO#uQRO'#CwO$SQQO'#CwO$_QQO'#C^OOQO'#Cv'#CvQOQQOOO!`QSO'#CbOOQP'#DO'#DOOOQP,58},58}OOQP,59O,59OOOQP,59P,59POOQP,59Q,59QOOQP'#DV'#DVOOQP,59R,59ROOQP'#DX'#DXOOQP,59S,59SOOQP,59T,59TOOQP,59U,59UOOQP,59V,59VOOQP,59W,59WOOQP,59X,59XO$dQQO,59XOOQP,58z,58zOOQP'#Cp'#CpO$iQRO,58yOVQRO'#CqO$vQQO,58xOOQP,58|,58|OOQP1G.s1G.sOOQP-E6n-E6nO%RQRO'#CwOOQO'#Cw'#CwOOQO,59],59]OOQO-E6o-E6o", + stateData: "%p~OiOS~Og]OmfOqPOs^Ot_OuQOvROwSOxTOzUO!PVO!QWO!RXO!SYO!T[O~OsgOtgO~OnlOolOplO~O|nO}nO!OnO~O!UtO~PVOmfOqPOuQOvROwSOxTOzUO!PVO!QWO!RXO!SYO!T[O~OfjX!VkX!UjX~PVOfjX!VkX!UjX~O!VyO~O!U|O~OfRa!VRa!URa~PVO!VyOfQa!UQa~OfkX!VkX!UkX~PVOsmquvwxz!P!Q!R!S!Vtz~", + goto: "$a|PP}!R!Y!b!m!m!m!m!m!m!m!m!m!m!m!b!Y!Y!v!}PPPP#T#Z#bPPPPP#nPPPPPP$WP$^TdO[SbO[R!Py]`O[axy!O[`O[axy!ORv]_ZO[]axy!OSxa!OR}xQzcR!RzQeORu[ScO[R!QySaO[Uwax!OR!OyQhPQiQQjRQkSQpVQqWQrXRsYQmTR{fRoU", + nodeNames: "⚠ Program OrExpr AndExpr NegateExpr PrefixExpr ArchivedExpr RevisionExpr ContentExpr ContextExpr FileExpr ForkExpr VisibilityExpr RepoExpr LangExpr SymExpr RepoSetExpr ParenExpr QuotedTerm Term", + maxTerm: 53, + skippedNodes: [0], + repeatNodeCount: 2, + tokenData: 
"!Fj~RpOX#VXY$QYZ$QZp#Vpq$Qqr#Vrs$`sx#Vxy)zyz*Pz#T#V#T#U*U#U#V#V#V#W3s#W#Y#V#Y#Z=[#Z#`#V#`#aD}#a#b#V#b#cIY#c#dJz#d#e!!}#e#f#V#f#g!-m#g#h!7Q#h#j#V#j#k!:b#k#m#V#m#n!C}#n;'S#V;'S;=`#z<%lO#VP#[YtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#VP#}P;=`<%l#V~$VRi~XY$QYZ$Qpq$Q~$eatPOX$`XY%jZp$`pq%jqr$`rs'^sx$`xz%jz}$`}!O$`!O![$`![!]$`!]#O$`#O#P(T#P;'S$`;'S;=`)t<%lO$`~%mWOY%jZr%jrs&Vs#O%j#O#P&[#P;'S%j;'S;=`'W<%lO%j~&[Os~~&_RO;'S%j;'S;=`&h;=`O%j~&kXOY%jZr%jrs&Vs#O%j#O#P&[#P;'S%j;'S;=`'W;=`<%l%j<%lO%j~'ZP;=`<%l%j~'eYs~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~(Y^tPOX$`XZ%jZp$`pq%jqx$`xz%jz}$`}!O$`!O![$`![!]$`!];'S$`;'S;=`)U;=`<%l%j<%lO$`~)XXOY%jZr%jrs&Vs#O%j#O#P&[#P;'S%j;'S;=`'W;=`<%l$`<%lO%j~)wP;=`<%l$`~*PO!T~~*UO!U~~*Z^tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c+V#c#f#V#f#g,w#g;'S#V;'S;=`#z<%lO#VR+[[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#m#V#m#n,Q#n;'S#V;'S;=`#z<%lO#VR,XY!OQtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~,|[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#V#V#V#W-r#W;'S#V;'S;=`#z<%lO#V~-w[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#[#V#[#].m#];'S#V;'S;=`#z<%lO#V~.r[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^/h#^;'S#V;'S;=`#z<%lO#V~/m[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#j#V#j#k0c#k;'S#V;'S;=`#z<%lO#V~0h[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y1^#Y;'S#V;'S;=`#z<%lO#V~1c[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#W#V#W#X2X#X;'S#V;'S;=`#z<%lO#V~2^YtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]2|!];'S#V;'S;=`#z<%lO#V~3TYm~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~3x[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]4n!]#c#V#c#d5e#d;'S#V;'S;=`#z<%lO#V~4uYu~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~5j[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c6`#c;'S#V;'S;=`#z<%lO#V~6e[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i7Z#i;'S#V;'S;=`#z<%lO#V~7`[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y8U#Y;'S#V;'S;=`#z<%lO#V~8Z^tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c9V#c#l#V#l#m:u#m;'S#V;'S;=`#z<%lO#V~9[[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i:Q#i;'
S#V;'S;=`#z<%lO#V~:VYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]4n!];'S#V;'S;=`#z<%lO#V~:z[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i;p#i;'S#V;'S;=`#z<%lO#V~;uYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]]!]#]#V#]#^?S#^#c#V#c#dAm#d;'S#V;'S;=`#z<%lO#V~>dYw~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~?X[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#`#V#`#a?}#a;'S#V;'S;=`#z<%lO#V~@S[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y@x#Y;'S#V;'S;=`#z<%lO#V~@}YtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]>]!];'S#V;'S;=`#z<%lO#V~Ar[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#f#V#f#gBh#g;'S#V;'S;=`#z<%lO#V~Bm[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#_#V#_#`Cc#`;'S#V;'S;=`#z<%lO#V~ChYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]DW!];'S#V;'S;=`#z<%lO#V~D_Yx~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~ES[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#T#V#T#UEx#U;'S#V;'S;=`#z<%lO#V~E}[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#cFs#c;'S#V;'S;=`#z<%lO#V~Fx[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#Z#V#Z#[Gn#[;'S#V;'S;=`#z<%lO#V~GsYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]Hc!];'S#V;'S;=`#z<%lO#V~HjY!Q~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#VRI_[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#c#V#c#dJT#d;'S#V;'S;=`#z<%lO#VRJ[YoQtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~KP^tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#cK{#c#f#V#f#gNh#g;'S#V;'S;=`#z<%lO#VRLQ[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#`#V#`#aLv#a;'S#V;'S;=`#z<%lO#VRL{[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#m#V#m#nMq#n;'S#V;'S;=`#z<%lO#VRMxYpQtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~NmdtPOX! {XZ!!rZp! {pq!!rqx! {xz!!rz}! {}!O! {!O!Q! {!Q![#V![!]! {!]!c! {!c!}#V!}#R! {#R#S#V#S#T! {#T#o#V#o;'S! {;'S;=`!!w<%lO! {~!!SY!V~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!!wO!V~~!!zP;=`<%l! 
{R!#S^tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#f#V#f#g!$O#g#i#V#i#j!)[#j;'S#V;'S;=`#z<%lO#VR!$T[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^!$y#^;'S#V;'S;=`#z<%lO#VR!%O[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#j#V#j#k!%t#k;'S#V;'S;=`#z<%lO#VR!%y[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#T#V#T#U!&o#U;'S#V;'S;=`#z<%lO#VR!&t[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i!'j#i;'S#V;'S;=`#z<%lO#VR!'o[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y!(e#Y;'S#V;'S;=`#z<%lO#VR!(lY}QtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#VR!)a[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#U#V#U#V!*V#V;'S#V;'S;=`#z<%lO#VR!*[[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#`#V#`#a!+Q#a;'S#V;'S;=`#z<%lO#VR!+V[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^!+{#^;'S#V;'S;=`#z<%lO#VR!,Q[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#V#V#V#W!,v#W;'S#V;'S;=`#z<%lO#VR!,}Y|QtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!-r[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]!.h!]#X#V#X#Y!/_#Y;'S#V;'S;=`#z<%lO#V~!.oY!P~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!/d^tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#d#V#d#e!0`#e#j#V#j#k!5f#k;'S#V;'S;=`#z<%lO#V~!0e[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#c#V#c#d!1Z#d;'S#V;'S;=`#z<%lO#V~!1`[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]!.h!]#g#V#g#h!2U#h;'S#V;'S;=`#z<%lO#V~!2Z[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y!3P#Y;'S#V;'S;=`#z<%lO#V~!3U[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i!3z#i;'S#V;'S;=`#z<%lO#V~!4PYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]!4o!];'S#V;'S;=`#z<%lO#V~!4vY!S~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!5kYtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]!6Z!];'S#V;'S;=`#z<%lO#V~!6bYq~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!7V[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#m#V#m#n!7{#n;'S#V;'S;=`#z<%lO#V~!8Q[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#a#V#a#b!8v#b;'S#V;'S;=`#z<%lO#V~!8{YtPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]!9k!];'S#V;'S;=`#z<%lO#V~!9rY!R~tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#z<%lO#V~!:g[tPOX#VZp#Vqx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^!;]#^;'S#V;'S;=`#z<%lO
// Lezer grammar for the search query language.
//
// A query is a sequence of expressions. Adjacent expressions form an AndExpr,
// `or`-separated groups form an OrExpr, and `-` in front of a prefix
// expression or a parenthesised group forms a NegateExpr.

// The `negate` token is not defined in @tokens below; it is produced by the
// external tokenizer `negateToken` exported from "./tokens".
@external tokens negateToken from "./tokens" { negate }

@top Program { query }

// Precedence names. Only `negate` is referenced by a rule in this file (via
// the `!negate` marker in NegateExpr); `and` and `or` are declared here but no
// rule below carries an `!and` / `!or` marker.
@precedence {
  negate,
  and,
  or @left
}

// Entry rule: an OR of groups, a run of adjacent expressions, or one expression.
query {
  OrExpr |
  AndExpr |
  expr
}

// Two or more operands separated by the `or` token.
OrExpr { andExpr (or andExpr)+ }

// Two or more adjacent expressions with no operator between them.
AndExpr { expr expr+ }

// Operand of OrExpr: either a run of adjacent expressions or a single one.
andExpr { AndExpr | expr }

expr {
  NegateExpr |
  ParenExpr |
  PrefixExpr |
  QuotedTerm |
  Term
}

// Negation is only allowed in front of a prefix expression or a group; a bare
// Term or QuotedTerm cannot be the operand.
NegateExpr { !negate negate (PrefixExpr | ParenExpr) }

// Parenthesised group; the inner query is optional, so `()` is accepted.
ParenExpr { "(" query? ")" }

// Wrapper node around every `keyword:value` form.
PrefixExpr {
  ArchivedExpr |
  RevisionExpr |
  ContentExpr |
  ContextExpr |
  FileExpr |
  ForkExpr |
  VisibilityExpr |
  RepoExpr |
  LangExpr |
  SymExpr |
  RepoSetExpr
}

// Prefixes that take a free-form value (quoted string or word).
RevisionExpr { revisionKw value }
ContentExpr { contentKw value }
ContextExpr { contextKw value }
FileExpr { fileKw value }
RepoExpr { repoKw value }
LangExpr { langKw value }
SymExpr { symKw value }
RepoSetExpr { reposetKw value }

// Modifiers: prefixes whose value must be one of a fixed set of literals.
ArchivedExpr { archivedKw archivedValue }
ForkExpr { forkKw forkValue }
VisibilityExpr { visibilityKw visibilityValue }

archivedValue { "yes" | "no" | "only" }
forkValue { "yes" | "no" | "only" }
visibilityValue { "public" | "private" | "any" }

// Free-standing search terms.
QuotedTerm { quotedString }
Term { word }

value { quotedString | word }

@skip { space }

@tokens {
  // Prefix keywords. The trailing colon is part of the token; `content:`,
  // `file:` and `repo:` also have the short forms `c:`, `f:` and `r:`.
  archivedKw { "archived:" }
  revisionKw { "rev:" }
  contentKw { "content:" | "c:" }
  contextKw { "context:" }
  fileKw { "file:" | "f:" }
  forkKw { "fork:" }
  visibilityKw { "visibility:" }
  repoKw { "repo:" | "r:" }
  langKw { "lang:" }
  symKw { "sym:" }
  reposetKw { "reposet:" }

  // `or` must be followed by one character that is not a letter, digit or
  // underscore, so words that merely start with "or" are not matched here.
  // The pattern includes that following character in the match, and it cannot
  // match when `or` is the last thing in the input.
  // NOTE(review): because the following character is part of the token, input
  // such as `a or(b)` presumably folds the `(` into the `or` token - confirm.
  or { "or" ![a-zA-Z0-9_] }

  // Double-quoted string. A backslash escapes any single following character;
  // an unescaped `"`, backslash or newline is not allowed inside.
  quotedString { '"' (!["\\\n] | "\\" _)* '"' }

  // First character: anything except space, tab, newline and parentheses.
  // Remaining characters: the same set, since `:` is excluded by the negated
  // set and then added back by the `":"` alternative.
  // NOTE(review): the `"-"` alternative looks redundant, because `-` is already
  // matched by the negated set - confirm before simplifying, as the checked-in
  // parser tables are generated from this file.
  word { (![ \t\n()]) (![ \t\n():] | ":" | "-")* }

  space { $[ \t\n]+ }

  // Token-level precedence, highest first: quoted strings, then the prefix
  // keywords and `or`, and finally the catch-all `word`.
  @precedence {
    quotedString,
    archivedKw, revisionKw, contentKw, contextKw, fileKw,
    forkKw, visibilityKw, repoKw, langKw,
    symKw, reposetKw, or,
    word
  }
}
import { ExternalTokenizer, InputStream } from "@lezer/lr";
import { negate } from "./parser.terms";

// Character codes used by the tokenizer.
const DASH = 45; // '-'
const OPEN_PAREN = 40; // '('
const SPACE = 32;
const TAB = 9;
const NEWLINE = 10;

// Prefix keywords, including their trailing colon. This list mirrors the
// keyword tokens declared in the `@tokens` block of query.grammar and must be
// kept in sync with it.
const PREFIX_KEYWORDS = [
    "archived:",
    "rev:",
    "content:",
    "c:",
    "context:",
    "file:",
    "f:",
    "fork:",
    "visibility:",
    "repo:",
    "r:",
    "lang:",
    "sym:",
    "reposet:",
];

/** Returns true for the characters the grammar skips as whitespace (space, tab, newline). */
function isWhitespace(ch: number): boolean {
    return ch === SPACE || ch === TAB || ch === NEWLINE;
}

/**
 * Returns true when the text starting `offset` code units after the current
 * stream position begins with one of PREFIX_KEYWORDS.
 *
 * Only `peek` is used, so the stream position is left untouched.
 */
function startsWithPrefixKeyword(input: InputStream, offset: number): boolean {
    return PREFIX_KEYWORDS.some((keyword) => {
        for (let i = 0; i < keyword.length; i++) {
            if (input.peek(offset + i) !== keyword.charCodeAt(i)) {
                return false;
            }
        }
        return true;
    });
}

/**
 * External tokenizer for negation.
 *
 * Emits `negate` for a `-` only when, after optional whitespace, it is
 * followed by `(` or by one of the grammar's prefix keywords (e.g. `file:`).
 * In every other case no token is produced, so the dash is left to the
 * grammar's own tokens (e.g. `-test` or `-std::vector` are read as plain words).
 *
 * The previous implementation treated any `:` found before the next delimiter
 * as proof of a prefix keyword, so a dash in front of an arbitrary word
 * containing a colon was tokenized as `negate` even though the grammar only
 * allows a prefix expression or a group after it. It also rewound the stream
 * with `advance(-1)`; the lookahead is now done with `peek`, which does not
 * move the stream at all.
 */
export const negateToken = new ExternalTokenizer((input) => {
    if (input.next !== DASH) {
        return;
    }

    // Skip whitespace between the dash and whatever follows it, so that
    // `- file:x` is handled the same way as `-file:x`.
    let offset = 1;
    while (isWhitespace(input.peek(offset))) {
        offset++;
    }

    if (input.peek(offset) === OPEN_PAREN || startsWithPrefixKeyword(input, offset)) {
        // Consume just the dash; the token ends at the stream position.
        input.advance();
        input.acceptToken(negate);
    }
});
import { parser } from "../src/parser";
import { fileTests } from "@lezer/generator/dist/test";
import { describe, it } from "vitest";
import { fileURLToPath } from "url"
import * as fs from "fs";
import * as path from "path";

// Directory containing this test file; the `.txt` case files are read from it.
const caseDir = path.dirname(fileURLToPath(import.meta.url))

// Every `.txt` file in the directory becomes one suite.
const caseFiles = fs.readdirSync(caseDir).filter((entry) => /\.txt$/.test(entry));

caseFiles.forEach((caseFile) => {
    // Suite name is the file name up to (not including) the first dot.
    const suiteName = /^[^\.]*/.exec(caseFile)?.[0];

    describe(suiteName ?? "unknown", () => {
        const cases = fileTests(fs.readFileSync(path.join(caseDir, caseFile), "utf8"), caseFile);

        // One test per case parsed out of the file, each run against the parser.
        for (const { name: caseName, run: runCase } of cases) {
            it(caseName, () => runCase(parser));
        }
    });
});
+++ b/packages/queryLanguage/test/negation.txt @@ -0,0 +1,255 @@ +# Literal dash term + +-test + +==> + +Program(Term) + +# Quoted dash term + +"-excluded" + +==> + +Program(QuotedTerm) + +# Dash in middle + +test-case + +==> + +Program(Term) + +# Multiple dash terms + +-one -two -three + +==> + +Program(AndExpr(Term,Term,Term)) + +# Negate file prefix + +-file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate repo prefix + +-repo:archived + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate lang prefix + +-lang:python + +==> + +Program(NegateExpr(PrefixExpr(LangExpr))) + +# Negate content prefix + +-content:TODO + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate revision prefix + +-rev:develop + +==> + +Program(NegateExpr(PrefixExpr(RevisionExpr))) + +# Negate archived prefix + +-archived:yes + +==> + +Program(NegateExpr(PrefixExpr(ArchivedExpr))) + +# Negate fork prefix + +-fork:yes + +==> + +Program(NegateExpr(PrefixExpr(ForkExpr))) + +# Negate visibility prefix + +-visibility:any + +==> + +Program(NegateExpr(PrefixExpr(VisibilityExpr))) + +# Negate context prefix + +-context:backend + +==> + +Program(NegateExpr(PrefixExpr(ContextExpr))) + +# Negate symbol prefix + +-sym:OldClass + +==> + +Program(NegateExpr(PrefixExpr(SymExpr))) + +# Negate parentheses + +-(test) + +==> + +Program(NegateExpr(ParenExpr(Term))) + +# Negate group with multiple terms + +-(test exclude) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(Term,Term)))) + +# Negate group with prefix + +-(file:test.js console.log) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),Term)))) + +# Prefix with negated term + +file:test.js -console + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Multiple prefixes with negation + +file:test.js -lang:python + +==> + +Program(AndExpr(PrefixExpr(FileExpr),NegateExpr(PrefixExpr(LangExpr)))) + +# Complex negation pattern + +function -file:test.js -lang:java + +==> + 
+Program(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr)))) + +# Negation inside parentheses + +(-file:test.js) + +==> + +Program(ParenExpr(NegateExpr(PrefixExpr(FileExpr)))) + +# Multiple negations in group + +(-file:a.js -lang:python) + +==> + +Program(ParenExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr))))) + +# Mixed in parentheses + +(include -file:test.js) + +==> + +Program(ParenExpr(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr))))) + +# Negate nested group + +-((file:test.js)) + +==> + +Program(NegateExpr(ParenExpr(ParenExpr(PrefixExpr(FileExpr))))) + +# Negate short form prefix + +-f:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate short form repo + +-r:myrepo + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate short form content + +-c:console + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate with prefix in quotes + +-file:"test file.js" + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Complex with multiple negated prefixes + +lang:typescript -file:*.test.ts -file:*.spec.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# Negated group with prefix + +-(file:test.js lang:python) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# Negate empty group + +-() + +==> + +Program(NegateExpr(ParenExpr)) + +# Negate with space after dash + +- file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) diff --git a/packages/queryLanguage/test/operators.txt b/packages/queryLanguage/test/operators.txt new file mode 100644 index 00000000..d3c7061a --- /dev/null +++ b/packages/queryLanguage/test/operators.txt @@ -0,0 +1,271 @@ +# Simple OR + +test or example + +==> + +Program(OrExpr(Term,Term)) + +# Multiple OR + +one or two or three + +==> + +Program(OrExpr(Term,Term,Term)) + +# OR with prefixes + +file:test.js or file:example.js + 
+==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with negation + +test or -file:excluded.js + +==> + +Program(OrExpr(Term,NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted strings + +"first option" or "second option" + +==> + +Program(OrExpr(QuotedTerm,QuotedTerm)) + +# OR with different prefixes + +lang:python or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# Multiple terms with OR + +function test or class example + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# OR in parentheses + +(test or example) + +==> + +Program(ParenExpr(OrExpr(Term,Term))) + +# OR with parentheses outside + +(test) or (example) + +==> + +Program(OrExpr(ParenExpr(Term),ParenExpr(Term))) + +# Complex OR with grouping + +(file:*.js lang:javascript) or (file:*.ts lang:typescript) + +==> + +Program(OrExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))),ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# OR with mixed content + +test or file:example.js + +==> + +Program(OrExpr(Term,PrefixExpr(FileExpr))) + +# Prefix OR term + +file:test.js or example + +==> + +Program(OrExpr(PrefixExpr(FileExpr),Term)) + +# OR with short form prefixes + +f:test.js or r:myrepo + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# OR with repo prefixes + +repo:project1 or repo:project2 + +==> + +Program(OrExpr(PrefixExpr(RepoExpr),PrefixExpr(RepoExpr))) + +# OR with revision prefixes + +rev:main or rev:develop + +==> + +Program(OrExpr(PrefixExpr(RevisionExpr),PrefixExpr(RevisionExpr))) + +# OR with lang prefixes + +lang:rust or lang:go + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# OR with content + +content:TODO or content:FIXME + +==> + +Program(OrExpr(PrefixExpr(ContentExpr),PrefixExpr(ContentExpr))) + +# OR with negated terms + +-file:test.js or -file:spec.js + +==> + 
+Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# OR in nested parentheses + +((a or b) or (c or d)) + +==> + +Program(ParenExpr(OrExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term))))) + +# Multiple OR with parentheses and implicit AND + +(a or b) and (c or d) + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term,ParenExpr(OrExpr(Term,Term)))) + +# OR with wildcards + +*.test.js or *.spec.js + +==> + +Program(OrExpr(Term,Term)) + +# OR with regex patterns + +[a-z]+ or [0-9]+ + +==> + +Program(OrExpr(Term,Term)) + +# OR with dots + +com.example.test or org.example.test + +==> + +Program(OrExpr(Term,Term)) + +# OR with dashes + +test-one or test-two + +==> + +Program(OrExpr(Term,Term)) + +# Word containing 'or' + +order + +==> + +Program(Term) + +# Word containing 'or' in middle + +before + +==> + +Program(Term) + +# OR at start + +or test + +==> + +Program(⚠,Term) + +# OR at end (or becomes term) + +test or + +==> + +Program(AndExpr(Term,Term)) + +# Multiple consecutive OR + +test or or example + +==> + +Program(OrExpr(Term,⚠,Term)) + +# OR with all prefix types + +file:*.js or repo:myrepo or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr),PrefixExpr(LangExpr))) + +# Complex query with OR and negation + +(lang:python or lang:ruby) -file:test.py + +==> + +Program(AndExpr(ParenExpr(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))),NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted prefix values + +file:"test one.js" or file:"test two.js" + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with empty parentheses + +() or () + +==> + +Program(OrExpr(ParenExpr,ParenExpr)) + +# OR with negated groups + +-(file:a.js) or -(file:b.js) + +==> + +Program(OrExpr(NegateExpr(ParenExpr(PrefixExpr(FileExpr))),NegateExpr(ParenExpr(PrefixExpr(FileExpr))))) diff --git a/packages/queryLanguage/test/precedence.txt b/packages/queryLanguage/test/precedence.txt new file 
mode 100644 index 00000000..f25d3db9 --- /dev/null +++ b/packages/queryLanguage/test/precedence.txt @@ -0,0 +1,200 @@ +# OR has lowest precedence - implicit AND groups first + +a b or c d + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# Multiple OR operators are left-associative + +a or b or c + +==> + +Program(OrExpr(Term,Term,Term)) + +# AND before OR + +file:test.js error or file:test.go panic + +==> + +Program(OrExpr(AndExpr(PrefixExpr(FileExpr),Term),AndExpr(PrefixExpr(FileExpr),Term))) + +# Negation binds tighter than AND + +-file:test.js error + +==> + +Program(AndExpr(NegateExpr(PrefixExpr(FileExpr)),Term)) + +# Negation binds tighter than OR + +-file:a.js or file:b.js + +==> + +Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr))) + +# Parentheses override precedence + +(a or b) c + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term)) + +# Parentheses override - OR inside parens groups first + +a (b or c) + +==> + +Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term)))) + +# Complex: AND, OR, and negation + +a -b or c d + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# Negated group in OR expression + +-(a b) or c + +==> + +Program(OrExpr(NegateExpr(ParenExpr(AndExpr(Term,Term))),Term)) + +# Multiple negations in OR + +-file:a.js or -file:b.js or file:c.js + +==> + +Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr))) + +# Prefix binds to its value only + +file:a.js b.js + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# OR with prefixes and terms mixed + +repo:backend error or repo:frontend warning + +==> + +Program(OrExpr(AndExpr(PrefixExpr(RepoExpr),Term),AndExpr(PrefixExpr(RepoExpr),Term))) + +# Nested parentheses with OR + +((a or b) c) or d + +==> + +Program(OrExpr(ParenExpr(AndExpr(ParenExpr(OrExpr(Term,Term)),Term)),Term)) + +# OR at different nesting levels + +(a or (b or c)) + +==> + 
+Program(ParenExpr(OrExpr(Term,ParenExpr(OrExpr(Term,Term))))) + +# Implicit AND groups all adjacent terms before OR + +a b c or d e f + +==> + +Program(OrExpr(AndExpr(Term,Term,Term),AndExpr(Term,Term,Term))) + +# Mixed prefix and regular terms with OR + +lang:go func or lang:rust fn + +==> + +Program(OrExpr(AndExpr(PrefixExpr(LangExpr),Term),AndExpr(PrefixExpr(LangExpr),Term))) + +# Negation doesn't affect OR grouping + +a or -b or c + +==> + +Program(OrExpr(Term,Term,Term)) + +# Parentheses can isolate OR from surrounding AND + +a (b or c) d + +==> + +Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term)),Term)) + +# Multiple parenthesized groups with AND + +(a or b) (c or d) + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term)))) + +# Quoted strings are atomic - no precedence inside + +"a or b" + +==> + +Program(QuotedTerm) + +# Prefix with OR value doesn't split + +file:"a.js or b.js" + +==> + +Program(PrefixExpr(FileExpr)) + +# Negated prefix in complex expression + +-file:test.js lang:go error or warning + +==> + +Program(OrExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(LangExpr),Term),Term)) + +# OR followed by parenthesized AND + +a or (b c) + +==> + +Program(OrExpr(Term,ParenExpr(AndExpr(Term,Term)))) + +# Empty parens don't affect precedence + +() or a b + +==> + +Program(OrExpr(ParenExpr,AndExpr(Term,Term))) + +# Negation of empty group + +-() a + +==> + +Program(AndExpr(NegateExpr(ParenExpr),Term)) + diff --git a/packages/queryLanguage/test/prefixes.txt b/packages/queryLanguage/test/prefixes.txt new file mode 100644 index 00000000..00533ec0 --- /dev/null +++ b/packages/queryLanguage/test/prefixes.txt @@ -0,0 +1,336 @@ +# File prefix + +file:README.md + +==> + +Program(PrefixExpr(FileExpr)) + +# File prefix short form + +f:index.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo prefix + +repo:myproject + +==> + +Program(PrefixExpr(RepoExpr)) + +# Repo prefix short form + +r:github.com/user/repo + +==> + 
+Program(PrefixExpr(RepoExpr)) + +# Content prefix + +content:function + +==> + +Program(PrefixExpr(ContentExpr)) + +# Content prefix short form + +c:console.log + +==> + +Program(PrefixExpr(ContentExpr)) + +# Revision prefix + +rev:main + +==> + +Program(PrefixExpr(RevisionExpr)) + +# Lang prefix + +lang:typescript + +==> + +Program(PrefixExpr(LangExpr)) + +# Archived prefix - no + +archived:no + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Archived prefix - only + +archived:only + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Fork prefix - yes + +fork:yes + +==> + +Program(PrefixExpr(ForkExpr)) + +# Fork prefix - only + +fork:only + +==> + +Program(PrefixExpr(ForkExpr)) + +# Visibility prefix - public + +visibility:public + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Context prefix + +context:web + +==> + +Program(PrefixExpr(ContextExpr)) + +# Symbol prefix + +sym:MyClass + +==> + +Program(PrefixExpr(SymExpr)) + +# RepoSet prefix + +reposet:repo1,repo2 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# File with wildcard + +file:*.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# File with path + +file:src/components/Button.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with full URL + +repo:github.com/org/project + +==> + +Program(PrefixExpr(RepoExpr)) + +# Multiple prefixes + +file:test.js repo:myproject + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Prefix with term + +file:test.js console.log + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Term then prefix + +console.log file:handler.ts + +==> + +Program(AndExpr(Term,PrefixExpr(FileExpr))) + +# Multiple prefixes and terms + +lang:typescript function file:handler.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),Term,PrefixExpr(FileExpr))) + +# Prefix with regex pattern + +file:[a-z]+\.test\.js + +==> + +Program(PrefixExpr(FileExpr)) + +# Content with spaces in value (no quotes) + +content:hello + +==> + +Program(PrefixExpr(ContentExpr)) + +# Revision with 
slashes + +rev:feature/new-feature + +==> + +Program(PrefixExpr(RevisionExpr)) + +# RepoSet with multiple repos + +reposet:repo1,repo2,repo3 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# Symbol with dots + +sym:package.Class.method + +==> + +Program(PrefixExpr(SymExpr)) + +# Lang with various languages + +lang:python + +==> + +Program(PrefixExpr(LangExpr)) + +# Archived prefix - yes + +archived:yes + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Archived prefix - invalid value (error case) + +archived:invalid + +==> + +Program(AndExpr(PrefixExpr(ArchivedExpr(⚠)),Term)) + +# Fork prefix - no + +fork:no + +==> + +Program(PrefixExpr(ForkExpr)) + +# Fork prefix - invalid value (error case) + +fork:invalid + +==> + +Program(AndExpr(PrefixExpr(ForkExpr(⚠)),Term)) + +# Visibility prefix - private + +visibility:private + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Visibility prefix - any + +visibility:any + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Visibility prefix - invalid value (error case) + +visibility:invalid + +==> + +Program(AndExpr(PrefixExpr(VisibilityExpr(⚠)),Term)) + +# File with dashes + +file:my-component.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with numbers + +repo:project123 + +==> + +Program(PrefixExpr(RepoExpr)) + +# Content with special chars + +content:@Component + +==> + +Program(PrefixExpr(ContentExpr)) + +# Context with underscores + +context:data_engineering + +==> + +Program(PrefixExpr(ContextExpr)) + +# Prefix in parentheses + +(file:test.js) + +==> + +Program(ParenExpr(PrefixExpr(FileExpr))) + +# Multiple prefixes in group + +(file:*.ts lang:typescript) + +==> + +Program(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))) + diff --git a/packages/queryLanguage/test/quoted.txt b/packages/queryLanguage/test/quoted.txt new file mode 100644 index 00000000..b918ea04 --- /dev/null +++ b/packages/queryLanguage/test/quoted.txt @@ -0,0 +1,503 @@ +# Simple quoted string + +"hello" + +==> + +Program(QuotedTerm) + +# 
Leading unclosed quote + +"hello + +==> + +Program(Term) + +# Trailing unclosed quote + +hello" + +==> + +Program(Term) + +# Quoted string with spaces + +"hello world" + +==> + +Program(QuotedTerm) + +# Multiple words in quotes + +"this is a search term" + +==> + +Program(QuotedTerm) + +# Quoted string with escaped quote + +"hello \"world\"" + +==> + +Program(QuotedTerm) + +# Quoted string with escaped backslash + +"path\\to\\file" + +==> + +Program(QuotedTerm) + +# Double backslash + +"test\\\\path" + +==> + +Program(QuotedTerm) + +# Multiple escaped quotes + +"\"quoted\" \"words\"" + +==> + +Program(QuotedTerm) + +# Mixed escaped characters + +"test\\nvalue\"quoted" + +==> + +Program(QuotedTerm) + +# Empty quoted string + +"" + +==> + +Program(QuotedTerm) + +# Quoted string with only spaces + +" " + +==> + +Program(QuotedTerm) + +# Quoted string in file prefix + +file:"my file.txt" + +==> + +Program(PrefixExpr(FileExpr)) + +# Quoted string in repo prefix + +repo:"github.com/user/repo name" + +==> + +Program(PrefixExpr(RepoExpr)) + +# Quoted string in content prefix + +content:"console.log" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Quoted string in revision prefix + +rev:"feature/my feature" + +==> + +Program(PrefixExpr(RevisionExpr)) + +# Multiple quoted strings + +"first string" "second string" + +==> + +Program(AndExpr(QuotedTerm,QuotedTerm)) + +# Quoted and unquoted mixed + +unquoted "quoted string" another + +==> + +Program(AndExpr(Term,QuotedTerm,Term)) + +# Quoted string with parentheses inside + +"(test)" + +==> + +Program(QuotedTerm) + +# Quoted string with brackets + +"[a-z]+" + +==> + +Program(QuotedTerm) + +# Quoted string with special chars + +"test@example.com" + +==> + +Program(QuotedTerm) + +# Quoted string with colons + +"key:value" + +==> + +Program(QuotedTerm) + +# Quoted string with dashes + +"test-case-example" + +==> + +Program(QuotedTerm) + +# Quoted string with dots + +"com.example.package" + +==> + +Program(QuotedTerm) + +# Quoted 
string with regex pattern + +"\\w+\\s*=\\s*\\d+" + +==> + +Program(QuotedTerm) + +# Quoted string with forward slashes + +"path/to/file" + +==> + +Program(QuotedTerm) + +# Quoted string with underscores + +"my_variable_name" + +==> + +Program(QuotedTerm) + +# Quoted string with numbers + +"test123" + +==> + +Program(QuotedTerm) + +# Quoted string with mixed case + +"CamelCaseTest" + +==> + +Program(QuotedTerm) + +# Quoted prefix value with spaces + +file:"test file.js" + +==> + +Program(PrefixExpr(FileExpr)) + +# Multiple prefixes with quoted values + +file:"my file.txt" repo:"my repo" + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Quoted string in parentheses + +("quoted term") + +==> + +Program(ParenExpr(QuotedTerm)) + +# Multiple quoted in parentheses + +("first" "second") + +==> + +Program(ParenExpr(AndExpr(QuotedTerm,QuotedTerm))) + +# Quoted with escaped newline + +"line1\\nline2" + +==> + +Program(QuotedTerm) + +# Quoted with tab character + +"value\\ttab" + +==> + +Program(QuotedTerm) + +# Lang prefix with quoted value + +lang:"objective-c" + +==> + +Program(PrefixExpr(LangExpr)) + +# Sym prefix with quoted value + +sym:"My Class" + +==> + +Program(PrefixExpr(SymExpr)) + +# Content with quoted phrase + +content:"TODO: fix this" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Quoted string with at symbol + +"@decorator" + +==> + +Program(QuotedTerm) + +# Quoted string with hash + +"#define" + +==> + +Program(QuotedTerm) + +# Quoted string with dollar sign + +"$variable" + +==> + +Program(QuotedTerm) + +# Quoted string with percent + +"100%" + +==> + +Program(QuotedTerm) + +# Quoted string with ampersand + +"foo&bar" + +==> + +Program(QuotedTerm) + +# Quoted string with asterisk + +"test*" + +==> + +Program(QuotedTerm) + +# Quoted string with plus + +"a+b" + +==> + +Program(QuotedTerm) + +# Quoted string with equals + +"a=b" + +==> + +Program(QuotedTerm) + +# Quoted string with angle brackets + +"