feat: Generic git host support (local & remote) (#307)

This commit is contained in:
Brendan Kellam 2025-05-15 13:42:58 -07:00 committed by GitHub
parent bbdd9e7903
commit 1aafc228cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 6259 additions and 508 deletions

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added support for indexing generic git hosts given a remote clone url or local path. [#307](https://github.com/sourcebot-dev/sourcebot/pull/307)
## [3.2.0] - 2025-05-12
### Added

View file

@ -14,6 +14,9 @@ zoekt:
export CTAGS_COMMANDS=ctags
clean:
redis-cli FLUSHALL
yarn dev:prisma:migrate:reset
rm -rf \
bin \
node_modules \

View file

@ -38,11 +38,21 @@
"docs/connections/bitbucket-data-center",
"docs/connections/gitea",
"docs/connections/gerrit",
"docs/connections/generic-git-host",
"docs/connections/local-repos",
"docs/connections/request-new"
]
}
]
},
{
"group": "Search",
"pages": [
"docs/search/syntax-reference",
"docs/search/multi-branch-indexing",
"docs/search/search-contexts"
]
},
{
"group": "Agents",
"pages": [
@ -53,11 +63,8 @@
{
"group": "More",
"pages": [
"docs/more/syntax-reference",
"docs/more/multi-branch-indexing",
"docs/more/roles-and-permissions",
"docs/more/mcp-server",
"docs/more/search-contexts"
"docs/more/mcp-server"
]
}
]

View file

@ -0,0 +1,29 @@
---
title: Other Git hosts
---
import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
Sourcebot can sync code from any Git host (by clone url). This is helpful when you want to search code that is not in a [supported code host](/docs/connections/overview#supported-code-hosts).
## Getting Started
To connect to a Git host, create a new [connection](/docs/connections/overview) with type `git` and specify the clone url in the `url` property. For example:
```json
{
"type": "git",
"url": "https://github.com/sourcebot-dev/sourcebot"
}
```
Note that only `http` & `https` URLs are supported at this time.
## Schema reference
<Accordion title="Reference">
[schemas/v3/genericGitHost.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/genericGitHost.json)
<GenericGitHost />
</Accordion>

View file

@ -0,0 +1,87 @@
---
title: Local Git repositories
---
import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
<Note>
This feature is only supported when [self-hosting](/self-hosting/overview).
</Note>
Sourcebot can sync code from generic git repositories stored in a local directory. This can be helpful in scenarios where you already have a large number of repos checked out. Local repositories are treated as **read-only**, meaning Sourcebot will **not** `git fetch` new revisions.
## Getting Started
<Warning>
Only folders that contain a git repository at their root **and** have a `remote.origin.url` set in their git config are supported at this time. All other folders will be skipped.
</Warning>
Let's assume we have a `repos` directory located at `$(pwd)` with a collection of git repositories:
```sh
repos/
├─ repo_1/
├─ repo_2/
├─ repo_3/
├─ ...
```
To get Sourcebot to index these repositories:
<Steps>
<Step title="Mount a volume">
We need to mount a docker volume to the `repos` directory so Sourcebot can read its contents. Sourcebot will **not** write to local repositories, so we can mount a separate **read-only** volume:
``` bash
docker run \
-v $(pwd)/repos:/repos:ro \
/* additional args */ \
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Step>
<Step title="Create a connection">
We can now create a new git [connection](/docs/connections/overview), specifying local paths with the `file://` prefix. Glob patterns are supported. For example:
```json
{
"type": "git",
"url": "file:///repos/*"
}
```
Sourcebot will expand this glob pattern into paths `/repos/repo_1`, `/repos/repo_2`, etc. and index all valid git repositories.
</Step>
</Steps>
## Examples
<AccordionGroup>
<Accordion title="Sync individual repo">
```json
{
"type": "git",
"url": "file:///path/to/git_repo"
}
```
</Accordion>
<Accordion title="Sync multiple repos using glob patterns">
```json
// Attempt to sync directories contained in `repos/` (non-recursive)
{
"type": "git",
"url": "file:///repos/*"
}
```
</Accordion>
</AccordionGroup>
## Schema reference
<Accordion title="Reference">
[schemas/v3/genericGitHost.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/genericGitHost.json)
<GenericGitHost />
</Accordion>

View file

@ -30,6 +30,8 @@ There are two ways to define connections:
<Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" />
<Card horizontal title="Gitea" href="/docs/connections/gitea" />
<Card horizontal title="Gerrit" href="/docs/connections/gerrit" />
<Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" />
<Card horizontal title="Local Git repos" icon="folder" href="/docs/connections/local-repos" />
</CardGroup>
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note>

View file

@ -90,4 +90,5 @@ Additional info:
| Bitbucket Data Center | ✅ |
| Gitea | ✅ |
| Gerrit | ❌ |
| Generic git host | ✅ |

View file

@ -105,7 +105,7 @@ Like other prefixes, contexts can be negated using `-` or combined using `or`:
- `-context:web` excludes frontend repositories from results
- `( context:web or context:backend )` searches across both frontend and backend code
See [this doc](/docs/more/syntax-reference) for more details on the search query syntax.
See [this doc](/docs/search/syntax-reference) for more details on the search query syntax.
## Schema reference

View file

@ -32,4 +32,4 @@ Expressions can be prefixed with certain keywords to modify search behavior. Som
| `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` |
| `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files |
| `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` |
| `context:` | Filter results to a predefined [search context](/self-hosting/more/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context |
| `context:` | Filter results to a predefined [search context](/docs/search/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context |

View file

@ -82,6 +82,8 @@ Sourcebot is open source and can be self-hosted using our official [Docker image
<Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" />
<Card horizontal title="Gitea" href="/docs/connections/gitea" />
<Card horizontal title="Gerrit" href="/docs/connections/gerrit" />
<Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" />
<Card horizontal title="Local Git repos" icon="folder" href="/docs/connections/local-repos" />
</CardGroup>
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note>

View file

@ -19,10 +19,108 @@
"ZoektConfig": {
"anyOf": [
{
"$ref": "#/definitions/GitHubConfig"
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"$ref": "#/definitions/GitLabConfig"
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
},
@ -45,10 +143,16 @@
"description": "The GitHub organization to mirror"
},
"Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter"
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter"
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
@ -97,10 +201,16 @@
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter"
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter"
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
@ -126,7 +236,112 @@
"Configs": {
"type": "array",
"items": {
"$ref": "#/definitions/ZoektConfig"
"anyOf": [
{
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
}
}
},

File diff suppressed because it is too large Load diff

View file

@ -227,12 +227,39 @@
"description": "GitLab Configuration"
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -346,7 +373,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -364,12 +429,39 @@
"description": "Gitea Configuration"
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -441,7 +533,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -530,12 +660,39 @@
"description": "The username to use for authentication. Only needed if token is an app password."
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -608,7 +765,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -627,6 +822,74 @@
]
},
"additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
]
}

View file

@ -0,0 +1,71 @@
{/* THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
```

View file

@ -115,14 +115,112 @@
"type": "string"
},
"settings": {
"$ref": "#/definitions/Settings"
"type": "object",
"description": "Defines the global settings for Sourcebot.",
"properties": {
"maxFileSize": {
"type": "number",
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
"minimum": 1
},
"maxTrigramCount": {
"type": "number",
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Defaults to 20000.",
"minimum": 1
},
"reindexIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
"minimum": 1
},
"resyncConnectionIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the connection manager should check for connections that need to be re-synced. Defaults to 24 hours.",
"minimum": 1
},
"resyncConnectionPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.",
"minimum": 1
},
"reindexRepoPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.",
"minimum": 1
},
"maxConnectionSyncJobConcurrency": {
"type": "number",
"description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoIndexingJobConcurrency": {
"type": "number",
"description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoGarbageCollectionJobConcurrency": {
"type": "number",
"description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"repoGarbageCollectionGracePeriodMs": {
"type": "number",
"description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded. Defaults to 10 seconds.",
"minimum": 1
},
"repoIndexTimeoutMs": {
"type": "number",
"description": "The timeout (in milliseconds) for indexing a repo. Defaults to 2 hours.",
"minimum": 1
}
},
"additionalProperties": false
},
"contexts": {
"type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext"
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "SearchContext",
"description": "Search context",
"properties": {
"include": {
"type": "array",
"description": "List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/**",
"gerrit.example.org/sub/path/**"
]
]
},
"exclude": {
"type": "array",
"description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/sourcebot",
"gerrit.example.org/sub/path/**"
]
]
},
"description": {
"type": "string",
"description": "Optional description of the search context that surfaces in the UI."
}
},
"required": [
"include"
],
"additionalProperties": false
}
},
"additionalProperties": false
@ -358,12 +456,39 @@
"description": "GitLab Configuration"
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -477,7 +602,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -495,12 +658,39 @@
"description": "Gitea Configuration"
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -572,7 +762,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -661,12 +889,39 @@
"description": "The username to use for authentication. Only needed if token is an app password."
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -739,7 +994,45 @@
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -758,6 +1051,74 @@
]
},
"additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
]
}

View file

@ -1,8 +1,7 @@
{
"private": true,
"workspaces": [
"packages/*",
"packages/agents/*"
"packages/*"
],
"scripts": {
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach -A run build",

View file

@ -41,6 +41,7 @@
"cross-fetch": "^4.0.0",
"dotenv": "^16.4.5",
"express": "^4.21.2",
"git-url-parse": "^16.1.0",
"gitea-js": "^1.22.0",
"glob": "^11.0.0",
"ioredis": "^5.4.2",

View file

@ -4,7 +4,7 @@ import { Settings } from "./types.js";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "./logger.js";
import { Redis } from 'ioredis';
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig } from "./repoCompileUtils.js";
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig, compileGenericGitHostConfig } from "./repoCompileUtils.js";
import { BackendError, BackendException } from "@sourcebot/error";
import { captureEvent } from "./posthog.js";
import { env } from "./env.js";
@ -173,6 +173,9 @@ export class ConnectionManager implements IConnectionManager {
case 'bitbucket': {
return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db);
}
case 'git': {
return await compileGenericGitHostConfig(config, job.data.connectionId, orgId);
}
}
})();
} catch (err) {

View file

@ -1,6 +1,8 @@
import { simpleGit, SimpleGitProgressEvent } from 'simple-git';
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => {
type onProgressFn = (event: SimpleGitProgressEvent) => void;
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
});
@ -26,7 +28,7 @@ export const cloneRepository = async (cloneURL: string, path: string, onProgress
}
export const fetchRepository = async (path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => {
export const fetchRepository = async (path: string, onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
});
@ -56,7 +58,7 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
* that do not exist yet. It will _not_ remove any existing keys that are not
* present in gitConfig.
*/
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
}).cwd(path);
@ -74,6 +76,52 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
}
}
/**
 * Returns true if `path` is the _root_ of a git repository.
 *
 * @param path - Directory used as the git working directory for the check.
 * @param onProgress - Optional progress callback forwarded to simple-git.
 * @returns The result of simple-git's `checkIsRepo(CheckRepoActions.IS_REPO_ROOT)`.
 * @throws Error prefixed with `isPathAGitRepoRoot failed:` when the check itself fails.
 */
export const isPathAValidGitRepoRoot = async (path: string, onProgress?: onProgressFn) => {
    const git = simpleGit({
        progress: onProgress,
    }).cwd(path);

    try {
        // `await` is required: returning the bare promise would let a rejection
        // bypass this try/catch, so the error would never be wrapped below.
        return await git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);
    } catch (error: unknown) {
        if (error instanceof Error) {
            throw new Error(`isPathAGitRepoRoot failed: ${error.message}`);
        } else {
            throw new Error(`isPathAGitRepoRoot failed: ${error}`);
        }
    }
}
/**
 * Probes `url` by listing its remote heads.
 *
 * @param url - Remote URL passed to `git ls-remote --heads`.
 * @returns true when the listing succeeds with non-empty output; false when
 * the output is empty or the command throws.
 */
export const isUrlAValidGitRepo = async (url: string) => {
    const git = simpleGit();

    try {
        const heads = await git.listRemote(['--heads', url]);
        const hasAnyHead = heads.trim() !== '';
        return hasAnyHead;
    } catch {
        // Any failure while listing is treated as "not a valid git repo".
        return false;
    }
}
/**
 * Reads `remote.origin.url` from the local-scope git config of the repo at `path`.
 *
 * @param path - Directory used as the git working directory.
 * @returns The `value` field of simple-git's `getConfig` result.
 * @throws Error prefixed with `Failed to get origin for <path>:` when the lookup fails.
 */
export const getOriginUrl = async (path: string) => {
    const git = simpleGit().cwd(path);

    try {
        const { value } = await git.getConfig('remote.origin.url', GitConfigScope.local);
        return value;
    } catch (error: unknown) {
        // Use the message for real Error objects, otherwise stringify the thrown value.
        const reason = error instanceof Error ? error.message : error;
        throw new Error(`Failed to get origin for ${path}: ${reason}`);
    }
}
export const getBranches = async (path: string) => {
const git = simpleGit();
const branches = await git.cwd({

View file

@ -10,9 +10,13 @@ import { Prisma, PrismaClient } from '@sourcebot/db';
import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js";
import { createLogger } from './logger.js';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { RepoMetadata } from './types.js';
import path from 'path';
import { glob } from 'glob';
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
import assert from 'assert';
import GitUrlParse from 'git-url-parse';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
@ -434,3 +438,166 @@ export const compileBitbucketConfig = async (
notFound,
};
}
/**
 * Entry point for compiling a generic git host connection. Dispatches on the
 * protocol of `config.url`: `file:` goes to the local-path compiler, `http:` and
 * `https:` go to the remote-url compiler.
 *
 * Note that the helpers take `(config, orgId, connectionId)`, i.e. the last two
 * arguments are in the opposite order to this function's parameters.
 *
 * @throws Error for any other protocol.
 */
export const compileGenericGitHostConfig = async (
    config: GenericGitHostConnectionConfig,
    connectionId: number,
    orgId: number,
) => {
    const { protocol } = new URL(config.url);

    switch (protocol) {
        case 'file:':
            return compileGenericGitHostConfig_file(config, orgId, connectionId);
        case 'http:':
        case 'https:':
            return compileGenericGitHostConfig_url(config, orgId, connectionId);
        default:
            // Schema should prevent this, but throw an error just in case.
            throw new Error(`Unsupported protocol: ${protocol}`);
    }
}
/**
 * Compiles a `file://` generic git host connection into repo records.
 *
 * The path portion of `config.url` is used as a glob pattern. Every match that
 * is the root of a git repository and has `remote.origin.url` configured yields
 * one `RepoData` entry; every other match is logged and listed in `notFound.repos`.
 *
 * @param config - Connection config; `config.url` must use the `file:` protocol.
 * @param orgId - Id of the org each repo is connected to.
 * @param connectionId - Id of the connection each repo is created under.
 * @returns `{ repoData, notFound }`.
 */
export const compileGenericGitHostConfig_file = async (
    config: GenericGitHostConnectionConfig,
    orgId: number,
    connectionId: number,
) => {
    const configUrl = new URL(config.url);
    assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');

    // `URL.pathname` is percent-encoded (a space becomes `%20`, etc.), so it has
    // to be decoded before it is used as a filesystem glob. Otherwise directories
    // whose names contain escaped characters can never match.
    let globPattern = configUrl.pathname;
    try {
        globPattern = decodeURIComponent(configUrl.pathname);
    } catch {
        // Malformed escape sequence: fall back to the raw pathname.
    }

    // Resolve the glob pattern to a list of repo-paths
    const repoPaths = await glob(globPattern, {
        absolute: true,
    });

    const repos: RepoData[] = [];
    const notFound: {
        users: string[],
        orgs: string[],
        repos: string[],
    } = {
        users: [],
        orgs: [],
        repos: [],
    };

    await Promise.all(repoPaths.map(async (repoPath) => {
        const isGitRepo = await isPathAValidGitRepoRoot(repoPath);
        if (!isGitRepo) {
            logger.warn(`Skipping ${repoPath} - not a git repository.`);
            notFound.repos.push(repoPath);
            return;
        }

        // The origin remote is what the repo name and external id are derived from,
        // so a repo without one cannot be registered.
        const origin = await getOriginUrl(repoPath);
        if (!origin) {
            logger.warn(`Skipping ${repoPath} - remote.origin.url not found in git config.`);
            notFound.repos.push(repoPath);
            return;
        }

        const remoteUrl = GitUrlParse(origin);

        // @note: matches the naming here:
        // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
        const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));

        const repo: RepoData = {
            external_codeHostType: 'generic-git-host',
            external_codeHostUrl: remoteUrl.resource,
            external_id: remoteUrl.toString(),
            cloneUrl: `file://${repoPath}`,
            name: repoName,
            displayName: repoName,
            isFork: false,
            isArchived: false,
            org: {
                connect: {
                    id: orgId,
                },
            },
            connections: {
                create: {
                    connectionId: connectionId,
                }
            },
            metadata: {
                branches: config.revisions?.branches ?? undefined,
                tags: config.revisions?.tags ?? undefined,
                // @NOTE: We don't set a gitConfig here since local repositories
                // are readonly.
                gitConfig: undefined,
            } satisfies RepoMetadata,
        }

        repos.push(repo);
    }));

    return {
        repoData: repos,
        notFound,
    }
}
/**
 * Compiles an `http(s)://` generic git host connection into a single repo record.
 *
 * The URL is first probed with `isUrlAValidGitRepo`. If the probe fails, no repo
 * is produced and the URL is listed in `notFound.repos`.
 *
 * @param config - Connection config; `config.url` must use `http:` or `https:`.
 * @param orgId - Id of the org the repo is connected to.
 * @param connectionId - Id of the connection the repo is created under.
 * @returns `{ repoData, notFound }` with zero or one repo.
 */
export const compileGenericGitHostConfig_url = async (
    config: GenericGitHostConnectionConfig,
    orgId: number,
    connectionId: number,
) => {
    const remoteUrl = new URL(config.url);
    assert(remoteUrl.protocol === 'http:' || remoteUrl.protocol === 'https:', 'config.url must be a http:// or https:// URL');

    // The serialized URL doubles as the clone url and the external id.
    const serializedUrl = remoteUrl.toString();

    const notFound: {
        users: string[],
        orgs: string[],
        repos: string[],
    } = {
        users: [],
        orgs: [],
        repos: [],
    };

    // Validate that we are dealing with a valid git repo.
    const reachable = await isUrlAValidGitRepo(serializedUrl);
    if (!reachable) {
        notFound.repos.push(serializedUrl);
        return {
            repoData: [],
            notFound,
        }
    }

    // @note: matches the naming here:
    // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
    const pathWithoutGitSuffix = remoteUrl.pathname.replace(/\.git$/, '');
    const repoName = path.join(remoteUrl.host, pathWithoutGitSuffix);

    const repo: RepoData = {
        external_codeHostType: 'generic-git-host',
        external_codeHostUrl: remoteUrl.origin,
        external_id: serializedUrl,
        cloneUrl: serializedUrl,
        name: repoName,
        displayName: repoName,
        isFork: false,
        isArchived: false,
        org: {
            connect: {
                id: orgId,
            },
        },
        connections: {
            create: {
                connectionId: connectionId,
            }
        },
        metadata: {
            branches: config.revisions?.branches ?? undefined,
            tags: config.revisions?.tags ?? undefined,
        }
    };

    return {
        repoData: [repo],
        notFound,
    }
}

View file

@ -221,31 +221,29 @@ export class RepoManager implements IRepoManager {
}
private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) {
let fetchDuration_s: number | undefined = undefined;
let cloneDuration_s: number | undefined = undefined;
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
const repoPath = getRepoPath(repo, this.ctx);
const metadata = repoMetadataSchema.parse(repo.metadata);
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
if (repoAlreadyInIndexingState && existsSync(repoPath)) {
if (repoAlreadyInIndexingState && existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Deleting repo directory ${repoPath} during sync because it was already in the indexing state`);
await promises.rm(repoPath, { recursive: true, force: true });
}
if (existsSync(repoPath)) {
if (existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Fetching ${repo.displayName}...`);
const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}));
fetchDuration_s = durationMs / 1000;
const fetchDuration_s = durationMs / 1000;
process.stdout.write('\n');
this.logger.info(`Fetched ${repo.displayName} in ${fetchDuration_s}s`);
} else {
} else if (!isReadOnly) {
this.logger.info(`Cloning ${repo.displayName}...`);
const auth = await this.getCloneCredentialsForRepo(repo, this.db);
@ -267,7 +265,7 @@ export class RepoManager implements IRepoManager {
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}));
cloneDuration_s = durationMs / 1000;
const cloneDuration_s = durationMs / 1000;
process.stdout.write('\n');
this.logger.info(`Cloned ${repo.displayName} in ${cloneDuration_s}s`);
@ -276,7 +274,7 @@ export class RepoManager implements IRepoManager {
// Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we
// have in the DB.
if (metadata.gitConfig) {
if (metadata.gitConfig && !isReadOnly) {
await upsertGitConfig(repoPath, metadata.gitConfig);
}
@ -284,12 +282,6 @@ export class RepoManager implements IRepoManager {
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000;
this.logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`);
return {
fetchDuration_s,
cloneDuration_s,
indexDuration_s,
}
}
private async runIndexJob(job: Job<RepoIndexingPayload>) {
@ -323,17 +315,12 @@ export class RepoManager implements IRepoManager {
this.promClient.activeRepoIndexingJobs.inc();
this.promClient.pendingRepoIndexingJobs.dec({ repo: repo.id.toString() });
let indexDuration_s: number | undefined;
let fetchDuration_s: number | undefined;
let cloneDuration_s: number | undefined;
let stats;
let attempts = 0;
const maxAttempts = 3;
while (attempts < maxAttempts) {
try {
stats = await this.syncGitRepository(repo, repoAlreadyInIndexingState);
await this.syncGitRepository(repo, repoAlreadyInIndexingState);
break;
} catch (error) {
Sentry.captureException(error);
@ -350,10 +337,6 @@ export class RepoManager implements IRepoManager {
await new Promise(resolve => setTimeout(resolve, sleepDuration));
}
}
indexDuration_s = stats!.indexDuration_s;
fetchDuration_s = stats!.fetchDuration_s;
cloneDuration_s = stats!.cloneDuration_s;
}
private async onIndexJobCompleted(job: Job<RepoIndexingPayload>) {
@ -489,8 +472,8 @@ export class RepoManager implements IRepoManager {
});
// delete cloned repo
const repoPath = getRepoPath(repo, this.ctx);
if (existsSync(repoPath)) {
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
if (existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Deleting repo directory ${repoPath}`);
await promises.rm(repoPath, { recursive: true, force: true });
}

View file

@ -94,8 +94,21 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
return true;
}
export const getRepoPath = (repo: Repo, ctx: AppContext) => {
return path.join(ctx.reposPath, repo.id.toString());
/**
 * Resolves where a repo lives on disk.
 *
 * - Local (`file://`) generic-git-host repos are used in place: the path comes
 *   from the clone url and is flagged read-only.
 * - Every other repo lives under `ctx.reposPath/<repo.id>` and is writable.
 *
 * @param repo - Repo record; reads `id`, `cloneUrl` and `external_codeHostType`.
 * @param ctx - App context; reads `reposPath`.
 * @returns The filesystem path and whether it must be treated as read-only.
 */
export const getRepoPath = (repo: Repo, ctx: AppContext): { path: string, isReadOnly: boolean } => {
    // Only parse the clone url for generic git hosts. Other code hosts may store
    // clone addresses that are not WHATWG URLs (e.g. scp-style `git@host:org/repo`),
    // and `new URL` would throw on those.
    if (repo.external_codeHostType === 'generic-git-host') {
        const cloneUrl = new URL(repo.cloneUrl);

        // If we are dealing with a local repository, then use that as the path.
        // Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
        if (cloneUrl.protocol === 'file:') {
            // `pathname` is percent-encoded (a space becomes `%20`); decode it so the
            // result is a usable filesystem path.
            let localPath = cloneUrl.pathname;
            try {
                localPath = decodeURIComponent(cloneUrl.pathname);
            } catch {
                // Malformed escape sequence: fall back to the raw pathname.
            }

            return {
                path: localPath,
                isReadOnly: true,
            }
        }
    }

    return {
        path: path.join(ctx.reposPath, repo.id.toString()),
        isReadOnly: false,
    }
}
export const getShardPrefix = (orgId: number, repoId: number) => {

View file

@ -15,7 +15,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
'HEAD'
];
const repoPath = getRepoPath(repo, ctx);
const { path: repoPath } = getRepoPath(repo, ctx);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repoMetadataSchema.parse(repo.metadata);
@ -65,6 +65,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
`-file_limit ${settings.maxFileSize}`,
`-branches ${revisions.join(',')}`,
`-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`,
`-shard_prefix ${shardPrefix}`,
repoPath
].join(' ');

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Changed
- Updated API client to match the latest Sourcebot release. [#307](https://github.com/sourcebot-dev/sourcebot/pull/307)
## [1.0.0] - 2025-05-07
### Added

View file

@ -109,7 +109,7 @@ server.tool(
(acc, chunk) => acc + chunk.matchRanges.length,
0,
);
let text = `file: ${file.url}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}`;
let text = `file: ${file.webUrl}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}`;
if (includeCodeSnippets) {
const snippets = file.chunks.map(chunk => {
@ -166,7 +166,7 @@ server.tool(
const content: TextContent[] = response.repos.map(repo => {
return {
type: "text",
text: `id: ${repo.name}\nurl: ${repo.url}`,
text: `id: ${repo.name}\nurl: ${repo.webUrl}`,
}
});

View file

@ -32,6 +32,14 @@ export const searchRequestSchema = z.object({
whole: z.boolean().optional(),
});
// Zod schema for one repository-info entry: a numeric `id`, the `codeHostType`
// and `name` strings, plus an optional `displayName` and optional `webUrl`.
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.string(),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
})
export const searchResponseSchema = z.object({
zoektStats: z.object({
// The duration (in nanoseconds) of the search.
@ -63,9 +71,10 @@ export const searchResponseSchema = z.object({
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
url: z.string(),
chunks: z.array(z.object({
content: z.string(),
matchRanges: z.array(rangeSchema),
@ -79,13 +88,14 @@ export const searchResponseSchema = z.object({
// Set if `whole` is true.
content: z.string().optional(),
})),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
});
export const repositorySchema = z.object({
name: z.string(),
url: z.string(),
branches: z.array(z.string()),
webUrl: z.string().optional(),
rawConfig: z.record(z.string(), z.string()).optional(),
});

View file

@ -18,10 +18,108 @@ const schema = {
"ZoektConfig": {
"anyOf": [
{
"$ref": "#/definitions/GitHubConfig"
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"$ref": "#/definitions/GitLabConfig"
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
},
@ -44,10 +142,16 @@ const schema = {
"description": "The GitHub organization to mirror"
},
"Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter"
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter"
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
@ -96,10 +200,16 @@ const schema = {
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter"
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter"
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
@ -125,7 +235,112 @@ const schema = {
"Configs": {
"type": "array",
"items": {
"$ref": "#/definitions/ZoektConfig"
"anyOf": [
{
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
}
}
},

File diff suppressed because it is too large Load diff

View file

@ -226,12 +226,39 @@ const schema = {
"description": "GitLab Configuration"
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -345,7 +372,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -363,12 +428,39 @@ const schema = {
"description": "Gitea Configuration"
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -440,7 +532,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -529,12 +659,39 @@ const schema = {
"description": "The username to use for authentication. Only needed if token is an app password."
},
"token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -607,7 +764,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -626,6 +821,74 @@ const schema = {
]
},
"additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or a absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only modified. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
]
} as const;

View file

@ -5,7 +5,8 @@ export type ConnectionConfig =
| GitlabConnectionConfig
| GiteaConnectionConfig
| GerritConnectionConfig
| BitbucketConnectionConfig;
| BitbucketConnectionConfig
| GenericGitHostConnectionConfig;
export interface GithubConnectionConfig {
/**
@ -305,3 +306,14 @@ export interface BitbucketConnectionConfig {
};
revisions?: GitRevisions;
}
/**
* Connection config for a generic Git host, selected by `type: "git"`.
*/
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
/**
* Optional set of revisions (branches, tags) to include when indexing.
*/
revisions?: GitRevisions;
}

View file

@ -0,0 +1,70 @@
// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!
//
// JSON Schema (draft-07) describing a generic Git host connection config.
// An accepted object must have `type` equal to "git" and a string `url`;
// `revisions` (branch / tag glob lists) is optional and no other properties
// are allowed at either level (`additionalProperties: false`).
//
// NOTE(review): the `url` description says local paths are prefixed with
// `file://`, while the `pattern` below requires `file:///` (three slashes).
// The description also contains the typos "a absolute" and "read-only
// modified". These strings are generated, so any correction presumably
// belongs in the schema source this file is generated from - confirm.
const schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or a absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only modified. Local directories support glob patterns.",
// Two alternatives, anchored to the whole string:
//  - `http://` or `https://`, then one character that is not whitespace
//    or one of `/ $ . ? #`, then any one character, then any run of
//    non-whitespace characters;
//  - `file:///` followed by one or more non-whitespace characters.
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
// Only `type` and `url` are mandatory; `revisions` may be omitted.
"required": [
"type",
"url"
],
"additionalProperties": false
} as const;
export { schema as genericGitHostSchema };

View file

@ -0,0 +1,26 @@
// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!
/**
* Connection config for a generic Git host, selected by `type: "git"`.
*/
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
/**
* Optional set of revisions (branches, tags) to include when indexing.
*/
revisions?: GitRevisions;
}
/**
* The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.
*/
export interface GitRevisions {
/**
* List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.
*/
branches?: string[];
/**
* List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.
*/
tags?: string[];
}

View file

@ -114,14 +114,112 @@ const schema = {
"type": "string"
},
"settings": {
"$ref": "#/definitions/Settings"
"type": "object",
"description": "Defines the global settings for Sourcebot.",
"properties": {
"maxFileSize": {
"type": "number",
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
"minimum": 1
},
"maxTrigramCount": {
"type": "number",
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Default to 20000.",
"minimum": 1
},
"reindexIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
"minimum": 1
},
"resyncConnectionIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the connection manager should check for connections that need to be re-synced. Defaults to 24 hours.",
"minimum": 1
},
"resyncConnectionPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.",
"minimum": 1
},
"reindexRepoPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.",
"minimum": 1
},
"maxConnectionSyncJobConcurrency": {
"type": "number",
"description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoIndexingJobConcurrency": {
"type": "number",
"description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoGarbageCollectionJobConcurrency": {
"type": "number",
"description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"repoGarbageCollectionGracePeriodMs": {
"type": "number",
"description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded. Defaults to 10 seconds.",
"minimum": 1
},
"repoIndexTimeoutMs": {
"type": "number",
"description": "The timeout (in milliseconds) for a repo indexing to timeout. Defaults to 2 hours.",
"minimum": 1
}
},
"additionalProperties": false
},
"contexts": {
"type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext"
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "SearchContext",
"description": "Search context",
"properties": {
"include": {
"type": "array",
"description": "List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/**",
"gerrit.example.org/sub/path/**"
]
]
},
"exclude": {
"type": "array",
"description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/sourcebot",
"gerrit.example.org/sub/path/**"
]
]
},
"description": {
"type": "string",
"description": "Optional description of the search context that surfaces in the UI."
}
},
"required": [
"include"
],
"additionalProperties": false
}
},
"additionalProperties": false
@ -357,12 +455,39 @@ const schema = {
"description": "GitLab Configuration"
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -476,7 +601,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -494,12 +657,39 @@ const schema = {
"description": "Gitea Configuration"
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -571,7 +761,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -660,12 +888,39 @@ const schema = {
"description": "The username to use for authentication. Only needed if token is an app password."
},
"token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
@ -738,7 +993,45 @@ const schema = {
"additionalProperties": false
},
"revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions"
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
@ -757,6 +1050,74 @@ const schema = {
]
},
"additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or a absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only modified. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
]
}

View file

@ -9,13 +9,14 @@ export type ConnectionConfig =
| GitlabConnectionConfig
| GiteaConnectionConfig
| GerritConnectionConfig
| BitbucketConnectionConfig;
| BitbucketConnectionConfig
| GenericGitHostConnectionConfig;
export interface SourcebotConfig {
$schema?: string;
settings?: Settings;
/**
* [Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts
* [Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts
*/
contexts?: {
[k: string]: SearchContext;
@ -400,3 +401,14 @@ export interface BitbucketConnectionConfig {
};
revisions?: GitRevisions;
}
/**
* Connection config for a generic Git host, selected by `type: "git"`.
*/
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
/**
* Optional set of revisions (branches, tags) to include when indexing.
*/
revisions?: GitRevisions;
}

View file

@ -1,5 +1,5 @@
import path, { dirname } from "path";
import { mkdir, rm, writeFile } from "fs/promises";
import { mkdir, writeFile } from "fs/promises";
import $RefParser from "@apidevtools/json-schema-ref-parser";
import { compileFromFile } from "json-schema-to-typescript";
import { glob } from "glob";
@ -25,15 +25,15 @@ const BANNER_COMMENT = 'THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!';
await mkdir(docsOutDir, { recursive: true });
// Generate schema
const schema = JSON.stringify(await $RefParser.bundle(schemaPath), null, 2);
const schema = JSON.stringify(await $RefParser.dereference(schemaPath), null, 2);
// Write to src
await writeFile(
path.join(srcOutDir, `${name}.schema.ts`),
`// ${BANNER_COMMENT}\n` +
'const schema = ' +
schema +
` as const;\nexport { schema as ${name}Schema };`,
schema +
` as const;\nexport { schema as ${name}Schema };`,
);
// Write to docs

View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xml:space="preserve" width="97" height="97">
<path fill="#F05133" d="M92.71 44.408 52.591 4.291c-2.31-2.311-6.057-2.311-8.369 0l-8.33 8.332L46.459 23.19c2.456-.83 5.272-.273 7.229 1.685 1.969 1.97 2.521 4.81 1.67 7.275l10.186 10.185c2.465-.85 5.307-.3 7.275 1.671 2.75 2.75 2.75 7.206 0 9.958-2.752 2.751-7.208 2.751-9.961 0-2.068-2.07-2.58-5.11-1.531-7.658l-9.5-9.499v24.997c.67.332 1.303.774 1.861 1.332 2.75 2.75 2.75 7.206 0 9.959-2.75 2.749-7.209 2.749-9.957 0-2.75-2.754-2.75-7.21 0-9.959.68-.679 1.467-1.193 2.307-1.537v-25.23c-.84-.344-1.625-.853-2.307-1.537-2.083-2.082-2.584-5.14-1.516-7.698L31.798 16.715 4.288 44.222c-2.311 2.313-2.311 6.06 0 8.371l40.121 40.118c2.31 2.311 6.056 2.311 8.369 0L92.71 52.779c2.311-2.311 2.311-6.06 0-8.371z"/>
</svg>

After

Width:  |  Height:  |  Size: 841 B

View file

@ -28,6 +28,7 @@ import { orgDomainSchema, orgNameSchema, repositoryQuerySchema } from "./lib/sch
import { TenancyMode } from "./lib/types";
import { decrementOrgSeatCount, getSubscriptionForOrg, incrementOrgSeatCount } from "./ee/features/billing/serverUtils";
import { bitbucketSchema } from "@sourcebot/schemas/v3/bitbucket.schema";
import { genericGitHostSchema } from "@sourcebot/schemas/v3/genericGitHost.schema";
const ajv = new Ajv({
validateFormats: false,
@ -443,6 +444,67 @@ export const getRepos = async (domain: string, filter: { status?: RepoIndexingSt
}
), /* allowSingleTenantUnauthedAccess = */ true));
/**
 * Looks up a single repository by its name within the org that `domain`
 * resolves to, and returns a trimmed-down summary of it.
 *
 * The lookup takes the first repo row matching both `repoName` and the org id.
 * When no row matches, the result of `notFound()` is returned instead
 * (presumably a service error; callers check it with `isServiceError`).
 * Nullable columns on the row are surfaced as `undefined` rather than `null`.
 *
 * @param repoName - The repo name to match exactly against `repo.name`.
 * @param domain - The org domain passed to `withOrgMembership`.
 */
export const getRepoInfoByName = async (repoName: string, domain: string) => sew(() =>
    withAuth((session) =>
        withOrgMembership(session, domain, async ({ orgId }) => {
            // @note: a repo's name is its remote url on the code host, e.g.:
            //  - github.com/sourcebot-dev/sourcebot
            //  - gitlab.com/gitlab-org/gitlab
            //  - gerrit.wikimedia.org/r/mediawiki/extensions/OnionsPorFavor
            //
            // Names are unique within an org for most purposes, which makes
            // findFirst behave like findUnique here. The exception is a repo
            // that is also pulled in through a generic `git` connection, e.g.:
            //
            // ```json
            // {
            //     "connections": {
            //         "connection-1": {
            //             "type": "github",
            //             "repos": [
            //                 "sourcebot-dev/sourcebot"
            //             ]
            //         },
            //         "connection-2": {
            //             "type": "git",
            //             "url": "file:///tmp/repos/sourcebot"
            //         }
            //     }
            // }
            // ```
            //
            // Both connections yield a repo named "github.com/sourcebot-dev/sourcebot".
            // This is left as a known edge case since it is unlikely in practice.
            //
            // @v4-todo: a unique constraint on repo name + orgId would help
            // de-duplicate these cases.
            // @see: repoCompileUtils.ts
            const matchedRepo = await prisma.repo.findFirst({
                where: {
                    name: repoName,
                    orgId,
                },
            });

            if (!matchedRepo) {
                return notFound();
            }

            const {
                id,
                name,
                displayName,
                external_codeHostType: codeHostType,
                webUrl,
                imageUrl,
                indexedAt,
                repoIndexingStatus,
            } = matchedRepo;

            // Normalise nullable columns to `undefined` for consumers.
            return {
                id,
                name,
                displayName: displayName ?? undefined,
                codeHostType,
                webUrl: webUrl ?? undefined,
                imageUrl: imageUrl ?? undefined,
                indexedAt: indexedAt ?? undefined,
                repoIndexingStatus,
            };
        }), /* allowSingleTenantUnauthedAccess = */ true));
export const createConnection = async (name: string, type: CodeHostType, connectionConfig: string, domain: string): Promise<{ id: number } | ServiceError> => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
@ -1180,6 +1242,8 @@ const parseConnectionConfig = (config: string) => {
return gerritSchema;
case 'bitbucket':
return bitbucketSchema;
case 'git':
return genericGitHostSchema;
}
})();
@ -1230,6 +1294,12 @@ const parseConnectionConfig = (config: string) => {
hasToken: true, // gerrit doesn't use a token atm
}
}
case "git": {
return {
numRepos: 1,
hasToken: false,
}
}
}
})();

View file

@ -2,7 +2,6 @@ import { FileHeader } from "@/app/[domain]/components/fileHeader";
import { TopBar } from "@/app/[domain]/components/topBar";
import { Separator } from '@/components/ui/separator';
import { getFileSource } from '@/features/search/fileSourceApi';
import { listRepositories } from '@/features/search/listReposApi';
import { isServiceError } from "@/lib/utils";
import { base64Decode } from "@/lib/utils";
import { CodePreview } from "./codePreview";
@ -11,6 +10,8 @@ import { LuFileX2, LuBookX } from "react-icons/lu";
import { getOrgFromDomain } from "@/data/org";
import { notFound } from "next/navigation";
import { ServiceErrorException } from "@/lib/serviceError";
import { getRepoInfoByName } from "@/actions";
interface BrowsePageProps {
params: {
path: string[];
@ -48,19 +49,11 @@ export default async function BrowsePage({
}
})();
const org = await getOrgFromDomain(params.domain);
if (!org) {
notFound();
const repoInfo = await getRepoInfoByName(repoName, params.domain);
if (isServiceError(repoInfo) && repoInfo.errorCode !== ErrorCode.NOT_FOUND) {
throw new ServiceErrorException(repoInfo);
}
// @todo (bkellam) : We should probably have a endpoint to fetch repository metadata
// given it's name or id.
const reposResponse = await listRepositories(org.id);
if (isServiceError(reposResponse)) {
throw new ServiceErrorException(reposResponse);
}
const repo = reposResponse.repos.find(r => r.name === repoName);
if (pathType === 'tree') {
// @todo : proper tree handling
return (
@ -78,12 +71,17 @@ export default async function BrowsePage({
domain={params.domain}
/>
<Separator />
{repo && (
{!isServiceError(repoInfo) && (
<>
<div className="bg-accent py-1 px-2 flex flex-row">
<FileHeader
fileName={path}
repo={repo}
repo={{
name: repoInfo.name,
displayName: repoInfo.displayName,
webUrl: repoInfo.webUrl,
codeHostType: repoInfo.codeHostType,
}}
branchDisplayName={revisionName}
/>
</div>
@ -91,7 +89,7 @@ export default async function BrowsePage({
</>
)}
</div>
{repo === undefined ? (
{isServiceError(repoInfo) ? (
<div className="flex h-full">
<div className="m-auto flex flex-col items-center gap-2">
<LuBookX className="h-12 w-12 text-secondary-foreground" />
@ -101,9 +99,9 @@ export default async function BrowsePage({
) : (
<CodePreviewWrapper
path={path}
repoName={repoName}
repoName={repoInfo.name}
revisionName={revisionName ?? 'HEAD'}
orgId={org.id}
domain={params.domain}
/>
)}
</div>
@ -114,21 +112,21 @@ interface CodePreviewWrapper {
path: string,
repoName: string,
revisionName: string,
orgId: number,
domain: string,
}
const CodePreviewWrapper = async ({
path,
repoName,
revisionName,
orgId,
domain,
}: CodePreviewWrapper) => {
// @todo: this will depend on `pathType`.
const fileSourceResponse = await getFileSource({
fileName: path,
repository: repoName,
branch: revisionName,
}, orgId);
}, domain);
if (isServiceError(fileSourceResponse)) {
if (fileSourceResponse.errorCode === ErrorCode.FILE_NOT_FOUND) {

View file

@ -1,17 +1,22 @@
import { Repository } from "@/features/search/types";
import { getRepoCodeHostInfo } from "@/lib/utils";
import { getCodeHostInfoForRepo } from "@/lib/utils";
import { LaptopIcon } from "@radix-ui/react-icons";
import clsx from "clsx";
import Image from "next/image";
import Link from "next/link";
interface FileHeaderProps {
repo?: Repository;
fileName: string;
fileNameHighlightRange?: {
from: number;
to: number;
}
repo: {
name: string;
codeHostType: string;
displayName?: string;
webUrl?: string;
},
branchDisplayName?: string;
branchDisplayTitle?: string;
}
@ -23,7 +28,12 @@ export const FileHeader = ({
branchDisplayName,
branchDisplayTitle,
}: FileHeaderProps) => {
const info = getRepoCodeHostInfo(repo);
const info = getCodeHostInfoForRepo({
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
});
return (
<div className="flex flex-row gap-2 items-center w-full overflow-hidden">

View file

@ -6,7 +6,7 @@ import {
CarouselItem,
} from "@/components/ui/carousel";
import Autoscroll from "embla-carousel-auto-scroll";
import { getRepoQueryCodeHostInfo } from "@/lib/utils";
import { getCodeHostInfoForRepo } from "@/lib/utils";
import Image from "next/image";
import { FileIcon } from "@radix-ui/react-icons";
import clsx from "clsx";
@ -57,7 +57,12 @@ const RepositoryBadge = ({
repo
}: RepositoryBadgeProps) => {
const { repoIcon, displayName, repoLink } = (() => {
const info = getRepoQueryCodeHostInfo(repo);
const info = getCodeHostInfoForRepo({
codeHostType: repo.codeHostType,
name: repo.repoName,
displayName: repo.repoDisplayName,
webUrl: repo.webUrl,
});
if (info) {
return {

View file

@ -46,7 +46,7 @@ export const CodePreviewPanel = ({
content: decodedSource,
filepath: fileMatch.fileName.text,
matches: fileMatch.chunks,
link: fileMatch.url,
link: fileMatch.webUrl,
language: fileMatch.language,
revision: branch ?? "HEAD",
};

View file

@ -1,8 +1,8 @@
'use client';
import { FileIcon } from "@/components/ui/fileIcon";
import { Repository, SearchResultFile } from "@/features/search/types";
import { cn, getRepoCodeHostInfo } from "@/lib/utils";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
import { cn, getCodeHostInfoForRepo } from "@/lib/utils";
import { LaptopIcon } from "@radix-ui/react-icons";
import Image from "next/image";
import { useRouter, useSearchParams } from "next/navigation";
@ -13,7 +13,7 @@ import { Filter } from "./filter";
// Props accepted by the FilterPanel component.
interface FilePanelProps {
// File matches that the repository and language filter entries are aggregated from.
matches: SearchResultFile[];
onFilterChanged: (filteredMatches: SearchResultFile[]) => void,
// Repository metadata looked up by the aggregation key of the "repository" filter.
repoMetadata: Record<string, Repository>;
// Repository info looked up by `match.repositoryId`.
repoInfo: Record<number, RepositoryInfo>;
}
const LANGUAGES_QUERY_PARAM = "langs";
@ -22,7 +22,7 @@ const REPOS_QUERY_PARAM = "repos";
export const FilterPanel = ({
matches,
onFilterChanged,
repoMetadata,
repoInfo,
}: FilePanelProps) => {
const router = useRouter();
const searchParams = useSearchParams();
@ -38,9 +38,16 @@ export const FilterPanel = ({
return aggregateMatches(
"repository",
matches,
(key) => {
const repo: Repository | undefined = repoMetadata[key];
const info = getRepoCodeHostInfo(repo);
({ key, match }) => {
const repo: RepositoryInfo | undefined = repoInfo[match.repositoryId];
const info = repo ? getCodeHostInfoForRepo({
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
}) : undefined;
const Icon = info ? (
<Image
src={info.icon}
@ -60,14 +67,14 @@ export const FilterPanel = ({
};
}
)
}, [getSelectedFromQuery, matches, repoMetadata]);
}, [getSelectedFromQuery, matches, repoInfo]);
const languages = useMemo(() => {
const selectedLanguages = getSelectedFromQuery(LANGUAGES_QUERY_PARAM);
return aggregateMatches(
"language",
matches,
(key) => {
({ key }) => {
const Icon = (
<FileIcon language={key} />
)
@ -168,14 +175,14 @@ export const FilterPanel = ({
const aggregateMatches = (
propName: 'repository' | 'language',
matches: SearchResultFile[],
createEntry: (key: string) => Entry
createEntry: (props: { key: string, match: SearchResultFile }) => Entry
) => {
return matches
.map((match) => match[propName])
.filter((key) => key.length > 0)
.reduce((aggregation, key) => {
.map((match) => ({ key: match[propName], match }))
.filter(({ key }) => key.length > 0)
.reduce((aggregation, { key, match }) => {
if (!aggregation[key]) {
aggregation[key] = createEntry(key);
aggregation[key] = createEntry({ key, match });
}
aggregation[key].count += 1;
return aggregation;

View file

@ -5,7 +5,7 @@ import { Separator } from "@/components/ui/separator";
import { DoubleArrowDownIcon, DoubleArrowUpIcon } from "@radix-ui/react-icons";
import { useCallback, useMemo } from "react";
import { FileMatch } from "./fileMatch";
import { Repository, SearchResultFile } from "@/features/search/types";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
export const MAX_MATCHES_TO_PREVIEW = 3;
@ -16,7 +16,7 @@ interface FileMatchContainerProps {
showAllMatches: boolean;
onShowAllMatchesButtonClicked: () => void;
isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>;
repoInfo: Record<number, RepositoryInfo>;
yOffset: number;
}
@ -27,7 +27,7 @@ export const FileMatchContainer = ({
showAllMatches,
onShowAllMatchesButtonClicked,
isBranchFilteringEnabled,
repoMetadata,
repoInfo,
yOffset,
}: FileMatchContainerProps) => {
@ -87,6 +87,10 @@ export const FileMatchContainer = ({
return `${branches[0]}${branches.length > 1 ? ` +${branches.length - 1}` : ''}`;
}, [isBranchFilteringEnabled, branches]);
const repo = useMemo(() => {
return repoInfo[file.repositoryId];
}, [repoInfo, file.repositoryId]);
return (
<div>
@ -101,7 +105,12 @@ export const FileMatchContainer = ({
}}
>
<FileHeader
repo={repoMetadata[file.repository]}
repo={{
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
}}
fileName={file.fileName.text}
fileNameHighlightRange={fileNameRange}
branchDisplayName={branchDisplayName}

View file

@ -1,6 +1,6 @@
'use client';
import { Repository, SearchResultFile } from "@/features/search/types";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
import { FileMatchContainer, MAX_MATCHES_TO_PREVIEW } from "./fileMatchContainer";
import { useVirtualizer } from "@tanstack/react-virtual";
import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
@ -12,7 +12,7 @@ interface SearchResultsPanelProps {
isLoadMoreButtonVisible: boolean;
onLoadMoreButtonClicked: () => void;
isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>;
repoInfo: Record<number, RepositoryInfo>;
}
const ESTIMATED_LINE_HEIGHT_PX = 20;
@ -26,7 +26,7 @@ export const SearchResultsPanel = ({
isLoadMoreButtonVisible,
onLoadMoreButtonClicked,
isBranchFilteringEnabled,
repoMetadata,
repoInfo,
}: SearchResultsPanelProps) => {
const parentRef = useRef<HTMLDivElement>(null);
const [showAllMatchesStates, setShowAllMatchesStates] = useState(Array(fileMatches.length).fill(false));
@ -151,7 +151,7 @@ export const SearchResultsPanel = ({
onShowAllMatchesButtonClicked(virtualRow.index);
}}
isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata}
repoInfo={repoInfo}
yOffset={virtualRow.start}
/>
</div>

View file

@ -16,14 +16,14 @@ import { useQuery } from "@tanstack/react-query";
import { useRouter } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useRef, useState } from "react";
import { ImperativePanelHandle } from "react-resizable-panels";
import { getRepos, search } from "../../api/(client)/client";
import { search } from "../../api/(client)/client";
import { TopBar } from "../components/topBar";
import { CodePreviewPanel } from "./components/codePreviewPanel";
import { FilterPanel } from "./components/filterPanel";
import { SearchResultsPanel } from "./components/searchResultsPanel";
import { useDomain } from "@/hooks/useDomain";
import { useToast } from "@/components/hooks/use-toast";
import { Repository, SearchResultFile } from "@/features/search/types";
import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
const DEFAULT_MATCH_COUNT = 10000;
@ -90,25 +90,6 @@ const SearchPageInternal = () => {
])
}, [searchQuery, setSearchHistory]);
// Use the /api/repos endpoint to get a useful list of
// repository metadata (like host type, repo name, etc.)
// Convert this into a map of repo name to repo metadata
// for easy lookup.
const { data: repoMetadata, isLoading: isRepoMetadataLoading } = useQuery({
queryKey: ["repos"],
queryFn: () => getRepos(domain),
select: (data): Record<string, Repository> =>
data.repos
.reduce(
(acc, repo) => ({
...acc,
[repo.name]: repo,
}),
{},
),
refetchOnWindowFocus: false,
});
useEffect(() => {
if (!searchResponse) {
return;
@ -141,13 +122,14 @@ const SearchPageInternal = () => {
});
}, [captureEvent, searchQuery, searchResponse]);
const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled } = useMemo(() => {
const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo } = useMemo(() => {
if (!searchResponse) {
return {
fileMatches: [],
searchDurationMs: 0,
totalMatchCount: 0,
isBranchFilteringEnabled: false,
repositoryInfo: {},
};
}
@ -156,6 +138,10 @@ const SearchPageInternal = () => {
searchDurationMs: Math.round(searchResponse.durationMs),
totalMatchCount: searchResponse.zoektStats.matchCount,
isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
}
}, [searchResponse]);
@ -194,7 +180,7 @@ const SearchPageInternal = () => {
<Separator />
</div>
{(isSearchLoading || isRepoMetadataLoading) ? (
{(isSearchLoading) ? (
<div className="flex flex-col items-center justify-center h-full gap-2">
<SymbolIcon className="h-6 w-6 animate-spin" />
<p className="font-semibold text-center">Searching...</p>
@ -205,7 +191,7 @@ const SearchPageInternal = () => {
isMoreResultsButtonVisible={isMoreResultsButtonVisible}
onLoadMoreResults={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata ?? {}}
repoInfo={repositoryInfo}
searchDurationMs={searchDurationMs}
numMatches={numMatches}
/>
@ -219,7 +205,7 @@ interface PanelGroupProps {
isMoreResultsButtonVisible?: boolean;
onLoadMoreResults: () => void;
isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>;
repoInfo: Record<number, RepositoryInfo>;
searchDurationMs: number;
numMatches: number;
}
@ -229,7 +215,7 @@ const PanelGroup = ({
isMoreResultsButtonVisible,
onLoadMoreResults,
isBranchFilteringEnabled,
repoMetadata,
repoInfo,
searchDurationMs,
numMatches,
}: PanelGroupProps) => {
@ -267,7 +253,7 @@ const PanelGroup = ({
<FilterPanel
matches={fileMatches}
onFilterChanged={onFilterChanged}
repoMetadata={repoMetadata}
repoInfo={repoInfo}
/>
</ResizablePanel>
<ResizableHandle
@ -310,7 +296,7 @@ const PanelGroup = ({
isLoadMoreButtonVisible={!!isMoreResultsButtonVisible}
onLoadMoreButtonClicked={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata}
repoInfo={repoInfo}
/>
) : (
<div className="flex flex-col items-center justify-center h-full">

View file

@ -2,25 +2,24 @@
import { listRepositories } from "@/features/search/listReposApi";
import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { isServiceError } from "@/lib/utils";
import { serviceErrorResponse } from "@/lib/serviceError";
import { StatusCodes } from "http-status-codes";
import { ErrorCode } from "@/lib/errorCodes";
// GET handler: responds with the repository list for the organisation named by
// the X-Org-Domain request header, or with a service error response.
// NOTE(review): this listing shows two variants of the header read and of the list call.
// The first asserts the header is present (`!`) and calls getRepos; the second validates
// the header and calls listRepositories directly.
export const GET = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain")!;
const response = await getRepos(domain);
const domain = request.headers.get("X-Org-Domain");
// A missing header is answered with 400 / MISSING_ORG_DOMAIN_HEADER.
if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const response = await listRepositories(domain);
// Service errors are converted into an error response rather than returned as JSON data.
if (isServiceError(response)) {
return serviceErrorResponse(response);
}
return Response.json(response);
}
const getRepos = (domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await listRepositories(orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -3,13 +3,21 @@
import { search } from "@/features/search/searchApi";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { searchRequestSchema } from "@/features/search/schemas";
import { SearchRequest } from "@/features/search/types";
import { ErrorCode } from "@/lib/errorCodes";
import { StatusCodes } from "http-status-codes";
export const POST = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain")!;
const domain = request.headers.get("X-Org-Domain");
if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const body = await request.json();
const parsed = await searchRequestSchema.safeParseAsync(body);
if (!parsed.success) {
@ -18,17 +26,9 @@ export const POST = async (request: NextRequest) => {
);
}
const response = await postSearch(parsed.data, domain);
const response = await search(parsed.data, domain);
if (isServiceError(response)) {
return serviceErrorResponse(response);
}
return Response.json(response);
}
const postSearch = (request: SearchRequest, domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await search(request, orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -4,11 +4,20 @@ import { getFileSource } from "@/features/search/fileSourceApi";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { fileSourceRequestSchema } from "@/features/search/schemas";
import { FileSourceRequest } from "@/features/search/types";
import { ErrorCode } from "@/lib/errorCodes";
import { StatusCodes } from "http-status-codes";
export const POST = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain");
if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const body = await request.json();
const parsed = await fileSourceRequestSchema.safeParseAsync(body);
if (!parsed.success) {
@ -18,19 +27,11 @@ export const POST = async (request: NextRequest) => {
}
const response = await postSource(parsed.data, request.headers.get("X-Org-Domain")!);
const response = await getFileSource(parsed.data, domain);
if (isServiceError(response)) {
return serviceErrorResponse(response);
}
return Response.json(response);
}
export const postSource = (request: FileSourceRequest, domain: string) => sew(() =>
withAuth(async (session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await getFileSource(request, orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -1,7 +1,7 @@
import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types";
import { getFileSource } from "@/features/search/fileSourceApi";
import { fileSourceResponseSchema } from "@/features/search/schemas";
import { base64Decode } from "@/lib/utils";
import { postSource } from "@/app/api/(server)/source/route";
import { isServiceError } from "@/lib/utils";
export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filename: string): Promise<sourcebot_context> => {
@ -14,7 +14,7 @@ export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filenam
}
console.log(JSON.stringify(fileSourceRequest, null, 2));
const response = await postSource(fileSourceRequest, "~");
const response = await getFileSource(fileSourceRequest, "~");
if (isServiceError(response)) {
throw new Error(`Failed to fetch file content for ${filename} from ${repoPath}: ${response.message}`);
}

View file

@ -3,40 +3,44 @@ import { fileNotFound, ServiceError } from "../../lib/serviceError";
import { FileSourceRequest, FileSourceResponse } from "./types";
import { isServiceError } from "../../lib/utils";
import { search } from "./searchApi";
import { sew, withAuth, withOrgMembership } from "@/actions";
// @todo (bkellam) : We should really be using `git show <hash>:<path>` to fetch file contents here.
// This will allow us to support permalinks to files at a specific revision that may not be indexed
// by zoekt.
// Fetches the contents and language of a single file by issuing a search that is
// restricted to that file and repository (and, when given, branch) with
// `matches: 1` and `whole: true`.
//
// Returns the search's ServiceError unchanged when the search fails, the
// `fileNotFound` error when no file comes back, and `{ source, language }` otherwise.
//
// NOTE(review): this listing interleaves two variants of the function: one taking
// `orgId: number`, and one taking `domain: string` whose body runs inside
// sew / withAuth / withOrgMembership. Statements therefore appear in pairs.
export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest, orgId: number): Promise<FileSourceResponse | ServiceError> => {
const escapedFileName = escapeStringRegexp(fileName);
const escapedRepository = escapeStringRegexp(repository);
export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest, domain: string): Promise<FileSourceResponse | ServiceError> => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async () => {
// File name and repository are regex-escaped before being placed in the query.
const escapedFileName = escapeStringRegexp(fileName);
const escapedRepository = escapeStringRegexp(repository);
let query = `file:${escapedFileName} repo:^${escapedRepository}$`;
if (branch) {
query = query.concat(` branch:${branch}`);
}
// The repository pattern is anchored with ^...$ so only the exact name matches.
let query = `file:${escapedFileName} repo:^${escapedRepository}$`;
// NOTE(review): unlike the file and repository names, `branch` is appended verbatim.
if (branch) {
query = query.concat(` branch:${branch}`);
}
const searchResponse = await search({
query,
matches: 1,
whole: true,
}, orgId);
// One match with `whole: true`; `content` on a result is only set when `whole` is true.
const searchResponse = await search({
query,
matches: 1,
whole: true,
}, domain);
if (isServiceError(searchResponse)) {
return searchResponse;
}
if (isServiceError(searchResponse)) {
return searchResponse;
}
const files = searchResponse.files;
const files = searchResponse.files;
if (!files || files.length === 0) {
return fileNotFound(fileName, repository);
}
if (!files || files.length === 0) {
return fileNotFound(fileName, repository);
}
const file = files[0];
const source = file.content ?? '';
const language = file.language;
return {
source,
language,
} satisfies FileSourceResponse;
}
const file = files[0];
// `content` is optional on a search result; fall back to an empty string.
const source = file.content ?? '';
const language = file.language;
return {
source,
language,
} satisfies FileSourceResponse;
}), /* allowSingleTenantUnauthedAccess = */ true)
);

View file

@ -2,42 +2,45 @@ import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { ListRepositoriesResponse } from "./types";
import { zoektFetch } from "./zoektClient";
import { zoektListRepositoriesResponseSchema } from "./zoektSchema";
import { sew, withAuth, withOrgMembership } from "@/actions";
// Lists the repositories known to zoekt for one organisation.
//
// Sends a POST to zoekt's `/api/list` with the organisation id in the `X-Tenant-ID`
// header, then converts the response into a ListRepositoriesResponse.
// Returns `invalidZoektResponse(...)` when zoekt answers with a non-ok status.
//
// NOTE(review): this listing interleaves two variants: one taking `domain: string`
// and resolving `orgId` through sew / withAuth / withOrgMembership, and one taking
// `orgId: number` directly. Statements therefore appear in pairs.
export const listRepositories = async (domain: string): Promise<ListRepositoriesResponse | ServiceError> => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
// NOTE(review): the meaning of `Field: 0` is defined by zoekt's list options — confirm there.
const body = JSON.stringify({
opts: {
Field: 0,
}
});
export const listRepositories = async (orgId: number): Promise<ListRepositoriesResponse | ServiceError> => {
const body = JSON.stringify({
opts: {
Field: 0,
}
});
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
// The organisation id is passed to zoekt as the tenant id.
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
const listResponse = await zoektFetch({
path: "/api/list",
body,
header,
method: "POST",
cache: "no-store",
});
const listResponse = await zoektFetch({
path: "/api/list",
body,
header,
method: "POST",
cache: "no-store",
});
if (!listResponse.ok) {
return invalidZoektResponse(listResponse);
}
if (!listResponse.ok) {
return invalidZoektResponse(listResponse);
}
const listBody = await listResponse.json();
const listBody = await listResponse.json();
// This variant exposes the repository URL as `webUrl` and maps an empty URL to undefined.
const parser = zoektListRepositoriesResponseSchema.transform(({ List }) => ({
repos: List.Repos.map((repo) => ({
name: repo.Repository.Name,
webUrl: repo.Repository.URL.length > 0 ? repo.Repository.URL : undefined,
branches: repo.Repository.Branches?.map((branch) => branch.Name) ?? [],
rawConfig: repo.Repository.RawConfig ?? undefined,
}))
} satisfies ListRepositoriesResponse));
// This variant exposes the repository URL as `url`, passed through unchanged.
const parser = zoektListRepositoriesResponseSchema.transform(({ List }) => ({
repos: List.Repos.map((repo) => ({
name: repo.Repository.Name,
url: repo.Repository.URL,
branches: repo.Repository.Branches?.map((branch) => branch.Name) ?? [],
rawConfig: repo.Repository.RawConfig ?? undefined,
}))
} satisfies ListRepositoriesResponse));
return parser.parse(listBody);
}
// NOTE(review): `parse` (not `safeParse`) is used, so a schema mismatch presumably
// surfaces as a thrown error rather than a returned ServiceError — confirm how `sew` handles it.
return parser.parse(listBody);
}), /* allowSingleTenantUnauthedAccess = */ true)
);

View file

@ -31,6 +31,14 @@ export const searchRequestSchema = z.object({
whole: z.boolean().optional(),
});
// Metadata describing one repository referenced by a search response
// (see `repositoryInfo` on the search response schema).
export const repositoryInfoSchema = z.object({
    // Repository id; the search API fills this from the Repo record's `id`.
    id: z.number(),
    // Code host identifier; the search API fills this from the Repo record's `external_codeHostType`.
    codeHostType: z.string(),
    // Repository name.
    name: z.string(),
    // Optional; omitted when the Repo record has no display name.
    displayName: z.string().optional(),
    // Optional; omitted when the Repo record has no web URL.
    webUrl: z.string().optional(),
});
export const searchResponseSchema = z.object({
zoektStats: z.object({
// The duration (in nanoseconds) of the search.
@ -62,8 +70,9 @@ export const searchResponseSchema = z.object({
// Any matching ranges
matchRanges: z.array(rangeSchema),
}),
url: z.string(),
webUrl: z.string().optional(),
repository: z.string(),
repositoryId: z.number(),
language: z.string(),
chunks: z.array(z.object({
content: z.string(),
@ -78,13 +87,14 @@ export const searchResponseSchema = z.object({
// Set if `whole` is true.
content: z.string().optional(),
})),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(),
});
// Shape of one repository entry in the list-repositories response.
export const repositorySchema = z.object({
// Repository name as reported by zoekt (`Repository.Name`).
name: z.string(),
// NOTE(review): both `url` and `webUrl` appear in this listing; listRepositories' `webUrl`
// variant leaves the field undefined when zoekt reports an empty URL — confirm `url` is the superseded member.
url: z.string(),
// Branch names; listRepositories falls back to an empty array when zoekt reports none.
branches: z.array(z.string()),
webUrl: z.string().optional(),
// zoekt's `RawConfig` for the repository, when present.
rawConfig: z.record(z.string(), z.string()).optional(),
});

View file

@ -7,7 +7,9 @@ import { ErrorCode } from "../../lib/errorCodes";
import { StatusCodes } from "http-status-codes";
import { zoektSearchResponseSchema } from "./zoektSchema";
import { SearchRequest, SearchResponse, SearchResultRange } from "./types";
import assert from "assert";
import { Repo } from "@sourcebot/db";
import * as Sentry from "@sentry/nextjs";
import { sew, withAuth, withOrgMembership } from "@/actions";
// List of supported query prefixes in zoekt.
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
@ -92,178 +94,244 @@ const transformZoektQuery = async (query: string, orgId: number): Promise<string
}
/**
 * Builds the web URL of a file from a zoekt URL template, a branch, and a file name.
 *
 * @param template zoekt URL template, e.g.
 *   `{{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}`,
 *   optionally followed by a query string such as `?at={{.Version}}`.
 * @param branch value substituted for `.Version`.
 * @param fileName value substituted for `.Path`.
 * @returns the URI-encoded URL, or `undefined` when the template does not have the
 *   expected `{{URLJoinPath ...}}` shape.
 */
const getFileWebUrl = (template: string, branch: string, fileName: string): string | undefined => {
    // This is a hacky parser for templates generated by
    // the go text/template package. Example template:
    // {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
    if (!template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/)) {
        return undefined;
    }

    const argsEnd = template.indexOf("}}");
    const args = template.substring("{{URLJoinPath ".length, argsEnd);

    // Tokenize the argument list BEFORE substituting the placeholders, so that a branch
    // or file name containing spaces is never split into several path segments, and so
    // that only whole `.Version` / `.Path` tokens are substituted (never text inside the
    // base URL). A token is either a double-quoted string or a run of non-space characters.
    const url = (args.match(/"[^"]*"|\S+/g) ?? [])
        .map((part) => {
            if (part === ".Version") return branch;
            if (part === ".Path") return fileName;
            // remove wrapping quotes
            if (part.startsWith("\"")) part = part.substring(1);
            if (part.endsWith("\"")) part = part.substring(0, part.length - 1);
            return part;
        })
        .join("/");

    // Replacer functions are used so that `$&`, `$$`, etc. inside the branch or file
    // name are inserted literally instead of being treated as replacement patterns.
    const optionalQueryParams =
        template.substring(argsEnd + 2)
            .replace("{{.Version}}", () => branch)
            .replace("{{.Path}}", () => fileName);

    return encodeURI(url + optionalQueryParams);
}
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, orgId: number) => {
const transformedQuery = await transformZoektQuery(query, orgId);
if (isServiceError(transformedQuery)) {
return transformedQuery;
}
query = transformedQuery;
const isBranchFilteringEnabled = (
query.includes(zoektPrefixes.branch) ||
query.includes(zoektPrefixes.branchShort)
);
// We only want to show matches for the default branch when
// the user isn't explicitly filtering by branch.
if (!isBranchFilteringEnabled) {
query = query.concat(` branch:HEAD`);
}
const body = JSON.stringify({
q: query,
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
opts: {
ChunkMatches: true,
MaxMatchDisplayCount: matches,
NumContextLines: contextLines,
Whole: !!whole,
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
}
});
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
const searchResponse = await zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});
if (!searchResponse.ok) {
return invalidZoektResponse(searchResponse);
}
const searchBody = await searchResponse.json();
const parser = zoektSearchResponseSchema.transform(({ Result }) => ({
zoektStats: {
duration: Result.Duration,
fileCount: Result.FileCount,
matchCount: Result.MatchCount,
filesSkipped: Result.FilesSkipped,
contentBytesLoaded: Result.ContentBytesLoaded,
indexBytesLoaded: Result.IndexBytesLoaded,
crashes: Result.Crashes,
shardFilesConsidered: Result.ShardFilesConsidered,
filesConsidered: Result.FilesConsidered,
filesLoaded: Result.FilesLoaded,
shardsScanned: Result.ShardsScanned,
shardsSkipped: Result.ShardsSkipped,
shardsSkippedFilter: Result.ShardsSkippedFilter,
ngramMatches: Result.NgramMatches,
ngramLookups: Result.NgramLookups,
wait: Result.Wait,
matchTreeConstruction: Result.MatchTreeConstruction,
matchTreeSearch: Result.MatchTreeSearch,
regexpsConsidered: Result.RegexpsConsidered,
flushReason: Result.FlushReason,
},
files: Result.Files?.map((file) => {
const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
const template = Result.RepoURLs[file.Repository];
assert(template, `Template not found for repository ${file.Repository}`);
// If there are multiple branches pointing to the same revision of this file, it doesn't
// matter which branch we use here, so use the first one.
const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
const url = getRepositoryUrl(template, branch, file.FileName);
return {
fileName: {
text: file.FileName,
matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
})) : [],
},
repository: file.Repository,
url: url,
language: file.Language,
chunks: file.ChunkMatches
.filter((chunk) => !chunk.FileName) // Filter out filename chunks.
.map((chunk) => {
return {
content: chunk.Content,
matchRanges: chunk.Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
}) satisfies SearchResultRange),
contentStart: {
byteOffset: chunk.ContentStart.ByteOffset,
column: chunk.ContentStart.Column,
lineNumber: chunk.ContentStart.LineNumber,
},
symbols: chunk.SymbolInfo?.map((symbol) => {
return {
symbol: symbol.Sym,
kind: symbol.Kind,
parent: symbol.Parent.length > 0 ? {
symbol: symbol.Parent,
kind: symbol.ParentKind,
} : undefined,
}
}) ?? undefined,
}
}),
branches: file.Branches,
content: file.Content,
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const transformedQuery = await transformZoektQuery(query, orgId);
if (isServiceError(transformedQuery)) {
return transformedQuery;
}
}) ?? [],
isBranchFilteringEnabled: isBranchFilteringEnabled,
} satisfies SearchResponse));
query = transformedQuery;
return parser.parse(searchBody);
}
const isBranchFilteringEnabled = (
query.includes(zoektPrefixes.branch) ||
query.includes(zoektPrefixes.branchShort)
);
// We only want to show matches for the default branch when
// the user isn't explicitly filtering by branch.
if (!isBranchFilteringEnabled) {
query = query.concat(` branch:HEAD`);
}
const body = JSON.stringify({
q: query,
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
opts: {
ChunkMatches: true,
MaxMatchDisplayCount: matches,
NumContextLines: contextLines,
Whole: !!whole,
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
}
});
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
const searchResponse = await zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});
if (!searchResponse.ok) {
return invalidZoektResponse(searchResponse);
}
const searchBody = await searchResponse.json();
const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
// metadata when transforming (potentially thousands) of file matches, we aggregate a unique
// set of repository ids* and map them to their corresponding Repo record.
//
// *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`?
// A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was
// always undefined. To make this a non-breaking change, we fallback to using the repository's name
// (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in
// practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo',
// 'gitea.com/org/repo', etc.).
//
// Note: When a repository is re-indexed (every hour) this ID will be populated.
// @see: https://github.com/sourcebot-dev/zoekt/pull/6
const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []);

// Split the identifiers once: numeric ones are Repo ids, string ones are repository names.
const repoIds: number[] = [];
const repoNames: string[] = [];
for (const identifier of repoIdentifiers) {
    if (typeof identifier === "number") {
        repoIds.push(identifier);
    } else {
        repoNames.push(identifier);
    }
}

// The two lookups are independent of each other, so they are issued concurrently.
// A lookup with nothing to match is skipped instead of querying with an empty `in` list.
const [reposById, reposByName] = await Promise.all([
    repoIds.length > 0
        ? prisma.repo.findMany({ where: { id: { in: repoIds }, orgId } })
        : Promise.resolve<Repo[]>([]),
    repoNames.length > 0
        ? prisma.repo.findMany({ where: { name: { in: repoNames }, orgId } })
        : Promise.resolve<Repo[]>([]),
]);

// Keyed by whichever identifier the file match carries: id when available, otherwise name.
// Insertion order (by id first, then by name) is kept the same as before.
const repos = new Map<string | number, Repo>();
reposById.forEach((repo) => repos.set(repo.id, repo));
reposByName.forEach((repo) => repos.set(repo.name, repo));
return {
zoektStats: {
duration: Result.Duration,
fileCount: Result.FileCount,
matchCount: Result.MatchCount,
filesSkipped: Result.FilesSkipped,
contentBytesLoaded: Result.ContentBytesLoaded,
indexBytesLoaded: Result.IndexBytesLoaded,
crashes: Result.Crashes,
shardFilesConsidered: Result.ShardFilesConsidered,
filesConsidered: Result.FilesConsidered,
filesLoaded: Result.FilesLoaded,
shardsScanned: Result.ShardsScanned,
shardsSkipped: Result.ShardsSkipped,
shardsSkippedFilter: Result.ShardsSkippedFilter,
ngramMatches: Result.NgramMatches,
ngramLookups: Result.NgramLookups,
wait: Result.Wait,
matchTreeConstruction: Result.MatchTreeConstruction,
matchTreeSearch: Result.MatchTreeSearch,
regexpsConsidered: Result.RegexpsConsidered,
flushReason: Result.FlushReason,
},
files: Result.Files?.map((file) => {
const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
const webUrl = (() => {
const template: string | undefined = Result.RepoURLs[file.Repository];
if (!template) {
return undefined;
}
// If there are multiple branches pointing to the same revision of this file, it doesn't
// matter which branch we use here, so use the first one.
const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
return getFileWebUrl(template, branch, file.FileName);
})();
const identifier = file.RepositoryID ?? file.Repository;
const repo = repos.get(identifier);
// This should never happen... but if it does, we skip the file.
if (!repo) {
Sentry.captureMessage(
`Repository not found for identifier: ${identifier}; skipping file "${file.FileName}"`,
'warning'
);
return undefined;
}
return {
fileName: {
text: file.FileName,
matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
})) : [],
},
repository: repo.name,
repositoryId: repo.id,
webUrl: webUrl,
language: file.Language,
chunks: file.ChunkMatches
.filter((chunk) => !chunk.FileName) // Filter out filename chunks.
.map((chunk) => {
return {
content: chunk.Content,
matchRanges: chunk.Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
}) satisfies SearchResultRange),
contentStart: {
byteOffset: chunk.ContentStart.ByteOffset,
column: chunk.ContentStart.Column,
lineNumber: chunk.ContentStart.LineNumber,
},
symbols: chunk.SymbolInfo?.map((symbol) => {
return {
symbol: symbol.Sym,
kind: symbol.Kind,
parent: symbol.Parent.length > 0 ? {
symbol: symbol.Parent,
kind: symbol.ParentKind,
} : undefined,
}
}) ?? undefined,
}
}),
branches: file.Branches,
content: file.Content,
}
}).filter((file) => file !== undefined) ?? [],
repositoryInfo: Array.from(repos.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled: isBranchFilteringEnabled,
} satisfies SearchResponse;
});
return parser.parseAsync(searchBody);
}), /* allowSingleTenantUnauthedAccess = */ true)
)

View file

@ -8,6 +8,7 @@ import {
rangeSchema,
fileSourceRequestSchema,
symbolSchema,
repositoryInfoSchema,
} from "./schemas";
import { z } from "zod";
@ -24,3 +25,5 @@ export type Repository = ListRepositoriesResponse["repos"][number];
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;

View file

@ -54,6 +54,7 @@ export const zoektSearchResponseSchema = z.object({
Files: z.array(z.object({
FileName: z.string(),
Repository: z.string(),
RepositoryID: z.number().optional(),
Version: z.string().optional(),
Language: z.string(),
Branches: z.array(z.string()).optional(),

View file

@ -23,4 +23,5 @@ export enum ErrorCode {
STRIPE_CLIENT_NOT_INITIALIZED = 'STRIPE_CLIENT_NOT_INITIALIZED',
ACTION_DISALLOWED_IN_TENANCY_MODE = 'ACTION_DISALLOWED_IN_TENANCY_MODE',
SEARCH_CONTEXT_NOT_FOUND = 'SEARCH_CONTEXT_NOT_FOUND',
MISSING_ORG_DOMAIN_HEADER = 'MISSING_ORG_DOMAIN_HEADER',
}

View file

@ -5,9 +5,8 @@ import gitlabLogo from "@/public/gitlab.svg";
import giteaLogo from "@/public/gitea.svg";
import gerritLogo from "@/public/gerrit.svg";
import bitbucketLogo from "@/public/bitbucket.svg";
import gitLogo from "@/public/git.svg";
import { ServiceError } from "./serviceError";
import { RepositoryQuery } from "./types";
import { Repository } from "@/features/search/types";
export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs))
@ -33,47 +32,40 @@ export const createPathWithQueryParams = (path: string, ...queryParams: [string,
return `${path}?${queryString}`;
}
export type CodeHostType = "github" | "gitlab" | "gitea" | "gerrit" | "bitbucket-cloud" | "bitbucket-server";
export type CodeHostType =
"github" |
"gitlab" |
"gitea" |
"gerrit" |
"bitbucket-cloud" |
"bitbucket-server" |
"generic-git-host";
type CodeHostInfo = {
type: CodeHostType;
displayName: string;
codeHostName: string;
repoLink: string;
repoLink?: string;
icon: string;
iconClassName?: string;
}
export const getRepoCodeHostInfo = (repo?: Repository): CodeHostInfo | undefined => {
if (!repo) {
return undefined;
}
export const getCodeHostInfoForRepo = (repo: {
codeHostType: string,
name: string,
displayName?: string,
webUrl?: string,
}): CodeHostInfo | undefined => {
const { codeHostType, name, displayName, webUrl } = repo;
if (!repo.rawConfig) {
return undefined;
}
// @todo : use zod to validate config schema
const webUrlType = repo.rawConfig['web-url-type']!;
const displayName = repo.rawConfig['display-name'] ?? repo.rawConfig['name']!;
return _getCodeHostInfoInternal(webUrlType, displayName, repo.url);
}
export const getRepoQueryCodeHostInfo = (repo: RepositoryQuery): CodeHostInfo | undefined => {
const displayName = repo.repoDisplayName ?? repo.repoName;
return _getCodeHostInfoInternal(repo.codeHostType, displayName, repo.webUrl ?? repo.repoCloneUrl);
}
const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: string): CodeHostInfo | undefined => {
switch (type) {
switch (codeHostType) {
case 'github': {
const { src, className } = getCodeHostIcon('github')!;
return {
type: "github",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "GitHub",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -82,9 +74,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gitlab')!;
return {
type: "gitlab",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "GitLab",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -93,9 +85,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gitea')!;
return {
type: "gitea",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "Gitea",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -105,9 +97,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gerrit')!;
return {
type: "gerrit",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "Gerrit",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -116,9 +108,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('bitbucket-server')!;
return {
type: "bitbucket-server",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "Bitbucket",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -127,9 +119,20 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('bitbucket-cloud')!;
return {
type: "bitbucket-cloud",
displayName: displayName,
displayName: displayName ?? name,
codeHostName: "Bitbucket",
repoLink: cloneUrl,
repoLink: webUrl,
icon: src,
iconClassName: className,
}
}
case "generic-git-host": {
const { src, className } = getCodeHostIcon('generic-git-host')!;
return {
type: "generic-git-host",
displayName: displayName ?? name,
codeHostName: "Generic Git Host",
repoLink: webUrl,
icon: src,
iconClassName: className,
}
@ -161,6 +164,10 @@ export const getCodeHostIcon = (codeHostType: CodeHostType): { src: string, clas
return {
src: bitbucketLogo,
}
case "generic-git-host":
return {
src: gitLogo,
}
default:
return null;
}
@ -174,6 +181,7 @@ export const isAuthSupportedForCodeHost = (codeHostType: CodeHostType): boolean
case "bitbucket-cloud":
case "bitbucket-server":
return true;
case "generic-git-host":
case "gerrit":
return false;
}

View file

@ -16,6 +16,9 @@
},
{
"$ref": "./bitbucket.json"
},
{
"$ref": "./genericGitHost.json"
}
]
}

View file

@ -0,0 +1,30 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or a absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only modified. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"$ref": "./shared.json#/definitions/GitRevisions"
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}

View file

@ -78,7 +78,7 @@
},
"contexts": {
"type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext"

2
vendor/zoekt vendored

@ -1 +1 @@
Subproject commit 7d1896215eea6f97af66c9549c9ec70436356b51
Subproject commit 12a2f4ad075359a09bd8a91793acb002211217aa

View file

@ -5460,6 +5460,7 @@ __metadata:
cross-fetch: "npm:^4.0.0"
dotenv: "npm:^16.4.5"
express: "npm:^4.21.2"
git-url-parse: "npm:^16.1.0"
gitea-js: "npm:^1.22.0"
glob: "npm:^11.0.0"
ioredis: "npm:^5.4.2"
@ -6084,6 +6085,13 @@ __metadata:
languageName: node
linkType: hard
"@types/parse-path@npm:^7.0.0":
version: 7.0.3
resolution: "@types/parse-path@npm:7.0.3"
checksum: 10c0/8344b6c7acba4e4e5a8d542f56f53c297685fa92f9b0c085d7532cc7e1b661432cecfc1c75c76cdb0d161c95679b6ecfe0573d9fef7c836962aacf604150a984
languageName: node
linkType: hard
"@types/pg-pool@npm:2.0.6":
version: 2.0.6
resolution: "@types/pg-pool@npm:2.0.6"
@ -9830,6 +9838,25 @@ __metadata:
languageName: node
linkType: hard
"git-up@npm:^8.1.0":
version: 8.1.1
resolution: "git-up@npm:8.1.1"
dependencies:
is-ssh: "npm:^1.4.0"
parse-url: "npm:^9.2.0"
checksum: 10c0/2cc4461d8565a3f7a1ecd3d262a58ddb8df0a67f7f7d4915df2913c460b2e88ae570a6ea810700a6d22fb3b9e4bea8dd10a8eb469900ddc12e35c62208608c03
languageName: node
linkType: hard
"git-url-parse@npm:^16.1.0":
version: 16.1.0
resolution: "git-url-parse@npm:16.1.0"
dependencies:
git-up: "npm:^8.1.0"
checksum: 10c0/b8f5ebcbd5b2baf9f1bb77a217376f0247c47fe1d42811ccaac3015768eebb0759a59051f758e50e70adf5c67ae059d1975bf6b750164f36bfd39138d11b940b
languageName: node
linkType: hard
"gitea-js@npm:^1.22.0":
version: 1.23.0
resolution: "gitea-js@npm:1.23.0"
@ -10633,6 +10660,15 @@ __metadata:
languageName: node
linkType: hard
"is-ssh@npm:^1.4.0":
version: 1.4.1
resolution: "is-ssh@npm:1.4.1"
dependencies:
protocols: "npm:^2.0.1"
checksum: 10c0/021a7355cb032625d58db3cc8266ad9aa698cbabf460b71376a0307405577fd7d3aa0826c0bf1951d7809f134c0ee80403306f6d7633db94a5a3600a0106b398
languageName: node
linkType: hard
"is-stream@npm:^2.0.0":
version: 2.0.1
resolution: "is-stream@npm:2.0.1"
@ -12400,6 +12436,25 @@ __metadata:
languageName: node
linkType: hard
"parse-path@npm:^7.0.0":
version: 7.1.0
resolution: "parse-path@npm:7.1.0"
dependencies:
protocols: "npm:^2.0.0"
checksum: 10c0/8c8c8b3019323d686e7b1cd6fd9653bc233404403ad68827836fbfe59dfe26aaef64ed4e0396d0e20c4a7e1469312ec969a679618960e79d5e7c652dc0da5a0f
languageName: node
linkType: hard
"parse-url@npm:^9.2.0":
version: 9.2.0
resolution: "parse-url@npm:9.2.0"
dependencies:
"@types/parse-path": "npm:^7.0.0"
parse-path: "npm:^7.0.0"
checksum: 10c0/b8f56cdb01e76616255dff82544f4b5ab4378f6f4bac8604ed6fde03a75b0f71c547d92688386d8f22f38fad3c928c075abf69458677c6185da76c841bfd7a93
languageName: node
linkType: hard
"parse5@npm:^7.1.2":
version: 7.2.1
resolution: "parse5@npm:7.2.1"
@ -13010,6 +13065,13 @@ __metadata:
languageName: node
linkType: hard
"protocols@npm:^2.0.0, protocols@npm:^2.0.1":
version: 2.0.2
resolution: "protocols@npm:2.0.2"
checksum: 10c0/b87d78c1fcf038d33691da28447ce94011d5c7f0c7fd25bcb5fb4d975991c99117873200c84f4b6a9d7f8b9092713a064356236960d1473a7d6fcd4228897b60
languageName: node
linkType: hard
"proxy-addr@npm:^2.0.7, proxy-addr@npm:~2.0.7":
version: 2.0.7
resolution: "proxy-addr@npm:2.0.7"