feat: Generic git host support (local & remote) (#307)

This commit is contained in:
Brendan Kellam 2025-05-15 13:42:58 -07:00 committed by GitHub
parent bbdd9e7903
commit 1aafc228cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 6259 additions and 508 deletions

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Added
- Added support for indexing generic git hosts given a remote clone url or local path. [#307](https://github.com/sourcebot-dev/sourcebot/pull/307)
## [3.2.0] - 2025-05-12 ## [3.2.0] - 2025-05-12
### Added ### Added

View file

@ -14,6 +14,9 @@ zoekt:
export CTAGS_COMMANDS=ctags export CTAGS_COMMANDS=ctags
clean: clean:
redis-cli FLUSHALL
yarn dev:prisma:migrate:reset
rm -rf \ rm -rf \
bin \ bin \
node_modules \ node_modules \

View file

@ -38,11 +38,21 @@
"docs/connections/bitbucket-data-center", "docs/connections/bitbucket-data-center",
"docs/connections/gitea", "docs/connections/gitea",
"docs/connections/gerrit", "docs/connections/gerrit",
"docs/connections/generic-git-host",
"docs/connections/local-repos",
"docs/connections/request-new" "docs/connections/request-new"
] ]
} }
] ]
}, },
{
"group": "Search",
"pages": [
"docs/search/syntax-reference",
"docs/search/multi-branch-indexing",
"docs/search/search-contexts"
]
},
{ {
"group": "Agents", "group": "Agents",
"pages": [ "pages": [
@ -53,11 +63,8 @@
{ {
"group": "More", "group": "More",
"pages": [ "pages": [
"docs/more/syntax-reference",
"docs/more/multi-branch-indexing",
"docs/more/roles-and-permissions", "docs/more/roles-and-permissions",
"docs/more/mcp-server", "docs/more/mcp-server"
"docs/more/search-contexts"
] ]
} }
] ]

View file

@ -0,0 +1,29 @@
---
title: Other Git hosts
---
import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
Sourcebot can sync code from any Git host (by clone url). This is helpful when you want to search code that is not in a [supported code host](/docs/connections/overview#supported-code-hosts).
## Getting Started
To connect to a Git host, create a new [connection](/docs/connections/overview) with type `git` and specify the clone url in the `url` property. For example:
```json
{
"type": "git",
"url": "https://github.com/sourcebot-dev/sourcebot"
}
```
Note that only `http` & `https` URLs are supported at this time.
## Schema reference
<Accordion title="Reference">
[schemas/v3/genericGitHost.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/genericGitHost.json)
<GenericGitHost />
</Accordion>

View file

@ -0,0 +1,87 @@
---
title: Local Git repositories
---
import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
<Note>
This feature is only supported when [self-hosting](/self-hosting/overview).
</Note>
Sourcebot can sync code from generic git repositories stored in a local directory. This can be helpful in scenarios where you already have a large number of repos checked out. Local repositories are treated as **read-only**, meaning Sourcebot will **not** `git fetch` new revisions.
## Getting Started
<Warning>
Only folders that contain a git repository at their root **and** have a `remote.origin.url` set in their git config are supported at this time. All other folders will be skipped.
</Warning>
Let's assume we have a `repos` directory located at `$(pwd)` with a collection of git repositories:
```sh
repos/
├─ repo_1/
├─ repo_2/
├─ repo_3/
├─ ...
```
To get Sourcebot to index these repositories:
<Steps>
<Step title="Mount a volume">
We need to mount a docker volume to the `repos` directory so Sourcebot can read its contents. Sourcebot will **not** write to local repositories, so we can mount a separate **read-only** volume:
``` bash
docker run \
-v $(pwd)/repos:/repos:ro \
/* additional args */ \
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Step>
<Step title="Create a connection">
We can now create a new git [connection](/docs/connections/overview), specifying local paths with the `file://` prefix. Glob patterns are supported. For example:
```json
{
"type": "git",
"url": "file:///repos/*"
}
```
Sourcebot will expand this glob pattern into paths `/repos/repo_1`, `/repos/repo_2`, etc. and index all valid git repositories.
</Step>
</Steps>
## Examples
<AccordionGroup>
<Accordion title="Sync individual repo">
```json
{
"type": "git",
"url": "file:///path/to/git_repo"
}
```
</Accordion>
<Accordion title="Sync multiple repos using glob patterns">
```json
// Attempt to sync directories contained in `repos/` (non-recursive)
{
"type": "git",
"url": "file:///repos/*"
}
```
</Accordion>
</AccordionGroup>
## Schema reference
<Accordion title="Reference">
[schemas/v3/genericGitHost.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/genericGitHost.json)
<GenericGitHost />
</Accordion>

View file

@ -30,6 +30,8 @@ There are two ways to define connections:
<Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" /> <Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" />
<Card horizontal title="Gitea" href="/docs/connections/gitea" /> <Card horizontal title="Gitea" href="/docs/connections/gitea" />
<Card horizontal title="Gerrit" href="/docs/connections/gerrit" /> <Card horizontal title="Gerrit" href="/docs/connections/gerrit" />
<Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" />
<Card horizontal title="Local Git repos" icon="folder" href="/docs/connections/local-repos" />
</CardGroup> </CardGroup>
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note> <Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note>

View file

@ -90,4 +90,5 @@ Additional info:
| Bitbucket Data Center | ✅ | | Bitbucket Data Center | ✅ |
| Gitea | ✅ | | Gitea | ✅ |
| Gerrit | ❌ | | Gerrit | ❌ |
| Generic git host | ✅ |

View file

@ -105,7 +105,7 @@ Like other prefixes, contexts can be negated using `-` or combined using `or`:
- `-context:web` excludes frontend repositories from results - `-context:web` excludes frontend repositories from results
- `( context:web or context:backend )` searches across both frontend and backend code - `( context:web or context:backend )` searches across both frontend and backend code
See [this doc](/docs/more/syntax-reference) for more details on the search query syntax. See [this doc](/docs/search/syntax-reference) for more details on the search query syntax.
## Schema reference ## Schema reference

View file

@ -32,4 +32,4 @@ Expressions can be prefixed with certain keywords to modify search behavior. Som
| `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` | | `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` |
| `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files | | `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files |
| `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` | | `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` |
| `context:` | Filter results to a predefined [search context](/self-hosting/more/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context | | `context:` | Filter results to a predefined [search context](/docs/search/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context |

View file

@ -82,6 +82,8 @@ Sourcebot is open source and can be self-hosted using our official [Docker image
<Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" /> <Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" />
<Card horizontal title="Gitea" href="/docs/connections/gitea" /> <Card horizontal title="Gitea" href="/docs/connections/gitea" />
<Card horizontal title="Gerrit" href="/docs/connections/gerrit" /> <Card horizontal title="Gerrit" href="/docs/connections/gerrit" />
<Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" />
<Card horizontal title="Local Git repos" icon="folder" href="/docs/connections/local-repos" />
</CardGroup> </CardGroup>
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note> <Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note>

View file

@ -19,10 +19,108 @@
"ZoektConfig": { "ZoektConfig": {
"anyOf": [ "anyOf": [
{ {
"$ref": "#/definitions/GitHubConfig" "type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
}, },
{ {
"$ref": "#/definitions/GitLabConfig" "type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
} }
] ]
}, },
@ -45,10 +143,16 @@
"description": "The GitHub organization to mirror" "description": "The GitHub organization to mirror"
}, },
"Name": { "Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter" "type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
}, },
"Exclude": { "Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter" "type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
}, },
"CredentialPath": { "CredentialPath": {
"type": "string", "type": "string",
@ -97,10 +201,16 @@
"default": "https://gitlab.com/api/v4/" "default": "https://gitlab.com/api/v4/"
}, },
"Name": { "Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter" "type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
}, },
"Exclude": { "Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter" "type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
}, },
"OnlyPublic": { "OnlyPublic": {
"type": "boolean", "type": "boolean",
@ -126,7 +236,112 @@
"Configs": { "Configs": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/definitions/ZoektConfig" "anyOf": [
{
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
} }
} }
}, },

File diff suppressed because it is too large Load diff

View file

@ -227,12 +227,39 @@
"description": "GitLab Configuration" "description": "GitLab Configuration"
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -346,7 +373,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -364,12 +429,39 @@
"description": "Gitea Configuration" "description": "Gitea Configuration"
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -441,7 +533,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -530,12 +660,39 @@
"description": "The username to use for authentication. Only needed if token is an app password." "description": "The username to use for authentication. Only needed if token is an app password."
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -608,7 +765,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -627,6 +822,74 @@
] ]
}, },
"additionalProperties": false "additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
} }
] ]
} }

View file

@ -0,0 +1,71 @@
{/* THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}
```

View file

@ -115,14 +115,112 @@
"type": "string" "type": "string"
}, },
"settings": { "settings": {
"$ref": "#/definitions/Settings" "type": "object",
"description": "Defines the global settings for Sourcebot.",
"properties": {
"maxFileSize": {
"type": "number",
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
"minimum": 1
},
"maxTrigramCount": {
"type": "number",
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Defaults to 20000.",
"minimum": 1
},
"reindexIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
"minimum": 1
},
"resyncConnectionIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the connection manager should check for connections that need to be re-synced. Defaults to 24 hours.",
"minimum": 1
},
"resyncConnectionPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.",
"minimum": 1
},
"reindexRepoPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.",
"minimum": 1
},
"maxConnectionSyncJobConcurrency": {
"type": "number",
"description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoIndexingJobConcurrency": {
"type": "number",
"description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoGarbageCollectionJobConcurrency": {
"type": "number",
"description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"repoGarbageCollectionGracePeriodMs": {
"type": "number",
"description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded. Defaults to 10 seconds.",
"minimum": 1
},
"repoIndexTimeoutMs": {
"type": "number",
"description": "The maximum duration (in milliseconds) a repo indexing job may run before it times out. Defaults to 2 hours.",
"minimum": 1
}
},
"additionalProperties": false
}, },
"contexts": { "contexts": {
"type": "object", "type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts", "description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": { "patternProperties": {
"^[a-zA-Z0-9_-]+$": { "^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext" "$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "SearchContext",
"description": "Search context",
"properties": {
"include": {
"type": "array",
"description": "List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/**",
"gerrit.example.org/sub/path/**"
]
]
},
"exclude": {
"type": "array",
"description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/sourcebot",
"gerrit.example.org/sub/path/**"
]
]
},
"description": {
"type": "string",
"description": "Optional description of the search context that surfaces in the UI."
}
},
"required": [
"include"
],
"additionalProperties": false
} }
}, },
"additionalProperties": false "additionalProperties": false
@ -358,12 +456,39 @@
"description": "GitLab Configuration" "description": "GitLab Configuration"
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -477,7 +602,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -495,12 +658,39 @@
"description": "Gitea Configuration" "description": "Gitea Configuration"
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -572,7 +762,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -661,12 +889,39 @@
"description": "The username to use for authentication. Only needed if token is an app password." "description": "The username to use for authentication. Only needed if token is an app password."
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -739,7 +994,45 @@
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -758,6 +1051,74 @@
] ]
}, },
"additionalProperties": false "additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
} }
] ]
} }

View file

@ -1,8 +1,7 @@
{ {
"private": true, "private": true,
"workspaces": [ "workspaces": [
"packages/*", "packages/*"
"packages/agents/*"
], ],
"scripts": { "scripts": {
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach -A run build", "build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach -A run build",

View file

@ -41,6 +41,7 @@
"cross-fetch": "^4.0.0", "cross-fetch": "^4.0.0",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.2", "express": "^4.21.2",
"git-url-parse": "^16.1.0",
"gitea-js": "^1.22.0", "gitea-js": "^1.22.0",
"glob": "^11.0.0", "glob": "^11.0.0",
"ioredis": "^5.4.2", "ioredis": "^5.4.2",

View file

@ -4,7 +4,7 @@ import { Settings } from "./types.js";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "./logger.js"; import { createLogger } from "./logger.js";
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig } from "./repoCompileUtils.js"; import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig, compileGenericGitHostConfig } from "./repoCompileUtils.js";
import { BackendError, BackendException } from "@sourcebot/error"; import { BackendError, BackendException } from "@sourcebot/error";
import { captureEvent } from "./posthog.js"; import { captureEvent } from "./posthog.js";
import { env } from "./env.js"; import { env } from "./env.js";
@ -173,6 +173,9 @@ export class ConnectionManager implements IConnectionManager {
case 'bitbucket': { case 'bitbucket': {
return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db); return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db);
} }
case 'git': {
return await compileGenericGitHostConfig(config, job.data.connectionId, orgId);
}
} }
})(); })();
} catch (err) { } catch (err) {

View file

@ -1,6 +1,8 @@
import { simpleGit, SimpleGitProgressEvent } from 'simple-git'; import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => { type onProgressFn = (event: SimpleGitProgressEvent) => void;
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: onProgressFn) => {
const git = simpleGit({ const git = simpleGit({
progress: onProgress, progress: onProgress,
}); });
@ -26,7 +28,7 @@ export const cloneRepository = async (cloneURL: string, path: string, onProgress
} }
export const fetchRepository = async (path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => { export const fetchRepository = async (path: string, onProgress?: onProgressFn) => {
const git = simpleGit({ const git = simpleGit({
progress: onProgress, progress: onProgress,
}); });
@ -56,7 +58,7 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
* that do not exist yet. It will _not_ remove any existing keys that are not * that do not exist yet. It will _not_ remove any existing keys that are not
* present in gitConfig. * present in gitConfig.
*/ */
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => { export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: onProgressFn) => {
const git = simpleGit({ const git = simpleGit({
progress: onProgress, progress: onProgress,
}).cwd(path); }).cwd(path);
@ -74,6 +76,52 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
} }
} }
/**
 * Returns true if `path` is the _root_ of a git repository.
 *
 * @param path - Directory to inspect; used as the working directory for git.
 * @param onProgress - Optional progress callback forwarded to simple-git.
 * @returns The result of simple-git's `checkIsRepo` with the `IS_REPO_ROOT` action.
 * @throws Error (prefixed with `isPathAValidGitRepoRoot failed`) if the check itself fails.
 */
export const isPathAValidGitRepoRoot = async (path: string, onProgress?: onProgressFn) => {
    const git = simpleGit({
        progress: onProgress,
    }).cwd(path);

    try {
        // The `await` is required: returning the pending promise directly would let a
        // rejection escape this try block, so the catch below would never run.
        return await git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);
    } catch (error: unknown) {
        if (error instanceof Error) {
            throw new Error(`isPathAValidGitRepoRoot failed: ${error.message}`);
        } else {
            throw new Error(`isPathAValidGitRepoRoot failed: ${error}`);
        }
    }
}
/**
 * Checks whether `url` looks like a git repository by listing its remote heads.
 *
 * @param url - Remote URL passed to simple-git's `listRemote` together with `--heads`.
 * @returns `true` when the listing contains non-whitespace output; `false` when the
 * listing is empty or the call throws.
 */
export const isUrlAValidGitRepo = async (url: string) => {
    const client = simpleGit();

    let remoteHeads: string;
    try {
        remoteHeads = await client.listRemote(['--heads', url]);
    } catch {
        // Any failure while listing the heads is treated as "not a valid git repo".
        return false;
    }

    return remoteHeads.trim().length !== 0;
}
/**
 * Reads `remote.origin.url` from the local-scope git config of the repository at `path`.
 *
 * @param path - Working directory for the git invocation.
 * @returns The `value` simple-git reports for that config key.
 * @throws Error prefixed with `Failed to get origin for <path>` when the lookup fails.
 */
export const getOriginUrl = async (path: string) => {
    const repoGit = simpleGit().cwd(path);

    try {
        const { value } = await repoGit.getConfig('remote.origin.url', GitConfigScope.local);
        return value;
    } catch (error: unknown) {
        // Prefer the message of a real Error; otherwise interpolate the thrown value as-is.
        const reason = error instanceof Error ? error.message : error;
        throw new Error(`Failed to get origin for ${path}: ${reason}`);
    }
}
export const getBranches = async (path: string) => { export const getBranches = async (path: string) => {
const git = simpleGit(); const git = simpleGit();
const branches = await git.cwd({ const branches = await git.cwd({

View file

@ -10,9 +10,13 @@ import { Prisma, PrismaClient } from '@sourcebot/db';
import { WithRequired } from "./types.js" import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js"; import { marshalBool } from "./utils.js";
import { createLogger } from './logger.js'; import { createLogger } from './logger.js';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { RepoMetadata } from './types.js'; import { RepoMetadata } from './types.js';
import path from 'path'; import path from 'path';
import { glob } from 'glob';
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
import assert from 'assert';
import GitUrlParse from 'git-url-parse';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>; export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
@ -433,4 +437,167 @@ export const compileBitbucketConfig = async (
repoData: repos, repoData: repos,
notFound, notFound,
}; };
}
/**
 * Compiles a generic git host connection config into repo data by dispatching
 * on the protocol of `config.url`.
 *
 * - `file:` is handled by `compileGenericGitHostConfig_file`.
 * - `http:` / `https:` are handled by `compileGenericGitHostConfig_url`.
 *
 * Note that the helpers take `(config, orgId, connectionId)`, i.e. the last two
 * arguments are in the opposite order to this function's parameters.
 *
 * @throws Error when the URL uses any other protocol.
 */
export const compileGenericGitHostConfig = async (
    config: GenericGitHostConnectionConfig,
    connectionId: number,
    orgId: number,
) => {
    const { protocol } = new URL(config.url);

    switch (protocol) {
        case 'file:':
            return compileGenericGitHostConfig_file(config, orgId, connectionId);
        case 'http:':
        case 'https:':
            return compileGenericGitHostConfig_url(config, orgId, connectionId);
        default:
            // Schema should prevent this, but throw an error just in case.
            throw new Error(`Unsupported protocol: ${protocol}`);
    }
}
/**
 * Compiles a `file://` generic git host config into repo data.
 *
 * The pathname of `config.url` is treated as a glob pattern and expanded to
 * absolute paths. A matched path is turned into a repo only if it is the root
 * of a git repository and has `remote.origin.url` set in its git config;
 * otherwise it is logged and recorded in `notFound.repos`.
 *
 * @returns `{ repoData, notFound }`, where `notFound.users` and `notFound.orgs`
 * are always empty for this connection type.
 */
export const compileGenericGitHostConfig_file = async (
    config: GenericGitHostConnectionConfig,
    orgId: number,
    connectionId: number,
) => {
    const configUrl = new URL(config.url);
    assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');

    const repos: RepoData[] = [];
    const notFound: {
        users: string[],
        orgs: string[],
        repos: string[],
    } = { users: [], orgs: [], repos: [] };

    // Resolve the glob pattern to a list of repo-paths
    const repoPaths = await glob(configUrl.pathname, { absolute: true });

    // Validates a single candidate path and, when usable, appends its repo data.
    const compileRepoAtPath = async (repoPath: string) => {
        if (!(await isPathAValidGitRepoRoot(repoPath))) {
            logger.warn(`Skipping ${repoPath} - not a git repository.`);
            notFound.repos.push(repoPath);
            return;
        }

        const origin = await getOriginUrl(repoPath);
        if (!origin) {
            logger.warn(`Skipping ${repoPath} - remote.origin.url not found in git config.`);
            notFound.repos.push(repoPath);
            return;
        }

        const remoteUrl = GitUrlParse(origin);

        // @note: matches the naming here:
        // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
        const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));

        const metadata = {
            branches: config.revisions?.branches ?? undefined,
            tags: config.revisions?.tags ?? undefined,
            // @NOTE: We don't set a gitConfig here since local repositories
            // are readonly.
            gitConfig: undefined,
        } satisfies RepoMetadata;

        const repo: RepoData = {
            external_codeHostType: 'generic-git-host',
            external_codeHostUrl: remoteUrl.resource,
            external_id: remoteUrl.toString(),
            cloneUrl: `file://${repoPath}`,
            name: repoName,
            displayName: repoName,
            isFork: false,
            isArchived: false,
            org: { connect: { id: orgId } },
            connections: { create: { connectionId: connectionId } },
            metadata,
        };

        repos.push(repo);
    };

    // Candidates are processed concurrently, so `repos` is filled in completion order.
    await Promise.all(repoPaths.map((repoPath) => compileRepoAtPath(repoPath)));

    return {
        repoData: repos,
        notFound,
    }
}
export const compileGenericGitHostConfig_url = async (
config: GenericGitHostConnectionConfig,
orgId: number,
connectionId: number,
) => {
const remoteUrl = new URL(config.url);
assert(remoteUrl.protocol === 'http:' || remoteUrl.protocol === 'https:', 'config.url must be a http:// or https:// URL');
const notFound: {
users: string[],
orgs: string[],
repos: string[],
} = {
users: [],
orgs: [],
repos: [],
};
// Validate that we are dealing with a valid git repo.
const isGitRepo = await isUrlAValidGitRepo(remoteUrl.toString());
if (!isGitRepo) {
notFound.repos.push(remoteUrl.toString());
return {
repoData: [],
notFound,
}
}
// @note: matches the naming here:
// https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repo: RepoData = {
external_codeHostType: 'generic-git-host',
external_codeHostUrl: remoteUrl.origin,
external_id: remoteUrl.toString(),
cloneUrl: remoteUrl.toString(),
name: repoName,
displayName: repoName,
isFork: false,
isArchived: false,
org: {
connect: {
id: orgId,
},
},
connections: {
create: {
connectionId: connectionId,
}
},
metadata: {
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
}
};
return {
repoData: [repo],
notFound,
}
} }

View file

@ -221,31 +221,29 @@ export class RepoManager implements IRepoManager {
} }
private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) { private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) {
let fetchDuration_s: number | undefined = undefined; const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
let cloneDuration_s: number | undefined = undefined;
const repoPath = getRepoPath(repo, this.ctx);
const metadata = repoMetadataSchema.parse(repo.metadata); const metadata = repoMetadataSchema.parse(repo.metadata);
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result, // If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone // to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
if (repoAlreadyInIndexingState && existsSync(repoPath)) { if (repoAlreadyInIndexingState && existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Deleting repo directory ${repoPath} during sync because it was already in the indexing state`); this.logger.info(`Deleting repo directory ${repoPath} during sync because it was already in the indexing state`);
await promises.rm(repoPath, { recursive: true, force: true }); await promises.rm(repoPath, { recursive: true, force: true });
} }
if (existsSync(repoPath)) { if (existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Fetching ${repo.displayName}...`); this.logger.info(`Fetching ${repo.displayName}...`);
const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => { const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
})); }));
fetchDuration_s = durationMs / 1000; const fetchDuration_s = durationMs / 1000;
process.stdout.write('\n'); process.stdout.write('\n');
this.logger.info(`Fetched ${repo.displayName} in ${fetchDuration_s}s`); this.logger.info(`Fetched ${repo.displayName} in ${fetchDuration_s}s`);
} else { } else if (!isReadOnly) {
this.logger.info(`Cloning ${repo.displayName}...`); this.logger.info(`Cloning ${repo.displayName}...`);
const auth = await this.getCloneCredentialsForRepo(repo, this.db); const auth = await this.getCloneCredentialsForRepo(repo, this.db);
@ -267,7 +265,7 @@ export class RepoManager implements IRepoManager {
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => { const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
})); }));
cloneDuration_s = durationMs / 1000; const cloneDuration_s = durationMs / 1000;
process.stdout.write('\n'); process.stdout.write('\n');
this.logger.info(`Cloned ${repo.displayName} in ${cloneDuration_s}s`); this.logger.info(`Cloned ${repo.displayName} in ${cloneDuration_s}s`);
@ -276,7 +274,7 @@ export class RepoManager implements IRepoManager {
// Regardless of clone or fetch, always upsert the git config for the repo. // Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we // This ensures that the git config is always up to date for whatever we
// have in the DB. // have in the DB.
if (metadata.gitConfig) { if (metadata.gitConfig && !isReadOnly) {
await upsertGitConfig(repoPath, metadata.gitConfig); await upsertGitConfig(repoPath, metadata.gitConfig);
} }
@ -284,12 +282,6 @@ export class RepoManager implements IRepoManager {
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx)); const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000; const indexDuration_s = durationMs / 1000;
this.logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`); this.logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`);
return {
fetchDuration_s,
cloneDuration_s,
indexDuration_s,
}
} }
private async runIndexJob(job: Job<RepoIndexingPayload>) { private async runIndexJob(job: Job<RepoIndexingPayload>) {
@ -323,17 +315,12 @@ export class RepoManager implements IRepoManager {
this.promClient.activeRepoIndexingJobs.inc(); this.promClient.activeRepoIndexingJobs.inc();
this.promClient.pendingRepoIndexingJobs.dec({ repo: repo.id.toString() }); this.promClient.pendingRepoIndexingJobs.dec({ repo: repo.id.toString() });
let indexDuration_s: number | undefined;
let fetchDuration_s: number | undefined;
let cloneDuration_s: number | undefined;
let stats;
let attempts = 0; let attempts = 0;
const maxAttempts = 3; const maxAttempts = 3;
while (attempts < maxAttempts) { while (attempts < maxAttempts) {
try { try {
stats = await this.syncGitRepository(repo, repoAlreadyInIndexingState); await this.syncGitRepository(repo, repoAlreadyInIndexingState);
break; break;
} catch (error) { } catch (error) {
Sentry.captureException(error); Sentry.captureException(error);
@ -350,10 +337,6 @@ export class RepoManager implements IRepoManager {
await new Promise(resolve => setTimeout(resolve, sleepDuration)); await new Promise(resolve => setTimeout(resolve, sleepDuration));
} }
} }
indexDuration_s = stats!.indexDuration_s;
fetchDuration_s = stats!.fetchDuration_s;
cloneDuration_s = stats!.cloneDuration_s;
} }
private async onIndexJobCompleted(job: Job<RepoIndexingPayload>) { private async onIndexJobCompleted(job: Job<RepoIndexingPayload>) {
@ -489,8 +472,8 @@ export class RepoManager implements IRepoManager {
}); });
// delete cloned repo // delete cloned repo
const repoPath = getRepoPath(repo, this.ctx); const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
if (existsSync(repoPath)) { if (existsSync(repoPath) && !isReadOnly) {
this.logger.info(`Deleting repo directory ${repoPath}`); this.logger.info(`Deleting repo directory ${repoPath}`);
await promises.rm(repoPath, { recursive: true, force: true }); await promises.rm(repoPath, { recursive: true, force: true });
} }

View file

@ -94,8 +94,21 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
return true; return true;
} }
export const getRepoPath = (repo: Repo, ctx: AppContext) => { export const getRepoPath = (repo: Repo, ctx: AppContext): { path: string, isReadOnly: boolean } => {
return path.join(ctx.reposPath, repo.id.toString()); // If we are dealing with a local repository, then use that as the path.
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
const cloneUrl = new URL(repo.cloneUrl);
if (repo.external_codeHostType === 'generic-git-host' && cloneUrl.protocol === 'file:') {
return {
path: cloneUrl.pathname,
isReadOnly: true,
}
}
return {
path: path.join(ctx.reposPath, repo.id.toString()),
isReadOnly: false,
}
} }
export const getShardPrefix = (orgId: number, repoId: number) => { export const getShardPrefix = (orgId: number, repoId: number) => {

View file

@ -15,7 +15,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
'HEAD' 'HEAD'
]; ];
const repoPath = getRepoPath(repo, ctx); const { path: repoPath } = getRepoPath(repo, ctx);
const shardPrefix = getShardPrefix(repo.orgId, repo.id); const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repoMetadataSchema.parse(repo.metadata); const metadata = repoMetadataSchema.parse(repo.metadata);
@ -65,6 +65,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
`-file_limit ${settings.maxFileSize}`, `-file_limit ${settings.maxFileSize}`,
`-branches ${revisions.join(',')}`, `-branches ${revisions.join(',')}`,
`-tenant_id ${repo.orgId}`, `-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`,
`-shard_prefix ${shardPrefix}`, `-shard_prefix ${shardPrefix}`,
repoPath repoPath
].join(' '); ].join(' ');

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Changed
- Updated API client to match the latest Sourcebot release. [#307](https://github.com/sourcebot-dev/sourcebot/pull/307)
## [1.0.0] - 2025-05-07 ## [1.0.0] - 2025-05-07
### Added ### Added

View file

@ -109,7 +109,7 @@ server.tool(
(acc, chunk) => acc + chunk.matchRanges.length, (acc, chunk) => acc + chunk.matchRanges.length,
0, 0,
); );
let text = `file: ${file.url}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}`; let text = `file: ${file.webUrl}\nnum_matches: ${numMatches}\nrepository: ${file.repository}\nlanguage: ${file.language}`;
if (includeCodeSnippets) { if (includeCodeSnippets) {
const snippets = file.chunks.map(chunk => { const snippets = file.chunks.map(chunk => {
@ -166,7 +166,7 @@ server.tool(
const content: TextContent[] = response.repos.map(repo => { const content: TextContent[] = response.repos.map(repo => {
return { return {
type: "text", type: "text",
text: `id: ${repo.name}\nurl: ${repo.url}`, text: `id: ${repo.name}\nurl: ${repo.webUrl}`,
} }
}); });

View file

@ -32,6 +32,14 @@ export const searchRequestSchema = z.object({
whole: z.boolean().optional(), whole: z.boolean().optional(),
}); });
// Schema for one repository entry; the search response exposes an array of
// these under `repositoryInfo`. `displayName` and `webUrl` are optional.
export const repositoryInfoSchema = z.object({
id: z.number(),
codeHostType: z.string(),
name: z.string(),
displayName: z.string().optional(),
webUrl: z.string().optional(),
})
export const searchResponseSchema = z.object({ export const searchResponseSchema = z.object({
zoektStats: z.object({ zoektStats: z.object({
// The duration (in nanoseconds) of the search. // The duration (in nanoseconds) of the search.
@ -63,9 +71,10 @@ export const searchResponseSchema = z.object({
// Any matching ranges // Any matching ranges
matchRanges: z.array(rangeSchema), matchRanges: z.array(rangeSchema),
}), }),
webUrl: z.string().optional(),
repository: z.string(), repository: z.string(),
repositoryId: z.number(),
language: z.string(), language: z.string(),
url: z.string(),
chunks: z.array(z.object({ chunks: z.array(z.object({
content: z.string(), content: z.string(),
matchRanges: z.array(rangeSchema), matchRanges: z.array(rangeSchema),
@ -79,13 +88,14 @@ export const searchResponseSchema = z.object({
// Set if `whole` is true. // Set if `whole` is true.
content: z.string().optional(), content: z.string().optional(),
})), })),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(), isBranchFilteringEnabled: z.boolean(),
}); });
export const repositorySchema = z.object({ export const repositorySchema = z.object({
name: z.string(), name: z.string(),
url: z.string(),
branches: z.array(z.string()), branches: z.array(z.string()),
webUrl: z.string().optional(),
rawConfig: z.record(z.string(), z.string()).optional(), rawConfig: z.record(z.string(), z.string()).optional(),
}); });

View file

@ -18,10 +18,108 @@ const schema = {
"ZoektConfig": { "ZoektConfig": {
"anyOf": [ "anyOf": [
{ {
"$ref": "#/definitions/GitHubConfig" "type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
}, },
{ {
"$ref": "#/definitions/GitLabConfig" "type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
} }
] ]
}, },
@ -44,10 +142,16 @@ const schema = {
"description": "The GitHub organization to mirror" "description": "The GitHub organization to mirror"
}, },
"Name": { "Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter" "type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
}, },
"Exclude": { "Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter" "type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
}, },
"CredentialPath": { "CredentialPath": {
"type": "string", "type": "string",
@ -96,10 +200,16 @@ const schema = {
"default": "https://gitlab.com/api/v4/" "default": "https://gitlab.com/api/v4/"
}, },
"Name": { "Name": {
"$ref": "#/definitions/RepoNameRegexIncludeFilter" "type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
}, },
"Exclude": { "Exclude": {
"$ref": "#/definitions/RepoNameRegexExcludeFilter" "type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
}, },
"OnlyPublic": { "OnlyPublic": {
"type": "boolean", "type": "boolean",
@ -125,7 +235,112 @@ const schema = {
"Configs": { "Configs": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/definitions/ZoektConfig" "anyOf": [
{
"type": "object",
"properties": {
"Type": {
"const": "github"
},
"GitHubUrl": {
"type": "string",
"description": "GitHub Enterprise url. If not set github.com will be used as the host."
},
"GitHubUser": {
"type": "string",
"description": "The GitHub user to mirror"
},
"GitHubOrg": {
"type": "string",
"description": "The GitHub organization to mirror"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitHub access token.",
"default": "~/.github-token"
},
"Topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Only mirror repos that have one of the given topics"
},
"ExcludeTopics": {
"type": "array",
"items": {
"type": "string"
},
"description": "Don't mirror repos that have one of the given topics"
},
"NoArchived": {
"type": "boolean",
"description": "Mirror repos that are _not_ archived",
"default": false
},
"IncludeForks": {
"type": "boolean",
"description": "Also mirror forks",
"default": false
}
},
"required": [
"Type"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"Type": {
"const": "gitlab"
},
"GitLabURL": {
"type": "string",
"description": "The GitLab API url.",
"default": "https://gitlab.com/api/v4/"
},
"Name": {
"type": "string",
"description": "Only clone repos whose name matches the given regexp.",
"format": "regexp",
"default": "^(foo|bar)$"
},
"Exclude": {
"type": "string",
"description": "Don't mirror repos whose names match this regexp.",
"format": "regexp",
"default": "^(fizz|buzz)$"
},
"OnlyPublic": {
"type": "boolean",
"description": "Only mirror public repos",
"default": false
},
"CredentialPath": {
"type": "string",
"description": "Path to a file containing a GitLab access token.",
"default": "~/.gitlab-token"
}
},
"required": [
"Type"
],
"additionalProperties": false
}
]
} }
} }
}, },

File diff suppressed because it is too large Load diff

View file

@ -226,12 +226,39 @@ const schema = {
"description": "GitLab Configuration" "description": "GitLab Configuration"
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -345,7 +372,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -363,12 +428,39 @@ const schema = {
"description": "Gitea Configuration" "description": "Gitea Configuration"
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -440,7 +532,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -529,12 +659,39 @@ const schema = {
"description": "The username to use for authentication. Only needed if token is an app password." "description": "The username to use for authentication. Only needed if token is an app password."
}, },
"token": { "token": {
"$ref": "#/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -607,7 +764,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -626,6 +821,74 @@ const schema = {
] ]
}, },
"additionalProperties": false "additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
} }
] ]
} as const; } as const;

View file

@ -5,7 +5,8 @@ export type ConnectionConfig =
| GitlabConnectionConfig | GitlabConnectionConfig
| GiteaConnectionConfig | GiteaConnectionConfig
| GerritConnectionConfig | GerritConnectionConfig
| BitbucketConnectionConfig; | BitbucketConnectionConfig
| GenericGitHostConnectionConfig;
export interface GithubConnectionConfig { export interface GithubConnectionConfig {
/** /**
@ -305,3 +306,14 @@ export interface BitbucketConnectionConfig {
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }
/**
* Connection config for a generic Git host, identified by a remote clone URL or a local path.
*/
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
/**
* Optional branches and tags to index; the default branch (HEAD) is always indexed.
*/
revisions?: GitRevisions;
}

View file

@ -0,0 +1,70 @@
// THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!
const schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
} as const;
export { schema as genericGitHostSchema };

View file

@ -0,0 +1,26 @@
// THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!
/**
* Connection config for a generic Git host, identified by a remote clone URL or a local path.
*/
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
/**
* Optional branches and tags to index; the default branch (HEAD) is always indexed.
*/
revisions?: GitRevisions;
}
/**
* The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.
*/
export interface GitRevisions {
/**
* List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored. The schema default is an empty list.
*/
branches?: string[];
/**
* List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored. The schema default is an empty list.
*/
tags?: string[];
}

View file

@ -114,14 +114,112 @@ const schema = {
"type": "string" "type": "string"
}, },
"settings": { "settings": {
"$ref": "#/definitions/Settings" "type": "object",
"description": "Defines the global settings for Sourcebot.",
"properties": {
"maxFileSize": {
"type": "number",
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
"minimum": 1
},
"maxTrigramCount": {
"type": "number",
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Default to 20000.",
"minimum": 1
},
"reindexIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
"minimum": 1
},
"resyncConnectionIntervalMs": {
"type": "number",
"description": "The interval (in milliseconds) at which the connection manager should check for connections that need to be re-synced. Defaults to 24 hours.",
"minimum": 1
},
"resyncConnectionPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.",
"minimum": 1
},
"reindexRepoPollingIntervalMs": {
"type": "number",
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.",
"minimum": 1
},
"maxConnectionSyncJobConcurrency": {
"type": "number",
"description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoIndexingJobConcurrency": {
"type": "number",
"description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"maxRepoGarbageCollectionJobConcurrency": {
"type": "number",
"description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
"minimum": 1
},
"repoGarbageCollectionGracePeriodMs": {
"type": "number",
"description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded. Defaults to 10 seconds.",
"minimum": 1
},
"repoIndexTimeoutMs": {
"type": "number",
"description": "The timeout (in milliseconds) for a repo indexing to timeout. Defaults to 2 hours.",
"minimum": 1
}
},
"additionalProperties": false
}, },
"contexts": { "contexts": {
"type": "object", "type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts", "description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": { "patternProperties": {
"^[a-zA-Z0-9_-]+$": { "^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext" "$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "SearchContext",
"description": "Search context",
"properties": {
"include": {
"type": "array",
"description": "List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/**",
"gerrit.example.org/sub/path/**"
]
]
},
"exclude": {
"type": "array",
"description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
"items": {
"type": "string"
},
"examples": [
[
"github.com/sourcebot-dev/sourcebot",
"gerrit.example.org/sub/path/**"
]
]
},
"description": {
"type": "string",
"description": "Optional description of the search context that surfaces in the UI."
}
},
"required": [
"include"
],
"additionalProperties": false
} }
}, },
"additionalProperties": false "additionalProperties": false
@ -357,12 +455,39 @@ const schema = {
"description": "GitLab Configuration" "description": "GitLab Configuration"
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -476,7 +601,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -494,12 +657,39 @@ const schema = {
"description": "Gitea Configuration" "description": "Gitea Configuration"
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -571,7 +761,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -660,12 +888,39 @@ const schema = {
"description": "The username to use for authentication. Only needed if token is an app password." "description": "The username to use for authentication. Only needed if token is an app password."
}, },
"token": { "token": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/token",
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
{ {
"secret": "SECRET_KEY" "secret": "SECRET_KEY"
} }
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
] ]
}, },
"url": { "url": {
@ -738,7 +993,45 @@ const schema = {
"additionalProperties": false "additionalProperties": false
}, },
"revisions": { "revisions": {
"$ref": "#/properties/connections/patternProperties/%5E%5Ba-zA-Z0-9_-%5D%2B%24/oneOf/0/properties/revisions" "type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -757,6 +1050,74 @@ const schema = {
] ]
}, },
"additionalProperties": false "additionalProperties": false
},
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
} }
] ]
} }

View file

@ -9,13 +9,14 @@ export type ConnectionConfig =
| GitlabConnectionConfig | GitlabConnectionConfig
| GiteaConnectionConfig | GiteaConnectionConfig
| GerritConnectionConfig | GerritConnectionConfig
| BitbucketConnectionConfig; | BitbucketConnectionConfig
| GenericGitHostConnectionConfig;
export interface SourcebotConfig { export interface SourcebotConfig {
$schema?: string; $schema?: string;
settings?: Settings; settings?: Settings;
/** /**
* [Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts * [Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts
*/ */
contexts?: { contexts?: {
[k: string]: SearchContext; [k: string]: SearchContext;
@ -400,3 +401,14 @@ export interface BitbucketConnectionConfig {
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }
/**
 * Connection config for a generic git host: discriminated by `type: "git"`,
 * it points at a single `url` and optionally restricts the indexed `revisions`.
 */
export interface GenericGitHostConnectionConfig {
/**
* Generic Git host configuration
*/
type: "git";
/**
* The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.
*/
url: string;
revisions?: GitRevisions;
}

View file

@ -1,5 +1,5 @@
import path, { dirname } from "path"; import path, { dirname } from "path";
import { mkdir, rm, writeFile } from "fs/promises"; import { mkdir, writeFile } from "fs/promises";
import $RefParser from "@apidevtools/json-schema-ref-parser"; import $RefParser from "@apidevtools/json-schema-ref-parser";
import { compileFromFile } from "json-schema-to-typescript"; import { compileFromFile } from "json-schema-to-typescript";
import { glob } from "glob"; import { glob } from "glob";
@ -25,15 +25,15 @@ const BANNER_COMMENT = 'THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!';
await mkdir(docsOutDir, { recursive: true }); await mkdir(docsOutDir, { recursive: true });
// Generate schema // Generate schema
const schema = JSON.stringify(await $RefParser.bundle(schemaPath), null, 2); const schema = JSON.stringify(await $RefParser.dereference(schemaPath), null, 2);
// Write to src // Write to src
await writeFile( await writeFile(
path.join(srcOutDir, `${name}.schema.ts`), path.join(srcOutDir, `${name}.schema.ts`),
`// ${BANNER_COMMENT}\n` + `// ${BANNER_COMMENT}\n` +
'const schema = ' + 'const schema = ' +
schema + schema +
` as const;\nexport { schema as ${name}Schema };`, ` as const;\nexport { schema as ${name}Schema };`,
); );
// Write to docs // Write to docs

View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xml:space="preserve" width="97" height="97">
<path fill="#F05133" d="M92.71 44.408 52.591 4.291c-2.31-2.311-6.057-2.311-8.369 0l-8.33 8.332L46.459 23.19c2.456-.83 5.272-.273 7.229 1.685 1.969 1.97 2.521 4.81 1.67 7.275l10.186 10.185c2.465-.85 5.307-.3 7.275 1.671 2.75 2.75 2.75 7.206 0 9.958-2.752 2.751-7.208 2.751-9.961 0-2.068-2.07-2.58-5.11-1.531-7.658l-9.5-9.499v24.997c.67.332 1.303.774 1.861 1.332 2.75 2.75 2.75 7.206 0 9.959-2.75 2.749-7.209 2.749-9.957 0-2.75-2.754-2.75-7.21 0-9.959.68-.679 1.467-1.193 2.307-1.537v-25.23c-.84-.344-1.625-.853-2.307-1.537-2.083-2.082-2.584-5.14-1.516-7.698L31.798 16.715 4.288 44.222c-2.311 2.313-2.311 6.06 0 8.371l40.121 40.118c2.31 2.311 6.056 2.311 8.369 0L92.71 52.779c2.311-2.311 2.311-6.06 0-8.371z"/>
</svg>

After

Width:  |  Height:  |  Size: 841 B

View file

@ -28,6 +28,7 @@ import { orgDomainSchema, orgNameSchema, repositoryQuerySchema } from "./lib/sch
import { TenancyMode } from "./lib/types"; import { TenancyMode } from "./lib/types";
import { decrementOrgSeatCount, getSubscriptionForOrg, incrementOrgSeatCount } from "./ee/features/billing/serverUtils"; import { decrementOrgSeatCount, getSubscriptionForOrg, incrementOrgSeatCount } from "./ee/features/billing/serverUtils";
import { bitbucketSchema } from "@sourcebot/schemas/v3/bitbucket.schema"; import { bitbucketSchema } from "@sourcebot/schemas/v3/bitbucket.schema";
import { genericGitHostSchema } from "@sourcebot/schemas/v3/genericGitHost.schema";
const ajv = new Ajv({ const ajv = new Ajv({
validateFormats: false, validateFormats: false,
@ -443,6 +444,67 @@ export const getRepos = async (domain: string, filter: { status?: RepoIndexingSt
} }
), /* allowSingleTenantUnauthedAccess = */ true)); ), /* allowSingleTenantUnauthedAccess = */ true));
/**
 * Looks up a single repository by name within the org resolved from `domain`.
 *
 * Returns the result of `notFound()` when no repo with that name exists in the
 * org. Otherwise returns a trimmed-down view of the record: id, name, display
 * name, code host type, web/image urls, `indexedAt` and the indexing status.
 * Null-ish `displayName`, `webUrl`, `imageUrl` and `indexedAt` values are
 * normalised to `undefined`.
 *
 * NOTE(review): the call is wrapped in `sew` / `withAuth` / `withOrgMembership`,
 * which are defined elsewhere; presumably they handle error wrapping, session
 * auth and org membership checks. Confirm against their definitions.
 */
export const getRepoInfoByName = async (repoName: string, domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
// @note: repo names are represented by their remote url
// on the code host. E.g.,:
// - github.com/sourcebot-dev/sourcebot
// - gitlab.com/gitlab-org/gitlab
// - gerrit.wikimedia.org/r/mediawiki/extensions/OnionsPorFavor
// etc.
//
// For most purposes, repo names are unique within an org, so using
// findFirst is equivalent to findUnique. Duplicates _can_ occur when
// a repository is specified by its remote url in a generic `git`
// connection. For example:
//
// ```json
// {
// "connections": {
// "connection-1": {
// "type": "github",
// "repos": [
// "sourcebot-dev/sourcebot"
// ]
// },
// "connection-2": {
// "type": "git",
// "url": "file:///tmp/repos/sourcebot"
// }
// }
// }
// ```
//
// In this scenario, both repos will be named "github.com/sourcebot-dev/sourcebot".
// We will leave this as an edge case for now since it's unlikely to happen in practice.
//
// @v4-todo: we could add a unique constraint on repo name + orgId to help de-duplicate
// these cases.
// @see: repoCompileUtils.ts
const repo = await prisma.repo.findFirst({
where: {
name: repoName,
orgId,
},
});
if (!repo) {
return notFound();
}
// Expose only the fields listed below rather than the full repo record;
// `?? undefined` turns null-ish optional values into `undefined`.
return {
id: repo.id,
name: repo.name,
displayName: repo.displayName ?? undefined,
codeHostType: repo.external_codeHostType,
webUrl: repo.webUrl ?? undefined,
imageUrl: repo.imageUrl ?? undefined,
indexedAt: repo.indexedAt ?? undefined,
repoIndexingStatus: repo.repoIndexingStatus,
}
}), /* allowSingleTenantUnauthedAccess = */ true));
export const createConnection = async (name: string, type: CodeHostType, connectionConfig: string, domain: string): Promise<{ id: number } | ServiceError> => sew(() => export const createConnection = async (name: string, type: CodeHostType, connectionConfig: string, domain: string): Promise<{ id: number } | ServiceError> => sew(() =>
withAuth((session) => withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => { withOrgMembership(session, domain, async ({ orgId }) => {
@ -1180,6 +1242,8 @@ const parseConnectionConfig = (config: string) => {
return gerritSchema; return gerritSchema;
case 'bitbucket': case 'bitbucket':
return bitbucketSchema; return bitbucketSchema;
case 'git':
return genericGitHostSchema;
} }
})(); })();
@ -1230,6 +1294,12 @@ const parseConnectionConfig = (config: string) => {
hasToken: true, // gerrit doesn't use a token atm hasToken: true, // gerrit doesn't use a token atm
} }
} }
case "git": {
return {
numRepos: 1,
hasToken: false,
}
}
} }
})(); })();

View file

@ -2,7 +2,6 @@ import { FileHeader } from "@/app/[domain]/components/fileHeader";
import { TopBar } from "@/app/[domain]/components/topBar"; import { TopBar } from "@/app/[domain]/components/topBar";
import { Separator } from '@/components/ui/separator'; import { Separator } from '@/components/ui/separator';
import { getFileSource } from '@/features/search/fileSourceApi'; import { getFileSource } from '@/features/search/fileSourceApi';
import { listRepositories } from '@/features/search/listReposApi';
import { isServiceError } from "@/lib/utils"; import { isServiceError } from "@/lib/utils";
import { base64Decode } from "@/lib/utils"; import { base64Decode } from "@/lib/utils";
import { CodePreview } from "./codePreview"; import { CodePreview } from "./codePreview";
@ -11,6 +10,8 @@ import { LuFileX2, LuBookX } from "react-icons/lu";
import { getOrgFromDomain } from "@/data/org"; import { getOrgFromDomain } from "@/data/org";
import { notFound } from "next/navigation"; import { notFound } from "next/navigation";
import { ServiceErrorException } from "@/lib/serviceError"; import { ServiceErrorException } from "@/lib/serviceError";
import { getRepoInfoByName } from "@/actions";
interface BrowsePageProps { interface BrowsePageProps {
params: { params: {
path: string[]; path: string[];
@ -48,19 +49,11 @@ export default async function BrowsePage({
} }
})(); })();
const org = await getOrgFromDomain(params.domain); const repoInfo = await getRepoInfoByName(repoName, params.domain);
if (!org) { if (isServiceError(repoInfo) && repoInfo.errorCode !== ErrorCode.NOT_FOUND) {
notFound(); throw new ServiceErrorException(repoInfo);
} }
// @todo (bkellam) : We should probably have a endpoint to fetch repository metadata
// given it's name or id.
const reposResponse = await listRepositories(org.id);
if (isServiceError(reposResponse)) {
throw new ServiceErrorException(reposResponse);
}
const repo = reposResponse.repos.find(r => r.name === repoName);
if (pathType === 'tree') { if (pathType === 'tree') {
// @todo : proper tree handling // @todo : proper tree handling
return ( return (
@ -78,12 +71,17 @@ export default async function BrowsePage({
domain={params.domain} domain={params.domain}
/> />
<Separator /> <Separator />
{repo && ( {!isServiceError(repoInfo) && (
<> <>
<div className="bg-accent py-1 px-2 flex flex-row"> <div className="bg-accent py-1 px-2 flex flex-row">
<FileHeader <FileHeader
fileName={path} fileName={path}
repo={repo} repo={{
name: repoInfo.name,
displayName: repoInfo.displayName,
webUrl: repoInfo.webUrl,
codeHostType: repoInfo.codeHostType,
}}
branchDisplayName={revisionName} branchDisplayName={revisionName}
/> />
</div> </div>
@ -91,7 +89,7 @@ export default async function BrowsePage({
</> </>
)} )}
</div> </div>
{repo === undefined ? ( {isServiceError(repoInfo) ? (
<div className="flex h-full"> <div className="flex h-full">
<div className="m-auto flex flex-col items-center gap-2"> <div className="m-auto flex flex-col items-center gap-2">
<LuBookX className="h-12 w-12 text-secondary-foreground" /> <LuBookX className="h-12 w-12 text-secondary-foreground" />
@ -101,9 +99,9 @@ export default async function BrowsePage({
) : ( ) : (
<CodePreviewWrapper <CodePreviewWrapper
path={path} path={path}
repoName={repoName} repoName={repoInfo.name}
revisionName={revisionName ?? 'HEAD'} revisionName={revisionName ?? 'HEAD'}
orgId={org.id} domain={params.domain}
/> />
)} )}
</div> </div>
@ -114,21 +112,21 @@ interface CodePreviewWrapper {
path: string, path: string,
repoName: string, repoName: string,
revisionName: string, revisionName: string,
orgId: number, domain: string,
} }
const CodePreviewWrapper = async ({ const CodePreviewWrapper = async ({
path, path,
repoName, repoName,
revisionName, revisionName,
orgId, domain,
}: CodePreviewWrapper) => { }: CodePreviewWrapper) => {
// @todo: this will depend on `pathType`. // @todo: this will depend on `pathType`.
const fileSourceResponse = await getFileSource({ const fileSourceResponse = await getFileSource({
fileName: path, fileName: path,
repository: repoName, repository: repoName,
branch: revisionName, branch: revisionName,
}, orgId); }, domain);
if (isServiceError(fileSourceResponse)) { if (isServiceError(fileSourceResponse)) {
if (fileSourceResponse.errorCode === ErrorCode.FILE_NOT_FOUND) { if (fileSourceResponse.errorCode === ErrorCode.FILE_NOT_FOUND) {

View file

@ -1,17 +1,22 @@
import { Repository } from "@/features/search/types";
import { getRepoCodeHostInfo } from "@/lib/utils"; import { getCodeHostInfoForRepo } from "@/lib/utils";
import { LaptopIcon } from "@radix-ui/react-icons"; import { LaptopIcon } from "@radix-ui/react-icons";
import clsx from "clsx"; import clsx from "clsx";
import Image from "next/image"; import Image from "next/image";
import Link from "next/link"; import Link from "next/link";
interface FileHeaderProps { interface FileHeaderProps {
repo?: Repository;
fileName: string; fileName: string;
fileNameHighlightRange?: { fileNameHighlightRange?: {
from: number; from: number;
to: number; to: number;
} }
repo: {
name: string;
codeHostType: string;
displayName?: string;
webUrl?: string;
},
branchDisplayName?: string; branchDisplayName?: string;
branchDisplayTitle?: string; branchDisplayTitle?: string;
} }
@ -23,7 +28,12 @@ export const FileHeader = ({
branchDisplayName, branchDisplayName,
branchDisplayTitle, branchDisplayTitle,
}: FileHeaderProps) => { }: FileHeaderProps) => {
const info = getRepoCodeHostInfo(repo); const info = getCodeHostInfoForRepo({
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
});
return ( return (
<div className="flex flex-row gap-2 items-center w-full overflow-hidden"> <div className="flex flex-row gap-2 items-center w-full overflow-hidden">

View file

@ -6,7 +6,7 @@ import {
CarouselItem, CarouselItem,
} from "@/components/ui/carousel"; } from "@/components/ui/carousel";
import Autoscroll from "embla-carousel-auto-scroll"; import Autoscroll from "embla-carousel-auto-scroll";
import { getRepoQueryCodeHostInfo } from "@/lib/utils"; import { getCodeHostInfoForRepo } from "@/lib/utils";
import Image from "next/image"; import Image from "next/image";
import { FileIcon } from "@radix-ui/react-icons"; import { FileIcon } from "@radix-ui/react-icons";
import clsx from "clsx"; import clsx from "clsx";
@ -57,7 +57,12 @@ const RepositoryBadge = ({
repo repo
}: RepositoryBadgeProps) => { }: RepositoryBadgeProps) => {
const { repoIcon, displayName, repoLink } = (() => { const { repoIcon, displayName, repoLink } = (() => {
const info = getRepoQueryCodeHostInfo(repo); const info = getCodeHostInfoForRepo({
codeHostType: repo.codeHostType,
name: repo.repoName,
displayName: repo.repoDisplayName,
webUrl: repo.webUrl,
});
if (info) { if (info) {
return { return {

View file

@ -46,7 +46,7 @@ export const CodePreviewPanel = ({
content: decodedSource, content: decodedSource,
filepath: fileMatch.fileName.text, filepath: fileMatch.fileName.text,
matches: fileMatch.chunks, matches: fileMatch.chunks,
link: fileMatch.url, link: fileMatch.webUrl,
language: fileMatch.language, language: fileMatch.language,
revision: branch ?? "HEAD", revision: branch ?? "HEAD",
}; };

View file

@ -1,8 +1,8 @@
'use client'; 'use client';
import { FileIcon } from "@/components/ui/fileIcon"; import { FileIcon } from "@/components/ui/fileIcon";
import { Repository, SearchResultFile } from "@/features/search/types"; import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
import { cn, getRepoCodeHostInfo } from "@/lib/utils"; import { cn, getCodeHostInfoForRepo } from "@/lib/utils";
import { LaptopIcon } from "@radix-ui/react-icons"; import { LaptopIcon } from "@radix-ui/react-icons";
import Image from "next/image"; import Image from "next/image";
import { useRouter, useSearchParams } from "next/navigation"; import { useRouter, useSearchParams } from "next/navigation";
@ -13,7 +13,7 @@ import { Filter } from "./filter";
interface FilePanelProps { interface FilePanelProps {
matches: SearchResultFile[]; matches: SearchResultFile[];
onFilterChanged: (filteredMatches: SearchResultFile[]) => void, onFilterChanged: (filteredMatches: SearchResultFile[]) => void,
repoMetadata: Record<string, Repository>; repoInfo: Record<number, RepositoryInfo>;
} }
const LANGUAGES_QUERY_PARAM = "langs"; const LANGUAGES_QUERY_PARAM = "langs";
@ -22,7 +22,7 @@ const REPOS_QUERY_PARAM = "repos";
export const FilterPanel = ({ export const FilterPanel = ({
matches, matches,
onFilterChanged, onFilterChanged,
repoMetadata, repoInfo,
}: FilePanelProps) => { }: FilePanelProps) => {
const router = useRouter(); const router = useRouter();
const searchParams = useSearchParams(); const searchParams = useSearchParams();
@ -38,9 +38,16 @@ export const FilterPanel = ({
return aggregateMatches( return aggregateMatches(
"repository", "repository",
matches, matches,
(key) => { ({ key, match }) => {
const repo: Repository | undefined = repoMetadata[key]; const repo: RepositoryInfo | undefined = repoInfo[match.repositoryId];
const info = getRepoCodeHostInfo(repo);
const info = repo ? getCodeHostInfoForRepo({
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
}) : undefined;
const Icon = info ? ( const Icon = info ? (
<Image <Image
src={info.icon} src={info.icon}
@ -60,14 +67,14 @@ export const FilterPanel = ({
}; };
} }
) )
}, [getSelectedFromQuery, matches, repoMetadata]); }, [getSelectedFromQuery, matches, repoInfo]);
const languages = useMemo(() => { const languages = useMemo(() => {
const selectedLanguages = getSelectedFromQuery(LANGUAGES_QUERY_PARAM); const selectedLanguages = getSelectedFromQuery(LANGUAGES_QUERY_PARAM);
return aggregateMatches( return aggregateMatches(
"language", "language",
matches, matches,
(key) => { ({ key }) => {
const Icon = ( const Icon = (
<FileIcon language={key} /> <FileIcon language={key} />
) )
@ -168,14 +175,14 @@ export const FilterPanel = ({
const aggregateMatches = ( const aggregateMatches = (
propName: 'repository' | 'language', propName: 'repository' | 'language',
matches: SearchResultFile[], matches: SearchResultFile[],
createEntry: (key: string) => Entry createEntry: (props: { key: string, match: SearchResultFile }) => Entry
) => { ) => {
return matches return matches
.map((match) => match[propName]) .map((match) => ({ key: match[propName], match }))
.filter((key) => key.length > 0) .filter(({ key }) => key.length > 0)
.reduce((aggregation, key) => { .reduce((aggregation, { key, match }) => {
if (!aggregation[key]) { if (!aggregation[key]) {
aggregation[key] = createEntry(key); aggregation[key] = createEntry({ key, match });
} }
aggregation[key].count += 1; aggregation[key].count += 1;
return aggregation; return aggregation;

View file

@ -5,7 +5,7 @@ import { Separator } from "@/components/ui/separator";
import { DoubleArrowDownIcon, DoubleArrowUpIcon } from "@radix-ui/react-icons"; import { DoubleArrowDownIcon, DoubleArrowUpIcon } from "@radix-ui/react-icons";
import { useCallback, useMemo } from "react"; import { useCallback, useMemo } from "react";
import { FileMatch } from "./fileMatch"; import { FileMatch } from "./fileMatch";
import { Repository, SearchResultFile } from "@/features/search/types"; import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
export const MAX_MATCHES_TO_PREVIEW = 3; export const MAX_MATCHES_TO_PREVIEW = 3;
@ -16,7 +16,7 @@ interface FileMatchContainerProps {
showAllMatches: boolean; showAllMatches: boolean;
onShowAllMatchesButtonClicked: () => void; onShowAllMatchesButtonClicked: () => void;
isBranchFilteringEnabled: boolean; isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>; repoInfo: Record<number, RepositoryInfo>;
yOffset: number; yOffset: number;
} }
@ -27,7 +27,7 @@ export const FileMatchContainer = ({
showAllMatches, showAllMatches,
onShowAllMatchesButtonClicked, onShowAllMatchesButtonClicked,
isBranchFilteringEnabled, isBranchFilteringEnabled,
repoMetadata, repoInfo,
yOffset, yOffset,
}: FileMatchContainerProps) => { }: FileMatchContainerProps) => {
@ -87,6 +87,10 @@ export const FileMatchContainer = ({
return `${branches[0]}${branches.length > 1 ? ` +${branches.length - 1}` : ''}`; return `${branches[0]}${branches.length > 1 ? ` +${branches.length - 1}` : ''}`;
}, [isBranchFilteringEnabled, branches]); }, [isBranchFilteringEnabled, branches]);
const repo = useMemo(() => {
return repoInfo[file.repositoryId];
}, [repoInfo, file.repositoryId]);
return ( return (
<div> <div>
@ -101,7 +105,12 @@ export const FileMatchContainer = ({
}} }}
> >
<FileHeader <FileHeader
repo={repoMetadata[file.repository]} repo={{
name: repo.name,
codeHostType: repo.codeHostType,
displayName: repo.displayName,
webUrl: repo.webUrl,
}}
fileName={file.fileName.text} fileName={file.fileName.text}
fileNameHighlightRange={fileNameRange} fileNameHighlightRange={fileNameRange}
branchDisplayName={branchDisplayName} branchDisplayName={branchDisplayName}

View file

@ -1,6 +1,6 @@
'use client'; 'use client';
import { Repository, SearchResultFile } from "@/features/search/types"; import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
import { FileMatchContainer, MAX_MATCHES_TO_PREVIEW } from "./fileMatchContainer"; import { FileMatchContainer, MAX_MATCHES_TO_PREVIEW } from "./fileMatchContainer";
import { useVirtualizer } from "@tanstack/react-virtual"; import { useVirtualizer } from "@tanstack/react-virtual";
import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react"; import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
@ -12,7 +12,7 @@ interface SearchResultsPanelProps {
isLoadMoreButtonVisible: boolean; isLoadMoreButtonVisible: boolean;
onLoadMoreButtonClicked: () => void; onLoadMoreButtonClicked: () => void;
isBranchFilteringEnabled: boolean; isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>; repoInfo: Record<number, RepositoryInfo>;
} }
const ESTIMATED_LINE_HEIGHT_PX = 20; const ESTIMATED_LINE_HEIGHT_PX = 20;
@ -26,7 +26,7 @@ export const SearchResultsPanel = ({
isLoadMoreButtonVisible, isLoadMoreButtonVisible,
onLoadMoreButtonClicked, onLoadMoreButtonClicked,
isBranchFilteringEnabled, isBranchFilteringEnabled,
repoMetadata, repoInfo,
}: SearchResultsPanelProps) => { }: SearchResultsPanelProps) => {
const parentRef = useRef<HTMLDivElement>(null); const parentRef = useRef<HTMLDivElement>(null);
const [showAllMatchesStates, setShowAllMatchesStates] = useState(Array(fileMatches.length).fill(false)); const [showAllMatchesStates, setShowAllMatchesStates] = useState(Array(fileMatches.length).fill(false));
@ -151,7 +151,7 @@ export const SearchResultsPanel = ({
onShowAllMatchesButtonClicked(virtualRow.index); onShowAllMatchesButtonClicked(virtualRow.index);
}} }}
isBranchFilteringEnabled={isBranchFilteringEnabled} isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata} repoInfo={repoInfo}
yOffset={virtualRow.start} yOffset={virtualRow.start}
/> />
</div> </div>

View file

@ -16,14 +16,14 @@ import { useQuery } from "@tanstack/react-query";
import { useRouter } from "next/navigation"; import { useRouter } from "next/navigation";
import { Suspense, useCallback, useEffect, useMemo, useRef, useState } from "react"; import { Suspense, useCallback, useEffect, useMemo, useRef, useState } from "react";
import { ImperativePanelHandle } from "react-resizable-panels"; import { ImperativePanelHandle } from "react-resizable-panels";
import { getRepos, search } from "../../api/(client)/client"; import { search } from "../../api/(client)/client";
import { TopBar } from "../components/topBar"; import { TopBar } from "../components/topBar";
import { CodePreviewPanel } from "./components/codePreviewPanel"; import { CodePreviewPanel } from "./components/codePreviewPanel";
import { FilterPanel } from "./components/filterPanel"; import { FilterPanel } from "./components/filterPanel";
import { SearchResultsPanel } from "./components/searchResultsPanel"; import { SearchResultsPanel } from "./components/searchResultsPanel";
import { useDomain } from "@/hooks/useDomain"; import { useDomain } from "@/hooks/useDomain";
import { useToast } from "@/components/hooks/use-toast"; import { useToast } from "@/components/hooks/use-toast";
import { Repository, SearchResultFile } from "@/features/search/types"; import { RepositoryInfo, SearchResultFile } from "@/features/search/types";
const DEFAULT_MATCH_COUNT = 10000; const DEFAULT_MATCH_COUNT = 10000;
@ -90,25 +90,6 @@ const SearchPageInternal = () => {
]) ])
}, [searchQuery, setSearchHistory]); }, [searchQuery, setSearchHistory]);
// Use the /api/repos endpoint to get a useful list of
// repository metadata (like host type, repo name, etc.)
// Convert this into a map of repo name to repo metadata
// for easy lookup.
const { data: repoMetadata, isLoading: isRepoMetadataLoading } = useQuery({
queryKey: ["repos"],
queryFn: () => getRepos(domain),
select: (data): Record<string, Repository> =>
data.repos
.reduce(
(acc, repo) => ({
...acc,
[repo.name]: repo,
}),
{},
),
refetchOnWindowFocus: false,
});
useEffect(() => { useEffect(() => {
if (!searchResponse) { if (!searchResponse) {
return; return;
@ -141,13 +122,14 @@ const SearchPageInternal = () => {
}); });
}, [captureEvent, searchQuery, searchResponse]); }, [captureEvent, searchQuery, searchResponse]);
const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled } = useMemo(() => { const { fileMatches, searchDurationMs, totalMatchCount, isBranchFilteringEnabled, repositoryInfo } = useMemo(() => {
if (!searchResponse) { if (!searchResponse) {
return { return {
fileMatches: [], fileMatches: [],
searchDurationMs: 0, searchDurationMs: 0,
totalMatchCount: 0, totalMatchCount: 0,
isBranchFilteringEnabled: false, isBranchFilteringEnabled: false,
repositoryInfo: {},
}; };
} }
@ -156,6 +138,10 @@ const SearchPageInternal = () => {
searchDurationMs: Math.round(searchResponse.durationMs), searchDurationMs: Math.round(searchResponse.durationMs),
totalMatchCount: searchResponse.zoektStats.matchCount, totalMatchCount: searchResponse.zoektStats.matchCount,
isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled, isBranchFilteringEnabled: searchResponse.isBranchFilteringEnabled,
repositoryInfo: searchResponse.repositoryInfo.reduce((acc, repo) => {
acc[repo.id] = repo;
return acc;
}, {} as Record<number, RepositoryInfo>),
} }
}, [searchResponse]); }, [searchResponse]);
@ -194,7 +180,7 @@ const SearchPageInternal = () => {
<Separator /> <Separator />
</div> </div>
{(isSearchLoading || isRepoMetadataLoading) ? ( {(isSearchLoading) ? (
<div className="flex flex-col items-center justify-center h-full gap-2"> <div className="flex flex-col items-center justify-center h-full gap-2">
<SymbolIcon className="h-6 w-6 animate-spin" /> <SymbolIcon className="h-6 w-6 animate-spin" />
<p className="font-semibold text-center">Searching...</p> <p className="font-semibold text-center">Searching...</p>
@ -205,7 +191,7 @@ const SearchPageInternal = () => {
isMoreResultsButtonVisible={isMoreResultsButtonVisible} isMoreResultsButtonVisible={isMoreResultsButtonVisible}
onLoadMoreResults={onLoadMoreResults} onLoadMoreResults={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled} isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata ?? {}} repoInfo={repositoryInfo}
searchDurationMs={searchDurationMs} searchDurationMs={searchDurationMs}
numMatches={numMatches} numMatches={numMatches}
/> />
@ -219,7 +205,7 @@ interface PanelGroupProps {
isMoreResultsButtonVisible?: boolean; isMoreResultsButtonVisible?: boolean;
onLoadMoreResults: () => void; onLoadMoreResults: () => void;
isBranchFilteringEnabled: boolean; isBranchFilteringEnabled: boolean;
repoMetadata: Record<string, Repository>; repoInfo: Record<number, RepositoryInfo>;
searchDurationMs: number; searchDurationMs: number;
numMatches: number; numMatches: number;
} }
@ -229,7 +215,7 @@ const PanelGroup = ({
isMoreResultsButtonVisible, isMoreResultsButtonVisible,
onLoadMoreResults, onLoadMoreResults,
isBranchFilteringEnabled, isBranchFilteringEnabled,
repoMetadata, repoInfo,
searchDurationMs, searchDurationMs,
numMatches, numMatches,
}: PanelGroupProps) => { }: PanelGroupProps) => {
@ -267,7 +253,7 @@ const PanelGroup = ({
<FilterPanel <FilterPanel
matches={fileMatches} matches={fileMatches}
onFilterChanged={onFilterChanged} onFilterChanged={onFilterChanged}
repoMetadata={repoMetadata} repoInfo={repoInfo}
/> />
</ResizablePanel> </ResizablePanel>
<ResizableHandle <ResizableHandle
@ -310,7 +296,7 @@ const PanelGroup = ({
isLoadMoreButtonVisible={!!isMoreResultsButtonVisible} isLoadMoreButtonVisible={!!isMoreResultsButtonVisible}
onLoadMoreButtonClicked={onLoadMoreResults} onLoadMoreButtonClicked={onLoadMoreResults}
isBranchFilteringEnabled={isBranchFilteringEnabled} isBranchFilteringEnabled={isBranchFilteringEnabled}
repoMetadata={repoMetadata} repoInfo={repoInfo}
/> />
) : ( ) : (
<div className="flex flex-col items-center justify-center h-full"> <div className="flex flex-col items-center justify-center h-full">

View file

@ -2,25 +2,24 @@
import { listRepositories } from "@/features/search/listReposApi"; import { listRepositories } from "@/features/search/listReposApi";
import { NextRequest } from "next/server"; import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { isServiceError } from "@/lib/utils"; import { isServiceError } from "@/lib/utils";
import { serviceErrorResponse } from "@/lib/serviceError"; import { serviceErrorResponse } from "@/lib/serviceError";
import { StatusCodes } from "http-status-codes";
import { ErrorCode } from "@/lib/errorCodes";
export const GET = async (request: NextRequest) => { export const GET = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain")!; const domain = request.headers.get("X-Org-Domain");
const response = await getRepos(domain); if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const response = await listRepositories(domain);
if (isServiceError(response)) { if (isServiceError(response)) {
return serviceErrorResponse(response); return serviceErrorResponse(response);
} }
return Response.json(response); return Response.json(response);
} }
const getRepos = (domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await listRepositories(orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -3,13 +3,21 @@
import { search } from "@/features/search/searchApi"; import { search } from "@/features/search/searchApi";
import { isServiceError } from "@/lib/utils"; import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server"; import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { searchRequestSchema } from "@/features/search/schemas"; import { searchRequestSchema } from "@/features/search/schemas";
import { SearchRequest } from "@/features/search/types"; import { ErrorCode } from "@/lib/errorCodes";
import { StatusCodes } from "http-status-codes";
export const POST = async (request: NextRequest) => { export const POST = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain")!; const domain = request.headers.get("X-Org-Domain");
if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const body = await request.json(); const body = await request.json();
const parsed = await searchRequestSchema.safeParseAsync(body); const parsed = await searchRequestSchema.safeParseAsync(body);
if (!parsed.success) { if (!parsed.success) {
@ -18,17 +26,9 @@ export const POST = async (request: NextRequest) => {
); );
} }
const response = await postSearch(parsed.data, domain); const response = await search(parsed.data, domain);
if (isServiceError(response)) { if (isServiceError(response)) {
return serviceErrorResponse(response); return serviceErrorResponse(response);
} }
return Response.json(response); return Response.json(response);
} }
const postSearch = (request: SearchRequest, domain: string) => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await search(request, orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -4,11 +4,20 @@ import { getFileSource } from "@/features/search/fileSourceApi";
import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError";
import { isServiceError } from "@/lib/utils"; import { isServiceError } from "@/lib/utils";
import { NextRequest } from "next/server"; import { NextRequest } from "next/server";
import { sew, withAuth, withOrgMembership } from "@/actions";
import { fileSourceRequestSchema } from "@/features/search/schemas"; import { fileSourceRequestSchema } from "@/features/search/schemas";
import { FileSourceRequest } from "@/features/search/types"; import { ErrorCode } from "@/lib/errorCodes";
import { StatusCodes } from "http-status-codes";
export const POST = async (request: NextRequest) => { export const POST = async (request: NextRequest) => {
const domain = request.headers.get("X-Org-Domain");
if (!domain) {
return serviceErrorResponse({
statusCode: StatusCodes.BAD_REQUEST,
errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER,
message: "Missing X-Org-Domain header",
});
}
const body = await request.json(); const body = await request.json();
const parsed = await fileSourceRequestSchema.safeParseAsync(body); const parsed = await fileSourceRequestSchema.safeParseAsync(body);
if (!parsed.success) { if (!parsed.success) {
@ -18,19 +27,11 @@ export const POST = async (request: NextRequest) => {
} }
const response = await postSource(parsed.data, request.headers.get("X-Org-Domain")!);
const response = await getFileSource(parsed.data, domain);
if (isServiceError(response)) { if (isServiceError(response)) {
return serviceErrorResponse(response); return serviceErrorResponse(response);
} }
return Response.json(response); return Response.json(response);
} }
export const postSource = (request: FileSourceRequest, domain: string) => sew(() =>
withAuth(async (session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const response = await getFileSource(request, orgId);
return response;
}
), /* allowSingleTenantUnauthedAccess */ true));

View file

@ -1,7 +1,7 @@
import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types"; import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/review-agent/types";
import { getFileSource } from "@/features/search/fileSourceApi";
import { fileSourceResponseSchema } from "@/features/search/schemas"; import { fileSourceResponseSchema } from "@/features/search/schemas";
import { base64Decode } from "@/lib/utils"; import { base64Decode } from "@/lib/utils";
import { postSource } from "@/app/api/(server)/source/route";
import { isServiceError } from "@/lib/utils"; import { isServiceError } from "@/lib/utils";
export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filename: string): Promise<sourcebot_context> => { export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filename: string): Promise<sourcebot_context> => {
@ -14,7 +14,7 @@ export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filenam
} }
console.log(JSON.stringify(fileSourceRequest, null, 2)); console.log(JSON.stringify(fileSourceRequest, null, 2));
const response = await postSource(fileSourceRequest, "~"); const response = await getFileSource(fileSourceRequest, "~");
if (isServiceError(response)) { if (isServiceError(response)) {
throw new Error(`Failed to fetch file content for ${filename} from ${repoPath}: ${response.message}`); throw new Error(`Failed to fetch file content for ${filename} from ${repoPath}: ${response.message}`);
} }

View file

@ -3,40 +3,44 @@ import { fileNotFound, ServiceError } from "../../lib/serviceError";
import { FileSourceRequest, FileSourceResponse } from "./types"; import { FileSourceRequest, FileSourceResponse } from "./types";
import { isServiceError } from "../../lib/utils"; import { isServiceError } from "../../lib/utils";
import { search } from "./searchApi"; import { search } from "./searchApi";
import { sew, withAuth, withOrgMembership } from "@/actions";
// @todo (bkellam) : We should really be using `git show <hash>:<path>` to fetch file contents here. // @todo (bkellam) : We should really be using `git show <hash>:<path>` to fetch file contents here.
// This will allow us to support permalinks to files at a specific revision that may not be indexed // This will allow us to support permalinks to files at a specific revision that may not be indexed
// by zoekt. // by zoekt.
export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest, orgId: number): Promise<FileSourceResponse | ServiceError> => { export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest, domain: string): Promise<FileSourceResponse | ServiceError> => sew(() =>
const escapedFileName = escapeStringRegexp(fileName); withAuth((session) =>
const escapedRepository = escapeStringRegexp(repository); withOrgMembership(session, domain, async () => {
const escapedFileName = escapeStringRegexp(fileName);
const escapedRepository = escapeStringRegexp(repository);
let query = `file:${escapedFileName} repo:^${escapedRepository}$`; let query = `file:${escapedFileName} repo:^${escapedRepository}$`;
if (branch) { if (branch) {
query = query.concat(` branch:${branch}`); query = query.concat(` branch:${branch}`);
} }
const searchResponse = await search({ const searchResponse = await search({
query, query,
matches: 1, matches: 1,
whole: true, whole: true,
}, orgId); }, domain);
if (isServiceError(searchResponse)) { if (isServiceError(searchResponse)) {
return searchResponse; return searchResponse;
} }
const files = searchResponse.files; const files = searchResponse.files;
if (!files || files.length === 0) { if (!files || files.length === 0) {
return fileNotFound(fileName, repository); return fileNotFound(fileName, repository);
} }
const file = files[0]; const file = files[0];
const source = file.content ?? ''; const source = file.content ?? '';
const language = file.language; const language = file.language;
return { return {
source, source,
language, language,
} satisfies FileSourceResponse; } satisfies FileSourceResponse;
} }), /* allowSingleTenantUnauthedAccess = */ true)
);

View file

@ -2,42 +2,45 @@ import { invalidZoektResponse, ServiceError } from "../../lib/serviceError";
import { ListRepositoriesResponse } from "./types"; import { ListRepositoriesResponse } from "./types";
import { zoektFetch } from "./zoektClient"; import { zoektFetch } from "./zoektClient";
import { zoektListRepositoriesResponseSchema } from "./zoektSchema"; import { zoektListRepositoriesResponseSchema } from "./zoektSchema";
import { sew, withAuth, withOrgMembership } from "@/actions";
export const listRepositories = async (domain: string): Promise<ListRepositoriesResponse | ServiceError> => sew(() =>
withAuth((session) =>
withOrgMembership(session, domain, async ({ orgId }) => {
const body = JSON.stringify({
opts: {
Field: 0,
}
});
export const listRepositories = async (orgId: number): Promise<ListRepositoriesResponse | ServiceError> => { let header: Record<string, string> = {};
const body = JSON.stringify({ header = {
opts: { "X-Tenant-ID": orgId.toString()
Field: 0, };
}
});
let header: Record<string, string> = {}; const listResponse = await zoektFetch({
header = { path: "/api/list",
"X-Tenant-ID": orgId.toString() body,
}; header,
method: "POST",
cache: "no-store",
});
const listResponse = await zoektFetch({ if (!listResponse.ok) {
path: "/api/list", return invalidZoektResponse(listResponse);
body, }
header,
method: "POST",
cache: "no-store",
});
if (!listResponse.ok) { const listBody = await listResponse.json();
return invalidZoektResponse(listResponse);
}
const listBody = await listResponse.json(); const parser = zoektListRepositoriesResponseSchema.transform(({ List }) => ({
repos: List.Repos.map((repo) => ({
name: repo.Repository.Name,
webUrl: repo.Repository.URL.length > 0 ? repo.Repository.URL : undefined,
branches: repo.Repository.Branches?.map((branch) => branch.Name) ?? [],
rawConfig: repo.Repository.RawConfig ?? undefined,
}))
} satisfies ListRepositoriesResponse));
const parser = zoektListRepositoriesResponseSchema.transform(({ List }) => ({ return parser.parse(listBody);
repos: List.Repos.map((repo) => ({ }), /* allowSingleTenantUnauthedAccess = */ true)
name: repo.Repository.Name, );
url: repo.Repository.URL,
branches: repo.Repository.Branches?.map((branch) => branch.Name) ?? [],
rawConfig: repo.Repository.RawConfig ?? undefined,
}))
} satisfies ListRepositoriesResponse));
return parser.parse(listBody);
}

View file

@ -31,6 +31,14 @@ export const searchRequestSchema = z.object({
whole: z.boolean().optional(), whole: z.boolean().optional(),
}); });
// Zod schema for one repository-metadata entry. The search response schema
// exposes an array of these under its `repositoryInfo` field.
export const repositoryInfoSchema = z.object({
// Numeric repository identifier.
id: z.number(),
// Code host type as a free-form string (not restricted to an enum here).
codeHostType: z.string(),
// Repository name.
name: z.string(),
// Optional display name; may be absent.
displayName: z.string().optional(),
// Optional web URL for the repository; may be absent.
webUrl: z.string().optional(),
})
export const searchResponseSchema = z.object({ export const searchResponseSchema = z.object({
zoektStats: z.object({ zoektStats: z.object({
// The duration (in nanoseconds) of the search. // The duration (in nanoseconds) of the search.
@ -62,8 +70,9 @@ export const searchResponseSchema = z.object({
// Any matching ranges // Any matching ranges
matchRanges: z.array(rangeSchema), matchRanges: z.array(rangeSchema),
}), }),
url: z.string(), webUrl: z.string().optional(),
repository: z.string(), repository: z.string(),
repositoryId: z.number(),
language: z.string(), language: z.string(),
chunks: z.array(z.object({ chunks: z.array(z.object({
content: z.string(), content: z.string(),
@ -78,13 +87,14 @@ export const searchResponseSchema = z.object({
// Set if `whole` is true. // Set if `whole` is true.
content: z.string().optional(), content: z.string().optional(),
})), })),
repositoryInfo: z.array(repositoryInfoSchema),
isBranchFilteringEnabled: z.boolean(), isBranchFilteringEnabled: z.boolean(),
}); });
export const repositorySchema = z.object({ export const repositorySchema = z.object({
name: z.string(), name: z.string(),
url: z.string(),
branches: z.array(z.string()), branches: z.array(z.string()),
webUrl: z.string().optional(),
rawConfig: z.record(z.string(), z.string()).optional(), rawConfig: z.record(z.string(), z.string()).optional(),
}); });

View file

@ -7,7 +7,9 @@ import { ErrorCode } from "../../lib/errorCodes";
import { StatusCodes } from "http-status-codes"; import { StatusCodes } from "http-status-codes";
import { zoektSearchResponseSchema } from "./zoektSchema"; import { zoektSearchResponseSchema } from "./zoektSchema";
import { SearchRequest, SearchResponse, SearchResultRange } from "./types"; import { SearchRequest, SearchResponse, SearchResultRange } from "./types";
import assert from "assert"; import { Repo } from "@sourcebot/db";
import * as Sentry from "@sentry/nextjs";
import { sew, withAuth, withOrgMembership } from "@/actions";
// List of supported query prefixes in zoekt. // List of supported query prefixes in zoekt.
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417 // @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417
@ -92,178 +94,244 @@ const transformZoektQuery = async (query: string, orgId: number): Promise<string
} }
// Extracts a repository file URL from a zoekt template, branch, and file name. // Extracts a repository file URL from a zoekt template, branch, and file name.
function getRepositoryUrl(template: string, branch: string, fileName: string): string { const getFileWebUrl = (template: string, branch: string, fileName: string): string | undefined => {
// This is a hacky parser for templates generated by // This is a hacky parser for templates generated by
// the go text/template package. Example template: // the go text/template package. Example template:
// {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}} // {{URLJoinPath "https://github.com/sourcebot-dev/sourcebot" "blob" .Version .Path}}
// The template should always match this regex, so let's assert that. if (!template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/)) {
assert(template.match(/^{{URLJoinPath\s.*}}(\?.+)?$/), "Invalid template"); return undefined;
}
const url = const url =
template.substring("{{URLJoinPath ".length, template.indexOf("}}")) template.substring("{{URLJoinPath ".length, template.indexOf("}}"))
.replace(".Version", branch) .replace(".Version", branch)
.replace(".Path", fileName) .replace(".Path", fileName)
.split(" ") .split(" ")
.map((part) => { .map((part) => {
// remove wrapping quotes // remove wrapping quotes
if (part.startsWith("\"")) part = part.substring(1); if (part.startsWith("\"")) part = part.substring(1);
if (part.endsWith("\"")) part = part.substring(0, part.length - 1); if (part.endsWith("\"")) part = part.substring(0, part.length - 1);
return part; return part;
}) })
.join("/"); .join("/");
const optionalQueryParams = const optionalQueryParams =
template.substring(template.indexOf("}}") + 2) template.substring(template.indexOf("}}") + 2)
.replace("{{.Version}}", branch) .replace("{{.Version}}", branch)
.replace("{{.Path}}", fileName); .replace("{{.Path}}", fileName);
return encodeURI(url + optionalQueryParams); return encodeURI(url + optionalQueryParams);
} }
export const search = async ({ query, matches, contextLines, whole }: SearchRequest, orgId: number) => { export const search = async ({ query, matches, contextLines, whole }: SearchRequest, domain: string) => sew(() =>
const transformedQuery = await transformZoektQuery(query, orgId); withAuth((session) =>
if (isServiceError(transformedQuery)) { withOrgMembership(session, domain, async ({ orgId }) => {
return transformedQuery; const transformedQuery = await transformZoektQuery(query, orgId);
} if (isServiceError(transformedQuery)) {
query = transformedQuery; return transformedQuery;
const isBranchFilteringEnabled = (
query.includes(zoektPrefixes.branch) ||
query.includes(zoektPrefixes.branchShort)
);
// We only want to show matches for the default branch when
// the user isn't explicitly filtering by branch.
if (!isBranchFilteringEnabled) {
query = query.concat(` branch:HEAD`);
}
const body = JSON.stringify({
q: query,
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
opts: {
ChunkMatches: true,
MaxMatchDisplayCount: matches,
NumContextLines: contextLines,
Whole: !!whole,
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
}
});
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
const searchResponse = await zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});
if (!searchResponse.ok) {
return invalidZoektResponse(searchResponse);
}
const searchBody = await searchResponse.json();
const parser = zoektSearchResponseSchema.transform(({ Result }) => ({
zoektStats: {
duration: Result.Duration,
fileCount: Result.FileCount,
matchCount: Result.MatchCount,
filesSkipped: Result.FilesSkipped,
contentBytesLoaded: Result.ContentBytesLoaded,
indexBytesLoaded: Result.IndexBytesLoaded,
crashes: Result.Crashes,
shardFilesConsidered: Result.ShardFilesConsidered,
filesConsidered: Result.FilesConsidered,
filesLoaded: Result.FilesLoaded,
shardsScanned: Result.ShardsScanned,
shardsSkipped: Result.ShardsSkipped,
shardsSkippedFilter: Result.ShardsSkippedFilter,
ngramMatches: Result.NgramMatches,
ngramLookups: Result.NgramLookups,
wait: Result.Wait,
matchTreeConstruction: Result.MatchTreeConstruction,
matchTreeSearch: Result.MatchTreeSearch,
regexpsConsidered: Result.RegexpsConsidered,
flushReason: Result.FlushReason,
},
files: Result.Files?.map((file) => {
const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
const template = Result.RepoURLs[file.Repository];
assert(template, `Template not found for repository ${file.Repository}`);
// If there are multiple branches pointing to the same revision of this file, it doesn't
// matter which branch we use here, so use the first one.
const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
const url = getRepositoryUrl(template, branch, file.FileName);
return {
fileName: {
text: file.FileName,
matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
})) : [],
},
repository: file.Repository,
url: url,
language: file.Language,
chunks: file.ChunkMatches
.filter((chunk) => !chunk.FileName) // Filter out filename chunks.
.map((chunk) => {
return {
content: chunk.Content,
matchRanges: chunk.Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
}) satisfies SearchResultRange),
contentStart: {
byteOffset: chunk.ContentStart.ByteOffset,
column: chunk.ContentStart.Column,
lineNumber: chunk.ContentStart.LineNumber,
},
symbols: chunk.SymbolInfo?.map((symbol) => {
return {
symbol: symbol.Sym,
kind: symbol.Kind,
parent: symbol.Parent.length > 0 ? {
symbol: symbol.Parent,
kind: symbol.ParentKind,
} : undefined,
}
}) ?? undefined,
}
}),
branches: file.Branches,
content: file.Content,
} }
}) ?? [], query = transformedQuery;
isBranchFilteringEnabled: isBranchFilteringEnabled,
} satisfies SearchResponse));
return parser.parse(searchBody); const isBranchFilteringEnabled = (
} query.includes(zoektPrefixes.branch) ||
query.includes(zoektPrefixes.branchShort)
);
// We only want to show matches for the default branch when
// the user isn't explicitly filtering by branch.
if (!isBranchFilteringEnabled) {
query = query.concat(` branch:HEAD`);
}
const body = JSON.stringify({
q: query,
// @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892
opts: {
ChunkMatches: true,
MaxMatchDisplayCount: matches,
NumContextLines: contextLines,
Whole: !!whole,
TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT,
ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT,
MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds
}
});
let header: Record<string, string> = {};
header = {
"X-Tenant-ID": orgId.toString()
};
const searchResponse = await zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});
if (!searchResponse.ok) {
return invalidZoektResponse(searchResponse);
}
const searchBody = await searchResponse.json();
const parser = zoektSearchResponseSchema.transform(async ({ Result }) => {
// @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field
// which corresponds to the `id` in the Repo table. In order to efficiently fetch repository
// metadata when transforming (potentially thousands of) file matches, we aggregate a unique
// set of repository ids* and map them to their corresponding Repo record.
//
// *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`?
// A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was
// always undefined. To make this a non-breaking change, we fall back to using the repository's name
// (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in
// practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo',
// 'gitea.com/org/repo', etc.).
//
// Note: When a repository is re-indexed (every hour) this ID will be populated.
// @see: https://github.com/sourcebot-dev/zoekt/pull/6
const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []);
const repos = new Map<string | number, Repo>();
(await prisma.repo.findMany({
where: {
id: {
in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"),
},
orgId,
}
})).forEach(repo => repos.set(repo.id, repo));
(await prisma.repo.findMany({
where: {
name: {
in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"),
},
orgId,
}
})).forEach(repo => repos.set(repo.name, repo));
return {
zoektStats: {
duration: Result.Duration,
fileCount: Result.FileCount,
matchCount: Result.MatchCount,
filesSkipped: Result.FilesSkipped,
contentBytesLoaded: Result.ContentBytesLoaded,
indexBytesLoaded: Result.IndexBytesLoaded,
crashes: Result.Crashes,
shardFilesConsidered: Result.ShardFilesConsidered,
filesConsidered: Result.FilesConsidered,
filesLoaded: Result.FilesLoaded,
shardsScanned: Result.ShardsScanned,
shardsSkipped: Result.ShardsSkipped,
shardsSkippedFilter: Result.ShardsSkippedFilter,
ngramMatches: Result.NgramMatches,
ngramLookups: Result.NgramLookups,
wait: Result.Wait,
matchTreeConstruction: Result.MatchTreeConstruction,
matchTreeSearch: Result.MatchTreeSearch,
regexpsConsidered: Result.RegexpsConsidered,
flushReason: Result.FlushReason,
},
files: Result.Files?.map((file) => {
const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName);
const webUrl = (() => {
const template: string | undefined = Result.RepoURLs[file.Repository];
if (!template) {
return undefined;
}
// If there are multiple branches pointing to the same revision of this file, it doesn't
// matter which branch we use here, so use the first one.
const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD";
return getFileWebUrl(template, branch, file.FileName);
})();
const identifier = file.RepositoryID ?? file.Repository;
const repo = repos.get(identifier);
// This should never happen... but if it does, we skip the file.
if (!repo) {
Sentry.captureMessage(
`Repository not found for identifier: ${identifier}; skipping file "${file.FileName}"`,
'warning'
);
return undefined;
}
return {
fileName: {
text: file.FileName,
matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
})) : [],
},
repository: repo.name,
repositoryId: repo.id,
webUrl: webUrl,
language: file.Language,
chunks: file.ChunkMatches
.filter((chunk) => !chunk.FileName) // Filter out filename chunks.
.map((chunk) => {
return {
content: chunk.Content,
matchRanges: chunk.Ranges.map((range) => ({
start: {
byteOffset: range.Start.ByteOffset,
column: range.Start.Column,
lineNumber: range.Start.LineNumber,
},
end: {
byteOffset: range.End.ByteOffset,
column: range.End.Column,
lineNumber: range.End.LineNumber,
}
}) satisfies SearchResultRange),
contentStart: {
byteOffset: chunk.ContentStart.ByteOffset,
column: chunk.ContentStart.Column,
lineNumber: chunk.ContentStart.LineNumber,
},
symbols: chunk.SymbolInfo?.map((symbol) => {
return {
symbol: symbol.Sym,
kind: symbol.Kind,
parent: symbol.Parent.length > 0 ? {
symbol: symbol.Parent,
kind: symbol.ParentKind,
} : undefined,
}
}) ?? undefined,
}
}),
branches: file.Branches,
content: file.Content,
}
}).filter((file) => file !== undefined) ?? [],
repositoryInfo: Array.from(repos.values()).map((repo) => ({
id: repo.id,
codeHostType: repo.external_codeHostType,
name: repo.name,
displayName: repo.displayName ?? undefined,
webUrl: repo.webUrl ?? undefined,
})),
isBranchFilteringEnabled: isBranchFilteringEnabled,
} satisfies SearchResponse;
});
return parser.parseAsync(searchBody);
}), /* allowSingleTenantUnauthedAccess = */ true)
)

View file

@ -8,6 +8,7 @@ import {
rangeSchema, rangeSchema,
fileSourceRequestSchema, fileSourceRequestSchema,
symbolSchema, symbolSchema,
repositoryInfoSchema,
} from "./schemas"; } from "./schemas";
import { z } from "zod"; import { z } from "zod";
@ -23,4 +24,6 @@ export type ListRepositoriesResponse = z.infer<typeof listRepositoriesResponseSc
export type Repository = ListRepositoriesResponse["repos"][number]; export type Repository = ListRepositoriesResponse["repos"][number];
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>; export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>; export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export type RepositoryInfo = z.infer<typeof repositoryInfoSchema>;

View file

@ -54,6 +54,7 @@ export const zoektSearchResponseSchema = z.object({
Files: z.array(z.object({ Files: z.array(z.object({
FileName: z.string(), FileName: z.string(),
Repository: z.string(), Repository: z.string(),
RepositoryID: z.number().optional(),
Version: z.string().optional(), Version: z.string().optional(),
Language: z.string(), Language: z.string(),
Branches: z.array(z.string()).optional(), Branches: z.array(z.string()).optional(),

View file

@ -23,4 +23,5 @@ export enum ErrorCode {
STRIPE_CLIENT_NOT_INITIALIZED = 'STRIPE_CLIENT_NOT_INITIALIZED', STRIPE_CLIENT_NOT_INITIALIZED = 'STRIPE_CLIENT_NOT_INITIALIZED',
ACTION_DISALLOWED_IN_TENANCY_MODE = 'ACTION_DISALLOWED_IN_TENANCY_MODE', ACTION_DISALLOWED_IN_TENANCY_MODE = 'ACTION_DISALLOWED_IN_TENANCY_MODE',
SEARCH_CONTEXT_NOT_FOUND = 'SEARCH_CONTEXT_NOT_FOUND', SEARCH_CONTEXT_NOT_FOUND = 'SEARCH_CONTEXT_NOT_FOUND',
MISSING_ORG_DOMAIN_HEADER = 'MISSING_ORG_DOMAIN_HEADER',
} }

View file

@ -5,9 +5,8 @@ import gitlabLogo from "@/public/gitlab.svg";
import giteaLogo from "@/public/gitea.svg"; import giteaLogo from "@/public/gitea.svg";
import gerritLogo from "@/public/gerrit.svg"; import gerritLogo from "@/public/gerrit.svg";
import bitbucketLogo from "@/public/bitbucket.svg"; import bitbucketLogo from "@/public/bitbucket.svg";
import gitLogo from "@/public/git.svg";
import { ServiceError } from "./serviceError"; import { ServiceError } from "./serviceError";
import { RepositoryQuery } from "./types";
import { Repository } from "@/features/search/types";
export function cn(...inputs: ClassValue[]) { export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs)) return twMerge(clsx(inputs))
@ -33,47 +32,40 @@ export const createPathWithQueryParams = (path: string, ...queryParams: [string,
return `${path}?${queryString}`; return `${path}?${queryString}`;
} }
export type CodeHostType = "github" | "gitlab" | "gitea" | "gerrit" | "bitbucket-cloud" | "bitbucket-server"; export type CodeHostType =
"github" |
"gitlab" |
"gitea" |
"gerrit" |
"bitbucket-cloud" |
"bitbucket-server" |
"generic-git-host";
type CodeHostInfo = { type CodeHostInfo = {
type: CodeHostType; type: CodeHostType;
displayName: string; displayName: string;
codeHostName: string; codeHostName: string;
repoLink: string; repoLink?: string;
icon: string; icon: string;
iconClassName?: string; iconClassName?: string;
} }
export const getRepoCodeHostInfo = (repo?: Repository): CodeHostInfo | undefined => { export const getCodeHostInfoForRepo = (repo: {
if (!repo) { codeHostType: string,
return undefined; name: string,
} displayName?: string,
webUrl?: string,
}): CodeHostInfo | undefined => {
const { codeHostType, name, displayName, webUrl } = repo;
if (!repo.rawConfig) { switch (codeHostType) {
return undefined;
}
// @todo : use zod to validate config schema
const webUrlType = repo.rawConfig['web-url-type']!;
const displayName = repo.rawConfig['display-name'] ?? repo.rawConfig['name']!;
return _getCodeHostInfoInternal(webUrlType, displayName, repo.url);
}
export const getRepoQueryCodeHostInfo = (repo: RepositoryQuery): CodeHostInfo | undefined => {
const displayName = repo.repoDisplayName ?? repo.repoName;
return _getCodeHostInfoInternal(repo.codeHostType, displayName, repo.webUrl ?? repo.repoCloneUrl);
}
const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: string): CodeHostInfo | undefined => {
switch (type) {
case 'github': { case 'github': {
const { src, className } = getCodeHostIcon('github')!; const { src, className } = getCodeHostIcon('github')!;
return { return {
type: "github", type: "github",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "GitHub", codeHostName: "GitHub",
repoLink: cloneUrl, repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -82,9 +74,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gitlab')!; const { src, className } = getCodeHostIcon('gitlab')!;
return { return {
type: "gitlab", type: "gitlab",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "GitLab", codeHostName: "GitLab",
repoLink: cloneUrl, repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -93,9 +85,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gitea')!; const { src, className } = getCodeHostIcon('gitea')!;
return { return {
type: "gitea", type: "gitea",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "Gitea", codeHostName: "Gitea",
repoLink: cloneUrl, repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -105,9 +97,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('gerrit')!; const { src, className } = getCodeHostIcon('gerrit')!;
return { return {
type: "gerrit", type: "gerrit",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "Gerrit", codeHostName: "Gerrit",
repoLink: cloneUrl, repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -116,9 +108,9 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('bitbucket-server')!; const { src, className } = getCodeHostIcon('bitbucket-server')!;
return { return {
type: "bitbucket-server", type: "bitbucket-server",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "Bitbucket", codeHostName: "Bitbucket",
repoLink: cloneUrl, repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -127,9 +119,20 @@ const _getCodeHostInfoInternal = (type: string, displayName: string, cloneUrl: s
const { src, className } = getCodeHostIcon('bitbucket-cloud')!; const { src, className } = getCodeHostIcon('bitbucket-cloud')!;
return { return {
type: "bitbucket-cloud", type: "bitbucket-cloud",
displayName: displayName, displayName: displayName ?? name,
codeHostName: "Bitbucket", codeHostName: "Bitbucket",
repoLink: cloneUrl, repoLink: webUrl,
icon: src,
iconClassName: className,
}
}
case "generic-git-host": {
const { src, className } = getCodeHostIcon('generic-git-host')!;
return {
type: "generic-git-host",
displayName: displayName ?? name,
codeHostName: "Generic Git Host",
repoLink: webUrl,
icon: src, icon: src,
iconClassName: className, iconClassName: className,
} }
@ -161,6 +164,10 @@ export const getCodeHostIcon = (codeHostType: CodeHostType): { src: string, clas
return { return {
src: bitbucketLogo, src: bitbucketLogo,
} }
case "generic-git-host":
return {
src: gitLogo,
}
default: default:
return null; return null;
} }
@ -174,6 +181,7 @@ export const isAuthSupportedForCodeHost = (codeHostType: CodeHostType): boolean
case "bitbucket-cloud": case "bitbucket-cloud":
case "bitbucket-server": case "bitbucket-server":
return true; return true;
case "generic-git-host":
case "gerrit": case "gerrit":
return false; return false;
} }

View file

@ -16,6 +16,9 @@
}, },
{ {
"$ref": "./bitbucket.json" "$ref": "./bitbucket.json"
},
{
"$ref": "./genericGitHost.json"
} }
] ]
} }

View file

@ -0,0 +1,30 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GenericGitHostConnectionConfig",
"properties": {
"type": {
"const": "git",
"description": "Generic Git host configuration"
},
"url": {
"type": "string",
"format": "url",
"description": "The URL to the git repository. This can either be a remote URL (prefixed with `http://` or `https://`) or an absolute path to a directory on the local machine (prefixed with `file://`). If a local directory is specified, it must point to the root of a git repository. Local directories are treated as read-only. Local directories support glob patterns.",
"pattern": "^(https?:\\/\\/[^\\s/$.?#].[^\\s]*|file:\\/\\/\\/[^\\s]+)$",
"examples": [
"https://github.com/sourcebot-dev/sourcebot",
"file:///path/to/repo",
"file:///repos/*"
]
},
"revisions": {
"$ref": "./shared.json#/definitions/GitRevisions"
}
},
"required": [
"type",
"url"
],
"additionalProperties": false
}

View file

@ -78,7 +78,7 @@
}, },
"contexts": { "contexts": {
"type": "object", "type": "object",
"description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/self-hosting/more/search-contexts", "description": "[Sourcebot EE] Defines a collection of search contexts. This is only available in single-tenancy mode. See: https://docs.sourcebot.dev/docs/search/search-contexts",
"patternProperties": { "patternProperties": {
"^[a-zA-Z0-9_-]+$": { "^[a-zA-Z0-9_-]+$": {
"$ref": "#/definitions/SearchContext" "$ref": "#/definitions/SearchContext"

2
vendor/zoekt vendored

@ -1 +1 @@
Subproject commit 7d1896215eea6f97af66c9549c9ec70436356b51 Subproject commit 12a2f4ad075359a09bd8a91793acb002211217aa

View file

@ -5460,6 +5460,7 @@ __metadata:
cross-fetch: "npm:^4.0.0" cross-fetch: "npm:^4.0.0"
dotenv: "npm:^16.4.5" dotenv: "npm:^16.4.5"
express: "npm:^4.21.2" express: "npm:^4.21.2"
git-url-parse: "npm:^16.1.0"
gitea-js: "npm:^1.22.0" gitea-js: "npm:^1.22.0"
glob: "npm:^11.0.0" glob: "npm:^11.0.0"
ioredis: "npm:^5.4.2" ioredis: "npm:^5.4.2"
@ -6084,6 +6085,13 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"@types/parse-path@npm:^7.0.0":
version: 7.0.3
resolution: "@types/parse-path@npm:7.0.3"
checksum: 10c0/8344b6c7acba4e4e5a8d542f56f53c297685fa92f9b0c085d7532cc7e1b661432cecfc1c75c76cdb0d161c95679b6ecfe0573d9fef7c836962aacf604150a984
languageName: node
linkType: hard
"@types/pg-pool@npm:2.0.6": "@types/pg-pool@npm:2.0.6":
version: 2.0.6 version: 2.0.6
resolution: "@types/pg-pool@npm:2.0.6" resolution: "@types/pg-pool@npm:2.0.6"
@ -9830,6 +9838,25 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"git-up@npm:^8.1.0":
version: 8.1.1
resolution: "git-up@npm:8.1.1"
dependencies:
is-ssh: "npm:^1.4.0"
parse-url: "npm:^9.2.0"
checksum: 10c0/2cc4461d8565a3f7a1ecd3d262a58ddb8df0a67f7f7d4915df2913c460b2e88ae570a6ea810700a6d22fb3b9e4bea8dd10a8eb469900ddc12e35c62208608c03
languageName: node
linkType: hard
"git-url-parse@npm:^16.1.0":
version: 16.1.0
resolution: "git-url-parse@npm:16.1.0"
dependencies:
git-up: "npm:^8.1.0"
checksum: 10c0/b8f5ebcbd5b2baf9f1bb77a217376f0247c47fe1d42811ccaac3015768eebb0759a59051f758e50e70adf5c67ae059d1975bf6b750164f36bfd39138d11b940b
languageName: node
linkType: hard
"gitea-js@npm:^1.22.0": "gitea-js@npm:^1.22.0":
version: 1.23.0 version: 1.23.0
resolution: "gitea-js@npm:1.23.0" resolution: "gitea-js@npm:1.23.0"
@ -10633,6 +10660,15 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"is-ssh@npm:^1.4.0":
version: 1.4.1
resolution: "is-ssh@npm:1.4.1"
dependencies:
protocols: "npm:^2.0.1"
checksum: 10c0/021a7355cb032625d58db3cc8266ad9aa698cbabf460b71376a0307405577fd7d3aa0826c0bf1951d7809f134c0ee80403306f6d7633db94a5a3600a0106b398
languageName: node
linkType: hard
"is-stream@npm:^2.0.0": "is-stream@npm:^2.0.0":
version: 2.0.1 version: 2.0.1
resolution: "is-stream@npm:2.0.1" resolution: "is-stream@npm:2.0.1"
@ -12400,6 +12436,25 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"parse-path@npm:^7.0.0":
version: 7.1.0
resolution: "parse-path@npm:7.1.0"
dependencies:
protocols: "npm:^2.0.0"
checksum: 10c0/8c8c8b3019323d686e7b1cd6fd9653bc233404403ad68827836fbfe59dfe26aaef64ed4e0396d0e20c4a7e1469312ec969a679618960e79d5e7c652dc0da5a0f
languageName: node
linkType: hard
"parse-url@npm:^9.2.0":
version: 9.2.0
resolution: "parse-url@npm:9.2.0"
dependencies:
"@types/parse-path": "npm:^7.0.0"
parse-path: "npm:^7.0.0"
checksum: 10c0/b8f56cdb01e76616255dff82544f4b5ab4378f6f4bac8604ed6fde03a75b0f71c547d92688386d8f22f38fad3c928c075abf69458677c6185da76c841bfd7a93
languageName: node
linkType: hard
"parse5@npm:^7.1.2": "parse5@npm:^7.1.2":
version: 7.2.1 version: 7.2.1
resolution: "parse5@npm:7.2.1" resolution: "parse5@npm:7.2.1"
@ -13010,6 +13065,13 @@ __metadata:
languageName: node languageName: node
linkType: hard linkType: hard
"protocols@npm:^2.0.0, protocols@npm:^2.0.1":
version: 2.0.2
resolution: "protocols@npm:2.0.2"
checksum: 10c0/b87d78c1fcf038d33691da28447ce94011d5c7f0c7fd25bcb5fb4d975991c99117873200c84f4b6a9d7f8b9092713a064356236960d1473a7d6fcd4228897b60
languageName: node
linkType: hard
"proxy-addr@npm:^2.0.7, proxy-addr@npm:~2.0.7": "proxy-addr@npm:^2.0.7, proxy-addr@npm:~2.0.7":
version: 2.0.7 version: 2.0.7
resolution: "proxy-addr@npm:2.0.7" resolution: "proxy-addr@npm:2.0.7"