Merge branch 'sourcebot-dev:main' into main

This commit is contained in:
Andre Nogueira 2025-10-03 18:31:22 +01:00 committed by GitHub
commit 48af3fd328
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
343 changed files with 36963 additions and 4345 deletions

View file

@ -1,7 +1,4 @@
contact_links: contact_links:
- name: 💡 Feature Request - name: 👾 Discord
url: https://github.com/sourcebot-dev/sourcebot/discussions/new?category=ideas url: https://discord.gg/f4Cbf3HT
about: Suggest any ideas you have using our discussion forums. about: Something else? Join the Discord!
- name: 🛟 Get Help
url: https://github.com/sourcebot-dev/sourcebot/discussions/new?category=support
about: If you can't get something to work the way you expect, open a question in our discussion forums.

View file

@ -0,0 +1,12 @@
---
name: "💡 Feature Request"
about: Suggest an idea for this project
title: "[FR] "
labels: enhancement
assignees: ''
---
<!-- Please search existing issues to avoid creating duplicates. -->
<!-- Describe the feature you'd like. -->

12
.github/ISSUE_TEMPLATE/get_help.md vendored Normal file
View file

@ -0,0 +1,12 @@
---
name: "🛟 Get Help"
about: Something isn't working the way you expect
title: ""
labels: help wanted
assignees: ''
---
<!-- Please search existing issues to avoid creating duplicates. -->
<!-- Describe the issue you are facing. -->

View file

@ -60,6 +60,8 @@ jobs:
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }} NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }}
NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }} NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }}
NEXT_PUBLIC_SENTRY_BACKEND_DSN=${{ vars.NEXT_PUBLIC_SENTRY_BACKEND_DSN }} NEXT_PUBLIC_SENTRY_BACKEND_DSN=${{ vars.NEXT_PUBLIC_SENTRY_BACKEND_DSN }}
NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY=${{ vars.NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY }}
NEXT_PUBLIC_LANGFUSE_BASE_URL=${{ vars.NEXT_PUBLIC_LANGFUSE_BASE_URL }}
SENTRY_SMUAT=${{ secrets.SENTRY_SMUAT }} SENTRY_SMUAT=${{ secrets.SENTRY_SMUAT }}
SENTRY_ORG=${{ vars.SENTRY_ORG }} SENTRY_ORG=${{ vars.SENTRY_ORG }}
SENTRY_WEBAPP_PROJECT=${{ vars.SENTRY_WEBAPP_PROJECT }} SENTRY_WEBAPP_PROJECT=${{ vars.SENTRY_WEBAPP_PROJECT }}

View file

@ -2,7 +2,7 @@ name: Deploy Demo
on: on:
push: push:
tags: ["v*.*.*"] branches: ["main"]
workflow_dispatch: workflow_dispatch:
jobs: jobs:

View file

@ -0,0 +1,76 @@
name: Update Roadmap Released
on:
pull_request:
types: [closed]
workflow_dispatch:
schedule:
- cron: "0 */6 * * *"
permissions:
pull-requests: read
contents: read
issues: write
jobs:
update:
runs-on: ubuntu-latest
steps:
- name: Update "Released" section with last 10 merged PRs
uses: actions/github-script@v7
env:
ROADMAP_ISSUE_NUMBER: "459"
with:
script: |
const issue_number = parseInt(process.env.ROADMAP_ISSUE_NUMBER, 10);
const {owner, repo} = context.repo;
// Fetch more than 10, then sort by closed_at to be precise
const batchSize = 50;
const { data: prBatch } = await github.rest.pulls.list({
owner,
repo,
state: "closed",
per_page: batchSize,
sort: "updated",
direction: "desc"
});
const last10 = prBatch
.filter(pr => pr.merged_at) // only merged PRs
.sort((a, b) => new Date(b.merged_at) - new Date(a.merged_at))
.slice(0, 10);
const list = last10.map(pr => `- #${pr.number}`).join("\n");
const start = "<!-- RELEASED:START -->";
const end = "<!-- RELEASED:END -->";
const mergedUrl = `https://github.com/${owner}/${repo}/pulls?q=is%3Apr+is%3Amerged`;
const replacementBlock = [
start,
"",
`10 most recent [merged PRs](${mergedUrl}):`,
"",
list,
"",
end
].join("\n");
const { data: issue } = await github.rest.issues.get({ owner, repo, issue_number });
let body = issue.body || "";
if (body.includes(start) && body.includes(end)) {
const pattern = new RegExp(`${start}[\\s\\S]*?${end}`);
body = body.replace(pattern, replacementBlock);
} else {
core.setFailed('Missing RELEASED markers in roadmap issue body. Please add <!-- RELEASED:START --> and <!-- RELEASED:END --> to the issue.');
return;
}
await github.rest.issues.update({
owner,
repo,
issue_number,
body
});

View file

@ -7,6 +7,144 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Added
- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)
## [4.7.3] - 2025-09-29
### Fixed
- Manually pass auth token for ado server deployments. [#543](https://github.com/sourcebot-dev/sourcebot/pull/543)
## [4.7.2] - 2025-09-22
### Fixed
- Fix support email. [#529](https://github.com/sourcebot-dev/sourcebot/pull/529)
### Added
- [Experimental][Sourcebot EE] Added permission syncing repository Access Control Lists (ACLs) between Sourcebot and GitHub. [#508](https://github.com/sourcebot-dev/sourcebot/pull/508)
### Changed
- Improved repository query performance by adding db indices. [#526](https://github.com/sourcebot-dev/sourcebot/pull/526)
- Improved repository query performance by removing JOIN on `Connection` table. [#527](https://github.com/sourcebot-dev/sourcebot/pull/527)
- Changed repo carousel and repo list links to redirect to the file browser. [#528](https://github.com/sourcebot-dev/sourcebot/pull/528)
- Changed file headers, files/directories in file tree, and reference list buttons into links. [#532](https://github.com/sourcebot-dev/sourcebot/pull/532)
## [4.7.1] - 2025-09-19
### Fixed
- Fixed sourcebot not pulling github forked repos [#499](https://github.com/sourcebot-dev/sourcebot/pull/499)
- Fixed Azure DevOps cloud PAT issue [#524](https://github.com/sourcebot-dev/sourcebot/pull/524)
## [4.7.0] - 2025-09-17
### Added
- Added fallback to default the Node.JS AWS SDK's `fromNodeProviderChain` when no credentials are provided for a bedrock config. [#513](https://github.com/sourcebot-dev/sourcebot/pull/513)
- Added support for Azure DevOps. [#514](https://github.com/sourcebot-dev/sourcebot/pull/514)
### Fixed
- Fixed "At least one project, user, or group must be specified" for GitLab configs with `all` in web configurator. [#512](https://github.com/sourcebot-dev/sourcebot/pull/512)
- Fixed zoekt indexing failing with pipe in branch/tag names [#506](https://github.com/sourcebot-dev/sourcebot/pull/506)
- Removed deprecated connection creation/edit UI [#515](https://github.com/sourcebot-dev/sourcebot/pull/515)
## [4.6.8] - 2025-09-15
### Fixed
- Fixed Bitbucket Cloud pagination not working beyond first page. [#295](https://github.com/sourcebot-dev/sourcebot/issues/295)
- Fixed search bar line wrapping. [#501](https://github.com/sourcebot-dev/sourcebot/pull/501)
- Fixed carousel perf issues. [#507](https://github.com/sourcebot-dev/sourcebot/pull/507)
## [4.6.7] - 2025-09-08
### Added
- Added `exclude.userOwnedProjects` setting to GitLab configs. [#498](https://github.com/sourcebot-dev/sourcebot/pull/498)
### Fixed
- Fixed "couldn't find remote ref HEAD" errors when re-indexing certain repositories. [#497](https://github.com/sourcebot-dev/sourcebot/pull/497)
### Changed
- Disable page scroll when using arrow keys on search suggestions box. [#493](https://github.com/sourcebot-dev/sourcebot/pull/493)
## [4.6.6] - 2025-09-04
### Added
- Added support for specifying query params for openai compatible language models. [#490](https://github.com/sourcebot-dev/sourcebot/pull/490)
### Fixed
- Fix issue where zoekt was failing to index repositories due to `HEAD` pointing to a branch that does not exist. [#488](https://github.com/sourcebot-dev/sourcebot/pull/488)
## [4.6.5] - 2025-09-02
### Fixed
- Remove setting `remote.origin.url` for remote git repositories. [#483](https://github.com/sourcebot-dev/sourcebot/pull/483)
- Fix error when navigating to paths with percentage symbols. [#485](https://github.com/sourcebot-dev/sourcebot/pull/485)
### Changed
- Updated NextJS to version 15. [#477](https://github.com/sourcebot-dev/sourcebot/pull/477)
- Add `sessionToken` as optional Bedrock configuration parameter. [#478](https://github.com/sourcebot-dev/sourcebot/pull/478)
## [4.6.4] - 2025-08-11
### Added
- Added multi-branch indexing support for Gerrit. [#433](https://github.com/sourcebot-dev/sourcebot/pull/433)
- [ask sb] Added `reasoningEffort` option to OpenAI provider. [#446](https://github.com/sourcebot-dev/sourcebot/pull/446)
- [ask db] Added `headers` option to all providers. [#449](https://github.com/sourcebot-dev/sourcebot/pull/449)
### Fixed
- Removed prefix from structured log output. [#443](https://github.com/sourcebot-dev/sourcebot/pull/443)
- [ask sb] Fixed long generation times for first message in a chat thread. [#447](https://github.com/sourcebot-dev/sourcebot/pull/447)
### Changed
- Bumped AI SDK and associated packages version. [#444](https://github.com/sourcebot-dev/sourcebot/pull/444)
## [4.6.3] - 2025-08-04
### Fixed
- Fixed issue where `users` specified in a GitHub config were not getting picked up when a `token` is also specified. [#428](https://github.com/sourcebot-dev/sourcebot/pull/428)
### Added
- [ask sb] Added OpenAI Compatible Language Provider. [#424](https://github.com/sourcebot-dev/sourcebot/pull/424)
## [4.6.2] - 2025-07-31
### Changed
- Bumped AI SDK and associated packages version. [#417](https://github.com/sourcebot-dev/sourcebot/pull/417)
### Fixed
- [ask sb] Fixed "413 content too large" error when starting a new chat with many repos selected. [#416](https://github.com/sourcebot-dev/sourcebot/pull/416)
### Added
- [ask sb] PostHog telemetry for chat thread creation. [#418](https://github.com/sourcebot-dev/sourcebot/pull/418)
## [4.6.1] - 2025-07-29
### Added
- Add search context to ask sourcebot context selector. [#397](https://github.com/sourcebot-dev/sourcebot/pull/397)
- Add ability to include/exclude connection in search context. [#399](https://github.com/sourcebot-dev/sourcebot/pull/399)
- Search context refactor to search scope and demo card UI changes. [#405](https://github.com/sourcebot-dev/sourcebot/pull/405)
- Add GitHub star toast. [#409](https://github.com/sourcebot-dev/sourcebot/pull/409)
- Added a onboarding modal when first visiting the homepage when `ask` mode is selected. [#408](https://github.com/sourcebot-dev/sourcebot/pull/408)
- [ask sb] Added `searchReposTool` and `listAllReposTool`. [#400](https://github.com/sourcebot-dev/sourcebot/pull/400)
### Fixed
- Fixed multiple writes race condition on config file watcher. [#398](https://github.com/sourcebot-dev/sourcebot/pull/398)
### Changed
- Bumped AI SDK and associated packages version. [#404](https://github.com/sourcebot-dev/sourcebot/pull/404)
- Bumped form-data package version. [#407](https://github.com/sourcebot-dev/sourcebot/pull/407)
- Bumped next version. [#406](https://github.com/sourcebot-dev/sourcebot/pull/406)
- [ask sb] Improved search code tool with filter options. [#400](https://github.com/sourcebot-dev/sourcebot/pull/400)
- [ask sb] Removed search scope constraint. [#400](https://github.com/sourcebot-dev/sourcebot/pull/400)
- Update README with new features and videos. [#410](https://github.com/sourcebot-dev/sourcebot/pull/410)
- [ask sb] Add back search scope requirement and other UI changes. [#411](https://github.com/sourcebot-dev/sourcebot/pull/411)
## [4.6.0] - 2025-07-25
### Added
- Introducing Ask Sourcebot - ask natural language questions about your codebase. Get back comprehensive Markdown responses with inline citations back to the code. Bring your own LLM api key. [#392](https://github.com/sourcebot-dev/sourcebot/pull/392)
### Fixed
- Fixed onboarding infinite loop when GCP IAP Auth is enabled. [#381](https://github.com/sourcebot-dev/sourcebot/pull/381)
## [4.5.3] - 2025-07-20 ## [4.5.3] - 2025-07-20
### Changed ### Changed
@ -223,7 +361,7 @@ Sourcebot v3 is here and brings a number of structural changes to the tool's fou
### Removed ### Removed
- [**Breaking Change**] Removed `db.json` in favour of a Postgres database for transactional workloads. See the [architecture overview](https://docs.sourcebot.dev/self-hosting/overview#architecture). - [**Breaking Change**] Removed `db.json` in favour of a Postgres database for transactional workloads. See the [architecture overview](https://docs.sourcebot.dev/self-hosting/overview#architecture).
- [**Breaking Change**] Removed local folder & arbitrary .git repo support. If your deployment depended on these features, please [open a discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) and let us know. - [**Breaking Change**] Removed local folder & arbitrary .git repo support. If your deployment depended on these features, please [open an issue](https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md) and let us know.
- [**Breaking Change**] Removed ability to specify a `token` as a string literal from the schema. - [**Breaking Change**] Removed ability to specify a `token` as a string literal from the schema.
- [**Breaking Change**] Removed support for `DOMAIN_SUB_PATH` configuration. - [**Breaking Change**] Removed support for `DOMAIN_SUB_PATH` configuration.

View file

@ -17,6 +17,8 @@ ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
ARG NEXT_PUBLIC_SENTRY_BACKEND_DSN ARG NEXT_PUBLIC_SENTRY_BACKEND_DSN
ARG NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY
ARG NEXT_PUBLIC_LANGFUSE_BASE_URL
FROM node:20-alpine3.19 AS node-alpine FROM node:20-alpine3.19 AS node-alpine
FROM golang:1.23.4-alpine3.19 AS go-alpine FROM golang:1.23.4-alpine3.19 AS go-alpine
@ -67,6 +69,10 @@ ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
ENV NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=$NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT ENV NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=$NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
ENV NEXT_PUBLIC_SENTRY_WEBAPP_DSN=$NEXT_PUBLIC_SENTRY_WEBAPP_DSN ENV NEXT_PUBLIC_SENTRY_WEBAPP_DSN=$NEXT_PUBLIC_SENTRY_WEBAPP_DSN
ARG NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY
ENV NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY=$NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY
ARG NEXT_PUBLIC_LANGFUSE_BASE_URL
ENV NEXT_PUBLIC_LANGFUSE_BASE_URL=$NEXT_PUBLIC_LANGFUSE_BASE_URL
# To upload source maps to Sentry, we need to set the following build-time args. # To upload source maps to Sentry, we need to set the following build-time args.
# It's important that we don't set these for oss builds, otherwise the Sentry # It's important that we don't set these for oss builds, otherwise the Sentry
@ -164,6 +170,10 @@ ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
ENV NEXT_PUBLIC_SENTRY_WEBAPP_DSN=$NEXT_PUBLIC_SENTRY_WEBAPP_DSN ENV NEXT_PUBLIC_SENTRY_WEBAPP_DSN=$NEXT_PUBLIC_SENTRY_WEBAPP_DSN
ARG NEXT_PUBLIC_SENTRY_BACKEND_DSN ARG NEXT_PUBLIC_SENTRY_BACKEND_DSN
ENV NEXT_PUBLIC_SENTRY_BACKEND_DSN=$NEXT_PUBLIC_SENTRY_BACKEND_DSN ENV NEXT_PUBLIC_SENTRY_BACKEND_DSN=$NEXT_PUBLIC_SENTRY_BACKEND_DSN
ARG NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY
ENV NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY=$NEXT_PUBLIC_LANGFUSE_PUBLIC_KEY
ARG NEXT_PUBLIC_LANGFUSE_BASE_URL
ENV NEXT_PUBLIC_LANGFUSE_BASE_URL=$NEXT_PUBLIC_LANGFUSE_BASE_URL
# ----------- # -----------
RUN echo "Sourcebot Version: $NEXT_PUBLIC_SOURCEBOT_VERSION" RUN echo "Sourcebot Version: $NEXT_PUBLIC_SOURCEBOT_VERSION"
@ -175,7 +185,6 @@ ENV DATA_DIR=/data
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
ENV REDIS_URL="redis://localhost:6379" ENV REDIS_URL="redis://localhost:6379"
ENV SRC_TENANT_ENFORCEMENT_MODE=strict ENV SRC_TENANT_ENFORCEMENT_MODE=strict
ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem

View file

@ -2,7 +2,7 @@ Copyright (c) 2025 Taqla Inc.
Portions of this software are licensed as follows: Portions of this software are licensed as follows:
- All content that resides under the "ee/", "packages/web/src/ee/", and "packages/shared/src/ee/" directories of this repository, if these directories exist, is licensed under the license defined in "ee/LICENSE". - All content that resides under the "ee/", "packages/web/src/ee/", "packages/backend/src/ee/", and "packages/shared/src/ee/" directories of this repository, if these directories exist, is licensed under the license defined in "ee/LICENSE".
- All third party components incorporated into the Sourcebot Software are licensed under the original license provided by the owner of the applicable component. - All third party components incorporated into the Sourcebot Software are licensed under the original license provided by the owner of the applicable component.
- Content outside of the above mentioned directories or restrictions above is available under the "Functional Source License" as defined below. - Content outside of the above mentioned directories or restrictions above is available under the "Functional Source License" as defined below.

View file

@ -12,22 +12,19 @@
<strong>Self Host</strong> <strong>Self Host</strong>
</a> · </a> ·
<a href="https://demo.sourcebot.dev"> <a href="https://demo.sourcebot.dev">
<strong>Demo</strong> <strong>Public Demo</strong>
</a> </a>
</h3> </h3>
</div> </div>
<div> <div>
<a href="https://docs.sourcebot.dev/"><strong>Docs</strong></a> · <a href="https://docs.sourcebot.dev/"><strong>Docs</strong></a> ·
<a href="https://github.com/sourcebot-dev/sourcebot/issues"><strong>Report Bug</strong></a> · <a href="https://github.com/sourcebot-dev/sourcebot/issues/459"><strong>Roadmap</strong></a> ·
<a href="https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas"><strong>Feature Request</strong></a> · <a href="https://github.com/sourcebot-dev/sourcebot/issues/new?template=bug_report.yml"><strong>Report Bug</strong></a> ·
<a href="https://www.sourcebot.dev/changelog"><strong>Changelog</strong></a> · <a href="https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md"><strong>Feature Request</strong></a> ·
<a href="https://www.sourcebot.dev/contact"><strong>Contact</strong></a> · <a href="https://www.sourcebot.dev/changelog"><strong>Changelog</strong></a>
</div> </div>
<br/> <br/>
<span>Sourcebot uses <a href="https://github.com/sourcebot-dev/sourcebot/discussions"><strong>Github Discussions</strong></a> for Support and Feature Requests.</span>
<br/>
<br/>
<div> <div>
</div> </div>
</div> </div>
@ -37,30 +34,45 @@
<a href="https://github.com/sourcebot-dev/sourcebot/stargazers"><img src="https://img.shields.io/github/stars/sourcebot-dev/sourcebot" /></a> <a href="https://github.com/sourcebot-dev/sourcebot/stargazers"><img src="https://img.shields.io/github/stars/sourcebot-dev/sourcebot" /></a>
</p> </p>
<p align="center"> <p align="center">
<p align="center">
<a href="https://discord.gg/6Fhp27x7Pb"><img src="https://dcbadge.limes.pink/api/server/https://discord.gg/6Fhp27x7Pb?style=flat"/></a>
</p>
</p> </p>
# About Sourcebot is a self-hosted tool that helps you understand your codebase.
Sourcebot lets you index all your repos and branches across multiple code hosts (GitHub, GitLab, Bitbucket, Gitea, or Gerrit) and search through them using a blazingly fast interface. - **Ask Sourcebot:** Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations.
- **Code search:** Search and navigate across all your repos and branches, no matter where they're hosted.
https://github.com/user-attachments/assets/ced355f3-967e-4f37-ae6e-74ab8c06b9ec Try it out in our [public demo](https://demo.sourcebot.dev)!
https://github.com/user-attachments/assets/ed66a622-e38f-4947-a531-86df1e1e0218
## Features # Features
- 💻 **One-command deployment**: Get started instantly using Docker on your own machine. ![Sourcebot Features](https://github.com/user-attachments/assets/3aed7348-7aeb-4af3-89da-b617c3db2e02)
- 🔍 **Multi-repo search**: Index and search through multiple public and private repositories and branches on GitHub, GitLab, Bitbucket, Gitea, or Gerrit.
- ⚡**Lightning fast performance**: Built on top of the powerful [Zoekt](https://github.com/sourcegraph/zoekt) search engine.
- 🎨 **Modern web app**: Enjoy a sleek interface with features like syntax highlighting, light/dark mode, and vim-style navigation
- 📂 **Full file visualization**: Instantly view the entire file when selecting any search result.
You can try out our public hosted demo [here](https://demo.sourcebot.dev)! ## Ask Sourcebot
Ask Sourcebot gives you the ability to ask complex questions about your codebase in natural language.
It uses Sourcebot's existing code search and navigation tools to allow reasoning models to search your code, follow code nav references, and provide an answer that's rich with inline citations and navigable code snippets.
https://github.com/user-attachments/assets/8212cd16-683f-468f-8ea5-67455c0931e2
## Code Search
Search across all your repos/branches across any code host platform. Blazingly fast, and supports regular expressions, repo/language search filters, boolean logic, and more.
https://github.com/user-attachments/assets/3b381452-d329-4949-b6f2-2fc38952e481
## Code Navigation
IDE-level code navigation (goto definition and find references) across all your repos.
https://github.com/user-attachments/assets/e2da2829-71cc-40af-98b4-7ba52e945530
## Built-in File Explorer
Explore every file across all of your repos. Modern UI with syntax highlighting, file tree, code navigation, etc.
https://github.com/user-attachments/assets/31ec0669-707d-4e03-b511-1bc33d44197a
# Deploy Sourcebot # Deploy Sourcebot
Sourcebot can be deployed in seconds using our official docker image. Visit our [docs](https://docs.sourcebot.dev/self-hosting/overview) for more information. Sourcebot can be deployed in seconds using our official docker image. Visit our [docs](https://docs.sourcebot.dev/docs/deployment-guide) for more information.
1. Create a config 1. Create a config
```sh ```sh
@ -102,10 +114,10 @@ docker run \
</details> </details>
</br> </br>
3. Start searching at `http://localhost:3000` 3. Visit `http://localhost:3000` to start using Sourcebot
</br> </br>
To learn how to configure Sourcebot to index your own repos, please refer to our [docs](https://docs.sourcebot.dev/self-hosting/overview). To configure Sourcebot (index your own repos, connect your LLMs, etc), check out our [docs](https://docs.sourcebot.dev/docs/configuration/config-file).
> [!NOTE] > [!NOTE]
> Sourcebot collects <a href="https://demo.sourcebot.dev/~/search?query=captureEvent%5C(%20repo%3Asourcebot">anonymous usage data</a> by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment > Sourcebot collects <a href="https://demo.sourcebot.dev/~/search?query=captureEvent%5C(%20repo%3Asourcebot">anonymous usage data</a> by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment

View file

@ -28,16 +28,25 @@
"group": "Features", "group": "Features",
"pages": [ "pages": [
{ {
"group": "Search", "group": "Code Search",
"pages": [ "pages": [
"docs/features/search/overview",
"docs/features/search/syntax-reference", "docs/features/search/syntax-reference",
"docs/features/search/multi-branch-indexing", "docs/features/search/multi-branch-indexing",
"docs/features/search/search-contexts" "docs/features/search/search-contexts"
] ]
}, },
{
"group": "Ask Sourcebot",
"pages": [
"docs/features/ask/overview",
"docs/features/ask/add-model-providers"
]
},
"docs/features/code-navigation", "docs/features/code-navigation",
"docs/features/analytics", "docs/features/analytics",
"docs/features/mcp-server", "docs/features/mcp-server",
"docs/features/permission-syncing",
{ {
"group": "Agents", "group": "Agents",
"tag": "experimental", "tag": "experimental",
@ -51,6 +60,7 @@
{ {
"group": "Configuration", "group": "Configuration",
"pages": [ "pages": [
"docs/configuration/config-file",
{ {
"group": "Indexing your code", "group": "Indexing your code",
"pages": [ "pages": [
@ -59,6 +69,8 @@
"docs/connections/gitlab", "docs/connections/gitlab",
"docs/connections/bitbucket-cloud", "docs/connections/bitbucket-cloud",
"docs/connections/bitbucket-data-center", "docs/connections/bitbucket-data-center",
"docs/connections/ado-cloud",
"docs/connections/ado-server",
"docs/connections/gitea", "docs/connections/gitea",
"docs/connections/gerrit", "docs/connections/gerrit",
"docs/connections/generic-git-host", "docs/connections/generic-git-host",
@ -66,8 +78,7 @@
"docs/connections/request-new" "docs/connections/request-new"
] ]
}, },
"docs/license-key", "docs/configuration/language-model-providers",
"docs/configuration/environment-variables",
{ {
"group": "Authentication", "group": "Authentication",
"pages": [ "pages": [
@ -78,6 +89,8 @@
"docs/configuration/auth/faq" "docs/configuration/auth/faq"
] ]
}, },
"docs/configuration/environment-variables",
"docs/license-key",
"docs/configuration/transactional-emails", "docs/configuration/transactional-emails",
"docs/configuration/structured-logging", "docs/configuration/structured-logging",
"docs/configuration/audit-logs" "docs/configuration/audit-logs"
@ -97,9 +110,14 @@
"href": "https://sourcebot.dev/changelog", "href": "https://sourcebot.dev/changelog",
"icon": "list-check" "icon": "list-check"
}, },
{
"anchor": "Roadmap",
"href": "https://github.com/sourcebot-dev/sourcebot/issues/459",
"icon": "map"
},
{ {
"anchor": "Support", "anchor": "Support",
"href": "https://github.com/sourcebot-dev/sourcebot/discussions/categories/support", "href": "https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md",
"icon": "life-ring" "icon": "life-ring"
} }
] ]

View file

@ -41,6 +41,4 @@ This page covers a range of frequently asked questions about Sourcebot's built-i
</Accordion> </Accordion>
</AccordionGroup> </AccordionGroup>
Have a question that's not answered here? Submit an issue on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll get back to you as soon as we can.
Have a question that's not answered here? Submit it on our [GitHub discussions](https://github.com/sourcebot-dev/sourcebot/discussions)
page and we'll get back to you as soon as we can!

View file

@ -25,4 +25,4 @@ Sourcebot's built-in authentication system gates your deployment, and allows adm
# Troubleshooting # Troubleshooting
- If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers). - If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers).
- Still not working? Reach out to us on our [discord](https://discord.com/invite/6Fhp27x7Pb) or [github discussions](https://github.com/sourcebot-dev/sourcebot/discussions) - Still not working? Reach out to us on our [discord](https://discord.com/invite/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)

View file

@ -0,0 +1,51 @@
---
title: Config File
sidebarTitle: Config file
---
When self-hosting Sourcebot, you **must** provide it a config file. This is done by defining a config file in a volume that's mounted to Sourcebot, and providing the path to this
file in the `CONFIG_PATH` environment variable. For example:
```bash icon="terminal" Passing in a CONFIG_PATH to Sourcebot
docker run \
-v $(pwd)/config.json:/data/config.json \
-e CONFIG_PATH=/data/config.json \
... \ # other options
ghcr.io/sourcebot-dev/sourcebot:latest
```
The config file tells Sourcebot which repos to index, what language models to use, and various other settings as defined in the [schema](#config-file-schema).
# Config File Schema
The config file you provide Sourcebot must follow the [schema](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/index.json). This schema consists of the following properties:
- [Connections](/docs/connections/overview) (`connections`): Defines a set of connections that tell Sourcebot which repos to index and from where
- [Language Models](/docs/configuration/language-model-providers) (`models`): Defines a set of language model providers for use with [Ask Sourcebot](/docs/features/ask)
- [Settings](#settings) (`settings`): Additional settings to tweak your Sourcebot deployment
- [Search Contexts](/docs/features/search/search-contexts) (`contexts`): Groupings of repos that you can search against
# Config File Syncing
Sourcebot syncs the config file on startup, and automatically whenever a change is detected.
# Settings
The following are settings that can be provided in your config file to modify Sourcebot's behavior
| Setting | Type | Default | Minimum | Description / Notes |
|-------------------------------------------------|---------|------------|---------|----------------------------------------------------------------------------------------|
| `maxFileSize`                                   | number  | 2 MB       | 1       | Maximum size (bytes) of a file to index. Files exceeding this are skipped.               |
| `maxTrigramCount`                               | number  | 20000      | 1       | Maximum trigrams per document. Larger files are skipped.                                 |
| `reindexIntervalMs`                             | number  | 1 hour     | 1       | Interval at which all repositories are reindexed.                                        |
| `resyncConnectionIntervalMs`                    | number  | 24 hours   | 1       | Interval for checking connections that need resyncing.                                   |
| `resyncConnectionPollingIntervalMs`             | number  | 1 second   | 1       | DB polling rate for connections that need resyncing.                                     |
| `reindexRepoPollingIntervalMs`                  | number  | 1 second   | 1       | DB polling rate for repos that should be reindexed.                                      |
| `maxConnectionSyncJobConcurrency`               | number  | 8          | 1       | Concurrent connection sync jobs.                                                         |
| `maxRepoIndexingJobConcurrency`                 | number  | 8          | 1       | Concurrent repo indexing jobs.                                                           |
| `maxRepoGarbageCollectionJobConcurrency`        | number  | 8          | 1       | Concurrent repo garbage collection jobs.                                                 |
| `repoGarbageCollectionGracePeriodMs`            | number  | 10 seconds | 1       | Grace period to avoid deleting shards while loading.                                     |
| `repoIndexTimeoutMs`                            | number  | 2 hours    | 1       | Timeout for a single repo indexing run.                                                  |
| `enablePublicAccess` **(deprecated)**           | boolean | false      | —       | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.                    |
| `experiment_repoDrivenPermissionSyncIntervalMs` | number  | 24 hours   | 1       | Interval at which the repo permission syncer should run.                                 |
| `experiment_userDrivenPermissionSyncIntervalMs` | number  | 24 hours   | 1       | Interval at which the user permission syncer should run.                                 |

View file

@ -19,7 +19,7 @@ The following environment variables allow you to configure your Sourcebot deploy
| `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> | | `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> |
| `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> | | `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> |
| `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> | | `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> |
| `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url </p> | | `DATABASE_URL` | `postgresql://postgres@ localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url.</p><p>You can also use `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` to construct the database url.</p> |
| `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> | | `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p> | `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p>
| `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> | | `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> |
@ -59,6 +59,7 @@ The following environment variables allow you to configure your Sourcebot deploy
| `AUTH_EE_OKTA_ISSUER` | `-` | <p>The issuer URL for Okta SSO authentication.</p> | | `AUTH_EE_OKTA_ISSUER` | `-` | <p>The issuer URL for Okta SSO authentication.</p> |
| `AUTH_EE_GCP_IAP_ENABLED` | `false` | <p>When enabled, allows Sourcebot to automatically register/login from a successful GCP IAP redirect</p> | | `AUTH_EE_GCP_IAP_ENABLED` | `false` | <p>When enabled, allows Sourcebot to automatically register/login from a successful GCP IAP redirect</p> |
| `AUTH_EE_GCP_IAP_AUDIENCE` | - | <p>The GCP IAP audience to use when verifying JWT tokens. Must be set to enable GCP IAP JIT provisioning</p> | | `AUTH_EE_GCP_IAP_AUDIENCE` | - | <p>The GCP IAP audience to use when verifying JWT tokens. Must be set to enable GCP IAP JIT provisioning</p> |
| `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` | `false` | <p>Enables [permission syncing](/docs/features/permission-syncing).</p> |
### Review Agent Environment Variables ### Review Agent Environment Variables

View file

@ -0,0 +1,373 @@
---
title: Language Model Providers
sidebarTitle: Language model providers
---
import LanguageModelSchema from '/snippets/schemas/v3/languageModel.schema.mdx'
<Note>
Looking to self-host your own model? Check out the [OpenAI Compatible](#openai-compatible) provider.
</Note>
To use [Ask Sourcebot](/docs/features/ask) you must define at least one Language Model Provider. These providers are defined within the [config file](/docs/configuration/config-file) you
provide Sourcebot.
```json wrap icon="code" Example config with language model provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
// 1. Google Vertex config for Gemini 2.5 Pro
{
"provider": "google-vertex",
"model": "gemini-2.5-pro",
"displayName": "Gemini 2.5 Pro",
"project": "sourcebot",
"credentials": {
"env": "GOOGLE_APPLICATION_CREDENTIALS"
}
},
// 2. OpenAI config for o3
{
"provider": "openai",
"model": "o3",
"displayName": "o3",
"token": {
"env": "OPENAI_API_KEY"
}
}
]
}
```
# Supported Providers
Sourcebot uses the [Vercel AI SDK](https://ai-sdk.dev/docs/introduction), so it can integrate with any provider the SDK supports. If you don't see your provider below please submit
a [feature request](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).
For a detailed description of all the providers, please refer to the [schema](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/languageModel.json).
<Note>Any parameter defined using `env` will read the value from the corresponding environment variable you provide Sourcebot</Note>
### Amazon Bedrock
[Vercel AI SDK Amazon Bedrock Docs](https://ai-sdk.dev/providers/ai-sdk-providers/amazon-bedrock)
```json wrap icon="code" Example config with Amazon Bedrock provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "amazon-bedrock",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"accessKeyId": {
"env": "AWS_ACCESS_KEY_ID"
},
"accessKeySecret": {
"env": "AWS_SECRET_ACCESS_KEY"
},
"sessionToken": {
"env": "AWS_SESSION_TOKEN"
},
"region": "YOUR_REGION_HERE", // defaults to the AWS_REGION env var if not set
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Anthropic
[Vercel AI SDK Anthropic Docs](https://ai-sdk.dev/providers/ai-sdk-providers/anthropic)
```json wrap icon="code" Example config with Anthropic provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "anthropic",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "ANTHROPIC_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Azure OpenAI
[Vercel AI SDK Azure OpenAI Docs](https://ai-sdk.dev/providers/ai-sdk-providers/azure)
```json wrap icon="code" Example config with Azure AI provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "azure",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"resourceName": "YOUR_RESOURCE_NAME", // defaults to the AZURE_RESOURCE_NAME env var if not set
      "apiVersion": "OPTIONAL_API_VERSION", // defaults to 'preview' if not set
"token": {
"env": "AZURE_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Deepseek
[Vercel AI SDK Deepseek Docs](https://ai-sdk.dev/providers/ai-sdk-providers/deepseek)
```json wrap icon="code" Example config with Deepseek provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "deepseek",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "DEEPSEEK_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Google Generative AI
[Vercel AI SDK Google Generative AI Docs](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai)
```json wrap icon="code" Example config with Google Generative AI provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "google-generative-ai",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "GOOGLE_GENERATIVE_AI_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Google Vertex
<Note>If you're using an Anthropic model on Google Vertex, you must define a [Google Vertex Anthropic](#google-vertex-anthropic) provider instead</Note>
<Note>The `credentials` parameter here expects a **path** to a [credentials](https://console.cloud.google.com/apis/credentials) file. This file **must be in a volume mounted by Sourcebot** for it to be readable.</Note>
[Vercel AI SDK Google Vertex AI Docs](https://ai-sdk.dev/providers/ai-sdk-providers/google-vertex)
```json wrap icon="code" Example config with Google Vertex provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "google-vertex",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"project": "YOUR_PROJECT_ID", // defaults to the GOOGLE_VERTEX_PROJECT env var if not set
"region": "YOUR_REGION_HERE", // defaults to the GOOGLE_VERTEX_REGION env var if not set
"credentials": {
"env": "GOOGLE_APPLICATION_CREDENTIALS"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Google Vertex Anthropic
<Note>The `credentials` parameter here expects a **path** to a [credentials](https://console.cloud.google.com/apis/credentials) file. This file **must be in a volume mounted by Sourcebot** for it to be readable.</Note>
[Vercel AI SDK Google Vertex Anthropic Docs](https://ai-sdk.dev/providers/ai-sdk-providers/google-vertex#google-vertex-anthropic-provider-usage)
```json wrap icon="code" Example config with Google Vertex Anthropic provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "google-vertex-anthropic",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"project": "YOUR_PROJECT_ID", // defaults to the GOOGLE_VERTEX_PROJECT env var if not set
"region": "YOUR_REGION_HERE", // defaults to the GOOGLE_VERTEX_REGION env var if not set
"credentials": {
"env": "GOOGLE_APPLICATION_CREDENTIALS"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### Mistral
[Vercel AI SDK Mistral Docs](https://ai-sdk.dev/providers/ai-sdk-providers/mistral)
```json wrap icon="code" Example config with Mistral provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "mistral",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "MISTRAL_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### OpenAI
[Vercel AI SDK OpenAI Docs](https://ai-sdk.dev/providers/ai-sdk-providers/openai)
```json wrap icon="code" Example config with OpenAI provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "openai",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "OPENAI_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL",
"reasoningEffort": "OPTIONAL_REASONING_EFFORT" // defaults to "medium"
}
]
}
```
### OpenAI Compatible
[Vercel AI SDK OpenAI Compatible Docs](https://ai-sdk.dev/providers/openai-compatible-providers)
The OpenAI compatible provider allows you to use any model that is compatible with the OpenAI [Chat Completions API](https://github.com/ollama/ollama/blob/main/docs/openai.md). This includes self-hosted tools like [Ollama](https://ollama.ai/) and [llama.cpp](https://github.com/ggerganov/llama.cpp).
```json wrap icon="code" Example config with OpenAI Compatible provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "openai-compatible",
"baseUrl": "BASE_URL_HERE",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "OPTIONAL_API_KEY"
},
// Optional query parameters can be passed in the request url as:
"queryParams": {
// raw string values
"optional-query-param": "foo",
// or as environment variables
"optional-query-param-secret": {
"env": "MY_SECRET_ENV_VAR"
}
}
}
]
}
```
<Accordion title="Troubleshooting">
- When using [llama.cpp](https://github.com/ggml-org/llama.cpp), if you hit "Failed after 3 attempts. Last error: tools param requires --jinja flag", add the `--jinja` flag to your `llama-server` command.
</Accordion>
### OpenRouter
[Vercel AI SDK OpenRouter Docs](https://ai-sdk.dev/providers/community-providers/openrouter)
```json wrap icon="code" Example config with OpenRouter provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
      "provider": "openrouter",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "OPENROUTER_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
### xAI
[Vercel AI SDK xAI Docs](https://ai-sdk.dev/providers/ai-sdk-providers/xai)
```json wrap icon="code" Example config with xAI provider
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
"provider": "xai",
"model": "YOUR_MODEL_HERE",
"displayName": "OPTIONAL_DISPLAY_NAME",
"token": {
"env": "XAI_API_KEY"
},
"baseUrl": "OPTIONAL_BASE_URL"
}
]
}
```
# Custom headers
You can pass custom headers to the language model provider by using the `headers` parameter. Header values can either be a string or an environment variable. Headers are supported for all providers.
```json wrap icon="code" Example config with custom headers
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"models": [
{
// ... provider, model, displayName, etc...
// Key-value pairs of headers
"headers": {
        // Header values can be passed as an environment variable...
"my-secret-header": {
"env": "MY_SECRET_HEADER_ENV_VAR"
},
// ... or directly as a string.
"my-non-secret-header": "plaintextvalue"
}
}
]
}
```
# Schema reference
<Accordion title="Reference">
[schemas/v3/languageModel.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/languageModel.json)
<LanguageModelSchema />
</Accordion>

View file

@ -0,0 +1,147 @@
---
title: Linking code from Azure Devops Cloud
sidebarTitle: Azure Devops Cloud
icon: https://www.svgrepo.com/show/448307/azure-devops.svg
---
import AzureDevopsSchema from '/snippets/schemas/v3/azuredevops.schema.mdx'
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples
<AccordionGroup>
<Accordion title="Sync individual repos">
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"repos": [
"organizationName/projectName/repoName",
        "organizationName/projectName/repoName2"
]
}
```
</Accordion>
<Accordion title="Sync all repos in an organization">
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"orgs": [
"organizationName",
        "organizationName2"
]
}
```
</Accordion>
<Accordion title="Sync all repos in a project">
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"projects": [
"organizationName/projectName",
"organizationName/projectName2"
]
}
```
</Accordion>
<Accordion title="Exclude repos from syncing">
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
// Include all repos in my-org...
"orgs": [
"my-org"
],
// ...except:
"exclude": {
// repos that are disabled
"disabled": true,
// repos that match these glob patterns
"repos": [
"reposToExclude*"
],
// projects that match these glob patterns
"projects": [
"projectstoExclude*"
    ],
// repos less than the defined min OR larger than the defined max
"size": {
// repos that are less than 1MB (in bytes)...
"min": 1048576,
// or repos greater than 100MB (in bytes)
"max": 104857600
}
}
}
```
</Accordion>
</AccordionGroup>
## Authenticating with Azure Devops Cloud
Azure Devops Cloud requires you to provide a PAT in order to index your repositories. To learn how to create a PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret:
<Tabs>
<Tab title="Environment Variable">
1. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `ADO_TOKEN`.
"env": "ADO_TOKEN"
}
// .. rest of config ..
}
```
2. Pass this environment variable each time you run Sourcebot:
```bash
docker run \
-e ADO_TOKEN=<PAT> \
/* additional args */ \
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Schema reference
<Accordion title="Reference">
[schemas/v3/azuredevops.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/azuredevops.json)
<AzureDevopsSchema />
</Accordion>

View file

@ -0,0 +1,161 @@
---
title: Linking code from Azure Devops Server
sidebarTitle: Azure Devops Server
icon: https://www.svgrepo.com/show/448307/azure-devops.svg
---
import AzureDevopsSchema from '/snippets/schemas/v3/azuredevops.schema.mdx'
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples
<AccordionGroup>
<Accordion title="Enable TFS path support">
This is required if you're using an older version of ADO Server which has `/tfs` in the repo paths.
```json
{
"type": "azuredevops",
"deploymentType": "server",
"useTfsPath": true,
"repos": [
"organizationName/projectName/repoName",
        "organizationName/projectName/repoName2"
]
}
```
</Accordion>
<Accordion title="Sync individual repos">
```json
{
"type": "azuredevops",
"deploymentType": "server",
"repos": [
"organizationName/projectName/repoName",
        "organizationName/projectName/repoName2"
]
}
```
</Accordion>
<Accordion title="Sync all repos in a collection">
```json
{
"type": "azuredevops",
"deploymentType": "server",
"orgs": [
"collectionName",
"collectionName2"
]
}
```
</Accordion>
<Accordion title="Sync all repos in a project">
```json
{
"type": "azuredevops",
"deploymentType": "server",
"projects": [
"collectionName/projectName",
"collectionName/projectName2"
]
}
```
</Accordion>
<Accordion title="Exclude repos from syncing">
```json
{
"type": "azuredevops",
"deploymentType": "server",
// Include all repos in my-org...
"orgs": [
"my-org"
],
// ...except:
"exclude": {
// repos that are disabled
"disabled": true,
// repos that match these glob patterns
"repos": [
"reposToExclude*"
],
// projects that match these glob patterns
"projects": [
"projectstoExclude*"
    ],
// repos less than the defined min OR larger than the defined max
"size": {
// repos that are less than 1MB (in bytes)...
"min": 1048576,
// or repos greater than 100MB (in bytes)
"max": 104857600
}
}
}
```
</Accordion>
</AccordionGroup>
## Authenticating with Azure Devops Server
Azure Devops Server requires you to provide a PAT in order to index your repositories. To learn how to create a PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret:
<Tabs>
<Tab title="Environment Variable">
1. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "server",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `ADO_TOKEN`.
"env": "ADO_TOKEN"
}
// .. rest of config ..
}
```
2. Pass this environment variable each time you run Sourcebot:
```bash
docker run \
-e ADO_TOKEN=<PAT> \
/* additional args */ \
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "server",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Schema reference
<Accordion title="Reference">
[schemas/v3/azuredevops.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/azuredevops.json)
<AzureDevopsSchema />
</Accordion>

View file

@ -12,6 +12,8 @@ import BitbucketSchema from '/snippets/schemas/v3/bitbucket.schema.mdx'
Looking for docs on Bitbucket Data Center? See [this doc](/docs/connections/bitbucket-data-center). Looking for docs on Bitbucket Data Center? See [this doc](/docs/connections/bitbucket-data-center).
</Note> </Note>
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples ## Examples
<AccordionGroup> <AccordionGroup>

View file

@ -12,6 +12,8 @@ import BitbucketSchema from '/snippets/schemas/v3/bitbucket.schema.mdx'
Looking for docs on Bitbucket Cloud? See [this doc](/docs/connections/bitbucket-cloud). Looking for docs on Bitbucket Cloud? See [this doc](/docs/connections/bitbucket-cloud).
</Note> </Note>
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples ## Examples
<AccordionGroup> <AccordionGroup>

View file

@ -7,6 +7,8 @@ import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
Sourcebot can sync code from any Git host (by clone url). This is helpful when you want to search code that is not in a [supported code host](/docs/connections/overview#supported-code-hosts). Sourcebot can sync code from any Git host (by clone url). This is helpful when you want to search code that is not in a [supported code host](/docs/connections/overview#supported-code-hosts).
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Getting Started ## Getting Started
To connect to a Git host, create a new [connection](/docs/connections/overview) with type `git` and specify the clone url in the `url` property. For example: To connect to a Git host, create a new [connection](/docs/connections/overview) with type `git` and specify the clone url in the `url` property. For example:

View file

@ -6,10 +6,12 @@ icon: crow
import GerritSchema from '/snippets/schemas/v3/gerrit.schema.mdx' import GerritSchema from '/snippets/schemas/v3/gerrit.schema.mdx'
<Note>Authenticating with Gerrit is currently not supported. If you need this capability, please raise a [feature request](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note> <Note>Authenticating with Gerrit is currently not supported. If you need this capability, please raise a [feature request](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).</Note>
Sourcebot can sync code from self-hosted gerrit instances. Sourcebot can sync code from self-hosted gerrit instances.
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Connecting to a Gerrit instance ## Connecting to a Gerrit instance
To connect to a gerrit instance, provide the `url` property to your config: To connect to a gerrit instance, provide the `url` property to your config:

View file

@ -8,6 +8,8 @@ import GiteaSchema from '/snippets/schemas/v3/gitea.schema.mdx'
Sourcebot can sync code from Gitea Cloud, and self-hosted. Sourcebot can sync code from Gitea Cloud, and self-hosted.
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples ## Examples
<AccordionGroup> <AccordionGroup>
@ -83,7 +85,6 @@ Next, provide the access token via the `token` property, either as an environmen
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
<Note>Environment variables are only supported in a [declarative config](/docs/configuration/declarative-config) and cannot be used in the web UI.</Note>
1. Add the `token` property to your connection config: 1. Add the `token` property to your connection config:
```json ```json

View file

@ -8,6 +8,8 @@ import GitHubSchema from '/snippets/schemas/v3/github.schema.mdx'
Sourcebot can sync code from GitHub.com, GitHub Enterprise Server, and GitHub Enterprise Cloud. Sourcebot can sync code from GitHub.com, GitHub Enterprise Server, and GitHub Enterprise Cloud.
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples ## Examples
<AccordionGroup> <AccordionGroup>
@ -130,7 +132,6 @@ Next, provide the access token via the `token` property, either as an environmen
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
<Note>Environment variables are only supported in a [declarative config](/docs/configuration/declarative-config) and cannot be used in the web UI.</Note>
1. Add the `token` property to your connection config: 1. Add the `token` property to your connection config:
```json ```json
@ -196,3 +197,7 @@ To connect to a GitHub host other than `github.com`, provide the `url` property
<GitHubSchema /> <GitHubSchema />
</Accordion> </Accordion>
## See also
- [Syncing GitHub Access permissions to Sourcebot](/docs/features/permission-syncing#github)

View file

@ -8,6 +8,7 @@ import GitLabSchema from '/snippets/schemas/v3/gitlab.schema.mdx'
Sourcebot can sync code from GitLab.com, Self Managed (CE & EE), and Dedicated. Sourcebot can sync code from GitLab.com, Self Managed (CE & EE), and Dedicated.
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Examples ## Examples
@ -89,6 +90,8 @@ Sourcebot can sync code from GitLab.com, Self Managed (CE & EE), and Dedicated.
"archived": true, "archived": true,
// projects that are forks // projects that are forks
"forks": true, "forks": true,
// projects that are owned by users (not groups)
"userOwnedProjects": true,
// projects that match these glob patterns // projects that match these glob patterns
"projects": [ "projects": [
"my-group/foo/**", "my-group/foo/**",
@ -117,7 +120,6 @@ Next, provide the PAT via the `token` property, either as an environment variabl
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
<Note>Environment variables are only supported in a [declarative config](/docs/configuration/declarative-config) and cannot be used in the web UI.</Note>
1. Add the `token` property to your connection config: 1. Add the `token` property to your connection config:
```json ```json

View file

@ -7,6 +7,8 @@ import GenericGitHost from '/snippets/schemas/v3/genericGitHost.schema.mdx'
Sourcebot can sync code from generic git repositories stored in a local directory. This can be helpful in scenarios where you already have a large number of repos already checked out. Local repositories are treated as **read-only**, meaning Sourcebot will **not** `git fetch` new revisions. Sourcebot can sync code from generic git repositories stored in a local directory. This can be helpful in scenarios where you already have a large number of repos already checked out. Local repositories are treated as **read-only**, meaning Sourcebot will **not** `git fetch` new revisions.
If you're not familiar with Sourcebot [connections](/docs/connections/overview), please read that overview first.
## Getting Started ## Getting Started
<Warning> <Warning>

View file

@ -6,20 +6,7 @@ sidebarTitle: Overview
import SupportedPlatforms from '/snippets/platform-support.mdx' import SupportedPlatforms from '/snippets/platform-support.mdx'
import ConfigSchema from '/snippets/schemas/v3/index.schema.mdx' import ConfigSchema from '/snippets/schemas/v3/index.schema.mdx'
To index your code with Sourcebot, you must provide a configuration file. When running Sourcebot, this file must be mounted in a volume that is accessible to the container, with its A **connection** represents Sourcebot's link to a code host platform (GitHub, GitLab, etc). Connections are defined within the [config file](/docs/configuration/config-file) you provide Sourcebot.
path specified in the `CONFIG_PATH` environment variable. For example:
```bash icon="terminal" Passing in a CONFIG_PATH to Sourcebot
docker run \
-v $(pwd)/config.json:/data/config.json \
-e CONFIG_PATH=/data/config.json \
... \ # other config
ghcr.io/sourcebot-dev/sourcebot:latest
```
## Config Schema
The configuration file defines a set of **connections**. A connection in Sourcebot represents a link to a code host (such as GitHub, GitLab, Bitbucket, etc.).
Each connection defines how Sourcebot should authenticate and interact with a particular host, and which repositories to sync and index from that host. Connections are uniquely identified by their name. Each connection defines how Sourcebot should authenticate and interact with a particular host, and which repositories to sync and index from that host. Connections are uniquely identified by their name.
@ -55,10 +42,11 @@ Each connection defines how Sourcebot should authenticate and interact with a pa
Configuration files must conform to the [JSON schema](#schema-reference). Configuration files must conform to the [JSON schema](#schema-reference).
## Config Syncing ## Connection Syncing
Sourcebot performs syncing in the background. Syncing consists of two steps:
1. Fetch the latest changes from `HEAD` (and any [additional branches](/docs/features/search/multi-branch-indexing)) from the code host. When a connection is first discovered, or the `resyncConnectionIntervalMs` [setting](/docs/configuration/config-file#settings) has exceeded, the connection will be synced. This consists of:
2. Re-indexes the repository. 1. Fetching the latest changes from `HEAD` (and any [additional branches](/docs/features/search/multi-branch-indexing)) from the code host.
2. Re-indexing the repository.
This is processed in a [job queue](/docs/overview#architecture), and is parallelized across multiple worker processes. Jobs will take longer to complete the first time a repository is synced, or when a diff is large. This is processed in a [job queue](/docs/overview#architecture), and is parallelized across multiple worker processes. Jobs will take longer to complete the first time a repository is synced, or when a diff is large.
@ -79,7 +67,7 @@ To learn more about how to create a connection for a specific code host, check o
<SupportedPlatforms /> <SupportedPlatforms />
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas).</Note> <Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).</Note>
## Schema reference ## Schema reference

View file

@ -1,8 +1,8 @@
--- ---
sidebarTitle: Request another host sidebarTitle: Request another host
url: https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas url: https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md
title: Request another code host title: Request another code host
icon: plus icon: plus
--- ---
Is your code host not supported? Please open a [feature request](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas). Is your code host not supported? Please open a [feature request](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).

View file

@ -7,7 +7,7 @@ import SupportedPlatforms from '/snippets/platform-support.mdx'
The following guide will walk you through the steps to deploy Sourcebot on your own infrastructure. Sourcebot is distributed as a [single docker container](/docs/overview#architecture) that can be deployed to a k8s cluster, a VM, or any platform that supports docker. The following guide will walk you through the steps to deploy Sourcebot on your own infrastructure. Sourcebot is distributed as a [single docker container](/docs/overview#architecture) that can be deployed to a k8s cluster, a VM, or any platform that supports docker.
<Note>Hit an issue? Please let us know on [GitHub discussions](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) or by [emailing us](mailto:team@sourcebot.dev).</Note> <Note>Hit an issue? Please let us know on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) or by [emailing us](mailto:team@sourcebot.dev).</Note>
<Steps> <Steps>
<Step title="Requirements"> <Step title="Requirements">
@ -32,7 +32,7 @@ The following guide will walk you through the steps to deploy Sourcebot on your
}' > config.json }' > config.json
``` ```
This config creates a single GitHub connection named `starter-connection` that specifies [Sourcebot](https://github.com/sourcebot-dev/sourcebot) as a repo to sync. [Learn more about the config file](/docs/connections/overview). This config creates a single GitHub connection named `starter-connection` that specifies [Sourcebot](https://github.com/sourcebot-dev/sourcebot) as a repo to sync. [Learn more about the config file](/docs/configuration/config-file).
</Step> </Step>
<Step title="Launch your instance"> <Step title="Launch your instance">
@ -65,25 +65,24 @@ The following guide will walk you through the steps to deploy Sourcebot on your
<Step title="Complete onboarding"> <Step title="Complete onboarding">
Navigate to `http://localhost:3000` and complete the onboarding flow. Navigate to `http://localhost:3000` and complete the onboarding flow.
<Note>
By default, only email / password authentication is enabled. [Learn more about authentication](/docs/configuration/auth/overview).
</Note>
</Step> </Step>
<Step title="Done"> <Step title="Done">
You're all set! You can now start searching - checkout the [syntax guide](/docs/features/search/syntax-reference) to learn more about how to search. You're all set! If you'd like to setup [Ask Sourcebot](/docs/features/ask/overview), configure a language model [provider](/docs/configuration/language-model-providers).
</Step> </Step>
</Steps> </Steps>
## Next steps ## Next steps
--- ---
<CardGroup cols={2}> <CardGroup cols={3}>
<Card title="Connecting your code" icon="code" href="/docs/connections/overview"> <Card title="Index your code" icon="code" href="/docs/connections/overview">
Learn more about how to connect your code to Sourcebot. Learn how to index your code using Sourcebot
</Card> </Card>
<Card title="Setup other authentication providers" icon="lock" href="/docs/configuration/auth/overview"> <Card title="Language models" icon="brain" href="/docs/configuration/language-model-providers">
Learn how to configure language model providers to start using [Ask Sourcebot](/docs/features/ask/overview)
</Card>
<Card title="Authentication" icon="lock" href="/docs/configuration/auth/overview">
Learn more about how to setup SSO, email codes, and other authentication providers. Learn more about how to setup SSO, email codes, and other authentication providers.
</Card> </Card>
</CardGroup> </CardGroup>

View file

@ -3,9 +3,9 @@ title: "Agents Overview"
sidebarTitle: "Overview" sidebarTitle: "Overview"
--- ---
<Warning> import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx'
Agents are currently an experimental feature. Have an idea for an agent that we haven't built? Submit a [feature request](https://github.com/sourcebot-dev/sourcebot/discussions/categories/feature-requests) on our GitHub.
</Warning> <ExperimentalFeatureWarning />
Agents are automations that leverage the code indexed on Sourcebot to perform a specific task. Once you've setup Sourcebot, check out the Agents are automations that leverage the code indexed on Sourcebot to perform a specific task. Once you've setup Sourcebot, check out the
guides below to configure additional agents. guides below to configure additional agents.

View file

@ -10,7 +10,7 @@ codebase that the agent may fetch to perform the review.
This agent provides codebase-aware reviews for your PRs. For each diff, this agent fetches relevant context from Sourcebot and feeds it into an LLM for a detailed review of your changes. This agent provides codebase-aware reviews for your PRs. For each diff, this agent fetches relevant context from Sourcebot and feeds it into an LLM for a detailed review of your changes.
The AI Code Review Agent is [open source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment-guide) The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment-guide)
and then follow the configuration instructions below. and then follow the configuration instructions below.
![AI Code Review Agent Example](/images/review_agent_example.png) ![AI Code Review Agent Example](/images/review_agent_example.png)

View file

@ -0,0 +1,5 @@
---
sidebarTitle: Configure language models
url: /docs/configuration/language-model-providers
title: Configure Language Models
---

View file

@ -0,0 +1,56 @@
---
title: Overview
---
Ask Sourcebot gives you the ability to ask complex questions about your codebase in natural language.
It uses Sourcebot's existing [code search](/docs/features/search/overview) and [navigation](/docs/features/code-navigation) tools to allow reasoning models to search your code,
follow code nav references, and provide an answer that's rich with inline citations and navigable code snippets.
<CardGroup>
<Card title="Configure language models" icon="robot" href="/docs/configuration/language-model-providers" horizontal="true">
Learn how to connect your language model to Sourcebot
</Card>
<Card title="Index repos" icon="book" href="/docs/connections/overview" horizontal="true">
Learn how to index your repos so you can ask questions about them
</Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps.
</Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">
Try Ask Sourcebot on our public demo instance.
</Card>
</CardGroup>
<video
autoPlay
muted
loop
playsInline
className="w-full aspect-video"
src="/images/ask_sourcebot_low_res.mp4"
></video>
# Why do we need another AI dev tool?
Existing AI dev tools (Cursor, Claude Code, Copilot) are great at generating code. However, we believe one of the hardest parts of being
a software engineer is **understanding code**.
In this domain, these tools fall short:
- You can only ask questions about the code you have checked out locally
- You get a wall of text that's difficult to parse, requiring you to go back and forth through different code snippets in the response
- The richness of the explanation is limited by the fact that you're in your IDE
We built Ask Sourcebot to address these problems. With Ask Sourcebot, you can:
- Ask questions about your team's entire codebase (even on repos you don't have locally)
- Easily parse the response with side-by-side citations and code navigation
- Share answers with your team to spread the knowledge
Being a web app is less convenient than being in your IDE, but it allows Sourcebot to provide responses in a richer UI that isn't constrained by the IDE.
We believe this experience of understanding your codebase is superior, and we hope you find it useful. We'd love to know what you think! Feel free to join the discussion on our
[GitHub](https://github.com/sourcebot-dev/sourcebot/discussions).
# Troubleshooting
- **Network timeouts**: If you are hitting a generic "network error" message while the answer is streaming when Sourcebot is deployed in a production environment, it may be due to your load balancer or proxy not being configured to handle long-lived connections. The timeout should be configured to a sufficiently large value (e.g., 5 minutes).

View file

@ -0,0 +1,72 @@
---
title: "Permission syncing"
sidebarTitle: "Permission syncing"
tag: "experimental"
---
import LicenseKeyRequired from '/snippets/license-key-required.mdx'
import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx'
<LicenseKeyRequired />
<ExperimentalFeatureWarning />
# Overview
Permission syncing allows you to sync Access Control Lists (ACLs) from a code host to Sourcebot. When configured, users signed into Sourcebot (via the code host's OAuth provider) will only be able to access repositories that they have access to on the code host. Practically, this means:
- Code Search results will only include repositories that the user has access to.
- Code navigation results will only include repositories that the user has access to.
- Ask Sourcebot (and the underlying LLM) will only have access to repositories that the user has access to.
- File browsing is scoped to the repositories that the user has access to.
Permission syncing can be enabled by setting the `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` environment variable to `true`.
```bash
docker run \
-e EXPERIMENT_EE_PERMISSION_SYNC_ENABLED=true \
/* additional args */ \
ghcr.io/sourcebot-dev/sourcebot:latest
```
## Platform support
We are actively working on supporting more code hosts. If you'd like to see a specific code host supported, please [reach out](https://www.sourcebot.dev/contact).
| Platform | Permission syncing |
|:----------|------------------------------|
| [GitHub (GHEC & GHEC Server)](/docs/features/permission-syncing#github) | ✅ |
| GitLab | 🛑 |
| Bitbucket Cloud | 🛑 |
| Bitbucket Data Center | 🛑 |
| Gitea | 🛑 |
| Gerrit | 🛑 |
| Generic git host | 🛑 |
# Getting started
## GitHub
Prerequisite: [Add GitHub as an OAuth provider](/docs/configuration/auth/providers#github).
Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and **GitHub Enterprise Server**. For organization-owned repositories, users that have **read-only** access (or above) via the following methods will have their access synced to Sourcebot:
- Outside collaborators
- Organization members that are direct collaborators
- Organization members with access through team memberships
- Organization members with access through default organization permissions
- Organization owners.
**Notes:**
- A GitHub OAuth provider must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- OAuth tokens must assume the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**.
# How it works
Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions:
- **User driven** : fetches the list of all repositories that a given user has access to.
- **Repo driven** : fetches the list of all users that have access to a given repository.
User driven and repo driven syncing occurs every 24 hours by default. These intervals can be configured using the following settings in the [config file](/docs/configuration/config-file):
| Setting | Type | Default | Minimum |
|-------------------------------------------------|---------|------------|---------|
| `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 |
| `experiment_userDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 |

View file

@ -12,7 +12,7 @@ By default, only the default branch of a repository is indexed and can be search
## Configuration ## Configuration
<Warning> <Warning>
Multi-branch indexing is currently limited to 64 branches and tags. If this limitation impacts your use-case, please [open a discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support). Multi-branch indexing is currently limited to 64 branches and tags. Please see [this issue](https://github.com/sourcebot-dev/sourcebot/issues/461) for more details.
</Warning> </Warning>
Multi-branch indexing is configured in the [connection](/docs/connections/overview) using the `revisions.branches` and `revisions.tags` arrays. Glob patterns are supported. For example: Multi-branch indexing is configured in the [connection](/docs/connections/overview) using the `revisions.branches` and `revisions.tags` arrays. Glob patterns are supported. For example:
@ -89,6 +89,6 @@ Additional info:
| Bitbucket Cloud | ✅ | | Bitbucket Cloud | ✅ |
| Bitbucket Data Center | ✅ | | Bitbucket Data Center | ✅ |
| Gitea | ✅ | | Gitea | ✅ |
| Gerrit | | | Gerrit | |
| Generic git host | ✅ | | Generic git host | ✅ |

View file

@ -0,0 +1,40 @@
---
title: Overview
---
Search across all your repos/branches across any code host platform. Blazingly fast, and supports regular expressions, repo/language search filters, boolean logic, and more.
<Accordion title="Key benefits">
- **Regex support:** Use regular expressions to find code with precision.
- **Query language:** Scope searches to specific files, repos, languages, symbol definitions and more using a rich [query language](/docs/features/search/syntax-reference).
- **Branch search:** Specify a list of branches to search across ([docs](/docs/features/search/multi-branch-indexing)).
- **Fast & scalable:** Sourcebot uses [trigram indexing](https://en.wikipedia.org/wiki/Trigram_search), allowing it to scale to massive codebases.
- **Syntax highlighting:** Syntax highlighting support for over [100+ languages](https://github.com/sourcebot-dev/sourcebot/blob/57724689303f351c279d37f45b6406f1d5d5d5ab/packages/web/src/lib/codemirrorLanguage.ts#L125).
- **Multi-repository:** Search across all of your repositories in a single search.
- **Search suggestions:** Get search suggestions as you craft your query.
- **Filter panel:** Filter results by repository or by language.
</Accordion>
<CardGroup>
<Card title="Index repos" icon="book" href="/docs/connections/overview" horizontal="true">
Learn how to index your repos so you can ask questions about them
</Card>
<Card title="Branches" icon="split" href="/docs/features/search/multi-branch-indexing" horizontal="true">
Learn how to index and search through your branches
</Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps.
</Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">
Try Sourcebot's code search on our public demo instance.
</Card>
</CardGroup>
<video
autoPlay
muted
loop
playsInline
className="w-full aspect-video"
src="https://framerusercontent.com/assets/cEqHNSLiMbNeG3bk5xheQWXmKqc.mp4"
></video>

View file

@ -7,7 +7,7 @@ sidebarTitle: License key
If you'd like a trial license, [reach out](https://www.sourcebot.dev/contact) and we'll send one over within 24 hours If you'd like a trial license, [reach out](https://www.sourcebot.dev/contact) and we'll send one over within 24 hours
</Note> </Note>
All core Sourcebot features are available in Sourcebot OSS (MIT Licensed) without any limits. Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details. All core Sourcebot features are available under the [FSL license](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) without any limits. Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details.
## Activating a license key ## Activating a license key

View file

@ -2,7 +2,10 @@
title: "Overview" title: "Overview"
--- ---
[Sourcebot]((https://github.com/sourcebot-dev/sourcebot)) is an open-source, self-hosted code search tool. It allows you to search and navigate across millions of lines of code across several code host platforms. [Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a self-hosted tool that helps you understand your codebase.
- [Code search](/docs/features/search/overview): Search and navigate across all your repos and branches, no matter where theyre hosted
- [Ask Sourcebot](/docs/features/ask): Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations
<CardGroup> <CardGroup>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true"> <Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
@ -19,7 +22,7 @@ title: "Overview"
- **Self-hosted:** Deploy it in minutes using our official [docker container](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot). All of your data stays on your machine. - **Self-hosted:** Deploy it in minutes using our official [docker container](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot). All of your data stays on your machine.
- **Modern design:** Light/Dark mode, vim keybindings, keyboard shortcuts, syntax highlighting, etc. - **Modern design:** Light/Dark mode, vim keybindings, keyboard shortcuts, syntax highlighting, etc.
- **Scalable:** Scales to millions of lines of code. - **Scalable:** Scales to millions of lines of code.
- **Open-source:** Core features are MIT licensed. - **Fair-source:** Core features are [FSL licensed](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license).
</Accordion> </Accordion>
</AccordionGroup> </AccordionGroup>
@ -30,9 +33,28 @@ title: "Overview"
Find an overview of all Sourcebot features below. For details, see the individual documentation pages. Find an overview of all Sourcebot features below. For details, see the individual documentation pages.
</Info> </Info>
### Fast indexed based search ### Ask Sourcebot
Search across millions of lines of code instantly using Sourcebot's blazingly fast indexed search. Find exactly what you are looking for with regular expressions, search filters, boolean logic, and more. [Ask Sourcebot](/docs/features/ask) gives you the ability to ask complex questions about your codebase, and have Sourcebot provide detailed answers with inline citations.
<Accordion title="Key benefits">
- **Bring your own model:** [Configure](/docs/configuration/language-model-providers) to any language model you'd like
- **Inline citations:** Every answer Sourcebot provides is grounded with inline citations directly into your codebase
- **Multi-repo:** Ask questions about any repository you have indexed on Sourcebot
</Accordion>
<video
autoPlay
muted
loop
playsInline
className="w-full aspect-video"
src="/images/ask_sourcebot_low_res.mp4"
></video>
### Code Search
Search across all your repos/branches across any code host platform. Blazingly fast, and supports regular expressions, repo/language search filters, boolean logic, and more.
<Accordion title="Key benefits"> <Accordion title="Key benefits">
- **Regex support:** Use regular expressions to find code with precision. - **Regex support:** Use regular expressions to find code with precision.
@ -71,7 +93,7 @@ Search across millions of lines of code instantly using Sourcebot's blazingly fa
loop loop
playsInline playsInline
className="w-full aspect-video" className="w-full aspect-video"
src="https://framerusercontent.com/assets/B9ZxrlsUeO9NJyzkKyvVV2KSU4.mp4" src="/images/code_nav.mp4"
></video> ></video>
@ -174,7 +196,7 @@ Sourcebot does not support horizontal scaling at this time, but it is on our roa
## License key ## License key
--- ---
Sourcebot's core features are available under an [MIT license](https://github.com/sourcebot-dev/sourcebot/blob/HEAD/LICENSE) without any limits. Some [additional features](/docs/license-key#feature-availability) such as SSO and code navigation require a [license key](/docs/license-key). Sourcebot's core features are available under an [FSL license](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) without any limits. Some [additional features](/docs/license-key#feature-availability) such as SSO and code navigation require a [license key](/docs/license-key).
<CardGroup cols={2}> <CardGroup cols={2}>
<Card title="Pricing page" href="https://www.sourcebot.dev/pricing" /> <Card title="Pricing page" href="https://www.sourcebot.dev/pricing" />

View file

@ -78,7 +78,7 @@ If your deployment is dependent on these features, please [reach out](https://gi
After updating your configuration file, restart your Sourcebot deployment to pick up the new changes. After updating your configuration file, restart your Sourcebot deployment to pick up the new changes.
</Step> </Step>
<Step title="You're done!"> <Step title="You're done!">
Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose).
</Step> </Step>
</Steps> </Steps>
@ -90,4 +90,4 @@ Some things to check:
- Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores - Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores
- Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`) - Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`)
Having troubles migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) and we'll try our best to help Having troubles migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help

View file

@ -40,7 +40,7 @@ Please note that the following features are no longer supported in v4:
</Step> </Step>
<Step title="You're done!"> <Step title="You're done!">
Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)
</Step> </Step>
</Steps> </Steps>
@ -58,4 +58,4 @@ to finish upgrading to v4 in single-tenant mode.
- If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`) - If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`)
Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/support) and we'll try our best to help Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help

3
docs/images/ado.svg Normal file
View file

@ -0,0 +1,3 @@
<svg viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="none">
<path fill="currentColor" d="M15 3.622v8.512L11.5 15l-5.425-1.975v1.958L3.004 10.97l8.951.7V4.005L15 3.622zm-2.984.428L6.994 1v2.001L2.382 4.356 1 6.13v4.029l1.978.873V5.869l9.038-1.818z"/>
</svg>

After

Width:  |  Height:  |  Size: 272 B

Binary file not shown.

BIN
docs/images/code_nav.mp4 Normal file

Binary file not shown.

View file

@ -1,6 +1,5 @@
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
<Note>Environment variables are only supported in a [declarative config](/docs/configuration/declarative-config) and cannot be used in the web UI.</Note>
1. Add the `token` and `user` (username associated with the app password you created) properties to your connection config: 1. Add the `token` and `user` (username associated with the app password you created) properties to your connection config:
```json ```json

View file

@ -1,6 +1,5 @@
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
<Note>Environment variables are only supported in a [declarative config](/docs/configuration/declarative-config) and cannot be used in the web UI.</Note>
1. Add the `token` property to your connection config: 1. Add the `token` property to your connection config:
```json ```json

View file

@ -0,0 +1,4 @@
<Warning>
This is an experimental feature. Certain functionality may be incomplete and breaking changes may ship in non-major releases. Have feedback? Submit an [issue](https://github.com/sourcebot-dev/sourcebot/issues) on GitHub.
</Warning>

View file

@ -3,6 +3,43 @@
<Card horizontal title="GitLab" icon="gitlab" href="/docs/connections/gitlab" /> <Card horizontal title="GitLab" icon="gitlab" href="/docs/connections/gitlab" />
<Card horizontal title="Bitbucket Cloud" icon="bitbucket" href="/docs/connections/bitbucket-cloud" /> <Card horizontal title="Bitbucket Cloud" icon="bitbucket" href="/docs/connections/bitbucket-cloud" />
<Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" /> <Card horizontal title="Bitbucket Data Center" icon="bitbucket" href="/docs/connections/bitbucket-data-center" />
{/* Mintlify has a bug where linking to a file for the logo renders it with a white background, so we have to embed it directly */}
<Card
horizontal
    title="Azure DevOps Cloud"
href="/docs/connections/ado-cloud"
icon={
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 16 16"
className="w-6 h-6 text-white"
>
<path
fill="currentColor"
d="M15 3.622v8.512L11.5 15l-5.425-1.975v1.958L3.004 10.97l8.951.7V4.005L15 3.622zm-2.984.428L6.994 1v2.001L2.382 4.356 1 6.13v4.029l1.978.873V5.869l9.038-1.818z"
/>
</svg>
}
/>
<Card
horizontal
    title="Azure DevOps Server"
href="/docs/connections/ado-server"
icon={
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 16 16"
className="w-6 h-6 text-white"
>
<path
fill="currentColor"
d="M15 3.622v8.512L11.5 15l-5.425-1.975v1.958L3.004 10.97l8.951.7V4.005L15 3.622zm-2.984.428L6.994 1v2.001L2.382 4.356 1 6.13v4.029l1.978.873V5.869l9.038-1.818z"
/>
</svg>
}
/>
<Card horizontal title="Gitea" icon="mug-tea" href="/docs/connections/gitea" /> <Card horizontal title="Gitea" icon="mug-tea" href="/docs/connections/gitea" />
<Card horizontal title="Gerrit" icon="crow" href="/docs/connections/gerrit" /> <Card horizontal title="Gerrit" icon="crow" href="/docs/connections/gerrit" />
<Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" /> <Card horizontal title="Other Git hosts" icon="git-alt" href="/docs/connections/generic-git-host" />

View file

@ -0,0 +1,206 @@
{/* THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "AzureDevOpsConnectionConfig",
"properties": {
"type": {
"const": "azuredevops",
"description": "Azure DevOps Configuration"
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
"type": "string",
"format": "url",
"default": "https://dev.azure.com",
"description": "The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.",
"examples": [
"https://dev.azure.com",
"https://azuredevops.example.com"
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"deploymentType": {
"type": "string",
"enum": [
"cloud",
"server"
],
"description": "The type of Azure DevOps deployment"
},
"useTfsPath": {
"type": "boolean",
"default": false,
"description": "Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...)."
},
"orgs": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org"
]
],
"description": "List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property."
},
"projects": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project",
"my-collection/my-project"
]
],
"description": "List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server."
},
"repos": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project/my-repo"
]
],
"description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'."
},
"exclude": {
"type": "object",
"properties": {
"disabled": {
"type": "boolean",
"default": false,
"description": "Exclude disabled repositories from syncing."
},
"repos": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of repositories to exclude from syncing. Glob patterns are supported."
},
"projects": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of projects to exclude from syncing. Glob patterns are supported."
},
"size": {
"type": "object",
"description": "Exclude repositories based on their size.",
"properties": {
"min": {
"type": "integer",
"description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
},
"max": {
"type": "integer",
"description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"token",
"deploymentType"
],
"additionalProperties": false
}
```

View file

@ -343,6 +343,11 @@
"default": false, "default": false,
"description": "Exclude archived projects from syncing." "description": "Exclude archived projects from syncing."
}, },
"userOwnedProjects": {
"type": "boolean",
"default": false,
"description": "Exclude user-owned projects from syncing."
},
"projects": { "projects": {
"type": "array", "type": "array",
"items": { "items": {
@ -638,6 +643,47 @@
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -823,6 +869,209 @@
}, },
"additionalProperties": false "additionalProperties": false
}, },
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "AzureDevOpsConnectionConfig",
"properties": {
"type": {
"const": "azuredevops",
"description": "Azure DevOps Configuration"
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
"type": "string",
"format": "url",
"default": "https://dev.azure.com",
"description": "The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.",
"examples": [
"https://dev.azure.com",
"https://azuredevops.example.com"
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"deploymentType": {
"type": "string",
"enum": [
"cloud",
"server"
],
"description": "The type of Azure DevOps deployment"
},
"useTfsPath": {
"type": "boolean",
"default": false,
"description": "Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...)."
},
"orgs": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org"
]
],
"description": "List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property."
},
"projects": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project",
"my-collection/my-project"
]
],
"description": "List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server."
},
"repos": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project/my-repo"
]
],
"description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'."
},
"exclude": {
"type": "object",
"properties": {
"disabled": {
"type": "boolean",
"default": false,
"description": "Exclude disabled repositories from syncing."
},
"repos": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of repositories to exclude from syncing. Glob patterns are supported."
},
"projects": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of projects to exclude from syncing. Glob patterns are supported."
},
"size": {
"type": "object",
"description": "Exclude repositories based on their size.",
"properties": {
"min": {
"type": "integer",
"description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
},
"max": {
"type": "integer",
"description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"token",
"deploymentType"
],
"additionalProperties": false
},
{ {
"$schema": "http://json-schema.org/draft-07/schema#", "$schema": "http://json-schema.org/draft-07/schema#",
"type": "object", "type": "object",

View file

@ -59,6 +59,47 @@
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [

View file

@ -126,6 +126,11 @@
"default": false, "default": false,
"description": "Exclude archived projects from syncing." "description": "Exclude archived projects from syncing."
}, },
"userOwnedProjects": {
"type": "boolean",
"default": false,
"description": "Exclude user-owned projects from syncing."
},
"projects": { "projects": {
"type": "array", "type": "array",
"items": { "items": {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -19,6 +19,13 @@
] ]
] ]
}, },
"includeConnections": {
"type": "array",
"description": "List of connections to include in the search context.",
"items": {
"type": "string"
}
},
"exclude": { "exclude": {
"type": "array", "type": "array",
"description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.", "description": "List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.",
@ -32,14 +39,18 @@
] ]
] ]
}, },
"excludeConnections": {
"type": "array",
"description": "List of connections to exclude from the search context.",
"items": {
"type": "string"
}
},
"description": { "description": {
"type": "string", "type": "string",
"description": "Optional description of the search context that surfaces in the UI." "description": "Optional description of the search context that surfaces in the UI."
} }
}, },
"required": [
"include"
],
"additionalProperties": false "additionalProperties": false
} }
``` ```

View file

@ -74,6 +74,94 @@
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"LanguageModelHeaders": {
"type": "object",
"description": "Optional headers to use with the model.",
"patternProperties": {
"^[!#$%&'*+\\-.^_`|~0-9A-Za-z]+$": {
"anyOf": [
{
"type": "string"
},
{
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
}
]
}
},
"additionalProperties": false
},
"LanguageModelQueryParams": {
"type": "object",
"description": "Optional query parameters to include in the request url.",
"patternProperties": {
"^[!#$%&'*+\\-.^_`|~0-9A-Za-z]+$": {
"anyOf": [
{
"type": "string"
},
{
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
}
]
}
},
"additionalProperties": false
} }
} }
} }

View file

@ -1,6 +1,24 @@
#!/bin/sh #!/bin/sh
set -e set -e
# Check if DATABASE_URL is not set
if [ -z "$DATABASE_URL" ]; then
# Check if the individual database variables are set and construct the URL
if [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ] && [ -n "$DATABASE_NAME" ]; then
DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}"
if [ -n "$DATABASE_ARGS" ]; then
DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS"
fi
export DATABASE_URL
else
# Otherwise, fallback to a default value
DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
export DATABASE_URL
fi
fi
if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then
DATABASE_EMBEDDED="true" DATABASE_EMBEDDED="true"
fi fi

View file

@ -14,8 +14,10 @@
"watch:mcp": "yarn workspace @sourcebot/mcp build:watch", "watch:mcp": "yarn workspace @sourcebot/mcp build:watch",
"watch:schemas": "yarn workspace @sourcebot/schemas watch", "watch:schemas": "yarn workspace @sourcebot/schemas watch",
"dev:prisma:migrate:dev": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:dev", "dev:prisma:migrate:dev": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:dev",
"dev:prisma:generate": "yarn with-env yarn workspace @sourcebot/db prisma:generate",
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio", "dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset", "dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset",
"dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push",
"build:deps": "yarn workspaces foreach -R --from '{@sourcebot/schemas,@sourcebot/error,@sourcebot/crypto,@sourcebot/db,@sourcebot/shared}' run build" "build:deps": "yarn workspaces foreach -R --from '{@sourcebot/schemas,@sourcebot/error,@sourcebot/crypto,@sourcebot/db,@sourcebot/shared}' run build"
}, },
"devDependencies": { "devDependencies": {
@ -23,5 +25,8 @@
"dotenv-cli": "^8.0.0", "dotenv-cli": "^8.0.0",
"npm-run-all": "^4.1.5" "npm-run-all": "^4.1.5"
}, },
"packageManager": "yarn@4.7.0" "packageManager": "yarn@4.7.0",
"resolutions": {
"prettier": "3.5.3"
}
} }

View file

@ -15,7 +15,7 @@
"@types/micromatch": "^4.0.9", "@types/micromatch": "^4.0.9",
"@types/node": "^22.7.5", "@types/node": "^22.7.5",
"cross-env": "^7.0.3", "cross-env": "^7.0.3",
"json-schema-to-typescript": "^15.0.2", "json-schema-to-typescript": "^15.0.4",
"tsc-watch": "^6.2.0", "tsc-watch": "^6.2.0",
"tsx": "^4.19.1", "tsx": "^4.19.1",
"typescript": "^5.6.2", "typescript": "^5.6.2",
@ -37,6 +37,7 @@
"@t3-oss/env-core": "^0.12.0", "@t3-oss/env-core": "^0.12.0",
"@types/express": "^5.0.0", "@types/express": "^5.0.0",
"argparse": "^2.0.1", "argparse": "^2.0.1",
"azure-devops-node-api": "^15.1.1",
"bullmq": "^5.34.10", "bullmq": "^5.34.10",
"cross-fetch": "^4.0.0", "cross-fetch": "^4.0.0",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",

View file

@ -0,0 +1,348 @@
import { AzureDevOpsConnectionConfig } from "@sourcebot/schemas/v3/azuredevops.type";
import { createLogger } from "@sourcebot/logger";
import { getTokenFromConfig, measure, fetchWithRetry } from "./utils.js";
import micromatch from "micromatch";
import { PrismaClient } from "@sourcebot/db";
import { BackendException, BackendError } from "@sourcebot/error";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import * as Sentry from "@sentry/node";
import * as azdev from "azure-devops-node-api";
import { GitRepository } from "azure-devops-node-api/interfaces/GitInterfaces.js";
const logger = createLogger('azuredevops');

// Hostname of the Azure DevOps cloud service. Used to build the default base
// URL when no custom `url` is provided in the connection config.
const AZUREDEVOPS_CLOUD_HOSTNAME = "dev.azure.com";
/**
 * Builds the base URL for an Azure DevOps organization (Cloud) or
 * collection (Server), optionally inserting the legacy `/tfs` path segment
 * required by older TFS installations.
 */
function buildOrgUrl(baseUrl: string, org: string, useTfsPath: boolean): string {
    const segments = [baseUrl];
    if (useTfsPath) {
        segments.push('/tfs');
    }
    segments.push(`/${org}`);
    return segments.join('');
}
/**
 * Creates an authenticated Azure DevOps API client for the given org URL,
 * using Personal Access Token (PAT) authentication.
 */
function createAzureDevOpsConnection(
    orgUrl: string,
    token: string,
): azdev.WebApi {
    return new azdev.WebApi(orgUrl, azdev.getPersonalAccessTokenHandler(token));
}
/**
 * Resolves the set of Azure DevOps repositories described by a connection
 * config. Repositories are gathered from three optional sources — whole
 * organizations/collections (`orgs`), individual projects (`projects`), and
 * individual repositories (`repos`) — and then filtered through the config's
 * `exclude` rules.
 *
 * @throws BackendException (CONNECTION_SYNC_INVALID_TOKEN) when no Personal
 *         Access Token is configured.
 * @returns the repos that passed filtering, plus the identifiers that could
 *          not be found on the host.
 */
export const getAzureDevOpsReposFromConfig = async (
    config: AzureDevOpsConnectionConfig,
    orgId: number,
    db: PrismaClient
) => {
    const baseUrl = config.url || `https://${AZUREDEVOPS_CLOUD_HOSTNAME}`;
    const token = config.token
        ? await getTokenFromConfig(config.token, orgId, db, logger)
        : undefined;

    // A PAT is mandatory for Azure DevOps; bail out early when it's missing.
    if (!token) {
        const e = new BackendException(BackendError.CONNECTION_SYNC_INVALID_TOKEN, {
            message: 'Azure DevOps requires a Personal Access Token',
        });
        Sentry.captureException(e);
        throw e;
    }

    const useTfsPath = config.useTfsPath || false;

    const notFound: {
        users: string[],
        orgs: string[],
        repos: string[],
    } = {
        users: [],
        orgs: [],
        repos: [],
    };
    let allRepos: GitRepository[] = [];

    if (config.orgs) {
        const { validRepos, notFoundOrgs } = await getReposForOrganizations(
            config.orgs,
            baseUrl,
            token,
            useTfsPath
        );
        allRepos = allRepos.concat(validRepos);
        notFound.orgs = notFoundOrgs;
    }

    if (config.projects) {
        const { validRepos, notFoundProjects } = await getReposForProjects(
            config.projects,
            baseUrl,
            token,
            useTfsPath
        );
        allRepos = allRepos.concat(validRepos);
        // NOTE(review): unfound projects are surfaced under `repos` — confirm
        // this matches how the not-found report is consumed upstream.
        notFound.repos = notFound.repos.concat(notFoundProjects);
    }

    if (config.repos) {
        const { validRepos, notFoundRepos } = await getRepos(
            config.repos,
            baseUrl,
            token,
            useTfsPath
        );
        allRepos = allRepos.concat(validRepos);
        notFound.repos = notFound.repos.concat(notFoundRepos);
    }

    // Drop anything the connection's exclusion rules reject.
    const repos = allRepos.filter((repo) => !shouldExcludeRepo({
        repo,
        exclude: config.exclude,
    }));

    logger.debug(`Found ${repos.length} total repositories.`);

    return {
        validRepos: repos,
        notFound,
    };
};
/**
 * Decides whether a repository should be excluded from syncing based on the
 * connection config's `exclude` rules: disabled state, repo/project glob
 * patterns (via micromatch), and size bounds. Repos without a `remoteUrl`
 * are always excluded.
 */
export const shouldExcludeRepo = ({
    repo,
    exclude
}: {
    repo: GitRepository,
    exclude?: AzureDevOpsConnectionConfig['exclude']
}) => {
    const repoName = `${repo.project!.name}/${repo.name}`;

    // Returns a human-readable reason when the repo must be excluded, or
    // undefined when it should be kept.
    const findExclusionReason = (): string | undefined => {
        if (!repo.remoteUrl) {
            return 'remoteUrl is undefined';
        }

        if (!!exclude?.disabled && repo.isDisabled) {
            return `\`exclude.disabled\` is true`;
        }

        if (exclude?.repos && micromatch.isMatch(repoName, exclude.repos)) {
            return `\`exclude.repos\` contains ${repoName}`;
        }

        if (exclude?.projects && micromatch.isMatch(repo.project!.name!, exclude.projects)) {
            return `\`exclude.projects\` contains ${repo.project!.name}`;
        }

        // Size filters only apply when the repo actually reports a size.
        const repoSizeInBytes = repo.size || 0;
        if (exclude?.size && repoSizeInBytes) {
            const { min, max } = exclude.size;
            if (min && repoSizeInBytes < min) {
                return `repo is less than \`exclude.size.min\`=${min} bytes.`;
            }
            if (max && repoSizeInBytes > max) {
                return `repo is greater than \`exclude.size.max\`=${max} bytes.`;
            }
        }

        return undefined;
    };

    const reason = findExclusionReason();
    if (reason !== undefined) {
        logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
        return true;
    }
    return false;
};
/**
 * Fetches every repository in each of the given Azure DevOps organizations
 * (Cloud) / collections (Server).
 *
 * For each org, all projects are listed via the Core API, then each
 * project's repositories are fetched via the Git API. A failure on a single
 * project is logged and skipped; a failure at the org level is retried via
 * `fetchWithRetry` and, on a 404, recorded as a not-found org. Any other
 * org-level failure rejects the whole call (via `throwIfAnyFailed`).
 *
 * @returns the repositories found plus the orgs that were not accessible.
 */
async function getReposForOrganizations(
    organizations: string[],
    baseUrl: string,
    token: string,
    useTfsPath: boolean
) {
    // Fetch all orgs concurrently; allSettled lets us classify each result.
    const results = await Promise.allSettled(organizations.map(async (org) => {
        try {
            logger.debug(`Fetching repositories for organization ${org}...`);

            const { durationMs, data } = await measure(async () => {
                const fetchFn = async () => {
                    const orgUrl = buildOrgUrl(baseUrl, org, useTfsPath);
                    const connection = createAzureDevOpsConnection(orgUrl, token); // useTfsPath already handled in orgUrl
                    const coreApi = await connection.getCoreApi();
                    const gitApi = await connection.getGitApi();

                    const projects = await coreApi.getProjects();

                    const allRepos: GitRepository[] = [];
                    for (const project of projects) {
                        // getRepositories requires a project id; skip malformed entries.
                        if (!project.id) {
                            logger.warn(`Encountered project in org ${org} with no id: ${project.name}`);
                            continue;
                        }

                        try {
                            const repos = await gitApi.getRepositories(project.id);
                            allRepos.push(...repos);
                        } catch (error) {
                            // Best-effort per project: a single failing project
                            // should not sink the whole organization.
                            logger.warn(`Failed to fetch repositories for project ${project.name}: ${error}`);
                        }
                    }

                    return allRepos;
                };

                return fetchWithRetry(fetchFn, `organization ${org}`, logger);
            });

            logger.debug(`Found ${data.length} repositories in organization ${org} in ${durationMs}ms.`);

            return {
                type: 'valid' as const,
                data
            };
        } catch (error) {
            Sentry.captureException(error);
            logger.error(`Failed to fetch repositories for organization ${org}.`, error);

            // Check if it's a 404-like error (organization not found)
            if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
                logger.error(`Organization ${org} not found or no access`);
                return {
                    type: 'notFound' as const,
                    value: org
                };
            }
            throw error;
        }
    }));

    // Re-throw any non-404 failure, then split valid/not-found results.
    throwIfAnyFailed(results);
    const { validItems: validRepos, notFoundItems: notFoundOrgs } = processPromiseResults<GitRepository>(results);

    return {
        validRepos,
        notFoundOrgs,
    };
}
/**
 * Fetches all repositories for each of the given project identifiers,
 * formatted as '{org}/{project}'. Projects that return a 404 are reported
 * as not found; any other failure aborts the sync.
 */
async function getReposForProjects(
    projects: string[],
    baseUrl: string,
    token: string,
    useTfsPath: boolean
) {
    // Fetches one project's repositories, with retry and timing.
    const fetchProjectRepos = async (project: string) => {
        const [org, projectName] = project.split('/');
        logger.debug(`Fetching repositories for project ${project}...`);

        const { durationMs, data } = await measure(() =>
            fetchWithRetry(async () => {
                const connection = createAzureDevOpsConnection(buildOrgUrl(baseUrl, org, useTfsPath), token);
                const gitApi = await connection.getGitApi();
                return gitApi.getRepositories(projectName);
            }, `project ${project}`, logger)
        );

        logger.debug(`Found ${data.length} repositories in project ${project} in ${durationMs}ms.`);
        return data;
    };

    const results = await Promise.allSettled(projects.map(async (project) => {
        try {
            const data = await fetchProjectRepos(project);
            return {
                type: 'valid' as const,
                data
            };
        } catch (error) {
            Sentry.captureException(error);
            logger.error(`Failed to fetch repositories for project ${project}.`, error);

            // A 404 means the project doesn't exist or the token lacks access.
            if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
                logger.error(`Project ${project} not found or no access`);
                return {
                    type: 'notFound' as const,
                    value: project
                };
            }
            throw error;
        }
    }));

    throwIfAnyFailed(results);
    const { validItems: validRepos, notFoundItems: notFoundProjects } = processPromiseResults<GitRepository>(results);

    return {
        validRepos,
        notFoundProjects,
    };
}
/**
 * Fetches metadata for each individually-specified repository, formatted as
 * '{orgName}/{projectName}/{repoName}'. Repositories that return a 404 are
 * reported as not found; any other failure aborts the sync.
 *
 * @returns the repository records found plus the identifiers that were not
 *          accessible.
 */
async function getRepos(
    repoList: string[],
    baseUrl: string,
    token: string,
    useTfsPath: boolean
) {
    const results = await Promise.allSettled(repoList.map(async (repo) => {
        try {
            const [org, projectName, repoName] = repo.split('/');
            // debug level for consistency with the org/project fetchers.
            logger.debug(`Fetching repository info for ${repo}...`);

            const { durationMs, data: result } = await measure(async () => {
                const fetchFn = async () => {
                    const orgUrl = buildOrgUrl(baseUrl, org, useTfsPath);
                    const connection = createAzureDevOpsConnection(orgUrl, token);
                    const gitApi = await connection.getGitApi();
                    // Distinct name avoids shadowing the outer `repo` identifier string.
                    const repository = await gitApi.getRepository(repoName, projectName);
                    return repository;
                };

                return fetchWithRetry(fetchFn, repo, logger);
            });

            logger.debug(`Found info for repository ${repo} in ${durationMs}ms`);

            return {
                type: 'valid' as const,
                data: [result]
            };
        } catch (error) {
            Sentry.captureException(error);
            logger.error(`Failed to fetch repository ${repo}.`, error);

            // A 404 means the repo doesn't exist or the token lacks access.
            if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
                logger.error(`Repository ${repo} not found or no access`);
                return {
                    type: 'notFound' as const,
                    value: repo
                };
            }
            throw error;
        }
    }));

    throwIfAnyFailed(results);
    const { validItems: validRepos, notFoundItems: notFoundRepos } = processPromiseResults<GitRepository>(results);

    return {
        validRepos,
        notFoundRepos,
    };
}

View file

@ -148,13 +148,14 @@ function cloudClient(user: string | undefined, token: string | undefined): Bitbu
**/ **/
const getPaginatedCloud = async <T>( const getPaginatedCloud = async <T>(
path: CloudGetRequestPath, path: CloudGetRequestPath,
get: (url: CloudGetRequestPath) => Promise<CloudPaginatedResponse<T>> get: (path: CloudGetRequestPath, query?: Record<string, string>) => Promise<CloudPaginatedResponse<T>>
): Promise<T[]> => { ): Promise<T[]> => {
const results: T[] = []; const results: T[] = [];
let url = path; let nextPath = path;
let nextQuery = undefined;
while (true) { while (true) {
const response = await get(url); const response = await get(nextPath, nextQuery);
if (!response.values || response.values.length === 0) { if (!response.values || response.values.length === 0) {
break; break;
@ -166,25 +167,38 @@ const getPaginatedCloud = async <T>(
break; break;
} }
url = response.next as CloudGetRequestPath; const parsedUrl = parseUrl(response.next);
nextPath = parsedUrl.path as CloudGetRequestPath;
nextQuery = parsedUrl.query;
} }
return results; return results;
} }
/**
* Parse the url into a path and query parameters to be used with the api client (openapi-fetch)
*/
function parseUrl(url: string): { path: string; query: Record<string, string>; } {
const fullUrl = new URL(url);
const path = fullUrl.pathname.replace(/^\/\d+(\.\d+)*/, ''); // remove version number in the beginning of the path
const query = Object.fromEntries(fullUrl.searchParams);
logger.debug(`Parsed url ${url} into path ${path} and query ${JSON.stringify(query)}`);
return { path, query };
}
async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{validRepos: CloudRepository[], notFoundWorkspaces: string[]}> { async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{validRepos: CloudRepository[], notFoundWorkspaces: string[]}> {
const results = await Promise.allSettled(workspaces.map(async (workspace) => { const results = await Promise.allSettled(workspaces.map(async (workspace) => {
try { try {
logger.debug(`Fetching all repos for workspace ${workspace}...`); logger.debug(`Fetching all repos for workspace ${workspace}...`);
const path = `/repositories/${workspace}` as CloudGetRequestPath;
const { durationMs, data } = await measure(async () => { const { durationMs, data } = await measure(async () => {
const fetchFn = () => getPaginatedCloud<CloudRepository>(path, async (url) => { const fetchFn = () => getPaginatedCloud<CloudRepository>(`/repositories/${workspace}` as CloudGetRequestPath, async (path, query) => {
const response = await client.apiClient.GET(url, { const response = await client.apiClient.GET(path, {
params: { params: {
path: { path: {
workspace, workspace,
} },
query: query,
} }
}); });
const { data, error } = response; const { data, error } = response;
@ -238,11 +252,14 @@ async function cloudGetReposForProjects(client: BitbucketClient, projects: strin
logger.debug(`Fetching all repos for project ${project} for workspace ${workspace}...`); logger.debug(`Fetching all repos for project ${project} for workspace ${workspace}...`);
try { try {
const path = `/repositories/${workspace}` as CloudGetRequestPath; const repos = await getPaginatedCloud<CloudRepository>(`/repositories/${workspace}` as CloudGetRequestPath, async (path, query) => {
const repos = await getPaginatedCloud<CloudRepository>(path, async (url) => { const response = await client.apiClient.GET(path, {
const response = await client.apiClient.GET(url, {
params: { params: {
path: {
workspace,
},
query: { query: {
...query,
q: `project.key="${project_name}"` q: `project.key="${project_name}"`
} }
} }

View file

@ -4,19 +4,13 @@ import { Settings } from "./types.js";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/logger";
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig, compileGenericGitHostConfig } from "./repoCompileUtils.js"; import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig, compileAzureDevOpsConfig, compileGenericGitHostConfig } from "./repoCompileUtils.js";
import { BackendError, BackendException } from "@sourcebot/error"; import { BackendError, BackendException } from "@sourcebot/error";
import { captureEvent } from "./posthog.js"; import { captureEvent } from "./posthog.js";
import { env } from "./env.js"; import { env } from "./env.js";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { loadConfig, syncSearchContexts } from "@sourcebot/shared"; import { loadConfig, syncSearchContexts } from "@sourcebot/shared";
interface IConnectionManager {
scheduleConnectionSync: (connection: Connection) => Promise<void>;
registerPollingCallback: () => void;
dispose: () => void;
}
const QUEUE_NAME = 'connectionSyncQueue'; const QUEUE_NAME = 'connectionSyncQueue';
type JobPayload = { type JobPayload = {
@ -30,10 +24,11 @@ type JobResult = {
repoCount: number, repoCount: number,
} }
export class ConnectionManager implements IConnectionManager { export class ConnectionManager {
private worker: Worker; private worker: Worker;
private queue: Queue<JobPayload>; private queue: Queue<JobPayload>;
private logger = createLogger('connection-manager'); private logger = createLogger('connection-manager');
private interval?: NodeJS.Timeout;
constructor( constructor(
private db: PrismaClient, private db: PrismaClient,
@ -75,8 +70,9 @@ export class ConnectionManager implements IConnectionManager {
}); });
} }
public async registerPollingCallback() { public startScheduler() {
setInterval(async () => { this.logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs); const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs);
const connections = await this.db.connection.findMany({ const connections = await this.db.connection.findMany({
where: { where: {
@ -177,6 +173,9 @@ export class ConnectionManager implements IConnectionManager {
case 'bitbucket': { case 'bitbucket': {
return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db); return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db);
} }
case 'azuredevops': {
return await compileAzureDevOpsConfig(config, job.data.connectionId, orgId, this.db, abortController);
}
case 'git': { case 'git': {
return await compileGenericGitHostConfig(config, job.data.connectionId, orgId); return await compileGenericGitHostConfig(config, job.data.connectionId, orgId);
} }
@ -366,6 +365,9 @@ export class ConnectionManager implements IConnectionManager {
} }
public dispose() { public dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close(); this.worker.close();
this.queue.close(); this.queue.close();
} }

View file

@ -15,5 +15,11 @@ export const DEFAULT_SETTINGS: Settings = {
maxRepoGarbageCollectionJobConcurrency: 8, maxRepoGarbageCollectionJobConcurrency: 8,
repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
enablePublicAccess: false // deprecated, use FORCE_ENABLE_ANONYMOUS_ACCESS instead enablePublicAccess: false, // deprecated, use FORCE_ENABLE_ANONYMOUS_ACCESS instead
experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
} }
// Code host types that support permission syncing. Currently only 'github'
// is listed; extend as other hosts gain support.
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [
    'github',
];

View file

@ -0,0 +1,274 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { hasEntitlement } from "@sourcebot/shared";
import { Job, Queue, Worker } from 'bullmq';
import { Redis } from 'ioredis';
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getRepoCollaborators } from "../github.js";
import { Settings } from "../types.js";
import { getAuthCredentialsForRepo } from "../utils.js";
// Payload carried by each BullMQ job. NOTE(review): `jobId` presumably
// references a RepoPermissionSyncJob database record (see the
// `repoPermissionSyncJob.createManyAndReturn` call below) — confirm in runJob.
type RepoPermissionSyncJob = {
    jobId: string;
}

const QUEUE_NAME = 'repoPermissionSyncQueue';

const logger = createLogger('repo-permission-syncer');
export class RepoPermissionSyncer {
private queue: Queue<RepoPermissionSyncJob>;
private worker: Worker<RepoPermissionSyncJob>;
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
) {
this.queue = new Queue<RepoPermissionSyncJob>(QUEUE_NAME, {
connection: redis,
});
this.worker = new Worker<RepoPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
connection: redis,
concurrency: 1,
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
}
public startScheduler() {
if (!hasEntitlement('permission-syncing')) {
throw new Error('Permission syncing is not supported in current plan.');
}
logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
// @todo: make this configurable
const thresholdDate = new Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs);
const repos = await this.db.repo.findMany({
// Repos need their permissions to be synced against the code host when...
where: {
// They belong to a code host that supports permissions syncing
AND: [
{
external_codeHostType: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES,
}
},
{
OR: [
{ permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } },
],
},
{
NOT: {
permissionSyncJobs: {
some: {
OR: [
// Don't schedule if there are active jobs
{
status: {
in: [
RepoPermissionSyncJobStatus.PENDING,
RepoPermissionSyncJobStatus.IN_PROGRESS,
],
}
},
// Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is a inverse condition.
{
AND: [
{ status: RepoPermissionSyncJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
}
}
},
]
}
});
await this.schedulePermissionSync(repos);
}, 1000 * 5);
}
public dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close();
this.queue.close();
}
private async schedulePermissionSync(repos: Repo[]) {
await this.db.$transaction(async (tx) => {
const jobs = await tx.repoPermissionSyncJob.createManyAndReturn({
data: repos.map(repo => ({
repoId: repo.id,
})),
});
await this.queue.addBulk(jobs.map((job) => ({
name: 'repoPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
});
}
private async runJob(job: Job<RepoPermissionSyncJob>) {
const id = job.data.jobId;
const { repo } = await this.db.repoPermissionSyncJob.update({
where: {
id,
},
data: {
status: RepoPermissionSyncJobStatus.IN_PROGRESS,
},
select: {
repo: {
include: {
connections: {
include: {
connection: true,
}
}
}
}
}
});
if (!repo) {
throw new Error(`Repo ${id} not found`);
}
logger.info(`Syncing permissions for repo ${repo.displayName}...`);
const credentials = await getAuthCredentialsForRepo(repo, this.db, logger);
if (!credentials) {
throw new Error(`No credentials found for repo ${id}`);
}
const userIds = await (async () => {
if (repo.external_codeHostType === 'github') {
const { octokit } = await createOctokitFromToken({
token: credentials.token,
url: credentials.hostUrl,
});
// @note: this is a bit of a hack since the displayName _might_ not be set..
// however, this property was introduced many versions ago and _should_ be set
// on each connection sync. Let's throw an error just in case.
if (!repo.displayName) {
throw new Error(`Repo ${id} does not have a displayName`);
}
const [owner, repoName] = repo.displayName.split('/');
const collaborators = await getRepoCollaborators(owner, repoName, octokit);
const githubUserIds = collaborators.map(collaborator => collaborator.id.toString());
const accounts = await this.db.account.findMany({
where: {
provider: 'github',
providerAccountId: {
in: githubUserIds,
}
},
select: {
userId: true,
},
});
return accounts.map(account => account.userId);
}
return [];
})();
await this.db.$transaction([
this.db.repo.update({
where: {
id: repo.id,
},
data: {
permittedUsers: {
deleteMany: {},
}
}
}),
this.db.userToRepoPermission.createMany({
data: userIds.map(userId => ({
userId,
repoId: repo.id,
})),
})
]);
}
private async onJobCompleted(job: Job<RepoPermissionSyncJob>) {
const { repo } = await this.db.repoPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: RepoPermissionSyncJobStatus.COMPLETED,
repo: {
update: {
permissionSyncedAt: new Date(),
}
},
completedAt: new Date(),
},
select: {
repo: true
}
});
logger.info(`Permissions synced for repo ${repo.displayName ?? repo.name}`);
}
private async onJobFailed(job: Job<RepoPermissionSyncJob> | undefined, err: Error) {
Sentry.captureException(err, {
tags: {
jobId: job?.data.jobId,
queue: QUEUE_NAME,
}
});
const errorMessage = (repoName: string) => `Repo permission sync job failed for repo ${repoName}: ${err.message}`;
if (job) {
const { repo } = await this.db.repoPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: RepoPermissionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: err.message,
},
select: {
repo: true
},
});
logger.error(errorMessage(repo.displayName ?? repo.name));
} else {
logger.error(errorMessage('unknown repo (id not found)'));
}
}
}

View file

@ -0,0 +1,266 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { Job, Queue, Worker } from "bullmq";
import { Redis } from "ioredis";
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js";
import { hasEntitlement } from "@sourcebot/shared";
import { Settings } from "../types.js";
const logger = createLogger('user-permission-syncer');
const QUEUE_NAME = 'userPermissionSyncQueue';
type UserPermissionSyncJob = {
jobId: string;
}
export class UserPermissionSyncer {
private queue: Queue<UserPermissionSyncJob>;
private worker: Worker<UserPermissionSyncJob>;
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
) {
this.queue = new Queue<UserPermissionSyncJob>(QUEUE_NAME, {
connection: redis,
});
this.worker = new Worker<UserPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
connection: redis,
concurrency: 1,
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
}
public startScheduler() {
if (!hasEntitlement('permission-syncing')) {
throw new Error('Permission syncing is not supported in current plan.');
}
logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs);
const users = await this.db.user.findMany({
where: {
AND: [
{
accounts: {
some: {
provider: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES
}
}
}
},
{
OR: [
{ permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } },
]
},
{
NOT: {
permissionSyncJobs: {
some: {
OR: [
// Don't schedule if there are active jobs
{
status: {
in: [
UserPermissionSyncJobStatus.PENDING,
UserPermissionSyncJobStatus.IN_PROGRESS,
],
}
},
// Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is a inverse condition.
{
AND: [
{ status: UserPermissionSyncJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
}
}
},
]
}
});
await this.schedulePermissionSync(users);
}, 1000 * 5);
}
public dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close();
this.queue.close();
}
private async schedulePermissionSync(users: User[]) {
await this.db.$transaction(async (tx) => {
const jobs = await tx.userPermissionSyncJob.createManyAndReturn({
data: users.map(user => ({
userId: user.id,
})),
});
await this.queue.addBulk(jobs.map((job) => ({
name: 'userPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
});
}
private async runJob(job: Job<UserPermissionSyncJob>) {
const id = job.data.jobId;
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id,
},
data: {
status: UserPermissionSyncJobStatus.IN_PROGRESS,
},
select: {
user: {
include: {
accounts: true,
}
}
}
});
if (!user) {
throw new Error(`User ${id} not found`);
}
logger.info(`Syncing permissions for user ${user.email}...`);
// Get a list of all repos that the user has access to from all connected accounts.
const repoIds = await (async () => {
const aggregatedRepoIds: Set<number> = new Set();
for (const account of user.accounts) {
if (account.provider === 'github') {
if (!account.access_token) {
throw new Error(`User '${user.email}' does not have an GitHub OAuth access token associated with their GitHub account.`);
}
const { octokit } = await createOctokitFromToken({
token: account.access_token,
url: env.AUTH_EE_GITHUB_BASE_URL,
});
// @note: we only care about the private repos since we don't need to build a mapping
// for public repos.
// @see: packages/web/src/prisma.ts
const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit);
const gitHubRepoIds = githubRepos.map(repo => repo.id.toString());
const repos = await this.db.repo.findMany({
where: {
external_codeHostType: 'github',
external_id: {
in: gitHubRepoIds,
}
}
});
repos.forEach(repo => aggregatedRepoIds.add(repo.id));
}
}
return Array.from(aggregatedRepoIds);
})();
await this.db.$transaction([
this.db.user.update({
where: {
id: user.id,
},
data: {
accessibleRepos: {
deleteMany: {},
}
}
}),
this.db.userToRepoPermission.createMany({
data: repoIds.map(repoId => ({
userId: user.id,
repoId,
})),
skipDuplicates: true,
})
]);
}
private async onJobCompleted(job: Job<UserPermissionSyncJob>) {
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: UserPermissionSyncJobStatus.COMPLETED,
user: {
update: {
permissionSyncedAt: new Date(),
}
},
completedAt: new Date(),
},
select: {
user: true
}
});
logger.info(`Permissions synced for user ${user.email}`);
}
private async onJobFailed(job: Job<UserPermissionSyncJob> | undefined, err: Error) {
Sentry.captureException(err, {
tags: {
jobId: job?.data.jobId,
queue: QUEUE_NAME,
}
});
const errorMessage = (email: string) => `User permission sync job failed for user ${email}: ${err.message}`;
if (job) {
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: UserPermissionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: err.message,
},
select: {
user: true,
}
});
logger.error(errorMessage(user.email ?? user.id));
} else {
logger.error(errorMessage('unknown user (id not found)'));
}
}
}

View file

@ -43,6 +43,7 @@ export const env = createEnv({
LOGTAIL_TOKEN: z.string().optional(), LOGTAIL_TOKEN: z.string().optional(),
LOGTAIL_HOST: z.string().url().optional(), LOGTAIL_HOST: z.string().url().optional(),
SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"),
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"), DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
CONFIG_PATH: z.string().optional(), CONFIG_PATH: z.string().optional(),
@ -51,6 +52,9 @@ export const env = createEnv({
REPO_SYNC_RETRY_BASE_SLEEP_SECONDS: numberSchema.default(60), REPO_SYNC_RETRY_BASE_SLEEP_SECONDS: numberSchema.default(60),
GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10), GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10),
EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'),
AUTH_EE_GITHUB_BASE_URL: z.string().optional(),
}, },
runtimeEnv: process.env, runtimeEnv: process.env,
emptyStringAsUndefined: true, emptyStringAsUndefined: true,

View file

@ -1,53 +1,102 @@
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git'; import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
import { mkdir } from 'node:fs/promises';
import { env } from './env.js';
type onProgressFn = (event: SimpleGitProgressEvent) => void; type onProgressFn = (event: SimpleGitProgressEvent) => void;
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: onProgressFn) => { export const cloneRepository = async (
{
cloneUrl,
authHeader,
path,
onProgress,
}: {
cloneUrl: string,
authHeader?: string,
path: string,
onProgress?: onProgressFn
}
) => {
try {
await mkdir(path, { recursive: true });
const git = simpleGit({ const git = simpleGit({
progress: onProgress, progress: onProgress,
}); }).cwd({
try {
await git.clone(
cloneURL,
path, path,
[ })
const cloneArgs = [
"--bare", "--bare",
] ...(authHeader ? ["-c", `http.extraHeader=${authHeader}`] : [])
); ];
await git.cwd({ await git.clone(cloneUrl, path, cloneArgs);
path,
}).addConfig("remote.origin.fetch", "+refs/heads/*:refs/heads/*"); await unsetGitConfig(path, ["remote.origin.url"]);
} catch (error: unknown) { } catch (error: unknown) {
if (error instanceof Error) { const baseLog = `Failed to clone repository: ${path}`;
throw new Error(`Failed to clone repository: ${error.message}`);
if (env.SOURCEBOT_LOG_LEVEL !== "debug") {
// Avoid printing the remote URL (that may contain credentials) to logs by default.
throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`);
} else if (error instanceof Error) {
throw new Error(`${baseLog}. Reason: ${error.message}`);
} else { } else {
throw new Error(`Failed to clone repository: ${error}`); throw new Error(`${baseLog}. Error: ${error}`);
}
} }
} }
};
export const fetchRepository = async (
export const fetchRepository = async (path: string, onProgress?: onProgressFn) => { {
cloneUrl,
authHeader,
path,
onProgress,
}: {
cloneUrl: string,
authHeader?: string,
path: string,
onProgress?: onProgressFn
}
) => {
try {
const git = simpleGit({ const git = simpleGit({
progress: onProgress, progress: onProgress,
}); }).cwd({
try {
await git.cwd({
path: path, path: path,
}).fetch( })
"origin",
[ if (authHeader) {
await git.addConfig("http.extraHeader", authHeader);
}
await git.fetch([
cloneUrl,
"+refs/heads/*:refs/heads/*",
"--prune", "--prune",
"--progress" "--progress"
] ]);
);
} catch (error: unknown) { } catch (error: unknown) {
if (error instanceof Error) { const baseLog = `Failed to fetch repository: ${path}`;
throw new Error(`Failed to fetch repository ${path}: ${error.message}`); if (env.SOURCEBOT_LOG_LEVEL !== "debug") {
// Avoid printing the remote URL (that may contain credentials) to logs by default.
throw new Error(`${baseLog}. Set environment variable SOURCEBOT_LOG_LEVEL=debug to see the full error message.`);
} else if (error instanceof Error) {
throw new Error(`${baseLog}. Reason: ${error.message}`);
} else { } else {
throw new Error(`Failed to fetch repository ${path}: ${error}`); throw new Error(`${baseLog}. Error: ${error}`);
}
} finally {
if (authHeader) {
const git = simpleGit({
progress: onProgress,
}).cwd({
path: path,
})
await git.raw(["config", "--unset", "http.extraHeader", authHeader]);
} }
} }
} }
@ -76,6 +125,33 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
} }
} }
/**
* Unsets the specified keys in the git config for the repo at the given path.
* If a key is not set, this is a no-op.
*/
export const unsetGitConfig = async (path: string, keys: string[], onProgress?: onProgressFn) => {
const git = simpleGit({
progress: onProgress,
}).cwd(path);
try {
const configList = await git.listConfig();
const setKeys = Object.keys(configList.all);
for (const key of keys) {
if (setKeys.includes(key)) {
await git.raw(['config', '--unset', key]);
}
}
} catch (error: unknown) {
if (error instanceof Error) {
throw new Error(`Failed to unset git config ${path}: ${error.message}`);
} else {
throw new Error(`Failed to unset git config ${path}: ${error}`);
}
}
}
/** /**
* Returns true if `path` is the _root_ of a git repository. * Returns true if `path` is the _root_ of a git repository.
*/ */

View file

@ -30,6 +30,7 @@ export type OctokitRepository = {
size?: number, size?: number,
owner: { owner: {
avatar_url: string, avatar_url: string,
login: string,
} }
} }
@ -40,6 +41,20 @@ const isHttpError = (error: unknown, status: number): boolean => {
&& error.status === status; && error.status === status;
} }
export const createOctokitFromToken = async ({ token, url }: { token?: string, url?: string }): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => {
const octokit = new Octokit({
auth: token,
...(url ? {
baseUrl: `${url}/api/v3`
} : {}),
});
return {
octokit,
isAuthenticated: !!token,
};
}
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => { export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => {
const hostname = config.url ? const hostname = config.url ?
new URL(config.url).hostname : new URL(config.url).hostname :
@ -51,14 +66,12 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o
env.FALLBACK_GITHUB_CLOUD_TOKEN : env.FALLBACK_GITHUB_CLOUD_TOKEN :
undefined; undefined;
const octokit = new Octokit({ const { octokit, isAuthenticated } = await createOctokitFromToken({
auth: token, token,
...(config.url ? { url: config.url,
baseUrl: `${config.url}/api/v3`
} : {}),
}); });
if (token) { if (isAuthenticated) {
try { try {
await octokit.rest.users.getAuthenticated(); await octokit.rest.users.getAuthenticated();
} catch (error) { } catch (error) {
@ -106,8 +119,7 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o
} }
if (config.users) { if (config.users) {
const isAuthenticated = config.token !== undefined; const { validRepos, notFoundUsers } = await getReposOwnedByUsers(config.users, octokit, signal);
const { validRepos, notFoundUsers } = await getReposOwnedByUsers(config.users, isAuthenticated, octokit, signal);
allRepos = allRepos.concat(validRepos); allRepos = allRepos.concat(validRepos);
notFound.users = notFoundUsers; notFound.users = notFoundUsers;
} }
@ -133,118 +145,64 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o
}; };
} }
export const shouldExcludeRepo = ({ export const getRepoCollaborators = async (owner: string, repo: string, octokit: Octokit) => {
try {
const fetchFn = () => octokit.paginate(octokit.repos.listCollaborators, {
owner,
repo, repo,
include, per_page: 100,
exclude });
} : {
repo: OctokitRepository,
include?: {
topics?: GithubConnectionConfig['topics']
},
exclude?: GithubConnectionConfig['exclude']
}) => {
let reason = '';
const repoName = repo.full_name;
const shouldExclude = (() => { const collaborators = await fetchWithRetry(fetchFn, `repo ${owner}/${repo}`, logger);
if (!repo.clone_url) { return collaborators;
reason = 'clone_url is undefined'; } catch (error) {
return true; Sentry.captureException(error);
} logger.error(`Failed to fetch collaborators for repo ${owner}/${repo}.`, error);
throw error;
if (!!exclude?.forks && repo.fork) {
reason = `\`exclude.forks\` is true`;
return true;
}
if (!!exclude?.archived && !!repo.archived) {
reason = `\`exclude.archived\` is true`;
return true;
}
if (exclude?.repos) {
if (micromatch.isMatch(repoName, exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
} }
} }
if (exclude?.topics) { export const getReposForAuthenticatedUser = async (visibility: 'all' | 'private' | 'public' = 'all', octokit: Octokit) => {
const configTopics = exclude.topics.map(topic => topic.toLowerCase()); try {
const repoTopics = repo.topics ?? []; const fetchFn = () => octokit.paginate(octokit.repos.listForAuthenticatedUser, {
per_page: 100,
visibility,
});
const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); const repos = await fetchWithRetry(fetchFn, `authenticated user`, logger);
if (matchingTopics.length > 0) { return repos;
reason = `\`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`; } catch (error) {
return true; Sentry.captureException(error);
logger.error(`Failed to fetch repositories for authenticated user.`, error);
throw error;
} }
} }
if (include?.topics) { const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: AbortSignal) => {
const configTopics = include.topics.map(topic => topic.toLowerCase());
const repoTopics = repo.topics ?? [];
const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics));
if (matchingTopics.length === 0) {
reason = `\`include.topics\` does not match any of the following topics: ${configTopics.join(', ')}`;
return true;
}
}
const repoSizeInBytes = repo.size ? repo.size * 1000 : undefined;
if (exclude?.size && repoSizeInBytes) {
const min = exclude.size.min;
const max = exclude.size.max;
if (min && repoSizeInBytes < min) {
reason = `repo is less than \`exclude.size.min\`=${min} bytes.`;
return true;
}
if (max && repoSizeInBytes > max) {
reason = `repo is greater than \`exclude.size.max\`=${max} bytes.`;
return true;
}
}
return false;
})();
if (shouldExclude) {
logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true;
}
return false;
}
const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, octokit: Octokit, signal: AbortSignal) => {
const results = await Promise.allSettled(users.map(async (user) => { const results = await Promise.allSettled(users.map(async (user) => {
try { try {
logger.debug(`Fetching repository info for user ${user}...`); logger.debug(`Fetching repository info for user ${user}...`);
const { durationMs, data } = await measure(async () => { const { durationMs, data } = await measure(async () => {
const fetchFn = async () => { const fetchFn = async () => {
if (isAuthenticated) { let query = `user:${user}`;
return octokit.paginate(octokit.repos.listForAuthenticatedUser, { // To include forks in the search results, we will need to add fork:true
username: user, // see: https://docs.github.com/en/search-github/searching-on-github/searching-for-repositories
visibility: 'all', query += ' fork:true';
affiliation: 'owner', // @note: We need to use GitHub's search API here since it is the only way
// to get all repositories (private and public) owned by a user that supports
// the username as a parameter.
// @see: https://github.com/orgs/community/discussions/24382#discussioncomment-3243958
// @see: https://api.github.com/search/repositories?q=user:USERNAME
const searchResults = await octokit.paginate(octokit.rest.search.repos, {
q: query,
per_page: 100, per_page: 100,
request: { request: {
signal, signal,
}, },
}); });
} else {
return octokit.paginate(octokit.repos.listForUser, { return searchResults as OctokitRepository[];
username: user,
per_page: 100,
request: {
signal,
},
});
}
}; };
return fetchWithRetry(fetchFn, `user ${user}`, logger); return fetchWithRetry(fetchFn, `user ${user}`, logger);
@ -372,3 +330,89 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna
notFoundRepos, notFoundRepos,
}; };
} }
export const shouldExcludeRepo = ({
repo,
include,
exclude
}: {
repo: OctokitRepository,
include?: {
topics?: GithubConnectionConfig['topics']
},
exclude?: GithubConnectionConfig['exclude']
}) => {
let reason = '';
const repoName = repo.full_name;
const shouldExclude = (() => {
if (!repo.clone_url) {
reason = 'clone_url is undefined';
return true;
}
if (!!exclude?.forks && repo.fork) {
reason = `\`exclude.forks\` is true`;
return true;
}
if (!!exclude?.archived && !!repo.archived) {
reason = `\`exclude.archived\` is true`;
return true;
}
if (exclude?.repos) {
if (micromatch.isMatch(repoName, exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
}
}
if (exclude?.topics) {
const configTopics = exclude.topics.map(topic => topic.toLowerCase());
const repoTopics = repo.topics ?? [];
const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics));
if (matchingTopics.length > 0) {
reason = `\`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`;
return true;
}
}
if (include?.topics) {
const configTopics = include.topics.map(topic => topic.toLowerCase());
const repoTopics = repo.topics ?? [];
const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics));
if (matchingTopics.length === 0) {
reason = `\`include.topics\` does not match any of the following topics: ${configTopics.join(', ')}`;
return true;
}
}
const repoSizeInBytes = repo.size ? repo.size * 1000 : undefined;
if (exclude?.size && repoSizeInBytes) {
const min = exclude.size.min;
const max = exclude.size.max;
if (min && repoSizeInBytes < min) {
reason = `repo is less than \`exclude.size.min\`=${min} bytes.`;
return true;
}
if (max && repoSizeInBytes > max) {
reason = `repo is greater than \`exclude.size.max\`=${max} bytes.`;
return true;
}
}
return false;
})();
if (shouldExclude) {
logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true;
}
return false;
}

View file

@ -41,3 +41,30 @@ test('shouldExcludeProject returns true when the project is excluded by exclude.
})).toBe(true) })).toBe(true)
}); });
test('shouldExcludeProject returns true when the project is excluded by exclude.userOwnedProjects.', () => {
const project = {
path_with_namespace: 'test/project',
namespace: {
kind: 'user',
}
} as unknown as ProjectSchema;
expect(shouldExcludeProject({
project,
exclude: {
userOwnedProjects: true,
}
})).toBe(true)
});
test('shouldExcludeProject returns false when exclude.userOwnedProjects is true but project is group-owned.', () => {
const project = {
path_with_namespace: 'test/project',
namespace: { kind: 'group' },
} as unknown as ProjectSchema;
expect(shouldExcludeProject({
project,
exclude: { userOwnedProjects: true },
})).toBe(false);
});

View file

@ -222,6 +222,11 @@ export const shouldExcludeProject = ({
return true; return true;
} }
if (exclude?.userOwnedProjects && project.namespace.kind === 'user') {
reason = `\`exclude.userOwnedProjects\` is true`;
return true;
}
if (exclude?.projects) { if (exclude?.projects) {
if (micromatch.isMatch(projectName, exclude.projects)) { if (micromatch.isMatch(projectName, exclude.projects)) {
reason = `\`exclude.projects\` contains ${projectName}`; reason = `\`exclude.projects\` contains ${projectName}`;

View file

@ -1,44 +1,37 @@
import "./instrument.js"; import "./instrument.js";
import * as Sentry from "@sentry/node"; import { PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { hasEntitlement, loadConfig } from '@sourcebot/shared';
import { existsSync } from 'fs'; import { existsSync } from 'fs';
import { mkdir } from 'fs/promises'; import { mkdir } from 'fs/promises';
import { Redis } from 'ioredis';
import path from 'path'; import path from 'path';
import { AppContext } from "./types.js"; import { ConnectionManager } from './connectionManager.js';
import { main } from "./main.js" import { DEFAULT_SETTINGS } from './constants.js';
import { PrismaClient } from "@sourcebot/db";
import { env } from "./env.js"; import { env } from "./env.js";
import { createLogger } from "@sourcebot/logger"; import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
import { PromClient } from './promClient.js';
import { RepoManager } from './repoManager.js';
import { AppContext } from "./types.js";
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js";
const logger = createLogger('backend-entrypoint'); const logger = createLogger('backend-entrypoint');
const getSettings = async (configPath?: string) => {
if (!configPath) {
return DEFAULT_SETTINGS;
}
// Register handler for normal exit const config = await loadConfig(configPath);
process.on('exit', (code) => {
logger.info(`Process is exiting with code: ${code}`);
});
// Register handlers for abnormal terminations return {
process.on('SIGINT', () => { ...DEFAULT_SETTINGS,
logger.info('Process interrupted (SIGINT)'); ...config.settings,
process.exit(0); }
}); }
process.on('SIGTERM', () => {
logger.info('Process terminated (SIGTERM)');
process.exit(0);
});
// Register handlers for uncaught exceptions and unhandled rejections
process.on('uncaughtException', (err) => {
logger.error(`Uncaught exception: ${err.message}`);
process.exit(1);
});
process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`);
process.exit(1);
});
const cacheDir = env.DATA_CACHE_DIR; const cacheDir = env.DATA_CACHE_DIR;
const reposPath = path.join(cacheDir, 'repos'); const reposPath = path.join(cacheDir, 'repos');
@ -59,18 +52,62 @@ const context: AppContext = {
const prisma = new PrismaClient(); const prisma = new PrismaClient();
main(prisma, context) const redis = new Redis(env.REDIS_URL, {
.then(async () => { maxRetriesPerRequest: null
await prisma.$disconnect(); });
}) redis.ping().then(() => {
.catch(async (e) => { logger.info('Connected to redis');
logger.error(e); }).catch((err: unknown) => {
Sentry.captureException(e); logger.error('Failed to connect to redis');
logger.error(err);
await prisma.$disconnect();
process.exit(1); process.exit(1);
})
.finally(() => {
logger.info("Shutting down...");
}); });
const promClient = new PromClient();
const settings = await getSettings(env.CONFIG_PATH);
const connectionManager = new ConnectionManager(prisma, settings, redis);
const repoManager = new RepoManager(prisma, settings, redis, promClient, context);
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis);
await repoManager.validateIndexedReposHaveShards();
connectionManager.startScheduler();
repoManager.startScheduler();
if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) {
logger.error('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.');
process.exit(1);
}
else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) {
repoPermissionSyncer.startScheduler();
userPermissionSyncer.startScheduler();
}
const cleanup = async (signal: string) => {
logger.info(`Recieved ${signal}, cleaning up...`);
connectionManager.dispose();
repoManager.dispose();
repoPermissionSyncer.dispose();
userPermissionSyncer.dispose();
await prisma.$disconnect();
await redis.quit();
}
process.on('SIGINT', () => cleanup('SIGINT').finally(() => process.exit(0)));
process.on('SIGTERM', () => cleanup('SIGTERM').finally(() => process.exit(0)));
// Register handlers for uncaught exceptions and unhandled rejections
process.on('uncaughtException', (err) => {
logger.error(`Uncaught exception: ${err.message}`);
cleanup('uncaughtException').finally(() => process.exit(1));
});
process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`);
cleanup('unhandledRejection').finally(() => process.exit(1));
});

View file

@ -1,49 +0,0 @@
import { PrismaClient } from '@sourcebot/db';
import { createLogger } from "@sourcebot/logger";
import { AppContext } from "./types.js";
import { DEFAULT_SETTINGS } from './constants.js';
import { Redis } from 'ioredis';
import { ConnectionManager } from './connectionManager.js';
import { RepoManager } from './repoManager.js';
import { env } from './env.js';
import { PromClient } from './promClient.js';
import { loadConfig } from '@sourcebot/shared';
const logger = createLogger('backend-main');
const getSettings = async (configPath?: string) => {
if (!configPath) {
return DEFAULT_SETTINGS;
}
const config = await loadConfig(configPath);
return {
...DEFAULT_SETTINGS,
...config.settings,
}
}
export const main = async (db: PrismaClient, context: AppContext) => {
const redis = new Redis(env.REDIS_URL, {
maxRetriesPerRequest: null
});
redis.ping().then(() => {
logger.info('Connected to redis');
}).catch((err: unknown) => {
logger.error('Failed to connect to redis');
logger.error(err);
process.exit(1);
});
const settings = await getSettings(env.CONFIG_PATH);
const promClient = new PromClient();
const connectionManager = new ConnectionManager(db, settings, redis);
connectionManager.registerPollingCallback();
const repoManager = new RepoManager(db, settings, redis, promClient, context);
await repoManager.validateIndexedReposHaveShards();
await repoManager.blockingPollLoop();
}

View file

@ -4,13 +4,15 @@ import { getGitLabReposFromConfig } from "./gitlab.js";
import { getGiteaReposFromConfig } from "./gitea.js"; import { getGiteaReposFromConfig } from "./gitea.js";
import { getGerritReposFromConfig } from "./gerrit.js"; import { getGerritReposFromConfig } from "./gerrit.js";
import { BitbucketRepository, getBitbucketReposFromConfig } from "./bitbucket.js"; import { BitbucketRepository, getBitbucketReposFromConfig } from "./bitbucket.js";
import { getAzureDevOpsReposFromConfig } from "./azuredevops.js";
import { SchemaRestRepository as BitbucketServerRepository } from "@coderabbitai/bitbucket/server/openapi"; import { SchemaRestRepository as BitbucketServerRepository } from "@coderabbitai/bitbucket/server/openapi";
import { SchemaRepository as BitbucketCloudRepository } from "@coderabbitai/bitbucket/cloud/openapi"; import { SchemaRepository as BitbucketCloudRepository } from "@coderabbitai/bitbucket/cloud/openapi";
import { Prisma, PrismaClient } from '@sourcebot/db'; import { Prisma, PrismaClient } from '@sourcebot/db';
import { WithRequired } from "./types.js" import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js"; import { marshalBool } from "./utils.js";
import { createLogger } from '@sourcebot/logger'; import { createLogger } from '@sourcebot/logger';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
import { RepoMetadata } from './types.js'; import { RepoMetadata } from './types.js';
import path from 'path'; import path from 'path';
import { glob } from 'glob'; import { glob } from 'glob';
@ -48,6 +50,7 @@ export const compileGithubConfig = async (
const repoDisplayName = repo.full_name; const repoDisplayName = repo.full_name;
const repoName = path.join(repoNameRoot, repoDisplayName); const repoName = path.join(repoNameRoot, repoDisplayName);
const cloneUrl = new URL(repo.clone_url!); const cloneUrl = new URL(repo.clone_url!);
const isPublic = repo.private === false;
logger.debug(`Found github repo ${repoDisplayName} with webUrl: ${repo.html_url}`); logger.debug(`Found github repo ${repoDisplayName} with webUrl: ${repo.html_url}`);
@ -62,6 +65,7 @@ export const compileGithubConfig = async (
imageUrl: repo.owner.avatar_url, imageUrl: repo.owner.avatar_url,
isFork: repo.fork, isFork: repo.fork,
isArchived: !!repo.archived, isArchived: !!repo.archived,
isPublic: isPublic,
org: { org: {
connect: { connect: {
id: orgId, id: orgId,
@ -83,7 +87,7 @@ export const compileGithubConfig = async (
'zoekt.github-forks': (repo.forks_count ?? 0).toString(), 'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(repo.archived), 'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork), 'zoekt.fork': marshalBool(repo.fork),
'zoekt.public': marshalBool(repo.private === false), 'zoekt.public': marshalBool(isPublic),
'zoekt.display-name': repoDisplayName, 'zoekt.display-name': repoDisplayName,
}, },
branches: config.revisions?.branches ?? undefined, branches: config.revisions?.branches ?? undefined,
@ -119,6 +123,8 @@ export const compileGitlabConfig = async (
const projectUrl = `${hostUrl}/${project.path_with_namespace}`; const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
const cloneUrl = new URL(project.http_url_to_repo); const cloneUrl = new URL(project.http_url_to_repo);
const isFork = project.forked_from_project !== undefined; const isFork = project.forked_from_project !== undefined;
// @todo: we will need to double check whether 'internal' should also be considered public or not.
const isPublic = project.visibility === 'public';
const repoDisplayName = project.path_with_namespace; const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName); const repoName = path.join(repoNameRoot, repoDisplayName);
// project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available // project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available
@ -137,6 +143,7 @@ export const compileGitlabConfig = async (
displayName: repoDisplayName, displayName: repoDisplayName,
imageUrl: avatarUrl, imageUrl: avatarUrl,
isFork: isFork, isFork: isFork,
isPublic: isPublic,
isArchived: !!project.archived, isArchived: !!project.archived,
org: { org: {
connect: { connect: {
@ -157,7 +164,7 @@ export const compileGitlabConfig = async (
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(), 'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(project.archived), 'zoekt.archived': marshalBool(project.archived),
'zoekt.fork': marshalBool(isFork), 'zoekt.fork': marshalBool(isFork),
'zoekt.public': marshalBool(project.private === false), 'zoekt.public': marshalBool(isPublic),
'zoekt.display-name': repoDisplayName, 'zoekt.display-name': repoDisplayName,
}, },
branches: config.revisions?.branches ?? undefined, branches: config.revisions?.branches ?? undefined,
@ -195,6 +202,7 @@ export const compileGiteaConfig = async (
cloneUrl.host = configUrl.host cloneUrl.host = configUrl.host
const repoDisplayName = repo.full_name!; const repoDisplayName = repo.full_name!;
const repoName = path.join(repoNameRoot, repoDisplayName); const repoName = path.join(repoNameRoot, repoDisplayName);
const isPublic = repo.internal === false && repo.private === false;
logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`); logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`);
@ -208,6 +216,7 @@ export const compileGiteaConfig = async (
displayName: repoDisplayName, displayName: repoDisplayName,
imageUrl: repo.owner?.avatar_url, imageUrl: repo.owner?.avatar_url,
isFork: repo.fork!, isFork: repo.fork!,
isPublic: isPublic,
isArchived: !!repo.archived, isArchived: !!repo.archived,
org: { org: {
connect: { connect: {
@ -226,7 +235,7 @@ export const compileGiteaConfig = async (
'zoekt.name': repoName, 'zoekt.name': repoName,
'zoekt.archived': marshalBool(repo.archived), 'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork!), 'zoekt.fork': marshalBool(repo.fork!),
'zoekt.public': marshalBool(repo.internal === false && repo.private === false), 'zoekt.public': marshalBool(isPublic),
'zoekt.display-name': repoDisplayName, 'zoekt.display-name': repoDisplayName,
}, },
branches: config.revisions?.branches ?? undefined, branches: config.revisions?.branches ?? undefined,
@ -310,6 +319,8 @@ export const compileGerritConfig = async (
'zoekt.public': marshalBool(true), 'zoekt.public': marshalBool(true),
'zoekt.display-name': repoDisplayName, 'zoekt.display-name': repoDisplayName,
}, },
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
} satisfies RepoMetadata, } satisfies RepoMetadata,
}; };
@ -407,6 +418,7 @@ export const compileBitbucketConfig = async (
name: repoName, name: repoName,
displayName: displayName, displayName: displayName,
isFork: isFork, isFork: isFork,
isPublic: isPublic,
isArchived: isArchived, isArchived: isArchived,
org: { org: {
connect: { connect: {
@ -542,6 +554,7 @@ export const compileGenericGitHostConfig_file = async (
} }
} }
export const compileGenericGitHostConfig_url = async ( export const compileGenericGitHostConfig_url = async (
config: GenericGitHostConnectionConfig, config: GenericGitHostConnectionConfig,
orgId: number, orgId: number,
@ -604,3 +617,86 @@ export const compileGenericGitHostConfig_url = async (
notFound, notFound,
} }
} }
export const compileAzureDevOpsConfig = async (
config: AzureDevOpsConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient,
abortController: AbortController) => {
const azureDevOpsReposResult = await getAzureDevOpsReposFromConfig(config, orgId, db);
const azureDevOpsRepos = azureDevOpsReposResult.validRepos;
const notFound = azureDevOpsReposResult.notFound;
const hostUrl = config.url ?? 'https://dev.azure.com';
const repoNameRoot = new URL(hostUrl)
.toString()
.replace(/^https?:\/\//, '');
const repos = azureDevOpsRepos.map((repo) => {
if (!repo.project) {
throw new Error(`No project found for repository ${repo.name}`);
}
const repoDisplayName = `${repo.project.name}/${repo.name}`;
const repoName = path.join(repoNameRoot, repoDisplayName);
const isPublic = repo.project.visibility === ProjectVisibility.Public;
if (!repo.remoteUrl) {
throw new Error(`No remoteUrl found for repository ${repoDisplayName}`);
}
if (!repo.id) {
throw new Error(`No id found for repository ${repoDisplayName}`);
}
// Construct web URL for the repository
const webUrl = repo.webUrl || `${hostUrl}/${repo.project.name}/_git/${repo.name}`;
logger.debug(`Found Azure DevOps repo ${repoDisplayName} with webUrl: ${webUrl}`);
const record: RepoData = {
external_id: repo.id.toString(),
external_codeHostType: 'azuredevops',
external_codeHostUrl: hostUrl,
cloneUrl: webUrl,
webUrl: webUrl,
name: repoName,
displayName: repoDisplayName,
imageUrl: null,
isFork: !!repo.isFork,
isArchived: false,
isPublic: isPublic,
org: {
connect: {
id: orgId,
},
},
connections: {
create: {
connectionId: connectionId,
}
},
metadata: {
gitConfig: {
'zoekt.web-url-type': 'azuredevops',
'zoekt.web-url': webUrl,
'zoekt.name': repoName,
'zoekt.archived': marshalBool(false),
'zoekt.fork': marshalBool(!!repo.isFork),
'zoekt.public': marshalBool(isPublic),
'zoekt.display-name': repoDisplayName,
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
} satisfies RepoMetadata,
};
return record;
})
return {
repoData: repos,
notFound,
};
}

View file

@ -1,27 +1,19 @@
import { Job, Queue, Worker } from 'bullmq';
import { Redis } from 'ioredis';
import { createLogger } from "@sourcebot/logger";
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { AppContext, Settings, repoMetadataSchema } from "./types.js";
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js";
import { existsSync, readdirSync, promises } from 'fs';
import { indexGitRepository } from "./zoekt.js";
import { PromClient } from './promClient.js';
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { PrismaClient, Repo, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { Job, Queue, Worker } from 'bullmq';
import { existsSync, promises, readdirSync } from 'fs';
import { Redis } from 'ioredis';
import { env } from './env.js'; import { env } from './env.js';
import { cloneRepository, fetchRepository, unsetGitConfig, upsertGitConfig } from "./git.js";
interface IRepoManager { import { PromClient } from './promClient.js';
validateIndexedReposHaveShards: () => Promise<void>; import { AppContext, RepoWithConnections, Settings, repoMetadataSchema } from "./types.js";
blockingPollLoop: () => void; import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, measure } from "./utils.js";
dispose: () => void; import { indexGitRepository } from "./zoekt.js";
}
const REPO_INDEXING_QUEUE = 'repoIndexingQueue'; const REPO_INDEXING_QUEUE = 'repoIndexingQueue';
const REPO_GC_QUEUE = 'repoGarbageCollectionQueue'; const REPO_GC_QUEUE = 'repoGarbageCollectionQueue';
type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection })[] };
type RepoIndexingPayload = { type RepoIndexingPayload = {
repo: RepoWithConnections, repo: RepoWithConnections,
} }
@ -32,11 +24,12 @@ type RepoGarbageCollectionPayload = {
const logger = createLogger('repo-manager'); const logger = createLogger('repo-manager');
export class RepoManager implements IRepoManager { export class RepoManager {
private indexWorker: Worker; private indexWorker: Worker;
private indexQueue: Queue<RepoIndexingPayload>; private indexQueue: Queue<RepoIndexingPayload>;
private gcWorker: Worker; private gcWorker: Worker;
private gcQueue: Queue<RepoGarbageCollectionPayload>; private gcQueue: Queue<RepoGarbageCollectionPayload>;
private interval?: NodeJS.Timeout;
constructor( constructor(
private db: PrismaClient, private db: PrismaClient,
@ -68,14 +61,13 @@ export class RepoManager implements IRepoManager {
this.gcWorker.on('failed', this.onGarbageCollectionJobFailed.bind(this)); this.gcWorker.on('failed', this.onGarbageCollectionJobFailed.bind(this));
} }
public async blockingPollLoop() { public startScheduler() {
while (true) { logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
await this.fetchAndScheduleRepoIndexing(); await this.fetchAndScheduleRepoIndexing();
await this.fetchAndScheduleRepoGarbageCollection(); await this.fetchAndScheduleRepoGarbageCollection();
await this.fetchAndScheduleRepoTimeouts(); await this.fetchAndScheduleRepoTimeouts();
}, this.settings.reindexRepoPollingIntervalMs);
await new Promise(resolve => setTimeout(resolve, this.settings.reindexRepoPollingIntervalMs));
}
} }
/////////////////////////// ///////////////////////////
@ -169,62 +161,6 @@ export class RepoManager implements IRepoManager {
} }
} }
// TODO: do this better? ex: try using the tokens from all the connections
// We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This
// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
private async getCloneCredentialsForRepo(repo: RepoWithConnections, db: PrismaClient): Promise<{ username?: string, password: string } | undefined> {
for (const { connection } of repo.connections) {
if (connection.connectionType === 'github') {
const config = connection.config as unknown as GithubConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
password: token,
}
}
}
else if (connection.connectionType === 'gitlab') {
const config = connection.config as unknown as GitlabConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
username: 'oauth2',
password: token,
}
}
}
else if (connection.connectionType === 'gitea') {
const config = connection.config as unknown as GiteaConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
password: token,
}
}
}
else if (connection.connectionType === 'bitbucket') {
const config = connection.config as unknown as BitbucketConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const username = config.user ?? 'x-token-auth';
return {
username,
password: token,
}
}
}
}
return undefined;
}
private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) { private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) {
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx); const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
@ -237,11 +173,27 @@ export class RepoManager implements IRepoManager {
await promises.rm(repoPath, { recursive: true, force: true }); await promises.rm(repoPath, { recursive: true, force: true });
} }
if (existsSync(repoPath) && !isReadOnly) { const credentials = await getAuthCredentialsForRepo(repo, this.db);
logger.info(`Fetching ${repo.displayName}...`); const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? repo.cloneUrl;
const authHeader = credentials?.authHeader ?? undefined;
const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => { if (existsSync(repoPath) && !isReadOnly) {
// @NOTE: in #483, we changed the cloning method s.t., we _no longer_
// write the clone URL (which could contain a auth token) to the
// `remote.origin.url` entry. For the upgrade scenario, we want
// to unset this key since it is no longer needed, hence this line.
// This will no-op if the key is already unset.
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
await unsetGitConfig(repoPath, ["remote.origin.url"]);
logger.info(`Fetching ${repo.displayName}...`);
const { durationMs } = await measure(() => fetchRepository({
cloneUrl: cloneUrlMaybeWithToken,
authHeader,
path: repoPath,
onProgress: ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}
})); }));
const fetchDuration_s = durationMs / 1000; const fetchDuration_s = durationMs / 1000;
@ -251,24 +203,13 @@ export class RepoManager implements IRepoManager {
} else if (!isReadOnly) { } else if (!isReadOnly) {
logger.info(`Cloning ${repo.displayName}...`); logger.info(`Cloning ${repo.displayName}...`);
const auth = await this.getCloneCredentialsForRepo(repo, this.db); const { durationMs } = await measure(() => cloneRepository({
const cloneUrl = new URL(repo.cloneUrl); cloneUrl: cloneUrlMaybeWithToken,
if (auth) { authHeader,
// @note: URL has a weird behavior where if you set the password but path: repoPath,
// _not_ the username, the ":" delimiter will still be present in the onProgress: ({ method, stage, progress }) => {
// URL (e.g., https://:password@example.com). To get around this, if
// we only have a password, we set the username to the password.
// @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA
if (!auth.username) {
cloneUrl.username = auth.password;
} else {
cloneUrl.username = auth.username;
cloneUrl.password = auth.password;
}
}
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}
})); }));
const cloneDuration_s = durationMs / 1000; const cloneDuration_s = durationMs / 1000;
@ -552,8 +493,8 @@ export class RepoManager implements IRepoManager {
return; return;
} }
const files = readdirSync(this.ctx.indexPath);
const reposToReindex: number[] = []; const reposToReindex: number[] = [];
for (const repo of indexedRepos) { for (const repo of indexedRepos) {
const shardPrefix = getShardPrefix(repo.orgId, repo.id); const shardPrefix = getShardPrefix(repo.orgId, repo.id);
@ -561,7 +502,6 @@ export class RepoManager implements IRepoManager {
// would need to know how many total shards are expected for this repo // would need to know how many total shards are expected for this repo
let hasShards = false; let hasShards = false;
try { try {
const files = readdirSync(this.ctx.indexPath);
hasShards = files.some(file => file.startsWith(shardPrefix)); hasShards = files.some(file => file.startsWith(shardPrefix));
} catch (error) { } catch (error) {
logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`); logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
@ -615,6 +555,9 @@ export class RepoManager implements IRepoManager {
} }
public async dispose() { public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.indexWorker.close(); this.indexWorker.close();
this.indexQueue.close(); this.indexQueue.close();
this.gcQueue.close(); this.gcQueue.close();

View file

@ -1,3 +1,4 @@
import { Connection, Repo, RepoToConnection } from "@sourcebot/db";
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type"; import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
import { z } from "zod"; import { z } from "zod";
@ -51,3 +52,13 @@ export type DeepPartial<T> = T extends object ? {
// @see: https://stackoverflow.com/a/69328045 // @see: https://stackoverflow.com/a/69328045
export type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] }; export type WithRequired<T, K extends keyof T> = T & { [P in K]-?: T[P] };
export type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection })[] };
export type RepoAuthCredentials = {
hostUrl?: string;
token: string;
cloneUrlWithToken?: string;
authHeader?: string;
}

View file

@ -1,10 +1,11 @@
import { Logger } from "winston"; import { Logger } from "winston";
import { AppContext } from "./types.js"; import { AppContext, RepoAuthCredentials, RepoWithConnections } from "./types.js";
import path from 'path'; import path from 'path';
import { PrismaClient, Repo } from "@sourcebot/db"; import { PrismaClient, Repo } from "@sourcebot/db";
import { getTokenFromConfig as getTokenFromConfigBase } from "@sourcebot/crypto"; import { getTokenFromConfig as getTokenFromConfigBase } from "@sourcebot/crypto";
import { BackendException, BackendError } from "@sourcebot/error"; import { BackendException, BackendError } from "@sourcebot/error";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
export const measure = async <T>(cb: () => Promise<T>) => { export const measure = async <T>(cb: () => Promise<T>) => {
const start = Date.now(); const start = Date.now();
@ -117,3 +118,126 @@ export const fetchWithRetry = async <T>(
} }
} }
} }
// TODO: do this better? ex: try using the tokens from all the connections
// We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This
// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: PrismaClient, logger?: Logger): Promise<RepoAuthCredentials | undefined> => {
for (const { connection } of repo.connections) {
if (connection.connectionType === 'github') {
const config = connection.config as unknown as GithubConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
hostUrl: config.url,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
password: token,
}
),
}
}
} else if (connection.connectionType === 'gitlab') {
const config = connection.config as unknown as GitlabConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
hostUrl: config.url,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
username: 'oauth2',
password: token
}
),
}
}
} else if (connection.connectionType === 'gitea') {
const config = connection.config as unknown as GiteaConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
return {
hostUrl: config.url,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
password: token
}
),
}
}
} else if (connection.connectionType === 'bitbucket') {
const config = connection.config as unknown as BitbucketConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const username = config.user ?? 'x-token-auth';
return {
hostUrl: config.url,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
username,
password: token
}
),
}
}
} else if (connection.connectionType === 'azuredevops') {
const config = connection.config as unknown as AzureDevOpsConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
// For ADO server, multiple auth schemes may be supported. If the ADO deployment supports NTLM, the git clone will default
// to this over basic auth. As a result, we cannot embed the token in the clone URL and must force basic auth by passing in the token
// appropriately in the header. To do this, we set the authHeader field here
if (config.deploymentType === 'server') {
return {
hostUrl: config.url,
token,
authHeader: "Authorization: Basic " + Buffer.from(`:${token}`).toString('base64')
}
} else {
return {
hostUrl: config.url,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
// @note: If we don't provide a username, the password will be set as the username. This seems to work
// for ADO cloud but not for ADO server. To fix this, we set a placeholder username to ensure the password
// is set correctly
username: 'user',
password: token
}
),
}
}
}
}
}
return undefined;
}
const createGitCloneUrlWithToken = (cloneUrl: string, credentials: { username?: string, password: string }) => {
const url = new URL(cloneUrl);
// @note: URL has a weird behavior where if you set the password but
// _not_ the username, the ":" delimiter will still be present in the
// URL (e.g., https://:password@example.com). To get around this, if
// we only have a password, we set the username to the password.
// @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA
if (!credentials.username) {
url.username = credentials.password;
} else {
url.username = credentials.username;
url.password = credentials.password;
}
return url.toString();
}

View file

@ -63,7 +63,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
`-index ${ctx.indexPath}`, `-index ${ctx.indexPath}`,
`-max_trigram_count ${settings.maxTrigramCount}`, `-max_trigram_count ${settings.maxTrigramCount}`,
`-file_limit ${settings.maxFileSize}`, `-file_limit ${settings.maxFileSize}`,
`-branches ${revisions.join(',')}`, `-branches "${revisions.join(',')}"`,
`-tenant_id ${repo.orgId}`, `-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`, `-repo_id ${repo.id}`,
`-shard_prefix ${shardPrefix}`, `-shard_prefix ${shardPrefix}`,
@ -76,6 +76,20 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
reject(error); reject(error);
return; return;
} }
if (stdout) {
stdout.split('\n').filter(line => line.trim()).forEach(line => {
logger.info(line);
});
}
if (stderr) {
stderr.split('\n').filter(line => line.trim()).forEach(line => {
// TODO: logging as regular info here and not error because non error logs are being
// streamed in stderr and incorrectly being logged as errors at a high level
logger.info(line);
});
}
resolve({ resolve({
stdout, stdout,
stderr stderr

View file

@ -0,0 +1,23 @@
-- CreateEnum
CREATE TYPE "ChatVisibility" AS ENUM ('PRIVATE', 'PUBLIC');
-- CreateTable
CREATE TABLE "Chat" (
"id" TEXT NOT NULL,
"name" TEXT,
"createdById" TEXT NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"orgId" INTEGER NOT NULL,
"visibility" "ChatVisibility" NOT NULL DEFAULT 'PRIVATE',
"isReadonly" BOOLEAN NOT NULL DEFAULT false,
"messages" JSONB NOT NULL,
CONSTRAINT "Chat_pkey" PRIMARY KEY ("id")
);
-- AddForeignKey
ALTER TABLE "Chat" ADD CONSTRAINT "Chat_createdById_fkey" FOREIGN KEY ("createdById") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "Chat" ADD CONSTRAINT "Chat_orgId_fkey" FOREIGN KEY ("orgId") REFERENCES "Org"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View file

@ -0,0 +1,5 @@
-- CreateIndex
CREATE INDEX "Repo_orgId_idx" ON "Repo"("orgId");
-- CreateIndex
CREATE INDEX "RepoToConnection_repoId_connectionId_idx" ON "RepoToConnection"("repoId", "connectionId");

View file

@ -0,0 +1,59 @@
-- CreateEnum
CREATE TYPE "RepoPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED');
-- CreateEnum
CREATE TYPE "UserPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED');
-- AlterTable
ALTER TABLE "Repo" ADD COLUMN "isPublic" BOOLEAN NOT NULL DEFAULT false,
ADD COLUMN "permissionSyncedAt" TIMESTAMP(3);
-- AlterTable
ALTER TABLE "User" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3);
-- CreateTable
CREATE TABLE "RepoPermissionSyncJob" (
"id" TEXT NOT NULL,
"status" "RepoPermissionSyncJobStatus" NOT NULL DEFAULT 'PENDING',
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"completedAt" TIMESTAMP(3),
"errorMessage" TEXT,
"repoId" INTEGER NOT NULL,
CONSTRAINT "RepoPermissionSyncJob_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "UserPermissionSyncJob" (
"id" TEXT NOT NULL,
"status" "UserPermissionSyncJobStatus" NOT NULL DEFAULT 'PENDING',
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"completedAt" TIMESTAMP(3),
"errorMessage" TEXT,
"userId" TEXT NOT NULL,
CONSTRAINT "UserPermissionSyncJob_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "UserToRepoPermission" (
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"repoId" INTEGER NOT NULL,
"userId" TEXT NOT NULL,
CONSTRAINT "UserToRepoPermission_pkey" PRIMARY KEY ("repoId","userId")
);
-- AddForeignKey
ALTER TABLE "RepoPermissionSyncJob" ADD CONSTRAINT "RepoPermissionSyncJob_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "UserPermissionSyncJob" ADD CONSTRAINT "UserPermissionSyncJob_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View file

@ -35,29 +35,35 @@ enum StripeSubscriptionStatus {
INACTIVE INACTIVE
} }
enum ChatVisibility {
PRIVATE
PUBLIC
}
model Repo { model Repo {
id Int @id @default(autoincrement()) id Int @id @default(autoincrement())
name String name String /// Full repo name, including the vcs hostname (ex. github.com/sourcebot-dev/sourcebot)
displayName String? displayName String? /// Display name of the repo for UI (ex. sourcebot-dev/sourcebot)
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
/// When the repo was last indexed successfully. indexedAt DateTime? /// When the repo was last indexed successfully.
indexedAt DateTime?
isFork Boolean isFork Boolean
isArchived Boolean isArchived Boolean
metadata Json // For schema see repoMetadataSchema in packages/backend/src/types.ts isPublic Boolean @default(false)
metadata Json /// For schema see repoMetadataSchema in packages/backend/src/types.ts
cloneUrl String cloneUrl String
webUrl String? webUrl String?
connections RepoToConnection[] connections RepoToConnection[]
imageUrl String? imageUrl String?
repoIndexingStatus RepoIndexingStatus @default(NEW) repoIndexingStatus RepoIndexingStatus @default(NEW)
// The id of the repo in the external service permittedUsers UserToRepoPermission[]
external_id String permissionSyncJobs RepoPermissionSyncJob[]
// The type of the external service (e.g., github, gitlab, etc.) permissionSyncedAt DateTime? /// When the permissions were last synced successfully.
external_codeHostType String
// The base url of the external service (e.g., https://github.com) external_id String /// The id of the repo in the external service
external_codeHostUrl String external_codeHostType String /// The type of the external service (e.g., github, gitlab, etc.)
external_codeHostUrl String /// The base url of the external service (e.g., https://github.com)
org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) org Org @relation(fields: [orgId], references: [id], onDelete: Cascade)
orgId Int orgId Int
@ -65,6 +71,27 @@ model Repo {
searchContexts SearchContext[] searchContexts SearchContext[]
@@unique([external_id, external_codeHostUrl, orgId]) @@unique([external_id, external_codeHostUrl, orgId])
@@index([orgId])
}
enum RepoPermissionSyncJobStatus {
PENDING
IN_PROGRESS
COMPLETED
FAILED
}
model RepoPermissionSyncJob {
id String @id @default(cuid())
status RepoPermissionSyncJobStatus @default(PENDING)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
completedAt DateTime?
errorMessage String?
repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade)
repoId Int
} }
model SearchContext { model SearchContext {
@ -113,6 +140,7 @@ model RepoToConnection {
repoId Int repoId Int
@@id([connectionId, repoId]) @@id([connectionId, repoId])
@@index([repoId, connectionId])
} }
model Invite { model Invite {
@ -183,6 +211,8 @@ model Org {
accountRequests AccountRequest[] accountRequests AccountRequest[]
searchContexts SearchContext[] searchContexts SearchContext[]
chats Chat[]
} }
enum OrgRole { enum OrgRole {
@ -232,7 +262,6 @@ model ApiKey {
createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade)
createdById String createdById String
} }
model Audit { model Audit {
@ -251,10 +280,8 @@ model Audit {
orgId Int orgId Int
@@index([actorId, actorType, targetId, targetType, orgId]) @@index([actorId, actorType, targetId, targetType, orgId])
// Fast path for analytics queries orgId is first because we assume most deployments are single tenant // Fast path for analytics queries orgId is first because we assume most deployments are single tenant
@@index([orgId, timestamp, action, actorId], map: "idx_audit_core_actions_full") @@index([orgId, timestamp, action, actorId], map: "idx_audit_core_actions_full")
// Fast path for analytics queries for a specific user // Fast path for analytics queries for a specific user
@@index([actorId, timestamp], map: "idx_audit_actor_time_full") @@index([actorId, timestamp], map: "idx_audit_actor_time_full")
} }
@ -270,14 +297,52 @@ model User {
accounts Account[] accounts Account[]
orgs UserToOrg[] orgs UserToOrg[]
accountRequest AccountRequest? accountRequest AccountRequest?
accessibleRepos UserToRepoPermission[]
/// List of pending invites that the user has created /// List of pending invites that the user has created
invites Invite[] invites Invite[]
apiKeys ApiKey[] apiKeys ApiKey[]
chats Chat[]
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
permissionSyncJobs UserPermissionSyncJob[]
permissionSyncedAt DateTime?
}
enum UserPermissionSyncJobStatus {
PENDING
IN_PROGRESS
COMPLETED
FAILED
}
model UserPermissionSyncJob {
id String @id @default(cuid())
status UserPermissionSyncJobStatus @default(PENDING)
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
completedAt DateTime?
errorMessage String?
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
userId String
}
model UserToRepoPermission {
createdAt DateTime @default(now())
repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade)
repoId Int
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
userId String
@@id([repoId, userId])
} }
// @see : https://authjs.dev/concepts/database-models#account // @see : https://authjs.dev/concepts/database-models#account
@ -311,3 +376,23 @@ model VerificationToken {
@@unique([identifier, token]) @@unique([identifier, token])
} }
model Chat {
id String @id @default(cuid())
name String?
createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade)
createdById String
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
org Org @relation(fields: [orgId], references: [id], onDelete: Cascade)
orgId Int
visibility ChatVisibility @default(PRIVATE)
isReadonly Boolean @default(false)
messages Json // This is a JSON array of `Message` types from @ai-sdk/ui-utils.
}

View file

@ -4,6 +4,8 @@ import { migrateDuplicateConnections } from "./scripts/migrate-duplicate-connect
import { injectAuditData } from "./scripts/inject-audit-data"; import { injectAuditData } from "./scripts/inject-audit-data";
import { confirmAction } from "./utils"; import { confirmAction } from "./utils";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/logger";
import { injectRepoData } from "./scripts/inject-repo-data";
import { testRepoQueryPerf } from "./scripts/test-repo-query-perf";
export interface Script { export interface Script {
run: (prisma: PrismaClient) => Promise<void>; run: (prisma: PrismaClient) => Promise<void>;
@ -12,6 +14,8 @@ export interface Script {
export const scripts: Record<string, Script> = { export const scripts: Record<string, Script> = {
"migrate-duplicate-connections": migrateDuplicateConnections, "migrate-duplicate-connections": migrateDuplicateConnections,
"inject-audit-data": injectAuditData, "inject-audit-data": injectAuditData,
"inject-repo-data": injectRepoData,
"test-repo-query-perf": testRepoQueryPerf,
} }
const parser = new ArgumentParser(); const parser = new ArgumentParser();

View file

@ -0,0 +1,64 @@
import { Script } from "../scriptRunner";
import { PrismaClient } from "../../dist";
import { createLogger } from "@sourcebot/logger";
const logger = createLogger('inject-repo-data');

const NUM_REPOS = 100000;

// Number of repo creations issued concurrently per batch. Awaiting 100k
// inserts one at a time is extremely slow; batching independent creates with
// Promise.all keeps throughput high without exhausting the connection pool.
const BATCH_SIZE = 100;

/**
 * Seeds the database with synthetic repos for load/perf testing.
 *
 * Ensures a test org (id = 1) exists, creates a single GitHub connection for
 * it, then creates NUM_REPOS repos, each linked to that connection via the
 * RepoToConnection join table.
 */
export const injectRepoData: Script = {
    run: async (prisma: PrismaClient) => {
        const orgId = 1;

        // Create the test org only if it doesn't already exist, so the
        // script can be re-run against a seeded database.
        const org = await prisma.org.findUnique({
            where: { id: orgId }
        });
        if (!org) {
            await prisma.org.create({
                data: {
                    id: orgId,
                    name: 'Test Org',
                    domain: 'test-org.com'
                }
            });
        }

        // All injected repos hang off a single connection.
        const connection = await prisma.connection.create({
            data: {
                orgId,
                name: 'test-connection',
                connectionType: 'github',
                config: {}
            }
        });

        logger.info(`Creating ${NUM_REPOS} repos...`);

        // Issue creates in fixed-size concurrent batches. Each create uses a
        // nested write to also insert the RepoToConnection row.
        for (let batchStart = 0; batchStart < NUM_REPOS; batchStart += BATCH_SIZE) {
            const batchSize = Math.min(BATCH_SIZE, NUM_REPOS - batchStart);
            await Promise.all(
                Array.from({ length: batchSize }, (_, offset) => {
                    const i = batchStart + offset;
                    return prisma.repo.create({
                        data: {
                            name: `test-repo-${i}`,
                            isFork: false,
                            isArchived: false,
                            metadata: {},
                            cloneUrl: `https://github.com/test-org/test-repo-${i}`,
                            webUrl: `https://github.com/test-org/test-repo-${i}`,
                            orgId,
                            external_id: `test-repo-${i}`,
                            external_codeHostType: 'github',
                            external_codeHostUrl: 'https://github.com',
                            connections: {
                                create: {
                                    connectionId: connection.id,
                                }
                            }
                        }
                    });
                })
            );
        }

        logger.info(`Created ${NUM_REPOS} repos.`);
    }
};

View file

@ -0,0 +1,28 @@
import { Script } from "../scriptRunner";
import { PrismaClient } from "../../dist";
import { createLogger } from "@sourcebot/logger";
const logger = createLogger('test-repo-query-perf');

/**
 * Measures how long it takes to load every repo belonging to org 1 —
 * including each repo's connection rows — and logs the count and elapsed
 * wall-clock time.
 */
export const testRepoQueryPerf: Script = {
    run: async (prisma: PrismaClient) => {
        const startedAt = Date.now();

        // Eagerly load the RepoToConnection join rows and, through them,
        // the Connection rows themselves.
        const repos = await prisma.repo.findMany({
            where: {
                orgId: 1,
            },
            include: {
                connections: {
                    include: {
                        connection: true,
                    },
                },
            },
        });

        const elapsedMs = Date.now() - startedAt;
        logger.info(`Found ${repos.length} repos in ${elapsedMs}ms`);
    }
};

View file

@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [1.0.6] - 2025-09-26

### Fixed
- Fixed `linkedConnections is required` schema error.
## [1.0.5] - 2025-09-15
### Changed
- Updated API client to match the latest Sourcebot release. [#356](https://github.com/sourcebot-dev/sourcebot/pull/356)
## [1.0.4] - 2025-08-04
### Fixed
- Fixed issue where console logs were resulting in "unexpected token" errors on the MCP client. [#429](https://github.com/sourcebot-dev/sourcebot/pull/429)
## [1.0.3] - 2025-06-18 ## [1.0.3] - 2025-06-18
### Changed ### Changed

View file

@ -207,18 +207,10 @@ Sourcebot supports the following code hosts:
- [Gitea](https://docs.sourcebot.dev/docs/connections/gitea) - [Gitea](https://docs.sourcebot.dev/docs/connections/gitea)
- [Gerrit](https://docs.sourcebot.dev/docs/connections/gerrit) - [Gerrit](https://docs.sourcebot.dev/docs/connections/gerrit)
| Don't see your code host? Open a [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/ideas). | Don't see your code host? Open a [feature request](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).
## Future Work ## Future Work
### Semantic Search ### Semantic Search
Currently, Sourcebot only supports regex-based code search (powered by [zoekt](https://github.com/sourcegraph/zoekt) under the hood). It is great for scenarios when the agent is searching for is something that is super precise and well-represented in the source code (e.g., a specific function name, a error string, etc.). It is not-so-great for _fuzzy_ searches where the objective is to find some loosely defined _category_ or _concept_ in the code (e.g., find code that verifies JWT tokens). The LLM can approximate this by crafting regex searches that attempt to capture a concept (e.g., it might try a query like `"jwt|token|(verify|validate).*(jwt|token)"`), but often yields sub-optimal search results that aren't related. Tools like Cursor solve this with [embedding models](https://docs.cursor.com/context/codebase-indexing) to capture the semantic meaning of code, allowing for LLMs to search using natural language. We would like to extend Sourcebot to support semantic search and expose this capability over MCP as a tool (e.g., `semantic_search_code` tool). [GitHub Discussion](https://github.com/sourcebot-dev/sourcebot/discussions/297) Currently, Sourcebot only supports regex-based code search (powered by [zoekt](https://github.com/sourcegraph/zoekt) under the hood). It is great for scenarios when the agent is searching for is something that is super precise and well-represented in the source code (e.g., a specific function name, a error string, etc.). It is not-so-great for _fuzzy_ searches where the objective is to find some loosely defined _category_ or _concept_ in the code (e.g., find code that verifies JWT tokens). The LLM can approximate this by crafting regex searches that attempt to capture a concept (e.g., it might try a query like `"jwt|token|(verify|validate).*(jwt|token)"`), but often yields sub-optimal search results that aren't related. 
Tools like Cursor solve this with [embedding models](https://docs.cursor.com/context/codebase-indexing) to capture the semantic meaning of code, allowing for LLMs to search using natural language. We would like to extend Sourcebot to support semantic search and expose this capability over MCP as a tool (e.g., `semantic_search_code` tool). [GitHub Discussion](https://github.com/sourcebot-dev/sourcebot/discussions/297)
### Code Navigation
Another idea is to allow LLMs to traverse abstract syntax trees (ASTs) of a codebase to enable reliable code navigation. This could be packaged as tools like `goto_definition`, `find_all_references`, etc., which could be useful for LLMs to get additional code context. [GitHub Discussion](https://github.com/sourcebot-dev/sourcebot/discussions/296)
### Got an idea?
Open up a [GitHub discussion](https://github.com/sourcebot-dev/sourcebot/discussions/categories/feature-requests)!

View file

@ -1,6 +1,6 @@
{ {
"name": "@sourcebot/mcp", "name": "@sourcebot/mcp",
"version": "1.0.3", "version": "1.0.6",
"type": "module", "type": "module",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",

View file

@ -4,7 +4,6 @@ import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, Search
import { isServiceError } from './utils.js'; import { isServiceError } from './utils.js';
export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => { export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
console.debug(`Executing search request: ${JSON.stringify(request, null, 2)}`);
const result = await fetch(`${env.SOURCEBOT_HOST}/api/search`, { const result = await fetch(`${env.SOURCEBOT_HOST}/api/search`, {
method: 'POST', method: 'POST',
headers: { headers: {

View file

@ -75,8 +75,6 @@ server.tool(
query += ` case:no`; query += ` case:no`;
} }
console.debug(`Executing search request: ${query}`);
const response = await search({ const response = await search({
query, query,
matches: env.DEFAULT_MATCHES, matches: env.DEFAULT_MATCHES,
@ -163,10 +161,10 @@ server.tool(
}; };
} }
const content: TextContent[] = response.repos.map(repo => { const content: TextContent[] = response.map(repo => {
return { return {
type: "text", type: "text",
text: `id: ${repo.name}\nurl: ${repo.webUrl}`, text: `id: ${repo.repoName}\nurl: ${repo.webUrl}`,
} }
}); });
@ -214,7 +212,6 @@ server.tool(
const runServer = async () => { const runServer = async () => {
const transport = new StdioServerTransport(); const transport = new StdioServerTransport();
await server.connect(transport); await server.connect(transport);
console.info('Sourcebot MCP server ready');
} }
runServer().catch((error) => { runServer().catch((error) => {

View file

@ -92,16 +92,30 @@ export const searchResponseSchema = z.object({
isBranchFilteringEnabled: z.boolean(), isBranchFilteringEnabled: z.boolean(),
}); });
export const repositorySchema = z.object({ enum RepoIndexingStatus {
name: z.string(), NEW = 'NEW',
branches: z.array(z.string()), IN_INDEX_QUEUE = 'IN_INDEX_QUEUE',
INDEXING = 'INDEXING',
INDEXED = 'INDEXED',
FAILED = 'FAILED',
IN_GC_QUEUE = 'IN_GC_QUEUE',
GARBAGE_COLLECTING = 'GARBAGE_COLLECTING',
GARBAGE_COLLECTION_FAILED = 'GARBAGE_COLLECTION_FAILED'
}
export const repositoryQuerySchema = z.object({
codeHostType: z.string(),
repoId: z.number(),
repoName: z.string(),
repoDisplayName: z.string().optional(),
repoCloneUrl: z.string(),
webUrl: z.string().optional(), webUrl: z.string().optional(),
rawConfig: z.record(z.string(), z.string()).optional(), imageUrl: z.string().optional(),
indexedAt: z.coerce.date().optional(),
repoIndexingStatus: z.nativeEnum(RepoIndexingStatus),
}); });
export const listRepositoriesResponseSchema = z.object({ export const listRepositoriesResponseSchema = repositoryQuerySchema.array();
repos: z.array(repositorySchema),
});
export const fileSourceRequestSchema = z.object({ export const fileSourceRequestSchema = z.object({
fileName: z.string(), fileName: z.string(),

View file

@ -22,7 +22,6 @@ export type SearchResultChunk = SearchResultFile["chunks"][number];
export type SearchSymbol = z.infer<typeof symbolSchema>; export type SearchSymbol = z.infer<typeof symbolSchema>;
export type ListRepositoriesResponse = z.infer<typeof listRepositoriesResponseSchema>; export type ListRepositoriesResponse = z.infer<typeof listRepositoriesResponseSchema>;
export type Repository = ListRepositoriesResponse["repos"][number];
export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>; export type FileSourceRequest = z.infer<typeof fileSourceRequestSchema>;
export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>; export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;

View file

@ -0,0 +1,205 @@
// THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!

// JSON Schema (draft-07) for the "azuredevops" connection config. Declared
// `as const` so consumers get narrow literal types from the schema values.
const schema = {
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "title": "AzureDevOpsConnectionConfig",
  "properties": {
    "type": {
      "const": "azuredevops",
      "description": "Azure DevOps Configuration"
    },
    "token": {
      "description": "A Personal Access Token (PAT).",
      "examples": [
        {
          "secret": "SECRET_KEY"
        }
      ],
      "anyOf": [
        {
          "type": "object",
          "properties": {
            "secret": {
              "type": "string",
              "description": "The name of the secret that contains the token."
            }
          },
          "required": [
            "secret"
          ],
          "additionalProperties": false
        },
        {
          "type": "object",
          "properties": {
            "env": {
              "type": "string",
              "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
            }
          },
          "required": [
            "env"
          ],
          "additionalProperties": false
        }
      ]
    },
    "url": {
      "type": "string",
      "format": "url",
      "default": "https://dev.azure.com",
      "description": "The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.",
      "examples": [
        "https://dev.azure.com",
        "https://azuredevops.example.com"
      ],
      "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
    },
    "deploymentType": {
      "type": "string",
      "enum": [
        "cloud",
        "server"
      ],
      "description": "The type of Azure DevOps deployment"
    },
    "useTfsPath": {
      "type": "boolean",
      "default": false,
      "description": "Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...)."
    },
    "orgs": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "my-org"
        ]
      ],
      "description": "List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property."
    },
    "projects": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+\\/[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "my-org/my-project",
          "my-collection/my-project"
        ]
      ],
      "description": "List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server."
    },
    "repos": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+\\/[\\w.-]+\\/[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "my-org/my-project/my-repo"
        ]
      ],
      "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'."
    },
    "exclude": {
      "type": "object",
      "properties": {
        "disabled": {
          "type": "boolean",
          "default": false,
          "description": "Exclude disabled repositories from syncing."
        },
        "repos": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of repositories to exclude from syncing. Glob patterns are supported."
        },
        "projects": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of projects to exclude from syncing. Glob patterns are supported."
        },
        "size": {
          "type": "object",
          "description": "Exclude repositories based on their size.",
          "properties": {
            "min": {
              "type": "integer",
              "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
            },
            "max": {
              "type": "integer",
              "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
            }
          },
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "revisions": {
      "type": "object",
      "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
      "properties": {
        "branches": {
          "type": "array",
          "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "main",
              "release/*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        },
        "tags": {
          "type": "array",
          "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "latest",
              "v2.*.*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        }
      },
      "additionalProperties": false
    }
  },
  "required": [
    "type",
    "token",
    "deploymentType"
  ],
  "additionalProperties": false
} as const;
export { schema as azuredevopsSchema };

View file

@ -0,0 +1,89 @@
// THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!

export interface AzureDevOpsConnectionConfig {
  /**
   * Azure DevOps Configuration
   */
  type: "azuredevops";
  /**
   * A Personal Access Token (PAT).
   */
  token:
    | {
        /**
         * The name of the secret that contains the token.
         */
        secret: string;
      }
    | {
        /**
         * The name of the environment variable that contains the token. Only supported in declarative connection configs.
         */
        env: string;
      };
  /**
   * The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.
   */
  url?: string;
  /**
   * The type of Azure DevOps deployment
   */
  deploymentType: "cloud" | "server";
  /**
   * Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...).
   */
  useTfsPath?: boolean;
  /**
   * List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property.
   */
  orgs?: string[];
  /**
   * List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server.
   */
  projects?: string[];
  /**
   * List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'.
   */
  repos?: string[];
  /**
   * Filters controlling which repositories and projects are excluded from syncing.
   */
  exclude?: {
    /**
     * Exclude disabled repositories from syncing.
     */
    disabled?: boolean;
    /**
     * List of repositories to exclude from syncing. Glob patterns are supported.
     */
    repos?: string[];
    /**
     * List of projects to exclude from syncing. Glob patterns are supported.
     */
    projects?: string[];
    /**
     * Exclude repositories based on their size.
     */
    size?: {
      /**
       * Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing.
       */
      min?: number;
      /**
       * Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing.
       */
      max?: number;
    };
  };
  /**
   * The revisions (branches, tags) to include when indexing. See {@link GitRevisions}.
   */
  revisions?: GitRevisions;
}
/**
 * The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.
 */
export interface GitRevisions {
  /**
   * List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.
   */
  branches?: string[];
  /**
   * List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.
   */
  tags?: string[];
}

View file

@ -342,6 +342,11 @@ const schema = {
"default": false, "default": false,
"description": "Exclude archived projects from syncing." "description": "Exclude archived projects from syncing."
}, },
"userOwnedProjects": {
"type": "boolean",
"default": false,
"description": "Exclude user-owned projects from syncing."
},
"projects": { "projects": {
"type": "array", "type": "array",
"items": { "items": {
@ -637,6 +642,47 @@ const schema = {
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [
@ -822,6 +868,209 @@ const schema = {
}, },
"additionalProperties": false "additionalProperties": false
}, },
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "AzureDevOpsConnectionConfig",
"properties": {
"type": {
"const": "azuredevops",
"description": "Azure DevOps Configuration"
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
},
"url": {
"type": "string",
"format": "url",
"default": "https://dev.azure.com",
"description": "The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.",
"examples": [
"https://dev.azure.com",
"https://azuredevops.example.com"
],
"pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
},
"deploymentType": {
"type": "string",
"enum": [
"cloud",
"server"
],
"description": "The type of Azure DevOps deployment"
},
"useTfsPath": {
"type": "boolean",
"default": false,
"description": "Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...)."
},
"orgs": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org"
]
],
"description": "List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property."
},
"projects": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project",
"my-collection/my-project"
]
],
"description": "List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server."
},
"repos": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[\\w.-]+\\/[\\w.-]+\\/[\\w.-]+$"
},
"default": [],
"examples": [
[
"my-org/my-project/my-repo"
]
],
"description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'."
},
"exclude": {
"type": "object",
"properties": {
"disabled": {
"type": "boolean",
"default": false,
"description": "Exclude disabled repositories from syncing."
},
"repos": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of repositories to exclude from syncing. Glob patterns are supported."
},
"projects": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "List of projects to exclude from syncing. Glob patterns are supported."
},
"size": {
"type": "object",
"description": "Exclude repositories based on their size.",
"properties": {
"min": {
"type": "integer",
"description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
},
"max": {
"type": "integer",
"description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
}
},
"required": [
"type",
"token",
"deploymentType"
],
"additionalProperties": false
},
{ {
"$schema": "http://json-schema.org/draft-07/schema#", "$schema": "http://json-schema.org/draft-07/schema#",
"type": "object", "type": "object",

View file

@ -6,6 +6,7 @@ export type ConnectionConfig =
| GiteaConnectionConfig | GiteaConnectionConfig
| GerritConnectionConfig | GerritConnectionConfig
| BitbucketConnectionConfig | BitbucketConnectionConfig
| AzureDevOpsConnectionConfig
| GenericGitHostConnectionConfig; | GenericGitHostConnectionConfig;
export interface GithubConnectionConfig { export interface GithubConnectionConfig {
@ -153,6 +154,10 @@ export interface GitlabConnectionConfig {
* Exclude archived projects from syncing. * Exclude archived projects from syncing.
*/ */
archived?: boolean; archived?: boolean;
/**
* Exclude user-owned projects from syncing.
*/
userOwnedProjects?: boolean;
/** /**
* List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/ * List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/
*/ */
@ -244,6 +249,7 @@ export interface GerritConnectionConfig {
*/ */
hidden?: boolean; hidden?: boolean;
}; };
revisions?: GitRevisions;
} }
export interface BitbucketConnectionConfig { export interface BitbucketConnectionConfig {
/** /**
@ -306,6 +312,80 @@ export interface BitbucketConnectionConfig {
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }
export interface AzureDevOpsConnectionConfig {
/**
* Azure DevOps Configuration
*/
type: "azuredevops";
/**
* A Personal Access Token (PAT).
*/
token:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.
*/
url?: string;
/**
* The type of Azure DevOps deployment
*/
deploymentType: "cloud" | "server";
/**
* Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...).
*/
useTfsPath?: boolean;
/**
* List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property.
*/
orgs?: string[];
/**
* List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server.
*/
projects?: string[];
/**
* List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'.
*/
repos?: string[];
exclude?: {
/**
* Exclude disabled repositories from syncing.
*/
disabled?: boolean;
/**
* List of repositories to exclude from syncing. Glob patterns are supported.
*/
repos?: string[];
/**
* List of projects to exclude from syncing. Glob patterns are supported.
*/
projects?: string[];
/**
* Exclude repositories based on their size.
*/
size?: {
/**
* Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing.
*/
min?: number;
/**
* Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing.
*/
max?: number;
};
};
revisions?: GitRevisions;
}
export interface GenericGitHostConnectionConfig { export interface GenericGitHostConnectionConfig {
/** /**
* Generic Git host configuration * Generic Git host configuration

View file

@ -58,6 +58,47 @@ const schema = {
} }
}, },
"additionalProperties": false "additionalProperties": false
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"main",
"release/*"
],
[
"**"
]
],
"default": []
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
"examples": [
[
"latest",
"v2.*.*"
],
[
"**"
]
],
"default": []
}
},
"additionalProperties": false
} }
}, },
"required": [ "required": [

View file

@ -27,4 +27,18 @@ export interface GerritConnectionConfig {
*/ */
hidden?: boolean; hidden?: boolean;
}; };
revisions?: GitRevisions;
}
/**
* The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.
*/
export interface GitRevisions {
/**
* List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.
*/
branches?: string[];
/**
* List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.
*/
tags?: string[];
} }

View file

@ -125,6 +125,11 @@ const schema = {
"default": false, "default": false,
"description": "Exclude archived projects from syncing." "description": "Exclude archived projects from syncing."
}, },
"userOwnedProjects": {
"type": "boolean",
"default": false,
"description": "Exclude user-owned projects from syncing."
},
"projects": { "projects": {
"type": "array", "type": "array",
"items": { "items": {

View file

@ -56,6 +56,10 @@ export interface GitlabConnectionConfig {
* Exclude archived projects from syncing. * Exclude archived projects from syncing.
*/ */
archived?: boolean; archived?: boolean;
/**
* Exclude user-owned projects from syncing.
*/
userOwnedProjects?: boolean;
/** /**
* List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/ * List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/
*/ */

File diff suppressed because it is too large Load diff

View file

@ -10,7 +10,21 @@ export type ConnectionConfig =
| GiteaConnectionConfig | GiteaConnectionConfig
| GerritConnectionConfig | GerritConnectionConfig
| BitbucketConnectionConfig | BitbucketConnectionConfig
| AzureDevOpsConnectionConfig
| GenericGitHostConnectionConfig; | GenericGitHostConnectionConfig;
export type LanguageModel =
| AmazonBedrockLanguageModel
| AnthropicLanguageModel
| AzureLanguageModel
| DeepSeekLanguageModel
| GoogleGenerativeAILanguageModel
| GoogleVertexAnthropicLanguageModel
| GoogleVertexLanguageModel
| MistralLanguageModel
| OpenAILanguageModel
| OpenAICompatibleLanguageModel
| OpenRouterLanguageModel
| XaiLanguageModel;
export interface SourcebotConfig { export interface SourcebotConfig {
$schema?: string; $schema?: string;
@ -27,6 +41,10 @@ export interface SourcebotConfig {
connections?: { connections?: {
[k: string]: ConnectionConfig; [k: string]: ConnectionConfig;
}; };
/**
* Defines a collection of language models that are available to Sourcebot.
*/
models?: LanguageModel[];
} }
/** /**
* Defines the global settings for Sourcebot. * Defines the global settings for Sourcebot.
@ -84,6 +102,14 @@ export interface Settings {
* This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. * This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.
*/ */
enablePublicAccess?: boolean; enablePublicAccess?: boolean;
/**
* The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.
*/
experiment_repoDrivenPermissionSyncIntervalMs?: number;
/**
* The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours.
*/
experiment_userDrivenPermissionSyncIntervalMs?: number;
} }
/** /**
* Search context * Search context
@ -98,11 +124,19 @@ export interface SearchContext {
/** /**
* List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported. * List of repositories to include in the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.
*/ */
include: string[]; include?: string[];
/**
* List of connections to include in the search context.
*/
includeConnections?: string[];
/** /**
* List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported. * List of repositories to exclude from the search context. Expected to be formatted as a URL without any leading http(s):// prefix (e.g., 'github.com/sourcebot-dev/sourcebot'). Glob patterns are supported.
*/ */
exclude?: string[]; exclude?: string[];
/**
* List of connections to exclude from the search context.
*/
excludeConnections?: string[];
/** /**
* Optional description of the search context that surfaces in the UI. * Optional description of the search context that surfaces in the UI.
*/ */
@ -253,6 +287,10 @@ export interface GitlabConnectionConfig {
* Exclude archived projects from syncing. * Exclude archived projects from syncing.
*/ */
archived?: boolean; archived?: boolean;
/**
* Exclude user-owned projects from syncing.
*/
userOwnedProjects?: boolean;
/** /**
* List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/ * List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/
*/ */
@ -344,6 +382,7 @@ export interface GerritConnectionConfig {
*/ */
hidden?: boolean; hidden?: boolean;
}; };
revisions?: GitRevisions;
} }
export interface BitbucketConnectionConfig { export interface BitbucketConnectionConfig {
/** /**
@ -406,6 +445,80 @@ export interface BitbucketConnectionConfig {
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }
export interface AzureDevOpsConnectionConfig {
/**
* Azure DevOps Configuration
*/
type: "azuredevops";
/**
* A Personal Access Token (PAT).
*/
token:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* The URL of the Azure DevOps host. For Azure DevOps Cloud, use https://dev.azure.com. For Azure DevOps Server, use your server URL.
*/
url?: string;
/**
* The type of Azure DevOps deployment
*/
deploymentType: "cloud" | "server";
/**
* Use legacy TFS path format (/tfs) in API URLs. Required for older TFS installations (TFS 2018 and earlier). When true, API URLs will include /tfs in the path (e.g., https://server/tfs/collection/_apis/...).
*/
useTfsPath?: boolean;
/**
* List of organizations to sync with. For Cloud, this is the organization name. For Server, this is the collection name. All projects and repositories visible to the provided `token` will be synced, unless explicitly defined in the `exclude` property.
*/
orgs?: string[];
/**
* List of specific projects to sync with. Expected to be formatted as '{orgName}/{projectName}' for Cloud or '{collectionName}/{projectName}' for Server.
*/
projects?: string[];
/**
* List of individual repositories to sync with. Expected to be formatted as '{orgName}/{projectName}/{repoName}'.
*/
repos?: string[];
exclude?: {
/**
* Exclude disabled repositories from syncing.
*/
disabled?: boolean;
/**
* List of repositories to exclude from syncing. Glob patterns are supported.
*/
repos?: string[];
/**
* List of projects to exclude from syncing. Glob patterns are supported.
*/
projects?: string[];
/**
* Exclude repositories based on their size.
*/
size?: {
/**
* Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing.
*/
min?: number;
/**
* Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing.
*/
max?: number;
};
};
revisions?: GitRevisions;
}
export interface GenericGitHostConnectionConfig { export interface GenericGitHostConnectionConfig {
/** /**
* Generic Git host configuration * Generic Git host configuration
@ -417,3 +530,538 @@ export interface GenericGitHostConnectionConfig {
url: string; url: string;
revisions?: GitRevisions; revisions?: GitRevisions;
} }
export interface AmazonBedrockLanguageModel {
/**
* Amazon Bedrock Configuration
*/
provider: "amazon-bedrock";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional access key ID to use with the model. Defaults to the `AWS_ACCESS_KEY_ID` environment variable.
*/
accessKeyId?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional secret access key to use with the model. Defaults to the `AWS_SECRET_ACCESS_KEY` environment variable.
*/
accessKeySecret?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional session token to use with the model. Defaults to the `AWS_SESSION_TOKEN` environment variable.
*/
sessionToken?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* The AWS region. Defaults to the `AWS_REGION` environment variable.
*/
region?: string;
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
/**
* Optional headers to use with the model.
*/
export interface LanguageModelHeaders {
/**
* This interface was referenced by `LanguageModelHeaders`'s JSON-Schema definition
* via the `patternProperty` "^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$".
*/
[k: string]:
| string
| (
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
}
);
}
export interface AnthropicLanguageModel {
/**
* Anthropic Configuration
*/
provider: "anthropic";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `ANTHROPIC_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface AzureLanguageModel {
/**
* Azure Configuration
*/
provider: "azure";
/**
* The deployment name of the Azure model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Azure resource name. Defaults to the `AZURE_RESOURCE_NAME` environment variable.
*/
resourceName?: string;
/**
* Optional API key to use with the model. Defaults to the `AZURE_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Sets a custom api version. Defaults to `preview`.
*/
apiVersion?: string;
/**
* Use a different URL prefix for API calls. Either this or `resourceName` can be used.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface DeepSeekLanguageModel {
/**
* DeepSeek Configuration
*/
provider: "deepseek";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `DEEPSEEK_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface GoogleGenerativeAILanguageModel {
/**
* Google Generative AI Configuration
*/
provider: "google-generative-ai";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `GOOGLE_GENERATIVE_AI_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface GoogleVertexAnthropicLanguageModel {
/**
* Google Vertex AI Anthropic Configuration
*/
provider: "google-vertex-anthropic";
/**
* The name of the Anthropic language model running on Google Vertex.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* The Google Cloud project ID. Defaults to the `GOOGLE_VERTEX_PROJECT` environment variable.
*/
project?: string;
/**
* The Google Cloud region. Defaults to the `GOOGLE_VERTEX_REGION` environment variable.
*/
region?: string;
/**
* Optional file path to service account credentials JSON. Defaults to the `GOOGLE_APPLICATION_CREDENTIALS` environment variable or application default credentials.
*/
credentials?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface GoogleVertexLanguageModel {
/**
* Google Vertex AI Configuration
*/
provider: "google-vertex";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* The Google Cloud project ID. Defaults to the `GOOGLE_VERTEX_PROJECT` environment variable.
*/
project?: string;
/**
* The Google Cloud region. Defaults to the `GOOGLE_VERTEX_REGION` environment variable.
*/
region?: string;
/**
* Optional file path to service account credentials JSON. Defaults to the `GOOGLE_APPLICATION_CREDENTIALS` environment variable or application default credentials.
*/
credentials?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface MistralLanguageModel {
/**
* Mistral AI Configuration
*/
provider: "mistral";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `MISTRAL_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface OpenAILanguageModel {
/**
* OpenAI Configuration
*/
provider: "openai";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `OPENAI_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
/**
* The reasoning effort to use with the model. Defaults to `medium`. See https://platform.openai.com/docs/guides/reasoning#get-started-with-reasonings
*/
reasoningEffort?: string;
headers?: LanguageModelHeaders;
}
export interface OpenAICompatibleLanguageModel {
/**
* OpenAI Compatible Configuration
*/
provider: "openai-compatible";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key. If specified, adds an `Authorization` header to request headers with the value Bearer <token>.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Base URL of the OpenAI-compatible chat completions API endpoint.
*/
baseUrl: string;
headers?: LanguageModelHeaders;
queryParams?: LanguageModelQueryParams;
}
/**
* Optional query parameters to include in the request url.
*/
export interface LanguageModelQueryParams {
/**
* This interface was referenced by `LanguageModelQueryParams`'s JSON-Schema definition
* via the `patternProperty` "^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$".
*/
[k: string]:
| string
| (
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
}
);
}
export interface OpenRouterLanguageModel {
/**
* OpenRouter Configuration
*/
provider: "openrouter";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `OPENROUTER_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}
export interface XaiLanguageModel {
/**
* xAI Configuration
*/
provider: "xai";
/**
* The name of the language model.
*/
model: string;
/**
* Optional display name.
*/
displayName?: string;
/**
* Optional API key to use with the model. Defaults to the `XAI_API_KEY` environment variable.
*/
token?:
| {
/**
* The name of the secret that contains the token.
*/
secret: string;
}
| {
/**
* The name of the environment variable that contains the token. Only supported in declarative connection configs.
*/
env: string;
};
/**
* Optional base URL.
*/
baseUrl?: string;
headers?: LanguageModelHeaders;
}

Some files were not shown because too many files have changed in this diff Show more