Compare commits

...

69 commits
v4.8.0 ... main

Author SHA1 Message Date
msukkari
095474a901 update perm syncing docs
Some checks are pending
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-12-11 06:46:20 -08:00
Brendan Kellam
d63f3cf9d9
chore(web): Improve error messages for file loading errors (#665)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-12-05 11:58:19 -08:00
Cade 🐀
3d85a0595c
fix: add support for anyuid to Dockerfile (#658)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
* fix: add support for anyuid to Dockerfile

* changelog

---------

Co-authored-by: Cade Schlaefli <cade.schlaefli@mouser.com>
Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-12-04 22:29:23 -08:00
Brian Phillips
84cf524d84
Add GHES support to the review agent (#611)
* add support for GHES to the review agent

* fix throttling types

---------

Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-12-04 22:08:24 -08:00
bkellam
7c72578765 sourcebot v4.10.2
Some checks are pending
Update Roadmap Released / update (push) Waiting to run
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
2025-12-04 10:41:41 -08:00
Brendan Kellam
483b433aab
fix(web): Respect disable telemetry flag for web server side events (#657)
* fix

* changelog
2025-12-04 10:32:32 -08:00
Brendan Kellam
bcca1d6d7d
chore(web): Fix mistake of upgrading to a breaking version of next (#656)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-12-03 17:12:10 -08:00
bkellam
0e88eecc30 release @sourcebot/mcp v1.0.11
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-12-03 16:10:51 -08:00
bkellam
a4685e34ab sourcebot v4.10.1 2025-12-03 16:05:53 -08:00
Brendan Kellam
76dc2f5a12
chore(web): Server side search telemetry (#652) 2025-12-03 16:04:36 -08:00
Brendan Kellam
7fc068f8b2
fix(web): Fix CVE 2025-55182 (#654) 2025-12-03 15:59:43 -08:00
bkellam
91caf129ed chore: add default PostHog token in env.server.ts for development scenarios
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-12-01 20:18:23 -08:00
Brendan Kellam
92578881df
chore(web): Scope code nav to current repository by default (#647)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-30 18:53:09 -08:00
Brendan Kellam
28986f4355
chore(web): Bake PostHog token into build 2025-11-30 18:29:01 -08:00
Adam
41a6eb48a0
Shrink Docker image size by ~1/3 by removing unnecessary ops (#642)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
* Remove duplicate copy, chown on copy

* Add Dockerfile syntax

* Revert entrypoint changes to avoid errors in some non-root cases
2025-11-29 12:43:12 -08:00
Brendan Kellam
92ae76168c
fix(web): Fix issue where creating a new Ask thread would result in a 404 (#641)
Some checks are pending
Publish to ghcr / merge (push) Blocked by required conditions
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Update Roadmap Released / update (push) Waiting to run
2025-11-28 23:01:33 -08:00
Brendan Kellam
f1dd16be82
fix(web): Ask sourcebot perf improvements (#632)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-27 16:56:11 -08:00
Brendan Kellam
cc2837b740
fix(web): Fix error when loading files with special characters (#637) 2025-11-27 14:24:45 -08:00
Brendan Kellam
0633d1f23c
fix discord link (#634)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-26 13:56:00 -08:00
Brendan Kellam
8bc4f1e520
feat(worker): Add ALWAYS_INDEX_FILE_PATTERNS env var to specify files that should always be indexed (#631)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-25 23:38:30 -08:00
Brendan Kellam
c962fdd636
fix(web): Fix issue where quotes cannot be used within a query (#629)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-25 12:06:51 -08:00
bkellam
8e036a340f @sourcebot/mcp v1.0.10
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-24 14:29:42 -08:00
bkellam
fb305c2808 sourcebot v4.10.0 2025-11-24 13:44:12 -08:00
Brendan Kellam
c671e96139
feat(web): Add support for authentik sso (#627) 2025-11-24 13:28:04 -08:00
Brendan Kellam
f3a8fa3dab
feat(web): Streamed code search (#623)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
* generate protobuf types

* stream poc over SSE

* wip: make stream search api follow existing schema. Modify UI to support streaming

* fix scrolling issue

* Dockerfile

* wip on lezer parser grammar for query language

* add lezer tree -> grpc transformer

* remove spammy log message

* fix syntax highlighting by adding a module resolution for @lezer/common

* further wip on query language

* Add case sensitivity and regexp toggles

* Improved type safety / cleanup for query lang

* support search contexts

* update Dockerfile with query langauge package

* fix filter

* Add skeletons to filter panel when search is streaming

* add client side caching

* improved cancelation handling

* add isSearchExausted flag for flagging when a search captured all results

* Add back posthog search_finished event

* remove zoekt tenant enforcement

* migrate blocking search over to grpc. Centralize everything in searchApi

* branch handling

* plumb file weburl

* add repo_sets filter for repositories a user has access to

* refactor a bunch of stuff + add support for passing in Query IR to search api

* refactor

* dev README

* wip on better error handling

* error handling for stream path

* update mcp

* changelog wip

* type fix

* style

* Support rev:* wildcard

* changelog

* changelog nit

* feedback

* fix build

* update docs and remove uneeded test file
2025-11-22 15:33:31 -08:00
Brendan Kellam
09507d3e89
fix(worker): Permission syncer fixes (#624)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-19 22:14:23 -08:00
Brendan Kellam
97dd54d48f
chore(web): Add count to members / requests / invites tabs in settings (#621)
Some checks failed
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-18 10:41:40 -08:00
bkellam
831197980c release @sourcebot/mcp v1.0.9
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-17 17:11:03 -08:00
Teddy Reinert
9bee8c2c59
feat(mcp): Add pagination and filtering to list_repos tool (#614)
* feat(mcp): Add pagination and filtering to list_repos tool

Fixes #566

  - Add query parameter to filter repositories by name
  - Add pageNumber and limit parameters for pagination
  - Include pagination info in response when applicable
  - Add listReposRequestSchema for request validation
  - Update README with new list_repos parameters

* feat(mcp): Sort repositories alphabetically for consistent pagination

Fixes #566
- Updated CHANGELOG.md with pagination and filtering changes

---------

Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-11-17 17:08:20 -08:00
Jose Hernandez
e20d514569
feat(bitbucket): support glob patterns in repository exclusions (#620)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
* feat(bitbucket): support glob patterns in repository exclusions

Update Bitbucket Cloud and Server exclusion logic to support glob
patterns (e.g., "org/repo*") in the exclude.repos configuration,
matching the documented behavior and aligning with other providers
(GitHub, GitLab, Gitea, Azure DevOps).

Changes:
- Add micromatch import for pattern matching
- Replace Array.includes() with micromatch.isMatch() in
  cloudShouldExcludeRepo and serverShouldExcludeRepo functions
- Add reason logging for exclusion decisions to match GitHub's pattern

This enables users to exclude repositories using wildcard patterns
as documented in the Bitbucket Cloud connection documentation.

* update changelog

---------

Co-authored-by: Jose Hernandez <jose.hernandez@emilabs.ai>
Co-authored-by: bkellam <bshizzle1234@gmail.com>
2025-11-17 14:33:39 -08:00
Michael Sukkarieh
1dff20d47a
fix(ee): Wipe search contexts on init if we no longer have the entitlement (#618)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-13 21:29:51 -08:00
Brendan Kellam
fbe1073d0e
fix(web): Fix loading issues with references / definitions list (#617) 2025-11-13 17:21:48 -08:00
bkellam
341836a2ed sourcebot v4.9.2
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-13 00:36:47 -08:00
Brendan Kellam
2e959b7d59
feat(web): Add env var to configure default max match count (#616) 2025-11-13 00:06:23 -08:00
Brendan Kellam
a814bd6f7e
fix(web): Search performance improvements (#615) 2025-11-12 23:20:26 -08:00
Brendan Kellam
06c84f0bf5
fix(worker): Fix issue where connections would always sync on startup (#613)
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-11 20:31:08 -08:00
Brendan Kellam
903d15a2c5
fix(worker): Fix issues with gracefully shutting down (#612) 2025-11-11 20:11:59 -08:00
Brendan Kellam
18fad64baa
feat(web): Add force resync buttons for repo & connections (#610) 2025-11-11 15:16:40 -08:00
bkellam
2dfafdae41 release @sourcebot/mcp v1.0.8
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-10 15:32:32 -08:00
Wayne Sun
278c0dc556
fix: return truncated content when token limit exceeded in MCP search_code (#604)
When search results exceed maxTokens limit, now returns partial truncated
content instead of discarding the file completely.

Changes:
- Calculate remaining token budget before breaking
- Truncate file content to fit within remaining tokens (if > 100 tokens left)
- Append truncation marker to indicate content was cut off
- Still add truncation message at end of all results

Benefits:
- Users get partial data instead of nothing
- Better debugging and analysis experience
- More useful for AI-powered code analysis tasks
- Consistent with expected behavior when limits are reached

Example: If file would use 10K tokens but only 2K remain, return
first ~8K chars of content + truncation marker instead of dropping it.

Signed-off-by: Wayne Sun <gsun@redhat.com>
2025-11-10 15:23:56 -08:00
Brendan Kellam
6f64d5bb8d
fix(worker): Run setInterval as blocking (#607)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-09 14:49:24 -08:00
Brendan Kellam
1be6e8842e
fix(worker): properly shutdown PostHog client (#609) 2025-11-09 14:30:01 -08:00
Arman K.
f04ecab3ad
Update README.md (#608) 2025-11-09 13:28:28 -08:00
bkellam
d63da4b2c0 sourcebot v4.9.1
Some checks failed
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-07 10:39:57 -08:00
Brendan Kellam
825cef9da4
feat(deployment): Basic docker-compose file (#480) 2025-11-07 10:38:24 -08:00
Brendan Kellam
dd5cf61977
fix discord links (#606) 2025-11-07 10:05:05 -08:00
Furbreeze
5f5690ec49
adding contribution step for generating database schema (#602)
Some checks failed
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-06 10:50:41 -08:00
bkellam
720f2e4f4b update changelog for https://github.com/sourcebot-dev/sourcebot/pull/599
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-05 22:29:15 -08:00
Brendan Kellam
612ecff93a
feat: Support running Docker container as non-root (#599) 2025-11-05 22:24:46 -08:00
bkellam
33c732855f sourcebot v4.9.0
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-04 21:29:33 -08:00
msukkari
5fe00a6b48 typo in linked account settings 2025-11-04 21:26:19 -08:00
Brendan Kellam
1908051daa
feat(web,worker): Environment overrides (#597) 2025-11-04 21:22:31 -08:00
Brendan Kellam
5fde901356
chore(worker): Refactor permission syncing join table to be between Account <> Repo (#600) 2025-11-04 20:12:07 -08:00
Michael Sukkarieh
449c76fdcc
feat(ee): Add ability to link external accounts (#595) 2025-11-04 20:08:04 -08:00
Brendan Kellam
26ec7af7f0
feat(worker,web): Support google secrets as a token type (#594)
Some checks failed
Update Roadmap Released / update (push) Has been cancelled
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Has been cancelled
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Has been cancelled
Publish to ghcr / merge (push) Has been cancelled
2025-11-01 22:02:55 -07:00
bkellam
7e161e6df3 alter roadmap release update trigger
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-11-01 15:17:51 -07:00
Derek Miller
2c0540f6bf
fix(backend): Limit concurrent git operations to prevent resource exhaustion (#590) (#593)
When syncing generic-git-host connections with thousands of repositories,
unbounded Promise.all caused resource exhaustion (EAGAIN errors) by spawning
too many concurrent git processes. This resulted in valid repositories being
incorrectly skipped during sync.

- Add p-limit to control concurrent git operations (max 100)
- Follow existing pattern from github.ts for consistency
- Prevents file descriptor and process limit exhaustion
- Uses rolling concurrency to avoid head-of-line blocking

Fixes #590
2025-11-01 15:15:09 -07:00
bkellam
d1655d4587 run update roadmap on pushes to main
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
Update Roadmap Released / update (push) Waiting to run
2025-10-31 14:58:33 -07:00
Brian Phillips
58456d616b
add p-limit to GitHub API calls to avoid overwhelming the node process (or the API rate limits) (#591) 2025-10-31 14:49:43 -07:00
Michael Sukkarieh
fd17871da4
chore(tech-debt): Remove built-in secret manager (#592) 2025-10-31 14:33:28 -07:00
Brendan Kellam
581a5a0bd8
fix(web): Fix /settings/connections throwing a error when there is a git connection present (#588) 2025-10-31 13:08:51 -07:00
Brendan Kellam
4899c9fbc7
feat(ee): GitLab permission syncing (#585)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
2025-10-30 11:08:10 -07:00
Brendan Kellam
384aa9ebe6
fix(web): Fix "The account is already associated with another user" errors when signing in with GitLab (#584)
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
2025-10-29 21:25:48 -07:00
Michael Sukkarieh
bbb197a9bf
fix(github app): Generate installation tokens each time (#583)
* generate installation tokens each time

* changelog
2025-10-29 18:05:18 -07:00
Brendan Kellam
d09d65dce7
fix(ask): Extract reasoning tokens for openai compatible models (#582) 2025-10-29 17:13:31 -07:00
msukkari
727a6da105 remove old config files
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
2025-10-29 16:04:30 -07:00
bkellam
86be06928b sourcebot v4.8.1 2025-10-29 14:09:26 -07:00
Brendan Kellam
63cf48264d
chore(web): Bug fixes related to v4.8.0 release (#581) 2025-10-29 14:05:48 -07:00
bkellam
bc592addad @sourcebot/mcp v1.0.7
Some checks are pending
Publish to ghcr / build (linux/amd64, blacksmith-4vcpu-ubuntu-2404) (push) Waiting to run
Publish to ghcr / build (linux/arm64, blacksmith-8vcpu-ubuntu-2204-arm) (push) Waiting to run
Publish to ghcr / merge (push) Blocked by required conditions
2025-10-28 23:03:11 -07:00
417 changed files with 22525 additions and 7937 deletions

View file

@ -6,8 +6,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"
ZOEKT_WEBSERVER_URL="http://localhost:6070" ZOEKT_WEBSERVER_URL="http://localhost:6070"
# The command to use for generating ctags. # The command to use for generating ctags.
CTAGS_COMMAND=ctags CTAGS_COMMAND=ctags
# logging, strict
SRC_TENANT_ENFORCEMENT_MODE=strict
# Auth.JS # Auth.JS
# You can generate a new secret with: # You can generate a new secret with:
@ -23,7 +21,7 @@ AUTH_URL="http://localhost:3000"
DATA_CACHE_DIR=${PWD}/.sourcebot # Path to the sourcebot cache dir (ex. ~/sourcebot/.sourcebot) DATA_CACHE_DIR=${PWD}/.sourcebot # Path to the sourcebot cache dir (ex. ~/sourcebot/.sourcebot)
SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem
# CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists) CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists)
# Email # Email
# EMAIL_FROM_ADDRESS="" # The from address for transactional emails. # EMAIL_FROM_ADDRESS="" # The from address for transactional emails.
@ -31,7 +29,6 @@ SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem
# PostHog # PostHog
# POSTHOG_PAPIK="" # POSTHOG_PAPIK=""
# NEXT_PUBLIC_POSTHOG_PAPIK=""
# Sentry # Sentry
# SENTRY_BACKEND_DSN="" # SENTRY_BACKEND_DSN=""
@ -84,7 +81,7 @@ SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection
# NEXT_PUBLIC_SOURCEBOT_VERSION= # NEXT_PUBLIC_SOURCEBOT_VERSION=
# CONFIG_MAX_REPOS_NO_TOKEN= # CONFIG_MAX_REPOS_NO_TOKEN=
# NODE_ENV= NODE_ENV=development
# SOURCEBOT_TENANCY_MODE=single # SOURCEBOT_TENANCY_MODE=single
# NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT= # NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=

View file

@ -1,4 +1,4 @@
contact_links: contact_links:
- name: 👾 Discord - name: 👾 Discord
url: https://discord.gg/f4Cbf3HT url: https://discord.gg/HDScTs3ptP
about: Something else? Join the Discord! about: Something else? Join the Discord!

View file

@ -55,7 +55,6 @@ jobs:
${{ env.IMAGE_PATH }}:latest ${{ env.IMAGE_PATH }}:latest
build-args: | build-args: |
NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }} NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }}
NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT }} NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT }}
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }} NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }}
NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }} NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }}

View file

@ -77,7 +77,6 @@ jobs:
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search
build-args: | build-args: |
NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }} NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }}
- name: Export digest - name: Export digest
run: | run: |

View file

@ -1,8 +1,9 @@
name: Update Roadmap Released name: Update Roadmap Released
on: on:
pull_request: push:
types: [closed] branches:
- main
workflow_dispatch: workflow_dispatch:
schedule: schedule:
- cron: "0 */6 * * *" - cron: "0 */6 * * *"

View file

@ -7,6 +7,111 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Fixed
- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611)
### Added
- Added support for arbitrary user IDs required for OpenShift. [#658](https://github.com/sourcebot-dev/sourcebot/pull/658)
### Updated
- Improved error messages in file source api. [#665](https://github.com/sourcebot-dev/sourcebot/pull/665)
## [4.10.2] - 2025-12-04
### Fixed
- Fixed issue where the disable telemetry flag was not being respected for web server telemetry. [#657](https://github.com/sourcebot-dev/sourcebot/pull/657)
## [4.10.1] - 2025-12-03
### Added
- Added `ALWAYS_INDEX_FILE_PATTERNS` environment variable to allow specifying a comma-separated list of glob patterns matching file paths that should always be indexed, regardless of size or # of trigrams. [#631](https://github.com/sourcebot-dev/sourcebot/pull/631)
- Added button to explore menu to toggle cross-repository search. [#647](https://github.com/sourcebot-dev/sourcebot/pull/647)
- Added server side telemetry for search metrics. [#652](https://github.com/sourcebot-dev/sourcebot/pull/652)
### Fixed
- Fixed issue where single quotes could not be used in search queries. [#629](https://github.com/sourcebot-dev/sourcebot/pull/629)
- Fixed issue where files with special characters would fail to load. [#636](https://github.com/sourcebot-dev/sourcebot/issues/636)
- Fixed Ask performance issues. [#632](https://github.com/sourcebot-dev/sourcebot/pull/632)
- Fixed regression where creating a new Ask thread when unauthenticated would result in a 404. [#641](https://github.com/sourcebot-dev/sourcebot/pull/641)
- Updated react and next package versions to fix CVE 2025-55182. [#654](https://github.com/sourcebot-dev/sourcebot/pull/654)
### Changed
- Changed the default behaviour for code nav to scope references & definitions search to the current repository. [#647](https://github.com/sourcebot-dev/sourcebot/pull/647)
## [4.10.0] - 2025-11-24
### Added
- Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added counts to members, requests, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621)
- [Sourcebot EE] Add support for Authentik as an identity provider. [#627](https://github.com/sourcebot-dev/sourcebot/pull/627)
### Changed
- Changed the default search behaviour to match patterns as substrings and **not** regular expressions. Regular expressions can be used by toggling the regex button in search bar. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Renamed `public` query prefix to `visibility`. Allowed values for `visibility` are `public`, `private`, and `any`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Changed `archived` query prefix to accept values `yes`, `no`, and `only`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
### Removed
- Removed `case` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Removed `branch` and `b` query prefixes. Please use `rev:` instead. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Removed `regex` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
### Fixed
- Fixed spurious infinite loads with explore panel, file tree, and file search command. [#617](https://github.com/sourcebot-dev/sourcebot/pull/617)
- Wipe search context on init if entitlement no longer exists [#618](https://github.com/sourcebot-dev/sourcebot/pull/618)
- Fixed Bitbucket repository exclusions not supporting glob patterns. [#620](https://github.com/sourcebot-dev/sourcebot/pull/620)
- Fixed issue where the repo driven permission syncer was attempting to sync public repositories. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624)
- Fixed issue where worker would not shutdown while a permission sync job (repo or user) was in progress. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624)
## [4.9.2] - 2025-11-13
### Changed
- Bumped the default requested search result count from 5k to 10k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Fixed
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
- Fixed issue where connections would always sync on startup, regardless if they changed or not. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
- Fixed performance bottleneck in search api. Result is an order of magnitude improvement to average search time according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Added
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
- Added environment variable to configure default search result count. [#616](https://github.com/sourcebot-dev/sourcebot/pull/616)
## [4.9.1] - 2025-11-07
### Added
- Added support for running Sourcebot as non-root user. [#599](https://github.com/sourcebot-dev/sourcebot/pull/599)
## [4.9.0] - 2025-11-04
### Added
- [Experimental][Sourcebot EE] Added GitLab permission syncing. [#585](https://github.com/sourcebot-dev/sourcebot/pull/585)
- [Sourcebot EE] Added external identity provider config and support for multiple accounts. [#595](https://github.com/sourcebot-dev/sourcebot/pull/595)
- Added ability to configure environment variables from the config. [#597](https://github.com/sourcebot-dev/sourcebot/pull/597)
### Fixed
- [ask sb] Fixed issue where reasoning tokens would appear in `text` content for openai compatible models. [#582](https://github.com/sourcebot-dev/sourcebot/pull/582)
- Fixed issue with GitHub app token tracking and refreshing. [#583](https://github.com/sourcebot-dev/sourcebot/pull/583)
- Fixed "The account is already associated with another user" errors with GitLab oauth provider. [#584](https://github.com/sourcebot-dev/sourcebot/pull/584)
- Fixed error when viewing a generic git connection in `/settings/connections`. [#588](https://github.com/sourcebot-dev/sourcebot/pull/588)
- Fixed issue with an unbounded `Promise.allSettled(...)` when retrieving details from the GitHub API about a large number of repositories (or orgs or users). [#591](https://github.com/sourcebot-dev/sourcebot/pull/591)
- Fixed resource exhaustion (EAGAIN errors) when syncing generic-git-host connections with thousands of repositories. [#593](https://github.com/sourcebot-dev/sourcebot/pull/593)
### Removed
- Removed built-in secret manager. [#592](https://github.com/sourcebot-dev/sourcebot/pull/592)
### Changed
- Changed internal representation of how repo permissions are represented in the database. [#600](https://github.com/sourcebot-dev/sourcebot/pull/600)
## [4.8.1] - 2025-10-29
### Fixed
- Fixed commit and branch hyperlinks not rendering for Gerrit repos. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
- Fixed visual bug when a repository does not have an image. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
- Fixed issue where the Ask homepage was not scrollable. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
## [4.8.0] - 2025-10-28 ## [4.8.0] - 2025-10-28
### Added ### Added

View file

@ -36,15 +36,20 @@
docker compose -f docker-compose-dev.yml up -d docker compose -f docker-compose-dev.yml up -d
``` ```
6. Create a copy of `.env.development` and name it `.env.development.local`. Update the required environment variables. 6. Generate the database schema.
```sh
yarn dev:prisma:migrate:dev
```
7. If you're using a declarative configuration file, create a configuration file and update the `CONFIG_PATH` environment variable in your `.env.development.local` file. 7. Create a copy of `.env.development` and name it `.env.development.local`. Update the required environment variables.
8. Start Sourcebot with the command: 8. If you're using a declarative configuration file, create a configuration file and update the `CONFIG_PATH` environment variable in your `.env.development.local` file.
9. Start Sourcebot with the command:
```sh ```sh
yarn dev yarn dev
``` ```
A `.sourcebot` directory will be created and zoekt will begin to index the repositories found in the `config.json` file. A `.sourcebot` directory will be created and zoekt will begin to index the repositories found in the `config.json` file.
9. Start searching at `http://localhost:3000`. 10. Start searching at `http://localhost:3000`.

View file

@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1
# ------ Global scope variables ------ # ------ Global scope variables ------
# Set of global build arguments. # Set of global build arguments.
@ -8,11 +9,6 @@
# @see: https://docs.docker.com/build/building/variables/#scoping # @see: https://docs.docker.com/build/building/variables/#scoping
ARG NEXT_PUBLIC_SOURCEBOT_VERSION ARG NEXT_PUBLIC_SOURCEBOT_VERSION
# PAPIK = Project API Key
# Note that this key does not need to be kept secret, so it's not
# necessary to use Docker build secrets here.
# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
@ -42,17 +38,13 @@ COPY package.json yarn.lock* .yarnrc.yml ./
COPY .yarn ./.yarn COPY .yarn ./.yarn
COPY ./packages/db ./packages/db COPY ./packages/db ./packages/db
COPY ./packages/schemas ./packages/schemas COPY ./packages/schemas ./packages/schemas
COPY ./packages/crypto ./packages/crypto
COPY ./packages/error ./packages/error
COPY ./packages/logger ./packages/logger
COPY ./packages/shared ./packages/shared COPY ./packages/shared ./packages/shared
COPY ./packages/queryLanguage ./packages/queryLanguage
RUN yarn workspace @sourcebot/db install RUN yarn workspace @sourcebot/db install
RUN yarn workspace @sourcebot/schemas install RUN yarn workspace @sourcebot/schemas install
RUN yarn workspace @sourcebot/crypto install
RUN yarn workspace @sourcebot/error install
RUN yarn workspace @sourcebot/logger install
RUN yarn workspace @sourcebot/shared install RUN yarn workspace @sourcebot/shared install
RUN yarn workspace @sourcebot/query-language install
# ------------------------------------ # ------------------------------------
# ------ Build Web ------ # ------ Build Web ------
@ -61,8 +53,6 @@ ENV SKIP_ENV_VALIDATION=1
# ----------- # -----------
ARG NEXT_PUBLIC_SOURCEBOT_VERSION ARG NEXT_PUBLIC_SOURCEBOT_VERSION
ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
@ -97,10 +87,8 @@ COPY ./packages/web ./packages/web
COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
# Fixes arm64 timeouts # Fixes arm64 timeouts
RUN yarn workspace @sourcebot/web install RUN yarn workspace @sourcebot/web install
@ -138,10 +126,8 @@ COPY ./packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
RUN yarn workspace @sourcebot/backend install RUN yarn workspace @sourcebot/backend install
RUN yarn workspace @sourcebot/backend build RUN yarn workspace @sourcebot/backend build
@ -156,14 +142,12 @@ fi
ENV SKIP_ENV_VALIDATION=0 ENV SKIP_ENV_VALIDATION=0
# ------------------------------ # ------------------------------
# ------ Runner ------ # ------ Runner ------
FROM node-alpine AS runner FROM node-alpine AS runner
# ----------- # -----------
ARG NEXT_PUBLIC_SOURCEBOT_VERSION ARG NEXT_PUBLIC_SOURCEBOT_VERSION
ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
@ -185,9 +169,13 @@ ENV DATA_DIR=/data
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
ENV REDIS_URL="redis://localhost:6379"
ENV SRC_TENANT_ENFORCEMENT_MODE=strict
ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem
# PAPIK = Project API Key
# Note that this key does not need to be kept secret, so it's not
# necessary to use Docker build secrets here.
# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key
# @note: this is also declared in the shared env.server.ts file.
ENV POSTHOG_PAPIK=phc_lLPuFFi5LH6c94eFJcqvYVFwiJffVcV6HD8U4a1OnRW
# Valid values are: debug, info, warn, error # Valid values are: debug, info, warn, error
ENV SOURCEBOT_LOG_LEVEL=info ENV SOURCEBOT_LOG_LEVEL=info
@ -195,6 +183,23 @@ ENV SOURCEBOT_LOG_LEVEL=info
# Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). Uncomment this line to disable. # Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). Uncomment this line to disable.
# ENV SOURCEBOT_TELEMETRY_DISABLED=1 # ENV SOURCEBOT_TELEMETRY_DISABLED=1
# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip
ARG UID=1500
ARG GID=1500
# Always create the non-root user to support runtime user switching
# The container can be run as root (default) or as sourcebot user using docker run --user
RUN addgroup -g $GID sourcebot && \
adduser -D -u $UID -h /app -S sourcebot && \
adduser sourcebot postgres && \
adduser sourcebot redis && \
chown -R sourcebot /app && \
adduser sourcebot node && \
mkdir /var/log/sourcebot && \
chown sourcebot /var/log/sourcebot
COPY package.json yarn.lock* .yarnrc.yml public.pem ./ COPY package.json yarn.lock* .yarnrc.yml public.pem ./
COPY .yarn ./.yarn COPY .yarn ./.yarn
@ -214,24 +219,23 @@ COPY --from=zoekt-builder \
/cmd/zoekt-index \ /cmd/zoekt-index \
/usr/local/bin/ /usr/local/bin/
RUN chown -R sourcebot:sourcebot /app
# Copy zoekt proto files (needed for gRPC client at runtime)
COPY --chown=sourcebot:sourcebot vendor/zoekt/grpc/protos /app/vendor/zoekt/grpc/protos
# Copy all of the things # Copy all of the things
COPY --from=web-builder /app/packages/web/public ./packages/web/public COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/public ./packages/web/public
COPY --from=web-builder /app/packages/web/.next/standalone ./ COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/standalone ./
COPY --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static
COPY --from=backend-builder /app/node_modules ./node_modules COPY --chown=sourcebot:sourcebot --from=backend-builder /app/node_modules ./node_modules
COPY --from=backend-builder /app/packages/backend ./packages/backend COPY --chown=sourcebot:sourcebot --from=backend-builder /app/packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip
# Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container. # Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container.
RUN git config --global safe.directory "*" RUN git config --global safe.directory "*"
@ -241,13 +245,22 @@ RUN mkdir -p /run/postgresql && \
chown -R postgres:postgres /run/postgresql && \ chown -R postgres:postgres /run/postgresql && \
chmod 775 /run/postgresql chmod 775 /run/postgresql
# Make app directory accessible to both root and sourcebot user
RUN chown -R sourcebot /app \
&& chgrp -R 0 /app \
&& chmod -R g=u /app
# Make data directory accessible to both root and sourcebot user
RUN chown -R sourcebot /data
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY prefix-output.sh ./prefix-output.sh COPY prefix-output.sh ./prefix-output.sh
RUN chmod +x ./prefix-output.sh RUN chmod +x ./prefix-output.sh
COPY entrypoint.sh ./entrypoint.sh COPY entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh RUN chmod +x ./entrypoint.sh
COPY default-config.json . # Note: for back-compat cases, we do _not_ set the USER directive here.
# Instead, the user can be overridden at runtime with --user flag.
# USER sourcebot
EXPOSE 3000 EXPOSE 3000
ENV PORT=3000 ENV PORT=3000

View file

@ -28,10 +28,6 @@ clean:
packages/db/dist \ packages/db/dist \
packages/schemas/node_modules \ packages/schemas/node_modules \
packages/schemas/dist \ packages/schemas/dist \
packages/crypto/node_modules \
packages/crypto/dist \
packages/error/node_modules \
packages/error/dist \
packages/mcp/node_modules \ packages/mcp/node_modules \
packages/mcp/dist \ packages/mcp/dist \
packages/shared/node_modules \ packages/shared/node_modules \

View file

@ -72,15 +72,22 @@ https://github.com/user-attachments/assets/31ec0669-707d-4e03-b511-1bc33d44197a
# Deploy Sourcebot # Deploy Sourcebot
Sourcebot can be deployed in seconds using our official docker image. Visit our [docs](https://docs.sourcebot.dev/docs/deployment-guide) for more information. Sourcebot can be deployed in seconds using Docker Compose. Visit our [docs](https://docs.sourcebot.dev/docs/deployment/docker-compose) for more information.
1. Create a config 1. Download the docker-compose.yml file
```sh
curl -o docker-compose.yml https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/docker-compose.yml
```
2. In the same directory as the `docker-compose.yml` file, create a [configuration file](https://docs.sourcebot.dev/docs/configuration/config-file). The configuration file is a JSON file that configures Sourcebot's behaviour, including what repositories to index, language model providers, auth providers, and more.
```sh ```sh
touch config.json touch config.json
echo '{ echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json", "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
// Comments are supported.
// This config creates a single connection to GitHub.com that
// indexes the Sourcebot repository
"connections": { "connections": {
// Comments are supported
"starter-connection": { "starter-connection": {
"type": "github", "type": "github",
"repos": [ "repos": [
@ -91,37 +98,19 @@ echo '{
}' > config.json }' > config.json
``` ```
2. Run the docker container 3. Update the secrets in the `docker-compose.yml` and then run Sourcebot using:
```sh ```sh
docker run \ docker compose up
-p 3000:3000 \
--pull=always \
--rm \
-v $(pwd):/data \
-e CONFIG_PATH=/data/config.json \
--name sourcebot \
ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
<details>
<summary>What does this command do?</summary>
- Pull and run the Sourcebot docker image from [ghcr.io/sourcebot-dev/sourcebot:latest](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot). 4. Visit `http://localhost:3000` to start using Sourcebot
- Mount the current directory (`-v $(pwd):/data`) to allow Sourcebot to persist the `.sourcebot` cache.
- Clones sourcebot at `HEAD` into `.sourcebot/github/sourcebot-dev/sourcebot`.
- Indexes sourcebot into a .zoekt index file in `.sourcebot/index/`.
- Map port 3000 between your machine and the docker image.
- Starts the web server on port 3000.
</details>
</br>
3. Visit `http://localhost:3000` to start using Sourcebot
</br> </br>
To configure Sourcebot (index your own repos, connect your LLMs, etc), check out our [docs](https://docs.sourcebot.dev/docs/configuration/config-file). To configure Sourcebot (index your own repos, connect your LLMs, etc), check out our [docs](https://docs.sourcebot.dev/docs/configuration/config-file).
> [!NOTE] > [!NOTE]
> Sourcebot collects <a href="https://demo.sourcebot.dev/~/search?query=captureEvent%5C(%20repo%3Asourcebot">anonymous usage data</a> by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment > Sourcebot collects <a href="https://demo.sourcebot.dev/~/search?query=captureEvent%5C(%20repo%3Asourcebot">anonymous usage data</a> by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment
> variable to `true`. Please refer to our [telemetry docs](https://docs.sourcebot.dev/self-hosting/overview#telemetry) for more information. > variable to `true`. Please refer to our [telemetry docs](https://docs.sourcebot.dev/docs/overview#telemetry) for more information.
# Build from source # Build from source
>[!NOTE] >[!NOTE]

View file

@ -1,11 +0,0 @@
{
"$schema": "./schemas/v2/index.json",
"repos": [
{
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
]
}

View file

@ -1,243 +0,0 @@
// This is the config file for https://demo.sourcebot.dev.
// To add a new repository, edit this file and open a PR.
// After the PR is merged, the deploy demo workflow will
// run (see: https://github.com/sourcebot-dev/sourcebot/actions/workflows/deploy-demo.yml),
// after which the changes will be reflected on the demo site.
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"connections": {
// Defines the GitHub repositories.
// See: https://docs.sourcebot.dev/docs/connections/github
"github-repos": {
"type": "github",
"token": {
"env": "GITHUB_TOKEN"
},
"repos": [
"torvalds/linux",
"pytorch/pytorch",
"commaai/openpilot",
"ggerganov/whisper.cpp",
"ggerganov/llama.cpp",
"codemirror/dev",
"tailwindlabs/tailwindcss",
"sourcebot-dev/sourcebot",
"sindresorhus/awesome",
"facebook/react",
"vinta/awesome-python",
"vuejs/vue",
"TheAlgorithms/Python",
"tensorflow/tensorflow",
"twbs/bootstrap",
"flutter/flutter",
"microsoft/vscode",
"github/gitignore",
"airbnb/javascript",
"AUTOMATIC1111/stable-diffusion-webui",
"huggingface/transformers",
"avelino/awesome-go",
"ytdl-org/youtube-dl",
"vercel/next.js",
"golang/go",
"facebook/react-native",
"electron/electron",
"Genymobile/scrcpy",
"f/awesome-chatgpt-prompts",
"microsoft/PowerToys",
"kubernetes/kubernetes",
"d3/d3",
"nodejs/node",
"massgravel/Microsoft-Activation-Scripts",
"axios/axios",
"mrdoob/three.js",
"krahets/hello-algo",
"facebook/create-react-app",
"ollama/ollama",
"microsoft/TypeScript",
"goldbergyoni/nodebestpractices",
"rust-lang/rust",
"denoland/deno",
"angular/angular",
"langchain-ai/langchain",
"microsoft/terminal",
"521xueweihan/HelloGitHub",
"mui/material-ui",
"ant-design/ant-design",
"yt-dlp/yt-dlp",
"puppeteer/puppeteer",
"papers-we-love/papers-we-love",
"iptv-org/iptv",
"fatedier/frp",
"excalidraw/excalidraw",
"tauri-apps/tauri",
"neovim/neovim",
"django/django",
"florinpop17/app-ideas",
"animate-css/animate.css",
"nvm-sh/nvm",
"gothinkster/realworld",
"bitcoin/bitcoin",
"sveltejs/svelte",
"opencv/opencv",
"gin-gonic/gin",
"laravel/laravel",
"fastapi/fastapi",
"macrozheng/mall",
"jaywcjlove/awesome-mac",
"tonsky/FiraCode",
"rustdesk/rustdesk",
"tensorflow/models",
"doocs/advanced-java",
"shadcn-ui/ui",
"gohugoio/hugo",
"spring-projects/spring-boot",
"supabase/supabase",
"oven-sh/bun",
"FortAwesome/Font-Awesome",
"home-assistant/core",
"typicode/json-server",
"mermaid-js/mermaid",
"openai/whisper",
"netdata/netdata",
"vuejs/awesome-vue",
"3b1b/manim",
"2dust/v2rayN",
"nomic-ai/gpt4all",
"elastic/elasticsearch",
"fighting41love/funNLP",
"vitejs/vite",
"coder/code-server",
"moby/moby",
"CompVis/stable-diffusion",
"base-org/node",
"nestjs/nest",
"pallets/flask",
"hakimel/reveal.js",
"microsoft/playwright",
"swiftlang/swift",
"redis/redis",
"bregman-arie/devops-exercises",
"binary-husky/gpt_academic",
"junegunn/fzf",
"syncthing/syncthing",
"hoppscotch/hoppscotch",
"protocolbuffers/protobuf",
"enaqx/awesome-react",
"expressjs/express",
"microsoft/generative-ai-for-beginners",
"grafana/grafana",
"abi/screenshot-to-code",
"chartjs/Chart.js",
"webpack/webpack",
"d2l-ai/d2l-zh",
"strapi/strapi",
"python/cpython",
"leonardomso/33-js-concepts",
"kdn251/interviews",
"ventoy/Ventoy",
"ansible/ansible",
"apache/superset",
"tesseract-ocr/tesseract",
"lydiahallie/javascript-questions",
"FuelLabs/sway",
"keras-team/keras",
"resume/resume.github.com",
"swisskyrepo/PayloadsAllTheThings",
"ocornut/imgui",
"socketio/socket.io",
"awesomedata/awesome-public-datasets",
"louislam/uptime-kuma",
"kelseyhightower/nocode",
"sherlock-project/sherlock",
"reduxjs/redux",
"apache/echarts",
"obsproject/obs-studio",
"openai/openai-cookbook",
"fffaraz/awesome-cpp",
"scikit-learn/scikit-learn",
"TheAlgorithms/Java",
"atom/atom",
"Eugeny/tabby",
"lodash/lodash",
"caddyserver/caddy",
"sindresorhus/awesome-nodejs",
"rust-unofficial/awesome-rust",
"streamich/react-use",
"pocketbase/pocketbase",
"lllyasviel/Fooocus",
"k88hudson/git-flight-rules",
"react-hook-form/react-hook-form",
"koajs/koa",
"SheetJS/sheetjs",
"trpc/trpc",
"LC044/WeChatMsg",
"airbnb/lottie-android",
"huihut/interview",
"jgm/pandoc",
"google/googletest",
"date-fns/date-fns",
"nativefier/nativefier",
"openai/gym",
"files-community/Files",
"sahat/hackathon-starter",
"appsmithorg/appsmith",
"ultralytics/ultralytics",
"slidevjs/slidev",
"xitu/gold-miner",
"sorrycc/awesome-javascript",
"astral-sh/ruff",
"logseq/logseq",
"shadowsocks/shadowsocks",
"ccxt/ccxt",
"netty/netty",
"tw93/Pake",
"fxsjy/jieba",
"atlassian/react-beautiful-dnd",
"ToolJet/ToolJet",
"markedjs/marked",
"typicode/husky",
"laravel/framework",
"TheAlgorithms/JavaScript",
"bilibili/ijkplayer",
"solidjs/solid",
"fastify/fastify",
"huggingface/pytorch-image-models",
"shadowsocks/ShadowsocksX-NG",
"carbon-language/carbon-lang",
"s0md3v/roop",
"ascoders/weekly",
"backstage/backstage",
"servo/servo",
"composer/composer",
"tastejs/todomvc",
"lutzroeder/netron",
"alibaba/canal",
"tinygrad/tinygrad",
"ManimCommunity/manim",
"filebrowser/filebrowser",
"nicolargo/glances",
"iperov/DeepFaceLive",
"StevenBlack/hosts",
"crossoverJie/JCSprout",
"mantinedev/mantine",
"Automattic/mongoose",
"eslint/eslint",
"nextauthjs/next-auth",
"flameshot-org/flameshot",
"envoyproxy/envoy",
"sourcebot-dev/zoekt"
]
},
// Defines the GitLab repositories.
// See: https://docs.sourcebot.dev/docs/connections/gitlab
"gitlab-repos": {
"type": "gitlab",
"projects": [
"gnachman/iterm2"
]
}
},
"settings": {
"reindexIntervalMs": 86400000 // 24 hours
}
}

66
docker-compose.yml Normal file
View file

@ -0,0 +1,66 @@
services:
sourcebot:
image: ghcr.io/sourcebot-dev/sourcebot:latest
user: sourcebot
restart: always
container_name: sourcebot
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
ports:
- "3000:3000"
volumes:
- ./config.json:/data/config.json
- sourcebot_data:/data
environment:
- CONFIG_PATH=/data/config.json
- AUTH_URL=${AUTH_URL:-http://localhost:3000}
- AUTH_SECRET=${AUTH_SECRET:-000000000000000000000000000000000} # CHANGEME: generate via `openssl rand -base64 33`
- SOURCEBOT_ENCRYPTION_KEY=${SOURCEBOT_ENCRYPTION_KEY:-000000000000000000000000000000000} # CHANGEME: generate via `openssl rand -base64 24`
- DATABASE_URL=${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/postgres} # CHANGEME
- REDIS_URL=${REDIS_URL:-redis://redis:6379} # CHANGEME
- SOURCEBOT_EE_LICENSE_KEY=${SOURCEBOT_EE_LICENSE_KEY:-}
- SOURCEBOT_TELEMETRY_DISABLED=${SOURCEBOT_TELEMETRY_DISABLED:-false}
# For the full list of environment variables see:
# https://docs.sourcebot.dev/docs/configuration/environment-variables
postgres:
image: docker.io/postgres:${POSTGRES_VERSION:-latest}
restart: always
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 3s
timeout: 3s
retries: 10
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres # CHANGEME
POSTGRES_DB: postgres
ports:
- 127.0.0.1:5432:5432
volumes:
- sourcebot_postgres_data:/var/lib/postgresql/data
redis:
image: docker.io/redis:${REDIS_VERSION:-latest}
restart: always
ports:
- 127.0.0.1:6379:6379
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 3s
timeout: 10s
retries: 10
volumes:
- sourcebot_redis_data:/data
volumes:
sourcebot_data:
driver: local
sourcebot_postgres_data:
driver: local
sourcebot_redis_data:
driver: local

View file

@ -21,7 +21,13 @@
"group": "Getting Started", "group": "Getting Started",
"pages": [ "pages": [
"docs/overview", "docs/overview",
"docs/deployment-guide" {
"group": "Deployment",
"pages": [
"docs/deployment/docker-compose",
"docs/deployment/k8s"
]
}
] ]
}, },
{ {
@ -79,6 +85,7 @@
] ]
}, },
"docs/configuration/language-model-providers", "docs/configuration/language-model-providers",
"docs/configuration/idp",
{ {
"group": "Authentication", "group": "Authentication",
"pages": [ "pages": [
@ -137,7 +144,7 @@
"socials": { "socials": {
"github": "https://github.com/sourcebot-dev/sourcebot", "github": "https://github.com/sourcebot-dev/sourcebot",
"twitter": "https://x.com/sourcebot_dev", "twitter": "https://x.com/sourcebot_dev",
"discord": "https://discord.gg/Y6b78RqM", "discord": "https://discord.gg/HDScTs3ptP",
"linkedin": "https://www.linkedin.com/company/sourcebot" "linkedin": "https://www.linkedin.com/company/sourcebot"
} }
}, },

View file

@ -25,4 +25,4 @@ Sourcebot's built-in authentication system gates your deployment, and allows adm
# Troubleshooting # Troubleshooting
- If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers). - If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers).
- Still not working? Reach out to us on our [discord](https://discord.com/invite/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) - Still not working? Reach out to us on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)

View file

@ -26,87 +26,5 @@ See [transactional emails](/docs/configuration/transactional-emails) for more de
# Enterprise Authentication Providers # Enterprise Authentication Providers
The following authentication providers require an [enterprise license](/docs/license-key) to be enabled. Sourcebot supports authentication using several different [external identity providers](/docs/configuration/idp) as well. These identity providers require an
[enterprise license](/docs/license-key).
### GitHub
---
[Auth.js GitHub Provider Docs](https://authjs.dev/getting-started/providers/github)
Authentication using both a **GitHub OAuth App** and a **GitHub App** is supported. In both cases, you must provide Sourcebot with the `CLIENT_ID` and `CLIENT_SECRET` and configure the
callback URL correctly (more info in Auth.js docs).
When using a **GitHub App** for auth, enable the following permissions:
- `"Email addresses" account permissions (read)`
- `"Metadata" repository permissions (read)` (only needed if enabling [permission syncing](/docs/features/permission-syncing))
**Required environment variables:**
- `AUTH_EE_GITHUB_CLIENT_ID`
- `AUTH_EE_GITHUB_CLIENT_SECRET`
Optional environment variables:
- `AUTH_EE_GITHUB_BASE_URL` - Base URL for GitHub Enterprise (defaults to https://github.com)
### GitLab
---
[Auth.js GitLab Provider Docs](https://authjs.dev/getting-started/providers/gitlab)
**Required environment variables:**
- `AUTH_EE_GITLAB_CLIENT_ID`
- `AUTH_EE_GITLAB_CLIENT_SECRET`
Optional environment variables:
- `AUTH_EE_GITLAB_BASE_URL` - Base URL for GitLab instance (defaults to https://gitlab.com)
### Google
---
[Auth.js Google Provider Docs](https://authjs.dev/getting-started/providers/google)
**Required environment variables:**
- `AUTH_EE_GOOGLE_CLIENT_ID`
- `AUTH_EE_GOOGLE_CLIENT_SECRET`
### GCP IAP
---
<Note>If you're running Sourcebot in an environment that blocks egress, make sure you allow the [IAP IP ranges](https://www.gstatic.com/ipranges/goog.json)</Note>
Custom provider built to enable automatic Sourcebot account registration/login when using GCP IAP.
**Required environment variables:**
- `AUTH_EE_GCP_IAP_ENABLED`
- `AUTH_EE_GCP_IAP_AUDIENCE`
- This can be found by selecting the ⋮ icon next to the IAP-enabled backend service and pressing `Get JWT audience code`
### Okta
---
[Auth.js Okta Provider Docs](https://authjs.dev/getting-started/providers/okta)
**Required environment variables:**
- `AUTH_EE_OKTA_CLIENT_ID`
- `AUTH_EE_OKTA_CLIENT_SECRET`
- `AUTH_EE_OKTA_ISSUER`
### Keycloak
---
[Auth.js Keycloak Provider Docs](https://authjs.dev/getting-started/providers/keycloak)
**Required environment variables:**
- `AUTH_EE_KEYCLOAK_CLIENT_ID`
- `AUTH_EE_KEYCLOAK_CLIENT_SECRET`
- `AUTH_EE_KEYCLOAK_ISSUER`
### Microsoft Entra ID
[Auth.js Microsoft Entra ID Provider Docs](https://authjs.dev/getting-started/providers/microsoft-entra-id)
**Required environment variables:**
- `AUTH_EE_MICROSOFT_ENTRA_ID_CLIENT_ID`
- `AUTH_EE_MICROSOFT_ENTRA_ID_CLIENT_SECRET`
- `AUTH_EE_MICROSOFT_ENTRA_ID_ISSUER`
---

View file

@ -3,6 +3,9 @@ title: Config File
sidebarTitle: Config file sidebarTitle: Config file
--- ---
import ConfigSchema from '/snippets/schemas/v3/index.schema.mdx'
import EnvironmentOverridesSchema from '/snippets/schemas/v3/environmentOverrides.schema.mdx'
When self-hosting Sourcebot, you **must** provide it a config file. This is done by defining a config file in a volume that's mounted to Sourcebot, and providing the path to this When self-hosting Sourcebot, you **must** provide it a config file. This is done by defining a config file in a volume that's mounted to Sourcebot, and providing the path to this
file in the `CONFIG_PATH` environment variable. For example: file in the `CONFIG_PATH` environment variable. For example:
@ -49,3 +52,103 @@ The following are settings that can be provided in your config file to modify So
| `enablePublicAccess` **(deprecated)** | boolean | false | — | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. | | `enablePublicAccess` **(deprecated)** | boolean | false | — | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. |
| `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24hours | 1 | Interval at which the repo permission syncer should run. | | `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24hours | 1 | Interval at which the repo permission syncer should run. |
| `experiment_userDrivenPermissionSyncIntervalMs` | number | 24hours | 1 | Interval at which the user permission syncer should run. | | `experiment_userDrivenPermissionSyncIntervalMs` | number | 24hours | 1 | Interval at which the user permission syncer should run. |
# Tokens
Tokens are used to securely pass secrets to Sourcebot in a config file. They are used in various places, including connections, language model providers, auth providers, etc. Tokens can be passed as either environment variables or Google Cloud secrets:
<AccordionGroup>
<Accordion title="Environment Variables">
```json
{
"token": {
"env": "TOKEN_NAME"
}
}
```
</Accordion>
<Accordion title="Google Cloud Secrets">
```json
{
"token": {
"googleCloudSecret": "projects/<project-id>/secrets/<secret-name>/versions/<version-id>"
}
}
```
</Accordion>
</AccordionGroup>
# Overriding environment variables from the config
You can override / set environment variables from the config file by using the `environmentOverrides` property. Overrides can be of type `string`, `number`, `boolean`, or a [token](/docs/configuration/config-file#tokens). Tokens are useful when you want to configure an environment variable using a Google Cloud Secret or other supported secret management service.
<AccordionGroup>
<Accordion title="Token">
```jsonc
{
"environmentOverrides": {
"DATABASE_URL": {
"type": "token",
"value": {
"googleCloudSecret": "projects/<id>/secrets/postgres-connection-string/versions/latest"
}
},
"REDIS_URL": {
"type": "token",
"value": {
"googleCloudSecret": "projects/<id>/secrets/redis-connection-string/versions/latest"
}
}
},
}
```
</Accordion>
<Accordion title="String">
```jsonc
{
"environmentOverrides": {
"EMAIL_FROM_ADDRESS": {
"type": "string",
"value": "hello@sourcebot.dev"
}
}
}
```
</Accordion>
<Accordion title="Number">
```jsonc
{
"environmentOverrides": {
"SOURCEBOT_CHAT_MODEL_TEMPERATURE": {
"type": "number",
"value": 0.5
}
}
}
```
</Accordion>
<Accordion title="Boolean">
```jsonc
{
"environmentOverrides": {
"SOURCEBOT_TELEMETRY_DISABLED": {
"type": "boolean",
"value": false
}
}
}
```
</Accordion>
</AccordionGroup>
**Note:** Overrides are **not** set as system environment variables, and instead are resolved at runtime on startup and stored in memory.
<Accordion title="Schema reference">
[schemas/v3/environmentOverrides.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/environmentOverrides.json)
<EnvironmentOverridesSchema />
</Accordion>

View file

@ -1,10 +1,9 @@
--- ---
title: Environment variables title: Environment variables
sidebarTitle: Environment variables sidebarTitle: Environment variables
mode: "wide"
--- ---
<Note>This page provides a detailed reference of all environment variables supported by Sourcebot. If you're just looking to get up and running, we recommend starting with the [deployment guide](/docs/deployment-guide) instead.</Note> <Note>This page provides a detailed reference of all environment variables supported by Sourcebot. If you're just looking to get up and running, we recommend starting with the [deployment guides](/docs/deployment/docker-compose) instead.</Note>
### Core Environment Variables ### Core Environment Variables
The following environment variables allow you to configure your Sourcebot deployment. The following environment variables allow you to configure your Sourcebot deployment.
@ -35,6 +34,8 @@ The following environment variables allow you to configure your Sourcebot deploy
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> | | `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> | | `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> | | `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
| `DEFAULT_MAX_MATCH_COUNT` | `10000` | <p>The default maximum number of search results to return when using search in the web app.</p> |
| `ALWAYS_INDEX_FILE_PATTERNS` | - | <p>A comma-separated list of glob patterns matching file paths that should always be indexed, regardless of size or number of trigrams.</p> |
### Enterprise Environment Variables ### Enterprise Environment Variables
| Variable | Default | Description | | Variable | Default | Description |
@ -71,3 +72,6 @@ The following environment variables allow you to configure your Sourcebot deploy
| `REVIEW_AGENT_LOGGING_ENABLED` | `true` | <p>Enables/disables logging for the review agent. Logs are saved in `DATA_CACHE_DIR/review-agent`</p> | | `REVIEW_AGENT_LOGGING_ENABLED` | `true` | <p>Enables/disables logging for the review agent. Logs are saved in `DATA_CACHE_DIR/review-agent`</p> |
| `REVIEW_AGENT_REVIEW_COMMAND` | `review` | <p>The command used to trigger a code review by the review agent.</p> | | `REVIEW_AGENT_REVIEW_COMMAND` | `review` | <p>The command used to trigger a code review by the review agent.</p> |
### Overriding environment variables from the config
You can override environment variables from the config file by using the `environmentOverrides` property. See [this doc](/docs/configuration/config-file#overriding-environment-variables-from-the-config) for more info.

View file

@ -0,0 +1,418 @@
---
title: External Identity Providers
sidebarTitle: External identity providers
---
import LicenseKeyRequired from '/snippets/license-key-required.mdx'
<LicenseKeyRequired />
You can connect Sourcebot to various **external identity providers** to associate a Sourcebot user with one or more external service accounts (ex. Google, GitHub, etc).
External identity providers can be used for [authentication](/docs/configuration/auth) and/or [permission syncing](/docs/features/permission-syncing). They're defined in the
[config file](/docs/configuration/config-file) in the top-level `identityProviders` object:
```json wrap icon="code" Example config with both Google and GitHub identity providers defined
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "github",
"purpose": "account_linking",
"accountLinkingRequired": true,
"clientId": {
"env": "GITHUB_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "GITHUB_IDENTITY_PROVIDER_CLIENT_SECRET"
}
},
{
"provider": "google",
"clientId": {
"env": "GOOGLE_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "GOOGLE_IDENTITY_PROVIDER_CLIENT_SECRET"
}
}
]
}
```
Secret values (such as `clientId` and `clientSecret`) can be provided as environment variables or Google Cloud secrets via [tokens](/docs/configuration/config-file#tokens).
# Supported External Identity Providers
Sourcebot uses [Auth.js](https://authjs.dev/) to connect to external identity providers. If there's a provider supported by Auth.js that you don't see below, please submit a
[feature request](https://github.com/sourcebot-dev/sourcebot/issues) to have it added.
### GitHub
[Auth.js GitHub Provider Docs](https://authjs.dev/getting-started/providers/github)
A GitHub connection can be used for either [authentication](/docs/configuration/auth) or [permission syncing](/docs/features/permission-syncing). This is controlled using the `purpose` field
in the GitHub identity provider config.
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Client">
To begin, you must register an OAuth client in GitHub to facilitate the identity provider connection. You can do this by creating a **GitHub App** or a **GitHub OAuth App**. Either
one works, but the **GitHub App** is the [recommended mechanism](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/differences-between-github-apps-and-oauth-apps).
The result of registering an OAuth client is a `CLIENT_ID` and `CLIENT_SECRET` which you'll provide to Sourcebot.
<Tabs>
<Tab title="GitHub App">
<Note>You don't need to install the app to use it as an external identity provider</Note>
Follow [this guide](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to register a new GitHub App.
When asked to provide a callback url, provide `<sourcebot_url>/api/auth/callback/github` (ex. https://sourcebot.coolcorp.com/api/auth/callback/github)
Set the following fine-grained permissions in the GitHub App:
- `"Email addresses" account permissions (read)`
- `"Metadata" repository permissions (read)` (only needed if using permission syncing)
</Tab>
<Tab title="GitHub OAuth App">
Follow [this guide](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/creating-an-oauth-app) by GitHub to create an OAuth App.
When asked to provide a callback url, provide `<sourcebot_url>/api/auth/callback/github` (ex. https://sourcebot.coolcorp.com/api/auth/callback/github)
</Tab>
</Tabs>
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id and secret for your OAuth client you must set them as environment variables. These can be named whatever you like
(ex. `GITHUB_IDENTITY_PROVIDER_CLIENT_ID` and `GITHUB_IDENTITY_PROVIDER_CLIENT_SECRET`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "github",
// "sso" for auth + perm sync, "account_linking" for only perm sync
"purpose": "account_linking",
// if purpose == "account_linking" this controls if a user must connect to the IdP
"accountLinkingRequired": true,
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### GitLab
[Auth.js GitLab Provider Docs](https://authjs.dev/getting-started/providers/gitlab)
A GitLab connection can be used for either [authentication](/docs/configuration/auth) or [permission syncing](/docs/features/permission-syncing). This is controlled using the `purpose` field
in the GitLab identity provider config.
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in GitLab to facilitate the identity provider connection.
Follow [this guide](https://docs.gitlab.com/integration/oauth_provider/) by GitLab to create an OAuth application.
When configuring your application:
- Set the callback URL to `<sourcebot_url>/api/auth/callback/gitlab` (ex. https://sourcebot.coolcorp.com/api/auth/callback/gitlab)
- Enable the `read_user` scope
- If using for permission syncing, also enable the `read_api` scope
The result of registering an OAuth application is an `APPLICATION_ID` (`CLIENT_ID`) and `SECRET` (`CLIENT_SECRET`) which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id and secret for your OAuth application you must set them as environment variables. These can be named whatever you like
(ex. `GITLAB_IDENTITY_PROVIDER_CLIENT_ID` and `GITLAB_IDENTITY_PROVIDER_CLIENT_SECRET`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "gitlab",
// "sso" for auth + perm sync, "account_linking" for only perm sync
"purpose": "account_linking",
// if purpose == "account_linking" this controls if a user must connect to the IdP
"accountLinkingRequired": true,
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
// Optional: for self-hosted GitLab instances
"baseUrl": "https://gitlab.example.com"
}
]
}
```
</Step>
</Steps>
</Accordion>
### Google
[Auth.js Google Provider Docs](https://authjs.dev/getting-started/providers/google)
A Google connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Client">
To begin, you must register an OAuth client in Google Cloud Console to facilitate the identity provider connection.
Follow [this guide](https://support.google.com/cloud/answer/6158849) by Google to create OAuth 2.0 credentials.
When configuring your OAuth client:
- Set the application type to "Web application"
- Add `<sourcebot_url>/api/auth/callback/google` to the authorized redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/google)
The result of creating OAuth credentials is a `CLIENT_ID` and `CLIENT_SECRET` which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id and secret for your OAuth client you must set them as environment variables. These can be named whatever you like
(ex. `GOOGLE_IDENTITY_PROVIDER_CLIENT_ID` and `GOOGLE_IDENTITY_PROVIDER_CLIENT_SECRET`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "google",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Okta
[Auth.js Okta Provider Docs](https://authjs.dev/getting-started/providers/okta)
An Okta connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in Okta to facilitate the identity provider connection.
Follow [this guide](https://developer.okta.com/docs/guides/implement-oauth-for-okta/main/) by Okta to create an OAuth application.
When configuring your application:
- Set the application type to "Web Application"
- Add `<sourcebot_url>/api/auth/callback/okta` to the sign-in redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/okta)
The result of creating an OAuth application is a `CLIENT_ID`, `CLIENT_SECRET`, and `ISSUER` URL which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id, client secret, and issuer for your OAuth application you must set them as environment variables. These can be named whatever you like
(ex. `OKTA_IDENTITY_PROVIDER_CLIENT_ID`, `OKTA_IDENTITY_PROVIDER_CLIENT_SECRET`, and `OKTA_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "okta",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Keycloak
[Auth.js Keycloak Provider Docs](https://authjs.dev/getting-started/providers/keycloak)
A Keycloak connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Client">
To begin, you must register an OAuth client in Keycloak to facilitate the identity provider connection.
Follow [this guide](https://www.keycloak.org/docs/latest/server_admin/#_oidc_clients) by Keycloak to create an OpenID Connect client.
When configuring your client:
- Set the client protocol to "openid-connect"
- Set the access type to "confidential"
- Add `<sourcebot_url>/api/auth/callback/keycloak` to the valid redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/keycloak)
The result of creating an OAuth client is a `CLIENT_ID`, `CLIENT_SECRET`, and an `ISSUER` URL (typically in the format `https://<keycloak-domain>/realms/<realm-name>`) which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id, client secret, and issuer for your OAuth client you must set them as environment variables. These can be named whatever you like
(ex. `KEYCLOAK_IDENTITY_PROVIDER_CLIENT_ID`, `KEYCLOAK_IDENTITY_PROVIDER_CLIENT_SECRET`, and `KEYCLOAK_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "keycloak",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Microsoft Entra ID
[Auth.js Microsoft Entra ID Provider Docs](https://authjs.dev/getting-started/providers/microsoft-entra-id)
A Microsoft Entra ID connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in Microsoft Entra ID (formerly Azure Active Directory) to facilitate the identity provider connection.
Follow [this guide](https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app) by Microsoft to register an application.
When configuring your application:
- Under "Authentication", add a platform and select "Web"
- Set the redirect URI to `<sourcebot_url>/api/auth/callback/microsoft-entra-id` (ex. https://sourcebot.coolcorp.com/api/auth/callback/microsoft-entra-id)
- Under "Certificates & secrets", create a new client secret
The result of registering an application is a `CLIENT_ID` (Application ID), `CLIENT_SECRET`, and `TENANT_ID` which you'll use to construct the issuer URL.
</Step>
<Step title="Define environment variables">
To provide Sourcebot the client id, client secret, and issuer for your OAuth application you must set them as environment variables. These can be named whatever you like
(ex. `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_CLIENT_ID`, `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_CLIENT_SECRET`, and `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_ISSUER`)
The issuer URL should be in the format: `https://login.microsoftonline.com/<TENANT_ID>/v2.0`
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "microsoft-entra-id",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Authentik
[Auth.js Authentik Provider Docs](https://authjs.dev/getting-started/providers/authentik)
An Authentik connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Create an OAuth2/OpenID Connect application">
To begin, you must create an OAuth2/OpenID Connect application in Authentik. For more information, see the [Authentik documentation](https://docs.goauthentik.io/add-secure-apps/applications/manage_apps/#create-an-application-and-provider-pair).
When configuring your application:
- Set the provider type to "OAuth2/OpenID Connect"
- Set the client type to "Confidential"
- Add `<sourcebot_url>/api/auth/callback/authentik` to the redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/authentik)
After creating the application, open the application details to obtain the client id, client secret, and issuer URL (typically in the format `https://<authentik-domain>/application/o/<provider-slug>/`).
</Step>
<Step title="Define environment variables">
The client id, secret, and issuer URL are provided to Sourcebot via environment variables. These can be named whatever you like
(ex. `AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID`, `AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET`, and `AUTHENTIK_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Create an `identityProvider` object in the [config file](/docs/configuration/config-file) with the following fields:
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "authentik",
"purpose": "sso",
"clientId": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET"
},
"issuer": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_ISSUER"
}
}
]
}
```
</Step>
</Steps>
</Accordion>

View file

@ -292,6 +292,7 @@ The OpenAI compatible provider allows you to use any model that is compatible wi
<Accordion title="Troubleshooting"> <Accordion title="Troubleshooting">
- When using [llama.cpp](https://github.com/ggml-org/llama.cpp), if you hit "Failed after 3 attempts. Last error: tools param requires --jinja flag", add the `--jinja` flag to your `llama-server` command. - When using [llama.cpp](https://github.com/ggml-org/llama.cpp), if you hit "Failed after 3 attempts. Last error: tools param requires --jinja flag", add the `--jinja` flag to your `llama-server` command.
- If you're seeing the LLM outputting reasoning tokens wrapped in XML tags (e.g., `<reasoning>`, `<thinking>`, etc.), you can configure the `reasoningTag` parameter to the name of the tag (without angle brackets). This parameter defaults to `think`.
</Accordion> </Accordion>
### OpenRouter ### OpenRouter

View file

@ -86,7 +86,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
Azure Devops Cloud requires you to provide a PAT in order to index your repositories. To learn how to create PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows). Azure Devops Cloud requires you to provide a PAT in order to index your repositories. To learn how to create PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos. Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret: Next, provide the access [token](/docs/configuration/config-file#tokens) via an environment variable which is referenced in the `token` property:
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
@ -113,28 +113,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>
## Schema reference ## Schema reference

View file

@ -100,7 +100,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
Azure Devops Server requires you to provide a PAT in order to index your repositories. To learn how to create PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows). Azure Devops Server requires you to provide a PAT in order to index your repositories. To learn how to create PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos. Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret: Next, provide the access [token](/docs/configuration/config-file#tokens) via an environment variable which is referenced in the `token` property:
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
@ -127,28 +127,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"deploymentType": "server",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>
## Schema reference ## Schema reference

View file

@ -78,7 +78,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
## Authenticating with Bitbucket Cloud ## Authenticating with Bitbucket Cloud
In order to index private repositories, you'll need to provide authentication credentials. You can do this using an `App Password` or an `Access Token` In order to index private repositories, you'll need to provide authentication credentials via a [token](/docs/configuration/config-file#tokens). You can do this using an `App Password` or an `Access Token`
<Tabs> <Tabs>
<Tab title="App Password"> <Tab title="App Password">

View file

@ -70,7 +70,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
## Authenticating with Bitbucket Data Center ## Authenticating with Bitbucket Data Center
In order to index private repositories, you'll need to provide an access token to Sourcebot. In order to index private repositories, you'll need to provide an access token to Sourcebot via a [token](/docs/configuration/config-file#tokens).
Create an access token for the desired scope (repo, project, or workspace). Visit the official [Bitbucket Data Center docs](https://confluence.atlassian.com/bitbucketserver/http-access-tokens-939515499.html) Create an access token for the desired scope (repo, project, or workspace). Visit the official [Bitbucket Data Center docs](https://confluence.atlassian.com/bitbucketserver/http-access-tokens-939515499.html)
for more info. for more info.

View file

@ -81,7 +81,7 @@ In order to index private repositories, you'll need to generate a Gitea access t
![Gitea Access token creation](/images/gitea_pat_creation.png) ![Gitea Access token creation](/images/gitea_pat_creation.png)
Next, provide the access token via the `token` property, either as an environment variable or a secret: Next, provide the access token via an environment variable [token](/docs/configuration/config-file#tokens) which is referenced in the `token` property:
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
@ -107,27 +107,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "gitea",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>
## Connecting to a custom Gitea ## Connecting to a custom Gitea

View file

@ -128,7 +128,7 @@ In order to index private repositories, you'll need to generate a access token a
</Accordion> </Accordion>
</AccordionGroup> </AccordionGroup>
Next, provide the access token via the `token` property, either as an environment variable or a secret: Next, provide the access token via an environment variable [token](/docs/configuration/config-file#tokens) which is referenced in the `token` property:
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
@ -154,27 +154,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "github",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>
## Connecting to a custom GitHub host ## Connecting to a custom GitHub host

View file

@ -116,7 +116,7 @@ In order to index private projects, you'll need to generate a GitLab Personal Ac
![GitLab PAT Scope](/images/gitlab_pat_scopes.png) ![GitLab PAT Scope](/images/gitlab_pat_scopes.png)
Next, provide the PAT via the `token` property, either as an environment variable or a secret: Next, provide the PAT via an environment variable [token](/docs/configuration/config-file#tokens) which is referenced in the `token` property:
<Tabs> <Tabs>
<Tab title="Environment Variable"> <Tab title="Environment Variable">
@ -142,27 +142,6 @@ Next, provide the PAT via the `token` property, either as an environment variabl
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "gitlab",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>
## Connecting to a custom GitLab host ## Connecting to a custom GitLab host

View file

@ -69,6 +69,26 @@ To learn more about how to create a connection for a specific code host, check o
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).</Note> <Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).</Note>
## Indexing Large Files
By default, Sourcebot will skip indexing files that are larger than 2MB or have more than 20,000 trigrams. You can configure this by setting the `maxFileSize` and `maxTrigramCount` [settings](/docs/configuration/config-file#settings).
These limits can be ignored for specific files by passing in a comma separated list of glob patterns matching file paths to the `ALWAYS_INDEX_FILE_PATTERNS` environment variable. For example:
```bash
# Always index all .sum and .lock files
ALWAYS_INDEX_FILE_PATTERNS=**/*.sum,**/*.lock
```
Files that have been skipped are assigned the `skipped` language. You can view a list of all skipped files by using the following query:
```
lang:skipped
```
## Indexing Binary Files
Binary files cannot be indexed by Sourcebot. See [#575](https://github.com/sourcebot-dev/sourcebot/issues/575) for more information.
## Schema reference ## Schema reference
--- ---

View file

@ -1,88 +0,0 @@
---
title: "Deployment guide"
---
import SupportedPlatforms from '/snippets/platform-support.mdx'
The following guide will walk you through the steps to deploy Sourcebot on your own infrastructure. Sourcebot is distributed as a [single docker container](/docs/overview#architecture) that can be deployed to a k8s cluster, a VM, or any platform that supports docker.
<Note>Hit an issue? Please let us know on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) or by [emailing us](mailto:team@sourcebot.dev).</Note>
<Steps>
<Step title="Requirements">
- Docker -> use [Docker Desktop](https://www.docker.com/products/docker-desktop/) on Mac or Windows.
</Step>
<Step title="Create a config.json">
Create a `config.json` file that tells Sourcebot which repositories to sync and index:
```bash wrap icon="terminal" Create example config
touch config.json
echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"connections": {
// comments are supported
"starter-connection": {
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
}
}' > config.json
```
This config creates a single GitHub connection named `starter-connection` that specifies [Sourcebot](https://github.com/sourcebot-dev/sourcebot) as a repo to sync. [Learn more about the config file](/docs/configuration/config-file).
</Step>
<Step title="Launch your instance">
<Warning>If you're deploying Sourcebot behind a domain, you must set the [AUTH_URL](/docs/configuration/environment-variables) environment variable.</Warning>
In the same directory as `config.json`, run the following command to start your instance:
``` bash icon="terminal" Start the Sourcebot container
docker run \
-p 3000:3000 \
--pull=always \
--rm \
-v $(pwd):/data \
-e CONFIG_PATH=/data/config.json \
--name sourcebot \
ghcr.io/sourcebot-dev/sourcebot:latest
```
<Accordion title="Details">
**This command**:
- pulls the latest version of the `sourcebot` docker image.
- mounts the working directory to `/data` in the container to allow Sourcebot to persist data across restarts, and to access the `config.json`. In your local directory, you should see a `.sourcebot` folder created that contains all persistent data.
- runs any pending database migrations.
- starts up all services, including the webserver exposed on port 3000.
- reads `config.json` and starts syncing.
</Accordion>
</Step>
<Step title="Complete onboarding">
Navigate to `http://localhost:3000` and complete the onboarding flow.
</Step>
<Step title="Done">
You're all set! If you'd like to set up [Ask Sourcebot](/docs/features/ask/overview), configure a language model [provider](/docs/configuration/language-model-providers).
</Step>
</Steps>
## Next steps
---
<CardGroup cols={3}>
<Card title="Index your code" icon="code" href="/docs/connections/overview">
Learn how to index your code using Sourcebot
</Card>
<Card title="Language models" icon="brain" href="/docs/configuration/language-model-providers">
Learn how to configure language model providers to start using [Ask Sourcebot](/docs/features/ask/overview)
</Card>
<Card title="Authentication" icon="lock" href="/docs/configuration/auth/overview">
Learn more about how to set up SSO, email codes, and other authentication providers.
</Card>
</CardGroup>

View file

@ -0,0 +1,61 @@
---
title: "Docker Compose"
---
This guide will walk you through deploying Sourcebot locally or on a VM using Docker Compose. We will use the [docker-compose.yml](https://github.com/sourcebot-dev/sourcebot/blob/main/docker-compose.yml) file from the Sourcebot repository. This is the simplest way to get started with Sourcebot.
If you are looking to deploy onto Kubernetes, see the [Kubernetes (Helm)](/docs/deployment/k8s) guide.
## Get started
<Steps>
<Step title="Requirements">
- docker & docker compose. Use [Docker Desktop](https://www.docker.com/products/docker-desktop/) on Mac or Windows.
</Step>
<Step title="Obtain the Docker Compose file">
Download the [docker-compose.yml](https://github.com/sourcebot-dev/sourcebot/blob/main/docker-compose.yml) file from the Sourcebot repository.
```bash wrap icon="terminal"
curl -o docker-compose.yml https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/docker-compose.yml
```
</Step>
<Step title="Create a config.json">
In the same directory as the `docker-compose.yml` file, create a [configuration file](/docs/configuration/config-file). The configuration file is a JSON file that configures Sourcebot's behaviour, including what repositories to index, language model providers, auth providers, and more.
```bash wrap icon="terminal" Create example config
touch config.json
echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
// Comments are supported.
// This config creates a single connection to GitHub.com that
// indexes the Sourcebot repository
"connections": {
"starter-connection": {
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
}
}' > config.json
```
</Step>
<Step title="Launch your instance">
Update the secrets in the `docker-compose.yml` and then run Sourcebot using:
```bash wrap icon="terminal"
docker compose up
```
</Step>
<Step title="Done">
You're all set! Navigate to [http://localhost:3000](http://localhost:3000) to access your Sourcebot instance.
</Step>
</Steps>
## Next steps

View file

@ -0,0 +1,4 @@
---
title: "Kubernetes (Helm)"
url: https://github.com/sourcebot-dev/sourcebot-helm-chart
---

View file

@ -10,7 +10,7 @@ codebase that the agent may fetch to perform the review.
This agent provides codebase-aware reviews for your PRs. For each diff, this agent fetches relevant context from Sourcebot and feeds it into an LLM for a detailed review of your changes. This agent provides codebase-aware reviews for your PRs. For each diff, this agent fetches relevant context from Sourcebot and feeds it into an LLM for a detailed review of your changes.
The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment-guide) The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment/docker-compose)
and then follow the configuration instructions below. and then follow the configuration instructions below.
![AI Code Review Agent Example](/images/review_agent_example.png) ![AI Code Review Agent Example](/images/review_agent_example.png)

View file

@ -14,7 +14,7 @@ follow code nav references, and provide an answer thats rich with inline cita
<Card title="Index repos" icon="book" href="/docs/connections/overview" horizontal="true"> <Card title="Index repos" icon="book" href="/docs/connections/overview" horizontal="true">
Learn how to index your repos so you can ask questions about them Learn how to index your repos so you can ask questions about them
</Card> </Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true"> <Card title="Deployment guide" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps. Learn how to self-host Sourcebot in a few simple steps.
</Card> </Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true"> <Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">

View file

@ -21,6 +21,7 @@ import LicenseKeyRequired from '/snippets/license-key-required.mdx'
| **Go to definition** | Clicking the "go to definition" button in the popover or clicking the symbol name navigates to the symbol's definition. | | **Go to definition** | Clicking the "go to definition" button in the popover or clicking the symbol name navigates to the symbol's definition. |
| **Find references** | Clicking the "find all references" button in the popover lists all references in the explore panel. | | **Find references** | Clicking the "find all references" button in the popover lists all references in the explore panel. |
| **Explore panel** | Lists all references and definitions for the symbol selected in the popover. | | **Explore panel** | Lists all references and definitions for the symbol selected in the popover. |
| **Cross-repository navigation** | You can search across all repositories by clicking the globe icon in the explore panel. By default, references and definitions are scoped to the repository where the symbol is being resolved. |
## How does it work? ## How does it work?

View file

@ -9,7 +9,7 @@ The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP)
<Steps> <Steps>
<Step title="Launch Sourcebot"> <Step title="Launch Sourcebot">
Follow the [deployment guide](/docs/deployment-guide) to launch Sourcebot and get your code indexed. The host url of your instance (e.g., `http://localhost:3000`) is passed to the MCP server via the `SOURCEBOT_HOST` url. Follow the [deployment guides](/docs/deployment/docker-compose) to launch Sourcebot and get your code indexed. The host url of your instance (e.g., `http://localhost:3000`) is passed to the MCP server via the `SOURCEBOT_HOST` url.
If a host is not provided, then the server will fall back to using the demo instance hosted at https://demo.sourcebot.dev. You can see the list of repositories indexed [here](https://demo.sourcebot.dev/~/repos). Add additional repositories by [opening a PR](https://github.com/sourcebot-dev/sourcebot/blob/main/demo-site-config.json). If a host is not provided, then the server will fall back to using the demo instance hosted at https://demo.sourcebot.dev. You can see the list of repositories indexed [here](https://demo.sourcebot.dev/~/repos). Add additional repositories by [opening a PR](https://github.com/sourcebot-dev/sourcebot/blob/main/demo-site-config.json).
</Step> </Step>

View file

@ -1,21 +1,20 @@
--- ---
title: "Permission syncing" title: "Permission syncing"
sidebarTitle: "Permission syncing" sidebarTitle: "Permission syncing"
tag: "experimental"
--- ---
import LicenseKeyRequired from '/snippets/license-key-required.mdx' import LicenseKeyRequired from '/snippets/license-key-required.mdx'
import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx'
<LicenseKeyRequired /> <LicenseKeyRequired />
<ExperimentalFeatureWarning />
# Overview # Overview
Permission syncing allows you to sync Access Control Lists (ACLs) from a code host to Sourcebot. When configured, users signed into Sourcebot (via the code host's OAuth provider) will only be able to access repositories that they have access to on the code host. Practically, this means: Permission syncing allows you to sync Access Control Lists (ACLs) from a code host to Sourcebot. When configured, users signed into Sourcebot will only be able to access repositories
that they have access to on the code host. Practically, this means:
- Code Search results will only include repositories that the user has access to. - Code Search results will only include repositories that the user has access to.
- Code navigation results will only include repositories that the user has access to. - Code navigation results will only include repositories that the user has access to.
- MCP results will only include results from repositories the user has access to.
- Ask Sourcebot (and the underlying LLM) will only have access to repositories that the user has access to. - Ask Sourcebot (and the underlying LLM) will only have access to repositories that the user has access to.
- File browsing is scoped to the repositories that the user has access to. - File browsing is scoped to the repositories that the user has access to.
@ -35,7 +34,7 @@ We are actively working on supporting more code hosts. If you'd like to see a sp
| Platform | Permission syncing | | Platform | Permission syncing |
|:----------|------------------------------| |:----------|------------------------------|
| [GitHub (GHEC & GHES)](/docs/features/permission-syncing#github) | ✅ | | [GitHub (GHEC & GHES)](/docs/features/permission-syncing#github) | ✅ |
| GitLab | 🛑 | | [GitLab (Self-managed & Cloud)](/docs/features/permission-syncing#gitlab) | ✅ |
| Bitbucket Cloud | 🛑 | | Bitbucket Cloud | 🛑 |
| Bitbucket Data Center | 🛑 | | Bitbucket Data Center | 🛑 |
| Gitea | 🛑 | | Gitea | 🛑 |
@ -46,7 +45,7 @@ We are actively working on supporting more code hosts. If you'd like to see a sp
## GitHub ## GitHub
Prerequisite: [Add GitHub as an OAuth provider](/docs/configuration/auth/providers#github). Prerequisite: Configure GitHub as an [external identity provider](/docs/configuration/idp).
Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and **GitHub Enterprise Server**. For organization-owned repositories, users that have **read-only** access (or above) via the following methods will have their access synced to Sourcebot: Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and **GitHub Enterprise Server**. For organization-owned repositories, users that have **read-only** access (or above) via the following methods will have their access synced to Sourcebot:
- Outside collaborators - Outside collaborators
@ -56,9 +55,21 @@ Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and *
- Organization owners. - Organization owners.
**Notes:** **Notes:**
- A GitHub OAuth provider must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works). - A GitHub [external identity provider](/docs/configuration/idp) must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- OAuth tokens must assume the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**. - OAuth tokens must assume the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**.
## GitLab
Prerequisite: Configure GitLab as an [external identity provider](/docs/configuration/idp).
Permission syncing works with **GitLab Self-managed** and **GitLab Cloud**. Users with **Guest** role or above with membership to a group or project will have their access synced to Sourcebot. Both direct and indirect membership to a group or project will be synced with Sourcebot. For more details, see the [GitLab docs](https://docs.gitlab.com/user/project/members/#membership-types).
**Notes:**
- A GitLab [external identity provider](/docs/configuration/idp) must be configured to (1) correlate a Sourcebot user with a GitLab user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- OAuth tokens require the `read_api` scope in order to use the [List projects for the authenticated user API](https://docs.gitlab.com/ee/api/projects.html#list-all-projects) during [User driven syncing](/docs/features/permission-syncing#how-it-works).
# How it works # How it works
Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions: Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions:

View file

@ -22,7 +22,7 @@ Search across all your repos/branches across any code host platform. Blazingly f
<Card title="Branches" icon="split" href="/docs/features/search/multi-branch-indexing" horizontal="true"> <Card title="Branches" icon="split" href="/docs/features/search/multi-branch-indexing" horizontal="true">
Learn how to index and search through your branches Learn how to index and search through your branches
</Card> </Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true"> <Card title="Deployment guides" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps. Learn how to self-host Sourcebot in a few simple steps.
</Card> </Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true"> <Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">

View file

@ -4,32 +4,51 @@ title: Writing search queries
Sourcebot uses a powerful regex-based query language that enables precise code search within large codebases. Sourcebot uses a powerful regex-based query language that enables precise code search within large codebases.
## Syntax reference guide ## Syntax reference guide
Queries consist of space-separated regular expressions. Wrapping expressions in `""` combines them. By default, a file must have at least one match for each expression to be included. Queries consist of space-separated search patterns that are matched against file contents. A file must have at least one match for each expression to be included. Queries can optionally contain search filters to further refine the search results.
## Keyword search (default)
Keyword search matches search patterns exactly in file contents. Wrapping search patterns in `""` combines them as a single expression.
| Example | Explanation |
| :--- | :--- |
| `foo` | Match files containing the keyword `foo` |
| `foo bar` | Match files containing both `foo` **and** `bar` |
| `"foo bar"` | Match files containing the phrase `foo bar` |
| `"foo \"bar\""` | Match files containing `foo "bar"` exactly (escaped quotes) |
## Regex search
Toggle the regex button (`.*`) in the search bar to interpret search patterns as regular expressions.
| Example | Explanation | | Example | Explanation |
| :--- | :--- | | :--- | :--- |
| `foo` | Match files with regex `/foo/` | | `foo` | Match files with regex `/foo/` |
| `foo bar` | Match files with regex `/foo/` **and** `/bar/` | | `foo.*bar` | Match files with regex `/foo.*bar/` (foo followed by any characters, then bar) |
| `"foo bar"` | Match files with regex `/foo bar/` | | `^function\s+\w+` | Match files with regex `/^function\s+\w+/` (function at start of line, followed by whitespace and word characters) |
| `"foo bar"` | Match files with regex `/foo bar/`. Quotes are not matched. |
Multiple expressions can be or'd together with `or`, negated with `-`, or grouped with `()`. ## Search filters
| Example | Explanation | Search queries (keyword or regex) can include multiple search filters to further refine the search results. Some filters can be negated using the `-` prefix.
| :--- | :--- |
| `foo or bar` | Match files with regex `/foo/` **or** `/bar/` |
| `foo -bar` | Match files with regex `/foo/` but **not** `/bar/` |
| `foo (bar or baz)` | Match files with regex `/foo/` **and** either `/bar/` **or** `/baz/` |
Expressions can be prefixed with certain keywords to modify search behavior. Some keywords can be negated using the `-` prefix.
| Prefix | Description | Example | | Prefix | Description | Example |
| :--- | :--- | :--- | | :--- | :--- | :--- |
| `file:` | Filter results from filepaths that match the regex. By default all files are searched. | `file:README` - Filter results to filepaths that match regex `/README/`<br/>`file:"my file"` - Filter results to filepaths that match regex `/my file/`<br/>`-file:test\.ts$` - Ignore results from filepaths that match regex `/test\.ts$/` | | `file:` | Filter results from filepaths that match the regex. By default all files are searched. | `file:README` - Filter results to filepaths that match regex `/README/`<br/>`file:"my file"` - Filter results to filepaths that match regex `/my file/`<br/>`-file:test\.ts$` - Ignore results from filepaths that match regex `/test\.ts$/` |
| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`<br/>`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*` | | `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`<br/>`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*/` |
| `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` | | `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` |
| `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files | | `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files |
| `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` | | `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` |
| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context | | `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context |
## Boolean operators & grouping
By default, space-separated expressions are and'd together. The `or` keyword and parentheses `()` can be used to create more complex boolean logic. Parenthesized expressions can be negated using the `-` prefix.
| Example | Explanation |
| :--- | :--- |
| `foo or bar` | Match files containing `foo` **or** `bar` |
| `foo (bar or baz)` | Match files containing `foo` **and** either `bar` **or** `baz`. |
| `-(foo) bar` | Match files containing `bar` **and not** `foo`. |

View file

@ -7,7 +7,7 @@ sidebarTitle: License key
If you'd like a trial license, [reach out](https://www.sourcebot.dev/contact) and we'll send one over within 24 hours If you'd like a trial license, [reach out](https://www.sourcebot.dev/contact) and we'll send one over within 24 hours
</Note> </Note>
All core Sourcebot features are available [FSL licensed](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) without any limits. Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details. All core Sourcebot features are available under the [FSL license](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license). Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details.
## Activating a license key ## Activating a license key
@ -25,7 +25,7 @@ docker run \
## Feature availability ## Feature availability
--- ---
| Feature | OSS | Licensed | | Feature | [FSL](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) | [Enterprise](https://github.com/sourcebot-dev/sourcebot/blob/main/ee/LICENSE) |
|:---------|:-----|:----------| |:---------|:-----|:----------|
| [Search](/docs/features/search/syntax-reference) | ✅ | ✅ | | [Search](/docs/features/search/syntax-reference) | ✅ | ✅ |
| [Full code host support](/docs/connections/overview) | ✅ | ✅ | | [Full code host support](/docs/connections/overview) | ✅ | ✅ |
@ -34,6 +34,7 @@ docker run \
| [Login with credentials](/docs/configuration/auth/overview) | ✅ | ✅ | | [Login with credentials](/docs/configuration/auth/overview) | ✅ | ✅ |
| [Login with email codes](/docs/configuration/auth/overview) | ✅ | ✅ | | [Login with email codes](/docs/configuration/auth/overview) | ✅ | ✅ |
| [Login with SSO](/docs/configuration/auth/overview#enterprise-authentication-providers) | 🛑 | ✅ | | [Login with SSO](/docs/configuration/auth/overview#enterprise-authentication-providers) | 🛑 | ✅ |
| [Permission syncing](/docs/features/permission-syncing) | 🛑 | ✅ |
| [Code navigation](/docs/features/code-navigation) | 🛑 | ✅ | | [Code navigation](/docs/features/code-navigation) | 🛑 | ✅ |
| [Search contexts](/docs/features/search/search-contexts) | 🛑 | ✅ | | [Search contexts](/docs/features/search/search-contexts) | 🛑 | ✅ |
| [Audit logs](/docs/configuration/audit-logs) | 🛑 | ✅ | | [Audit logs](/docs/configuration/audit-logs) | 🛑 | ✅ |

View file

@ -9,7 +9,7 @@ title: "Overview"
- [MCP](/docs/features/mcp-server): Enrich agent context windows with code across your organization - [MCP](/docs/features/mcp-server): Enrich agent context windows with code across your organization
<CardGroup> <CardGroup>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true"> <Card title="Deployment guides" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps. Learn how to self-host Sourcebot in a few simple steps.
</Card> </Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true"> <Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">
@ -162,7 +162,7 @@ Sourcebot is designed to be easily self-hosted, allowing you to deploy it onto y
--- ---
<CardGroup cols={2}> <CardGroup cols={2}>
<Card horizontal title="Deployment guide ->" href="/docs/deployment-guide" /> <Card horizontal title="Deployment guides ->" href="/docs/deployment/docker-compose" />
<Card horizontal title="Connecting your code ->" href="/docs/connections/overview" /> <Card horizontal title="Connecting your code ->" href="/docs/connections/overview" />
<Card horizontal title="Search syntax reference ->" href="/docs/features/search/syntax-reference" /> <Card horizontal title="Search syntax reference ->" href="/docs/features/search/syntax-reference" />
<Card horizontal title="Code navigation overview ->" href="/docs/features/code-navigation" /> <Card horizontal title="Code navigation overview ->" href="/docs/features/code-navigation" />

View file

@ -78,7 +78,7 @@ If your deployment is dependent on these features, please [reach out](https://gi
After updating your configuration file, restart your Sourcebot deployment to pick up the new changes. After updating your configuration file, restart your Sourcebot deployment to pick up the new changes.
</Step> </Step>
<Step title="You're done!"> <Step title="You're done!">
Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose). Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose).
</Step> </Step>
</Steps> </Steps>
@ -90,4 +90,4 @@ Some things to check:
- Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores - Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores
- Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`) - Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`)
Having trouble migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help. Having trouble migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help.

View file

@ -40,7 +40,7 @@ Please note that the following features are no longer supported in v4:
</Step> </Step>
<Step title="You're done!"> <Step title="You're done!">
Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)
</Step> </Step>
</Steps> </Steps>
@ -58,4 +58,4 @@ to finish upgrading to v4 in single-tenant mode.
- If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`) - If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`)
Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help

View file

@ -24,27 +24,4 @@
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your access token:
![](/images/secrets_list.png)
2. Add the `token` and `user` (username associated with the app password you created) properties to your connection config:
```json
{
"type": "bitbucket",
"deploymentType": "cloud",
"user": "myusername",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>

View file

@ -22,25 +22,4 @@
ghcr.io/sourcebot-dev/sourcebot:latest ghcr.io/sourcebot-dev/sourcebot:latest
``` ```
</Tab> </Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "bitbucket",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs> </Tabs>

View file

@ -77,7 +77,6 @@
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -274,7 +273,6 @@
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -465,7 +463,6 @@
"token": { "token": {
"description": "An access token.", "description": "An access token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -779,7 +776,6 @@
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -976,7 +972,6 @@
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -1167,7 +1162,6 @@
"token": { "token": {
"description": "An access token.", "description": "An access token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -1563,7 +1557,6 @@
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -1760,7 +1753,6 @@
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }
@ -1951,7 +1943,6 @@
"token": { "token": {
"description": "An access token.", "description": "An access token.",
"examples": [ "examples": [
"secret-token",
{ {
"env": "ENV_VAR_CONTAINING_TOKEN" "env": "ENV_VAR_CONTAINING_TOKEN"
} }

View file

@ -8,7 +8,7 @@
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
"const": "githubApp", "const": "github",
"description": "GitHub App Configuration" "description": "GitHub App Configuration"
}, },
"deploymentHostname": { "deploymentHostname": {
@ -31,26 +31,26 @@
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -70,7 +70,7 @@
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
"const": "githubApp", "const": "github",
"description": "GitHub App Configuration" "description": "GitHub App Configuration"
}, },
"deploymentHostname": { "deploymentHostname": {
@ -93,26 +93,26 @@
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -11,35 +11,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -15,35 +15,30 @@
}, },
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -15,35 +15,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -228,35 +223,30 @@
}, },
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -435,35 +425,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -707,35 +692,30 @@
}, },
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -880,35 +860,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -0,0 +1,115 @@
{/* THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"type": "object",
"description": "Environment variable overrides.",
"title": "EnvironmentOverrides",
"not": {
"$comment": "List of environment variables that are not allowed to be overridden.",
"anyOf": [
{
"required": [
"CONFIG_PATH"
]
}
]
},
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"const": "token"
},
"value": {
"anyOf": [
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"googleCloudSecret": {
"type": "string",
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"googleCloudSecret"
],
"additionalProperties": false
}
]
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "string"
},
"value": {
"type": "string"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "number"
},
"value": {
"type": "number"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "boolean"
},
"value": {
"type": "boolean"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
}
]
}
}
}
```

View file

@ -11,35 +11,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -11,35 +11,30 @@
}, },
"token": { "token": {
"description": "A Personal Access Token (PAT).", "description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -1,76 +0,0 @@
{/* THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "GithubAppConfig",
"properties": {
"type": {
"const": "githubApp",
"description": "GitHub App Configuration"
},
"deploymentHostname": {
"type": "string",
"format": "hostname",
"default": "github.com",
"description": "The hostname of the GitHub App deployment.",
"examples": [
"github.com",
"github.example.com"
]
},
"id": {
"type": "string",
"description": "The ID of the GitHub App."
},
"privateKey": {
"description": "The private key of the GitHub App.",
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"type": "string",
"description": "The name of the secret that contains the token."
}
},
"required": [
"secret"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
}
},
"required": [
"env"
],
"additionalProperties": false
}
]
}
},
"required": [
"type",
"id"
],
"oneOf": [
{
"required": [
"privateKey"
]
},
{
"required": [
"privateKeyPath"
]
}
],
"additionalProperties": false
}
```

View file

@ -11,35 +11,30 @@
}, },
"token": { "token": {
"description": "An authentication token.", "description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [ "anyOf": [
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -9,26 +9,26 @@
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -89,26 +89,26 @@
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }
@ -133,26 +133,26 @@
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"secret": { "env": {
"type": "string", "type": "string",
"description": "The name of the secret that contains the token." "description": "The name of the environment variable that contains the token."
} }
}, },
"required": [ "required": [
"secret" "env"
], ],
"additionalProperties": false "additionalProperties": false
}, },
{ {
"type": "object", "type": "object",
"properties": { "properties": {
"env": { "googleCloudSecret": {
"type": "string", "type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs." "description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
} }
}, },
"required": [ "required": [
"env" "googleCloudSecret"
], ],
"additionalProperties": false "additionalProperties": false
} }

View file

@ -1,36 +1,71 @@
#!/bin/sh #!/bin/sh
# Exit immediately if a command fails
set -e set -e
# Disable auto-exporting of variables
set +a
# Check if DATABASE_URL is not set # Detect if running as root
if [ -z "$DATABASE_URL" ]; then IS_ROOT=false
# Check if the individual database variables are set and construct the URL if [ "$(id -u)" -eq 0 ]; then
if [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ] && [ -n "$DATABASE_NAME" ]; then IS_ROOT=true
DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}" fi
if [ -n "$DATABASE_ARGS" ]; then if [ "$IS_ROOT" = "true" ]; then
DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS" echo -e "\e[34m[Info] Running as root user.\e[0m"
fi else
echo -e "\e[34m[Info] Running as non-root user.\e[0m"
fi
export DATABASE_URL # If a CONFIG_PATH is set, resolve the environment overrides from the config file.
# The overrides will be written into variables scoped to the current shell. This is
# required in case one of the variables used in this entrypoint is overridden (e.g.,
# DATABASE_URL, REDIS_URL, etc.)
if [ -n "$CONFIG_PATH" ]; then
echo -e "\e[34m[Info] Resolving environment overrides from $CONFIG_PATH...\e[0m"
set +e # Disable exit on error so we can capture EXIT_CODE
OVERRIDES_OUTPUT=$(SKIP_ENV_VALIDATION=1 yarn tool:resolve-env-overrides 2>&1)
EXIT_CODE=$?
set -e # Re-enable exit on error
if [ $EXIT_CODE -eq 0 ]; then
eval "$OVERRIDES_OUTPUT"
else else
# Otherwise, fallback to a default value echo -e "\e[31m[Error] Failed to resolve environment overrides.\e[0m"
DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot" echo "$OVERRIDES_OUTPUT"
export DATABASE_URL exit 1
fi fi
fi fi
if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then # Construct the database URL from the individual variables if DATABASE_URL is not set
DATABASE_EMBEDDED="true" if [ -z "$DATABASE_URL" ] && [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ] && [ -n "$DATABASE_NAME" ]; then
DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}"
if [ -n "$DATABASE_ARGS" ]; then
DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS"
fi
fi fi
if [ -z "$DATABASE_URL" ]; then
echo -e "\e[34m[Info] DATABASE_URL is not set. Using embeded database.\e[0m"
export DATABASE_EMBEDDED="true"
export DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
else
export DATABASE_EMBEDDED="false"
fi
if [ -z "$REDIS_URL" ]; then
echo -e "\e[34m[Info] REDIS_URL is not set. Using embeded redis.\e[0m"
export REDIS_EMBEDDED="true"
export REDIS_URL="redis://localhost:6379"
else
export REDIS_EMBEDDED="false"
fi
echo -e "\e[34m[Info] Sourcebot version: $NEXT_PUBLIC_SOURCEBOT_VERSION\e[0m" echo -e "\e[34m[Info] Sourcebot version: $NEXT_PUBLIC_SOURCEBOT_VERSION\e[0m"
# If we don't have a PostHog key, then we need to disable telemetry.
if [ -z "$NEXT_PUBLIC_POSTHOG_PAPIK" ]; then
echo -e "\e[33m[Warning] NEXT_PUBLIC_POSTHOG_PAPIK was not set. Setting SOURCEBOT_TELEMETRY_DISABLED.\e[0m"
export SOURCEBOT_TELEMETRY_DISABLED=true
fi
if [ -n "$SOURCEBOT_TELEMETRY_DISABLED" ]; then if [ -n "$SOURCEBOT_TELEMETRY_DISABLED" ]; then
# Validate that SOURCEBOT_TELEMETRY_DISABLED is either "true" or "false" # Validate that SOURCEBOT_TELEMETRY_DISABLED is either "true" or "false"
if [ "$SOURCEBOT_TELEMETRY_DISABLED" != "true" ] && [ "$SOURCEBOT_TELEMETRY_DISABLED" != "false" ]; then if [ "$SOURCEBOT_TELEMETRY_DISABLED" != "true" ] && [ "$SOURCEBOT_TELEMETRY_DISABLED" != "false" ]; then
@ -54,12 +89,17 @@ fi
# Check if DATABASE_DATA_DIR exists, if not initialize it # Check if DATABASE_DATA_DIR exists, if not initialize it
if [ "$DATABASE_EMBEDDED" = "true" ] && [ ! -d "$DATABASE_DATA_DIR" ]; then if [ "$DATABASE_EMBEDDED" = "true" ] && [ ! -d "$DATABASE_DATA_DIR" ]; then
echo -e "\e[34m[Info] Initializing database at $DATABASE_DATA_DIR...\e[0m" echo -e "\e[34m[Info] Initializing database at $DATABASE_DATA_DIR...\e[0m"
mkdir -p $DATABASE_DATA_DIR && chown -R postgres:postgres "$DATABASE_DATA_DIR" mkdir -p $DATABASE_DATA_DIR
su postgres -c "initdb -D $DATABASE_DATA_DIR" if [ "$IS_ROOT" = "true" ]; then
chown -R postgres:postgres "$DATABASE_DATA_DIR"
su postgres -c "initdb -D $DATABASE_DATA_DIR"
else
initdb -D "$DATABASE_DATA_DIR" -U postgres
fi
fi fi
# Create the redis data directory if it doesn't exist # Create the redis data directory if it doesn't exist
if [ ! -d "$REDIS_DATA_DIR" ]; then if [ "$REDIS_EMBEDDED" = "true" ] && [ ! -d "$REDIS_DATA_DIR" ]; then
mkdir -p $REDIS_DATA_DIR mkdir -p $REDIS_DATA_DIR
fi fi
@ -113,7 +153,7 @@ if [ ! -f "$FIRST_RUN_FILE" ]; then
# (if telemetry is enabled) # (if telemetry is enabled)
if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then
if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{ if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{
"api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'", "api_key": "'"$POSTHOG_PAPIK"'",
"event": "install", "event": "install",
"distinct_id": "'"$SOURCEBOT_INSTALL_ID"'", "distinct_id": "'"$SOURCEBOT_INSTALL_ID"'",
"properties": { "properties": {
@ -133,7 +173,7 @@ else
if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then
if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{ if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{
"api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'", "api_key": "'"$POSTHOG_PAPIK"'",
"event": "upgrade", "event": "upgrade",
"distinct_id": "'"$SOURCEBOT_INSTALL_ID"'", "distinct_id": "'"$SOURCEBOT_INSTALL_ID"'",
"properties": { "properties": {
@ -149,16 +189,33 @@ fi
echo "{\"version\": \"$NEXT_PUBLIC_SOURCEBOT_VERSION\", \"install_id\": \"$SOURCEBOT_INSTALL_ID\"}" > "$FIRST_RUN_FILE" echo "{\"version\": \"$NEXT_PUBLIC_SOURCEBOT_VERSION\", \"install_id\": \"$SOURCEBOT_INSTALL_ID\"}" > "$FIRST_RUN_FILE"
# Start the database and wait for it to be ready before starting any other service # Start the database and wait for it to be ready before starting any other service
if [ "$DATABASE_EMBEDDED" = "true" ]; then if [ "$DATABASE_EMBEDDED" = "true" ]; then
su postgres -c "postgres -D $DATABASE_DATA_DIR" & if [ "$IS_ROOT" = "true" ]; then
su postgres -c "postgres -D $DATABASE_DATA_DIR" &
else
postgres -D "$DATABASE_DATA_DIR" &
fi
until pg_isready -h localhost -p 5432 -U postgres; do until pg_isready -h localhost -p 5432 -U postgres; do
echo -e "\e[34m[Info] Waiting for the database to be ready...\e[0m" echo -e "\e[34m[Info] Waiting for the database to be ready...\e[0m"
sleep 1 sleep 1
done
# Check if the database already exists, and create it if it dne # As postgres runs in the background, we must check if it is still
# running, otherwise the "until" loop will be running indefinitely.
if ! pgrep -x "postgres" > /dev/null; then
echo "postgres failed to run"
exit 1
fi
done
if [ "$IS_ROOT" = "false" ]; then
# Running as non-root we need to ensure the postgres account is created.
psql -U postgres -tc "SELECT 1 FROM pg_roles WHERE rolname='postgres'" | grep -q 1 \
|| createuser postgres -s
fi
# Check if the database already exists, and create it if it doesn't exist
EXISTING_DB=$(psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'sourcebot'") EXISTING_DB=$(psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'sourcebot'")
if [ "$EXISTING_DB" = "1" ]; then if [ "$EXISTING_DB" = "1" ]; then
@ -171,9 +228,9 @@ fi
# Run a Database migration # Run a Database migration
echo -e "\e[34m[Info] Running database migration...\e[0m" echo -e "\e[34m[Info] Running database migration...\e[0m"
yarn workspace @sourcebot/db prisma:migrate:prod DATABASE_URL="$DATABASE_URL" yarn workspace @sourcebot/db prisma:migrate:prod
# Create the log directory # Create the log directory if it doesn't exist
mkdir -p /var/log/sourcebot mkdir -p /var/log/sourcebot
# Run supervisord # Run supervisord

View file

@ -4,8 +4,8 @@
"packages/*" "packages/*"
], ],
"scripts": { "scripts": {
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach -A run build", "build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach --all --topological run build",
"test": "yarn workspaces foreach -A run test", "test": "yarn workspaces foreach --all --topological run test",
"dev": "concurrently --kill-others --names \"zoekt,worker,web,mcp,schemas\" 'yarn dev:zoekt' 'yarn dev:backend' 'yarn dev:web' 'yarn watch:mcp' 'yarn watch:schemas'", "dev": "concurrently --kill-others --names \"zoekt,worker,web,mcp,schemas\" 'yarn dev:zoekt' 'yarn dev:backend' 'yarn dev:web' 'yarn watch:mcp' 'yarn watch:schemas'",
"with-env": "cross-env PATH=\"$PWD/bin:$PATH\" dotenv -e .env.development -c --", "with-env": "cross-env PATH=\"$PWD/bin:$PATH\" dotenv -e .env.development -c --",
"dev:zoekt": "yarn with-env zoekt-webserver -index .sourcebot/index -rpc", "dev:zoekt": "yarn with-env zoekt-webserver -index .sourcebot/index -rpc",
@ -18,7 +18,7 @@
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio", "dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset", "dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset",
"dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push", "dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push",
"build:deps": "yarn workspaces foreach -R --from '{@sourcebot/schemas,@sourcebot/error,@sourcebot/crypto,@sourcebot/db,@sourcebot/shared}' run build" "build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build"
}, },
"devDependencies": { "devDependencies": {
"concurrently": "^9.2.1", "concurrently": "^9.2.1",
@ -27,6 +27,7 @@
}, },
"packageManager": "yarn@4.7.0", "packageManager": "yarn@4.7.0",
"resolutions": { "resolutions": {
"prettier": "3.5.3" "prettier": "3.5.3",
"@lezer/common": "1.3.0"
} }
} }

View file

@ -29,13 +29,9 @@
"@sentry/cli": "^2.42.2", "@sentry/cli": "^2.42.2",
"@sentry/node": "^9.3.0", "@sentry/node": "^9.3.0",
"@sentry/profiling-node": "^9.3.0", "@sentry/profiling-node": "^9.3.0",
"@sourcebot/crypto": "workspace:*",
"@sourcebot/db": "workspace:*", "@sourcebot/db": "workspace:*",
"@sourcebot/error": "workspace:*",
"@sourcebot/logger": "workspace:*",
"@sourcebot/schemas": "workspace:*", "@sourcebot/schemas": "workspace:*",
"@sourcebot/shared": "workspace:*", "@sourcebot/shared": "workspace:*",
"@t3-oss/env-core": "^0.12.0",
"@types/express": "^5.0.0", "@types/express": "^5.0.0",
"argparse": "^2.0.1", "argparse": "^2.0.1",
"azure-devops-node-api": "^15.1.1", "azure-devops-node-api": "^15.1.1",
@ -44,6 +40,8 @@
"cross-fetch": "^4.0.0", "cross-fetch": "^4.0.0",
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"express": "^4.21.2", "express": "^4.21.2",
"express-async-errors": "^3.1.1",
"fast-deep-equal": "^3.1.3",
"git-url-parse": "^16.1.0", "git-url-parse": "^16.1.0",
"gitea-js": "^1.22.0", "gitea-js": "^1.22.0",
"glob": "^11.0.0", "glob": "^11.0.0",
@ -51,9 +49,10 @@
"ioredis": "^5.4.2", "ioredis": "^5.4.2",
"lowdb": "^7.0.1", "lowdb": "^7.0.1",
"micromatch": "^4.0.8", "micromatch": "^4.0.8",
"p-limit": "^7.2.0",
"posthog-node": "^4.2.1", "posthog-node": "^4.2.1",
"prom-client": "^15.1.3", "prom-client": "^15.1.3",
"simple-git": "^3.27.0", "simple-git": "^3.27.0",
"zod": "^3.24.3" "zod": "^3.25.74"
} }
} }

103
packages/backend/src/api.ts Normal file
View file

@ -0,0 +1,103 @@
import { PrismaClient, RepoIndexingJobType } from '@sourcebot/db';
import { createLogger } from '@sourcebot/shared';
import express, { Request, Response } from 'express';
import 'express-async-errors';
import * as http from "http";
import z from 'zod';
import { ConnectionManager } from './connectionManager.js';
import { PromClient } from './promClient.js';
import { RepoIndexManager } from './repoIndexManager.js';
// Module-level logger, named 'api'.
const logger = createLogger('api');
// TCP port that the Api class below passes to app.listen().
const PORT = 3060;
/**
 * HTTP API exposed by the backend process.
 *
 * Routes registered in the constructor:
 *  - `/metrics`                  Prometheus metrics from the supplied registry.
 *  - `POST /api/sync-connection` Creates a sync job for an existing connection.
 *  - `POST /api/index-repo`      Creates an INDEX job for an existing repo.
 *
 * The server starts listening on `PORT` as soon as the class is constructed.
 * Call {@link Api.dispose} to stop it.
 */
export class Api {
    private server: http.Server;

    /**
     * @param promClient Source of the Prometheus registry served on `/metrics`.
     * @param prisma Database client used to look up connections and repos.
     * @param connectionManager Used to create connection sync jobs.
     * @param repoIndexManager Used to create repo indexing jobs.
     */
    constructor(
        promClient: PromClient,
        private prisma: PrismaClient,
        private connectionManager: ConnectionManager,
        private repoIndexManager: RepoIndexManager,
    ) {
        const app = express();
        app.use(express.json());
        app.use(express.urlencoded({ extended: true }));

        // Prometheus metrics endpoint
        app.use('/metrics', async (_req: Request, res: Response) => {
            res.set('Content-Type', promClient.registry.contentType);
            const metrics = await promClient.registry.metrics();
            res.end(metrics);
        });

        // Handlers are class methods, so bind `this` before handing them to Express.
        // NOTE(review): rejections from these async handlers are presumably forwarded
        // to Express by the file-level 'express-async-errors' import; confirm.
        app.post('/api/sync-connection', this.syncConnection.bind(this));
        app.post('/api/index-repo', this.indexRepo.bind(this));

        this.server = app.listen(PORT, () => {
            logger.info(`API server is running on port ${PORT}`);
        });
    }

    /**
     * Handles `POST /api/sync-connection`.
     *
     * Expects a JSON body of exactly `{ connectionId: <integer> }`.
     * Responds 400 when the body fails validation, 404 when no connection has
     * that id, and 200 with `{ jobId }` once a sync job has been created.
     */
    private async syncConnection(req: Request, res: Response) {
        const schema = z.object({
            // `.int()` rejects fractional ids here (400) rather than letting them
            // reach the database lookup.
            // NOTE(review): assumes connection ids are integer columns; confirm
            // against the Prisma schema.
            connectionId: z.number().int(),
        }).strict();

        const parsed = schema.safeParse(req.body);
        if (!parsed.success) {
            res.status(400).json({ error: parsed.error.message });
            return;
        }

        const { connectionId } = parsed.data;
        const connection = await this.prisma.connection.findUnique({
            where: {
                id: connectionId,
            }
        });

        if (!connection) {
            res.status(404).json({ error: 'Connection not found' });
            return;
        }

        const [jobId] = await this.connectionManager.createJobs([connection]);
        res.status(200).json({ jobId });
    }

    /**
     * Handles `POST /api/index-repo`.
     *
     * Expects a JSON body of exactly `{ repoId: <integer> }`.
     * Responds 400 when the body fails validation, 404 when no repo has that
     * id, and 200 with `{ jobId }` once an INDEX job has been created.
     */
    private async indexRepo(req: Request, res: Response) {
        const schema = z.object({
            // `.int()` rejects fractional ids here (400) rather than letting them
            // reach the database lookup.
            // NOTE(review): assumes repo ids are integer columns; confirm against
            // the Prisma schema.
            repoId: z.number().int(),
        }).strict();

        const parsed = schema.safeParse(req.body);
        if (!parsed.success) {
            res.status(400).json({ error: parsed.error.message });
            return;
        }

        const { repoId } = parsed.data;
        const repo = await this.prisma.repo.findUnique({
            where: { id: repoId },
        });

        if (!repo) {
            res.status(404).json({ error: 'Repo not found' });
            return;
        }

        const [jobId] = await this.repoIndexManager.createJobs([repo], RepoIndexingJobType.INDEX);
        res.status(200).json({ jobId });
    }

    /**
     * Stops the HTTP server.
     *
     * @returns A promise that resolves once `server.close()` reports success and
     * rejects with the error it reports otherwise.
     */
    public async dispose(): Promise<void> {
        // server.close() is callback-based, so wrap it to make it awaitable.
        return new Promise<void>((resolve, reject) => {
            this.server.close((err) => {
                if (err) reject(err);
                else resolve();
            });
        });
    }
}

View file

@ -1,14 +1,12 @@
import { AzureDevOpsConnectionConfig } from "@sourcebot/schemas/v3/azuredevops.type"; import { AzureDevOpsConnectionConfig } from "@sourcebot/schemas/v3/azuredevops.type";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { measure, fetchWithRetry } from "./utils.js"; import { measure, fetchWithRetry } from "./utils.js";
import micromatch from "micromatch"; import micromatch from "micromatch";
import { PrismaClient } from "@sourcebot/db";
import { BackendException, BackendError } from "@sourcebot/error";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js"; import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import * as azdev from "azure-devops-node-api"; import * as azdev from "azure-devops-node-api";
import { GitRepository } from "azure-devops-node-api/interfaces/GitInterfaces.js"; import { GitRepository } from "azure-devops-node-api/interfaces/GitInterfaces.js";
import { getTokenFromConfig } from "@sourcebot/crypto"; import { getTokenFromConfig } from "@sourcebot/shared";
const logger = createLogger('azuredevops'); const logger = createLogger('azuredevops');
const AZUREDEVOPS_CLOUD_HOSTNAME = "dev.azure.com"; const AZUREDEVOPS_CLOUD_HOSTNAME = "dev.azure.com";
@ -29,19 +27,15 @@ function createAzureDevOpsConnection(
export const getAzureDevOpsReposFromConfig = async ( export const getAzureDevOpsReposFromConfig = async (
config: AzureDevOpsConnectionConfig, config: AzureDevOpsConnectionConfig,
orgId: number,
db: PrismaClient
) => { ) => {
const baseUrl = config.url || `https://${AZUREDEVOPS_CLOUD_HOSTNAME}`; const baseUrl = config.url || `https://${AZUREDEVOPS_CLOUD_HOSTNAME}`;
const token = config.token ? const token = config.token ?
await getTokenFromConfig(config.token, orgId, db) : await getTokenFromConfig(config.token) :
undefined; undefined;
if (!token) { if (!token) {
const e = new BackendException(BackendError.CONNECTION_SYNC_INVALID_TOKEN, { const e = new Error('Azure DevOps requires a Personal Access Token');
message: 'Azure DevOps requires a Personal Access Token',
});
Sentry.captureException(e); Sentry.captureException(e);
throw e; throw e;
} }

View file

@ -2,17 +2,17 @@ import { createBitbucketCloudClient } from "@coderabbitai/bitbucket/cloud";
import { createBitbucketServerClient } from "@coderabbitai/bitbucket/server"; import { createBitbucketServerClient } from "@coderabbitai/bitbucket/server";
import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type"; import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type";
import type { ClientOptions, ClientPathsWithMethod } from "openapi-fetch"; import type { ClientOptions, ClientPathsWithMethod } from "openapi-fetch";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { PrismaClient } from "@sourcebot/db";
import { measure, fetchWithRetry } from "./utils.js"; import { measure, fetchWithRetry } from "./utils.js";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import micromatch from "micromatch";
import { import {
SchemaRepository as CloudRepository, SchemaRepository as CloudRepository,
} from "@coderabbitai/bitbucket/cloud/openapi"; } from "@coderabbitai/bitbucket/cloud/openapi";
import { SchemaRestRepository as ServerRepository } from "@coderabbitai/bitbucket/server/openapi"; import { SchemaRestRepository as ServerRepository } from "@coderabbitai/bitbucket/server/openapi";
import { processPromiseResults } from "./connectionUtils.js"; import { processPromiseResults } from "./connectionUtils.js";
import { throwIfAnyFailed } from "./connectionUtils.js"; import { throwIfAnyFailed } from "./connectionUtils.js";
import { getTokenFromConfig } from "@sourcebot/crypto"; import { getTokenFromConfig } from "@sourcebot/shared";
const logger = createLogger('bitbucket'); const logger = createLogger('bitbucket');
const BITBUCKET_CLOUD_GIT = 'https://bitbucket.org'; const BITBUCKET_CLOUD_GIT = 'https://bitbucket.org';
@ -58,9 +58,9 @@ type ServerPaginatedResponse<T> = {
readonly nextPageStart: number; readonly nextPageStart: number;
} }
export const getBitbucketReposFromConfig = async (config: BitbucketConnectionConfig, orgId: number, db: PrismaClient) => { export const getBitbucketReposFromConfig = async (config: BitbucketConnectionConfig) => {
const token = config.token ? const token = config.token ?
await getTokenFromConfig(config.token, orgId, db) : await getTokenFromConfig(config.token) :
undefined; undefined;
if (config.deploymentType === 'server' && !config.url) { if (config.deploymentType === 'server' && !config.url) {
@ -347,10 +347,15 @@ async function cloudGetRepos(client: BitbucketClient, repoList: string[]): Promi
function cloudShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketConnectionConfig): boolean { function cloudShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketConnectionConfig): boolean {
const cloudRepo = repo as CloudRepository; const cloudRepo = repo as CloudRepository;
let reason = '';
const repoName = cloudRepo.full_name!;
const shouldExclude = (() => { const shouldExclude = (() => {
if (config.exclude?.repos && config.exclude.repos.includes(cloudRepo.full_name!)) { if (config.exclude?.repos) {
return true; if (micromatch.isMatch(repoName, config.exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
}
} }
if (!!config.exclude?.archived) { if (!!config.exclude?.archived) {
@ -358,12 +363,15 @@ function cloudShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketConn
} }
if (!!config.exclude?.forks && cloudRepo.parent !== undefined) { if (!!config.exclude?.forks && cloudRepo.parent !== undefined) {
reason = `\`exclude.forks\` is true`;
return true; return true;
} }
return false;
})(); })();
if (shouldExclude) { if (shouldExclude) {
logger.debug(`Excluding repo ${cloudRepo.full_name} because it matches the exclude pattern`); logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true; return true;
} }
return false; return false;
@ -549,23 +557,32 @@ function serverShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketCon
const projectName = serverRepo.project!.key; const projectName = serverRepo.project!.key;
const repoSlug = serverRepo.slug!; const repoSlug = serverRepo.slug!;
const repoName = `${projectName}/${repoSlug}`;
let reason = '';
const shouldExclude = (() => { const shouldExclude = (() => {
if (config.exclude?.repos && config.exclude.repos.includes(`${projectName}/${repoSlug}`)) { if (config.exclude?.repos) {
return true; if (micromatch.isMatch(repoName, config.exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
}
} }
if (!!config.exclude?.archived && serverRepo.archived) { if (!!config.exclude?.archived && serverRepo.archived) {
reason = `\`exclude.archived\` is true`;
return true; return true;
} }
if (!!config.exclude?.forks && serverRepo.origin !== undefined) { if (!!config.exclude?.forks && serverRepo.origin !== undefined) {
reason = `\`exclude.forks\` is true`;
return true; return true;
} }
return false;
})(); })();
if (shouldExclude) { if (shouldExclude) {
logger.debug(`Excluding repo ${projectName}/${repoSlug} because it matches the exclude pattern`); logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true; return true;
} }
return false; return false;

View file

@ -1,11 +1,12 @@
import { Prisma, PrismaClient } from "@sourcebot/db"; import { Prisma, PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { loadConfig } from "@sourcebot/shared"; import { loadConfig } from "@sourcebot/shared";
import chokidar, { FSWatcher } from 'chokidar'; import chokidar, { FSWatcher } from 'chokidar';
import { ConnectionManager } from "./connectionManager.js"; import { ConnectionManager } from "./connectionManager.js";
import { SINGLE_TENANT_ORG_ID } from "./constants.js"; import { SINGLE_TENANT_ORG_ID } from "./constants.js";
import { syncSearchContexts } from "./ee/syncSearchContexts.js"; import { syncSearchContexts } from "./ee/syncSearchContexts.js";
import isEqual from 'fast-deep-equal';
const logger = createLogger('config-manager'); const logger = createLogger('config-manager');
@ -64,8 +65,8 @@ export class ConfigManager {
const existingConnectionConfig = existingConnection ? existingConnection.config as unknown as ConnectionConfig : undefined; const existingConnectionConfig = existingConnection ? existingConnection.config as unknown as ConnectionConfig : undefined;
const connectionNeedsSyncing = const connectionNeedsSyncing =
!existingConnection || !existingConnectionConfig ||
(JSON.stringify(existingConnectionConfig) !== JSON.stringify(newConnectionConfig)); !isEqual(existingConnectionConfig, newConnectionConfig);
// Either update the existing connection or create a new one. // Either update the existing connection or create a new one.
const connection = existingConnection ? const connection = existingConnection ?
@ -93,8 +94,8 @@ export class ConfigManager {
}); });
if (connectionNeedsSyncing) { if (connectionNeedsSyncing) {
const [jobId] = await this.connectionManager.createJobs([connection]); logger.info(`Change detected for connection '${key}' (id: ${connection.id}). Creating sync job.`);
logger.info(`Change detected for connection '${key}' (id: ${connection.id}). Created sync job ${jobId}.`); await this.connectionManager.createJobs([connection]);
} }
} }
} }

View file

@ -1,21 +1,22 @@
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { Connection, ConnectionSyncJobStatus, PrismaClient } from "@sourcebot/db"; import { Connection, ConnectionSyncJobStatus, PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { loadConfig } from "@sourcebot/shared"; import { loadConfig, env } from "@sourcebot/shared";
import { Job, Queue, ReservedJob, Worker } from "groupmq"; import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import { env } from "./env.js";
import { compileAzureDevOpsConfig, compileBitbucketConfig, compileGenericGitHostConfig, compileGerritConfig, compileGiteaConfig, compileGithubConfig, compileGitlabConfig } from "./repoCompileUtils.js"; import { compileAzureDevOpsConfig, compileBitbucketConfig, compileGenericGitHostConfig, compileGerritConfig, compileGiteaConfig, compileGithubConfig, compileGitlabConfig } from "./repoCompileUtils.js";
import { Settings } from "./types.js"; import { Settings } from "./types.js";
import { groupmqLifecycleExceptionWrapper } from "./utils.js"; import { groupmqLifecycleExceptionWrapper, setIntervalAsync } from "./utils.js";
import { syncSearchContexts } from "./ee/syncSearchContexts.js"; import { syncSearchContexts } from "./ee/syncSearchContexts.js";
import { captureEvent } from "./posthog.js"; import { captureEvent } from "./posthog.js";
import { PromClient } from "./promClient.js"; import { PromClient } from "./promClient.js";
import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS } from "./constants.js";
const LOG_TAG = 'connection-manager'; const LOG_TAG = 'connection-manager';
const logger = createLogger(LOG_TAG); const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`); const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
const QUEUE_NAME = 'connection-sync-queue';
type JobPayload = { type JobPayload = {
jobId: string, jobId: string,
@ -31,19 +32,19 @@ type JobResult = {
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 2; // 2 hour timeout const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 2; // 2 hour timeout
export class ConnectionManager { export class ConnectionManager {
private worker: Worker; private worker: Worker<JobPayload>;
private queue: Queue<JobPayload>; private queue: Queue<JobPayload>;
private interval?: NodeJS.Timeout; private interval?: NodeJS.Timeout;
constructor( constructor(
private db: PrismaClient, private db: PrismaClient,
private settings: Settings, private settings: Settings,
redis: Redis, private redis: Redis,
private promClient: PromClient, private promClient: PromClient,
) { ) {
this.queue = new Queue<JobPayload>({ this.queue = new Queue<JobPayload>({
redis, redis,
namespace: 'connection-sync-queue', namespace: QUEUE_NAME,
jobTimeoutMs: JOB_TIMEOUT_MS, jobTimeoutMs: JOB_TIMEOUT_MS,
maxAttempts: 3, maxAttempts: 3,
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true', logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
@ -63,11 +64,15 @@ export class ConnectionManager {
this.worker.on('failed', this.onJobFailed.bind(this)); this.worker.on('failed', this.onJobFailed.bind(this));
this.worker.on('stalled', this.onJobStalled.bind(this)); this.worker.on('stalled', this.onJobStalled.bind(this));
this.worker.on('error', this.onWorkerError.bind(this)); this.worker.on('error', this.onWorkerError.bind(this));
// graceful-timeout is triggered when a job is still processing after
// worker.close() is called and the timeout period has elapsed. In this case,
// we fail the job with no retry.
this.worker.on('graceful-timeout', this.onJobGracefulTimeout.bind(this));
} }
public startScheduler() { public startScheduler() {
logger.debug('Starting scheduler'); logger.debug('Starting scheduler');
this.interval = setInterval(async () => { this.interval = setIntervalAsync(async () => {
const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs); const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs);
const timeoutDate = new Date(Date.now() - JOB_TIMEOUT_MS); const timeoutDate = new Date(Date.now() - JOB_TIMEOUT_MS);
@ -129,6 +134,7 @@ export class ConnectionManager {
}); });
for (const job of jobs) { for (const job of jobs) {
logger.info(`Scheduling job ${job.id} for connection ${job.connection.name} (id: ${job.connectionId})`);
await this.queue.add({ await this.queue.add({
groupId: `connection:${job.connectionId}`, groupId: `connection:${job.connectionId}`,
data: { data: {
@ -151,6 +157,22 @@ export class ConnectionManager {
const logger = createJobLogger(jobId); const logger = createJobLogger(jobId);
logger.info(`Running connection sync job ${jobId} for connection ${connectionName} (id: ${job.data.connectionId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`); logger.info(`Running connection sync job ${jobId} for connection ${connectionName} (id: ${job.data.connectionId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
const currentStatus = await this.db.connectionSyncJob.findUniqueOrThrow({
where: {
id: jobId,
},
select: {
status: true,
}
});
// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
// is in an invalid state and should be skipped.
if (currentStatus.status !== ConnectionSyncJobStatus.PENDING && currentStatus.status !== ConnectionSyncJobStatus.IN_PROGRESS) {
throw new Error(`Job ${jobId} is not in a valid state. Expected: ${ConnectionSyncJobStatus.PENDING} or ${ConnectionSyncJobStatus.IN_PROGRESS}. Actual: ${currentStatus.status}. Skipping.`);
}
this.promClient.pendingConnectionSyncJobs.dec({ connection: connectionName }); this.promClient.pendingConnectionSyncJobs.dec({ connection: connectionName });
this.promClient.activeConnectionSyncJobs.inc({ connection: connectionName }); this.promClient.activeConnectionSyncJobs.inc({ connection: connectionName });
@ -179,29 +201,29 @@ export class ConnectionManager {
const result = await (async () => { const result = await (async () => {
switch (config.type) { switch (config.type) {
case 'github': { case 'github': {
return await compileGithubConfig(config, job.data.connectionId, orgId, this.db, abortController); return await compileGithubConfig(config, job.data.connectionId, abortController.signal);
} }
case 'gitlab': { case 'gitlab': {
return await compileGitlabConfig(config, job.data.connectionId, orgId, this.db); return await compileGitlabConfig(config, job.data.connectionId);
} }
case 'gitea': { case 'gitea': {
return await compileGiteaConfig(config, job.data.connectionId, orgId, this.db); return await compileGiteaConfig(config, job.data.connectionId);
} }
case 'gerrit': { case 'gerrit': {
return await compileGerritConfig(config, job.data.connectionId, orgId); return await compileGerritConfig(config, job.data.connectionId);
} }
case 'bitbucket': { case 'bitbucket': {
return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db); return await compileBitbucketConfig(config, job.data.connectionId);
} }
case 'azuredevops': { case 'azuredevops': {
return await compileAzureDevOpsConfig(config, job.data.connectionId, orgId, this.db); return await compileAzureDevOpsConfig(config, job.data.connectionId);
} }
case 'git': { case 'git': {
return await compileGenericGitHostConfig(config, job.data.connectionId, orgId); return await compileGenericGitHostConfig(config, job.data.connectionId);
} }
} }
})(); })();
let { repoData, warnings } = result; let { repoData, warnings } = result;
await this.db.connectionSyncJob.update({ await this.db.connectionSyncJob.update({
@ -384,6 +406,33 @@ export class ConnectionManager {
}); });
}); });
private onJobGracefulTimeout = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobGracefulTimeout', logger, async () => {
const logger = createJobLogger(job.id);
const { connection } = await this.db.connectionSyncJob.update({
where: { id: job.id },
data: {
status: ConnectionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: 'Job timed out',
},
select: {
connection: true,
}
});
this.promClient.activeConnectionSyncJobs.dec({ connection: connection.name });
this.promClient.connectionSyncJobFailTotal.inc({ connection: connection.name });
logger.error(`Job ${job.id} timed out for connection ${connection.name} (id: ${connection.id})`);
captureEvent('backend_connection_sync_job_failed', {
connectionId: connection.id,
error: 'Job timed out',
});
});
private async onWorkerError(error: Error) { private async onWorkerError(error: Error) {
Sentry.captureException(error); Sentry.captureException(error);
logger.error(`Connection syncer worker error.`, error); logger.error(`Connection syncer worker error.`, error);
@ -393,8 +442,28 @@ export class ConnectionManager {
if (this.interval) { if (this.interval) {
clearInterval(this.interval); clearInterval(this.interval);
} }
await this.worker.close();
await this.queue.close(); const inProgressJobs = this.worker.getCurrentJobs();
await this.worker.close(GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS);
// Manually release group locks for in progress jobs to prevent deadlocks.
// @see: https://github.com/Openpanel-dev/groupmq/issues/8
for (const { job } of inProgressJobs) {
const lockKey = `groupmq:${QUEUE_NAME}:lock:${job.groupId}`;
logger.debug(`Releasing group lock ${lockKey} for in progress job ${job.id}`);
try {
await this.redis.del(lockKey);
} catch (error) {
Sentry.captureException(error);
logger.error(`Failed to release group lock ${lockKey} for in progress job ${job.id}. Error: `, error);
}
}
// @note: As of groupmq v1.0.0, queue.close() will just close the underlying
// redis connection. Since we share the same redis client between, skip this
// step and close the redis client directly in index.ts.
// @see: https://github.com/Openpanel-dev/groupmq/blob/main/src/queue.ts#L1900
// await this.queue.close();
} }
} }

View file

@ -1,11 +1,33 @@
import { env } from "./env.js"; import { CodeHostType } from "@sourcebot/db";
import { env } from "@sourcebot/shared";
import path from "path"; import path from "path";
export const SINGLE_TENANT_ORG_ID = 1; export const SINGLE_TENANT_ORG_ID = 1;
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [ export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES: CodeHostType[] = [
'github', 'github',
'gitlab',
]; ];
export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos'); export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos');
export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index'); export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index');
// Maximum time to wait for current job to finish
export const GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS = 5 * 1000; // 5 seconds
// List of shutdown signals
export const SHUTDOWN_SIGNALS: string[] = [
'SIGHUP',
'SIGINT',
'SIGQUIT',
'SIGILL',
'SIGTRAP',
'SIGABRT',
'SIGBUS',
'SIGFPE',
'SIGSEGV',
'SIGUSR2',
'SIGTERM',
// @note: SIGKILL and SIGSTOP cannot have listeners installed.
// @see: https://nodejs.org/api/process.html#signal-events
];

View file

@ -0,0 +1,303 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, AccountPermissionSyncJobStatus, Account} from "@sourcebot/db";
import { env, hasEntitlement, createLogger } from "@sourcebot/shared";
import { Job, Queue, Worker } from "bullmq";
import { Redis } from "ioredis";
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js";
import { createGitLabFromOAuthToken, getProjectsForAuthenticatedUser } from "../gitlab.js";
import { Settings } from "../types.js";
import { setIntervalAsync } from "../utils.js";
// Prefix used for every logger name created in this module.
const LOG_TAG = 'user-permission-syncer';
// Module-level logger.
const logger = createLogger(LOG_TAG);
// Builds a logger whose name embeds the given job id, so log lines can be attributed to one job.
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
// Name passed to both the bullmq Queue and Worker constructed by AccountPermissionSyncer.
const QUEUE_NAME = 'accountPermissionSyncQueue';
// Payload carried by each queued job.
type AccountPermissionSyncJob = {
// Id of the accountPermissionSyncJob database row this queue entry refers to.
jobId: string;
}
/**
 * Keeps the repository permissions of each linked code-host account in sync.
 *
 * A scheduler tick finds accounts whose permissions were never synced or are
 * stale, records an `accountPermissionSyncJob` row for each of them and
 * enqueues the job on a BullMQ queue. A worker (concurrency 1) then asks the
 * code host (GitHub or GitLab) which non-public repositories the account can
 * access and replaces the account's `accountToRepoPermission` rows with the
 * result.
 */
export class AccountPermissionSyncer {
    private queue: Queue<AccountPermissionSyncJob>;
    private worker: Worker<AccountPermissionSyncJob>;
    private interval?: NodeJS.Timeout;

    /**
     * Creates the queue and its worker and wires the job lifecycle handlers.
     * Scheduling does not start until `startScheduler` is called.
     *
     * @param db Prisma client used for all job/account/repo bookkeeping.
     * @param settings Provides `experiment_userDrivenPermissionSyncIntervalMs`.
     * @param redis Connection shared by the queue and the worker.
     */
    constructor(
        private db: PrismaClient,
        private settings: Settings,
        redis: Redis,
    ) {
        this.queue = new Queue<AccountPermissionSyncJob>(QUEUE_NAME, {
            connection: redis,
        });
        this.worker = new Worker<AccountPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
            connection: redis,
            concurrency: 1,
        });
        this.worker.on('completed', this.onJobCompleted.bind(this));
        this.worker.on('failed', this.onJobFailed.bind(this));
    }

    /**
     * Starts the recurring (every 5 seconds) scan for accounts that need a
     * permission sync.
     *
     * @throws Error when the current plan lacks the `permission-syncing` entitlement.
     */
    public startScheduler() {
        if (!hasEntitlement('permission-syncing')) {
            throw new Error('Permission syncing is not supported in current plan.');
        }

        logger.debug('Starting scheduler');

        this.interval = setIntervalAsync(async () => {
            // Accounts last synced before this instant are considered stale.
            const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs);

            const accounts = await this.db.account.findMany({
                where: {
                    AND: [
                        // The account belongs to a code host that supports permission syncing.
                        {
                            provider: {
                                in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES
                            }
                        },
                        // It has never been synced, or was last synced before the threshold.
                        {
                            OR: [
                                { permissionSyncedAt: null },
                                { permissionSyncedAt: { lt: thresholdDate } },
                            ]
                        },
                        {
                            NOT: {
                                permissionSyncJobs: {
                                    some: {
                                        OR: [
                                            // Don't schedule if there are active jobs
                                            {
                                                status: {
                                                    in: [
                                                        AccountPermissionSyncJobStatus.PENDING,
                                                        AccountPermissionSyncJobStatus.IN_PROGRESS,
                                                    ],
                                                }
                                            },
                                            // Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is an inverse condition.
                                            {
                                                AND: [
                                                    { status: AccountPermissionSyncJobStatus.FAILED },
                                                    { completedAt: { gt: thresholdDate } },
                                                ]
                                            }
                                        ]
                                    }
                                }
                            }
                        },
                    ]
                }
            });

            await this.schedulePermissionSync(accounts);
        }, 1000 * 5);
    }

    /**
     * Stops the scheduler (if running) and closes the worker and the queue.
     * The worker is closed with `force = true`.
     */
    public async dispose() {
        if (this.interval) {
            clearInterval(this.interval);
        }
        await this.worker.close(/* force = */ true);
        await this.queue.close();
    }

    /**
     * Records one sync job per account in the database and enqueues each of
     * them for the worker.
     *
     * @param accounts Accounts that are due for a permission sync.
     */
    private async schedulePermissionSync(accounts: Account[]) {
        // Most scheduler ticks find nothing to do; skip the database and Redis
        // round trips entirely in that case.
        if (accounts.length === 0) {
            return;
        }

        // @note: we don't perform this in a transaction because
        // we want to avoid the situation where a job is created and run
        // prior to the transaction being committed.
        const jobs = await this.db.accountPermissionSyncJob.createManyAndReturn({
            data: accounts.map(account => ({
                accountId: account.id,
            })),
        });

        await this.queue.addBulk(jobs.map((job) => ({
            name: 'accountPermissionSyncJob',
            data: {
                jobId: job.id,
            },
            opts: {
                removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
                removeOnFail: env.REDIS_REMOVE_ON_FAIL,
            }
        })))
    }

    /**
     * Worker entry point: marks the job as in progress, fetches the
     * repositories the account can access from its code host and rewrites the
     * account's permission rows accordingly.
     *
     * @param job Queue entry carrying the id of the job record to process.
     * @throws Error when the account has no OAuth access token. Errors raised
     *         by the database or the code-host helpers propagate unchanged.
     */
    private async runJob(job: Job<AccountPermissionSyncJob>) {
        const id = job.data.jobId;
        const logger = createJobLogger(id);

        const { account } = await this.db.accountPermissionSyncJob.update({
            where: {
                id,
            },
            data: {
                status: AccountPermissionSyncJobStatus.IN_PROGRESS,
            },
            select: {
                account: {
                    include: {
                        user: true,
                    }
                }
            }
        });

        logger.info(`Syncing permissions for ${account.provider} account (id: ${account.id}) for user ${account.user.email}...`);

        // Resolve the internal ids of all repos this account has access to on
        // its code host. A provider handled by neither branch yields an empty
        // list, which clears the account's permission rows below.
        const repoIds = await (async () => {
            const aggregatedRepoIds: Set<number> = new Set();

            if (account.provider === 'github') {
                if (!account.access_token) {
                    throw new Error(`User '${account.user.email}' does not have an GitHub OAuth access token associated with their GitHub account.`);
                }

                const { octokit } = await createOctokitFromToken({
                    token: account.access_token,
                    url: env.AUTH_EE_GITHUB_BASE_URL,
                });

                // @note: we only care about the private repos since we don't need to build a mapping
                // for public repos.
                // @see: packages/web/src/prisma.ts
                const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit);
                const gitHubRepoIds = githubRepos.map(repo => repo.id.toString());

                // Map the code host's repo ids onto the repos indexed locally.
                const repos = await this.db.repo.findMany({
                    where: {
                        external_codeHostType: 'github',
                        external_id: {
                            in: gitHubRepoIds,
                        }
                    }
                });

                repos.forEach(repo => aggregatedRepoIds.add(repo.id));
            } else if (account.provider === 'gitlab') {
                if (!account.access_token) {
                    throw new Error(`User '${account.user.email}' does not have a GitLab OAuth access token associated with their GitLab account.`);
                }

                const api = await createGitLabFromOAuthToken({
                    oauthToken: account.access_token,
                    url: env.AUTH_EE_GITLAB_BASE_URL,
                });

                // @note: we only care about the private and internal repos since we don't need to build a mapping
                // for public repos.
                // @see: packages/web/src/prisma.ts
                const privateGitLabProjects = await getProjectsForAuthenticatedUser('private', api);
                const internalGitLabProjects = await getProjectsForAuthenticatedUser('internal', api);

                const gitLabProjectIds = [
                    ...privateGitLabProjects,
                    ...internalGitLabProjects,
                ].map(project => project.id.toString());

                // Map the code host's project ids onto the repos indexed locally.
                const repos = await this.db.repo.findMany({
                    where: {
                        external_codeHostType: 'gitlab',
                        external_id: {
                            in: gitLabProjectIds,
                        }
                    }
                });

                repos.forEach(repo => aggregatedRepoIds.add(repo.id));
            }

            return Array.from(aggregatedRepoIds);
        })();

        // Replace the account's permission rows in one transaction: delete
        // the existing rows, then insert the freshly computed set.
        await this.db.$transaction([
            this.db.account.update({
                where: {
                    id: account.id,
                },
                data: {
                    accessibleRepos: {
                        deleteMany: {},
                    }
                }
            }),
            this.db.accountToRepoPermission.createMany({
                data: repoIds.map(repoId => ({
                    accountId: account.id,
                    repoId,
                })),
                skipDuplicates: true,
            })
        ]);
    }

    /**
     * Worker `completed` handler: marks the job record as completed and
     * stamps the account's `permissionSyncedAt`.
     *
     * @param job The queue entry that finished successfully.
     */
    private async onJobCompleted(job: Job<AccountPermissionSyncJob>) {
        const logger = createJobLogger(job.data.jobId);

        // Use a single timestamp so the job's `completedAt` and the account's
        // `permissionSyncedAt` always agree.
        const completedAt = new Date();

        const { account } = await this.db.accountPermissionSyncJob.update({
            where: {
                id: job.data.jobId,
            },
            data: {
                status: AccountPermissionSyncJobStatus.COMPLETED,
                account: {
                    update: {
                        permissionSyncedAt: completedAt,
                    },
                },
                completedAt,
            },
            select: {
                account: {
                    include: {
                        user: true,
                    }
                }
            }
        });

        logger.info(`Permissions synced for ${account.provider} account (id: ${account.id}) for user ${account.user.email}`);
    }

    /**
     * Worker `failed` handler: reports the error to Sentry, marks the job
     * record as failed (when the job is known) and logs the failure.
     *
     * @param job The failed queue entry, or `undefined` when it is not available.
     * @param err The error that caused the failure.
     */
    private async onJobFailed(job: Job<AccountPermissionSyncJob> | undefined, err: Error) {
        const logger = createJobLogger(job?.data.jobId ?? 'unknown');

        Sentry.captureException(err, {
            tags: {
                jobId: job?.data.jobId,
                queue: QUEUE_NAME,
            }
        });

        const errorMessage = (accountId: string, email: string) => `Account permission sync job failed for account (id: ${accountId}) for user ${email}: ${err.message}`;

        if (job) {
            const { account } = await this.db.accountPermissionSyncJob.update({
                where: {
                    id: job.data.jobId,
                },
                data: {
                    status: AccountPermissionSyncJobStatus.FAILED,
                    completedAt: new Date(),
                    errorMessage: err.message,
                },
                select: {
                    account: {
                        include: {
                            user: true,
                        }
                    }
                }
            });

            logger.error(errorMessage(account.id, account.user.email ?? 'unknown user (email not found)'));
        } else {
            logger.error(errorMessage('unknown account (id not found)', 'unknown user (id not found)'));
        }
    }
}

View file

@ -1,10 +1,9 @@
import { loadConfig } from "@sourcebot/shared";
import { env } from "../env.js";
import { createLogger } from "@sourcebot/logger";
import { getTokenFromConfig } from "@sourcebot/crypto";
import { PrismaClient } from "@sourcebot/db";
import { App } from "@octokit/app"; import { App } from "@octokit/app";
import { getTokenFromConfig } from "@sourcebot/shared";
import { PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/shared";
import { GitHubAppConfig } from "@sourcebot/schemas/v3/index.type"; import { GitHubAppConfig } from "@sourcebot/schemas/v3/index.type";
import { env, loadConfig } from "@sourcebot/shared";
const logger = createLogger('githubAppManager'); const logger = createLogger('githubAppManager');
const GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME = 'github.com'; const GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME = 'github.com';
@ -16,9 +15,6 @@ type Installation = {
login: string; login: string;
type: 'organization' | 'user'; type: 'organization' | 'user';
}; };
createdAt: string;
expiresAt: string;
token: string;
}; };
export class GithubAppManager { export class GithubAppManager {
@ -48,21 +44,17 @@ export class GithubAppManager {
public async init(db: PrismaClient) { public async init(db: PrismaClient) {
this.db = db; this.db = db;
const config = await loadConfig(env.CONFIG_PATH!); const config = await loadConfig(env.CONFIG_PATH);
if (!config.apps) { if (!config.apps) {
return; return;
} }
const githubApps = config.apps.filter(app => app.type === 'githubApp') as GitHubAppConfig[]; const githubApps = config.apps.filter(app => app.type === 'github') as GitHubAppConfig[];
logger.info(`Found ${githubApps.length} GitHub apps in config`); logger.info(`Found ${githubApps.length} GitHub apps in config`);
for (const app of githubApps) { for (const app of githubApps) {
const deploymentHostname = app.deploymentHostname as string || GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME; const deploymentHostname = app.deploymentHostname as string || GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME;
const privateKey = await getTokenFromConfig(app.privateKey);
// @todo: we should move SINGLE_TENANT_ORG_ID to shared package or just remove the need to pass this in
// when resolving tokens
const SINGLE_TENANT_ORG_ID = 1;
const privateKey = await getTokenFromConfig(app.privateKey, SINGLE_TENANT_ORG_ID, this.db);
const octokitApp = new App({ const octokitApp = new App({
appId: Number(app.id), appId: Number(app.id),
@ -83,9 +75,6 @@ export class GithubAppManager {
const owner = installationData.account.login; const owner = installationData.account.login;
const accountType = installationData.account.type.toLowerCase() as 'organization' | 'user'; const accountType = installationData.account.type.toLowerCase() as 'organization' | 'user';
const installationOctokit = await octokitApp.getInstallationOctokit(installationData.id);
const auth = await installationOctokit.auth({ type: "installation" }) as { expires_at: string, token: string };
const installation: Installation = { const installation: Installation = {
id: installationData.id, id: installationData.id,
appId: Number(app.id), appId: Number(app.id),
@ -93,9 +82,6 @@ export class GithubAppManager {
login: owner, login: owner,
type: accountType, type: accountType,
}, },
createdAt: installationData.created_at,
expiresAt: auth.expires_at,
token: auth.token
}; };
this.installationMap.set(this.generateMapKey(owner, deploymentHostname), installation); this.installationMap.set(this.generateMapKey(owner, deploymentHostname), installation);
} }
@ -113,22 +99,10 @@ export class GithubAppManager {
throw new Error(`GitHub App Installation not found for ${key}`); throw new Error(`GitHub App Installation not found for ${key}`);
} }
if (installation.expiresAt < new Date().toISOString()) { const octokitApp = this.octokitApps.get(installation.appId) as App;
const octokitApp = this.octokitApps.get(installation.appId) as App; const installationOctokit = await octokitApp.getInstallationOctokit(installation.id);
const installationOctokit = await octokitApp.getInstallationOctokit(installation.id); const auth = await installationOctokit.auth({ type: "installation" }) as { expires_at: string, token: string };
const auth = await installationOctokit.auth({ type: "installation" }) as { expires_at: string, token: string }; return auth.token;
const newInstallation: Installation = {
...installation,
expiresAt: auth.expires_at,
token: auth.token
};
this.installationMap.set(key, newInstallation);
return newInstallation.token;
} else {
return installation.token;
}
} }
public appsConfigured() { public appsConfigured() {

View file

@ -1,14 +1,14 @@
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db"; import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { hasEntitlement } from "@sourcebot/shared"; import { env, hasEntitlement } from "@sourcebot/shared";
import { Job, Queue, Worker } from 'bullmq'; import { Job, Queue, Worker } from 'bullmq';
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getRepoCollaborators, GITHUB_CLOUD_HOSTNAME } from "../github.js"; import { createOctokitFromToken, getRepoCollaborators, GITHUB_CLOUD_HOSTNAME } from "../github.js";
import { createGitLabFromPersonalAccessToken, getProjectMembers } from "../gitlab.js";
import { Settings } from "../types.js"; import { Settings } from "../types.js";
import { getAuthCredentialsForRepo } from "../utils.js"; import { getAuthCredentialsForRepo, setIntervalAsync } from "../utils.js";
type RepoPermissionSyncJob = { type RepoPermissionSyncJob = {
jobId: string; jobId: string;
@ -16,7 +16,9 @@ type RepoPermissionSyncJob = {
const QUEUE_NAME = 'repoPermissionSyncQueue'; const QUEUE_NAME = 'repoPermissionSyncQueue';
const logger = createLogger('repo-permission-syncer'); const LOG_TAG = 'repo-permission-syncer';
const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
export class RepoPermissionSyncer { export class RepoPermissionSyncer {
private queue: Queue<RepoPermissionSyncJob>; private queue: Queue<RepoPermissionSyncJob>;
@ -46,26 +48,34 @@ export class RepoPermissionSyncer {
logger.debug('Starting scheduler'); logger.debug('Starting scheduler');
this.interval = setInterval(async () => { this.interval = setIntervalAsync(async () => {
// @todo: make this configurable // @todo: make this configurable
const thresholdDate = new Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs); const thresholdDate = new Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs);
const repos = await this.db.repo.findMany({ const repos = await this.db.repo.findMany({
// Repos need their permissions to be synced against the code host when... // Repos need their permissions to be synced against the code host when...
where: { where: {
// They belong to a code host that supports permissions syncing
AND: [ AND: [
// They are not public. Public repositories are always visible to all users, therefore we don't
// need to explicitly perform permission syncing for them.
// @see: packages/web/src/prisma.ts
{
isPublic: false
},
// They belong to a code host that supports permissions syncing
{ {
external_codeHostType: { external_codeHostType: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES, in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES,
} }
}, },
// They have not been synced within the threshold date.
{ {
OR: [ OR: [
{ permissionSyncedAt: null }, { permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } }, { permissionSyncedAt: { lt: thresholdDate } },
], ],
}, },
// There aren't any active or recently failed jobs.
{ {
NOT: { NOT: {
permissionSyncJobs: { permissionSyncJobs: {
@ -104,33 +114,36 @@ export class RepoPermissionSyncer {
if (this.interval) { if (this.interval) {
clearInterval(this.interval); clearInterval(this.interval);
} }
await this.worker.close(); await this.worker.close(/* force = */ true);
await this.queue.close(); await this.queue.close();
} }
private async schedulePermissionSync(repos: Repo[]) { private async schedulePermissionSync(repos: Repo[]) {
await this.db.$transaction(async (tx) => { // @note: we don't perform this in a transaction because
const jobs = await tx.repoPermissionSyncJob.createManyAndReturn({ // we want to avoid the situation where a job is created and run
data: repos.map(repo => ({ // prior to the transaction being committed.
repoId: repo.id, const jobs = await this.db.repoPermissionSyncJob.createManyAndReturn({
})), data: repos.map(repo => ({
}); repoId: repo.id,
})),
await this.queue.addBulk(jobs.map((job) => ({
name: 'repoPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
}); });
await this.queue.addBulk(jobs.map((job) => ({
name: 'repoPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
} }
private async runJob(job: Job<RepoPermissionSyncJob>) { private async runJob(job: Job<RepoPermissionSyncJob>) {
const id = job.data.jobId; const id = job.data.jobId;
const logger = createJobLogger(id);
const { repo } = await this.db.repoPermissionSyncJob.update({ const { repo } = await this.db.repoPermissionSyncJob.update({
where: { where: {
id, id,
@ -157,12 +170,12 @@ export class RepoPermissionSyncer {
logger.info(`Syncing permissions for repo ${repo.displayName}...`); logger.info(`Syncing permissions for repo ${repo.displayName}...`);
const credentials = await getAuthCredentialsForRepo(repo, this.db, logger); const credentials = await getAuthCredentialsForRepo(repo, logger);
if (!credentials) { if (!credentials) {
throw new Error(`No credentials found for repo ${id}`); throw new Error(`No credentials found for repo ${id}`);
} }
const userIds = await (async () => { const accountIds = await (async () => {
if (repo.external_codeHostType === 'github') { if (repo.external_codeHostType === 'github') {
const isGitHubCloud = credentials.hostUrl ? new URL(credentials.hostUrl).hostname === GITHUB_CLOUD_HOSTNAME : false; const isGitHubCloud = credentials.hostUrl ? new URL(credentials.hostUrl).hostname === GITHUB_CLOUD_HOSTNAME : false;
const { octokit } = await createOctokitFromToken({ const { octokit } = await createOctokitFromToken({
@ -189,12 +202,33 @@ export class RepoPermissionSyncer {
in: githubUserIds, in: githubUserIds,
} }
}, },
select: { });
userId: true,
return accounts.map(account => account.id);
} else if (repo.external_codeHostType === 'gitlab') {
const api = await createGitLabFromPersonalAccessToken({
token: credentials.token,
url: credentials.hostUrl,
});
const projectId = repo.external_id;
if (!projectId) {
throw new Error(`Repo ${id} does not have an external_id`);
}
const members = await getProjectMembers(projectId, api);
const gitlabUserIds = members.map(member => member.id.toString());
const accounts = await this.db.account.findMany({
where: {
provider: 'gitlab',
providerAccountId: {
in: gitlabUserIds,
}
}, },
}); });
return accounts.map(account => account.userId); return accounts.map(account => account.id);
} }
return []; return [];
@ -206,14 +240,14 @@ export class RepoPermissionSyncer {
id: repo.id, id: repo.id,
}, },
data: { data: {
permittedUsers: { permittedAccounts: {
deleteMany: {}, deleteMany: {},
} }
} }
}), }),
this.db.userToRepoPermission.createMany({ this.db.accountToRepoPermission.createMany({
data: userIds.map(userId => ({ data: accountIds.map(accountId => ({
userId, accountId,
repoId: repo.id, repoId: repo.id,
})), })),
}) })
@ -221,6 +255,8 @@ export class RepoPermissionSyncer {
} }
private async onJobCompleted(job: Job<RepoPermissionSyncJob>) { private async onJobCompleted(job: Job<RepoPermissionSyncJob>) {
const logger = createJobLogger(job.data.jobId);
const { repo } = await this.db.repoPermissionSyncJob.update({ const { repo } = await this.db.repoPermissionSyncJob.update({
where: { where: {
id: job.data.jobId, id: job.data.jobId,
@ -243,6 +279,8 @@ export class RepoPermissionSyncer {
} }
private async onJobFailed(job: Job<RepoPermissionSyncJob> | undefined, err: Error) { private async onJobFailed(job: Job<RepoPermissionSyncJob> | undefined, err: Error) {
const logger = createJobLogger(job?.data.jobId ?? 'unknown');
Sentry.captureException(err, { Sentry.captureException(err, {
tags: { tags: {
jobId: job?.data.jobId, jobId: job?.data.jobId,

View file

@ -1,5 +1,5 @@
import micromatch from "micromatch"; import micromatch from "micromatch";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { PrismaClient } from "@sourcebot/db"; import { PrismaClient } from "@sourcebot/db";
import { getPlan, hasEntitlement, SOURCEBOT_SUPPORT_EMAIL } from "@sourcebot/shared"; import { getPlan, hasEntitlement, SOURCEBOT_SUPPORT_EMAIL } from "@sourcebot/shared";
import { SearchContext } from "@sourcebot/schemas/v3/index.type"; import { SearchContext } from "@sourcebot/schemas/v3/index.type";

View file

@ -1,266 +0,0 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { Job, Queue, Worker } from "bullmq";
import { Redis } from "ioredis";
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js";
import { hasEntitlement } from "@sourcebot/shared";
import { Settings } from "../types.js";
// Module-level logger for the user permission syncer.
const logger = createLogger('user-permission-syncer');
// Name of the BullMQ queue used by both the Queue (producer) and the Worker (consumer).
const QUEUE_NAME = 'userPermissionSyncQueue';
// Payload carried by each queued job.
type UserPermissionSyncJob = {
// Id of the `userPermissionSyncJob` database row this queue entry refers to.
jobId: string;
}
/**
 * Keeps each user's repository permissions in sync with their linked
 * code-host accounts.
 *
 * A scheduler tick finds users whose permissions were never synced or are
 * stale, records a `userPermissionSyncJob` row for each of them and enqueues
 * the job on a BullMQ queue. A worker (concurrency 1) then asks GitHub which
 * private repositories each of the user's GitHub accounts can access and
 * replaces the user's `userToRepoPermission` rows with the result.
 */
export class UserPermissionSyncer {
    private queue: Queue<UserPermissionSyncJob>;
    private worker: Worker<UserPermissionSyncJob>;
    private interval?: NodeJS.Timeout;

    /**
     * Creates the queue and its worker and wires the job lifecycle handlers.
     * Scheduling does not start until `startScheduler` is called.
     *
     * @param db Prisma client used for all job/user/repo bookkeeping.
     * @param settings Provides `experiment_userDrivenPermissionSyncIntervalMs`.
     * @param redis Connection shared by the queue and the worker.
     */
    constructor(
        private db: PrismaClient,
        private settings: Settings,
        redis: Redis,
    ) {
        this.queue = new Queue<UserPermissionSyncJob>(QUEUE_NAME, {
            connection: redis,
        });
        this.worker = new Worker<UserPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
            connection: redis,
            concurrency: 1,
        });
        this.worker.on('completed', this.onJobCompleted.bind(this));
        this.worker.on('failed', this.onJobFailed.bind(this));
    }

    /**
     * Starts the recurring (every 5 seconds) scan for users that need a
     * permission sync.
     *
     * @throws Error when the current plan lacks the `permission-syncing` entitlement.
     */
    public startScheduler() {
        if (!hasEntitlement('permission-syncing')) {
            throw new Error('Permission syncing is not supported in current plan.');
        }

        logger.debug('Starting scheduler');

        this.interval = setInterval(async () => {
            // Users last synced before this instant are considered stale.
            const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs);

            const users = await this.db.user.findMany({
                where: {
                    AND: [
                        // The user has at least one account on a code host that supports permission syncing.
                        {
                            accounts: {
                                some: {
                                    provider: {
                                        in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES
                                    }
                                }
                            }
                        },
                        // They have never been synced, or were last synced before the threshold.
                        {
                            OR: [
                                { permissionSyncedAt: null },
                                { permissionSyncedAt: { lt: thresholdDate } },
                            ]
                        },
                        {
                            NOT: {
                                permissionSyncJobs: {
                                    some: {
                                        OR: [
                                            // Don't schedule if there are active jobs
                                            {
                                                status: {
                                                    in: [
                                                        UserPermissionSyncJobStatus.PENDING,
                                                        UserPermissionSyncJobStatus.IN_PROGRESS,
                                                    ],
                                                }
                                            },
                                            // Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is an inverse condition.
                                            {
                                                AND: [
                                                    { status: UserPermissionSyncJobStatus.FAILED },
                                                    { completedAt: { gt: thresholdDate } },
                                                ]
                                            }
                                        ]
                                    }
                                }
                            }
                        },
                    ]
                }
            });

            await this.schedulePermissionSync(users);
        }, 1000 * 5);
    }

    /**
     * Stops the scheduler (if running) and closes the worker and the queue.
     */
    public async dispose() {
        if (this.interval) {
            clearInterval(this.interval);
        }
        await this.worker.close();
        await this.queue.close();
    }

    /**
     * Records one sync job per user in the database and enqueues each of them
     * for the worker.
     *
     * @param users Users that are due for a permission sync.
     */
    private async schedulePermissionSync(users: User[]) {
        // Most scheduler ticks find nothing to do; skip the database and Redis
        // round trips entirely in that case.
        if (users.length === 0) {
            return;
        }

        // @note: the job rows are deliberately created outside of a
        // transaction. Enqueueing inside one lets the worker pick a job up
        // before the transaction commits, at which point the job row it
        // tries to update is not yet visible.
        const jobs = await this.db.userPermissionSyncJob.createManyAndReturn({
            data: users.map(user => ({
                userId: user.id,
            })),
        });

        await this.queue.addBulk(jobs.map((job) => ({
            name: 'userPermissionSyncJob',
            data: {
                jobId: job.id,
            },
            opts: {
                removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
                removeOnFail: env.REDIS_REMOVE_ON_FAIL,
            }
        })))
    }

    /**
     * Worker entry point: marks the job as in progress, collects the
     * repositories the user can access through their GitHub accounts and
     * rewrites the user's permission rows accordingly.
     *
     * @param job Queue entry carrying the id of the job record to process.
     * @throws Error when the job has no user, or when a GitHub account has no
     *         OAuth access token. Errors raised by the database or the GitHub
     *         helpers propagate unchanged.
     */
    private async runJob(job: Job<UserPermissionSyncJob>) {
        const id = job.data.jobId;

        const { user } = await this.db.userPermissionSyncJob.update({
            where: {
                id,
            },
            data: {
                status: UserPermissionSyncJobStatus.IN_PROGRESS,
            },
            select: {
                user: {
                    include: {
                        accounts: true,
                    }
                }
            }
        });

        if (!user) {
            // `id` is the job id, not a user id, so say so in the message.
            throw new Error(`User not found for permission sync job ${id}`);
        }

        logger.info(`Syncing permissions for user ${user.email}...`);

        // Get a list of all repos that the user has access to from all connected accounts.
        // Accounts on providers other than GitHub contribute nothing.
        const repoIds = await (async () => {
            const aggregatedRepoIds: Set<number> = new Set();

            for (const account of user.accounts) {
                if (account.provider === 'github') {
                    if (!account.access_token) {
                        throw new Error(`User '${user.email}' does not have an GitHub OAuth access token associated with their GitHub account.`);
                    }

                    const { octokit } = await createOctokitFromToken({
                        token: account.access_token,
                        url: env.AUTH_EE_GITHUB_BASE_URL,
                    });

                    // @note: we only care about the private repos since we don't need to build a mapping
                    // for public repos.
                    // @see: packages/web/src/prisma.ts
                    const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit);
                    const gitHubRepoIds = githubRepos.map(repo => repo.id.toString());

                    // Map the code host's repo ids onto the repos indexed locally.
                    const repos = await this.db.repo.findMany({
                        where: {
                            external_codeHostType: 'github',
                            external_id: {
                                in: gitHubRepoIds,
                            }
                        }
                    });

                    repos.forEach(repo => aggregatedRepoIds.add(repo.id));
                }
            }

            return Array.from(aggregatedRepoIds);
        })();

        // Replace the user's permission rows in one transaction: delete the
        // existing rows, then insert the freshly computed set.
        await this.db.$transaction([
            this.db.user.update({
                where: {
                    id: user.id,
                },
                data: {
                    accessibleRepos: {
                        deleteMany: {},
                    }
                }
            }),
            this.db.userToRepoPermission.createMany({
                data: repoIds.map(repoId => ({
                    userId: user.id,
                    repoId,
                })),
                skipDuplicates: true,
            })
        ]);
    }

    /**
     * Worker `completed` handler: marks the job record as completed and
     * stamps the user's `permissionSyncedAt`.
     *
     * @param job The queue entry that finished successfully.
     */
    private async onJobCompleted(job: Job<UserPermissionSyncJob>) {
        // Use a single timestamp so the job's `completedAt` and the user's
        // `permissionSyncedAt` always agree.
        const completedAt = new Date();

        const { user } = await this.db.userPermissionSyncJob.update({
            where: {
                id: job.data.jobId,
            },
            data: {
                status: UserPermissionSyncJobStatus.COMPLETED,
                user: {
                    update: {
                        permissionSyncedAt: completedAt,
                    }
                },
                completedAt,
            },
            select: {
                user: true
            }
        });

        logger.info(`Permissions synced for user ${user.email}`);
    }

    /**
     * Worker `failed` handler: reports the error to Sentry, marks the job
     * record as failed (when the job is known) and logs the failure.
     *
     * @param job The failed queue entry, or `undefined` when it is not available.
     * @param err The error that caused the failure.
     */
    private async onJobFailed(job: Job<UserPermissionSyncJob> | undefined, err: Error) {
        Sentry.captureException(err, {
            tags: {
                jobId: job?.data.jobId,
                queue: QUEUE_NAME,
            }
        });

        const errorMessage = (email: string) => `User permission sync job failed for user ${email}: ${err.message}`;

        if (job) {
            const { user } = await this.db.userPermissionSyncJob.update({
                where: {
                    id: job.data.jobId,
                },
                data: {
                    status: UserPermissionSyncJobStatus.FAILED,
                    completedAt: new Date(),
                    errorMessage: err.message,
                },
                select: {
                    user: true,
                }
            });

            logger.error(errorMessage(user.email ?? user.id));
        } else {
            logger.error(errorMessage('unknown user (id not found)'));
        }
    }
}

View file

@ -1,63 +0,0 @@
import { createEnv } from "@t3-oss/env-core";
import { z } from "zod";
import dotenv from 'dotenv';
// Booleans are specified as 'true' or 'false' strings.
const booleanSchema = z.enum(["true", "false"]);
// Numbers are treated as strings in .env files.
// coerce helps us convert them to numbers.
// @see: https://zod.dev/?id=coercion-for-primitives
const numberSchema = z.coerce.number();
// Load variables from `./.env` first...
dotenv.config({
path: './.env',
});
// ...then from `./.env.local`. `override: true` lets values from this file
// replace ones that are already set.
dotenv.config({
path: './.env.local',
override: true
});
// Validated, typed view over `process.env` for the backend. Each key below
// declares its schema and, where given, the default used when the variable is unset.
export const env = createEnv({
server: {
SOURCEBOT_ENCRYPTION_KEY: z.string(),
SOURCEBOT_TELEMETRY_DISABLED: booleanSchema.default("false"),
SOURCEBOT_INSTALL_ID: z.string().default("unknown"),
NEXT_PUBLIC_SOURCEBOT_VERSION: z.string().default("unknown"),
DATA_CACHE_DIR: z.string(),
NEXT_PUBLIC_POSTHOG_PAPIK: z.string().optional(),
FALLBACK_GITHUB_CLOUD_TOKEN: z.string().optional(),
FALLBACK_GITLAB_CLOUD_TOKEN: z.string().optional(),
FALLBACK_GITEA_CLOUD_TOKEN: z.string().optional(),
REDIS_URL: z.string().url().default("redis://localhost:6379"),
REDIS_REMOVE_ON_COMPLETE: numberSchema.default(0),
REDIS_REMOVE_ON_FAIL: numberSchema.default(100),
NEXT_PUBLIC_SENTRY_BACKEND_DSN: z.string().optional(),
NEXT_PUBLIC_SENTRY_ENVIRONMENT: z.string().optional(),
LOGTAIL_TOKEN: z.string().optional(),
LOGTAIL_HOST: z.string().url().optional(),
SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"),
DEBUG_ENABLE_GROUPMQ_LOGGING: booleanSchema.default('false'),
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
CONFIG_PATH: z.string(),
CONNECTION_MANAGER_UPSERT_TIMEOUT_MS: numberSchema.default(300000),
REPO_SYNC_RETRY_BASE_SLEEP_SECONDS: numberSchema.default(60),
GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10),
EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'),
AUTH_EE_GITHUB_BASE_URL: z.string().optional(),
},
// Values are read from the process environment.
runtimeEnv: process.env,
// Treat variables set to an empty string as if they were not set.
emptyStringAsUndefined: true,
// Setting SKIP_ENV_VALIDATION=1 disables schema validation.
skipValidation: process.env.SKIP_ENV_VALIDATION === "1",
});

View file

@ -1,11 +1,8 @@
import { GerritConnectionConfig } from "@sourcebot/schemas/v3/index.type";
import { createLogger } from '@sourcebot/shared';
import fetch from 'cross-fetch'; import fetch from 'cross-fetch';
import { GerritConnectionConfig } from "@sourcebot/schemas/v3/index.type"
import { createLogger } from '@sourcebot/logger';
import micromatch from "micromatch"; import micromatch from "micromatch";
import { measure, fetchWithRetry } from './utils.js'; import { fetchWithRetry, measure } from './utils.js';
import { BackendError } from '@sourcebot/error';
import { BackendException } from '@sourcebot/error';
import * as Sentry from "@sentry/node";
// https://gerrit-review.googlesource.com/Documentation/rest-api.html // https://gerrit-review.googlesource.com/Documentation/rest-api.html
interface GerritProjects { interface GerritProjects {
@ -39,26 +36,10 @@ export const getGerritReposFromConfig = async (config: GerritConnectionConfig):
const url = config.url.endsWith('/') ? config.url : `${config.url}/`; const url = config.url.endsWith('/') ? config.url : `${config.url}/`;
let { durationMs, data: projects } = await measure(async () => { let { durationMs, data: projects } = await measure(async () => {
try { const fetchFn = () => fetchAllProjects(url);
const fetchFn = () => fetchAllProjects(url); return fetchWithRetry(fetchFn, `projects from ${url}`, logger);
return fetchWithRetry(fetchFn, `projects from ${url}`, logger);
} catch (err) {
Sentry.captureException(err);
if (err instanceof BackendException) {
throw err;
}
logger.error(`Failed to fetch projects from ${url}`, err);
return null;
}
}); });
if (!projects) {
const e = new Error(`Failed to fetch projects from ${url}`);
Sentry.captureException(e);
throw e;
}
// include repos by glob if specified in config // include repos by glob if specified in config
if (config.projects) { if (config.projects) {
projects = projects.filter((project) => { projects = projects.filter((project) => {
@ -91,27 +72,9 @@ const fetchAllProjects = async (url: string): Promise<GerritProject[]> => {
logger.debug(`Fetching projects from Gerrit at ${endpointWithParams}`); logger.debug(`Fetching projects from Gerrit at ${endpointWithParams}`);
let response: Response; let response: Response;
try { response = await fetch(endpointWithParams);
response = await fetch(endpointWithParams); if (!response.ok) {
if (!response.ok) { throw new Error(`Failed to fetch projects from Gerrit at ${endpointWithParams} with status ${response.status}`);
logger.error(`Failed to fetch projects from Gerrit at ${endpointWithParams} with status ${response.status}`);
const e = new BackendException(BackendError.CONNECTION_SYNC_FAILED_TO_FETCH_GERRIT_PROJECTS, {
status: response.status,
});
Sentry.captureException(e);
throw e;
}
} catch (err) {
Sentry.captureException(err);
if (err instanceof BackendException) {
throw err;
}
const status = (err as any).code;
logger.error(`Failed to fetch projects from Gerrit at ${endpointWithParams} with status ${status}`);
throw new BackendException(BackendError.CONNECTION_SYNC_FAILED_TO_FETCH_GERRIT_PROJECTS, {
status: status,
});
} }
const text = await response.text(); const text = await response.text();
@ -151,11 +114,11 @@ const shouldExcludeProject = ({
const shouldExclude = (() => { const shouldExclude = (() => {
if ([ if ([
'All-Projects', 'All-Projects',
'All-Users', 'All-Users',
'All-Avatars', 'All-Avatars',
'All-Archived-Projects' 'All-Archived-Projects'
].includes(project.name)) { ].includes(project.name)) {
reason = `Project is a special project.`; reason = `Project is a special project.`;
return true; return true;
} }

View file

@ -1,8 +1,8 @@
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git'; import { env } from "@sourcebot/shared";
import { mkdir } from 'node:fs/promises';
import { env } from './env.js';
import { dirname, resolve } from 'node:path';
import { existsSync } from 'node:fs'; import { existsSync } from 'node:fs';
import { mkdir } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
type onProgressFn = (event: SimpleGitProgressEvent) => void; type onProgressFn = (event: SimpleGitProgressEvent) => void;

View file

@ -1,25 +1,24 @@
import { Api, giteaApi, HttpResponse, Repository as GiteaRepository } from 'gitea-js';
import { GiteaConnectionConfig } from '@sourcebot/schemas/v3/gitea.type';
import { measure } from './utils.js';
import fetch from 'cross-fetch';
import { createLogger } from '@sourcebot/logger';
import micromatch from 'micromatch';
import { PrismaClient } from '@sourcebot/db';
import { processPromiseResults, throwIfAnyFailed } from './connectionUtils.js';
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { env } from './env.js'; import { getTokenFromConfig } from "@sourcebot/shared";
import { getTokenFromConfig } from "@sourcebot/crypto"; import { createLogger } from '@sourcebot/shared';
import { GiteaConnectionConfig } from '@sourcebot/schemas/v3/gitea.type';
import { env } from "@sourcebot/shared";
import fetch from 'cross-fetch';
import { Api, giteaApi, Repository as GiteaRepository, HttpResponse } from 'gitea-js';
import micromatch from 'micromatch';
import { processPromiseResults, throwIfAnyFailed } from './connectionUtils.js';
import { measure } from './utils.js';
const logger = createLogger('gitea'); const logger = createLogger('gitea');
const GITEA_CLOUD_HOSTNAME = "gitea.com"; const GITEA_CLOUD_HOSTNAME = "gitea.com";
export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, orgId: number, db: PrismaClient) => { export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig) => {
const hostname = config.url ? const hostname = config.url ?
new URL(config.url).hostname : new URL(config.url).hostname :
GITEA_CLOUD_HOSTNAME; GITEA_CLOUD_HOSTNAME;
const token = config.token ? const token = config.token ?
await getTokenFromConfig(config.token, orgId, db) : await getTokenFromConfig(config.token) :
hostname === GITEA_CLOUD_HOSTNAME ? hostname === GITEA_CLOUD_HOSTNAME ?
env.FALLBACK_GITEA_CLOUD_TOKEN : env.FALLBACK_GITEA_CLOUD_TOKEN :
undefined; undefined;
@ -53,7 +52,7 @@ export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, org
allRepos = allRepos.filter(repo => repo.full_name !== undefined); allRepos = allRepos.filter(repo => repo.full_name !== undefined);
allRepos = allRepos.filter(repo => { allRepos = allRepos.filter(repo => {
if (repo.full_name === undefined) { if (repo.full_name === undefined) {
logger.warn(`Repository with undefined full_name found: orgId=${orgId}, repoId=${repo.id}`); logger.warn(`Repository with undefined full_name found: repoId=${repo.id}`);
return false; return false;
} }
return true; return true;

View file

@ -1,17 +1,20 @@
import { Octokit } from "@octokit/rest"; import { Octokit } from "@octokit/rest";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { PrismaClient } from "@sourcebot/db"; import { getTokenFromConfig } from "@sourcebot/shared";
import { createLogger } from "@sourcebot/logger"; import { createLogger } from "@sourcebot/shared";
import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type";
import { hasEntitlement } from "@sourcebot/shared"; import { env, hasEntitlement } from "@sourcebot/shared";
import micromatch from "micromatch"; import micromatch from "micromatch";
import pLimit from "p-limit";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js"; import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import { GithubAppManager } from "./ee/githubAppManager.js"; import { GithubAppManager } from "./ee/githubAppManager.js";
import { env } from "./env.js";
import { fetchWithRetry, measure } from "./utils.js"; import { fetchWithRetry, measure } from "./utils.js";
import { getTokenFromConfig } from "@sourcebot/crypto";
export const GITHUB_CLOUD_HOSTNAME = "github.com"; export const GITHUB_CLOUD_HOSTNAME = "github.com";
// Limit concurrent GitHub requests to avoid hitting rate limits and overwhelming installations.
const MAX_CONCURRENT_GITHUB_QUERIES = 5;
const githubQueryLimit = pLimit(MAX_CONCURRENT_GITHUB_QUERIES);
const logger = createLogger('github'); const logger = createLogger('github');
export type OctokitRepository = { export type OctokitRepository = {
@ -92,13 +95,13 @@ const getOctokitWithGithubApp = async (
} }
} }
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal): Promise<{ repos: OctokitRepository[], warnings: string[] }> => { export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal): Promise<{ repos: OctokitRepository[], warnings: string[] }> => {
const hostname = config.url ? const hostname = config.url ?
new URL(config.url).hostname : new URL(config.url).hostname :
GITHUB_CLOUD_HOSTNAME; GITHUB_CLOUD_HOSTNAME;
const token = config.token ? const token = config.token ?
await getTokenFromConfig(config.token, orgId, db) : await getTokenFromConfig(config.token) :
hostname === GITHUB_CLOUD_HOSTNAME ? hostname === GITHUB_CLOUD_HOSTNAME ?
env.FALLBACK_GITHUB_CLOUD_TOKEN : env.FALLBACK_GITHUB_CLOUD_TOKEN :
undefined; undefined;
@ -195,7 +198,7 @@ export const getReposForAuthenticatedUser = async (visibility: 'all' | 'private'
} }
const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: AbortSignal, url?: string) => { const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(users.map(async (user) => { const results = await Promise.allSettled(users.map((user) => githubQueryLimit(async () => {
try { try {
logger.debug(`Fetching repository info for user ${user}...`); logger.debug(`Fetching repository info for user ${user}...`);
@ -244,7 +247,7 @@ const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: A
} }
throw error; throw error;
} }
})); })));
throwIfAnyFailed(results); throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results); const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);
@ -256,7 +259,7 @@ const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: A
} }
const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal, url?: string) => { const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(orgs.map(async (org) => { const results = await Promise.allSettled(orgs.map((org) => githubQueryLimit(async () => {
try { try {
logger.debug(`Fetching repository info for org ${org}...`); logger.debug(`Fetching repository info for org ${org}...`);
@ -292,7 +295,7 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
} }
throw error; throw error;
} }
})); })));
throwIfAnyFailed(results); throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results); const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);
@ -304,7 +307,7 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
} }
const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal, url?: string) => { const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(repoList.map(async (repo) => { const results = await Promise.allSettled(repoList.map((repo) => githubQueryLimit(async () => {
try { try {
const [owner, repoName] = repo.split('/'); const [owner, repoName] = repo.split('/');
logger.debug(`Fetching repository info for ${repo}...`); logger.debug(`Fetching repository info for ${repo}...`);
@ -342,7 +345,7 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna
} }
throw error; throw error;
} }
})); })));
throwIfAnyFailed(results); throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results); const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);

View file

@ -1,36 +1,52 @@
import { Gitlab, ProjectSchema } from "@gitbeaker/rest"; import { Gitlab, ProjectSchema } from "@gitbeaker/rest";
import micromatch from "micromatch";
import { createLogger } from "@sourcebot/logger";
import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"
import { measure, fetchWithRetry } from "./utils.js";
import { PrismaClient } from "@sourcebot/db";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { env } from "./env.js"; import { getTokenFromConfig } from "@sourcebot/shared";
import { getTokenFromConfig } from "@sourcebot/crypto"; import { createLogger } from "@sourcebot/shared";
import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type";
import { env } from "@sourcebot/shared";
import micromatch from "micromatch";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import { fetchWithRetry, measure } from "./utils.js";
const logger = createLogger('gitlab'); const logger = createLogger('gitlab');
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com"; export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";
export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, orgId: number, db: PrismaClient) => { export const createGitLabFromPersonalAccessToken = async ({ token, url }: { token?: string, url?: string }) => {
const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : false;
return new Gitlab({
token,
...(isGitLabCloud ? {} : {
host: url,
}),
queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000,
});
}
/**
 * Creates a Gitbeaker `Gitlab` client that authenticates with an OAuth token.
 *
 * @param oauthToken - OAuth token forwarded to the client as `oauthToken` (may be undefined).
 * @param url - Optional instance URL. When it is set and its hostname equals
 *   `GITLAB_CLOUD_HOSTNAME`, no `host` option is passed; in every other case
 *   (including a missing URL) `host: url` is passed through unchanged.
 * @returns A promise resolving to the configured client.
 * @throws If `url` is non-empty but cannot be parsed by `new URL`.
 */
export const createGitLabFromOAuthToken = async ({ oauthToken, url }: { oauthToken?: string, url?: string }) => {
    // A missing/empty URL is deliberately classified as "not cloud", matching the
    // personal-access-token variant of this factory.
    let pointsAtCloud = false;
    if (url) {
        pointsAtCloud = new URL(url).hostname === GITLAB_CLOUD_HOSTNAME;
    }

    const hostOption = pointsAtCloud ? {} : { host: url };

    return new Gitlab({
        oauthToken,
        ...hostOption,
        // The env value is expressed in seconds; it is multiplied by 1000 before being handed to the client.
        queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000,
    });
}
export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) => {
const hostname = config.url ? const hostname = config.url ?
new URL(config.url).hostname : new URL(config.url).hostname :
GITLAB_CLOUD_HOSTNAME; GITLAB_CLOUD_HOSTNAME;
const token = config.token ? const token = config.token ?
await getTokenFromConfig(config.token, orgId, db) : await getTokenFromConfig(config.token) :
hostname === GITLAB_CLOUD_HOSTNAME ? hostname === GITLAB_CLOUD_HOSTNAME ?
env.FALLBACK_GITLAB_CLOUD_TOKEN : env.FALLBACK_GITLAB_CLOUD_TOKEN :
undefined; undefined;
const api = new Gitlab({ const api = await createGitLabFromPersonalAccessToken({
...(token ? { token,
token, url: config.url,
} : {}),
...(config.url ? {
host: config.url,
} : {}),
queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000,
}); });
let allRepos: ProjectSchema[] = []; let allRepos: ProjectSchema[] = [];
@ -261,4 +277,38 @@ export const shouldExcludeProject = ({
} }
return false; return false;
}
/**
 * Fetches the member list of a GitLab project.
 *
 * The listing call is `api.ProjectMembers.all` with `perPage: 100` and
 * `includeInherited: true`, executed through `fetchWithRetry`.
 *
 * @param projectId - Identifier of the project, passed straight to the API call.
 * @param api - Gitbeaker client used to issue the request.
 * @returns The fetched members, typed as objects exposing a numeric `id`.
 * @throws Rethrows any failure after reporting it to Sentry and logging it.
 */
export const getProjectMembers = async (projectId: string, api: InstanceType<typeof Gitlab>) => {
    const listMembers = () => api.ProjectMembers.all(projectId, {
        perPage: 100,
        includeInherited: true,
    });

    try {
        const result = await fetchWithRetry(listMembers, `project ${projectId}`, logger);
        // Callers only rely on the member id, so the result is narrowed to that shape.
        return result as Array<{ id: number }>;
    } catch (error) {
        Sentry.captureException(error);
        logger.error(`Failed to fetch members for project ${projectId}.`, error);
        throw error;
    }
}
/**
 * Lists GitLab projects through `api.Projects.all` with `membership: true`
 * and 100 results per page, executed via `fetchWithRetry`.
 * NOTE(review): `membership: true` presumably restricts the listing to projects
 * the token's user is a member of - confirm against the GitLab Projects API.
 *
 * @param visibility - Visibility filter. `'all'` (the default) sends no
 *   `visibility` option; any other value is forwarded as-is. Because this
 *   defaulted parameter precedes the required `api`, callers must still pass
 *   a value (or `undefined`) in this position.
 * @param api - Gitbeaker client used to issue the request.
 * @returns Whatever the listing call resolves with.
 * @throws Rethrows any failure after reporting it to Sentry and logging it.
 */
export const getProjectsForAuthenticatedUser = async (visibility: 'private' | 'internal' | 'public' | 'all' = 'all', api: InstanceType<typeof Gitlab>) => {
try {
const fetchFn = () => api.Projects.all({
membership: true,
// Only include the filter when a specific visibility was requested.
...(visibility !== 'all' ? {
visibility,
} : {}),
perPage: 100,
});
const response = await fetchWithRetry(fetchFn, `authenticated user`, logger);
return response;
} catch (error) {
// Report, log, then propagate so the caller decides how to handle the failure.
Sentry.captureException(error);
logger.error(`Failed to fetch projects for authenticated user.`, error);
throw error;
}
} }

View file

@ -1,18 +1,20 @@
import "./instrument.js"; import "./instrument.js";
import * as Sentry from "@sentry/node";
import { PrismaClient } from "@sourcebot/db"; import { PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger"; import { createLogger, env, getConfigSettings, getDBConnectionString, hasEntitlement } from "@sourcebot/shared";
import { getConfigSettings, hasEntitlement } from '@sourcebot/shared'; import 'express-async-errors';
import { existsSync } from 'fs'; import { existsSync } from 'fs';
import { mkdir } from 'fs/promises'; import { mkdir } from 'fs/promises';
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import { Api } from "./api.js";
import { ConfigManager } from "./configManager.js"; import { ConfigManager } from "./configManager.js";
import { ConnectionManager } from './connectionManager.js'; import { ConnectionManager } from './connectionManager.js';
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js'; import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, SHUTDOWN_SIGNALS } from './constants.js';
import { AccountPermissionSyncer } from "./ee/accountPermissionSyncer.js";
import { GithubAppManager } from "./ee/githubAppManager.js"; import { GithubAppManager } from "./ee/githubAppManager.js";
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js'; import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js"; import { shutdownPosthog } from "./posthog.js";
import { env } from "./env.js";
import { PromClient } from './promClient.js'; import { PromClient } from './promClient.js';
import { RepoIndexManager } from "./repoIndexManager.js"; import { RepoIndexManager } from "./repoIndexManager.js";
@ -29,18 +31,25 @@ if (!existsSync(indexPath)) {
await mkdir(indexPath, { recursive: true }); await mkdir(indexPath, { recursive: true });
} }
const prisma = new PrismaClient(); const prisma = new PrismaClient({
datasources: {
db: {
url: getDBConnectionString(),
},
},
});
const redis = new Redis(env.REDIS_URL, { const redis = new Redis(env.REDIS_URL, {
maxRetriesPerRequest: null maxRetriesPerRequest: null
}); });
redis.ping().then(() => {
try {
await redis.ping();
logger.info('Connected to redis'); logger.info('Connected to redis');
}).catch((err: unknown) => { } catch (err: unknown) {
logger.error('Failed to connect to redis'); logger.error('Failed to connect to redis. Error:', err);
logger.error(err);
process.exit(1); process.exit(1);
}); }
const promClient = new PromClient(); const promClient = new PromClient();
@ -52,7 +61,7 @@ if (hasEntitlement('github-app')) {
const connectionManager = new ConnectionManager(prisma, settings, redis, promClient); const connectionManager = new ConnectionManager(prisma, settings, redis, promClient);
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis); const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis); const accountPermissionSyncer = new AccountPermissionSyncer(prisma, settings, redis);
const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient); const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient);
const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH); const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH);
@ -65,49 +74,77 @@ if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('per
} }
else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) { else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) {
repoPermissionSyncer.startScheduler(); repoPermissionSyncer.startScheduler();
userPermissionSyncer.startScheduler(); accountPermissionSyncer.startScheduler();
} }
const api = new Api(
promClient,
prisma,
connectionManager,
repoIndexManager,
);
logger.info('Worker started.'); logger.info('Worker started.');
const cleanup = async (signal: string) => { const listenToShutdownSignals = () => {
logger.info(`Received ${signal}, cleaning up...`); const signals = SHUTDOWN_SIGNALS;
const shutdownTimeout = 30000; // 30 seconds let receivedSignal = false;
try { const cleanup = async (signal: string) => {
await Promise.race([ try {
Promise.all([ if (receivedSignal) {
repoIndexManager.dispose(), return;
connectionManager.dispose(), }
repoPermissionSyncer.dispose(), receivedSignal = true;
userPermissionSyncer.dispose(),
promClient.dispose(), logger.info(`Received ${signal}, cleaning up...`);
configManager.dispose(),
]), await repoIndexManager.dispose()
new Promise((_, reject) => await connectionManager.dispose()
setTimeout(() => reject(new Error('Shutdown timeout')), shutdownTimeout) await repoPermissionSyncer.dispose()
) await accountPermissionSyncer.dispose()
]); await configManager.dispose()
logger.info('All workers shut down gracefully');
} catch (error) { await prisma.$disconnect();
logger.warn('Shutdown timeout or error, forcing exit:', error instanceof Error ? error.message : String(error)); await redis.quit();
await api.dispose();
await shutdownPosthog();
logger.info('All workers shut down gracefully');
signals.forEach(sig => process.removeListener(sig, cleanup));
return 0;
} catch (error) {
Sentry.captureException(error);
logger.error('Error shutting down worker:', error);
return 1;
}
} }
await prisma.$disconnect(); signals.forEach(signal => {
await redis.quit(); process.on(signal, (err) => {
cleanup(err).then(code => {
process.exit(code);
});
});
});
// Register handlers for uncaught exceptions and unhandled rejections
process.on('uncaughtException', (err) => {
logger.error(`Uncaught exception: ${err.message}`);
cleanup('uncaughtException').then(() => {
process.exit(1);
});
});
process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`);
cleanup('unhandledRejection').then(() => {
process.exit(1);
});
});
} }
process.on('SIGINT', () => cleanup('SIGINT').finally(() => process.exit(0))); listenToShutdownSignals();
process.on('SIGTERM', () => cleanup('SIGTERM').finally(() => process.exit(0)));
// Register handlers for uncaught exceptions and unhandled rejections
process.on('uncaughtException', (err) => {
logger.error(`Uncaught exception: ${err.message}`);
cleanup('uncaughtException').finally(() => process.exit(1));
});
process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`);
cleanup('unhandledRejection').finally(() => process.exit(1));
});

View file

@ -1,6 +1,6 @@
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { env } from "./env.js"; import { createLogger } from "@sourcebot/shared";
import { createLogger } from "@sourcebot/logger"; import { env } from "@sourcebot/shared/client";
const logger = createLogger('instrument'); const logger = createLogger('instrument');

View file

@ -1,12 +1,13 @@
import { env as clientEnv } from "@sourcebot/shared/client";
import { env } from "@sourcebot/shared";
import { PostHog } from 'posthog-node'; import { PostHog } from 'posthog-node';
import { PosthogEvent, PosthogEventMap } from './posthogEvents.js'; import { PosthogEvent, PosthogEventMap } from './posthogEvents.js';
import { env } from './env.js';
let posthog: PostHog | undefined = undefined; let posthog: PostHog | undefined = undefined;
if (env.NEXT_PUBLIC_POSTHOG_PAPIK) { if (env.POSTHOG_PAPIK) {
posthog = new PostHog( posthog = new PostHog(
env.NEXT_PUBLIC_POSTHOG_PAPIK, env.POSTHOG_PAPIK,
{ {
host: "https://us.i.posthog.com", host: "https://us.i.posthog.com",
} }
@ -23,9 +24,11 @@ export function captureEvent<E extends PosthogEvent>(event: E, properties: Posth
event: event, event: event,
properties: { properties: {
...properties, ...properties,
sourcebot_version: env.NEXT_PUBLIC_SOURCEBOT_VERSION, sourcebot_version: clientEnv.NEXT_PUBLIC_SOURCEBOT_VERSION,
}, },
}); });
} }
await posthog?.shutdown(); export async function shutdownPosthog() {
await posthog?.shutdown();
}

View file

@ -1,14 +1,6 @@
import express, { Request, Response } from 'express';
import { Server } from 'http';
import client, { Registry, Counter, Gauge } from 'prom-client'; import client, { Registry, Counter, Gauge } from 'prom-client';
import { createLogger } from "@sourcebot/logger";
const logger = createLogger('prometheus-client');
export class PromClient { export class PromClient {
private registry: Registry; public registry: Registry;
private app: express.Application;
private server: Server;
public activeRepoIndexJobs: Gauge<string>; public activeRepoIndexJobs: Gauge<string>;
public pendingRepoIndexJobs: Gauge<string>; public pendingRepoIndexJobs: Gauge<string>;
@ -22,8 +14,6 @@ export class PromClient {
public connectionSyncJobFailTotal: Counter<string>; public connectionSyncJobFailTotal: Counter<string>;
public connectionSyncJobSuccessTotal: Counter<string>; public connectionSyncJobSuccessTotal: Counter<string>;
public readonly PORT = 3060;
constructor() { constructor() {
this.registry = new Registry(); this.registry = new Registry();
@ -100,26 +90,5 @@ export class PromClient {
client.collectDefaultMetrics({ client.collectDefaultMetrics({
register: this.registry, register: this.registry,
}); });
this.app = express();
this.app.get('/metrics', async (req: Request, res: Response) => {
res.set('Content-Type', this.registry.contentType);
const metrics = await this.registry.metrics();
res.end(metrics);
});
this.server = this.app.listen(this.PORT, () => {
logger.info(`Prometheus metrics server is running on port ${this.PORT}`);
});
}
async dispose() {
return new Promise<void>((resolve, reject) => {
this.server.close((err) => {
if (err) reject(err);
else resolve();
});
});
} }
} }

View file

@ -7,10 +7,10 @@ import { BitbucketRepository, getBitbucketReposFromConfig } from "./bitbucket.js
import { getAzureDevOpsReposFromConfig } from "./azuredevops.js"; import { getAzureDevOpsReposFromConfig } from "./azuredevops.js";
import { SchemaRestRepository as BitbucketServerRepository } from "@coderabbitai/bitbucket/server/openapi"; import { SchemaRestRepository as BitbucketServerRepository } from "@coderabbitai/bitbucket/server/openapi";
import { SchemaRepository as BitbucketCloudRepository } from "@coderabbitai/bitbucket/cloud/openapi"; import { SchemaRepository as BitbucketCloudRepository } from "@coderabbitai/bitbucket/cloud/openapi";
import { Prisma, PrismaClient } from '@sourcebot/db'; import { CodeHostType, Prisma } from '@sourcebot/db';
import { WithRequired } from "./types.js" import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js"; import { marshalBool } from "./utils.js";
import { createLogger } from '@sourcebot/logger'; import { createLogger } from '@sourcebot/shared';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js"; import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
import path from 'path'; import path from 'path';
@ -19,11 +19,18 @@ import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git
import assert from 'assert'; import assert from 'assert';
import GitUrlParse from 'git-url-parse'; import GitUrlParse from 'git-url-parse';
import { RepoMetadata } from '@sourcebot/shared'; import { RepoMetadata } from '@sourcebot/shared';
import { SINGLE_TENANT_ORG_ID } from './constants.js';
import pLimit from 'p-limit';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>; export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
const logger = createLogger('repo-compile-utils'); const logger = createLogger('repo-compile-utils');
// Limit concurrent git operations to prevent resource exhaustion (EAGAIN errors)
// when processing thousands of repositories simultaneously
const MAX_CONCURRENT_GIT_OPERATIONS = 100;
const gitOperationLimit = pLimit(MAX_CONCURRENT_GIT_OPERATIONS);
type CompileResult = { type CompileResult = {
repoData: RepoData[], repoData: RepoData[],
warnings: string[], warnings: string[],
@ -32,10 +39,8 @@ type CompileResult = {
export const compileGithubConfig = async ( export const compileGithubConfig = async (
config: GithubConnectionConfig, config: GithubConnectionConfig,
connectionId: number, connectionId: number,
orgId: number, signal: AbortSignal): Promise<CompileResult> => {
db: PrismaClient, const gitHubReposResult = await getGitHubReposFromConfig(config, signal);
abortController: AbortController): Promise<CompileResult> => {
const gitHubReposResult = await getGitHubReposFromConfig(config, orgId, db, abortController.signal);
const gitHubRepos = gitHubReposResult.repos; const gitHubRepos = gitHubReposResult.repos;
const warnings = gitHubReposResult.warnings; const warnings = gitHubReposResult.warnings;
@ -66,7 +71,7 @@ export const compileGithubConfig = async (
isPublic: isPublic, isPublic: isPublic,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -104,11 +109,9 @@ export const compileGithubConfig = async (
export const compileGitlabConfig = async ( export const compileGitlabConfig = async (
config: GitlabConnectionConfig, config: GitlabConnectionConfig,
connectionId: number, connectionId: number): Promise<CompileResult> => {
orgId: number,
db: PrismaClient): Promise<CompileResult> => {
const gitlabReposResult = await getGitLabReposFromConfig(config, orgId, db); const gitlabReposResult = await getGitLabReposFromConfig(config);
const gitlabRepos = gitlabReposResult.repos; const gitlabRepos = gitlabReposResult.repos;
const warnings = gitlabReposResult.warnings; const warnings = gitlabReposResult.warnings;
@ -121,7 +124,6 @@ export const compileGitlabConfig = async (
const projectUrl = `${hostUrl}/${project.path_with_namespace}`; const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
const cloneUrl = new URL(project.http_url_to_repo); const cloneUrl = new URL(project.http_url_to_repo);
const isFork = project.forked_from_project !== undefined; const isFork = project.forked_from_project !== undefined;
// @todo: we will need to double check whether 'internal' should also be considered public or not.
const isPublic = project.visibility === 'public'; const isPublic = project.visibility === 'public';
const repoDisplayName = project.path_with_namespace; const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName); const repoName = path.join(repoNameRoot, repoDisplayName);
@ -145,7 +147,7 @@ export const compileGitlabConfig = async (
isArchived: !!project.archived, isArchived: !!project.archived,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -181,11 +183,9 @@ export const compileGitlabConfig = async (
export const compileGiteaConfig = async ( export const compileGiteaConfig = async (
config: GiteaConnectionConfig, config: GiteaConnectionConfig,
connectionId: number, connectionId: number): Promise<CompileResult> => {
orgId: number,
db: PrismaClient): Promise<CompileResult> => {
const giteaReposResult = await getGiteaReposFromConfig(config, orgId, db); const giteaReposResult = await getGiteaReposFromConfig(config);
const giteaRepos = giteaReposResult.repos; const giteaRepos = giteaReposResult.repos;
const warnings = giteaReposResult.warnings; const warnings = giteaReposResult.warnings;
@ -218,7 +218,7 @@ export const compileGiteaConfig = async (
isArchived: !!repo.archived, isArchived: !!repo.archived,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -252,8 +252,7 @@ export const compileGiteaConfig = async (
export const compileGerritConfig = async ( export const compileGerritConfig = async (
config: GerritConnectionConfig, config: GerritConnectionConfig,
connectionId: number, connectionId: number): Promise<CompileResult> => {
orgId: number): Promise<CompileResult> => {
const gerritRepos = await getGerritReposFromConfig(config); const gerritRepos = await getGerritReposFromConfig(config);
const hostUrl = config.url; const hostUrl = config.url;
@ -299,7 +298,7 @@ export const compileGerritConfig = async (
isArchived: false, isArchived: false,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -333,11 +332,9 @@ export const compileGerritConfig = async (
export const compileBitbucketConfig = async ( export const compileBitbucketConfig = async (
config: BitbucketConnectionConfig, config: BitbucketConnectionConfig,
connectionId: number, connectionId: number): Promise<CompileResult> => {
orgId: number,
db: PrismaClient): Promise<CompileResult> => {
const bitbucketReposResult = await getBitbucketReposFromConfig(config, orgId, db); const bitbucketReposResult = await getBitbucketReposFromConfig(config);
const bitbucketRepos = bitbucketReposResult.repos; const bitbucketRepos = bitbucketReposResult.repos;
const warnings = bitbucketReposResult.warnings; const warnings = bitbucketReposResult.warnings;
@ -393,7 +390,7 @@ export const compileBitbucketConfig = async (
const repos = bitbucketRepos.map((repo) => { const repos = bitbucketRepos.map((repo) => {
const isServer = config.deploymentType === 'server'; const isServer = config.deploymentType === 'server';
const codeHostType = isServer ? 'bitbucket-server' : 'bitbucket-cloud'; // zoekt expects bitbucket-server const codeHostType: CodeHostType = isServer ? 'bitbucketServer' : 'bitbucketCloud';
const displayName = isServer ? (repo as BitbucketServerRepository).name! : (repo as BitbucketCloudRepository).full_name!; const displayName = isServer ? (repo as BitbucketServerRepository).name! : (repo as BitbucketCloudRepository).full_name!;
const externalId = isServer ? (repo as BitbucketServerRepository).id!.toString() : (repo as BitbucketCloudRepository).uuid!; const externalId = isServer ? (repo as BitbucketServerRepository).id!.toString() : (repo as BitbucketCloudRepository).uuid!;
const isPublic = isServer ? (repo as BitbucketServerRepository).public : (repo as BitbucketCloudRepository).is_private === false; const isPublic = isServer ? (repo as BitbucketServerRepository).public : (repo as BitbucketCloudRepository).is_private === false;
@ -416,7 +413,7 @@ export const compileBitbucketConfig = async (
isArchived: isArchived, isArchived: isArchived,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -426,7 +423,8 @@ export const compileBitbucketConfig = async (
}, },
metadata: { metadata: {
gitConfig: { gitConfig: {
'zoekt.web-url-type': codeHostType, // zoekt expects bitbucket-server and bitbucket-cloud
'zoekt.web-url-type': codeHostType === 'bitbucketServer' ? 'bitbucket-server' : 'bitbucket-cloud',
'zoekt.web-url': webUrl, 'zoekt.web-url': webUrl,
'zoekt.name': repoName, 'zoekt.name': repoName,
'zoekt.archived': marshalBool(isArchived), 'zoekt.archived': marshalBool(isArchived),
@ -450,15 +448,14 @@ export const compileBitbucketConfig = async (
export const compileGenericGitHostConfig = async ( export const compileGenericGitHostConfig = async (
config: GenericGitHostConnectionConfig, config: GenericGitHostConnectionConfig,
connectionId: number, connectionId: number
orgId: number,
): Promise<CompileResult> => { ): Promise<CompileResult> => {
const configUrl = new URL(config.url); const configUrl = new URL(config.url);
if (configUrl.protocol === 'file:') { if (configUrl.protocol === 'file:') {
return compileGenericGitHostConfig_file(config, orgId, connectionId); return compileGenericGitHostConfig_file(config, connectionId);
} }
else if (configUrl.protocol === 'http:' || configUrl.protocol === 'https:') { else if (configUrl.protocol === 'http:' || configUrl.protocol === 'https:') {
return compileGenericGitHostConfig_url(config, orgId, connectionId); return compileGenericGitHostConfig_url(config, connectionId);
} }
else { else {
// Schema should prevent this, but throw an error just in case. // Schema should prevent this, but throw an error just in case.
@ -468,7 +465,6 @@ export const compileGenericGitHostConfig = async (
export const compileGenericGitHostConfig_file = async ( export const compileGenericGitHostConfig_file = async (
config: GenericGitHostConnectionConfig, config: GenericGitHostConnectionConfig,
orgId: number,
connectionId: number, connectionId: number,
): Promise<CompileResult> => { ): Promise<CompileResult> => {
const configUrl = new URL(config.url); const configUrl = new URL(config.url);
@ -481,8 +477,8 @@ export const compileGenericGitHostConfig_file = async (
const repos: RepoData[] = []; const repos: RepoData[] = [];
const warnings: string[] = []; const warnings: string[] = [];
await Promise.all(repoPaths.map(async (repoPath) => { await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
const isGitRepo = await isPathAValidGitRepoRoot({ const isGitRepo = await isPathAValidGitRepoRoot({
path: repoPath, path: repoPath,
}); });
@ -508,7 +504,7 @@ export const compileGenericGitHostConfig_file = async (
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repo: RepoData = { const repo: RepoData = {
external_codeHostType: 'generic-git-host', external_codeHostType: 'genericGitHost',
external_codeHostUrl: remoteUrl.resource, external_codeHostUrl: remoteUrl.resource,
external_id: remoteUrl.toString(), external_id: remoteUrl.toString(),
cloneUrl: `file://${repoPath}`, cloneUrl: `file://${repoPath}`,
@ -518,7 +514,7 @@ export const compileGenericGitHostConfig_file = async (
isArchived: false, isArchived: false,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -536,7 +532,7 @@ export const compileGenericGitHostConfig_file = async (
} }
repos.push(repo); repos.push(repo);
})); })));
return { return {
repoData: repos, repoData: repos,
@ -547,7 +543,6 @@ export const compileGenericGitHostConfig_file = async (
export const compileGenericGitHostConfig_url = async ( export const compileGenericGitHostConfig_url = async (
config: GenericGitHostConnectionConfig, config: GenericGitHostConnectionConfig,
orgId: number,
connectionId: number, connectionId: number,
): Promise<CompileResult> => { ): Promise<CompileResult> => {
const remoteUrl = new URL(config.url); const remoteUrl = new URL(config.url);
@ -572,7 +567,7 @@ export const compileGenericGitHostConfig_url = async (
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repo: RepoData = { const repo: RepoData = {
external_codeHostType: 'generic-git-host', external_codeHostType: 'genericGitHost',
external_codeHostUrl: remoteUrl.origin, external_codeHostUrl: remoteUrl.origin,
external_id: remoteUrl.toString(), external_id: remoteUrl.toString(),
cloneUrl: remoteUrl.toString(), cloneUrl: remoteUrl.toString(),
@ -582,7 +577,7 @@ export const compileGenericGitHostConfig_url = async (
isArchived: false, isArchived: false,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {
@ -604,11 +599,9 @@ export const compileGenericGitHostConfig_url = async (
export const compileAzureDevOpsConfig = async ( export const compileAzureDevOpsConfig = async (
config: AzureDevOpsConnectionConfig, config: AzureDevOpsConnectionConfig,
connectionId: number, connectionId: number): Promise<CompileResult> => {
orgId: number,
db: PrismaClient): Promise<CompileResult> => {
const azureDevOpsReposResult = await getAzureDevOpsReposFromConfig(config, orgId, db); const azureDevOpsReposResult = await getAzureDevOpsReposFromConfig(config);
const azureDevOpsRepos = azureDevOpsReposResult.repos; const azureDevOpsRepos = azureDevOpsReposResult.repos;
const warnings = azureDevOpsReposResult.warnings; const warnings = azureDevOpsReposResult.warnings;
@ -621,18 +614,18 @@ export const compileAzureDevOpsConfig = async (
if (!repo.project) { if (!repo.project) {
throw new Error(`No project found for repository ${repo.name}`); throw new Error(`No project found for repository ${repo.name}`);
} }
const repoDisplayName = `${repo.project.name}/${repo.name}`; const repoDisplayName = `${repo.project.name}/${repo.name}`;
const repoName = path.join(repoNameRoot, repoDisplayName); const repoName = path.join(repoNameRoot, repoDisplayName);
const isPublic = repo.project.visibility === ProjectVisibility.Public; const isPublic = repo.project.visibility === ProjectVisibility.Public;
if (!repo.remoteUrl) { if (!repo.remoteUrl) {
throw new Error(`No remoteUrl found for repository ${repoDisplayName}`); throw new Error(`No remoteUrl found for repository ${repoDisplayName}`);
} }
if (!repo.id) { if (!repo.id) {
throw new Error(`No id found for repository ${repoDisplayName}`); throw new Error(`No id found for repository ${repoDisplayName}`);
} }
// Construct web URL for the repository // Construct web URL for the repository
const webUrl = repo.webUrl || `${hostUrl}/${repo.project.name}/_git/${repo.name}`; const webUrl = repo.webUrl || `${hostUrl}/${repo.project.name}/_git/${repo.name}`;
@ -652,7 +645,7 @@ export const compileAzureDevOpsConfig = async (
isPublic: isPublic, isPublic: isPublic,
org: { org: {
connect: { connect: {
id: orgId, id: SINGLE_TENANT_ORG_ID,
}, },
}, },
connections: { connections: {

View file

@ -1,19 +1,18 @@
import * as Sentry from '@sentry/node'; import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db"; import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/logger"; import { createLogger, Logger } from "@sourcebot/shared";
import { repoMetadataSchema, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata } from '@sourcebot/shared'; import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared';
import { existsSync } from 'fs'; import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises'; import { readdir, rm } from 'fs/promises';
import { Job, Queue, ReservedJob, Worker } from "groupmq"; import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis'; import { Redis } from 'ioredis';
import micromatch from 'micromatch'; import micromatch from 'micromatch';
import { INDEX_CACHE_DIR } from './constants.js'; import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS, INDEX_CACHE_DIR } from './constants.js';
import { env } from './env.js';
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js'; import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { captureEvent } from './posthog.js'; import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js'; import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js"; import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js'; import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js'; import { indexGitRepository } from './zoekt.js';
const LOG_TAG = 'repo-index-manager'; const LOG_TAG = 'repo-index-manager';
@ -46,7 +45,7 @@ export class RepoIndexManager {
constructor( constructor(
private db: PrismaClient, private db: PrismaClient,
private settings: Settings, private settings: Settings,
redis: Redis, private redis: Redis,
private promClient: PromClient, private promClient: PromClient,
) { ) {
this.queue = new Queue<JobPayload>({ this.queue = new Queue<JobPayload>({
@ -71,11 +70,15 @@ export class RepoIndexManager {
this.worker.on('failed', this.onJobFailed.bind(this)); this.worker.on('failed', this.onJobFailed.bind(this));
this.worker.on('stalled', this.onJobStalled.bind(this)); this.worker.on('stalled', this.onJobStalled.bind(this));
this.worker.on('error', this.onWorkerError.bind(this)); this.worker.on('error', this.onWorkerError.bind(this));
// graceful-timeout is triggered when a job is still processing after
// worker.close() is called and the timeout period has elapsed. In this case,
// we fail the job with no retry.
this.worker.on('graceful-timeout', this.onJobGracefulTimeout.bind(this));
} }
public async startScheduler() { public startScheduler() {
logger.debug('Starting scheduler'); logger.debug('Starting scheduler');
this.interval = setInterval(async () => { this.interval = setIntervalAsync(async () => {
await this.scheduleIndexJobs(); await this.scheduleIndexJobs();
await this.scheduleCleanupJobs(); await this.scheduleCleanupJobs();
}, this.settings.reindexRepoPollingIntervalMs); }, this.settings.reindexRepoPollingIntervalMs);
@ -193,7 +196,7 @@ export class RepoIndexManager {
} }
} }
private async createJobs(repos: Repo[], type: RepoIndexingJobType) { public async createJobs(repos: Repo[], type: RepoIndexingJobType) {
// @note: we don't perform this in a transaction because // @note: we don't perform this in a transaction because
// we want to avoid the situation where a job is created and run // we want to avoid the situation where a job is created and run
// prior to the transaction being committed. // prior to the transaction being committed.
@ -222,6 +225,8 @@ export class RepoIndexManager {
const jobTypeLabel = getJobTypePrometheusLabel(type); const jobTypeLabel = getJobTypePrometheusLabel(type);
this.promClient.pendingRepoIndexJobs.inc({ repo: job.repo.name, type: jobTypeLabel }); this.promClient.pendingRepoIndexJobs.inc({ repo: job.repo.name, type: jobTypeLabel });
} }
return jobs.map(job => job.id);
} }
private async runJob(job: ReservedJob<JobPayload>) { private async runJob(job: ReservedJob<JobPayload>) {
@ -229,6 +234,23 @@ export class RepoIndexManager {
const logger = createJobLogger(id); const logger = createJobLogger(id);
logger.info(`Running ${job.data.type} job ${id} for repo ${job.data.repoName} (id: ${job.data.repoId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`); logger.info(`Running ${job.data.type} job ${id} for repo ${job.data.repoName} (id: ${job.data.repoId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
const currentStatus = await this.db.repoIndexingJob.findUniqueOrThrow({
where: {
id,
},
select: {
status: true,
}
});
// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
// is in an invalid state and should be skipped.
if (
currentStatus.status !== RepoIndexingJobStatus.PENDING &&
currentStatus.status !== RepoIndexingJobStatus.IN_PROGRESS
) {
throw new Error(`Job ${id} is not in a valid state. Expected: ${RepoIndexingJobStatus.PENDING} or ${RepoIndexingJobStatus.IN_PROGRESS}. Actual: ${currentStatus.status}. Skipping.`);
}
const { repo, type: jobType } = await this.db.repoIndexingJob.update({ const { repo, type: jobType } = await this.db.repoIndexingJob.update({
where: { where: {
@ -290,7 +312,7 @@ export class RepoIndexManager {
const metadata = repoMetadataSchema.parse(repo.metadata); const metadata = repoMetadataSchema.parse(repo.metadata);
const credentials = await getAuthCredentialsForRepo(repo, this.db); const credentials = await getAuthCredentialsForRepo(repo);
const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? repo.cloneUrl; const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? repo.cloneUrl;
const authHeader = credentials?.authHeader ?? undefined; const authHeader = credentials?.authHeader ?? undefined;
@ -539,6 +561,28 @@ export class RepoIndexManager {
logger.error(`Job ${jobId} stalled for repo ${repo.name} (id: ${repo.id})`); logger.error(`Job ${jobId} stalled for repo ${repo.name} (id: ${repo.id})`);
}); });
/**
 * Handler for the worker's `graceful-timeout` event.
 *
 * Marks the corresponding `repoIndexingJob` record as FAILED (stamping
 * `completedAt` and a fixed error message), moves the job from the "active"
 * metric to the "failed" metric, and logs the timeout against the job's logger.
 *
 * The body runs inside `groupmqLifecycleExceptionWrapper`, which receives the
 * module-level logger (not the per-job one created below).
 *
 * @param job The groupmq job that was still in flight when the timeout fired.
 */
private onJobGracefulTimeout = async (job: Job<JobPayload>) =>
    groupmqLifecycleExceptionWrapper('onJobGracefulTimeout', logger, async () => {
        const { jobId, repoName, type } = job.data;
        const jobLogger = createJobLogger(jobId);
        const typeLabel = getJobTypePrometheusLabel(type);

        // Persist the failure first; the updated row also gives us the repo for logging.
        const updatedJob = await this.db.repoIndexingJob.update({
            where: { id: jobId },
            data: {
                status: RepoIndexingJobStatus.FAILED,
                completedAt: new Date(),
                errorMessage: 'Job timed out',
            },
            select: { repo: true }
        });
        const timedOutRepo = updatedJob.repo;

        // The job is no longer active and counts as a failure.
        this.promClient.activeRepoIndexJobs.dec({ repo: repoName, type: typeLabel });
        this.promClient.repoIndexJobFailTotal.inc({ repo: repoName, type: typeLabel });

        jobLogger.error(`Job ${jobId} timed out for repo ${timedOutRepo.name} (id: ${timedOutRepo.id}). Failing job.`);
    });
private async onWorkerError(error: Error) { private async onWorkerError(error: Error) {
Sentry.captureException(error); Sentry.captureException(error);
logger.error(`Index syncer worker error.`, error); logger.error(`Index syncer worker error.`, error);
@ -548,8 +592,20 @@ export class RepoIndexManager {
if (this.interval) { if (this.interval) {
clearInterval(this.interval); clearInterval(this.interval);
} }
await this.worker.close(); const inProgressJobs = this.worker.getCurrentJobs();
await this.queue.close(); await this.worker.close(GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS);
// Manually release group locks for in progress jobs to prevent deadlocks.
// @see: https://github.com/Openpanel-dev/groupmq/issues/8
for (const { job } of inProgressJobs) {
const lockKey = `groupmq:repo-index-queue:lock:${job.groupId}`;
logger.debug(`Releasing group lock ${lockKey} for in progress job ${job.id}`);
await this.redis.del(lockKey);
}
// @note: As of groupmq v1.0.0, queue.close() will just close the underlying
// redis connection. Since we share the same redis client between, skip this
// step and close the redis client directly in index.ts.
// await this.queue.close();
} }
} }

View file

@ -1,8 +1,8 @@
import { Logger } from "winston"; import { Logger } from "winston";
import { RepoAuthCredentials, RepoWithConnections } from "./types.js"; import { RepoAuthCredentials, RepoWithConnections } from "./types.js";
import path from 'path'; import path from 'path';
import { PrismaClient, Repo } from "@sourcebot/db"; import { Repo } from "@sourcebot/db";
import { getTokenFromConfig } from "@sourcebot/crypto"; import { getTokenFromConfig } from "@sourcebot/shared";
import * as Sentry from "@sentry/node"; import * as Sentry from "@sentry/node";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { GithubAppManager } from "./ee/githubAppManager.js"; import { GithubAppManager } from "./ee/githubAppManager.js";
@ -59,7 +59,7 @@ export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } =
// If we are dealing with a local repository, then use that as the path. // If we are dealing with a local repository, then use that as the path.
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem. // Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
const cloneUrl = new URL(repo.cloneUrl); const cloneUrl = new URL(repo.cloneUrl);
if (repo.external_codeHostType === 'generic-git-host' && cloneUrl.protocol === 'file:') { if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
return { return {
path: cloneUrl.pathname, path: cloneUrl.pathname,
isReadOnly: true, isReadOnly: true,
@ -110,7 +110,7 @@ export const fetchWithRetry = async <T>(
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each // fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This // may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This
// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing. // may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: PrismaClient, logger?: Logger): Promise<RepoAuthCredentials | undefined> => { export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, logger?: Logger): Promise<RepoAuthCredentials | undefined> => {
// If we have github apps configured we assume that we must use them for github service auth // If we have github apps configured we assume that we must use them for github service auth
if (repo.external_codeHostType === 'github' && hasEntitlement('github-app') && GithubAppManager.getInstance().appsConfigured()) { if (repo.external_codeHostType === 'github' && hasEntitlement('github-app') && GithubAppManager.getInstance().appsConfigured()) {
logger?.debug(`Using GitHub App for service auth for repo ${repo.displayName} hosted at ${repo.external_codeHostUrl}`); logger?.debug(`Using GitHub App for service auth for repo ${repo.displayName} hosted at ${repo.external_codeHostUrl}`);
@ -139,7 +139,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
if (connection.connectionType === 'github') { if (connection.connectionType === 'github') {
const config = connection.config as unknown as GithubConnectionConfig; const config = connection.config as unknown as GithubConnectionConfig;
if (config.token) { if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db); const token = await getTokenFromConfig(config.token);
return { return {
hostUrl: config.url, hostUrl: config.url,
token, token,
@ -154,7 +154,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'gitlab') { } else if (connection.connectionType === 'gitlab') {
const config = connection.config as unknown as GitlabConnectionConfig; const config = connection.config as unknown as GitlabConnectionConfig;
if (config.token) { if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db); const token = await getTokenFromConfig(config.token);
return { return {
hostUrl: config.url, hostUrl: config.url,
token, token,
@ -170,7 +170,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'gitea') { } else if (connection.connectionType === 'gitea') {
const config = connection.config as unknown as GiteaConnectionConfig; const config = connection.config as unknown as GiteaConnectionConfig;
if (config.token) { if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db); const token = await getTokenFromConfig(config.token);
return { return {
hostUrl: config.url, hostUrl: config.url,
token, token,
@ -185,7 +185,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'bitbucket') { } else if (connection.connectionType === 'bitbucket') {
const config = connection.config as unknown as BitbucketConnectionConfig; const config = connection.config as unknown as BitbucketConnectionConfig;
if (config.token) { if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db); const token = await getTokenFromConfig(config.token);
const username = config.user ?? 'x-token-auth'; const username = config.user ?? 'x-token-auth';
return { return {
hostUrl: config.url, hostUrl: config.url,
@ -202,7 +202,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'azuredevops') { } else if (connection.connectionType === 'azuredevops') {
const config = connection.config as unknown as AzureDevOpsConnectionConfig; const config = connection.config as unknown as AzureDevOpsConnectionConfig;
if (config.token) { if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db); const token = await getTokenFromConfig(config.token);
// For ADO server, multiple auth schemes may be supported. If the ADO deployment supports NTLM, the git clone will default // For ADO server, multiple auth schemes may be supported. If the ADO deployment supports NTLM, the git clone will default
// to this over basic auth. As a result, we cannot embed the token in the clone URL and must force basic auth by passing in the token // to this over basic auth. As a result, we cannot embed the token in the clone URL and must force basic auth by passing in the token
@ -268,3 +268,27 @@ export const groupmqLifecycleExceptionWrapper = async (name: string, logger: Log
} }
} }
/**
 * `setInterval` wrapper that ensures an async callback is never executed
 * concurrently with itself: if the previous invocation has not settled when the
 * next tick fires, that tick is skipped.
 *
 * The "is running" flag lives in this call's closure rather than on `target`
 * itself, so the caller's function object is never mutated and two intervals
 * created from the same function do not share (and block on) one flag.
 *
 * Rejections from `target` are not caught here; they propagate out of the
 * interval callback as a rejected promise. The flag is still reset in that case.
 *
 * @see: https://mottaquikarim.github.io/dev/posts/setinterval-that-blocks-on-await/
 *
 * @param target Async callback to invoke on every tick.
 * @param pollingIntervalMs Delay between ticks, in milliseconds.
 * @returns The timer handle; pass it to `clearInterval` to stop polling.
 */
export const setIntervalAsync = (target: () => Promise<void>, pollingIntervalMs: number): NodeJS.Timeout => {
    let isRunning = false;

    const runIfIdle = async (): Promise<void> => {
        // Previous invocation still in flight: skip this tick.
        if (isRunning) {
            return;
        }

        isRunning = true;
        try {
            await target();
        } finally {
            // Always release the guard, even when `target` rejects.
            isRunning = false;
        }
    };

    return setInterval(runIfIdle, pollingIntervalMs);
}

View file

@ -1,5 +1,5 @@
import { Repo } from "@sourcebot/db"; import { Repo } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger"; import { createLogger, env } from "@sourcebot/shared";
import { exec } from "child_process"; import { exec } from "child_process";
import { INDEX_CACHE_DIR } from "./constants.js"; import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js"; import { Settings } from "./types.js";
@ -11,6 +11,8 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
const { path: repoPath } = getRepoPath(repo); const { path: repoPath } = getRepoPath(repo);
const shardPrefix = getShardPrefix(repo.orgId, repo.id); const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const largeFileGlobPatterns = env.ALWAYS_INDEX_FILE_PATTERNS?.split(',').map(pattern => pattern.trim()) ?? [];
const command = [ const command = [
'zoekt-git-index', 'zoekt-git-index',
'-allow_missing_branches', '-allow_missing_branches',
@ -21,6 +23,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
`-tenant_id ${repo.orgId}`, `-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`, `-repo_id ${repo.id}`,
`-shard_prefix ${shardPrefix}`, `-shard_prefix ${shardPrefix}`,
...largeFileGlobPatterns.map((pattern) => `-large_file ${pattern}`),
repoPath repoPath
].join(' '); ].join(' ');

View file

@ -1 +0,0 @@
.env.local

View file

@ -1,19 +0,0 @@
{
"name": "@sourcebot/crypto",
"version": "0.1.0",
"main": "dist/index.js",
"private": true,
"scripts": {
"build": "tsc",
"postinstall": "yarn build"
},
"dependencies": {
"@sourcebot/db": "*",
"@sourcebot/schemas": "*",
"dotenv": "^16.4.5"
},
"devDependencies": {
"@types/node": "^22.7.5",
"typescript": "^5.7.3"
}
}

View file

@ -1,13 +0,0 @@
import dotenv from 'dotenv';
/**
 * Returns `env` when it is set, otherwise `defaultValue`.
 *
 * Only a missing value (`undefined`/`null`) triggers the fallback; an empty
 * string is returned as-is.
 */
export const getEnv = (env: string | undefined, defaultValue?: string) => {
    if (env == null) {
        return defaultValue;
    }
    return env;
}
dotenv.config({
path: './.env.local',
override: true
});
// @note: You can use https://generate-random.org/encryption-key-generator to create a new 32 byte key
export const SOURCEBOT_ENCRYPTION_KEY = getEnv(process.env.SOURCEBOT_ENCRYPTION_KEY);

View file

@ -1,33 +0,0 @@
import { PrismaClient } from "@sourcebot/db";
import { Token } from "@sourcebot/schemas/v3/shared.type";
import { decrypt } from "./index.js";
/**
 * Resolves the plaintext token described by a connection's `token` config.
 *
 * Two forms are handled:
 *  - `{ secret: key }`: looks up the secret row for (`orgId`, `key`) and
 *    returns its decrypted value.
 *  - `{ env: name }`: returns the value of `process.env[name]`.
 *
 * @param token Token descriptor from the connection config.
 * @param orgId Org whose secrets are searched for the `secret` form.
 * @param db Prisma client used for the secret lookup.
 * @returns The resolved token string.
 * @throws Error if the secret row does not exist, if the environment variable
 *         is unset or empty, or if `token` has neither a `secret` nor an `env` key.
 */
export const getTokenFromConfig = async (token: Token, orgId: number, db: PrismaClient) => {
    if ('secret' in token) {
        const key = token.secret;
        const record = await db.secret.findUnique({
            where: {
                orgId_key: { key, orgId }
            }
        });

        if (record) {
            return decrypt(record.iv, record.encryptedValue);
        }
        throw new Error(`Secret with key ${key} not found for org ${orgId}`);
    }

    if ('env' in token) {
        const value = process.env[token.env];
        if (value) {
            return value;
        }
        throw new Error(`Environment variable ${token.env} not found.`);
    }

    throw new Error('Invalid token configuration');
};

View file

@ -25,7 +25,6 @@
}, },
"dependencies": { "dependencies": {
"@prisma/client": "6.2.1", "@prisma/client": "6.2.1",
"@sourcebot/logger": "workspace:*",
"@types/readline-sync": "^1.4.8", "@types/readline-sync": "^1.4.8",
"readline-sync": "^1.4.10" "readline-sync": "^1.4.10"
} }

View file

@ -0,0 +1,14 @@
/*
Migrates the `connectionType` column from text to an enum. The values in this field are known to
be one of the following: github, gitlab, gitea, gerrit, bitbucket, azuredevops, git.
This is according to what we would expect to be in a valid config file for the schema version at commit 4899c9fbc755851af2ddcce99f4a4200f2faa4f6.
See: https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/schemas/src/v3/connection.type.ts#L3
*/
-- CreateEnum
CREATE TYPE "ConnectionType" AS ENUM ('github', 'gitlab', 'gitea', 'gerrit', 'bitbucket', 'azuredevops', 'git');
-- AlterTable - Convert existing column to enum type without dropping data
ALTER TABLE "Connection"
ALTER COLUMN "connectionType" TYPE "ConnectionType"
USING "connectionType"::text::"ConnectionType";

View file

@ -0,0 +1,22 @@
/*
Migrates the `external_codeHostType` column from text to an enum. The values in this field are known to
be one of the following: github, gitlab, gitea, gerrit, bitbucket-server, bitbucket-cloud, generic-git-host, azuredevops.
This is according to what we would expect to be in the database written as of commit 4899c9fbc755851af2ddcce99f4a4200f2faa4f6.
See:
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L57
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L135
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L208
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L291
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L407
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L510
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L574
- https://github.com/sourcebot-dev/sourcebot/blob/4899c9fbc755851af2ddcce99f4a4200f2faa4f6/packages/backend/src/repoCompileUtils.ts#L642
*/
-- CreateEnum
CREATE TYPE "CodeHostType" AS ENUM ('github', 'gitlab', 'gitea', 'gerrit', 'bitbucket-server', 'bitbucket-cloud', 'generic-git-host', 'azuredevops');
-- AlterTable - Convert existing column to enum type without dropping data
ALTER TABLE "Repo"
ALTER COLUMN "external_codeHostType" TYPE "CodeHostType"
USING "external_codeHostType"::text::"CodeHostType";

View file

@ -0,0 +1,11 @@
/*
Warnings:
- You are about to drop the `Secret` table. If the table is not empty, all the data it contains will be lost.
*/
-- DropForeignKey
ALTER TABLE "Secret" DROP CONSTRAINT "Secret_orgId_fkey";
-- DropTable
DROP TABLE "Secret";

View file

@ -0,0 +1,65 @@
/*
Warnings:
- You are about to drop the column `permissionSyncedAt` on the `User` table. All the data in the column will be lost.
- You are about to drop the `UserPermissionSyncJob` table. If the table is not empty, all the data it contains will be lost.
- You are about to drop the `UserToRepoPermission` table. If the table is not empty, all the data it contains will be lost.
*/
-- CreateEnum
CREATE TYPE "AccountPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED');
-- DropForeignKey
ALTER TABLE "UserPermissionSyncJob" DROP CONSTRAINT "UserPermissionSyncJob_userId_fkey";
-- DropForeignKey
ALTER TABLE "UserToRepoPermission" DROP CONSTRAINT "UserToRepoPermission_repoId_fkey";
-- DropForeignKey
ALTER TABLE "UserToRepoPermission" DROP CONSTRAINT "UserToRepoPermission_userId_fkey";
-- AlterTable
ALTER TABLE "Account" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3);
-- AlterTable
ALTER TABLE "User" DROP COLUMN "permissionSyncedAt";
-- DropTable
DROP TABLE "UserPermissionSyncJob";
-- DropTable
DROP TABLE "UserToRepoPermission";
-- DropEnum
DROP TYPE "UserPermissionSyncJobStatus";
-- CreateTable
CREATE TABLE "AccountPermissionSyncJob" (
"id" TEXT NOT NULL,
"status" "AccountPermissionSyncJobStatus" NOT NULL DEFAULT 'PENDING',
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"completedAt" TIMESTAMP(3),
"errorMessage" TEXT,
"accountId" TEXT NOT NULL,
CONSTRAINT "AccountPermissionSyncJob_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "AccountToRepoPermission" (
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"repoId" INTEGER NOT NULL,
"accountId" TEXT NOT NULL,
CONSTRAINT "AccountToRepoPermission_pkey" PRIMARY KEY ("repoId","accountId")
);
-- AddForeignKey
ALTER TABLE "AccountPermissionSyncJob" ADD CONSTRAINT "AccountPermissionSyncJob_accountId_fkey" FOREIGN KEY ("accountId") REFERENCES "Account"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "AccountToRepoPermission" ADD CONSTRAINT "AccountToRepoPermission_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE;
-- AddForeignKey
ALTER TABLE "AccountToRepoPermission" ADD CONSTRAINT "AccountToRepoPermission_accountId_fkey" FOREIGN KEY ("accountId") REFERENCES "Account"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View file

@ -0,0 +1,5 @@
-- First, remove the NOT NULL constraint on the createdById column.
ALTER TABLE "Chat" ALTER COLUMN "createdById" DROP NOT NULL;
-- Then, set all chats created by the guest user (id: 1) to have a NULL createdById.
UPDATE "Chat" SET "createdById" = NULL WHERE "createdById" = '1';

View file

@ -29,6 +29,21 @@ enum ChatVisibility {
PUBLIC PUBLIC
} }
/// @note: The @map annotation is required to maintain backwards compatibility
/// with the existing database.
/// @note: In the generated client, these mapped values will be in camelCase (e.g. `bitbucketServer`).
/// This behaviour will change in prisma v7. See: https://github.com/prisma/prisma/issues/8446#issuecomment-3356119713
enum CodeHostType {
github
gitlab
gitea
gerrit
bitbucketServer @map("bitbucket-server")
bitbucketCloud @map("bitbucket-cloud")
genericGitHost @map("generic-git-host")
azuredevops
}
model Repo { model Repo {
id Int @id @default(autoincrement()) id Int @id @default(autoincrement())
name String /// Full repo name, including the vcs hostname (ex. github.com/sourcebot-dev/sourcebot) name String /// Full repo name, including the vcs hostname (ex. github.com/sourcebot-dev/sourcebot)
@ -44,7 +59,7 @@ model Repo {
connections RepoToConnection[] connections RepoToConnection[]
imageUrl String? imageUrl String?
permittedUsers UserToRepoPermission[] permittedAccounts AccountToRepoPermission[]
permissionSyncJobs RepoPermissionSyncJob[] permissionSyncJobs RepoPermissionSyncJob[]
permissionSyncedAt DateTime? /// When the permissions were last synced successfully. permissionSyncedAt DateTime? /// When the permissions were last synced successfully.
@ -53,7 +68,7 @@ model Repo {
indexedCommitHash String? /// The commit hash of the last indexed commit (on HEAD). indexedCommitHash String? /// The commit hash of the last indexed commit (on HEAD).
external_id String /// The id of the repo in the external service external_id String /// The id of the repo in the external service
external_codeHostType String /// The type of the external service (e.g., github, gitlab, etc.) external_codeHostType CodeHostType /// The type of the external service (e.g., github, gitlab, etc.)
external_codeHostUrl String /// The base url of the external service (e.g., https://github.com) external_codeHostUrl String /// The base url of the external service (e.g., https://github.com)
org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) org Org @relation(fields: [orgId], references: [id], onDelete: Cascade)
@ -125,6 +140,18 @@ model SearchContext {
@@unique([name, orgId]) @@unique([name, orgId])
} }
/// Matches the union of `type` fields in the schema.
/// @see: schemas/v3/connection.type.ts
enum ConnectionType {
github
gitlab
gitea
gerrit
bitbucket
azuredevops
git
}
model Connection { model Connection {
id Int @id @default(autoincrement()) id Int @id @default(autoincrement())
name String name String
@ -135,7 +162,7 @@ model Connection {
repos RepoToConnection[] repos RepoToConnection[]
// The type of connection (e.g., github, gitlab, etc.) // The type of connection (e.g., github, gitlab, etc.)
connectionType String connectionType ConnectionType
syncJobs ConnectionSyncJob[] syncJobs ConnectionSyncJob[]
/// When the connection was last synced successfully. /// When the connection was last synced successfully.
@ -226,7 +253,6 @@ model Org {
members UserToOrg[] members UserToOrg[]
connections Connection[] connections Connection[]
repos Repo[] repos Repo[]
secrets Secret[]
apiKeys ApiKey[] apiKeys ApiKey[]
isOnboarded Boolean @default(false) isOnboarded Boolean @default(false)
imageUrl String? imageUrl String?
@ -276,19 +302,6 @@ model UserToOrg {
@@id([orgId, userId]) @@id([orgId, userId])
} }
model Secret {
orgId Int
key String
encryptedValue String
iv String
createdAt DateTime @default(now())
org Org @relation(fields: [orgId], references: [id], onDelete: Cascade)
@@id([orgId, key])
}
model ApiKey { model ApiKey {
name String name String
hash String @id @unique hash String @id @unique
@ -336,7 +349,6 @@ model User {
accounts Account[] accounts Account[]
orgs UserToOrg[] orgs UserToOrg[]
accountRequest AccountRequest? accountRequest AccountRequest?
accessibleRepos UserToRepoPermission[]
/// List of pending invites that the user has created /// List of pending invites that the user has created
invites Invite[] invites Invite[]
@ -348,40 +360,38 @@ model User {
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
permissionSyncJobs UserPermissionSyncJob[]
permissionSyncedAt DateTime?
} }
enum UserPermissionSyncJobStatus { enum AccountPermissionSyncJobStatus {
PENDING PENDING
IN_PROGRESS IN_PROGRESS
COMPLETED COMPLETED
FAILED FAILED
} }
model UserPermissionSyncJob { model AccountPermissionSyncJob {
id String @id @default(cuid()) id String @id @default(cuid())
status UserPermissionSyncJobStatus @default(PENDING) status AccountPermissionSyncJobStatus @default(PENDING)
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
completedAt DateTime? completedAt DateTime?
errorMessage String? errorMessage String?
user User @relation(fields: [userId], references: [id], onDelete: Cascade) account Account @relation(fields: [accountId], references: [id], onDelete: Cascade)
userId String accountId String
} }
model UserToRepoPermission { model AccountToRepoPermission {
createdAt DateTime @default(now()) createdAt DateTime @default(now())
repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade) repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade)
repoId Int repoId Int
user User @relation(fields: [userId], references: [id], onDelete: Cascade) account Account @relation(fields: [accountId], references: [id], onDelete: Cascade)
userId String accountId String
@@id([repoId, userId]) @@id([repoId, accountId])
} }
// @see : https://authjs.dev/concepts/database-models#account // @see : https://authjs.dev/concepts/database-models#account
@ -398,6 +408,12 @@ model Account {
scope String? scope String?
id_token String? id_token String?
session_state String? session_state String?
/// List of repos that this account has access to.
accessibleRepos AccountToRepoPermission[]
permissionSyncJobs AccountPermissionSyncJob[]
permissionSyncedAt DateTime?
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt
@ -421,8 +437,8 @@ model Chat {
name String? name String?
createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) createdBy User? @relation(fields: [createdById], references: [id], onDelete: Cascade)
createdById String createdById String?
createdAt DateTime @default(now()) createdAt DateTime @default(now())
updatedAt DateTime @updatedAt updatedAt DateTime @updatedAt

View file

@ -1 +1,3 @@
import type { User, Account } from ".prisma/client";
export type UserWithAccounts = User & { accounts: Account[] };
export * from ".prisma/client"; export * from ".prisma/client";

View file

@ -3,7 +3,6 @@ import { ArgumentParser } from "argparse";
import { migrateDuplicateConnections } from "./scripts/migrate-duplicate-connections"; import { migrateDuplicateConnections } from "./scripts/migrate-duplicate-connections";
import { injectAuditData } from "./scripts/inject-audit-data"; import { injectAuditData } from "./scripts/inject-audit-data";
import { confirmAction } from "./utils"; import { confirmAction } from "./utils";
import { createLogger } from "@sourcebot/logger";
import { injectRepoData } from "./scripts/inject-repo-data"; import { injectRepoData } from "./scripts/inject-repo-data";
import { testRepoQueryPerf } from "./scripts/test-repo-query-perf"; import { testRepoQueryPerf } from "./scripts/test-repo-query-perf";
@ -23,19 +22,17 @@ parser.add_argument("--url", { required: true, help: "Database URL" });
parser.add_argument("--script", { required: true, help: "Script to run" }); parser.add_argument("--script", { required: true, help: "Script to run" });
const args = parser.parse_args(); const args = parser.parse_args();
const logger = createLogger('db-script-runner');
(async () => { (async () => {
if (!(args.script in scripts)) { if (!(args.script in scripts)) {
logger.error("Invalid script"); console.error("Invalid script");
process.exit(1); process.exit(1);
} }
const selectedScript = scripts[args.script]; const selectedScript = scripts[args.script];
logger.info("\nTo confirm:"); console.log("\nTo confirm:");
logger.info(`- Database URL: ${args.url}`); console.log(`- Database URL: ${args.url}`);
logger.info(`- Script: ${args.script}`); console.log(`- Script: ${args.script}`);
confirmAction(); confirmAction();
@ -45,7 +42,7 @@ const logger = createLogger('db-script-runner');
await selectedScript.run(prisma); await selectedScript.run(prisma);
logger.info("\nDone."); console.log("\nDone.");
process.exit(0); process.exit(0);
})(); })();

View file

@ -1,9 +1,6 @@
import { Script } from "../scriptRunner"; import { Script } from "../scriptRunner";
import { PrismaClient } from "../../dist"; import { PrismaClient } from "../../dist";
import { confirmAction } from "../utils"; import { confirmAction } from "../utils";
import { createLogger } from "@sourcebot/logger";
const logger = createLogger('inject-audit-data');
// Generate realistic audit data for analytics testing // Generate realistic audit data for analytics testing
// Simulates 50 engineers with varying activity patterns // Simulates 50 engineers with varying activity patterns
@ -17,11 +14,11 @@ export const injectAuditData: Script = {
}); });
if (!org) { if (!org) {
logger.error(`Organization with id ${orgId} not found. Please create it first.`); console.error(`Organization with id ${orgId} not found. Please create it first.`);
return; return;
} }
logger.info(`Injecting audit data for organization: ${org.name} (${org.domain})`); console.log(`Injecting audit data for organization: ${org.name} (${org.domain})`);
// Generate 50 fake user IDs // Generate 50 fake user IDs
const userIds = Array.from({ length: 50 }, (_, i) => `user_${String(i + 1).padStart(3, '0')}`); const userIds = Array.from({ length: 50 }, (_, i) => `user_${String(i + 1).padStart(3, '0')}`);
@ -38,7 +35,7 @@ export const injectAuditData: Script = {
const startDate = new Date(); const startDate = new Date();
startDate.setDate(startDate.getDate() - 90); startDate.setDate(startDate.getDate() - 90);
logger.info(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); console.log(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`);
confirmAction(); confirmAction();
@ -125,9 +122,9 @@ export const injectAuditData: Script = {
} }
} }
logger.info(`\nAudit data injection complete!`); console.log(`\nAudit data injection complete!`);
logger.info(`Users: ${userIds.length}`); console.log(`Users: ${userIds.length}`);
logger.info(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`); console.log(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`);
// Show some statistics // Show some statistics
const stats = await prisma.audit.groupBy({ const stats = await prisma.audit.groupBy({
@ -136,9 +133,9 @@ export const injectAuditData: Script = {
_count: { action: true } _count: { action: true }
}); });
logger.info('\nAction breakdown:'); console.log('\nAction breakdown:');
stats.forEach(stat => { stats.forEach(stat => {
logger.info(` ${stat.action}: ${stat._count.action}`); console.log(` ${stat.action}: ${stat._count.action}`);
}); });
}, },
}; };

Some files were not shown because too many files have changed in this diff Show more