Compare commits


103 commits
v4.7.2...main

Author SHA1 Message Date
msukkari
095474a901 update perm syncing docs
2025-12-11 06:46:20 -08:00
Brendan Kellam
d63f3cf9d9
chore(web): Improve error messages for file loading errors (#665)
2025-12-05 11:58:19 -08:00
Cade 🐀
3d85a0595c
fix: add support for anyuid to Dockerfile (#658)
* fix: add support for anyuid to Dockerfile

* changelog

---------

Co-authored-by: Cade Schlaefli <cade.schlaefli@mouser.com>
Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-12-04 22:29:23 -08:00
Brian Phillips
84cf524d84
Add GHES support to the review agent (#611)
* add support for GHES to the review agent

* fix throttling types

---------

Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-12-04 22:08:24 -08:00
bkellam
7c72578765 sourcebot v4.10.2
2025-12-04 10:41:41 -08:00
Brendan Kellam
483b433aab
fix(web): Respect disable telemetry flag for web server side events (#657)
* fix

* changelog
2025-12-04 10:32:32 -08:00
Brendan Kellam
bcca1d6d7d
chore(web): Fix mistake of upgrading to a breaking version of next (#656)
2025-12-03 17:12:10 -08:00
bkellam
0e88eecc30 release @sourcebot/mcp v1.0.11
2025-12-03 16:10:51 -08:00
bkellam
a4685e34ab sourcebot v4.10.1 2025-12-03 16:05:53 -08:00
Brendan Kellam
76dc2f5a12
chore(web): Server side search telemetry (#652) 2025-12-03 16:04:36 -08:00
Brendan Kellam
7fc068f8b2
fix(web): Fix CVE 2025-55182 (#654) 2025-12-03 15:59:43 -08:00
bkellam
91caf129ed chore: add default PostHog token in env.server.ts for development scenarios
2025-12-01 20:18:23 -08:00
Brendan Kellam
92578881df
chore(web): Scope code nav to current repository by default (#647)
2025-11-30 18:53:09 -08:00
Brendan Kellam
28986f4355
chore(web): Bake PostHog token into build 2025-11-30 18:29:01 -08:00
Adam
41a6eb48a0
Shrink Docker image size by ~1/3 by removing unnecessary ops (#642)
* Remove duplicate copy, chown on copy

* Add Dockerfile syntax

* Revert entrypoint changes to avoid errors in some non-root cases
2025-11-29 12:43:12 -08:00
Brendan Kellam
92ae76168c
fix(web): Fix issue where creating a new Ask thread would result in a 404 (#641)
2025-11-28 23:01:33 -08:00
Brendan Kellam
f1dd16be82
fix(web): Ask sourcebot perf improvements (#632)
2025-11-27 16:56:11 -08:00
Brendan Kellam
cc2837b740
fix(web): Fix error when loading files with special characters (#637) 2025-11-27 14:24:45 -08:00
Brendan Kellam
0633d1f23c
fix discord link (#634)
2025-11-26 13:56:00 -08:00
Brendan Kellam
8bc4f1e520
feat(worker): Add ALWAYS_INDEX_FILE_PATTERNS env var to specify files that should always be indexed (#631)
2025-11-25 23:38:30 -08:00
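How a worker might consume the new env var, going by the changelog description for #631 ("a comma separated list of glob patterns matching file paths that should always be indexed"). A hedged sketch only; the helper name and example patterns are illustrative, not the actual implementation:

```ts
import micromatch from "micromatch";

// Comma-separated glob list, e.g. ALWAYS_INDEX_FILE_PATTERNS="**/*.lock,docs/**"
// (example values are hypothetical).
const patterns = (process.env.ALWAYS_INDEX_FILE_PATTERNS ?? "")
    .split(",")
    .map((pattern) => pattern.trim())
    .filter(Boolean);

// True if the file should be indexed regardless of its size or trigram count.
export const shouldAlwaysIndex = (filePath: string): boolean =>
    patterns.length > 0 && micromatch.isMatch(filePath, patterns);
```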
Brendan Kellam
c962fdd636
fix(web): Fix issue where quotes cannot be used within a query (#629)
2025-11-25 12:06:51 -08:00
bkellam
8e036a340f @sourcebot/mcp v1.0.10
2025-11-24 14:29:42 -08:00
bkellam
fb305c2808 sourcebot v4.10.0 2025-11-24 13:44:12 -08:00
Brendan Kellam
c671e96139
feat(web): Add support for authentik sso (#627) 2025-11-24 13:28:04 -08:00
Brendan Kellam
f3a8fa3dab
feat(web): Streamed code search (#623)
* generate protobuf types

* stream poc over SSE

* wip: make stream search api follow existing schema. Modify UI to support streaming

* fix scrolling issue

* Dockerfile

* wip on lezer parser grammar for query language

* add lezer tree -> grpc transformer

* remove spammy log message

* fix syntax highlighting by adding a module resolution for @lezer/common

* further wip on query language

* Add case sensitivity and regexp toggles

* Improved type safety / cleanup for query lang

* support search contexts

* update Dockerfile with query language package

* fix filter

* Add skeletons to filter panel when search is streaming

* add client side caching

* improved cancelation handling

* add isSearchExausted flag for flagging when a search captured all results

* Add back posthog search_finished event

* remove zoekt tenant enforcement

* migrate blocking search over to grpc. Centralize everything in searchApi

* branch handling

* plumb file weburl

* add repo_sets filter for repositories a user has access to

* refactor a bunch of stuff + add support for passing in Query IR to search api

* refactor

* dev README

* wip on better error handling

* error handling for stream path

* update mcp

* changelog wip

* type fix

* style

* Support rev:* wildcard

* changelog

* changelog nit

* feedback

* fix build

* update docs and remove unneeded test file
2025-11-22 15:33:31 -08:00
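The commit bullets above mention streaming results "over SSE". A sketch of what a client consuming such a stream could look like; the endpoint path and payload shape are assumptions for illustration, not Sourcebot's actual API:

```ts
// Minimal SSE consumer sketch. Assumes the server emits "data: <json>" events
// separated by blank lines, one per batch of matches.
async function streamSearch(query: string, onMatch: (chunk: unknown) => void) {
    const response = await fetch("/api/search/stream", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ query }),
    });
    const reader = response.body!.pipeThrough(new TextDecoderStream()).getReader();
    let buffer = "";
    for (;;) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += value;
        // Process every complete event; keep the trailing partial event buffered.
        for (const event of buffer.split("\n\n").slice(0, -1)) {
            const data = event.split("\n").find((line) => line.startsWith("data:"));
            if (data) onMatch(JSON.parse(data.slice(5)));
        }
        buffer = buffer.split("\n\n").at(-1) ?? "";
    }
}
```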
Brendan Kellam
09507d3e89
fix(worker): Permission syncer fixes (#624)
2025-11-19 22:14:23 -08:00
Brendan Kellam
97dd54d48f
chore(web): Add count to members / requests / invites tabs in settings (#621)
2025-11-18 10:41:40 -08:00
bkellam
831197980c release @sourcebot/mcp v1.0.9
2025-11-17 17:11:03 -08:00
Teddy Reinert
9bee8c2c59
feat(mcp): Add pagination and filtering to list_repos tool (#614)
* feat(mcp): Add pagination and filtering to list_repos tool

Fixes #566

  - Add query parameter to filter repositories by name
  - Add pageNumber and limit parameters for pagination
  - Include pagination info in response when applicable
  - Add listReposRequestSchema for request validation
  - Update README with new list_repos parameters

* feat(mcp): Sort repositories alphabetically for consistent pagination

Fixes #566
- Updated CHANGELOG.md with pagination and filtering changes

---------

Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-11-17 17:08:20 -08:00
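Going by the bullets above (filter by `query`, paginate with `pageNumber`/`limit`, sort alphabetically for consistent pagination), a hedged sketch of the tool's core logic; the types, defaults, and response shape are illustrative:

```ts
interface Repo { name: string; }

// Filter by name, sort alphabetically for stable pages, then paginate.
function listRepos(repos: Repo[], query?: string, pageNumber = 1, limit = 50) {
    const filtered = query
        ? repos.filter((r) => r.name.toLowerCase().includes(query.toLowerCase()))
        : repos;
    const sorted = [...filtered].sort((a, b) => a.name.localeCompare(b.name));
    const start = (pageNumber - 1) * limit;
    return {
        repos: sorted.slice(start, start + limit),
        // Pagination info is included "when applicable", per the commit message.
        pagination: sorted.length > limit
            ? { pageNumber, limit, totalRepos: sorted.length }
            : undefined,
    };
}
```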
Jose Hernandez
e20d514569
feat(bitbucket): support glob patterns in repository exclusions (#620)
* feat(bitbucket): support glob patterns in repository exclusions

Update Bitbucket Cloud and Server exclusion logic to support glob
patterns (e.g., "org/repo*") in the exclude.repos configuration,
matching the documented behavior and aligning with other providers
(GitHub, GitLab, Gitea, Azure DevOps).

Changes:
- Add micromatch import for pattern matching
- Replace Array.includes() with micromatch.isMatch() in
  cloudShouldExcludeRepo and serverShouldExcludeRepo functions
- Add reason logging for exclusion decisions to match GitHub's pattern

This enables users to exclude repositories using wildcard patterns
as documented in the Bitbucket Cloud connection documentation.

* update changelog

---------

Co-authored-by: Jose Hernandez <jose.hernandez@emilabs.ai>
Co-authored-by: bkellam <bshizzle1234@gmail.com>
2025-11-17 14:33:39 -08:00
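The key change, per the commit body, is swapping `Array.includes()` for `micromatch.isMatch()` in the exclusion checks. A minimal sketch under those assumptions; the function and parameter names are illustrative, not the actual `cloudShouldExcludeRepo`/`serverShouldExcludeRepo` signatures:

```ts
import micromatch from "micromatch";

// Before (per the commit message): exact matching only, e.g.
//   excludedRepos.includes(repoSlug)
// After: glob-aware matching, so patterns like "org/repo*" work.
function shouldExcludeRepo(repoSlug: string, excludePatterns: string[]): boolean {
    const excluded = micromatch.isMatch(repoSlug, excludePatterns);
    if (excluded) {
        // The commit also adds reason logging for exclusion decisions.
        console.debug(`Excluding ${repoSlug}: matched an exclude.repos pattern`);
    }
    return excluded;
}
```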
Michael Sukkarieh
1dff20d47a
fix(ee): Wipe search contexts on init if we no longer have the entitlement (#618)
2025-11-13 21:29:51 -08:00
Brendan Kellam
fbe1073d0e
fix(web): Fix loading issues with references / definitions list (#617) 2025-11-13 17:21:48 -08:00
bkellam
341836a2ed sourcebot v4.9.2
2025-11-13 00:36:47 -08:00
Brendan Kellam
2e959b7d59
feat(web): Add env var to configure default max match count (#616) 2025-11-13 00:06:23 -08:00
Brendan Kellam
a814bd6f7e
fix(web): Search performance improvements (#615) 2025-11-12 23:20:26 -08:00
Brendan Kellam
06c84f0bf5
fix(worker): Fix issue where connections would always sync on startup (#613)
2025-11-11 20:31:08 -08:00
Brendan Kellam
903d15a2c5
fix(worker): Fix issues with gracefully shutting down (#612) 2025-11-11 20:11:59 -08:00
Brendan Kellam
18fad64baa
feat(web): Add force resync buttons for repo & connections (#610) 2025-11-11 15:16:40 -08:00
bkellam
2dfafdae41 release @sourcebot/mcp v1.0.8
2025-11-10 15:32:32 -08:00
Wayne Sun
278c0dc556
fix: return truncated content when token limit exceeded in MCP search_code (#604)
When search results exceed maxTokens limit, now returns partial truncated
content instead of discarding the file completely.

Changes:
- Calculate remaining token budget before breaking
- Truncate file content to fit within remaining tokens (if > 100 tokens left)
- Append truncation marker to indicate content was cut off
- Still add truncation message at end of all results

Benefits:
- Users get partial data instead of nothing
- Better debugging and analysis experience
- More useful for AI-powered code analysis tasks
- Consistent with expected behavior when limits are reached

Example: If file would use 10K tokens but only 2K remain, return
first ~8K chars of content + truncation marker instead of dropping it.

Signed-off-by: Wayne Sun <gsun@redhat.com>
2025-11-10 15:23:56 -08:00
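A sketch of the budget arithmetic the commit describes. The ~4-characters-per-token estimate (which matches the "2K tokens remain → ~8K chars" example above) and the helper names are stand-ins, not the actual MCP implementation:

```ts
// Rough token estimate: ~4 characters per token (assumption for this sketch).
const countTokens = (s: string) => Math.ceil(s.length / 4);
const TRUNCATION_MARKER = "\n... [content truncated] ...";

function appendWithBudget(results: string[], file: string, maxTokens: number): string[] {
    const remaining = maxTokens - countTokens(results.join("\n"));
    if (countTokens(file) <= remaining) {
        return [...results, file];
    }
    // Only truncate if a meaningful budget (> 100 tokens) remains;
    // otherwise the file is dropped, as before.
    if (remaining > 100) {
        const keepChars = (remaining - countTokens(TRUNCATION_MARKER)) * 4;
        return [...results, file.slice(0, keepChars) + TRUNCATION_MARKER];
    }
    return results;
}
```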
Brendan Kellam
6f64d5bb8d
fix(worker): Run setInterval as blocking (#607)
2025-11-09 14:49:24 -08:00
Brendan Kellam
1be6e8842e
fix(worker): properly shutdown PostHog client (#609) 2025-11-09 14:30:01 -08:00
Arman K.
f04ecab3ad
Update README.md (#608) 2025-11-09 13:28:28 -08:00
bkellam
d63da4b2c0 sourcebot v4.9.1
2025-11-07 10:39:57 -08:00
Brendan Kellam
825cef9da4
feat(deployment): Basic docker-compose file (#480) 2025-11-07 10:38:24 -08:00
Brendan Kellam
dd5cf61977
fix discord links (#606) 2025-11-07 10:05:05 -08:00
Furbreeze
5f5690ec49
adding contribution step for generating database schema (#602)
2025-11-06 10:50:41 -08:00
bkellam
720f2e4f4b update changelog for https://github.com/sourcebot-dev/sourcebot/pull/599
2025-11-05 22:29:15 -08:00
Brendan Kellam
612ecff93a
feat: Support running Docker container as non-root (#599) 2025-11-05 22:24:46 -08:00
bkellam
33c732855f sourcebot v4.9.0
2025-11-04 21:29:33 -08:00
msukkari
5fe00a6b48 typo in linked account settings 2025-11-04 21:26:19 -08:00
Brendan Kellam
1908051daa
feat(web,worker): Environment overrides (#597) 2025-11-04 21:22:31 -08:00
Brendan Kellam
5fde901356
chore(worker): Refactor permission syncing join table to be between Account <> Repo (#600) 2025-11-04 20:12:07 -08:00
Michael Sukkarieh
449c76fdcc
feat(ee): Add ability to link external accounts (#595) 2025-11-04 20:08:04 -08:00
Brendan Kellam
26ec7af7f0
feat(worker,web): Support google secrets as a token type (#594)
2025-11-01 22:02:55 -07:00
bkellam
7e161e6df3 alter roadmap release update trigger
2025-11-01 15:17:51 -07:00
Derek Miller
2c0540f6bf
fix(backend): Limit concurrent git operations to prevent resource exhaustion (#590) (#593)
When syncing generic-git-host connections with thousands of repositories,
unbounded Promise.all caused resource exhaustion (EAGAIN errors) by spawning
too many concurrent git processes. This resulted in valid repositories being
incorrectly skipped during sync.

- Add p-limit to control concurrent git operations (max 100)
- Follow existing pattern from github.ts for consistency
- Prevents file descriptor and process limit exhaustion
- Uses rolling concurrency to avoid head-of-line blocking

Fixes #590
2025-11-01 15:15:09 -07:00
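A sketch of the bounded-concurrency pattern the commit describes, using `p-limit` with the maximum of 100 named in the bullets; `fetchRepo` is a hypothetical stand-in for the per-repo git operation:

```ts
import pLimit from "p-limit";

// Cap concurrent git subprocesses at 100 so a connection with thousands of
// repos no longer exhausts file descriptors or process limits (EAGAIN).
const limit = pLimit(100);

async function syncRepos(repoUrls: string[], fetchRepo: (url: string) => Promise<void>) {
    // p-limit gives rolling concurrency: as one git operation finishes, the
    // next queued one starts, avoiding head-of-line blocking.
    await Promise.all(repoUrls.map((url) => limit(() => fetchRepo(url))));
}
```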
bkellam
d1655d4587 run update roadmap on pushes to main
2025-10-31 14:58:33 -07:00
Brian Phillips
58456d616b
add p-limit to GitHub API calls to avoid overwhelming the node process (or the API rate limits) (#591) 2025-10-31 14:49:43 -07:00
Michael Sukkarieh
fd17871da4
chore(tech-debt): Remove built-in secret manager (#592) 2025-10-31 14:33:28 -07:00
Brendan Kellam
581a5a0bd8
fix(web): Fix /settings/connections throwing an error when there is a git connection present (#588) 2025-10-31 13:08:51 -07:00
Brendan Kellam
4899c9fbc7
feat(ee): GitLab permission syncing (#585)
2025-10-30 11:08:10 -07:00
Brendan Kellam
384aa9ebe6
fix(web): Fix "The account is already associated with another user" errors when signing in with GitLab (#584)
2025-10-29 21:25:48 -07:00
Michael Sukkarieh
bbb197a9bf
fix(github app): Generate installation tokens each time (#583)
* generate installation tokens each time

* changelog
2025-10-29 18:05:18 -07:00
Brendan Kellam
d09d65dce7
fix(ask): Extract reasoning tokens for openai compatible models (#582) 2025-10-29 17:13:31 -07:00
msukkari
727a6da105 remove old config files
2025-10-29 16:04:30 -07:00
bkellam
86be06928b sourcebot v4.8.1 2025-10-29 14:09:26 -07:00
Brendan Kellam
63cf48264d
chore(web): Bug fixes related to v4.8.0 release (#581) 2025-10-29 14:05:48 -07:00
bkellam
bc592addad @sourcebot/mcp v1.0.7
2025-10-28 23:03:11 -07:00
bkellam
6a56296a76 sourcebot v4.8.0 2025-10-28 22:38:54 -07:00
bkellam
b40b204408 nit(web): Change how no jobs is represented in connections & repos tables 2025-10-28 22:25:02 -07:00
Brendan Kellam
0d738a27b6
chore: Specify shutdown order in supervisord.conf (#580) 2025-10-28 22:23:10 -07:00
Brendan Kellam
a167accd7e
feat(worker,web): Improved connection management (#579) 2025-10-28 21:31:28 -07:00
Michael Sukkarieh
3ff88da33b
feat(ee): Add REST API to get users and delete a user (#578)
* add get users and delete user endpoints

* changelog

* changelog typo

* update license

* add tags to changelog
2025-10-28 17:05:47 -07:00
Michael Sukkarieh
5b1caae854
feat(security): Add env var to restrict api key creation (#577)
* add env var to restrict api key creation

* changelog
2025-10-28 15:36:29 -07:00
bkellam
336b07d41c Add github commit issue # to todo comment
2025-10-27 11:49:03 -07:00
msukkari
b939d1e420 enforce permitted user check even when no where clause
2025-10-26 21:11:42 -07:00
msukkari
0bd545359e fix bug with octokit url for github cloud
2025-10-25 21:57:13 -07:00
Brendan Kellam
2d3b03bf12
feat(web): Improved repository table (#572)
2025-10-25 14:51:41 -04:00
msukkari
4b86bcd182 add debug log for github auth app
2025-10-24 11:49:40 -07:00
Brendan Kellam
a470ab8463
chore(worker): Prometheus metrics for repo index manager (#571)
2025-10-21 20:43:33 -07:00
msukkari
ef77e212a0 fix bug from github app pr 2025-10-21 20:39:59 -07:00
Michael Sukkarieh
c2299aa86b
feat(auth): github app (#570)
* properly handle emails for github app auth case

* add docs info for auth through github app

* more info in docs for user auth perms

* modify review agent env var names

* github app service auth

* coderabbit suggestions

* fixes

* fix build
2025-10-21 20:17:28 -07:00
Brendan Kellam
03999f0de0
fix(worker): Use indexTimeoutMs setting for job timeout (#567)
2025-10-20 12:41:09 -07:00
Brendan Kellam
4ebe4e0475
chore(worker,web): Repo indexing stability improvements + perf improvements to web (#563)
2025-10-18 16:31:22 -07:00
prateek singh
5b09757e92
feat(browse): Implement dynamic tab titles for files and folders (#560)
* feat(metadata): Enhance metadata generation for repository browsing
feat(utils): Add parseRepoPath function to extract repository name and revision from URL path

* feat(metadata): update tab title with appropriate file name, path or repository name.

* fix: remove left-over console logs and Async Params resolution.

* feat: refactor parsePathForTitle to utilize getBrowseParamsFromPathParam for cleaner code.

* minor refactoring and adding changelog.

* Remove unused import

* refactor: change parsePathForTitle to a non-exported function

---------

Co-authored-by: Brendan Kellam <bshizzle1234@gmail.com>
2025-10-15 11:44:30 -07:00
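The first bullet above mentions a `parseRepoPath` function that extracts the repository name and revision from the URL path. An illustrative sketch, assuming a `repo@revision/-/...` path shape; the real route format may differ:

```ts
// Hypothetical sketch: split a browse path like
// "github.com/org/repo@main/-/blob/src/index.ts" into repo name + revision.
function parseRepoPath(path: string): { repoName: string; revision?: string } {
    const [repoPart] = path.split("/-/");
    const atIndex = repoPart.lastIndexOf("@");
    return atIndex === -1
        ? { repoName: repoPart }
        : { repoName: repoPart.slice(0, atIndex), revision: repoPart.slice(atIndex + 1) };
}
```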
Brendan Kellam
c3fae1aaab
feat(web): Improved search performance on unbounded searches (#555) 2025-10-07 23:55:36 -07:00
Brendan Kellam
18ba1d2492
update demo deploy cadence (#556) 2025-10-07 23:51:52 -07:00
bkellam
8d7babc8d2 chore(worker): Change log message to debug 2025-10-07 16:38:56 -07:00
bkellam
595abc12be use blacksmith arm machine for arm builds 2025-10-07 10:21:19 -07:00
blacksmith-sh[bot]
0e8fdf0f97
Migrate workflows to Blacksmith (#554)
Co-authored-by: blacksmith-sh[bot] <157653362+blacksmith-sh[bot]@users.noreply.github.com>
2025-10-07 10:05:27 -07:00
Brendan Kellam
83c6704b01
fix: Fix git dubious ownership errors (#553) 2025-10-06 19:54:17 -07:00
Brendan Kellam
5e3e4f000a
chore(web): Remove spam "login page loaded" log (#552) 2025-10-06 15:04:41 -07:00
msukkari
623c794a75 update description in docs 2025-10-04 10:03:29 -07:00
Brendan Kellam
425a816fb6
Update README.md 2025-10-03 21:38:49 -07:00
bkellam
6a4c9220bd chore: try including platform pair in cache key 2025-10-03 21:24:14 -07:00
Brendan Kellam
eeb6b73a64
chore: Move helm chart to separate repo (#549) 2025-10-03 15:45:36 -07:00
Andre Nogueira
9c8224e39f
Add Sourcebot Helm Chart (#370)
* feat: add helm chart

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* feat: add sts support to use internal DB and improve values docs

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: include postgresql extra dependency

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: remove autoscaler

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: remove sts

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add more suggestive env var example

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add chart dependency lock

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

* fix: add host infer to the chart docs

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>

---------

Signed-off-by: Andre Nogueira <andre.nogueira@mollie.com>
2025-10-03 15:39:26 -07:00
Brendan Kellam
c10010eb99
feat(db): Support passing db connection as separate env vars (#545) 2025-10-02 12:51:39 -07:00
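The changelog entry for this PR lists `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. A hedged sketch of composing a connection URL from them; the precedence and URL shape are assumptions, not the actual implementation:

```ts
// Prefer a full DATABASE_URL if provided; otherwise assemble one from parts.
function buildDatabaseUrl(env = process.env): string | undefined {
    if (env.DATABASE_URL) return env.DATABASE_URL;
    const { DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, DATABASE_NAME, DATABASE_ARGS } = env;
    if (!DATABASE_HOST || !DATABASE_NAME) return undefined;
    const auth = DATABASE_USERNAME
        ? `${DATABASE_USERNAME}${DATABASE_PASSWORD ? `:${DATABASE_PASSWORD}` : ""}@`
        : "";
    const args = DATABASE_ARGS ? `?${DATABASE_ARGS}` : "";
    return `postgresql://${auth}${DATABASE_HOST}/${DATABASE_NAME}${args}`;
}
```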
bkellam
d24de793f2 Add roadmap link to docs 2025-10-01 14:47:26 -07:00
msukkari
5b20911a08 v4.7.3 2025-09-29 10:28:41 -07:00
Michael Sukkarieh
aa62847143
fix(ado): Manually pass token through http header for ado server (#543)
* support passing in token manually in auth header

* remove unneeded PAT embed check

* cleanup authheader usage

* changelog

* var name typo

* unset auth header in fetch

* move unset to finally in fetch
2025-09-27 17:14:29 -07:00
bkellam
7a97d4ee06 release @sourcebot/mcp v1.0.6 2025-09-26 21:46:23 -07:00
501 changed files with 29960 additions and 14328 deletions

View file

@@ -4,12 +4,8 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres"
# Zoekt
ZOEKT_WEBSERVER_URL="http://localhost:6070"
# SHARD_MAX_MATCH_COUNT=10000
# TOTAL_MAX_MATCH_COUNT=100000
# The command to use for generating ctags.
CTAGS_COMMAND=ctags
# logging, strict
SRC_TENANT_ENFORCEMENT_MODE=strict
# Auth.JS
# You can generate a new secret with:
@@ -25,7 +21,7 @@ AUTH_URL="http://localhost:3000"
DATA_CACHE_DIR=${PWD}/.sourcebot # Path to the sourcebot cache dir (ex. ~/sourcebot/.sourcebot)
SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem
# CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists)
CONFIG_PATH=${PWD}/config.json # Path to the sourcebot config file (if one exists)
# Email
# EMAIL_FROM_ADDRESS="" # The from address for transactional emails.
@@ -33,7 +29,6 @@ SOURCEBOT_PUBLIC_KEY_PATH=${PWD}/public.pem
# PostHog
# POSTHOG_PAPIK=""
# NEXT_PUBLIC_POSTHOG_PAPIK=""
# Sentry
# SENTRY_BACKEND_DSN=""
@@ -82,14 +77,11 @@ SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection
# Controls the number of concurrent indexing jobs that can run at once
# INDEX_CONCURRENCY_MULTIPLE=
# Controls the polling interval for the web app
# NEXT_PUBLIC_POLLING_INTERVAL_MS=
# Controls the version of the web app
# NEXT_PUBLIC_SOURCEBOT_VERSION=
# CONFIG_MAX_REPOS_NO_TOKEN=
# NODE_ENV=
NODE_ENV=development
# SOURCEBOT_TENANCY_MODE=single
# NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=

View file

@@ -1,4 +1,4 @@
contact_links:
- name: 👾 Discord
url: https://discord.gg/f4Cbf3HT
url: https://discord.gg/HDScTs3ptP
about: Something else? Join the Discord!

View file

@@ -55,7 +55,6 @@ jobs:
${{ env.IMAGE_PATH }}:latest
build-args: |
NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }}
NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT }}
NEXT_PUBLIC_SENTRY_ENVIRONMENT=${{ vars.NEXT_PUBLIC_SENTRY_ENVIRONMENT }}
NEXT_PUBLIC_SENTRY_WEBAPP_DSN=${{ vars.NEXT_PUBLIC_SENTRY_WEBAPP_DSN }}

View file

@@ -2,7 +2,7 @@ name: Deploy Demo
on:
push:
branches: ["main"]
tags: ["v*.*.*"]
workflow_dispatch:
jobs:

View file

@@ -27,9 +27,9 @@ jobs:
platform: [linux/amd64, linux/arm64]
include:
- platform: linux/amd64
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2404
- platform: linux/arm64
runs-on: ubuntu-24.04-arm
runs-on: blacksmith-8vcpu-ubuntu-2204-arm
steps:
- name: Prepare
@@ -57,8 +57,8 @@ jobs:
with:
cosign-release: "v2.2.4"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Setup Blacksmith Builder
uses: useblacksmith/setup-docker-builder@v1
- name: Login to GitHub Packages Docker Registry
uses: docker/login-action@v3
@@ -69,17 +69,14 @@ jobs:
- name: Build Docker image
id: build
uses: docker/build-push-action@v6
uses: useblacksmith/build-push-action@v2
with:
context: .
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
platforms: ${{ matrix.platform }}
outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true,annotation.org.opencontainers.image.description=Blazingly fast code search
build-args: |
NEXT_PUBLIC_SOURCEBOT_VERSION=${{ github.ref_name }}
NEXT_PUBLIC_POSTHOG_PAPIK=${{ vars.NEXT_PUBLIC_POSTHOG_PAPIK }}
- name: Export digest
run: |
@@ -110,7 +107,7 @@ jobs:
run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
merge:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
packages: write
needs:
@@ -123,8 +120,8 @@ jobs:
pattern: digests-*
merge-multiple: true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Setup Blacksmith Builder
uses: useblacksmith/setup-docker-builder@v1
- name: Extract Docker metadata
id: meta

View file

@@ -8,7 +8,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
contents: read
steps:
@@ -19,6 +19,6 @@ jobs:
- name: Build Docker image
id: build
uses: docker/build-push-action@v6
uses: useblacksmith/build-push-action@v2
with:
context: .

View file

@@ -7,7 +7,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
contents: read
steps:

View file

@@ -7,7 +7,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
runs-on: blacksmith-4vcpu-ubuntu-2404
permissions:
contents: read
steps:

View file

@@ -1,8 +1,9 @@
name: Update Roadmap Released
on:
pull_request:
types: [closed]
push:
branches:
- main
workflow_dispatch:
schedule:
- cron: "0 */6 * * *"

View file

@@ -5,6 +5,9 @@
},
{
"path": "../vendor/zoekt"
},
{
"path": "../../sourcebot-helm-chart"
}
],
"settings": {

View file

@@ -7,6 +7,143 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Fixed
- Fixed review agent so that it works with GHES instances [#611](https://github.com/sourcebot-dev/sourcebot/pull/611)
### Added
- Added support for arbitrary user IDs required for OpenShift. [#658](https://github.com/sourcebot-dev/sourcebot/pull/658)
### Updated
- Improved error messages in file source api. [#665](https://github.com/sourcebot-dev/sourcebot/pull/665)
## [4.10.2] - 2025-12-04
### Fixed
- Fixed issue where the disable telemetry flag was not being respected for web server telemetry. [#657](https://github.com/sourcebot-dev/sourcebot/pull/657)
## [4.10.1] - 2025-12-03
### Added
- Added `ALWAYS_INDEX_FILE_PATTERNS` environment variable to allow specifying a comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or # of trigrams. [#631](https://github.com/sourcebot-dev/sourcebot/pull/631)
- Added button to explore menu to toggle cross-repository search. [#647](https://github.com/sourcebot-dev/sourcebot/pull/647)
- Added server side telemetry for search metrics. [#652](https://github.com/sourcebot-dev/sourcebot/pull/652)
### Fixed
- Fixed issue where single quotes could not be used in search queries. [#629](https://github.com/sourcebot-dev/sourcebot/pull/629)
- Fixed issue where files with special characters would fail to load. [#636](https://github.com/sourcebot-dev/sourcebot/issues/636)
- Fixed Ask performance issues. [#632](https://github.com/sourcebot-dev/sourcebot/pull/632)
- Fixed regression where creating a new Ask thread when unauthenticated would result in a 404. [#641](https://github.com/sourcebot-dev/sourcebot/pull/641)
- Updated react and next package versions to fix CVE 2025-55182. [#654](https://github.com/sourcebot-dev/sourcebot/pull/654)
### Changed
- Changed the default behaviour for code nav to scope references & definitions search to the current repository. [#647](https://github.com/sourcebot-dev/sourcebot/pull/647)
## [4.10.0] - 2025-11-24
### Added
- Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added counts to members, requests, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621)
- [Sourcebot EE] Add support for Authentik as an identity provider. [#627](https://github.com/sourcebot-dev/sourcebot/pull/627)
### Changed
- Changed the default search behaviour to match patterns as substrings and **not** regular expressions. Regular expressions can be used by toggling the regex button in the search bar. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Renamed `public` query prefix to `visibility`. Allowed values for `visibility` are `public`, `private`, and `any`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Changed `archived` query prefix to accept values `yes`, `no`, and `only`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
### Removed
- Removed `case` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Removed `branch` and `b` query prefixes. Please use `rev:` instead. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Removed `regex` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
### Fixed
- Fixed spurious infinite loads with explore panel, file tree, and file search command. [#617](https://github.com/sourcebot-dev/sourcebot/pull/617)
- Wipe search context on init if entitlement no longer exists [#618](https://github.com/sourcebot-dev/sourcebot/pull/618)
- Fixed Bitbucket repository exclusions not supporting glob patterns. [#620](https://github.com/sourcebot-dev/sourcebot/pull/620)
- Fixed issue where the repo driven permission syncer was attempting to sync public repositories. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624)
- Fixed issue where worker would not shutdown while a permission sync job (repo or user) was in progress. [#624](https://github.com/sourcebot-dev/sourcebot/pull/624)
## [4.9.2] - 2025-11-13
### Changed
- Bumped the default requested search result count from 5k to 10k after optimization pass. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Fixed
- Fixed incorrect shutdown of PostHog SDK in the worker. [#609](https://github.com/sourcebot-dev/sourcebot/pull/609)
- Fixed race condition in job schedulers. [#607](https://github.com/sourcebot-dev/sourcebot/pull/607)
- Fixed connection sync jobs getting stuck in pending or in progress after restarting the worker. [#612](https://github.com/sourcebot-dev/sourcebot/pull/612)
- Fixed issue where connections would always sync on startup, regardless of whether they had changed. [#613](https://github.com/sourcebot-dev/sourcebot/pull/613)
- Fixed performance bottleneck in search api. The result is an order of magnitude improvement in average search time, according to benchmarks. [#615](https://github.com/sourcebot-dev/sourcebot/pull/615)
### Added
- Added force resync buttons for connections and repositories. [#610](https://github.com/sourcebot-dev/sourcebot/pull/610)
- Added environment variable to configure default search result count. [#616](https://github.com/sourcebot-dev/sourcebot/pull/616)
## [4.9.1] - 2025-11-07
### Added
- Added support for running Sourcebot as a non-root user. [#599](https://github.com/sourcebot-dev/sourcebot/pull/599)
## [4.9.0] - 2025-11-04
### Added
- [Experimental][Sourcebot EE] Added GitLab permission syncing. [#585](https://github.com/sourcebot-dev/sourcebot/pull/585)
- [Sourcebot EE] Added external identity provider config and support for multiple accounts. [#595](https://github.com/sourcebot-dev/sourcebot/pull/595)
- Added ability to configure environment variables from the config. [#597](https://github.com/sourcebot-dev/sourcebot/pull/597)
### Fixed
- [ask sb] Fixed issue where reasoning tokens would appear in `text` content for openai compatible models. [#582](https://github.com/sourcebot-dev/sourcebot/pull/582)
- Fixed issue with GitHub app token tracking and refreshing. [#583](https://github.com/sourcebot-dev/sourcebot/pull/583)
- Fixed "The account is already associated with another user" errors with GitLab oauth provider. [#584](https://github.com/sourcebot-dev/sourcebot/pull/584)
- Fixed error when viewing a generic git connection in `/settings/connections`. [#588](https://github.com/sourcebot-dev/sourcebot/pull/588)
- Fixed issue with an unbounded `Promise.allSettled(...)` when retrieving details from the GitHub API about a large number of repositories (or orgs or users). [#591](https://github.com/sourcebot-dev/sourcebot/pull/591)
- Fixed resource exhaustion (EAGAIN errors) when syncing generic-git-host connections with thousands of repositories. [#593](https://github.com/sourcebot-dev/sourcebot/pull/593)
### Removed
- Removed built-in secret manager. [#592](https://github.com/sourcebot-dev/sourcebot/pull/592)
### Changed
- Changed internal representation of how repo permissions are represented in the database. [#600](https://github.com/sourcebot-dev/sourcebot/pull/600)
## [4.8.1] - 2025-10-29
### Fixed
- Fixed commit and branch hyperlinks not rendering for Gerrit repos. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
- Fixed visual bug when a repository does not have a image. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
- Fixed issue where the Ask homepage was not scrollable. [#581](https://github.com/sourcebot-dev/sourcebot/pull/581)
## [4.8.0] - 2025-10-28
### Added
- Implement dynamic tab titles for files and folders in browse tab. [#560](https://github.com/sourcebot-dev/sourcebot/pull/560)
- Added support for passing db connection url as separate `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` env vars. [#545](https://github.com/sourcebot-dev/sourcebot/pull/545)
- Added support for GitHub Apps for service auth. [#570](https://github.com/sourcebot-dev/sourcebot/pull/570)
- Added prometheus metrics for repo index manager. [#571](https://github.com/sourcebot-dev/sourcebot/pull/571)
- Added experimental environment variable to disable API key creation for non-admin users. [#577](https://github.com/sourcebot-dev/sourcebot/pull/577)
- [Experimental][Sourcebot EE] Added REST API to get users and delete a user. [#578](https://github.com/sourcebot-dev/sourcebot/pull/578)
### Fixed
- Fixed "dubious ownership" errors when cloning / fetching repos. [#553](https://github.com/sourcebot-dev/sourcebot/pull/553)
- Fixed issue with Ask Sourcebot tutorial re-appearing after restarting the browser. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Fixed `repoIndexTimeoutMs` not being used for index job timeouts. [#567](https://github.com/sourcebot-dev/sourcebot/pull/567)
### Changed
- Improved search performance for unbounded search queries. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555)
- Improved homepage performance by removing client side polling. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Changed navbar indexing indicator to only report progress for first time indexing jobs. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Improved repo indexing job stability and robustness. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Improved repositories table. [#572](https://github.com/sourcebot-dev/sourcebot/pull/572)
- Improved connections table. [#579](https://github.com/sourcebot-dev/sourcebot/pull/579)
### Removed
- Removed spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
- Removed connections management page. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
## [4.7.3] - 2025-09-29
### Fixed
- Manually pass auth token for ado server deployments. [#543](https://github.com/sourcebot-dev/sourcebot/pull/543)
## [4.7.2] - 2025-09-22
### Fixed

View file

@@ -36,15 +36,20 @@
docker compose -f docker-compose-dev.yml up -d
```
6. Create a copy of `.env.development` and name it `.env.development.local`. Update the required environment variables.
6. Generate the database schema.
```sh
yarn dev:prisma:migrate:dev
```
7. If you're using a declarative configuration file, create a configuration file and update the `CONFIG_PATH` environment variable in your `.env.development.local` file.
7. Create a copy of `.env.development` and name it `.env.development.local`. Update the required environment variables.
8. Start Sourcebot with the command:
8. If you're using a declarative configuration file, create a configuration file and update the `CONFIG_PATH` environment variable in your `.env.development.local` file.
9. Start Sourcebot with the command:
```sh
yarn dev
```
A `.sourcebot` directory will be created and zoekt will begin to index the repositories found in the `config.json` file.
9. Start searching at `http://localhost:3000`.
10. Start searching at `http://localhost:3000`.

View file

@@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1
# ------ Global scope variables ------
# Set of global build arguments.
@@ -8,11 +9,6 @@
# @see: https://docs.docker.com/build/building/variables/#scoping
ARG NEXT_PUBLIC_SOURCEBOT_VERSION
# PAPIK = Project API Key
# Note that this key does not need to be kept secret, so it's not
# necessary to use Docker build secrets here.
# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
@@ -42,17 +38,13 @@ COPY package.json yarn.lock* .yarnrc.yml ./
COPY .yarn ./.yarn
COPY ./packages/db ./packages/db
COPY ./packages/schemas ./packages/schemas
COPY ./packages/crypto ./packages/crypto
COPY ./packages/error ./packages/error
COPY ./packages/logger ./packages/logger
COPY ./packages/shared ./packages/shared
COPY ./packages/queryLanguage ./packages/queryLanguage
RUN yarn workspace @sourcebot/db install
RUN yarn workspace @sourcebot/schemas install
RUN yarn workspace @sourcebot/crypto install
RUN yarn workspace @sourcebot/error install
RUN yarn workspace @sourcebot/logger install
RUN yarn workspace @sourcebot/shared install
RUN yarn workspace @sourcebot/query-language install
# ------------------------------------
# ------ Build Web ------
@@ -61,8 +53,6 @@ ENV SKIP_ENV_VALIDATION=1
# -----------
ARG NEXT_PUBLIC_SOURCEBOT_VERSION
ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SOURCEBOT_CLOUD_ENVIRONMENT
@@ -97,10 +87,8 @@ COPY ./packages/web ./packages/web
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
# Fixes arm64 timeouts
RUN yarn workspace @sourcebot/web install
@@ -138,10 +126,8 @@ COPY ./packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
RUN yarn workspace @sourcebot/backend install
RUN yarn workspace @sourcebot/backend build
@@ -162,8 +148,6 @@ FROM node-alpine AS runner
# -----------
ARG NEXT_PUBLIC_SOURCEBOT_VERSION
ENV NEXT_PUBLIC_SOURCEBOT_VERSION=$NEXT_PUBLIC_SOURCEBOT_VERSION
ARG NEXT_PUBLIC_POSTHOG_PAPIK
ENV NEXT_PUBLIC_POSTHOG_PAPIK=$NEXT_PUBLIC_POSTHOG_PAPIK
ARG NEXT_PUBLIC_SENTRY_ENVIRONMENT
ENV NEXT_PUBLIC_SENTRY_ENVIRONMENT=$NEXT_PUBLIC_SENTRY_ENVIRONMENT
ARG NEXT_PUBLIC_SENTRY_WEBAPP_DSN
@@ -185,10 +169,13 @@ ENV DATA_DIR=/data
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
ENV REDIS_URL="redis://localhost:6379"
ENV SRC_TENANT_ENFORCEMENT_MODE=strict
ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem
# PAPIK = Project API Key
# Note that this key does not need to be kept secret, so it's not
# necessary to use Docker build secrets here.
# @see: https://posthog.com/tutorials/api-capture-events#authenticating-with-the-project-api-key
# @note: this is also declared in the shared env.server.ts file.
ENV POSTHOG_PAPIK=phc_lLPuFFi5LH6c94eFJcqvYVFwiJffVcV6HD8U4a1OnRW
# Valid values are: debug, info, warn, error
ENV SOURCEBOT_LOG_LEVEL=info
@@ -196,6 +183,23 @@ ENV SOURCEBOT_LOG_LEVEL=info
# Sourcebot collects anonymous usage data using [PostHog](https://posthog.com/). Uncomment this line to disable.
# ENV SOURCEBOT_TELEMETRY_DISABLED=1
# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip
ARG UID=1500
ARG GID=1500
# Always create the non-root user to support runtime user switching
# The container can be run as root (default) or as sourcebot user using docker run --user
RUN addgroup -g $GID sourcebot && \
adduser -D -u $UID -h /app -S sourcebot && \
adduser sourcebot postgres && \
adduser sourcebot redis && \
chown -R sourcebot /app && \
adduser sourcebot node && \
mkdir /var/log/sourcebot && \
chown sourcebot /var/log/sourcebot
COPY package.json yarn.lock* .yarnrc.yml public.pem ./
COPY .yarn ./.yarn
@@ -215,37 +219,48 @@ COPY --from=zoekt-builder \
/cmd/zoekt-index \
/usr/local/bin/
RUN chown -R sourcebot:sourcebot /app
# Copy zoekt proto files (needed for gRPC client at runtime)
COPY --chown=sourcebot:sourcebot vendor/zoekt/grpc/protos /app/vendor/zoekt/grpc/protos
# Copy all of the things
COPY --from=web-builder /app/packages/web/public ./packages/web/public
COPY --from=web-builder /app/packages/web/.next/standalone ./
COPY --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static
COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/public ./packages/web/public
COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/standalone ./
COPY --chown=sourcebot:sourcebot --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static
COPY --from=backend-builder /app/node_modules ./node_modules
COPY --from=backend-builder /app/packages/backend ./packages/backend
COPY --chown=sourcebot:sourcebot --from=backend-builder /app/node_modules ./node_modules
COPY --chown=sourcebot:sourcebot --from=backend-builder /app/packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
COPY --from=shared-libs-builder /app/packages/error ./packages/error
COPY --from=shared-libs-builder /app/packages/logger ./packages/logger
COPY --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/db ./packages/db
COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/shared ./packages/shared
COPY --chown=sourcebot:sourcebot --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage
# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl util-linux unzip
# Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container.
RUN git config --global safe.directory "*"
# Configure the database
RUN mkdir -p /run/postgresql && \
chown -R postgres:postgres /run/postgresql && \
chmod 775 /run/postgresql
# Make app directory accessible to both root and sourcebot user
RUN chown -R sourcebot /app \
&& chgrp -R 0 /app \
&& chmod -R g=u /app
# Make data directory accessible to both root and sourcebot user
RUN chown -R sourcebot /data
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY prefix-output.sh ./prefix-output.sh
RUN chmod +x ./prefix-output.sh
COPY entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh
COPY default-config.json .
# Note: for back-compat cases, we do _not_ set the USER directive here.
# Instead, the user can be overridden at runtime with --user flag.
# USER sourcebot
EXPOSE 3000
ENV PORT=3000

View file

@@ -2,7 +2,7 @@ Copyright (c) 2025 Taqla Inc.
Portions of this software are licensed as follows:
- All content that resides under the "ee/", "packages/web/src/ee/", "packages/backend/src/ee/", and "packages/shared/src/ee/" directories of this repository, if these directories exist, is licensed under the license defined in "ee/LICENSE".
- All content located within any folder or subfolder named “ee” in this repository is licensed under the terms specified in “ee/LICENSE”,
- All third party components incorporated into the Sourcebot Software are licensed under the original license provided by the owner of the applicable component.
- Content outside of the above mentioned directories or restrictions above is available under the "Functional Source License" as defined below.

View file

@@ -28,10 +28,6 @@ clean:
packages/db/dist \
packages/schemas/node_modules \
packages/schemas/dist \
packages/crypto/node_modules \
packages/crypto/dist \
packages/error/node_modules \
packages/error/dist \
packages/mcp/node_modules \
packages/mcp/dist \
packages/shared/node_modules \

View file

@@ -72,15 +72,22 @@ https://github.com/user-attachments/assets/31ec0669-707d-4e03-b511-1bc33d44197a
# Deploy Sourcebot
Sourcebot can be deployed in seconds using our official docker image. Visit our [docs](https://docs.sourcebot.dev/docs/deployment-guide) for more information.
Sourcebot can be deployed in seconds using Docker Compose. Visit our [docs](https://docs.sourcebot.dev/docs/deployment/docker-compose) for more information.
1. Create a config
1. Download the docker-compose.yml file
```sh
curl -o docker-compose.yml https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/docker-compose.yml
```
2. In the same directory as the `docker-compose.yml` file, create a [configuration file](https://docs.sourcebot.dev/docs/configuration/config-file). The configuration file is a JSON file that configures Sourcebot's behaviour, including what repositories to index, language model providers, auth providers, and more.
```sh
touch config.json
echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
// Comments are supported.
// This config creates a single connection to GitHub.com that
// indexes the Sourcebot repository
"connections": {
// Comments are supported
"starter-connection": {
"type": "github",
"repos": [
@ -91,41 +98,22 @@ echo '{
}' > config.json
```
2. Run the docker container
3. Update the secrets in the `docker-compose.yml` and then run Sourcebot using:
```sh
docker run \
-p 3000:3000 \
--pull=always \
--rm \
-v $(pwd):/data \
-e CONFIG_PATH=/data/config.json \
--name sourcebot \
ghcr.io/sourcebot-dev/sourcebot:latest
docker compose up
```
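One way to generate the secrets referenced in step 3 (following the `openssl` commands suggested in the compose file's comments; the variable names match the `${...}` references in `docker-compose.yml`):
```sh
# Generate the secrets and pass them to compose via the environment.
export AUTH_SECRET="$(openssl rand -base64 33)"
export SOURCEBOT_ENCRYPTION_KEY="$(openssl rand -base64 24)"
docker compose up
```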
<details>
<summary>What does this command do?</summary>
- Pulls and runs the Sourcebot docker image from [ghcr.io/sourcebot-dev/sourcebot:latest](https://github.com/sourcebot-dev/sourcebot/pkgs/container/sourcebot).
- Mounts the current directory (`-v $(pwd):/data`) to allow Sourcebot to persist the `.sourcebot` cache.
- Clones sourcebot at `HEAD` into `.sourcebot/github/sourcebot-dev/sourcebot`.
- Indexes sourcebot into a .zoekt index file in `.sourcebot/index/`.
- Maps port 3000 between your machine and the container.
- Starts the web server on port 3000.
</details>
<br/>
3. Visit `http://localhost:3000` to start using Sourcebot
4. Visit `http://localhost:3000` to start using Sourcebot
<br/>
To configure Sourcebot (index your own repos, connect your LLMs, etc), check out our [docs](https://docs.sourcebot.dev/docs/configuration/config-file).
> [!NOTE]
> Sourcebot collects <a href="https://demo.sourcebot.dev/~/search?query=captureEvent%5C(%20repo%3Asourcebot">anonymous usage data</a> by default to help us improve the product. No sensitive data is collected, but if you'd like to disable this you can do so by setting the `SOURCEBOT_TELEMETRY_DISABLED` environment
> variable to `true`. Please refer to our [telemetry docs](https://docs.sourcebot.dev/self-hosting/overview#telemetry) for more information.
> variable to `true`. Please refer to our [telemetry docs](https://docs.sourcebot.dev/docs/overview#telemetry) for more information.
# Build from source
>[!NOTE]
> Building from source is only required if you'd like to contribute. If you'd just like to use Sourcebot, we recommend checking out our self-hosting [docs](https://docs.sourcebot.dev/self-hosting/overview).
If you'd like to build from source, please check out the `CONTRIBUTING.md` file for more information.

View file

@ -1,11 +0,0 @@
{
"$schema": "./schemas/v2/index.json",
"repos": [
{
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
]
}

View file

@ -1,243 +0,0 @@
// This is the config file for https://demo.sourcebot.dev.
// To add a new repository, edit this file and open a PR.
// After the PR is merged, the deploy demo workflow will
// run (see: https://github.com/sourcebot-dev/sourcebot/actions/workflows/deploy-demo.yml),
// after which the changes will be reflected on the demo site.
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"connections": {
// Defines the GitHub repositories.
// See: https://docs.sourcebot.dev/docs/connections/github
"github-repos": {
"type": "github",
"token": {
"env": "GITHUB_TOKEN"
},
"repos": [
"torvalds/linux",
"pytorch/pytorch",
"commaai/openpilot",
"ggerganov/whisper.cpp",
"ggerganov/llama.cpp",
"codemirror/dev",
"tailwindlabs/tailwindcss",
"sourcebot-dev/sourcebot",
"sindresorhus/awesome",
"facebook/react",
"vinta/awesome-python",
"vuejs/vue",
"TheAlgorithms/Python",
"tensorflow/tensorflow",
"twbs/bootstrap",
"flutter/flutter",
"microsoft/vscode",
"github/gitignore",
"airbnb/javascript",
"AUTOMATIC1111/stable-diffusion-webui",
"huggingface/transformers",
"avelino/awesome-go",
"ytdl-org/youtube-dl",
"vercel/next.js",
"golang/go",
"facebook/react-native",
"electron/electron",
"Genymobile/scrcpy",
"f/awesome-chatgpt-prompts",
"microsoft/PowerToys",
"kubernetes/kubernetes",
"d3/d3",
"nodejs/node",
"massgravel/Microsoft-Activation-Scripts",
"axios/axios",
"mrdoob/three.js",
"krahets/hello-algo",
"facebook/create-react-app",
"ollama/ollama",
"microsoft/TypeScript",
"goldbergyoni/nodebestpractices",
"rust-lang/rust",
"denoland/deno",
"angular/angular",
"langchain-ai/langchain",
"microsoft/terminal",
"521xueweihan/HelloGitHub",
"mui/material-ui",
"ant-design/ant-design",
"yt-dlp/yt-dlp",
"puppeteer/puppeteer",
"papers-we-love/papers-we-love",
"iptv-org/iptv",
"fatedier/frp",
"excalidraw/excalidraw",
"tauri-apps/tauri",
"neovim/neovim",
"django/django",
"florinpop17/app-ideas",
"animate-css/animate.css",
"nvm-sh/nvm",
"gothinkster/realworld",
"bitcoin/bitcoin",
"sveltejs/svelte",
"opencv/opencv",
"gin-gonic/gin",
"laravel/laravel",
"fastapi/fastapi",
"macrozheng/mall",
"jaywcjlove/awesome-mac",
"tonsky/FiraCode",
"rustdesk/rustdesk",
"tensorflow/models",
"doocs/advanced-java",
"shadcn-ui/ui",
"gohugoio/hugo",
"spring-projects/spring-boot",
"supabase/supabase",
"oven-sh/bun",
"FortAwesome/Font-Awesome",
"home-assistant/core",
"typicode/json-server",
"mermaid-js/mermaid",
"openai/whisper",
"netdata/netdata",
"vuejs/awesome-vue",
"3b1b/manim",
"2dust/v2rayN",
"nomic-ai/gpt4all",
"elastic/elasticsearch",
"fighting41love/funNLP",
"vitejs/vite",
"coder/code-server",
"moby/moby",
"CompVis/stable-diffusion",
"base-org/node",
"nestjs/nest",
"pallets/flask",
"hakimel/reveal.js",
"microsoft/playwright",
"swiftlang/swift",
"redis/redis",
"bregman-arie/devops-exercises",
"binary-husky/gpt_academic",
"junegunn/fzf",
"syncthing/syncthing",
"hoppscotch/hoppscotch",
"protocolbuffers/protobuf",
"enaqx/awesome-react",
"expressjs/express",
"microsoft/generative-ai-for-beginners",
"grafana/grafana",
"abi/screenshot-to-code",
"chartjs/Chart.js",
"webpack/webpack",
"d2l-ai/d2l-zh",
"strapi/strapi",
"python/cpython",
"leonardomso/33-js-concepts",
"kdn251/interviews",
"ventoy/Ventoy",
"ansible/ansible",
"apache/superset",
"tesseract-ocr/tesseract",
"lydiahallie/javascript-questions",
"FuelLabs/sway",
"keras-team/keras",
"resume/resume.github.com",
"swisskyrepo/PayloadsAllTheThings",
"ocornut/imgui",
"socketio/socket.io",
"awesomedata/awesome-public-datasets",
"louislam/uptime-kuma",
"kelseyhightower/nocode",
"sherlock-project/sherlock",
"reduxjs/redux",
"apache/echarts",
"obsproject/obs-studio",
"openai/openai-cookbook",
"fffaraz/awesome-cpp",
"scikit-learn/scikit-learn",
"TheAlgorithms/Java",
"atom/atom",
"Eugeny/tabby",
"lodash/lodash",
"caddyserver/caddy",
"sindresorhus/awesome-nodejs",
"rust-unofficial/awesome-rust",
"streamich/react-use",
"pocketbase/pocketbase",
"lllyasviel/Fooocus",
"k88hudson/git-flight-rules",
"react-hook-form/react-hook-form",
"koajs/koa",
"SheetJS/sheetjs",
"trpc/trpc",
"LC044/WeChatMsg",
"airbnb/lottie-android",
"huihut/interview",
"jgm/pandoc",
"google/googletest",
"date-fns/date-fns",
"nativefier/nativefier",
"openai/gym",
"files-community/Files",
"sahat/hackathon-starter",
"appsmithorg/appsmith",
"ultralytics/ultralytics",
"slidevjs/slidev",
"xitu/gold-miner",
"sorrycc/awesome-javascript",
"astral-sh/ruff",
"logseq/logseq",
"shadowsocks/shadowsocks",
"ccxt/ccxt",
"netty/netty",
"tw93/Pake",
"fxsjy/jieba",
"atlassian/react-beautiful-dnd",
"ToolJet/ToolJet",
"markedjs/marked",
"typicode/husky",
"laravel/framework",
"TheAlgorithms/JavaScript",
"bilibili/ijkplayer",
"solidjs/solid",
"fastify/fastify",
"huggingface/pytorch-image-models",
"shadowsocks/ShadowsocksX-NG",
"carbon-language/carbon-lang",
"s0md3v/roop",
"ascoders/weekly",
"backstage/backstage",
"servo/servo",
"composer/composer",
"tastejs/todomvc",
"lutzroeder/netron",
"alibaba/canal",
"tinygrad/tinygrad",
"ManimCommunity/manim",
"filebrowser/filebrowser",
"nicolargo/glances",
"iperov/DeepFaceLive",
"StevenBlack/hosts",
"crossoverJie/JCSprout",
"mantinedev/mantine",
"Automattic/mongoose",
"eslint/eslint",
"nextauthjs/next-auth",
"flameshot-org/flameshot",
"envoyproxy/envoy",
"sourcebot-dev/zoekt"
]
},
// Defines the GitLab repositories.
// See: https://docs.sourcebot.dev/docs/connections/gitlab
"gitlab-repos": {
"type": "gitlab",
"projects": [
"gnachman/iterm2"
]
}
},
"settings": {
"reindexIntervalMs": 86400000 // 24 hours
}
}

docker-compose.yml Normal file
View file

@ -0,0 +1,66 @@
services:
sourcebot:
image: ghcr.io/sourcebot-dev/sourcebot:latest
user: sourcebot
restart: always
container_name: sourcebot
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
ports:
- "3000:3000"
volumes:
- ./config.json:/data/config.json
- sourcebot_data:/data
environment:
- CONFIG_PATH=/data/config.json
- AUTH_URL=${AUTH_URL:-http://localhost:3000}
- AUTH_SECRET=${AUTH_SECRET:-000000000000000000000000000000000} # CHANGEME: generate via `openssl rand -base64 33`
- SOURCEBOT_ENCRYPTION_KEY=${SOURCEBOT_ENCRYPTION_KEY:-000000000000000000000000000000000} # CHANGEME: generate via `openssl rand -base64 24`
- DATABASE_URL=${DATABASE_URL:-postgresql://postgres:postgres@postgres:5432/postgres} # CHANGEME
- REDIS_URL=${REDIS_URL:-redis://redis:6379} # CHANGEME
- SOURCEBOT_EE_LICENSE_KEY=${SOURCEBOT_EE_LICENSE_KEY:-}
- SOURCEBOT_TELEMETRY_DISABLED=${SOURCEBOT_TELEMETRY_DISABLED:-false}
# For the full list of environment variables see:
# https://docs.sourcebot.dev/docs/configuration/environment-variables
postgres:
image: docker.io/postgres:${POSTGRES_VERSION:-latest}
restart: always
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 3s
timeout: 3s
retries: 10
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres # CHANGEME
POSTGRES_DB: postgres
ports:
- 127.0.0.1:5432:5432
volumes:
- sourcebot_postgres_data:/var/lib/postgresql/data
redis:
image: docker.io/redis:${REDIS_VERSION:-latest}
restart: always
ports:
- 127.0.0.1:6379:6379
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 3s
timeout: 10s
retries: 10
volumes:
- sourcebot_redis_data:/data
volumes:
sourcebot_data:
driver: local
sourcebot_postgres_data:
driver: local
sourcebot_redis_data:
driver: local
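# Illustrative .env file (placed next to this compose file) for the ${VAR:-default}
# references above; values are placeholders, not real secrets:
#   AUTH_SECRET=<output of `openssl rand -base64 33`>
#   SOURCEBOT_ENCRYPTION_KEY=<output of `openssl rand -base64 24`>
#   POSTGRES_VERSION=16   # pin instead of `latest` if desired
#   REDIS_VERSION=7       # pin instead of `latest` if desired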

View file

@ -21,7 +21,13 @@
"group": "Getting Started",
"pages": [
"docs/overview",
"docs/deployment-guide"
{
"group": "Deployment",
"pages": [
"docs/deployment/docker-compose",
"docs/deployment/k8s"
]
}
]
},
{
@ -79,6 +85,7 @@
]
},
"docs/configuration/language-model-providers",
"docs/configuration/idp",
{
"group": "Authentication",
"pages": [
@ -110,6 +117,11 @@
"href": "https://sourcebot.dev/changelog",
"icon": "list-check"
},
{
"anchor": "Roadmap",
"href": "https://github.com/sourcebot-dev/sourcebot/issues/459",
"icon": "map"
},
{
"anchor": "Support",
"href": "https://github.com/sourcebot-dev/sourcebot/issues/new?template=get_help.md",
@ -132,7 +144,7 @@
"socials": {
"github": "https://github.com/sourcebot-dev/sourcebot",
"twitter": "https://x.com/sourcebot_dev",
"discord": "https://discord.gg/Y6b78RqM",
"discord": "https://discord.gg/HDScTs3ptP",
"linkedin": "https://www.linkedin.com/company/sourcebot"
}
},

View file

@ -10,7 +10,7 @@ Sourcebot's built-in authentication system gates your deployment, and allows adm
<Card horizontal title="Authentication providers" icon="lock" href="/docs/configuration/auth/providers">
Configure additional authentication providers for your deployment.
</Card>
<Card horizontal title="Inviting members" icon="user" href="/docs/configuration/auth/inviting-members">
<Card horizontal title="Access settings" icon="user" href="/docs/configuration/auth/access-settings">
Learn how to configure how members join your deployment.
</Card>
<Card horizontal title="Roles and permissions" icon="shield" href="/docs/configuration/auth/roles-and-permissions">
@ -25,4 +25,4 @@ Sourcebot's built-in authentication system gates your deployment, and allows adm
# Troubleshooting
- If you experience issues logging in, logging out, or accessing an organization you should have access to, try clearing your cookies & performing a full page refresh (`Cmd/Ctrl + Shift + R` on most browsers).
- Still not working? Reach out to us on our [discord](https://discord.com/invite/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)
- Still not working? Reach out to us on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)

View file

@ -26,80 +26,5 @@ See [transactional emails](/docs/configuration/transactional-emails) for more de
# Enterprise Authentication Providers
The following authentication providers require an [enterprise license](/docs/license-key) to be enabled.
### GitHub
---
[Auth.js GitHub Provider Docs](https://authjs.dev/getting-started/providers/github)
**Required environment variables:**
- `AUTH_EE_GITHUB_CLIENT_ID`
- `AUTH_EE_GITHUB_CLIENT_SECRET`
Optional environment variables:
- `AUTH_EE_GITHUB_BASE_URL` - Base URL for GitHub Enterprise (defaults to https://github.com)
### GitLab
---
[Auth.js GitLab Provider Docs](https://authjs.dev/getting-started/providers/gitlab)
**Required environment variables:**
- `AUTH_EE_GITLAB_CLIENT_ID`
- `AUTH_EE_GITLAB_CLIENT_SECRET`
Optional environment variables:
- `AUTH_EE_GITLAB_BASE_URL` - Base URL for GitLab instance (defaults to https://gitlab.com)
### Google
---
[Auth.js Google Provider Docs](https://authjs.dev/getting-started/providers/google)
**Required environment variables:**
- `AUTH_EE_GOOGLE_CLIENT_ID`
- `AUTH_EE_GOOGLE_CLIENT_SECRET`
### GCP IAP
---
<Note>If you're running Sourcebot in an environment that blocks egress, make sure you allow the [IAP IP ranges](https://www.gstatic.com/ipranges/goog.json)</Note>
Custom provider built to enable automatic Sourcebot account registration/login when using GCP IAP.
**Required environment variables**
- `AUTH_EE_GCP_IAP_ENABLED`
- `AUTH_EE_GCP_IAP_AUDIENCE`
- This can be found by selecting the ⋮ icon next to the IAP-enabled backend service and pressing `Get JWT audience code`
### Okta
---
[Auth.js Okta Provider Docs](https://authjs.dev/getting-started/providers/okta)
**Required environment variables:**
- `AUTH_EE_OKTA_CLIENT_ID`
- `AUTH_EE_OKTA_CLIENT_SECRET`
- `AUTH_EE_OKTA_ISSUER`
### Keycloak
---
[Auth.js Keycloak Provider Docs](https://authjs.dev/getting-started/providers/keycloak)
**Required environment variables:**
- `AUTH_EE_KEYCLOAK_CLIENT_ID`
- `AUTH_EE_KEYCLOAK_CLIENT_SECRET`
- `AUTH_EE_KEYCLOAK_ISSUER`
### Microsoft Entra ID
[Auth.js Microsoft Entra ID Provider Docs](https://authjs.dev/getting-started/providers/microsoft-entra-id)
**Required environment variables:**
- `AUTH_EE_MICROSOFT_ENTRA_ID_CLIENT_ID`
- `AUTH_EE_MICROSOFT_ENTRA_ID_CLIENT_SECRET`
- `AUTH_EE_MICROSOFT_ENTRA_ID_ISSUER`
---
Sourcebot also supports authentication using several different [external identity providers](/docs/configuration/idp). These identity providers require an [enterprise license](/docs/license-key).

View file

@ -3,6 +3,9 @@ title: Config File
sidebarTitle: Config file
---
import ConfigSchema from '/snippets/schemas/v3/index.schema.mdx'
import EnvironmentOverridesSchema from '/snippets/schemas/v3/environmentOverrides.schema.mdx'
When self-hosting Sourcebot, you **must** provide it with a config file. This is done by placing a config file in a volume that's mounted to Sourcebot, and providing the path to this file in the `CONFIG_PATH` environment variable. For example:
@ -49,3 +52,103 @@ The following are settings that can be provided in your config file to modify So
| `enablePublicAccess` **(deprecated)** | boolean | false | — | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. |
| `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | Interval at which the repo permission syncer should run. |
| `experiment_userDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | Interval at which the user permission syncer should run. |
# Tokens
Tokens are used to securely pass secrets to Sourcebot in a config file. They are used in various places, including connections, language model providers, auth providers, etc. Tokens can be passed as either environment variables or Google Cloud secrets:
<AccordionGroup>
<Accordion title="Environment Variables">
```json
{
"token": {
"env": "TOKEN_NAME"
}
}
```
</Accordion>
<Accordion title="Google Cloud Secrets">
```json
{
"token": {
"googleCloudSecret": "projects/<project-id>/secrets/<secret-name>/versions/<version-id>"
}
}
```
</Accordion>
</AccordionGroup>
# Overriding environment variables from the config
You can override or set environment variables from the config file by using the `environmentOverrides` property. Overrides can be of type `string`, `number`, `boolean`, or a [token](/docs/configuration/config-file#tokens). Tokens are useful when you want to configure an environment variable using a Google Cloud Secret or other supported secret management service.
<AccordionGroup>
<Accordion title="Token">
```jsonc
{
"environmentOverrides": {
"DATABASE_URL": {
"type": "token",
"value": {
"googleCloudSecret": "projects/<id>/secrets/postgres-connection-string/versions/latest"
}
},
"REDIS_URL": {
"type": "token",
"value": {
"googleCloudSecret": "projects/<id>/secrets/redis-connection-string/versions/latest"
}
}
},
}
```
</Accordion>
<Accordion title="String">
```jsonc
{
"environmentOverrides": {
"EMAIL_FROM_ADDRESS": {
"type": "string",
"value": "hello@sourcebot.dev"
}
}
}
```
</Accordion>
<Accordion title="Number">
```jsonc
{
"environmentOverrides": {
"SOURCEBOT_CHAT_MODEL_TEMPERATURE": {
"type": "number",
"value": 0.5
}
}
}
```
</Accordion>
<Accordion title="Boolean">
```jsonc
{
"environmentOverrides": {
"SOURCEBOT_TELEMETRY_DISABLED": {
"type": "boolean",
"value": false
}
}
}
```
</Accordion>
</AccordionGroup>
**Note:** Overrides are **not** set as system environment variables, and instead are resolved at runtime on startup and stored in memory.
<Accordion title="Schema reference">
[schemas/v3/environmentOverrides.json](https://github.com/sourcebot-dev/sourcebot/blob/main/schemas/v3/environmentOverrides.json)
<EnvironmentOverridesSchema />
</Accordion>

View file

@ -1,10 +1,9 @@
---
title: Environment variables
sidebarTitle: Environment variables
mode: "wide"
---
<Note>This page provides a detailed reference of all environment variables supported by Sourcebot. If you're just looking to get up and running, we recommend starting with the [deployment guide](/docs/deployment-guide) instead.</Note>
<Note>This page provides a detailed reference of all environment variables supported by Sourcebot. If you're just looking to get up and running, we recommend starting with the [deployment guides](/docs/deployment/docker-compose) instead.</Note>
### Core Environment Variables
The following environment variables allow you to configure your Sourcebot deployment.
@ -19,7 +18,7 @@ The following environment variables allow you to configure your Sourcebot deploy
| `DATA_CACHE_DIR` | `$DATA_DIR/.sourcebot` | <p>The root data directory in which all data written to disk by Sourcebot will be located.</p> |
| `DATA_DIR` | `/data` | <p>The directory within the container to store all persistent data. Typically, this directory will be volume mapped such that data is persisted across container restarts (e.g., `docker run -v $(pwd):/data`)</p> |
| `DATABASE_DATA_DIR` | `$DATA_CACHE_DIR/db` | <p>The data directory for the default Postgres database.</p> |
| `DATABASE_URL` | `postgresql://postgres@localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url </p> |
| `DATABASE_URL` | `postgresql://postgres@localhost:5432/sourcebot` | <p>Connection string of your Postgres database. By default, a Postgres database is automatically provisioned at startup within the container.</p><p>If you'd like to use a non-default schema, you can provide it as a parameter in the database url.</p><p>You can also use `DATABASE_HOST`, `DATABASE_USERNAME`, `DATABASE_PASSWORD`, `DATABASE_NAME`, and `DATABASE_ARGS` to construct the database url.</p> |
| `EMAIL_FROM_ADDRESS` | `-` | <p>The email address that transactional emails will be sent from. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `FORCE_ENABLE_ANONYMOUS_ACCESS` | `false` | <p>When enabled, [anonymous access](/docs/configuration/auth/access-settings#anonymous-access) to the organization will always be enabled</p> |
| `REDIS_DATA_DIR` | `$DATA_CACHE_DIR/redis` | <p>The data directory for the default Redis instance.</p> |
@ -28,7 +27,6 @@ The following environment variables allow you to configure your Sourcebot deploy
| `REDIS_REMOVE_ON_FAIL` | `100` | <p>Controls how many failed jobs are allowed to remain in Redis queues</p> |
| `REPO_SYNC_RETRY_BASE_SLEEP_SECONDS` | `60` | <p>The base sleep duration (in seconds) for exponential backoff when retrying repository sync operations that fail</p> |
| `GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS` | `600` | <p>The timeout duration (in seconds) for GitLab client queries</p> |
| `SHARD_MAX_MATCH_COUNT` | `10000` | <p>The maximum shard count per query</p> |
| `SMTP_CONNECTION_URL` | `-` | <p>The url to the SMTP service used for sending transactional emails. See [this doc](/docs/configuration/transactional-emails) for more info.</p> |
| `SOURCEBOT_ENCRYPTION_KEY` | Automatically generated at startup if no value is provided. Generated using `openssl rand -base64 24` | <p>Used to encrypt connection secrets and generate API keys.</p> |
| `SOURCEBOT_PUBLIC_KEY_PATH` | `/app/public.pem` | <p>Sourcebot's public key that's used to verify encrypted license key signatures.</p> |
@ -36,8 +34,8 @@ The following environment variables allow you to configure your Sourcebot deploy
| `SOURCEBOT_STRUCTURED_LOGGING_ENABLED` | `false` | <p>Enables/disable structured JSON logging. See [this doc](/docs/configuration/structured-logging) for more info.</p> |
| `SOURCEBOT_STRUCTURED_LOGGING_FILE` | - | <p>Optional file to log to if structured logging is enabled</p> |
| `SOURCEBOT_TELEMETRY_DISABLED` | `false` | <p>Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/overview.mdx#telemetry) for more info.</p> |
| `TOTAL_MAX_MATCH_COUNT` | `100000` | <p>The maximum number of matches per query</p> |
| `ZOEKT_MAX_WALL_TIME_MS` | `10000` | <p>The maximum real world duration (in milliseconds) per zoekt query</p> |
| `DEFAULT_MAX_MATCH_COUNT` | `10000` | <p>The default maximum number of search results to return when using search in the web app.</p> |
| `ALWAYS_INDEX_FILE_PATTERNS` | - | <p>A comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or number of trigrams.</p> |
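As a rough sketch of the component-variable alternative to `DATABASE_URL` described above (the variable names come from the table; the values, and the assumption that `DATABASE_ARGS` carries extra query parameters, are illustrative):
```bash
# Illustrative only: let Sourcebot assemble the connection string from parts.
export DATABASE_HOST="db.internal"        # placeholder host
export DATABASE_USERNAME="postgres"
export DATABASE_PASSWORD="changeme"       # placeholder password
export DATABASE_NAME="sourcebot"
export DATABASE_ARGS="sslmode=require"    # assumed: extra query parameters
```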
### Enterprise Environment Variables
| Variable | Default | Description |
@ -65,12 +63,15 @@ The following environment variables allow you to configure your Sourcebot deploy
### Review Agent Environment Variables
| Variable | Default | Description |
| :------- | :------ | :---------- |
| `GITHUB_APP_ID` | `-` | <p>The GitHub App ID used for review agent authentication.</p> |
| `GITHUB_APP_PRIVATE_KEY_PATH` | `-` | <p>The container relative path to the private key file for the GitHub App used by the review agent.</p> |
| `GITHUB_APP_WEBHOOK_SECRET` | `-` | <p>The webhook secret for the GitHub App used by the review agent.</p> |
| `GITHUB_REVIEW_AGENT_APP_ID` | `-` | <p>The GitHub App ID used for review agent authentication.</p> |
| `GITHUB_REVIEW_AGENT_APP_PRIVATE_KEY_PATH` | `-` | <p>The container relative path to the private key file for the GitHub App used by the review agent.</p> |
| `GITHUB_REVIEW_AGENT_APP_WEBHOOK_SECRET` | `-` | <p>The webhook secret for the GitHub App used by the review agent.</p> |
| `OPENAI_API_KEY` | `-` | <p>The OpenAI API key used by the review agent.</p> |
| `REVIEW_AGENT_API_KEY` | `-` | <p>The Sourcebot API key used by the review agent.</p> |
| `REVIEW_AGENT_AUTO_REVIEW_ENABLED` | `false` | <p>Enables/disables automatic code reviews by the review agent.</p> |
| `REVIEW_AGENT_LOGGING_ENABLED` | `true` | <p>Enables/disables logging for the review agent. Logs are saved in `DATA_CACHE_DIR/review-agent`</p> |
| `REVIEW_AGENT_REVIEW_COMMAND` | `review` | <p>The command used to trigger a code review by the review agent.</p> |
### Overriding environment variables from the config
You can override environment variables from the config file by using the `environmentOverrides` property. See [this doc](/docs/configuration/config-file#overriding-environment-variables-from-the-config) for more info.

View file

@ -0,0 +1,418 @@
---
title: External Identity Providers
sidebarTitle: External identity providers
---
import LicenseKeyRequired from '/snippets/license-key-required.mdx'
<LicenseKeyRequired />
You can connect Sourcebot to various **external identity providers** to associate a Sourcebot user with one or more external service accounts (ex. Google, GitHub, etc).
External identity providers can be used for [authentication](/docs/configuration/auth) and/or [permission syncing](/docs/features/permission-syncing). They're defined in the
[config file](/docs/configuration/config-file) in the top-level `identityProviders` array:
```json wrap icon="code" Example config with both google and github identity providers defined
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "github",
"purpose": "account_linking",
"accountLinkingRequired": true,
"clientId": {
"env": "GITHUB_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "GITHUB_IDENTITY_PROVIDER_CLIENT_SECRET"
}
},
{
"provider": "google",
"clientId": {
"env": "GOOGLE_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "GOOGLE_IDENTITY_PROVIDER_CLIENT_SECRET"
}
}
]
}
```
Secret values (such as `clientId` and `clientSecret`) can be provided as environment variables or Google Cloud secrets via [tokens](/docs/configuration/config-file#tokens).
# Supported External Identity Providers
Sourcebot uses [Auth.js](https://authjs.dev/) to connect to external identity providers. If there's a provider supported by Auth.js that you don't see below, please submit a
[feature request](https://github.com/sourcebot-dev/sourcebot/issues) to have it added.
### GitHub
[Auth.js GitHub Provider Docs](https://authjs.dev/getting-started/providers/github)
A GitHub connection can be used for either [authentication](/docs/configuration/auth) or [permission syncing](/docs/features/permission-syncing). This is controlled using the `purpose` field
in the GitHub identity provider config.
<Accordion title="instructions">
<Steps>
<Step title="Register an Oauth Client">
To begin, you must register an Oauth client in GitHub to faciliate the identity provider connection. You can do this by creating a **GitHub App** or a **GitHub OAuth App**. Either
one works, but the **GitHub App** is the [recommended mechanism](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/differences-between-github-apps-and-oauth-apps).
The result of registering an OAuth client is a `CLIENT_ID` and `CLIENT_SECRET` which you'll provide to Sourcebot.
<Tabs>
<Tab title="GitHub App">
<Note>You don't need to install the app to use it as an external identity provider</Note>
Follow [this guide](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to register a new GitHub App.
When asked to provide a callback url, provide `<sourcebot_url>/api/auth/callback/github` (ex. https://sourcebot.coolcorp.com/api/auth/callback/github)
Set the following fine-grained permissions in the GitHub App:
- `"Email addresses" account permissions (read)`
- `"Metadata" repository permissions (read)` (only needed if using permission syncing)
</Tab>
<Tab title="GitHub OAuth App">
Follow [this guide](https://docs.github.com/en/apps/oauth-apps/building-oauth-apps/creating-an-oauth-app) by GitHub to create an OAuth App.
When asked to provide a callback url, provide `<sourcebot_url>/api/auth/callback/github` (ex. https://sourcebot.coolcorp.com/api/auth/callback/github)
</Tab>
</Tabs>
</Step>
<Step title="Define environemnt variables">
To provide Sourcebot the client id and secret for your OAuth client you must set them as environment variables. These can be named whatever you like
(ex. `GITHUB_IDENTITY_PROVIDER_CLIENT_ID` and `GITHUB_IDENTITY_PROVIDER_CLIENT_SECRET`)
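For example, in a shell-based deployment this might look like (values are placeholders):
```bash wrap icon="terminal"
# Placeholder values; use the client id and secret from your GitHub App.
export GITHUB_IDENTITY_PROVIDER_CLIENT_ID="Iv1.0000000000000000"
export GITHUB_IDENTITY_PROVIDER_CLIENT_SECRET="<your-client-secret>"
```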
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "github",
// "sso" for auth + perm sync, "account_linking" for only perm sync
"purpose": "account_linking",
// if purpose == "account_linking" this controls if a user must connect to the IdP
"accountLinkingRequired": true,
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### GitLab
[Auth.js GitLab Provider Docs](https://authjs.dev/getting-started/providers/gitlab)
A GitLab connection can be used for either [authentication](/docs/configuration/auth) or [permission syncing](/docs/features/permission-syncing). This is controlled using the `purpose` field
in the GitLab identity provider config.
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in GitLab to facilitate the identity provider connection.
Follow [this guide](https://docs.gitlab.com/integration/oauth_provider/) by GitLab to create an OAuth application.
When configuring your application:
- Set the callback URL to `<sourcebot_url>/api/auth/callback/gitlab` (ex. https://sourcebot.coolcorp.com/api/auth/callback/gitlab)
- Enable the `read_user` scope
- If using for permission syncing, also enable the `read_api` scope
The result of registering an OAuth application is an `APPLICATION_ID` (`CLIENT_ID`) and `SECRET` (`CLIENT_SECRET`) which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot with the client id and secret for your OAuth application, you must set them as environment variables. These can be named whatever you like
(ex. `GITLAB_IDENTITY_PROVIDER_CLIENT_ID` and `GITLAB_IDENTITY_PROVIDER_CLIENT_SECRET`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "gitlab",
// "sso" for auth + perm sync, "account_linking" for only perm sync
"purpose": "account_linking",
// if purpose == "account_linking" this controls if a user must connect to the IdP
"accountLinkingRequired": true,
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
// Optional: for self-hosted GitLab instances
"baseUrl": "https://gitlab.example.com"
}
]
}
```
</Step>
</Steps>
</Accordion>
### Google
[Auth.js Google Provider Docs](https://authjs.dev/getting-started/providers/google)
A Google connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Client">
To begin, you must register an OAuth client in Google Cloud Console to facilitate the identity provider connection.
Follow [this guide](https://support.google.com/cloud/answer/6158849) by Google to create OAuth 2.0 credentials.
When configuring your OAuth client:
- Set the application type to "Web application"
- Add `<sourcebot_url>/api/auth/callback/google` to the authorized redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/google)
The result of creating OAuth credentials is a `CLIENT_ID` and `CLIENT_SECRET` which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot with the client id and secret for your OAuth client, you must set them as environment variables. These can be named whatever you like
(ex. `GOOGLE_IDENTITY_PROVIDER_CLIENT_ID` and `GOOGLE_IDENTITY_PROVIDER_CLIENT_SECRET`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id and secret to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "google",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Okta
[Auth.js Okta Provider Docs](https://authjs.dev/getting-started/providers/okta)
An Okta connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in Okta to facilitate the identity provider connection.
Follow [this guide](https://developer.okta.com/docs/guides/implement-oauth-for-okta/main/) by Okta to create an OAuth application.
When configuring your application:
- Set the application type to "Web Application"
- Add `<sourcebot_url>/api/auth/callback/okta` to the sign-in redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/okta)
The result of creating an OAuth application is a `CLIENT_ID`, `CLIENT_SECRET`, and `ISSUER` URL which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot with the client id, client secret, and issuer for your OAuth application, you must set them as environment variables. These can be named whatever you like
(ex. `OKTA_IDENTITY_PROVIDER_CLIENT_ID`, `OKTA_IDENTITY_PROVIDER_CLIENT_SECRET`, and `OKTA_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "okta",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Keycloak
[Auth.js Keycloak Provider Docs](https://authjs.dev/getting-started/providers/keycloak)
A Keycloak connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Client">
To begin, you must register an OAuth client in Keycloak to facilitate the identity provider connection.
Follow [this guide](https://www.keycloak.org/docs/latest/server_admin/#_oidc_clients) by Keycloak to create an OpenID Connect client.
When configuring your client:
- Set the client protocol to "openid-connect"
- Set the access type to "confidential"
- Add `<sourcebot_url>/api/auth/callback/keycloak` to the valid redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/keycloak)
The result of creating an OAuth client is a `CLIENT_ID`, `CLIENT_SECRET`, and an `ISSUER` URL (typically in the format `https://<keycloak-domain>/realms/<realm-name>`) which you'll provide to Sourcebot.
</Step>
<Step title="Define environment variables">
To provide Sourcebot with the client id, client secret, and issuer for your OAuth client, you must set them as environment variables. These can be named whatever you like
(ex. `KEYCLOAK_IDENTITY_PROVIDER_CLIENT_ID`, `KEYCLOAK_IDENTITY_PROVIDER_CLIENT_SECRET`, and `KEYCLOAK_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "keycloak",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Microsoft Entra ID
[Auth.js Microsoft Entra ID Provider Docs](https://authjs.dev/getting-started/providers/microsoft-entra-id)
A Microsoft Entra ID connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Register an OAuth Application">
To begin, you must register an OAuth application in Microsoft Entra ID (formerly Azure Active Directory) to facilitate the identity provider connection.
Follow [this guide](https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app) by Microsoft to register an application.
When configuring your application:
- Under "Authentication", add a platform and select "Web"
- Set the redirect URI to `<sourcebot_url>/api/auth/callback/microsoft-entra-id` (ex. https://sourcebot.coolcorp.com/api/auth/callback/microsoft-entra-id)
- Under "Certificates & secrets", create a new client secret
The result of registering an application is a `CLIENT_ID` (Application ID), `CLIENT_SECRET`, and `TENANT_ID` which you'll use to construct the issuer URL.
</Step>
<Step title="Define environment variables">
To provide Sourcebot with the client id, client secret, and issuer for your OAuth application, you must set them as environment variables. These can be named whatever you like
(ex. `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_CLIENT_ID`, `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_CLIENT_SECRET`, and `MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_ISSUER`)
The issuer URL should be in the format: `https://login.microsoftonline.com/<TENANT_ID>/v2.0`
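For example (the tenant id below is a placeholder):
```bash wrap icon="terminal"
export MICROSOFT_ENTRA_ID_IDENTITY_PROVIDER_ISSUER="https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/v2.0"
```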
</Step>
<Step title="Define the identity provider config">
Finally, pass the client id, client secret, and issuer to Sourcebot by defining an `identityProvider` object in the [config file](/docs/configuration/config-file):
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "microsoft-entra-id",
"purpose": "sso",
"clientId": {
"env": "YOUR_CLIENT_ID_ENV_VAR"
},
"clientSecret": {
"env": "YOUR_CLIENT_SECRET_ENV_VAR"
},
"issuer": {
"env": "YOUR_ISSUER_ENV_VAR"
}
}
]
}
```
</Step>
</Steps>
</Accordion>
### Authentik
[Auth.js Authentik Provider Docs](https://authjs.dev/getting-started/providers/authentik)
An Authentik connection can be used for [authentication](/docs/configuration/auth).
<Accordion title="instructions">
<Steps>
<Step title="Create a OAuth2/OpenID Connect application">
To begin, you must create a OAuth2/OpenID Connect application in Authentik. For more information, see the [Authentik documentation](https://docs.goauthentik.io/add-secure-apps/applications/manage_apps/#create-an-application-and-provider-pair).
When configuring your application:
- Set the provider type to "OAuth2/OpenID Connect"
- Set the client type to "Confidential"
- Add `<sourcebot_url>/api/auth/callback/authentik` to the redirect URIs (ex. https://sourcebot.coolcorp.com/api/auth/callback/authentik)
After creating the application, open the application details to obtain the client id, client secret, and issuer URL (typically in the format `https://<authentik-domain>/application/o/<provider-slug>/`).
</Step>
<Step title="Define environment variables">
The client id, secret, and issuer URL are provided to Sourcebot via environment variables. These can be named whatever you like
(ex. `AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID`, `AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET`, and `AUTHENTIK_IDENTITY_PROVIDER_ISSUER`)
</Step>
<Step title="Define the identity provider config">
Create an `identityProvider` object in the [config file](/docs/configuration/config-file) with the following fields:
```json wrap icon="code"
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"identityProviders": [
{
"provider": "authentik",
"purpose": "sso",
"clientId": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_ID"
},
"clientSecret": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_CLIENT_SECRET"
},
"issuer": {
"env": "AUTHENTIK_IDENTITY_PROVIDER_ISSUER"
}
}
]
}
```
</Step>
</Steps>
</Accordion>

View file

@ -292,6 +292,7 @@ The OpenAI compatible provider allows you to use any model that is compatible wi
<Accordion title="Troubleshooting">
- When using [llama.cpp](https://github.com/ggml-org/llama.cpp), if you hit "Failed after 3 attempts. Last error: tools param requires --jinja flag", add the `--jinja` flag to your `llama-server` command (see the sketch below).
- If you're seeing the LLM outputting reasoning tokens wrapped in XML tags (e.g., `<reasoning>`, `<thinking>`, etc.), you can set the `reasoningTag` parameter to the name of the tag (without angle brackets). This parameter defaults to `think`.
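For the llama.cpp case above, a minimal sketch (the model path is a placeholder):
```bash
# Start llama-server with --jinja so tool calls are supported.
llama-server -m ./models/your-model.gguf --jinja
```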
</Accordion>
### OpenRouter

View file

@ -15,6 +15,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"repos": [
"organizationName/projectName/repoName",
"organizationName/projectName/repoName2
@ -26,6 +27,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"orgs": [
"organizationName",
"organizationName2
@ -37,6 +39,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"projects": [
"organizationName/projectName",
"organizationName/projectName2"
@ -48,6 +51,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
// Include all repos in my-org...
"orgs": [
"my-org"
@ -82,7 +86,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
Azure Devops Cloud requires you to provide a PAT in order to index your repositories. To learn how to create a PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret:
Next, provide the access token via an environment variable that is referenced in the `token` property (see [tokens](/docs/configuration/config-file#tokens)):
<Tabs>
<Tab title="Environment Variable">
@ -91,6 +95,7 @@ Next, provide the access token via the `token` property, either as an environmen
```json
{
"type": "azuredevops",
"deploymentType": "cloud",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `ADO_TOKEN`.
@ -108,27 +113,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Schema reference

View file

@ -16,7 +16,8 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"useTfsPath": true
"deploymentType": "server",
"useTfsPath": true,
"repos": [
"organizationName/projectName/repoName",
"organizationName/projectName/repoName2
@ -28,6 +29,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "server",
"repos": [
"organizationName/projectName/repoName",
"organizationName/projectName/repoName2
@ -39,6 +41,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "server",
"orgs": [
"collectionName",
"collectionName2"
@ -50,6 +53,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "server",
"projects": [
"collectionName/projectName",
"collectionName/projectName2"
@ -61,6 +65,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
```json
{
"type": "azuredevops",
"deploymentType": "server",
// Include all repos in my-org...
"orgs": [
"my-org"
@ -95,7 +100,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
Azure Devops Server requires you to provide a PAT in order to index your repositories. To learn how to create a PAT, check out the [Azure Devops docs](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows).
Sourcebot needs the `Read` access for the `Code` scope in order to find and clone your repos.
Next, provide the access token via the `token` property, either as an environment variable or a secret:
Next, provide the access token via an environment variable that is referenced in the `token` property (see [tokens](/docs/configuration/config-file#tokens)):
<Tabs>
<Tab title="Environment Variable">
@ -104,6 +109,7 @@ Next, provide the access token via the `token` property, either as an environmen
```json
{
"type": "azuredevops",
"deploymentType": "server",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `ADO_TOKEN`.
@ -121,27 +127,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "azuredevops",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Schema reference

View file

@ -78,7 +78,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
## Authenticating with Bitbucket Cloud
In order to index private repositories, you'll need to provide authentication credentials. You can do this using an `App Password` or an `Access Token`
In order to index private repositories, you'll need to provide authentication credentials via a [token](/docs/configuration/config-file#tokens). You can do this using an `App Password` or an `Access Token`.
<Tabs>
<Tab title="App Password">

View file

@ -70,7 +70,7 @@ If you're not familiar with Sourcebot [connections](/docs/connections/overview),
## Authenticating with Bitbucket Data Center
In order to index private repositories, you'll need to provide an access token to Sourcebot.
In order to index private repositories, you'll need to provide an access token to Sourcebot via a [token](/docs/configuration/config-file#tokens).
Create an access token for the desired scope (repo, project, or workspace). Visit the official [Bitbucket Data Center docs](https://confluence.atlassian.com/bitbucketserver/http-access-tokens-939515499.html)
for more info.

View file

@ -81,7 +81,7 @@ In order to index private repositories, you'll need to generate a Gitea access t
![Gitea Access token creation](/images/gitea_pat_creation.png)
Next, provide the access token via the `token` property, either as an environment variable or a secret:
Next, provide the access token via an environment variable that is referenced in the `token` property (see [tokens](/docs/configuration/config-file#tokens)):
<Tabs>
<Tab title="Environment Variable">
@ -107,27 +107,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "gitea",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Connecting to a custom Gitea

View file

@ -128,7 +128,7 @@ In order to index private repositories, you'll need to generate an access token a
</Accordion>
</AccordionGroup>
Next, provide the access token via the `token` property, either as an environment variable or a secret:
Next, provide the access token via an environment variable that is referenced in the `token` property (see [tokens](/docs/configuration/config-file#tokens)):
<Tabs>
<Tab title="Environment Variable">
@ -154,27 +154,6 @@ Next, provide the access token via the `token` property, either as an environmen
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "github",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Connecting to a custom GitHub host

View file

@ -116,7 +116,7 @@ In order to index private projects, you'll need to generate a GitLab Personal Ac
![GitLab PAT Scope](/images/gitlab_pat_scopes.png)
Next, provide the PAT via the `token` property, either as an environment variable or a secret:
Next, provide the PAT via an environment variable that is referenced in the `token` property (see [tokens](/docs/configuration/config-file#tokens)):
<Tabs>
<Tab title="Environment Variable">
@ -142,27 +142,6 @@ Next, provide the PAT via the `token` property, either as an environment variabl
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "gitlab",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>
## Connecting to a custom GitLab host

View file

@ -69,6 +69,26 @@ To learn more about how to create a connection for a specific code host, check o
<Note>Missing your code host? [Submit a feature request on GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md).</Note>
## Indexing Large Files
By default, Sourcebot will skip indexing files that are larger than 2MB or have more than 20,000 trigrams. You can configure this by setting the `maxFileSize` and `maxTrigramCount` [settings](/docs/configuration/config-file#settings).
These limits can be ignored for specific files by passing in a comma separated list of glob patterns matching file paths to the `ALWAYS_INDEX_FILE_PATTERNS` environment variable. For example:
```bash
# Always index all .sum and .lock files
ALWAYS_INDEX_FILE_PATTERNS=**/*.sum,**/*.lock
```
Files that have been skipped are assigned the `skipped` language. You can view a list of all skipped files by using the following query:
```
lang:skipped
```
## Indexing Binary Files
Binary files cannot be indexed by Sourcebot. See [#575](https://github.com/sourcebot-dev/sourcebot/issues/575) for more information.
## Schema reference
---

View file

@ -1,88 +0,0 @@
---
title: "Deployment guide"
---
import SupportedPlatforms from '/snippets/platform-support.mdx'
The following guide will walk you through the steps to deploy Sourcebot on your own infrastructure. Sourcebot is distributed as a [single docker container](/docs/overview#architecture) that can be deployed to a k8s cluster, a VM, or any platform that supports docker.
<Note>Hit an issue? Please let us know on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) or by [emailing us](mailto:team@sourcebot.dev).</Note>
<Steps>
<Step title="Requirements">
- Docker -> use [Docker Desktop](https://www.docker.com/products/docker-desktop/) on Mac or Windows.
</Step>
<Step title="Create a config.json">
Create a `config.json` file that tells Sourcebot which repositories to sync and index:
```bash wrap icon="terminal" Create example config
touch config.json
echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
"connections": {
// comments are supported
"starter-connection": {
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
}
}' > config.json
```
This config creates a single GitHub connection named `starter-connection` that specifies [Sourcebot](https://github.com/sourcebot-dev/sourcebot) as a repo to sync. [Learn more about the config file](/docs/configuration/config-file).
</Step>
<Step title="Launch your instance">
<Warning>If you're deploying Sourcebot behind a domain, you must set the [AUTH_URL](/docs/configuration/environment-variables) environment variable.</Warning>
In the same directory as `config.json`, run the following command to start your instance:
``` bash icon="terminal" Start the Sourcebot container
docker run \
-p 3000:3000 \
--pull=always \
--rm \
-v $(pwd):/data \
-e CONFIG_PATH=/data/config.json \
--name sourcebot \
ghcr.io/sourcebot-dev/sourcebot:latest
```
<Accordion title="Details">
**This command**:
- pulls the latest version of the `sourcebot` docker image.
- mounts the working directory to `/data` in the container to allow Sourcebot to persist data across restarts, and to access the `config.json`. In your local directory, you should see a `.sourcebot` folder created that contains all persistent data.
- runs any pending database migrations.
- starts up all services, including the webserver exposed on port 3000.
- reads `config.json` and starts syncing.
</Accordion>
</Step>
<Step title="Complete onboarding">
Navigate to `http://localhost:3000` and complete the onboarding flow.
</Step>
<Step title="Done">
You're all set! If you'd like to set up [Ask Sourcebot](/docs/features/ask/overview), configure a language model [provider](/docs/configuration/language-model-providers).
</Step>
</Steps>
## Next steps
---
<CardGroup cols={3}>
<Card title="Index your code" icon="code" href="/docs/connections/overview">
Learn how to index your code using Sourcebot
</Card>
<Card title="Language models" icon="brain" href="/docs/configuration/language-model-providers">
Learn how to configure language model providers to start using [Ask Sourcebot](/docs/features/ask/overview)
</Card>
<Card title="Authentication" icon="lock" href="/docs/configuration/auth/overview">
Learn more about how to set up SSO, email codes, and other authentication providers.
</Card>
</CardGroup>

View file

@ -0,0 +1,61 @@
---
title: "Docker Compose"
---
This guide will walk you through deploying Sourcebot locally or on a VM using Docker Compose. We will use the [docker-compose.yml](https://github.com/sourcebot-dev/sourcebot/blob/main/docker-compose.yml) file from the Sourcebot repository. This is the simplest way to get started with Sourcebot.
If you are looking to deploy onto Kubernetes, see the [Kubernetes (Helm)](/docs/deployment/k8s) guide.
## Get started
<Steps>
<Step title="Requirements">
- docker & docker compose. Use [Docker Desktop](https://www.docker.com/products/docker-desktop/) on Mac or Windows.
</Step>
<Step title="Obtain the Docker Compose file">
Download the [docker-compose.yml](https://github.com/sourcebot-dev/sourcebot/blob/main/docker-compose.yml) file from the Sourcebot repository.
```bash wrap icon="terminal"
curl -o docker-compose.yml https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/docker-compose.yml
```
</Step>
<Step title="Create a config.json">
In the same directory as the `docker-compose.yml` file, create a [configuration file](/docs/configuration/config-file). The configuration file is a JSON file that configures Sourcebot's behaviour, including what repositories to index, language model providers, auth providers, and more.
```bash wrap icon="terminal" Create example config
touch config.json
echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v3/index.json",
// Comments are supported.
// This config creates a single connection to GitHub.com that
// indexes the Sourcebot repository
"connections": {
"starter-connection": {
"type": "github",
"repos": [
"sourcebot-dev/sourcebot"
]
}
}
}' > config.json
```
</Step>
<Step title="Launch your instance">
Update the secrets in the `docker-compose.yml` and then run Sourcebot using:
```bash wrap icon="terminal"
docker compose up
```
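If you want the instance to keep running in the background, detached mode works as usual:
```bash wrap icon="terminal"
# Start the services in the background, then follow the logs.
docker compose up -d
docker compose logs -f
```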
</Step>
<Step title="Done">
You're all set! Navigate to [http://localhost:3000](http://localhost:3000) to access your Sourcebot instance.
</Step>
</Steps>
## Next steps

View file

@ -0,0 +1,4 @@
---
title: "Kubernetes (Helm)"
url: https://github.com/sourcebot-dev/sourcebot-helm-chart
---

View file

@ -10,7 +10,7 @@ codebase that the agent may fetch to perform the review.
This agent provides codebase-aware reviews for your PRs. For each diff, this agent fetches relevant context from Sourcebot and feeds it into an LLM for a detailed review of your changes.
The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment-guide)
The AI Code Review Agent is [fair source](https://github.com/sourcebot-dev/sourcebot/tree/main/packages/web/src/features/agents/review-agent) and packaged in [Sourcebot](https://github.com/sourcebot-dev/sourcebot). To get started using this agent, [deploy Sourcebot](/docs/deployment/docker-compose)
and then follow the configuration instructions below.
![AI Code Review Agent Example](/images/review_agent_example.png)
@ -44,9 +44,9 @@ Before you get started, make sure you have an OpenAI account that you can creat
<Step title="Configure the environment variables in Sourcebot">
Sourcebot requires the following environment variables to begin reviewing PRs through your new GitHub app:
- `GITHUB_APP_ID`: The client ID of your GitHub app. Can be found in your [app settings](https://docs.github.com/en/apps/creating-github-apps/writing-code-for-a-github-app/quickstart#navigate-to-your-app-settings)
- `GITHUB_APP_WEBHOOK_SECRET`: The webhook secret you defined in your GitHub app. Can be found in your [app settings](https://docs.github.com/en/apps/creating-github-apps/writing-code-for-a-github-app/quickstart#navigate-to-your-app-settings)
- `GITHUB_APP_PRIVATE_KEY_PATH`: The path to your app's private key. If you're running Sourcebot from a container, this is the path to this file from within your container
- `GITHUB_REVIEW_AGENT_APP_ID`: The client ID of your GitHub app. Can be found in your [app settings](https://docs.github.com/en/apps/creating-github-apps/writing-code-for-a-github-app/quickstart#navigate-to-your-app-settings)
- `GITHUB_REVIEW_AGENT_APP_WEBHOOK_SECRET`: The webhook secret you defined in your GitHub app. Can be found in your [app settings](https://docs.github.com/en/apps/creating-github-apps/writing-code-for-a-github-app/quickstart#navigate-to-your-app-settings)
- `GITHUB_REVIEW_AGENT_APP_PRIVATE_KEY_PATH`: The path to your app's private key. If you're running Sourcebot from a container, this is the path to this file from within your container
(ex `/data/review-agent-key.pem`). You must copy the private key file into the directory you mount to Sourcebot (similar to the config file).
You can generate a private key file for your app in the [app settings](https://docs.github.com/en/apps/creating-github-apps/writing-code-for-a-github-app/quickstart#navigate-to-your-app-settings). You must copy this private key file into the
@ -74,9 +74,9 @@ Before you get started, make sure you have an OpenAI account that you can creat
- "/Users/michael/sourcebot_review_agent_workspace:/data"
environment:
CONFIG_PATH: "/data/config.json"
GITHUB_APP_ID: "my-github-app-id"
GITHUB_APP_WEBHOOK_SECRET: "my-github-app-webhook-secret"
GITHUB_APP_PRIVATE_KEY_PATH: "/data/review-agent-key.pem"
GITHUB_REVIEW_AGENT_APP_ID: "my-github-app-id"
GITHUB_REVIEW_AGENT_APP_WEBHOOK_SECRET: "my-github-app-webhook-secret"
GITHUB_REVIEW_AGENT_APP_PRIVATE_KEY_PATH: "/data/review-agent-key.pem"
REVIEW_AGENT_API_KEY: "sourcebot-my-key"
OPENAI_API_KEY: "sk-proj-my-open-api-key"
```
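Since the private key must live inside the directory you mount into the container, copying it there before starting Sourcebot is enough. A minimal sketch; both paths are placeholders:
```bash
# Copy the GitHub App private key into the directory mounted at /data.
# Use your own download location and mount directory.
cp ~/Downloads/my-app.private-key.pem /Users/michael/sourcebot_review_agent_workspace/review-agent-key.pem
```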

View file

@ -14,7 +14,7 @@ follow code nav references, and provide an answer that's rich with inline cita
<Card title="Index repos" icon="book" href="/docs/connections/overview" horizontal="true">
Learn how to index your repos so you can ask questions about them
</Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
<Card title="Deployment guide" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps.
</Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">

View file

@ -21,6 +21,7 @@ import LicenseKeyRequired from '/snippets/license-key-required.mdx'
| **Go to definition** | Clicking the "go to definition" button in the popover or clicking the symbol name navigates to the symbol's definition. |
| **Find references** | Clicking the "find all references" button in the popover lists all references in the explore panel. |
| **Explore panel** | Lists all references and definitions for the symbol selected in the popover. |
| **Cross-repository navigation** | You can search across all repositories by clicking the globe icon in the explore panel. By default, references and definitions are scoped to the repository where the symbol is being resolved. |
## How does it work?

View file

@ -9,7 +9,7 @@ The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (MCP)
<Steps>
<Step title="Launch Sourcebot">
Follow the [deployment guide](/docs/deployment-guide) to launch Sourcebot and get your code indexed. The host url of your instance (e.g., `http://localhost:3000`) is passed to the MCP server via the `SOURCEBOT_HOST` url.
Follow the [deployment guides](/docs/deployment/docker-compose) to launch Sourcebot and get your code indexed. The host URL of your instance (e.g., `http://localhost:3000`) is passed to the MCP server via the `SOURCEBOT_HOST` environment variable.
If a host is not provided, the server will fall back to using the demo instance hosted at https://demo.sourcebot.dev. You can see the list of repositories indexed [here](https://demo.sourcebot.dev/~/repos). Add additional repositories by [opening a PR](https://github.com/sourcebot-dev/sourcebot/blob/main/demo-site-config.json).
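As a sketch, assuming the server is launched directly via the published `@sourcebot/mcp` package (your MCP client may manage this invocation for you):
```bash
# Point the MCP server at a self-hosted instance. Without SOURCEBOT_HOST it
# falls back to the public demo instance at https://demo.sourcebot.dev.
SOURCEBOT_HOST=http://localhost:3000 npx @sourcebot/mcp
```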
</Step>

View file

@ -1,21 +1,20 @@
---
title: "Permission syncing"
sidebarTitle: "Permission syncing"
tag: "experimental"
---
import LicenseKeyRequired from '/snippets/license-key-required.mdx'
import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx'
<LicenseKeyRequired />
<ExperimentalFeatureWarning />
# Overview
Permission syncing allows you to sync Access Permission Lists (ACLs) from a code host to Sourcebot. When configured, users signed into Sourcebot (via the code host's OAuth provider) will only be able to access repositories that they have access to on the code host. Practically, this means:
Permission syncing allows you to sync Access Control Lists (ACLs) from a code host to Sourcebot. When configured, users signed into Sourcebot will only be able to access repositories
that they have access to on the code host. Practically, this means:
- Code Search results will only include repositories that the user has access to.
- Code navigation results will only include repositories that the user has access to.
- MCP results will only include results from repositories the user has access to.
- Ask Sourcebot (and the underlying LLM) will only have access to repositories that the user has access to.
- File browsing is scoped to the repositories that the user has access to.
@ -35,7 +34,7 @@ We are actively working on supporting more code hosts. If you'd like to see a sp
| Platform | Permission syncing |
|:----------|------------------------------|
| [GitHub (GHEC & GHES)](/docs/features/permission-syncing#github) | ✅ |
| GitLab | 🛑 |
| [GitLab (Self-managed & Cloud)](/docs/features/permission-syncing#gitlab) | ✅ |
| Bitbucket Cloud | 🛑 |
| Bitbucket Data Center | 🛑 |
| Gitea | 🛑 |
@ -46,7 +45,7 @@ We are actively working on supporting more code hosts. If you'd like to see a sp
## GitHub
Prerequisite: [Add GitHub as an OAuth provider](/docs/configuration/auth/providers#github).
Prerequisite: Configure GitHub as an [external identity provider](/docs/configuration/idp).
Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and **GitHub Enterprise Server**. For organization-owned repositories, users that have **read-only** access (or above) via the following methods will have their access synced to Sourcebot:
- Outside collaborators
@ -56,9 +55,21 @@ Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and *
- Organization owners.
**Notes:**
- A GitHub OAuth provider must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- A GitHub [external identity provider](/docs/configuration/idp) must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- OAuth tokens must have the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**.
## GitLab
Prerequisite: Configure GitLab as an [external identity provider](/docs/configuration/idp).
Permission syncing works with **GitLab Self-managed** and **GitLab Cloud**. Users with the **Guest** role or above in a group or project will have their access synced to Sourcebot. Both direct and indirect group or project memberships are synced. For more details, see the [GitLab docs](https://docs.gitlab.com/user/project/members/#membership-types).
**Notes:**
- A GitLab [external identity provider](/docs/configuration/idp) must be configured to (1) correlate a Sourcebot user with a GitLab user, and (2) to list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works).
- OAuth tokens require the `read_api` scope in order to use the [List projects for the authenticated user API](https://docs.gitlab.com/ee/api/projects.html#list-all-projects) during [User driven syncing](/docs/features/permission-syncing#how-it-works).
# How it works
Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions:

View file

@ -22,7 +22,7 @@ Search across all your repos/branches across any code host platform. Blazingly f
<Card title="Branches" icon="split" href="/docs/features/search/multi-branch-indexing" horizontal="true">
Learn how to index and search through your branches
</Card>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
<Card title="Deployment guides" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps.
</Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">

View file

@ -4,32 +4,51 @@ title: Writing search queries
Sourcebot uses a powerful regex-based query language that enables precise code search within large codebases.
## Syntax reference guide
Queries consist of space-separated regular expressions. Wrapping expressions in `""` combines them. By default, a file must have at least one match for each expression to be included.
Queries consist of space-separated search patterns that are matched against file contents. A file must have at least one match for each pattern to be included. Queries can optionally contain search filters to further refine the search results.
## Keyword search (default)
Keyword search matches search patterns exactly in file contents. Wrapping search patterns in `""` combines them as a single expression.
| Example | Explanation |
| :--- | :--- |
| `foo` | Match files containing the keyword `foo` |
| `foo bar` | Match files containing both `foo` **and** `bar` |
| `"foo bar"` | Match files containing the phrase `foo bar` |
| `"foo \"bar\""` | Match files containing `foo "bar"` exactly (escaped quotes) |
## Regex search
Toggle the regex button (`.*`) in the search bar to interpret search patterns as regular expressions.
| Example | Explanation |
| :--- | :--- |
| `foo` | Match files with regex `/foo/` |
| `foo bar` | Match files with regex `/foo/` **and** `/bar/` |
| `"foo bar"` | Match files with regex `/foo bar/` |
| `foo.*bar` | Match files with regex `/foo.*bar/` (foo followed by any characters, then bar) |
| `^function\s+\w+` | Match files with regex `/^function\s+\w+/` (function at start of line, followed by whitespace and word characters) |
| `"foo bar"` | Match files with regex `/foo bar/`. Quotes are not matched. |
Multiple expressions can be or'd together with `or`, negated with `-`, or grouped with `()`.
## Search filters
| Example | Explanation |
| :--- | :--- |
| `foo or bar` | Match files with regex `/foo/` **or** `/bar/` |
| `foo -bar` | Match files with regex `/foo/` but **not** `/bar/` |
| `foo (bar or baz)` | Match files with regex `/foo/` **and** either `/bar/` **or** `/baz/` |
Expressions can be prefixed with certain keywords to modify search behavior. Some keywords can be negated using the `-` prefix.
Search queries (keyword or regex) can include multiple search filters to further refine the search results. Some filters can be negated using the `-` prefix.
| Prefix | Description | Example |
| :--- | :--- | :--- |
| `file:` | Filter results from filepaths that match the regex. By default all files are searched. | `file:README` - Filter results to filepaths that match regex `/README/`<br/>`file:"my file"` - Filter results to filepaths that match regex `/my file/`<br/>`-file:test\.ts$` - Ignore results from filepaths that match regex `/test\.ts$/` |
| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`<br/>`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*` |
| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`<br/>`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*/` |
| `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` |
| `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files<br/>`-lang:YAML` - Ignore results from YAML files |
| `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` |
| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context<br/>`-context:pipelines` - Ignore results from the pipelines context |
## Boolean operators & grouping
By default, space-separated expressions are and'd together. The `or` keyword and parentheses `()` can be used to create more complex boolean logic. Parentheses can be negated using the `-` prefix.
| Example | Explanation |
| :--- | :--- |
| `foo or bar` | Match files containing `foo` **or** `bar` |
| `foo (bar or baz)` | Match files containing `foo` **and** either `bar` **or** `baz`. |
| `-(foo) bar` | Match files containing `bar` **and not** `foo`. |

View file

@ -7,7 +7,7 @@ sidebarTitle: License key
If you'd like a trial license, [reach out](https://www.sourcebot.dev/contact) and we'll send one over within 24 hours
</Note>
All core Sourcebot features are available [FSL licensed](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) without any limits. Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details.
All core Sourcebot features are available under the [FSL license](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license). Some additional features require a license key. See the [pricing page](https://www.sourcebot.dev/pricing) for more details.
## Activating a license key
@ -25,7 +25,7 @@ docker run \
## Feature availability
---
| Feature | OSS | Licensed |
| Feature | [FSL](https://github.com/sourcebot-dev/sourcebot/blob/main/LICENSE.md#functional-source-license-version-11-alv2-future-license) | [Enterprise](https://github.com/sourcebot-dev/sourcebot/blob/main/ee/LICENSE) |
|:---------|:-----|:----------|
| [Search](/docs/features/search/syntax-reference) | ✅ | ✅ |
| [Full code host support](/docs/connections/overview) | ✅ | ✅ |
@ -34,6 +34,7 @@ docker run \
| [Login with credentials](/docs/configuration/auth/overview) | ✅ | ✅ |
| [Login with email codes](/docs/configuration/auth/overview) | ✅ | ✅ |
| [Login with SSO](/docs/configuration/auth/overview#enterprise-authentication-providers) | 🛑 | ✅ |
| [Permission syncing](/docs/features/permission-syncing) | 🛑 | ✅ |
| [Code navigation](/docs/features/code-navigation) | 🛑 | ✅ |
| [Search contexts](/docs/features/search/search-contexts) | 🛑 | ✅ |
| [Audit logs](/docs/configuration/audit-logs) | 🛑 | ✅ |

View file

@ -2,13 +2,14 @@
title: "Overview"
---
[Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a self-hosted tool that helps you understand your codebase.
[Sourcebot](https://github.com/sourcebot-dev/sourcebot) is a platform that helps humans and agents understand your codebase:
- [Code search](/docs/features/search/overview): Search and navigate across all your repos and branches, no matter where they're hosted
- [Ask Sourcebot](/docs/features/ask): Ask questions about your codebase and have Sourcebot provide detailed answers grounded with inline citations
- [MCP](/docs/features/mcp-server): Enrich agent context windows with code across your organization
<CardGroup>
<Card title="Deployment guide" icon="server" href="/docs/deployment-guide" horizontal="true">
<Card title="Deployment guides" icon="server" href="/docs/deployment/docker-compose" horizontal="true">
Learn how to self-host Sourcebot in a few simple steps.
</Card>
<Card title="Public demo" icon="globe" href="https://demo.sourcebot.dev/" horizontal="true">
@ -161,7 +162,7 @@ Sourcebot is designed to be easily self-hosted, allowing you to deploy it onto y
---
<CardGroup cols={2}>
<Card horizontal title="Deployment guide ->" href="/docs/deployment-guide" />
<Card horizontal title="Deployment guides ->" href="/docs/deployment/docker-compose" />
<Card horizontal title="Connecting your code ->" href="/docs/connections/overview" />
<Card horizontal title="Search syntax reference ->" href="/docs/features/search/syntax-reference" />
<Card horizontal title="Code navigation overview ->" href="/docs/features/code-navigation" />

View file

@ -78,7 +78,7 @@ If your deployment is dependent on these features, please [reach out](https://gi
After updating your configuration file, restart your Sourcebot deployment to pick up the new changes.
</Step>
<Step title="You're done!">
Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose).
Congrats, you've successfully migrated to v3! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or on [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose).
</Step>
</Steps>
@ -90,4 +90,4 @@ Some things to check:
- Make sure you have a name for each `connection`, and that the name only contains letters, digits, hyphens, or underscores
- Make sure each `connection` has a `type` field with a valid value (`gitlab`, `github`, `gitea`, `gerrit`)
Having troubles migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help
Having trouble migrating from v2 to v3? Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help.

View file

@ -40,7 +40,7 @@ Please note that the following features are no longer supported in v4:
</Step>
<Step title="You're done!">
Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)
Congrats, you've successfully migrated to v4! Please let us know what you think of the new features by reaching out on our [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose)
</Step>
</Steps>
@ -58,4 +58,4 @@ to finish upgrading to v4 in single-tenant mode.
- If you're hitting issues with signing into your Sourcebot instance, make sure you're setting `AUTH_URL` correctly to your deployment domain (ex. `https://sourcebot.yourcompany.com`)
Having troubles migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/6Fhp27x7Pb) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help
Having trouble migrating from v3 to v4? Reach out to us on [discord](https://discord.gg/HDScTs3ptP) or [GitHub](https://github.com/sourcebot-dev/sourcebot/issues/new/choose) and we'll try our best to help.

View file

@ -24,27 +24,4 @@
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your access token:
![](/images/secrets_list.png)
2. Add the `token` and `user` (username associated with the app password you created) properties to your connection config:
```json
{
"type": "bitbucket",
"deploymentType": "cloud",
"user": "myusername",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>

View file

@ -22,25 +22,4 @@
ghcr.io/sourcebot-dev/sourcebot:latest
```
</Tab>
<Tab title="Secret">
<Note>Secrets are only supported when [authentication](/docs/configuration/auth/overview) is enabled.</Note>
1. Navigate to **Secrets** in settings and create a new secret with your PAT:
![](/images/secrets_list.png)
2. Add the `token` property to your connection config:
```json
{
"type": "bitbucket",
"token": {
"secret": "mysecret"
}
// .. rest of config ..
}
```
</Tab>
</Tabs>

View file

@ -77,7 +77,6 @@
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -274,7 +273,6 @@
"token": {
"description": "An authentication token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -465,7 +463,6 @@
"token": {
"description": "An access token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -779,7 +776,6 @@
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -976,7 +972,6 @@
"token": {
"description": "An authentication token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -1167,7 +1162,6 @@
"token": {
"description": "An access token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -1563,7 +1557,6 @@
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -1760,7 +1753,6 @@
"token": {
"description": "An authentication token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}
@ -1951,7 +1943,6 @@
"token": {
"description": "An access token.",
"examples": [
"secret-token",
{
"env": "ENV_VAR_CONTAINING_TOKEN"
}

View file

@ -0,0 +1,131 @@
{/* THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "AppConfig",
"definitions": {
"GitHubAppConfig": {
"type": "object",
"properties": {
"type": {
"const": "github",
"description": "GitHub App Configuration"
},
"deploymentHostname": {
"type": "string",
"format": "hostname",
"default": "github.com",
"description": "The hostname of the GitHub App deployment.",
"examples": [
"github.com",
"github.example.com"
]
},
"id": {
"type": "string",
"description": "The ID of the GitHub App."
},
"privateKey": {
"description": "The private key of the GitHub App.",
"anyOf": [
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"googleCloudSecret": {
"type": "string",
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"googleCloudSecret"
],
"additionalProperties": false
}
]
}
},
"required": [
"type",
"id",
"privateKey"
],
"additionalProperties": false
}
},
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"const": "github",
"description": "GitHub App Configuration"
},
"deploymentHostname": {
"type": "string",
"format": "hostname",
"default": "github.com",
"description": "The hostname of the GitHub App deployment.",
"examples": [
"github.com",
"github.example.com"
]
},
"id": {
"type": "string",
"description": "The ID of the GitHub App."
},
"privateKey": {
"description": "The private key of the GitHub App.",
"anyOf": [
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"googleCloudSecret": {
"type": "string",
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"googleCloudSecret"
],
"additionalProperties": false
}
]
}
},
"required": [
"type",
"id",
"privateKey"
],
"additionalProperties": false
}
]
}
```

View file

@ -11,35 +11,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -62,7 +57,6 @@
"cloud",
"server"
],
"default": "cloud",
"description": "The type of Azure DevOps deployment"
},
"useTfsPath": {
@ -199,7 +193,8 @@
},
"required": [
"type",
"token"
"token",
"deploymentType"
],
"additionalProperties": false
}

View file

@ -15,35 +15,30 @@
},
"token": {
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}

View file

@ -15,35 +15,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -228,35 +223,30 @@
},
"token": {
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -435,35 +425,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -707,35 +692,30 @@
},
"token": {
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -880,35 +860,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -931,7 +906,6 @@
"cloud",
"server"
],
"default": "cloud",
"description": "The type of Azure DevOps deployment"
},
"useTfsPath": {
@ -1068,7 +1042,8 @@
},
"required": [
"type",
"token"
"token",
"deploymentType"
],
"additionalProperties": false
},

View file

@ -0,0 +1,115 @@
{/* THIS IS AN AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY! */}
```json
{
"type": "object",
"description": "Environment variable overrides.",
"title": "EnvironmentOverrides",
"not": {
"$comment": "List of environment variables that are not allowed to be overridden.",
"anyOf": [
{
"required": [
"CONFIG_PATH"
]
}
]
},
"patternProperties": {
"^[a-zA-Z0-9_-]+$": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"const": "token"
},
"value": {
"anyOf": [
{
"type": "object",
"properties": {
"env": {
"type": "string",
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"googleCloudSecret": {
"type": "string",
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"googleCloudSecret"
],
"additionalProperties": false
}
]
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "string"
},
"value": {
"type": "string"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "number"
},
"value": {
"type": "number"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"type": {
"const": "boolean"
},
"value": {
"type": "boolean"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
}
]
}
}
}
```

View file

@ -11,35 +11,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}

View file

@ -11,35 +11,30 @@
},
"token": {
"description": "A Personal Access Token (PAT).",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}

View file

@ -11,35 +11,30 @@
},
"token": {
"description": "An authentication token.",
"examples": [
{
"secret": "SECRET_KEY"
}
],
"anyOf": [
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -9,26 +9,26 @@
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -89,26 +89,26 @@
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}
@ -133,26 +133,26 @@
{
"type": "object",
"properties": {
"secret": {
"env": {
"type": "string",
"description": "The name of the secret that contains the token."
"description": "The name of the environment variable that contains the token."
}
},
"required": [
"secret"
"env"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"env": {
"googleCloudSecret": {
"type": "string",
"description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
"description": "The resource name of a Google Cloud secret. Must be in the format `projects/<project-id>/secrets/<secret-name>/versions/<version-id>`. See https://cloud.google.com/secret-manager/docs/creating-and-accessing-secrets"
}
},
"required": [
"env"
"googleCloudSecret"
],
"additionalProperties": false
}

View file

@ -1,18 +1,71 @@
#!/bin/sh
set -e
if [ "$DATABASE_URL" = "postgresql://postgres@localhost:5432/sourcebot" ]; then
DATABASE_EMBEDDED="true"
# Exit immediately if a command fails
set -e
# Disable auto-exporting of variables
set +a
# Detect if running as root
IS_ROOT=false
if [ "$(id -u)" -eq 0 ]; then
IS_ROOT=true
fi
if [ "$IS_ROOT" = "true" ]; then
echo -e "\e[34m[Info] Running as root user.\e[0m"
else
echo -e "\e[34m[Info] Running as non-root user.\e[0m"
fi
# If a CONFIG_PATH is set, resolve the environment overrides from the config file.
# The overrides will be written into variables scoped to the current shell. This is
# required in case one of the variables used in this entrypoint is overridden (e.g.,
# DATABASE_URL, REDIS_URL, etc.)
if [ -n "$CONFIG_PATH" ]; then
echo -e "\e[34m[Info] Resolving environment overrides from $CONFIG_PATH...\e[0m"
set +e # Disable exit on error so we can capture EXIT_CODE
OVERRIDES_OUTPUT=$(SKIP_ENV_VALIDATION=1 yarn tool:resolve-env-overrides 2>&1)
EXIT_CODE=$?
set -e # Re-enable exit on error
if [ $EXIT_CODE -eq 0 ]; then
eval "$OVERRIDES_OUTPUT"
else
echo -e "\e[31m[Error] Failed to resolve environment overrides.\e[0m"
echo "$OVERRIDES_OUTPUT"
exit 1
fi
fi
# Construct the database URL from the individual variables if DATABASE_URL is not set
if [ -z "$DATABASE_URL" ] && [ -n "$DATABASE_HOST" ] && [ -n "$DATABASE_USERNAME" ] && [ -n "$DATABASE_PASSWORD" ] && [ -n "$DATABASE_NAME" ]; then
DATABASE_URL="postgresql://${DATABASE_USERNAME}:${DATABASE_PASSWORD}@${DATABASE_HOST}/${DATABASE_NAME}"
if [ -n "$DATABASE_ARGS" ]; then
DATABASE_URL="${DATABASE_URL}?$DATABASE_ARGS"
fi
fi
if [ -z "$DATABASE_URL" ]; then
echo -e "\e[34m[Info] DATABASE_URL is not set. Using embeded database.\e[0m"
export DATABASE_EMBEDDED="true"
export DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
else
export DATABASE_EMBEDDED="false"
fi
if [ -z "$REDIS_URL" ]; then
echo -e "\e[34m[Info] REDIS_URL is not set. Using embeded redis.\e[0m"
export REDIS_EMBEDDED="true"
export REDIS_URL="redis://localhost:6379"
else
export REDIS_EMBEDDED="false"
fi
echo -e "\e[34m[Info] Sourcebot version: $NEXT_PUBLIC_SOURCEBOT_VERSION\e[0m"
# If we don't have a PostHog key, then we need to disable telemetry.
if [ -z "$NEXT_PUBLIC_POSTHOG_PAPIK" ]; then
echo -e "\e[33m[Warning] NEXT_PUBLIC_POSTHOG_PAPIK was not set. Setting SOURCEBOT_TELEMETRY_DISABLED.\e[0m"
export SOURCEBOT_TELEMETRY_DISABLED=true
fi
if [ -n "$SOURCEBOT_TELEMETRY_DISABLED" ]; then
# Validate that SOURCEBOT_TELEMETRY_DISABLED is either "true" or "false"
if [ "$SOURCEBOT_TELEMETRY_DISABLED" != "true" ] && [ "$SOURCEBOT_TELEMETRY_DISABLED" != "false" ]; then
@ -36,12 +89,17 @@ fi
# Check if DATABASE_DATA_DIR exists, if not initialize it
if [ "$DATABASE_EMBEDDED" = "true" ] && [ ! -d "$DATABASE_DATA_DIR" ]; then
echo -e "\e[34m[Info] Initializing database at $DATABASE_DATA_DIR...\e[0m"
mkdir -p $DATABASE_DATA_DIR && chown -R postgres:postgres "$DATABASE_DATA_DIR"
mkdir -p $DATABASE_DATA_DIR
if [ "$IS_ROOT" = "true" ]; then
chown -R postgres:postgres "$DATABASE_DATA_DIR"
su postgres -c "initdb -D $DATABASE_DATA_DIR"
else
initdb -D "$DATABASE_DATA_DIR" -U postgres
fi
fi
# Create the redis data directory if it doesn't exist
if [ ! -d "$REDIS_DATA_DIR" ]; then
if [ "$REDIS_EMBEDDED" = "true" ] && [ ! -d "$REDIS_DATA_DIR" ]; then
mkdir -p $REDIS_DATA_DIR
fi
@ -95,7 +153,7 @@ if [ ! -f "$FIRST_RUN_FILE" ]; then
# (if telemetry is enabled)
if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then
if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{
"api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'",
"api_key": "'"$POSTHOG_PAPIK"'",
"event": "install",
"distinct_id": "'"$SOURCEBOT_INSTALL_ID"'",
"properties": {
@ -115,7 +173,7 @@ else
if [ "$SOURCEBOT_TELEMETRY_DISABLED" = "false" ]; then
if ! ( curl -L --output /dev/null --silent --fail --header "Content-Type: application/json" -d '{
"api_key": "'"$NEXT_PUBLIC_POSTHOG_PAPIK"'",
"api_key": "'"$POSTHOG_PAPIK"'",
"event": "upgrade",
"distinct_id": "'"$SOURCEBOT_INSTALL_ID"'",
"properties": {
@ -131,16 +189,33 @@ fi
echo "{\"version\": \"$NEXT_PUBLIC_SOURCEBOT_VERSION\", \"install_id\": \"$SOURCEBOT_INSTALL_ID\"}" > "$FIRST_RUN_FILE"
# Start the database and wait for it to be ready before starting any other service
if [ "$DATABASE_EMBEDDED" = "true" ]; then
if [ "$IS_ROOT" = "true" ]; then
su postgres -c "postgres -D $DATABASE_DATA_DIR" &
else
postgres -D "$DATABASE_DATA_DIR" &
fi
until pg_isready -h localhost -p 5432 -U postgres; do
echo -e "\e[34m[Info] Waiting for the database to be ready...\e[0m"
sleep 1
# As postgres runs in the background, we must check if it is still
# running, otherwise the "until" loop will be running indefinitely.
if ! pgrep -x "postgres" > /dev/null; then
echo "postgres failed to run"
exit 1
fi
done
# Check if the database already exists, and create it if it dne
if [ "$IS_ROOT" = "false" ]; then
# Running as non-root we need to ensure the postgres account is created.
psql -U postgres -tc "SELECT 1 FROM pg_roles WHERE rolname='postgres'" | grep -q 1 \
|| createuser postgres -s
fi
# Check if the database already exists, and create it if it doesn't exist
EXISTING_DB=$(psql -U postgres -tAc "SELECT 1 FROM pg_database WHERE datname = 'sourcebot'")
if [ "$EXISTING_DB" = "1" ]; then
@ -153,9 +228,9 @@ fi
# Run a Database migration
echo -e "\e[34m[Info] Running database migration...\e[0m"
yarn workspace @sourcebot/db prisma:migrate:prod
DATABASE_URL="$DATABASE_URL" yarn workspace @sourcebot/db prisma:migrate:prod
# Create the log directory
# Create the log directory if it doesn't exist
mkdir -p /var/log/sourcebot
# Run supervisord
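These entrypoint changes let the container start without root (for example under OpenShift's `anyuid` or random-UID model): Postgres is initialized and launched directly instead of via `su postgres`, and the root-only `chown` is skipped. A hedged sketch of exercising the non-root path locally; the UID is arbitrary:
```bash
# Run the container as a non-root UID; the entrypoint detects this and takes
# the non-root branches above. 1001 is just an example UID.
docker run \
    --user 1001:0 \
    -p 3000:3000 \
    -v $(pwd):/data \
    -e CONFIG_PATH=/data/config.json \
    ghcr.io/sourcebot-dev/sourcebot:latest
```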

View file

@ -4,9 +4,9 @@
"packages/*"
],
"scripts": {
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach -A run build",
"test": "yarn workspaces foreach -A run test",
"dev": "yarn dev:prisma:migrate:dev && npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web watch:mcp watch:schemas",
"build": "cross-env SKIP_ENV_VALIDATION=1 yarn workspaces foreach --all --topological run build",
"test": "yarn workspaces foreach --all --topological run test",
"dev": "concurrently --kill-others --names \"zoekt,worker,web,mcp,schemas\" 'yarn dev:zoekt' 'yarn dev:backend' 'yarn dev:web' 'yarn watch:mcp' 'yarn watch:schemas'",
"with-env": "cross-env PATH=\"$PWD/bin:$PATH\" dotenv -e .env.development -c --",
"dev:zoekt": "yarn with-env zoekt-webserver -index .sourcebot/index -rpc",
"dev:backend": "yarn with-env yarn workspace @sourcebot/backend dev:watch",
@ -18,15 +18,16 @@
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset",
"dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push",
"build:deps": "yarn workspaces foreach -R --from '{@sourcebot/schemas,@sourcebot/error,@sourcebot/crypto,@sourcebot/db,@sourcebot/shared}' run build"
"build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build"
},
"devDependencies": {
"concurrently": "^9.2.1",
"cross-env": "^7.0.3",
"dotenv-cli": "^8.0.0",
"npm-run-all": "^4.1.5"
"dotenv-cli": "^8.0.0"
},
"packageManager": "yarn@4.7.0",
"resolutions": {
"prettier": "3.5.3"
"prettier": "3.5.3",
"@lezer/common": "1.3.0"
}
}
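With these scripts, a typical local workflow builds the shared workspace packages once, then starts all dev processes under `concurrently`:
```bash
# Build shared workspace dependencies in topological order, then run the
# zoekt, worker, web, mcp, and schemas dev processes in parallel.
yarn build:deps
yarn dev
```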

View file

@ -24,33 +24,35 @@
"dependencies": {
"@coderabbitai/bitbucket": "^1.1.3",
"@gitbeaker/rest": "^40.5.1",
"@octokit/app": "^16.1.1",
"@octokit/rest": "^21.0.2",
"@sentry/cli": "^2.42.2",
"@sentry/node": "^9.3.0",
"@sentry/profiling-node": "^9.3.0",
"@sourcebot/crypto": "workspace:*",
"@sourcebot/db": "workspace:*",
"@sourcebot/error": "workspace:*",
"@sourcebot/logger": "workspace:*",
"@sourcebot/schemas": "workspace:*",
"@sourcebot/shared": "workspace:*",
"@t3-oss/env-core": "^0.12.0",
"@types/express": "^5.0.0",
"argparse": "^2.0.1",
"azure-devops-node-api": "^15.1.1",
"bullmq": "^5.34.10",
"chokidar": "^4.0.3",
"cross-fetch": "^4.0.0",
"dotenv": "^16.4.5",
"express": "^4.21.2",
"express-async-errors": "^3.1.1",
"fast-deep-equal": "^3.1.3",
"git-url-parse": "^16.1.0",
"gitea-js": "^1.22.0",
"glob": "^11.0.0",
"groupmq": "^1.0.0",
"ioredis": "^5.4.2",
"lowdb": "^7.0.1",
"micromatch": "^4.0.8",
"p-limit": "^7.2.0",
"posthog-node": "^4.2.1",
"prom-client": "^15.1.3",
"simple-git": "^3.27.0",
"zod": "^3.24.3"
"zod": "^3.25.74"
}
}

packages/backend/src/api.ts (new file)
View file

@ -0,0 +1,103 @@
import { PrismaClient, RepoIndexingJobType } from '@sourcebot/db';
import { createLogger } from '@sourcebot/shared';
import express, { Request, Response } from 'express';
import 'express-async-errors';
import * as http from "http";
import z from 'zod';
import { ConnectionManager } from './connectionManager.js';
import { PromClient } from './promClient.js';
import { RepoIndexManager } from './repoIndexManager.js';
const logger = createLogger('api');
const PORT = 3060;
export class Api {
private server: http.Server;
constructor(
promClient: PromClient,
private prisma: PrismaClient,
private connectionManager: ConnectionManager,
private repoIndexManager: RepoIndexManager,
) {
const app = express();
app.use(express.json());
app.use(express.urlencoded({ extended: true }));
// Prometheus metrics endpoint
app.use('/metrics', async (_req: Request, res: Response) => {
res.set('Content-Type', promClient.registry.contentType);
const metrics = await promClient.registry.metrics();
res.end(metrics);
});
app.post('/api/sync-connection', this.syncConnection.bind(this));
app.post('/api/index-repo', this.indexRepo.bind(this));
this.server = app.listen(PORT, () => {
logger.info(`API server is running on port ${PORT}`);
});
}
private async syncConnection(req: Request, res: Response) {
const schema = z.object({
connectionId: z.number(),
}).strict();
const parsed = schema.safeParse(req.body);
if (!parsed.success) {
res.status(400).json({ error: parsed.error.message });
return;
}
const { connectionId } = parsed.data;
const connection = await this.prisma.connection.findUnique({
where: {
id: connectionId,
}
});
if (!connection) {
res.status(404).json({ error: 'Connection not found' });
return;
}
const [jobId] = await this.connectionManager.createJobs([connection]);
res.status(200).json({ jobId });
}
private async indexRepo(req: Request, res: Response) {
const schema = z.object({
repoId: z.number(),
}).strict();
const parsed = schema.safeParse(req.body);
if (!parsed.success) {
res.status(400).json({ error: parsed.error.message });
return;
}
const { repoId } = parsed.data;
const repo = await this.prisma.repo.findUnique({
where: { id: repoId },
});
if (!repo) {
res.status(404).json({ error: 'Repo not found' });
return;
}
const [jobId] = await this.repoIndexManager.createJobs([repo], RepoIndexingJobType.INDEX);
res.status(200).json({ jobId });
}
public async dispose() {
return new Promise<void>((resolve, reject) => {
this.server.close((err) => {
if (err) reject(err);
else resolve(undefined);
});
});
}
}
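A quick way to exercise these endpoints (for example from inside the deployment); the IDs below are placeholders and the port comes from the `PORT` constant above:
```bash
# Enqueue a sync job for a connection (connectionId is illustrative).
curl -X POST http://localhost:3060/api/sync-connection \
    -H 'Content-Type: application/json' \
    -d '{"connectionId": 1}'

# Enqueue an index job for a repo (repoId is illustrative).
curl -X POST http://localhost:3060/api/index-repo \
    -H 'Content-Type: application/json' \
    -d '{"repoId": 1}'
```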

View file

@ -1,13 +1,12 @@
import { AzureDevOpsConnectionConfig } from "@sourcebot/schemas/v3/azuredevops.type";
import { createLogger } from "@sourcebot/logger";
import { getTokenFromConfig, measure, fetchWithRetry } from "./utils.js";
import { createLogger } from "@sourcebot/shared";
import { measure, fetchWithRetry } from "./utils.js";
import micromatch from "micromatch";
import { PrismaClient } from "@sourcebot/db";
import { BackendException, BackendError } from "@sourcebot/error";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import * as Sentry from "@sentry/node";
import * as azdev from "azure-devops-node-api";
import { GitRepository } from "azure-devops-node-api/interfaces/GitInterfaces.js";
import { getTokenFromConfig } from "@sourcebot/shared";
const logger = createLogger('azuredevops');
const AZUREDEVOPS_CLOUD_HOSTNAME = "dev.azure.com";
@ -28,66 +27,54 @@ function createAzureDevOpsConnection(
export const getAzureDevOpsReposFromConfig = async (
config: AzureDevOpsConnectionConfig,
orgId: number,
db: PrismaClient
) => {
const baseUrl = config.url || `https://${AZUREDEVOPS_CLOUD_HOSTNAME}`;
const token = config.token ?
await getTokenFromConfig(config.token, orgId, db, logger) :
await getTokenFromConfig(config.token) :
undefined;
if (!token) {
const e = new BackendException(BackendError.CONNECTION_SYNC_INVALID_TOKEN, {
message: 'Azure DevOps requires a Personal Access Token',
});
const e = new Error('Azure DevOps requires a Personal Access Token');
Sentry.captureException(e);
throw e;
}
const useTfsPath = config.useTfsPath || false;
let allRepos: GitRepository[] = [];
let notFound: {
users: string[],
orgs: string[],
repos: string[],
} = {
users: [],
orgs: [],
repos: [],
};
let allWarnings: string[] = [];
if (config.orgs) {
const { validRepos, notFoundOrgs } = await getReposForOrganizations(
const { repos, warnings } = await getReposForOrganizations(
config.orgs,
baseUrl,
token,
useTfsPath
);
allRepos = allRepos.concat(validRepos);
notFound.orgs = notFoundOrgs;
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.projects) {
const { validRepos, notFoundProjects } = await getReposForProjects(
const { repos, warnings } = await getReposForProjects(
config.projects,
baseUrl,
token,
useTfsPath
);
allRepos = allRepos.concat(validRepos);
notFound.repos = notFound.repos.concat(notFoundProjects);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.repos) {
const { validRepos, notFoundRepos } = await getRepos(
const { repos, warnings } = await getRepos(
config.repos,
baseUrl,
token,
useTfsPath
);
allRepos = allRepos.concat(validRepos);
notFound.repos = notFound.repos.concat(notFoundRepos);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
let repos = allRepos
@ -103,8 +90,8 @@ export const getAzureDevOpsReposFromConfig = async (
logger.debug(`Found ${repos.length} total repositories.`);
return {
validRepos: repos,
notFound,
repos,
warnings: allWarnings,
};
};
@ -221,10 +208,11 @@ async function getReposForOrganizations(
// Check if it's a 404-like error (organization not found)
if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
logger.error(`Organization ${org} not found or no access`);
const warning = `Organization ${org} not found or no access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: org
type: 'warning' as const,
warning
};
}
throw error;
@@ -232,11 +220,11 @@
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundOrgs } = processPromiseResults<GitRepository>(results);
const { validItems: repos, warnings } = processPromiseResults<GitRepository>(results);
return {
validRepos,
notFoundOrgs,
repos,
warnings,
};
}
@@ -274,10 +262,11 @@ async function getReposForProjects(
logger.error(`Failed to fetch repositories for project ${project}.`, error);
if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
logger.error(`Project ${project} not found or no access`);
const warning = `Project ${project} not found or no access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: project
type: 'warning' as const,
warning
};
}
throw error;
@@ -285,11 +274,11 @@
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundProjects } = processPromiseResults<GitRepository>(results);
const { validItems: repos, warnings } = processPromiseResults<GitRepository>(results);
return {
validRepos,
notFoundProjects,
repos,
warnings,
};
}
@@ -328,10 +317,11 @@ async function getRepos(
logger.error(`Failed to fetch repository ${repo}.`, error);
if (error && typeof error === 'object' && 'statusCode' in error && error.statusCode === 404) {
logger.error(`Repository ${repo} not found or no access`);
const warning = `Repository ${repo} not found or no access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: repo
type: 'warning' as const,
warning
};
}
throw error;
@@ -339,10 +329,10 @@
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundRepos } = processPromiseResults<GitRepository>(results);
const { validItems: repos, warnings } = processPromiseResults<GitRepository>(results);
return {
validRepos,
notFoundRepos,
repos,
warnings,
};
}
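Across all three fetchers the return shape is now uniform: every helper resolves to { repos, warnings } instead of per-kind notFound buckets. A minimal sketch of how that uniformity composes; the collect helper below is hypothetical and not part of this change:

// Hypothetical aggregation helper (not in this diff) illustrating the new
// uniform { repos, warnings } contract shared by all fetchers.
type FetchResult<T> = { repos: T[], warnings: string[] };

async function collect<T>(fetchers: Array<() => Promise<FetchResult<T>>>): Promise<FetchResult<T>> {
    const repos: T[] = [];
    const warnings: string[] = [];
    for (const fetch of fetchers) {
        const result = await fetch();
        repos.push(...result.repos);       // valid repositories accumulate
        warnings.push(...result.warnings); // warnings accumulate as one flat list
    }
    return { repos, warnings };
}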

View file

@@ -2,16 +2,17 @@ import { createBitbucketCloudClient } from "@coderabbitai/bitbucket/cloud";
import { createBitbucketServerClient } from "@coderabbitai/bitbucket/server";
import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type";
import type { ClientOptions, ClientPathsWithMethod } from "openapi-fetch";
import { createLogger } from "@sourcebot/logger";
import { PrismaClient } from "@sourcebot/db";
import { getTokenFromConfig, measure, fetchWithRetry } from "./utils.js";
import { createLogger } from "@sourcebot/shared";
import { measure, fetchWithRetry } from "./utils.js";
import * as Sentry from "@sentry/node";
import micromatch from "micromatch";
import {
SchemaRepository as CloudRepository,
} from "@coderabbitai/bitbucket/cloud/openapi";
import { SchemaRestRepository as ServerRepository } from "@coderabbitai/bitbucket/server/openapi";
import { processPromiseResults } from "./connectionUtils.js";
import { throwIfAnyFailed } from "./connectionUtils.js";
import { getTokenFromConfig } from "@sourcebot/shared";
const logger = createLogger('bitbucket');
const BITBUCKET_CLOUD_GIT = 'https://bitbucket.org';
@@ -27,9 +28,9 @@ interface BitbucketClient {
apiClient: any;
baseUrl: string;
gitUrl: string;
getReposForWorkspace: (client: BitbucketClient, workspaces: string[]) => Promise<{validRepos: BitbucketRepository[], notFoundWorkspaces: string[]}>;
getReposForProjects: (client: BitbucketClient, projects: string[]) => Promise<{validRepos: BitbucketRepository[], notFoundProjects: string[]}>;
getRepos: (client: BitbucketClient, repos: string[]) => Promise<{validRepos: BitbucketRepository[], notFoundRepos: string[]}>;
getReposForWorkspace: (client: BitbucketClient, workspaces: string[]) => Promise<{repos: BitbucketRepository[], warnings: string[]}>;
getReposForProjects: (client: BitbucketClient, projects: string[]) => Promise<{repos: BitbucketRepository[], warnings: string[]}>;
getRepos: (client: BitbucketClient, repos: string[]) => Promise<{repos: BitbucketRepository[], warnings: string[]}>;
shouldExcludeRepo: (repo: BitbucketRepository, config: BitbucketConnectionConfig) => boolean;
}
@@ -57,9 +58,9 @@ type ServerPaginatedResponse<T> = {
readonly nextPageStart: number;
}
export const getBitbucketReposFromConfig = async (config: BitbucketConnectionConfig, orgId: number, db: PrismaClient) => {
export const getBitbucketReposFromConfig = async (config: BitbucketConnectionConfig) => {
const token = config.token ?
await getTokenFromConfig(config.token, orgId, db, logger) :
await getTokenFromConfig(config.token) :
undefined;
if (config.deploymentType === 'server' && !config.url) {
@@ -71,32 +72,24 @@ export const getBitbucketReposFromConfig = async (config: BitbucketConnectionCon
cloudClient(config.user, token);
let allRepos: BitbucketRepository[] = [];
let notFound: {
orgs: string[],
users: string[],
repos: string[],
} = {
orgs: [],
users: [],
repos: [],
};
let allWarnings: string[] = [];
if (config.workspaces) {
const { validRepos, notFoundWorkspaces } = await client.getReposForWorkspace(client, config.workspaces);
allRepos = allRepos.concat(validRepos);
notFound.orgs = notFoundWorkspaces;
const { repos, warnings } = await client.getReposForWorkspace(client, config.workspaces);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.projects) {
const { validRepos, notFoundProjects } = await client.getReposForProjects(client, config.projects);
allRepos = allRepos.concat(validRepos);
notFound.orgs = notFoundProjects;
const { repos, warnings } = await client.getReposForProjects(client, config.projects);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.repos) {
const { validRepos, notFoundRepos } = await client.getRepos(client, config.repos);
allRepos = allRepos.concat(validRepos);
notFound.repos = notFoundRepos;
const { repos, warnings } = await client.getRepos(client, config.repos);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
const filteredRepos = allRepos.filter((repo) => {
@@ -104,8 +97,8 @@ export const getBitbucketReposFromConfig = async (config: BitbucketConnectionCon
});
return {
validRepos: filteredRepos,
notFound,
repos: filteredRepos,
warnings: allWarnings,
};
}
@@ -186,7 +179,7 @@ function parseUrl(url: string): { path: string; query: Record<string, string>; }
}
async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{validRepos: CloudRepository[], notFoundWorkspaces: string[]}> {
async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{repos: CloudRepository[], warnings: string[]}> {
const results = await Promise.allSettled(workspaces.map(async (workspace) => {
try {
logger.debug(`Fetching all repos for workspace ${workspace}...`);
@@ -221,10 +214,11 @@ async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: st
const status = e?.cause?.response?.status;
if (status == 404) {
logger.error(`Workspace ${workspace} not found or invalid access`)
const warning = `Workspace ${workspace} not found or invalid access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: workspace
type: 'warning' as const,
warning
}
}
throw e;
@@ -232,21 +226,22 @@ async function cloudGetReposForWorkspace(client: BitbucketClient, workspaces: st
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundWorkspaces } = processPromiseResults(results);
const { validItems: repos, warnings } = processPromiseResults(results);
return {
validRepos,
notFoundWorkspaces,
repos,
warnings,
};
}
async function cloudGetReposForProjects(client: BitbucketClient, projects: string[]): Promise<{validRepos: CloudRepository[], notFoundProjects: string[]}> {
async function cloudGetReposForProjects(client: BitbucketClient, projects: string[]): Promise<{repos: CloudRepository[], warnings: string[]}> {
const results = await Promise.allSettled(projects.map(async (project) => {
const [workspace, project_name] = project.split('/');
if (!workspace || !project_name) {
logger.error(`Invalid project ${project}`);
const warning = `Invalid project ${project}`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: project
type: 'warning' as const,
warning
}
}
@@ -282,10 +277,11 @@ async function cloudGetReposForProjects(client: BitbucketClient, projects: strin
const status = e?.cause?.response?.status;
if (status == 404) {
logger.error(`Project ${project_name} not found in ${workspace} or invalid access`)
const warning = `Project ${project_name} not found in ${workspace} or invalid access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: project
type: 'warning' as const,
warning
}
}
throw e;
@@ -293,21 +289,22 @@ async function cloudGetReposForProjects(client: BitbucketClient, projects: strin
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundProjects } = processPromiseResults(results);
const { validItems: repos, warnings } = processPromiseResults(results);
return {
validRepos,
notFoundProjects
repos,
warnings
}
}
async function cloudGetRepos(client: BitbucketClient, repos: string[]): Promise<{validRepos: CloudRepository[], notFoundRepos: string[]}> {
const results = await Promise.allSettled(repos.map(async (repo) => {
async function cloudGetRepos(client: BitbucketClient, repoList: string[]): Promise<{repos: CloudRepository[], warnings: string[]}> {
const results = await Promise.allSettled(repoList.map(async (repo) => {
const [workspace, repo_slug] = repo.split('/');
if (!workspace || !repo_slug) {
logger.error(`Invalid repo ${repo}`);
const warning = `Invalid repo ${repo}`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: repo
type: 'warning' as const,
warning
};
}
@@ -329,10 +326,11 @@ async function cloudGetRepos(client: BitbucketClient, repos: string[]): Promise<
const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`Repo ${repo} not found in ${workspace} or invalid access`);
const warning = `Repo ${repo} not found in ${workspace} or invalid access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: repo
type: 'warning' as const,
warning
};
}
throw e;
@@ -340,32 +338,40 @@ async function cloudGetRepos(client: BitbucketClient, repos: string[]): Promise<
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundRepos } = processPromiseResults(results);
const { validItems: repos, warnings } = processPromiseResults(results);
return {
validRepos,
notFoundRepos
repos,
warnings
};
}
function cloudShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketConnectionConfig): boolean {
const cloudRepo = repo as CloudRepository;
let reason = '';
const repoName = cloudRepo.full_name!;
const shouldExclude = (() => {
if (config.exclude?.repos && config.exclude.repos.includes(cloudRepo.full_name!)) {
if (config.exclude?.repos) {
if (micromatch.isMatch(repoName, config.exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
}
}
if (!!config.exclude?.archived) {
logger.warn(`Exclude archived repos flag provided in config but Bitbucket Cloud does not support archived repos. Ignoring...`);
}
if (!!config.exclude?.forks && cloudRepo.parent !== undefined) {
reason = `\`exclude.forks\` is true`;
return true;
}
return false;
})();
if (shouldExclude) {
logger.debug(`Excluding repo ${cloudRepo.full_name} because it matches the exclude pattern`);
logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true;
}
return false;
@@ -434,15 +440,16 @@ const getPaginatedServer = async <T>(
return results;
}
async function serverGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{validRepos: ServerRepository[], notFoundWorkspaces: string[]}> {
async function serverGetReposForWorkspace(client: BitbucketClient, workspaces: string[]): Promise<{repos: ServerRepository[], warnings: string[]}> {
const warnings = workspaces.map(workspace => `Workspaces are not supported in Bitbucket Server: ${workspace}`);
logger.debug('Workspaces are not supported in Bitbucket Server');
return {
validRepos: [],
notFoundWorkspaces: workspaces
repos: [],
warnings
};
}
async function serverGetReposForProjects(client: BitbucketClient, projects: string[]): Promise<{validRepos: ServerRepository[], notFoundProjects: string[]}> {
async function serverGetReposForProjects(client: BitbucketClient, projects: string[]): Promise<{repos: ServerRepository[], warnings: string[]}> {
const results = await Promise.allSettled(projects.map(async (project) => {
try {
logger.debug(`Fetching all repos for project ${project}...`);
@@ -477,10 +484,11 @@ async function serverGetReposForProjects(client: BitbucketClient, projects: stri
const status = e?.cause?.response?.status;
if (status == 404) {
logger.error(`Project ${project} not found or invalid access`);
const warning = `Project ${project} not found or invalid access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: project
type: 'warning' as const,
warning
};
}
throw e;
@@ -488,21 +496,22 @@ async function serverGetReposForProjects(client: BitbucketClient, projects: stri
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundProjects } = processPromiseResults(results);
const { validItems: repos, warnings } = processPromiseResults(results);
return {
validRepos,
notFoundProjects
repos,
warnings
};
}
async function serverGetRepos(client: BitbucketClient, repos: string[]): Promise<{validRepos: ServerRepository[], notFoundRepos: string[]}> {
const results = await Promise.allSettled(repos.map(async (repo) => {
async function serverGetRepos(client: BitbucketClient, repoList: string[]): Promise<{repos: ServerRepository[], warnings: string[]}> {
const results = await Promise.allSettled(repoList.map(async (repo) => {
const [project, repo_slug] = repo.split('/');
if (!project || !repo_slug) {
logger.error(`Invalid repo ${repo}`);
const warning = `Invalid repo ${repo}`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: repo
type: 'warning' as const,
warning
};
}
@@ -524,10 +533,11 @@ async function serverGetRepos(client: BitbucketClient, repos: string[]): Promise
const status = e?.cause?.response?.status;
if (status === 404) {
logger.error(`Repo ${repo} not found in project ${project} or invalid access`);
const warning = `Repo ${repo} not found in project ${project} or invalid access`;
logger.warn(warning);
return {
type: 'notFound' as const,
value: repo
type: 'warning' as const,
warning
};
}
throw e;
@@ -535,10 +545,10 @@ async function serverGetRepos(client: BitbucketClient, repos: string[]): Promise
}));
throwIfAnyFailed(results);
const { validItems: validRepos, notFoundItems: notFoundRepos } = processPromiseResults(results);
const { validItems: repos, warnings } = processPromiseResults(results);
return {
validRepos,
notFoundRepos
repos,
warnings
};
}
@@ -547,23 +557,32 @@ function serverShouldExcludeRepo(repo: BitbucketRepository, config: BitbucketCon
const projectName = serverRepo.project!.key;
const repoSlug = serverRepo.slug!;
const repoName = `${projectName}/${repoSlug}`;
let reason = '';
const shouldExclude = (() => {
if (config.exclude?.repos && config.exclude.repos.includes(`${projectName}/${repoSlug}`)) {
if (config.exclude?.repos) {
if (micromatch.isMatch(repoName, config.exclude.repos)) {
reason = `\`exclude.repos\` contains ${repoName}`;
return true;
}
}
if (!!config.exclude?.archived && serverRepo.archived) {
reason = `\`exclude.archived\` is true`;
return true;
}
if (!!config.exclude?.forks && serverRepo.origin !== undefined) {
reason = `\`exclude.forks\` is true`;
return true;
}
return false;
})();
if (shouldExclude) {
logger.debug(`Excluding repo ${projectName}/${repoSlug} because it matches the exclude pattern`);
logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`);
return true;
}
return false;
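Note the behavioral change in both exclusion helpers: exclude.repos entries were previously matched with exact includes checks, but now go through micromatch.isMatch, so they behave as glob patterns. A small illustration; the patterns here are made up for the example:

import micromatch from "micromatch";

const excludeRepos = ["PROJ/legacy-*", "*/sandbox"];  // illustrative patterns
micromatch.isMatch("PROJ/legacy-api", excludeRepos);  // true  (matches "PROJ/legacy-*")
micromatch.isMatch("PROJ/service", excludeRepos);     // false
micromatch.isMatch("TEAM/sandbox", excludeRepos);     // true  (matches "*/sandbox")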

View file

@@ -0,0 +1,127 @@
import { Prisma, PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/shared";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { loadConfig } from "@sourcebot/shared";
import chokidar, { FSWatcher } from 'chokidar';
import { ConnectionManager } from "./connectionManager.js";
import { SINGLE_TENANT_ORG_ID } from "./constants.js";
import { syncSearchContexts } from "./ee/syncSearchContexts.js";
import isEqual from 'fast-deep-equal';
const logger = createLogger('config-manager');
export class ConfigManager {
private watcher: FSWatcher;
constructor(
private db: PrismaClient,
private connectionManager: ConnectionManager,
configPath: string,
) {
this.watcher = chokidar.watch(configPath, {
ignoreInitial: true, // Don't fire events for existing files
awaitWriteFinish: {
stabilityThreshold: 100, // File size stable for 100ms
pollInterval: 100 // Check every 100ms
},
atomic: true // Handle atomic writes (temp file + rename)
});
this.watcher.on('change', async () => {
logger.info(`Config file ${configPath} changed. Syncing config.`);
try {
await this.syncConfig(configPath);
} catch (error) {
logger.error(`Failed to sync config: ${error}`);
}
});
this.syncConfig(configPath);
}
private syncConfig = async (configPath: string) => {
const config = await loadConfig(configPath);
await this.syncConnections(config.connections);
await syncSearchContexts({
contexts: config.contexts,
orgId: SINGLE_TENANT_ORG_ID,
db: this.db,
});
}
private syncConnections = async (connections?: { [key: string]: ConnectionConfig }) => {
if (connections) {
for (const [key, newConnectionConfig] of Object.entries(connections)) {
const existingConnection = await this.db.connection.findUnique({
where: {
name_orgId: {
name: key,
orgId: SINGLE_TENANT_ORG_ID,
}
}
});
const existingConnectionConfig = existingConnection ? existingConnection.config as unknown as ConnectionConfig : undefined;
const connectionNeedsSyncing =
!existingConnectionConfig ||
!isEqual(existingConnectionConfig, newConnectionConfig);
// Either update the existing connection or create a new one.
const connection = existingConnection ?
await this.db.connection.update({
where: {
id: existingConnection.id,
},
data: {
config: newConnectionConfig as unknown as Prisma.InputJsonValue,
isDeclarative: true,
}
}) :
await this.db.connection.create({
data: {
name: key,
config: newConnectionConfig as unknown as Prisma.InputJsonValue,
connectionType: newConnectionConfig.type,
isDeclarative: true,
org: {
connect: {
id: SINGLE_TENANT_ORG_ID,
}
}
}
});
if (connectionNeedsSyncing) {
logger.info(`Change detected for connection '${key}' (id: ${connection.id}). Creating sync job.`);
await this.connectionManager.createJobs([connection]);
}
}
}
// Delete any connections that are no longer in the config.
const deletedConnections = await this.db.connection.findMany({
where: {
isDeclarative: true,
name: {
notIn: Object.keys(connections ?? {}),
},
orgId: SINGLE_TENANT_ORG_ID,
}
});
for (const connection of deletedConnections) {
logger.info(`Deleting connection with name '${connection.name}'. Connection ID: ${connection.id}`);
await this.db.connection.delete({
where: {
id: connection.id,
}
})
}
}
public dispose = async () => {
await this.watcher.close();
}
}
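For context, a sketch of the declarative config shape this manager watches. The connection bodies are illustrative rather than a schema reference; only the keyed-by-name connections map is load-bearing here:

// Each key under `connections` becomes a declarative Connection row with that
// name; removing or renaming a key deletes the old row on the next sync.
const exampleConfig = {
    connections: {
        "my-github": { type: "github", repos: ["my-org/my-repo"] },  // illustrative
        "my-gitlab": { type: "gitlab", projects: ["my-group/app"] }, // illustrative
    },
};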

View file

@@ -1,212 +1,241 @@
import { Connection, ConnectionSyncStatus, PrismaClient, Prisma } from "@sourcebot/db";
import { Job, Queue, Worker } from 'bullmq';
import { Settings } from "./types.js";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "@sourcebot/logger";
import { Redis } from 'ioredis';
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig, compileBitbucketConfig, compileAzureDevOpsConfig, compileGenericGitHostConfig } from "./repoCompileUtils.js";
import { BackendError, BackendException } from "@sourcebot/error";
import { captureEvent } from "./posthog.js";
import { env } from "./env.js";
import * as Sentry from "@sentry/node";
import { loadConfig, syncSearchContexts } from "@sourcebot/shared";
import { Connection, ConnectionSyncJobStatus, PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/shared";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { loadConfig, env } from "@sourcebot/shared";
import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis';
import { compileAzureDevOpsConfig, compileBitbucketConfig, compileGenericGitHostConfig, compileGerritConfig, compileGiteaConfig, compileGithubConfig, compileGitlabConfig } from "./repoCompileUtils.js";
import { Settings } from "./types.js";
import { groupmqLifecycleExceptionWrapper, setIntervalAsync } from "./utils.js";
import { syncSearchContexts } from "./ee/syncSearchContexts.js";
import { captureEvent } from "./posthog.js";
import { PromClient } from "./promClient.js";
import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS } from "./constants.js";
const QUEUE_NAME = 'connectionSyncQueue';
const LOG_TAG = 'connection-manager';
const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
const QUEUE_NAME = 'connection-sync-queue';
type JobPayload = {
jobId: string,
connectionId: number,
connectionName: string,
orgId: number,
config: ConnectionConfig,
};
type JobResult = {
repoCount: number,
}
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 2; // 2 hour timeout
export class ConnectionManager {
private worker: Worker;
private worker: Worker<JobPayload>;
private queue: Queue<JobPayload>;
private logger = createLogger('connection-manager');
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
private redis: Redis,
private promClient: PromClient,
) {
this.queue = new Queue<JobPayload>(QUEUE_NAME, {
connection: redis,
this.queue = new Queue<JobPayload>({
redis,
namespace: QUEUE_NAME,
jobTimeoutMs: JOB_TIMEOUT_MS,
maxAttempts: 3,
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
});
this.worker = new Worker(QUEUE_NAME, this.runSyncJob.bind(this), {
connection: redis,
this.worker = new Worker<JobPayload>({
queue: this.queue,
maxStalledCount: 1,
handler: this.runJob.bind(this),
concurrency: this.settings.maxConnectionSyncJobConcurrency,
});
this.worker.on('completed', this.onSyncJobCompleted.bind(this));
this.worker.on('failed', this.onSyncJobFailed.bind(this));
}
public async scheduleConnectionSync(connection: Connection) {
await this.db.$transaction(async (tx) => {
await tx.connection.update({
where: { id: connection.id },
data: { syncStatus: ConnectionSyncStatus.IN_SYNC_QUEUE },
...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
logger: true,
} : {}),
});
const connectionConfig = connection.config as unknown as ConnectionConfig;
await this.queue.add('connectionSyncJob', {
connectionId: connection.id,
connectionName: connection.name,
orgId: connection.orgId,
config: connectionConfig,
}, {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
});
this.logger.info(`Added job to queue for connection ${connection.name} (id: ${connection.id})`);
}).catch((err: unknown) => {
this.logger.error(`Failed to add job to queue for connection ${connection.name} (id: ${connection.id}): ${err}`);
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
this.worker.on('stalled', this.onJobStalled.bind(this));
this.worker.on('error', this.onWorkerError.bind(this));
// graceful-timeout is triggered when a job is still processing after
// worker.close() is called and the timeout period has elapsed. In this case,
// we fail the job with no retry.
this.worker.on('graceful-timeout', this.onJobGracefulTimeout.bind(this));
}
public startScheduler() {
this.logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
logger.debug('Starting scheduler');
this.interval = setIntervalAsync(async () => {
const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs);
const timeoutDate = new Date(Date.now() - JOB_TIMEOUT_MS);
const connections = await this.db.connection.findMany({
where: {
OR: [
// When the connection needs to be synced, we want to sync it immediately.
{
syncStatus: ConnectionSyncStatus.SYNC_NEEDED,
},
// When the connection has already been synced, we only want to re-sync if the re-sync interval has elapsed
// (or if the date isn't set for some reason).
{
AND: [
{
OR: [
{ syncStatus: ConnectionSyncStatus.SYNCED },
{ syncStatus: ConnectionSyncStatus.SYNCED_WITH_WARNINGS },
]
},
{
OR: [
{ syncedAt: null },
{ syncedAt: { lt: thresholdDate } },
]
},
{
NOT: {
syncJobs: {
some: {
OR: [
// Don't schedule if there are active jobs that were created within the threshold date.
// This handles the case where a job is stuck in a pending state and will never be scheduled.
{
AND: [
{ status: { in: [ConnectionSyncJobStatus.PENDING, ConnectionSyncJobStatus.IN_PROGRESS] } },
{ createdAt: { gt: timeoutDate } },
]
},
// Don't schedule if there are recent failed jobs (within the threshold date).
{
AND: [
{ status: ConnectionSyncJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
}
}
}
]
}
});
for (const connection of connections) {
await this.scheduleConnectionSync(connection);
if (connections.length > 0) {
await this.createJobs(connections);
}
}, this.settings.resyncConnectionPollingIntervalMs);
this.worker.run();
}
private async runSyncJob(job: Job<JobPayload>): Promise<JobResult> {
const { config, orgId, connectionName } = job.data;
public async createJobs(connections: Connection[]) {
const jobs = await this.db.connectionSyncJob.createManyAndReturn({
data: connections.map(connection => ({
connectionId: connection.id,
})),
include: {
connection: true,
}
});
for (const job of jobs) {
logger.info(`Scheduling job ${job.id} for connection ${job.connection.name} (id: ${job.connectionId})`);
await this.queue.add({
groupId: `connection:${job.connectionId}`,
data: {
jobId: job.id,
connectionId: job.connectionId,
connectionName: job.connection.name,
orgId: job.connection.orgId,
},
jobId: job.id,
});
this.promClient.pendingConnectionSyncJobs.inc({ connection: job.connection.name });
}
return jobs.map(job => job.id);
}
private async runJob(job: ReservedJob<JobPayload>): Promise<JobResult> {
const { jobId, connectionName } = job.data;
const logger = createJobLogger(jobId);
logger.info(`Running connection sync job ${jobId} for connection ${connectionName} (id: ${job.data.connectionId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
const currentStatus = await this.db.connectionSyncJob.findUniqueOrThrow({
where: {
id: jobId,
},
select: {
status: true,
}
});
// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
// is in an invalid state and should be skipped.
if (currentStatus.status !== ConnectionSyncJobStatus.PENDING && currentStatus.status !== ConnectionSyncJobStatus.IN_PROGRESS) {
throw new Error(`Job ${jobId} is not in a valid state. Expected: ${ConnectionSyncJobStatus.PENDING} or ${ConnectionSyncJobStatus.IN_PROGRESS}. Actual: ${currentStatus.status}. Skipping.`);
}
this.promClient.pendingConnectionSyncJobs.dec({ connection: connectionName });
this.promClient.activeConnectionSyncJobs.inc({ connection: connectionName });
// @note: We aren't actually doing anything with this atm.
const abortController = new AbortController();
const connection = await this.db.connection.findUnique({
const { connection: { config: rawConnectionConfig, orgId } } = await this.db.connectionSyncJob.update({
where: {
id: job.data.connectionId,
},
});
if (!connection) {
const e = new BackendException(BackendError.CONNECTION_SYNC_CONNECTION_NOT_FOUND, {
message: `Connection ${job.data.connectionId} not found`,
});
Sentry.captureException(e);
throw e;
}
// Reset the syncStatusMetadata to an empty object at the start of the sync job
await this.db.connection.update({
where: {
id: job.data.connectionId,
id: jobId,
},
data: {
syncStatus: ConnectionSyncStatus.SYNCING,
syncStatusMetadata: {}
status: ConnectionSyncJobStatus.IN_PROGRESS,
},
select: {
connection: {
select: {
config: true,
orgId: true,
}
})
let result: {
repoData: RepoData[],
notFound: {
users: string[],
orgs: string[],
repos: string[],
}
} = {
repoData: [],
notFound: {
users: [],
orgs: [],
repos: [],
}
};
},
});
try {
result = await (async () => {
const config = rawConnectionConfig as unknown as ConnectionConfig;
const result = await (async () => {
switch (config.type) {
case 'github': {
return await compileGithubConfig(config, job.data.connectionId, orgId, this.db, abortController);
return await compileGithubConfig(config, job.data.connectionId, abortController.signal);
}
case 'gitlab': {
return await compileGitlabConfig(config, job.data.connectionId, orgId, this.db);
return await compileGitlabConfig(config, job.data.connectionId);
}
case 'gitea': {
return await compileGiteaConfig(config, job.data.connectionId, orgId, this.db);
return await compileGiteaConfig(config, job.data.connectionId);
}
case 'gerrit': {
return await compileGerritConfig(config, job.data.connectionId, orgId);
return await compileGerritConfig(config, job.data.connectionId);
}
case 'bitbucket': {
return await compileBitbucketConfig(config, job.data.connectionId, orgId, this.db);
return await compileBitbucketConfig(config, job.data.connectionId);
}
case 'azuredevops': {
return await compileAzureDevOpsConfig(config, job.data.connectionId, orgId, this.db, abortController);
return await compileAzureDevOpsConfig(config, job.data.connectionId);
}
case 'git': {
return await compileGenericGitHostConfig(config, job.data.connectionId, orgId);
return await compileGenericGitHostConfig(config, job.data.connectionId);
}
}
})();
} catch (err) {
this.logger.error(`Failed to compile repo data for connection ${job.data.connectionId} (${connectionName}): ${err}`);
Sentry.captureException(err);
if (err instanceof BackendException) {
throw err;
} else {
throw new BackendException(BackendError.CONNECTION_SYNC_SYSTEM_ERROR, {
message: `Failed to compile repo data for connection ${job.data.connectionId}`,
});
}
}
let { repoData, warnings } = result;
let { repoData, notFound } = result;
// Push the information regarding not found users, orgs, and repos to the connection's syncStatusMetadata. Note that
// this won't be overwritten even if the connection job fails
await this.db.connection.update({
await this.db.connectionSyncJob.update({
where: {
id: job.data.connectionId,
id: jobId,
},
data: {
syncStatusMetadata: { notFound }
}
warningMessages: warnings,
},
});
// Filter out any duplicates by external_id and external_codeHostUrl.
repoData = repoData.filter((repo, index, self) => {
return index === self.findIndex(r =>
@@ -233,7 +262,7 @@ export class ConnectionManager {
}
});
const deleteDuration = performance.now() - deleteStart;
this.logger.info(`Deleted all RepoToConnection records for connection ${connectionName} (id: ${job.data.connectionId}) in ${deleteDuration}ms`);
logger.info(`Deleted all RepoToConnection records for connection ${connectionName} (id: ${job.data.connectionId}) in ${deleteDuration}ms`);
const totalUpsertStart = performance.now();
for (const repo of repoData) {
@@ -250,10 +279,10 @@
create: repo,
})
const upsertDuration = performance.now() - upsertStart;
this.logger.info(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`);
logger.debug(`Upserted repo ${repo.displayName} (id: ${repo.external_id}) in ${upsertDuration}ms`);
}
const totalUpsertDuration = performance.now() - totalUpsertStart;
this.logger.info(`Upserted ${repoData.length} repos for connection ${connectionName} (id: ${job.data.connectionId}) in ${totalUpsertDuration}ms`);
logger.info(`Upserted ${repoData.length} repos for connection ${connectionName} (id: ${job.data.connectionId}) in ${totalUpsertDuration}ms`);
}, { timeout: env.CONNECTION_MANAGER_UPSERT_TIMEOUT_MS });
return {
@@ -262,32 +291,23 @@
}
private async onSyncJobCompleted(job: Job<JobPayload>, result: JobResult) {
this.logger.info(`Connection sync job for connection ${job.data.connectionName} (id: ${job.data.connectionId}, jobId: ${job.id}) completed`);
const { connectionId, orgId } = job.data;
private onJobCompleted = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobCompleted', logger, async () => {
const logger = createJobLogger(job.id);
const { connectionId, connectionName, orgId } = job.data;
let syncStatusMetadata: Record<string, unknown> = (await this.db.connection.findUnique({
where: { id: connectionId },
select: { syncStatusMetadata: true }
}))?.syncStatusMetadata as Record<string, unknown> ?? {};
const { notFound } = syncStatusMetadata as {
notFound: {
users: string[],
orgs: string[],
repos: string[],
}
};
await this.db.connection.update({
await this.db.connectionSyncJob.update({
where: {
id: connectionId,
id: job.id,
},
data: {
syncStatus:
notFound.users.length > 0 ||
notFound.orgs.length > 0 ||
notFound.repos.length > 0 ? ConnectionSyncStatus.SYNCED_WITH_WARNINGS : ConnectionSyncStatus.SYNCED,
syncedAt: new Date()
status: ConnectionSyncJobStatus.COMPLETED,
completedAt: new Date(),
connection: {
update: {
syncedAt: new Date(),
}
}
}
});
@@ -303,73 +323,147 @@
contexts: config.contexts,
});
} catch (err) {
this.logger.error(`Failed to sync search contexts for connection ${connectionId}: ${err}`);
logger.error(`Failed to sync search contexts for connection ${connectionId}: ${err}`);
Sentry.captureException(err);
}
}
logger.info(`Connection sync job ${job.id} for connection ${job.data.connectionName} (id: ${job.data.connectionId}) completed`);
this.promClient.activeConnectionSyncJobs.dec({ connection: connectionName });
this.promClient.connectionSyncJobSuccessTotal.inc({ connection: connectionName });
const result = job.returnvalue as JobResult;
captureEvent('backend_connection_sync_job_completed', {
connectionId: connectionId,
repoCount: result.repoCount,
});
}
});
private async onSyncJobFailed(job: Job<JobPayload> | undefined, err: unknown) {
this.logger.info(`Connection sync job for connection ${job?.data.connectionName} (id: ${job?.data.connectionId}, jobId: ${job?.id}) failed with error: ${err}`);
Sentry.captureException(err, {
tags: {
connectionid: job?.data.connectionId,
jobId: job?.id,
queue: QUEUE_NAME,
private onJobFailed = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobFailed', logger, async () => {
const logger = createJobLogger(job.id);
const attempt = job.attemptsMade + 1;
const wasLastAttempt = attempt >= job.opts.attempts;
if (wasLastAttempt) {
const { connection } = await this.db.connectionSyncJob.update({
where: { id: job.id },
data: {
status: ConnectionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: job.failedReason,
},
select: {
connection: true,
}
});
if (job) {
const { connectionId } = job.data;
this.promClient.activeConnectionSyncJobs.dec({ connection: connection.name });
this.promClient.connectionSyncJobFailTotal.inc({ connection: connection.name });
logger.error(`Failed job ${job.id} for connection ${connection.name} (id: ${connection.id}). Attempt ${attempt} / ${job.opts.attempts}. Failing job.`);
} else {
const connection = await this.db.connection.findUniqueOrThrow({
where: { id: job.data.connectionId },
});
this.promClient.connectionSyncJobReattemptsTotal.inc({ connection: connection.name });
logger.warn(`Failed job ${job.id} for connection ${connection.name} (id: ${connection.id}). Attempt ${attempt} / ${job.opts.attempts}. Retrying.`);
}
captureEvent('backend_connection_sync_job_failed', {
connectionId: connectionId,
error: err instanceof BackendException ? err.code : 'UNKNOWN',
connectionId: job.data.connectionId,
error: job.failedReason,
});
});
// We may have pushed some metadata during the execution of the job, so we make sure to not overwrite the metadata here
let syncStatusMetadata: Record<string, unknown> = (await this.db.connection.findUnique({
where: { id: connectionId },
select: { syncStatusMetadata: true }
}))?.syncStatusMetadata as Record<string, unknown> ?? {};
if (err instanceof BackendException) {
syncStatusMetadata = {
...syncStatusMetadata,
error: err.code,
...err.metadata,
}
} else {
syncStatusMetadata = {
...syncStatusMetadata,
error: 'UNKNOWN',
}
}
await this.db.connection.update({
where: {
id: connectionId,
},
private onJobStalled = async (jobId: string) =>
groupmqLifecycleExceptionWrapper('onJobStalled', logger, async () => {
const logger = createJobLogger(jobId);
const { connection } = await this.db.connectionSyncJob.update({
where: { id: jobId },
data: {
syncStatus: ConnectionSyncStatus.FAILED,
syncStatusMetadata: syncStatusMetadata as Prisma.InputJsonValue,
status: ConnectionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: 'Job stalled',
},
select: {
connection: true,
}
});
this.promClient.activeConnectionSyncJobs.dec({ connection: connection.name });
this.promClient.connectionSyncJobFailTotal.inc({ connection: connection.name });
logger.error(`Job ${jobId} stalled for connection ${connection.name} (id: ${connection.id})`);
captureEvent('backend_connection_sync_job_failed', {
connectionId: connection.id,
error: 'Job stalled',
});
});
private onJobGracefulTimeout = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobGracefulTimeout', logger, async () => {
const logger = createJobLogger(job.id);
const { connection } = await this.db.connectionSyncJob.update({
where: { id: job.id },
data: {
status: ConnectionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: 'Job timed out',
},
select: {
connection: true,
}
});
this.promClient.activeConnectionSyncJobs.dec({ connection: connection.name });
this.promClient.connectionSyncJobFailTotal.inc({ connection: connection.name });
logger.error(`Job ${job.id} timed out for connection ${connection.name} (id: ${connection.id})`);
captureEvent('backend_connection_sync_job_failed', {
connectionId: connection.id,
error: 'Job timed out',
});
});
private async onWorkerError(error: Error) {
Sentry.captureException(error);
logger.error(`Connection syncer worker error.`, error);
}
public dispose() {
public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close();
this.queue.close();
const inProgressJobs = this.worker.getCurrentJobs();
await this.worker.close(GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS);
// Manually release group locks for in progress jobs to prevent deadlocks.
// @see: https://github.com/Openpanel-dev/groupmq/issues/8
for (const { job } of inProgressJobs) {
const lockKey = `groupmq:${QUEUE_NAME}:lock:${job.groupId}`;
logger.debug(`Releasing group lock ${lockKey} for in progress job ${job.id}`);
try {
await this.redis.del(lockKey);
} catch (error) {
Sentry.captureException(error);
logger.error(`Failed to release group lock ${lockKey} for in progress job ${job.id}. Error: `, error);
}
}
// @note: As of groupmq v1.0.0, queue.close() will just close the underlying
// redis connection. Since we share the same redis client, we skip this
// step and close the redis client directly in index.ts.
// @see: https://github.com/Openpanel-dev/groupmq/blob/main/src/queue.ts#L1900
// await this.queue.close();
}
}
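The scheduler now uses setIntervalAsync from ./utils.js, whose implementation is not shown in this diff. A plausible sketch, assuming its purpose is to keep async ticks from overlapping when a scheduler pass runs long:

// Assumed shape of setIntervalAsync (the real implementation lives in utils.ts
// and is not part of this diff): skip a tick while the previous one is running.
function setIntervalAsync(fn: () => Promise<void>, ms: number): NodeJS.Timeout {
    let running = false;
    return setInterval(async () => {
        if (running) return; // previous pass still in flight; skip this tick
        running = true;
        try {
            await fn();
        } finally {
            running = false;
        }
    }, ms);
}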

View file

@@ -5,21 +5,21 @@ type ValidResult<T> = {
data: T[];
};
type NotFoundResult = {
type: 'notFound';
value: string;
type WarningResult = {
type: 'warning';
warning: string;
};
type CustomResult<T> = ValidResult<T> | NotFoundResult;
type CustomResult<T> = ValidResult<T> | WarningResult;
export function processPromiseResults<T>(
results: PromiseSettledResult<CustomResult<T>>[],
): {
validItems: T[];
notFoundItems: string[];
warnings: string[];
} {
const validItems: T[] = [];
const notFoundItems: string[] = [];
const warnings: string[] = [];
results.forEach(result => {
if (result.status === 'fulfilled') {
@@ -27,14 +27,14 @@ export function processPromiseResults<T>(
if (value.type === 'valid') {
validItems.push(...value.data);
} else {
notFoundItems.push(value.value);
warnings.push(value.warning);
}
}
});
return {
validItems,
notFoundItems,
warnings,
};
}
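A minimal usage sketch with contrived inputs. Rejected promises are expected to be handled by throwIfAnyFailed before this runs, so only fulfilled results are inspected here:

const settled: PromiseSettledResult<CustomResult<string>>[] = [
    { status: 'fulfilled', value: { type: 'valid', data: ['repo-a', 'repo-b'] } },
    { status: 'fulfilled', value: { type: 'warning', warning: 'org not found or no access' } },
];
const { validItems, warnings } = processPromiseResults(settled);
// validItems === ['repo-a', 'repo-b']; warnings === ['org not found or no access']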

View file

@@ -1,25 +1,33 @@
import { Settings } from "./types.js";
import { CodeHostType } from "@sourcebot/db";
import { env } from "@sourcebot/shared";
import path from "path";
/**
* Default settings.
*/
export const DEFAULT_SETTINGS: Settings = {
maxFileSize: 2 * 1024 * 1024, // 2MB in bytes
maxTrigramCount: 20000,
reindexIntervalMs: 1000 * 60 * 60, // 1 hour
resyncConnectionIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
resyncConnectionPollingIntervalMs: 1000 * 1, // 1 second
reindexRepoPollingIntervalMs: 1000 * 1, // 1 second
maxConnectionSyncJobConcurrency: 8,
maxRepoIndexingJobConcurrency: 8,
maxRepoGarbageCollectionJobConcurrency: 8,
repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
enablePublicAccess: false, // deprecated, use FORCE_ENABLE_ANONYMOUS_ACCESS instead
experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
}
export const SINGLE_TENANT_ORG_ID = 1;
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES: CodeHostType[] = [
'github',
'gitlab',
];
export const REPOS_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'repos');
export const INDEX_CACHE_DIR = path.join(env.DATA_CACHE_DIR, 'index');
// Maximum time to wait for current job to finish
export const GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS = 5 * 1000; // 5 seconds
// List of shutdown signals
export const SHUTDOWN_SIGNALS: string[] = [
'SIGHUP',
'SIGINT',
'SIGQUIT',
'SIGILL',
'SIGTRAP',
'SIGABRT',
'SIGBUS',
'SIGFPE',
'SIGSEGV',
'SIGUSR2',
'SIGTERM',
// @note: SIGKILL and SIGSTOP cannot have listeners installed.
// @see: https://nodejs.org/api/process.html#signal-events
];
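A sketch of how such a signal list is typically consumed; the wiring below is assumed, not shown in this diff:

import { SHUTDOWN_SIGNALS } from "./constants.js";

// Hypothetical wiring: dispose() stands in for whatever cleanup the caller owns
// (e.g. connectionManager.dispose()).
function installShutdownHandlers(dispose: () => Promise<void>) {
    for (const signal of SHUTDOWN_SIGNALS) {
        process.once(signal, async () => {
            await dispose();
            process.kill(process.pid, signal); // re-raise so the exit code reflects the signal
        });
    }
}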

View file

@@ -0,0 +1,303 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, AccountPermissionSyncJobStatus, Account} from "@sourcebot/db";
import { env, hasEntitlement, createLogger } from "@sourcebot/shared";
import { Job, Queue, Worker } from "bullmq";
import { Redis } from "ioredis";
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js";
import { createGitLabFromOAuthToken, getProjectsForAuthenticatedUser } from "../gitlab.js";
import { Settings } from "../types.js";
import { setIntervalAsync } from "../utils.js";
const LOG_TAG = 'user-permission-syncer';
const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
const QUEUE_NAME = 'accountPermissionSyncQueue';
type AccountPermissionSyncJob = {
jobId: string;
}
export class AccountPermissionSyncer {
private queue: Queue<AccountPermissionSyncJob>;
private worker: Worker<AccountPermissionSyncJob>;
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
) {
this.queue = new Queue<AccountPermissionSyncJob>(QUEUE_NAME, {
connection: redis,
});
this.worker = new Worker<AccountPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
connection: redis,
concurrency: 1,
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
}
public startScheduler() {
if (!hasEntitlement('permission-syncing')) {
throw new Error('Permission syncing is not supported in current plan.');
}
logger.debug('Starting scheduler');
this.interval = setIntervalAsync(async () => {
const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs);
const accounts = await this.db.account.findMany({
where: {
AND: [
{
provider: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES
}
},
{
OR: [
{ permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } },
]
},
{
NOT: {
permissionSyncJobs: {
some: {
OR: [
// Don't schedule if there are active jobs
{
status: {
in: [
AccountPermissionSyncJobStatus.PENDING,
AccountPermissionSyncJobStatus.IN_PROGRESS,
],
}
},
// Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is an inverse condition.
{
AND: [
{ status: AccountPermissionSyncJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
}
}
},
]
}
});
await this.schedulePermissionSync(accounts);
}, 1000 * 5);
}
public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
await this.worker.close(/* force = */ true);
await this.queue.close();
}
private async schedulePermissionSync(accounts: Account[]) {
// @note: we don't perform this in a transaction because
// we want to avoid the situation where a job is created and run
// prior to the transaction being committed.
const jobs = await this.db.accountPermissionSyncJob.createManyAndReturn({
data: accounts.map(account => ({
accountId: account.id,
})),
});
await this.queue.addBulk(jobs.map((job) => ({
name: 'accountPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
}
private async runJob(job: Job<AccountPermissionSyncJob>) {
const id = job.data.jobId;
const logger = createJobLogger(id);
const { account } = await this.db.accountPermissionSyncJob.update({
where: {
id,
},
data: {
status: AccountPermissionSyncJobStatus.IN_PROGRESS,
},
select: {
account: {
include: {
user: true,
}
}
}
});
logger.info(`Syncing permissions for ${account.provider} account (id: ${account.id}) for user ${account.user.email}...`);
// Get a list of all repos that the user has access to from all connected accounts.
const repoIds = await (async () => {
const aggregatedRepoIds: Set<number> = new Set();
if (account.provider === 'github') {
if (!account.access_token) {
throw new Error(`User '${account.user.email}' does not have a GitHub OAuth access token associated with their GitHub account.`);
}
const { octokit } = await createOctokitFromToken({
token: account.access_token,
url: env.AUTH_EE_GITHUB_BASE_URL,
});
// @note: we only care about the private repos since we don't need to build a mapping
// for public repos.
// @see: packages/web/src/prisma.ts
const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit);
const gitHubRepoIds = githubRepos.map(repo => repo.id.toString());
const repos = await this.db.repo.findMany({
where: {
external_codeHostType: 'github',
external_id: {
in: gitHubRepoIds,
}
}
});
repos.forEach(repo => aggregatedRepoIds.add(repo.id));
} else if (account.provider === 'gitlab') {
if (!account.access_token) {
throw new Error(`User '${account.user.email}' does not have a GitLab OAuth access token associated with their GitLab account.`);
}
const api = await createGitLabFromOAuthToken({
oauthToken: account.access_token,
url: env.AUTH_EE_GITLAB_BASE_URL,
});
// @note: we only care about the private and internal repos since we don't need to build a mapping
// for public repos.
// @see: packages/web/src/prisma.ts
const privateGitLabProjects = await getProjectsForAuthenticatedUser('private', api);
const internalGitLabProjects = await getProjectsForAuthenticatedUser('internal', api);
const gitLabProjectIds = [
...privateGitLabProjects,
...internalGitLabProjects,
].map(project => project.id.toString());
const repos = await this.db.repo.findMany({
where: {
external_codeHostType: 'gitlab',
external_id: {
in: gitLabProjectIds,
}
}
});
repos.forEach(repo => aggregatedRepoIds.add(repo.id));
}
return Array.from(aggregatedRepoIds);
})();
await this.db.$transaction([
this.db.account.update({
where: {
id: account.id,
},
data: {
accessibleRepos: {
deleteMany: {},
}
}
}),
this.db.accountToRepoPermission.createMany({
data: repoIds.map(repoId => ({
accountId: account.id,
repoId,
})),
skipDuplicates: true,
})
]);
}
private async onJobCompleted(job: Job<AccountPermissionSyncJob>) {
const logger = createJobLogger(job.data.jobId);
const { account } = await this.db.accountPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: AccountPermissionSyncJobStatus.COMPLETED,
account: {
update: {
permissionSyncedAt: new Date(),
},
},
completedAt: new Date(),
},
select: {
account: {
include: {
user: true,
}
}
}
});
logger.info(`Permissions synced for ${account.provider} account (id: ${account.id}) for user ${account.user.email}`);
}
private async onJobFailed(job: Job<AccountPermissionSyncJob> | undefined, err: Error) {
const logger = createJobLogger(job?.data.jobId ?? 'unknown');
Sentry.captureException(err, {
tags: {
jobId: job?.data.jobId,
queue: QUEUE_NAME,
}
});
const errorMessage = (accountId: string, email: string) => `Account permission sync job failed for account (id: ${accountId}) for user ${email}: ${err.message}`;
if (job) {
const { account } = await this.db.accountPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: AccountPermissionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: err.message,
},
select: {
account: {
include: {
user: true,
}
}
}
});
logger.error(errorMessage(account.id, account.user.email ?? 'unknown user (email not found)'));
} else {
logger.error(errorMessage('unknown account (id not found)', 'unknown user (id not found)'));
}
}
}
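Construction mirrors the other syncers. A minimal wiring sketch; the db, settings, and redis instances are assumed to exist in the caller:

const accountPermissionSyncer = new AccountPermissionSyncer(db, settings, redis);

// startScheduler() throws without the 'permission-syncing' entitlement,
// so callers would typically gate on it first.
if (hasEntitlement('permission-syncing')) {
    accountPermissionSyncer.startScheduler();
}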

View file

@@ -0,0 +1,115 @@
import { App } from "@octokit/app";
import { getTokenFromConfig } from "@sourcebot/shared";
import { PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/shared";
import { GitHubAppConfig } from "@sourcebot/schemas/v3/index.type";
import { env, loadConfig } from "@sourcebot/shared";
const logger = createLogger('githubAppManager');
const GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME = 'github.com';
type Installation = {
id: number;
appId: number;
account: {
login: string;
type: 'organization' | 'user';
};
};
export class GithubAppManager {
private static instance: GithubAppManager | null = null;
private octokitApps: Map<number, App>;
private installationMap: Map<string, Installation>;
private db: PrismaClient | null = null;
private initialized: boolean = false;
private constructor() {
this.octokitApps = new Map<number, App>();
this.installationMap = new Map<string, Installation>();
}
public static getInstance(): GithubAppManager {
if (!GithubAppManager.instance) {
GithubAppManager.instance = new GithubAppManager();
}
return GithubAppManager.instance;
}
private ensureInitialized(): void {
if (!this.initialized) {
throw new Error('GithubAppManager must be initialized before use. Call init() first.');
}
}
public async init(db: PrismaClient) {
this.db = db;
const config = await loadConfig(env.CONFIG_PATH);
if (!config.apps) {
return;
}
const githubApps = config.apps.filter(app => app.type === 'github') as GitHubAppConfig[];
logger.info(`Found ${githubApps.length} GitHub apps in config`);
for (const app of githubApps) {
const deploymentHostname = app.deploymentHostname as string || GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME;
const privateKey = await getTokenFromConfig(app.privateKey);
const octokitApp = new App({
appId: Number(app.id),
privateKey: privateKey,
});
this.octokitApps.set(Number(app.id), octokitApp);
const installations = await octokitApp.octokit.request("GET /app/installations");
logger.info(`Found ${installations.data.length} GitHub App installations for ${deploymentHostname}/${app.id}:`);
for (const installationData of installations.data) {
if (!installationData.account || !installationData.account.login || !installationData.account.type) {
logger.warn(`Skipping installation ${installationData.id}: missing account data (${installationData.account})`);
continue;
}
logger.info(`\tInstallation ID: ${installationData.id}, Account: ${installationData.account.login}, Type: ${installationData.account.type}`);
const owner = installationData.account.login;
const accountType = installationData.account.type.toLowerCase() as 'organization' | 'user';
const installation: Installation = {
id: installationData.id,
appId: Number(app.id),
account: {
login: owner,
type: accountType,
},
};
this.installationMap.set(this.generateMapKey(owner, deploymentHostname), installation);
}
}
this.initialized = true;
}
public async getInstallationToken(owner: string, deploymentHostname: string = GITHUB_DEFAULT_DEPLOYMENT_HOSTNAME): Promise<string> {
this.ensureInitialized();
const key = this.generateMapKey(owner, deploymentHostname);
const installation = this.installationMap.get(key) as Installation | undefined;
if (!installation) {
throw new Error(`GitHub App Installation not found for ${key}`);
}
const octokitApp = this.octokitApps.get(installation.appId) as App;
const installationOctokit = await octokitApp.getInstallationOctokit(installation.id);
const auth = await installationOctokit.auth({ type: "installation" }) as { expires_at: string, token: string };
return auth.token;
}
public appsConfigured() {
return this.octokitApps.size > 0;
}
private generateMapKey(owner: string, deploymentHostname: string): string {
return `${deploymentHostname}/${owner}`;
}
}
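Usage follows the singleton pattern above; a short sketch, with the owner name hypothetical and db assumed in scope:

const appManager = GithubAppManager.getInstance();
await appManager.init(db); // loads apps + installations from CONFIG_PATH

if (appManager.appsConfigured()) {
    // Mints a short-lived installation token; defaults to github.com.
    const token = await appManager.getInstallationToken('my-org');
}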

View file

@@ -1,14 +1,14 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { hasEntitlement } from "@sourcebot/shared";
import { createLogger } from "@sourcebot/shared";
import { env, hasEntitlement } from "@sourcebot/shared";
import { Job, Queue, Worker } from 'bullmq';
import { Redis } from 'ioredis';
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getRepoCollaborators } from "../github.js";
import { createOctokitFromToken, getRepoCollaborators, GITHUB_CLOUD_HOSTNAME } from "../github.js";
import { createGitLabFromPersonalAccessToken, getProjectMembers } from "../gitlab.js";
import { Settings } from "../types.js";
import { getAuthCredentialsForRepo } from "../utils.js";
import { getAuthCredentialsForRepo, setIntervalAsync } from "../utils.js";
type RepoPermissionSyncJob = {
jobId: string;
@@ -16,8 +16,9 @@ type RepoPermissionSyncJob = {
const QUEUE_NAME = 'repoPermissionSyncQueue';
const logger = createLogger('repo-permission-syncer');
const LOG_TAG = 'repo-permission-syncer';
const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
export class RepoPermissionSyncer {
private queue: Queue<RepoPermissionSyncJob>;
@@ -47,26 +48,34 @@
logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
this.interval = setIntervalAsync(async () => {
// @todo: make this configurable
const thresholdDate = new Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs);
const repos = await this.db.repo.findMany({
// Repos need their permissions to be synced against the code host when...
where: {
// They belong to a code host that supports permissions syncing
AND: [
// They are not public. Public repositories are always visible to all users, therefore we don't
// need to explicitly perform permission syncing for them.
// @see: packages/web/src/prisma.ts
{
isPublic: false
},
// They belong to a code host that supports permissions syncing
{
external_codeHostType: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES,
}
},
// They have not been synced within the threshold date.
{
OR: [
{ permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } },
],
},
// There aren't any active or recently failed jobs.
{
NOT: {
permissionSyncJobs: {
@@ -101,17 +110,19 @@
}, 1000 * 5);
}
public dispose() {
public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close();
this.queue.close();
await this.worker.close(/* force = */ true);
await this.queue.close();
}
private async schedulePermissionSync(repos: Repo[]) {
await this.db.$transaction(async (tx) => {
const jobs = await tx.repoPermissionSyncJob.createManyAndReturn({
// @note: we don't perform this in a transaction because
// we want to avoid the situation where a job is created and run
// prior to the transaction being committed.
const jobs = await this.db.repoPermissionSyncJob.createManyAndReturn({
data: repos.map(repo => ({
repoId: repo.id,
})),
@@ -127,11 +138,12 @@
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
});
}
private async runJob(job: Job<RepoPermissionSyncJob>) {
const id = job.data.jobId;
const logger = createJobLogger(id);
const { repo } = await this.db.repoPermissionSyncJob.update({
where: {
id,
@@ -158,16 +170,17 @@
logger.info(`Syncing permissions for repo ${repo.displayName}...`);
const credentials = await getAuthCredentialsForRepo(repo, logger);
if (!credentials) {
throw new Error(`No credentials found for repo ${id}`);
}
const accountIds = await (async () => {
if (repo.external_codeHostType === 'github') {
const isGitHubCloud = credentials.hostUrl ? new URL(credentials.hostUrl).hostname === GITHUB_CLOUD_HOSTNAME : false;
const { octokit } = await createOctokitFromToken({
token: credentials.token,
url: isGitHubCloud ? undefined : credentials.hostUrl,
});
// @note: this is a bit of a hack since the displayName _might_ not be set..
@ -189,12 +202,33 @@ export class RepoPermissionSyncer {
in: githubUserIds,
}
},
});
return accounts.map(account => account.id);
} else if (repo.external_codeHostType === 'gitlab') {
const api = await createGitLabFromPersonalAccessToken({
token: credentials.token,
url: credentials.hostUrl,
});
const projectId = repo.external_id;
if (!projectId) {
throw new Error(`Repo ${id} does not have an external_id`);
}
const members = await getProjectMembers(projectId, api);
const gitlabUserIds = members.map(member => member.id.toString());
const accounts = await this.db.account.findMany({
where: {
provider: 'gitlab',
providerAccountId: {
in: gitlabUserIds,
}
},
});
return accounts.map(account => account.id);
}
return [];
@ -206,14 +240,14 @@ export class RepoPermissionSyncer {
id: repo.id,
},
data: {
permittedAccounts: {
deleteMany: {},
}
}
}),
this.db.accountToRepoPermission.createMany({
data: accountIds.map(accountId => ({
accountId,
repoId: repo.id,
})),
})
@ -221,6 +255,8 @@ export class RepoPermissionSyncer {
}
private async onJobCompleted(job: Job<RepoPermissionSyncJob>) {
const logger = createJobLogger(job.data.jobId);
const { repo } = await this.db.repoPermissionSyncJob.update({
where: {
id: job.data.jobId,
@ -243,6 +279,8 @@ export class RepoPermissionSyncer {
}
private async onJobFailed(job: Job<RepoPermissionSyncJob> | undefined, err: Error) {
const logger = createJobLogger(job?.data.jobId ?? 'unknown');
Sentry.captureException(err, {
tags: {
jobId: job?.data.jobId,


@ -1,8 +1,7 @@
import micromatch from "micromatch";
import { createLogger } from "@sourcebot/shared";
import { PrismaClient } from "@sourcebot/db";
import { getPlan, hasEntitlement, SOURCEBOT_SUPPORT_EMAIL } from "@sourcebot/shared";
import { SearchContext } from "@sourcebot/schemas/v3/index.type";
const logger = createLogger('sync-search-contexts');


@ -1,266 +0,0 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { Job, Queue, Worker } from "bullmq";
import { Redis } from "ioredis";
import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js";
import { env } from "../env.js";
import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js";
import { hasEntitlement } from "@sourcebot/shared";
import { Settings } from "../types.js";
const logger = createLogger('user-permission-syncer');
const QUEUE_NAME = 'userPermissionSyncQueue';
type UserPermissionSyncJob = {
jobId: string;
}
export class UserPermissionSyncer {
private queue: Queue<UserPermissionSyncJob>;
private worker: Worker<UserPermissionSyncJob>;
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
) {
this.queue = new Queue<UserPermissionSyncJob>(QUEUE_NAME, {
connection: redis,
});
this.worker = new Worker<UserPermissionSyncJob>(QUEUE_NAME, this.runJob.bind(this), {
connection: redis,
concurrency: 1,
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
}
public startScheduler() {
if (!hasEntitlement('permission-syncing')) {
throw new Error('Permission syncing is not supported in current plan.');
}
logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs);
const users = await this.db.user.findMany({
where: {
AND: [
{
accounts: {
some: {
provider: {
in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES
}
}
}
},
{
OR: [
{ permissionSyncedAt: null },
{ permissionSyncedAt: { lt: thresholdDate } },
]
},
{
NOT: {
permissionSyncJobs: {
some: {
OR: [
// Don't schedule if there are active jobs
{
status: {
in: [
UserPermissionSyncJobStatus.PENDING,
UserPermissionSyncJobStatus.IN_PROGRESS,
],
}
},
// Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is an inverse condition.
{
AND: [
{ status: UserPermissionSyncJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
}
}
},
]
}
});
await this.schedulePermissionSync(users);
}, 1000 * 5);
}
public dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.worker.close();
this.queue.close();
}
private async schedulePermissionSync(users: User[]) {
await this.db.$transaction(async (tx) => {
const jobs = await tx.userPermissionSyncJob.createManyAndReturn({
data: users.map(user => ({
userId: user.id,
})),
});
await this.queue.addBulk(jobs.map((job) => ({
name: 'userPermissionSyncJob',
data: {
jobId: job.id,
},
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})))
});
}
private async runJob(job: Job<UserPermissionSyncJob>) {
const id = job.data.jobId;
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id,
},
data: {
status: UserPermissionSyncJobStatus.IN_PROGRESS,
},
select: {
user: {
include: {
accounts: true,
}
}
}
});
if (!user) {
throw new Error(`User ${id} not found`);
}
logger.info(`Syncing permissions for user ${user.email}...`);
// Get a list of all repos that the user has access to from all connected accounts.
const repoIds = await (async () => {
const aggregatedRepoIds: Set<number> = new Set();
for (const account of user.accounts) {
if (account.provider === 'github') {
if (!account.access_token) {
throw new Error(`User '${user.email}' does not have a GitHub OAuth access token associated with their GitHub account.`);
}
const { octokit } = await createOctokitFromToken({
token: account.access_token,
url: env.AUTH_EE_GITHUB_BASE_URL,
});
// @note: we only care about the private repos since we don't need to build a mapping
// for public repos.
// @see: packages/web/src/prisma.ts
const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit);
const gitHubRepoIds = githubRepos.map(repo => repo.id.toString());
const repos = await this.db.repo.findMany({
where: {
external_codeHostType: 'github',
external_id: {
in: gitHubRepoIds,
}
}
});
repos.forEach(repo => aggregatedRepoIds.add(repo.id));
}
}
return Array.from(aggregatedRepoIds);
})();
await this.db.$transaction([
this.db.user.update({
where: {
id: user.id,
},
data: {
accessibleRepos: {
deleteMany: {},
}
}
}),
this.db.userToRepoPermission.createMany({
data: repoIds.map(repoId => ({
userId: user.id,
repoId,
})),
skipDuplicates: true,
})
]);
}
private async onJobCompleted(job: Job<UserPermissionSyncJob>) {
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: UserPermissionSyncJobStatus.COMPLETED,
user: {
update: {
permissionSyncedAt: new Date(),
}
},
completedAt: new Date(),
},
select: {
user: true
}
});
logger.info(`Permissions synced for user ${user.email}`);
}
private async onJobFailed(job: Job<UserPermissionSyncJob> | undefined, err: Error) {
Sentry.captureException(err, {
tags: {
jobId: job?.data.jobId,
queue: QUEUE_NAME,
}
});
const errorMessage = (email: string) => `User permission sync job failed for user ${email}: ${err.message}`;
if (job) {
const { user } = await this.db.userPermissionSyncJob.update({
where: {
id: job.data.jobId,
},
data: {
status: UserPermissionSyncJobStatus.FAILED,
completedAt: new Date(),
errorMessage: err.message,
},
select: {
user: true,
}
});
logger.error(errorMessage(user.email ?? user.id));
} else {
logger.error(errorMessage('unknown user (id not found)'));
}
}
}


@ -1,62 +0,0 @@
import { createEnv } from "@t3-oss/env-core";
import { z } from "zod";
import dotenv from 'dotenv';
// Booleans are specified as 'true' or 'false' strings.
const booleanSchema = z.enum(["true", "false"]);
// Numbers are treated as strings in .env files.
// coerce helps us convert them to numbers.
// @see: https://zod.dev/?id=coercion-for-primitives
const numberSchema = z.coerce.number();
dotenv.config({
path: './.env',
});
dotenv.config({
path: './.env.local',
override: true
});
export const env = createEnv({
server: {
SOURCEBOT_ENCRYPTION_KEY: z.string(),
SOURCEBOT_TELEMETRY_DISABLED: booleanSchema.default("false"),
SOURCEBOT_INSTALL_ID: z.string().default("unknown"),
NEXT_PUBLIC_SOURCEBOT_VERSION: z.string().default("unknown"),
DATA_CACHE_DIR: z.string(),
NEXT_PUBLIC_POSTHOG_PAPIK: z.string().optional(),
FALLBACK_GITHUB_CLOUD_TOKEN: z.string().optional(),
FALLBACK_GITLAB_CLOUD_TOKEN: z.string().optional(),
FALLBACK_GITEA_CLOUD_TOKEN: z.string().optional(),
REDIS_URL: z.string().url().default("redis://localhost:6379"),
REDIS_REMOVE_ON_COMPLETE: numberSchema.default(0),
REDIS_REMOVE_ON_FAIL: numberSchema.default(100),
NEXT_PUBLIC_SENTRY_BACKEND_DSN: z.string().optional(),
NEXT_PUBLIC_SENTRY_ENVIRONMENT: z.string().optional(),
LOGTAIL_TOKEN: z.string().optional(),
LOGTAIL_HOST: z.string().url().optional(),
SOURCEBOT_LOG_LEVEL: z.enum(["info", "debug", "warn", "error"]).default("info"),
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
CONFIG_PATH: z.string().optional(),
CONNECTION_MANAGER_UPSERT_TIMEOUT_MS: numberSchema.default(300000),
REPO_SYNC_RETRY_BASE_SLEEP_SECONDS: numberSchema.default(60),
GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10),
EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'),
AUTH_EE_GITHUB_BASE_URL: z.string().optional(),
},
runtimeEnv: process.env,
emptyStringAsUndefined: true,
skipValidation: process.env.SKIP_ENV_VALIDATION === "1",
});
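// Editor's note: a standalone illustration of the two schema helpers above. Every value read from a
// .env file arrives as a string, so numbers are coerced while booleans stay string enums (zod only,
// no project imports):
//
//   import { z } from "zod";
//   const booleanSchema = z.enum(["true", "false"]);
//   const numberSchema = z.coerce.number();
//   booleanSchema.parse("true");   // => "true" (a string enum, compared against 'true' downstream)
//   numberSchema.parse("300000");  // => 300000 (a number)
//   numberSchema.parse("sixty");   // throws: Number("sixty") is NaN, which z.number() rejects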


@ -1,11 +1,8 @@
import { GerritConnectionConfig } from "@sourcebot/schemas/v3/index.type";
import { createLogger } from '@sourcebot/shared';
import fetch from 'cross-fetch';
import micromatch from "micromatch";
import { fetchWithRetry, measure } from './utils.js';
// https://gerrit-review.googlesource.com/Documentation/rest-api.html
interface GerritProjects {
@ -37,29 +34,12 @@ const logger = createLogger('gerrit');
export const getGerritReposFromConfig = async (config: GerritConnectionConfig): Promise<GerritProject[]> => {
const url = config.url.endsWith('/') ? config.url : `${config.url}/`;
const hostname = new URL(config.url).hostname;
let { durationMs, data: projects } = await measure(async () => {
const fetchFn = () => fetchAllProjects(url);
return fetchWithRetry(fetchFn, `projects from ${url}`, logger);
});
// include repos by glob if specified in config
if (config.projects) {
projects = projects.filter((project) => {
@ -92,27 +72,9 @@ const fetchAllProjects = async (url: string): Promise<GerritProject[]> => {
logger.debug(`Fetching projects from Gerrit at ${endpointWithParams}`);
let response: Response;
response = await fetch(endpointWithParams);
if (!response.ok) {
throw new Error(`Failed to fetch projects from Gerrit at ${endpointWithParams} with status ${response.status}`);
}
const text = await response.text();


@ -1,38 +1,85 @@
import { env } from "@sourcebot/shared";
import { existsSync } from 'node:fs';
import { mkdir } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { CheckRepoActions, GitConfigScope, simpleGit, SimpleGitProgressEvent } from 'simple-git';
type onProgressFn = (event: SimpleGitProgressEvent) => void;
/**
* Creates a simple-git client that has its working directory
* set to the given path.
*/
const createGitClientForPath = (path: string, onProgress?: onProgressFn, signal?: AbortSignal) => {
if (!existsSync(path)) {
throw new Error(`Path ${path} does not exist`);
}
const parentPath = resolve(dirname(path));
const git = simpleGit({
progress: onProgress,
abort: signal,
})
.env({
...process.env,
/**
* @note: some inside-baseball on why this is necessary: The specific
* issue we saw was that a `git clone` would fail without throwing, and
* then a subsequent `git config` command would run, but since the clone
* failed, it wouldn't be running in a git directory. Git would then walk
* up the directory tree until it either found a git directory (in the case
* of the development env) or it would hit a GIT_DISCOVERY_ACROSS_FILESYSTEM
* error when trying to cross a filesystem boundary (in the prod case).
* GIT_CEILING_DIRECTORIES ensures that this walk will be limited to the
* parent directory.
*/
GIT_CEILING_DIRECTORIES: parentPath,
/**
* Disable git credential prompts. This ensures that git operations will fail
* immediately if credentials are not available, rather than prompting for input.
*/
GIT_TERMINAL_PROMPT: '0',
})
.cwd({
path,
});
return git;
}
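// Editor's note: a usage sketch of the factory above; the repository path is hypothetical. Every
// command on the returned client runs with its cwd pinned to `path`, git discovery ceilinged at the
// parent directory, and credential prompts disabled:
//
//   const git = createGitClientForPath('/data/repos/example.git', (event) => {
//       console.log(`${event.method} ${event.stage}: ${event.progress}%`);
//   });
//   const isBare = await git.raw(['rev-parse', '--is-bare-repository']);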
export const cloneRepository = async (
{
cloneUrl,
authHeader,
path,
onProgress,
signal,
}: {
cloneUrl: string,
authHeader?: string,
path: string,
onProgress?: onProgressFn
signal?: AbortSignal
}
) => {
try {
await mkdir(path, { recursive: true });
const git = createGitClientForPath(path, onProgress, signal);
const cloneArgs = [
"--bare",
...(authHeader ? ["-c", `http.extraHeader=${authHeader}`] : [])
];
await git.clone(cloneUrl, path, cloneArgs);
await unsetGitConfig({
path,
keys: ["remote.origin.url"],
signal,
});
} catch (error: unknown) {
const baseLog = `Failed to clone repository: ${path}`;
@ -50,20 +97,23 @@ export const cloneRepository = async (
export const fetchRepository = async (
{
cloneUrl,
authHeader,
path,
onProgress,
signal,
}: {
cloneUrl: string,
authHeader?: string,
path: string,
onProgress?: onProgressFn,
signal?: AbortSignal
}
) => {
const git = createGitClientForPath(path, onProgress, signal);
try {
if (authHeader) {
await git.addConfig("http.extraHeader", authHeader);
}
await git.fetch([
cloneUrl,
@ -81,6 +131,10 @@ export const fetchRepository = async (
} else {
throw new Error(`${baseLog}. Error: ${error}`);
}
} finally {
if (authHeader) {
await git.raw(["config", "--unset", "http.extraHeader", authHeader]);
}
}
}
@ -90,10 +144,19 @@ export const fetchRepository = async (
* that do not exist yet. It will _not_ remove any existing keys that are not
* present in gitConfig.
*/
export const upsertGitConfig = async (
{
path,
gitConfig,
onProgress,
signal,
}: {
path: string,
gitConfig: Record<string, string>,
onProgress?: onProgressFn,
signal?: AbortSignal
}) => {
const git = createGitClientForPath(path, onProgress, signal);
try {
for (const [key, value] of Object.entries(gitConfig)) {
@ -112,10 +175,19 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
* Unsets the specified keys in the git config for the repo at the given path.
* If a key is not set, this is a no-op.
*/
export const unsetGitConfig = async (
{
path,
keys,
onProgress,
signal,
}: {
path: string,
keys: string[],
onProgress?: onProgressFn,
signal?: AbortSignal
}) => {
const git = createGitClientForPath(path, onProgress, signal);
try {
const configList = await git.listConfig();
@ -138,10 +210,20 @@ export const unsetGitConfig = async (path: string, keys: string[], onProgress?:
/**
* Returns true if `path` is the _root_ of a git repository.
*/
export const isPathAValidGitRepoRoot = async ({
path,
onProgress,
signal,
}: {
path: string,
onProgress?: onProgressFn,
signal?: AbortSignal
}) => {
if (!existsSync(path)) {
return false;
}
const git = createGitClientForPath(path, onProgress, signal);
try {
return git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);
@ -167,7 +249,7 @@ export const isUrlAValidGitRepo = async (url: string) => {
}
export const getOriginUrl = async (path: string) => {
const git = createGitClientForPath(path);
try {
const remotes = await git.getConfig('remote.origin.url', GitConfigScope.local);
@ -182,18 +264,35 @@ export const getOriginUrl = async (path: string) => {
}
export const getBranches = async (path: string) => {
const git = createGitClientForPath(path);
const branches = await git.branch();
return branches.all;
}
export const getTags = async (path: string) => {
const git = createGitClientForPath(path);
const tags = await git.tags();
return tags.all;
}
export const getCommitHashForRefName = async ({
path,
refName,
}: {
path: string,
refName: string,
}) => {
const git = createGitClientForPath(path);
try {
// The `^{commit}` suffix is used to fully dereference the ref to a commit hash.
const rev = await git.revparse(`${refName}^{commit}`);
return rev;
// @note: we were hitting errors when the repository is empty,
// so we're catching the error and returning undefined.
} catch (error: unknown) {
console.error(error);
return undefined;
}
}
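// Editor's note: a usage sketch; the path and ref name are hypothetical. Thanks to the `^{commit}`
// peeling, the same call resolves branches, lightweight tags, and annotated tags to a commit hash:
//
//   const hash = await getCommitHashForRefName({ path: '/data/repos/example.git', refName: 'main' });
//   if (hash === undefined) {
//       // the repository is empty (or the ref is unknown); see the catch block above
//   }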


@ -1,24 +1,24 @@
import * as Sentry from "@sentry/node";
import { getTokenFromConfig } from "@sourcebot/shared";
import { createLogger } from '@sourcebot/shared';
import { GiteaConnectionConfig } from '@sourcebot/schemas/v3/gitea.type';
import { env } from "@sourcebot/shared";
import fetch from 'cross-fetch';
import { Api, giteaApi, Repository as GiteaRepository, HttpResponse } from 'gitea-js';
import micromatch from 'micromatch';
import { processPromiseResults, throwIfAnyFailed } from './connectionUtils.js';
import { measure } from './utils.js';
const logger = createLogger('gitea');
const GITEA_CLOUD_HOSTNAME = "gitea.com";
export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig) => {
const hostname = config.url ?
new URL(config.url).hostname :
GITEA_CLOUD_HOSTNAME;
const token = config.token ?
await getTokenFromConfig(config.token) :
hostname === GITEA_CLOUD_HOSTNAME ?
env.FALLBACK_GITEA_CLOUD_TOKEN :
undefined;
@ -29,38 +29,30 @@ export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, org
});
let allRepos: GiteaRepository[] = [];
let allWarnings: string[] = [];
if (config.orgs) {
const { repos, warnings } = await getReposForOrgs(config.orgs, api);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.repos) {
const { repos, warnings } = await getRepos(config.repos, api);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.users) {
const { repos, warnings } = await getReposOwnedByUsers(config.users, api);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
allRepos = allRepos.filter(repo => {
if (repo.full_name === undefined) {
logger.warn(`Repository with undefined full_name found: repoId=${repo.id}`);
return false;
}
return true;
@ -78,8 +70,8 @@ export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, org
logger.debug(`Found ${repos.length} total repositories.`);
return {
repos,
warnings: allWarnings,
};
}
@ -145,10 +137,11 @@ const getReposOwnedByUsers = async <T>(users: string[], api: Api<T>) => {
Sentry.captureException(e);
if (e?.status === 404) {
const warning = `User ${user} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -156,11 +149,11 @@ const getReposOwnedByUsers = async <T>(users: string[], api: Api<T>) => {
}));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<GiteaRepository>(results);
return {
repos,
warnings,
};
}
@ -185,10 +178,11 @@ const getReposForOrgs = async <T>(orgs: string[], api: Api<T>) => {
Sentry.captureException(e);
if (e?.status === 404) {
const warning = `Organization ${org} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -196,16 +190,16 @@ const getReposForOrgs = async <T>(orgs: string[], api: Api<T>) => {
}));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<GiteaRepository>(results);
return {
repos,
warnings,
};
}
const getRepos = async <T>(repoList: string[], api: Api<T>) => {
const results = await Promise.allSettled(repoList.map(async (repo) => {
try {
logger.debug(`Fetching repository info for ${repo}...`);
@ -223,10 +217,11 @@ const getRepos = async <T>(repos: string[], api: Api<T>) => {
Sentry.captureException(e);
if (e?.status === 404) {
const warning = `Repository ${repo} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -234,11 +229,11 @@ const getRepos = async <T>(repos: string[], api: Api<T>) => {
}));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<GiteaRepository>(results);
return {
repos,
warnings,
};
}
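// Editor's note: a sketch of how a caller might consume the warnings-based result shape above; the
// connection config values are hypothetical:
//
//   const { repos, warnings } = await getGiteaReposFromConfig({
//       type: 'gitea',
//       url: 'https://gitea.example.com',
//       orgs: ['my-org'],
//   });
//   warnings.forEach((warning) => logger.warn(warning));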


@ -1,16 +1,21 @@
import { Octokit } from "@octokit/rest";
import * as Sentry from "@sentry/node";
import { getTokenFromConfig } from "@sourcebot/shared";
import { createLogger } from "@sourcebot/shared";
import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type";
import { env, hasEntitlement } from "@sourcebot/shared";
import micromatch from "micromatch";
import pLimit from "p-limit";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import { GithubAppManager } from "./ee/githubAppManager.js";
import { fetchWithRetry, measure } from "./utils.js";
export const GITHUB_CLOUD_HOSTNAME = "github.com";
// Limit concurrent GitHub requests to avoid hitting rate limits and overwhelming installations.
const MAX_CONCURRENT_GITHUB_QUERIES = 5;
const githubQueryLimit = pLimit(MAX_CONCURRENT_GITHUB_QUERIES);
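// Editor's note: wrapping each fetch below in `githubQueryLimit(...)` means that no matter how many
// users, orgs, or repos a connection lists, at most five Octokit calls run concurrently; e.g. (with a
// hypothetical fetchOrg helper):
//
//   await Promise.all(orgs.map((org) => githubQueryLimit(() => fetchOrg(org))));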
const logger = createLogger('github');
export type OctokitRepository = {
name: string,
@ -42,9 +47,10 @@ const isHttpError = (error: unknown, status: number): boolean => {
}
export const createOctokitFromToken = async ({ token, url }: { token?: string, url?: string }): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => {
const isGitHubCloud = url ? new URL(url).hostname === GITHUB_CLOUD_HOSTNAME : false;
const octokit = new Octokit({
auth: token,
...(url ? {
...(url && !isGitHubCloud ? {
baseUrl: `${url}/api/v3`
} : {}),
});
@ -55,13 +61,47 @@ export const createOctokitFromToken = async ({ token, url }: { token?: string, u
};
}
/**
* Helper function to get an authenticated Octokit instance using GitHub App if available,
* otherwise falls back to the provided octokit instance.
*/
const getOctokitWithGithubApp = async (
octokit: Octokit,
owner: string,
url: string | undefined,
context: string
): Promise<Octokit> => {
if (!hasEntitlement('github-app') || !GithubAppManager.getInstance().appsConfigured()) {
return octokit;
}
try {
const hostname = url ? new URL(url).hostname : GITHUB_CLOUD_HOSTNAME;
const token = await GithubAppManager.getInstance().getInstallationToken(owner, hostname);
const { octokit: octokitFromToken, isAuthenticated } = await createOctokitFromToken({
token,
url,
});
if (isAuthenticated) {
return octokitFromToken;
} else {
logger.error(`Failed to authenticate with GitHub App for ${context}. Falling back to legacy token resolution.`);
return octokit;
}
} catch (error) {
logger.error(`Error getting GitHub App token for ${context}. Falling back to legacy token resolution.`, error);
return octokit;
}
}
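// Editor's note: a usage sketch of the helper above; the owner and org name are hypothetical. The
// GitHub App path is only taken when the entitlement is present and at least one app is configured;
// otherwise the original client is returned unchanged:
//
//   const octokitToUse = await getOctokitWithGithubApp(octokit, 'my-org', config.url, `org my-org`);
//   const orgRepos = await octokitToUse.paginate(octokitToUse.repos.listForOrg, {
//       org: 'my-org',
//       per_page: 100,
//   });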
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal): Promise<{ repos: OctokitRepository[], warnings: string[] }> => {
const hostname = config.url ?
new URL(config.url).hostname :
GITHUB_CLOUD_HOSTNAME;
const token = config.token ?
await getTokenFromConfig(config.token) :
hostname === GITHUB_CLOUD_HOSTNAME ?
env.FALLBACK_GITHUB_CLOUD_TOKEN :
undefined;
@ -71,57 +111,36 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o
url: config.url,
});
if (isAuthenticated) {
try {
await octokit.rest.users.getAuthenticated();
} catch (error) {
logger.error(`Failed to authenticate with GitHub`, error);
throw error;
}
}
let allRepos: OctokitRepository[] = [];
let allWarnings: string[] = [];
if (config.orgs) {
const { repos, warnings } = await getReposForOrgs(config.orgs, octokit, signal, config.url);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.repos) {
const { repos, warnings } = await getRepos(config.repos, octokit, signal, config.url);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
if (config.users) {
const { repos, warnings } = await getReposOwnedByUsers(config.users, octokit, signal, config.url);
allRepos = allRepos.concat(repos);
allWarnings = allWarnings.concat(warnings);
}
let repos = allRepos
@ -140,8 +159,8 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o
logger.debug(`Found ${repos.length} total repositories.`);
return {
repos,
warnings: allWarnings,
};
}
@ -178,11 +197,12 @@ export const getReposForAuthenticatedUser = async (visibility: 'all' | 'private'
}
}
const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(users.map((user) => githubQueryLimit(async () => {
try {
logger.debug(`Fetching repository info for user ${user}...`);
const octokitToUse = await getOctokitWithGithubApp(octokit, user, url, `user ${user}`);
const { durationMs, data } = await measure(async () => {
const fetchFn = async () => {
let query = `user:${user}`;
@ -194,7 +214,7 @@ const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: A
// the username as a parameter.
// @see: https://github.com/orgs/community/discussions/24382#discussioncomment-3243958
// @see: https://api.github.com/search/repositories?q=user:USERNAME
const searchResults = await octokitToUse.paginate(octokitToUse.rest.search.repos, {
q: query,
per_page: 100,
request: {
@ -218,32 +238,34 @@ const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: A
logger.error(`Failed to fetch repositories for user ${user}.`, error);
if (isHttpError(error, 404)) {
const warning = `User ${user} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw error;
}
})));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);
return {
repos,
warnings,
};
}
const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(orgs.map((org) => githubQueryLimit(async () => {
try {
logger.debug(`Fetching repository info for org ${org}...`);
const octokitToUse = await getOctokitWithGithubApp(octokit, org, url, `org ${org}`);
const { durationMs, data } = await measure(async () => {
const fetchFn = () => octokitToUse.paginate(octokitToUse.repos.listForOrg, {
org: org,
per_page: 100,
request: {
@ -254,7 +276,7 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
return fetchWithRetry(fetchFn, `org ${org}`, logger);
});
logger.debug(`Found ${data.length} in org ${org} in ${durationMs}ms.`);
return {
type: 'valid' as const,
data
@ -264,33 +286,35 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
logger.error(`Failed to fetch repositories for org ${org}.`, error);
if (isHttpError(error, 404)) {
const warning = `Organization ${org} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw error;
}
})));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);
return {
repos,
warnings,
};
}
const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal, url?: string) => {
const results = await Promise.allSettled(repoList.map((repo) => githubQueryLimit(async () => {
try {
const [owner, repoName] = repo.split('/');
logger.debug(`Fetching repository info for ${repo}...`);
const octokitToUse = await getOctokitWithGithubApp(octokit, owner, url, `repo ${repo}`);
const { durationMs, data: result } = await measure(async () => {
const fetchFn = () => octokitToUse.repos.get({
owner,
repo: repoName,
request: {
@ -301,7 +325,7 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna
return fetchWithRetry(fetchFn, repo, logger);
});
logger.debug(`Found info for repository ${repo} in ${durationMs}ms`);
return {
type: 'valid' as const,
data: [result.data]
@ -312,22 +336,23 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna
logger.error(`Failed to fetch repository ${repo}.`, error);
if (isHttpError(error, 404)) {
const warning = `Repository ${repo} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw error;
}
})));
throwIfAnyFailed(results);
const { validItems: repos, warnings } = processPromiseResults<OctokitRepository>(results);
return {
repos,
warnings,
};
}


@ -1,47 +1,56 @@
import { Gitlab, ProjectSchema } from "@gitbeaker/rest";
import * as Sentry from "@sentry/node";
import { getTokenFromConfig } from "@sourcebot/shared";
import { createLogger } from "@sourcebot/shared";
import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type";
import { env } from "@sourcebot/shared";
import micromatch from "micromatch";
import { processPromiseResults, throwIfAnyFailed } from "./connectionUtils.js";
import { fetchWithRetry, measure } from "./utils.js";
const logger = createLogger('gitlab');
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";
export const createGitLabFromPersonalAccessToken = async ({ token, url }: { token?: string, url?: string }) => {
const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : false;
return new Gitlab({
token,
...(isGitLabCloud ? {} : {
host: url,
}),
queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000,
});
}
export const createGitLabFromOAuthToken = async ({ oauthToken, url }: { oauthToken?: string, url?: string }) => {
const isGitLabCloud = url ? new URL(url).hostname === GITLAB_CLOUD_HOSTNAME : false;
return new Gitlab({
oauthToken,
...(isGitLabCloud ? {} : {
host: url,
}),
queryTimeout: env.GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS * 1000,
});
}
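// Editor's note: a usage sketch of the factories above; the token and host are hypothetical. The two
// variants differ only in which credential field gitbeaker receives; self-hosted URLs become `host`,
// while gitlab.com falls through to the client default:
//
//   const api = await createGitLabFromPersonalAccessToken({
//       token: 'glpat-xxxx',
//       url: 'https://gitlab.example.com',
//   });
//   const projects = await api.Projects.all({ membership: true, perPage: 100 });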
export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig) => {
const hostname = config.url ?
new URL(config.url).hostname :
GITLAB_CLOUD_HOSTNAME;
const token = config.token ?
await getTokenFromConfig(config.token) :
hostname === GITLAB_CLOUD_HOSTNAME ?
env.FALLBACK_GITLAB_CLOUD_TOKEN :
undefined;
const api = await createGitLabFromPersonalAccessToken({
token,
url: config.url,
});
let allRepos: ProjectSchema[] = [];
let allWarnings: string[] = [];
if (config.all === true) {
if (hostname !== GITLAB_CLOUD_HOSTNAME) {
@ -61,7 +70,9 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
throw e;
}
} else {
const warning = `Ignoring option all:true in config : host is ${GITLAB_CLOUD_HOSTNAME}`;
logger.warn(warning);
allWarnings = allWarnings.concat(warning);
}
}
@ -87,10 +98,11 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
const status = e?.cause?.response?.status;
if (status === 404) {
const warning = `Group ${group} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -98,9 +110,9 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
}));
throwIfAnyFailed(results);
const { validItems: validRepos, warnings } = processPromiseResults(results);
allRepos = allRepos.concat(validRepos);
allWarnings = allWarnings.concat(warnings);
}
if (config.users) {
@ -124,10 +136,11 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
const status = e?.cause?.response?.status;
if (status === 404) {
const warning = `User ${user} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -135,9 +148,9 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
}));
throwIfAnyFailed(results);
const { validItems: validRepos, warnings } = processPromiseResults(results);
allRepos = allRepos.concat(validRepos);
allWarnings = allWarnings.concat(warnings);
}
if (config.projects) {
@ -160,10 +173,11 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
const status = e?.cause?.response?.status;
if (status === 404) {
const warning = `Project ${project} not found or no access`;
logger.warn(warning);
return {
type: 'warning' as const,
warning
};
}
throw e;
@ -171,9 +185,9 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
}));
throwIfAnyFailed(results);
const { validItems: validRepos, warnings } = processPromiseResults(results);
allRepos = allRepos.concat(validRepos);
allWarnings = allWarnings.concat(warnings);
}
let repos = allRepos
@ -192,8 +206,8 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
logger.debug(`Found ${repos.length} total repositories.`);
return {
repos,
warnings: allWarnings,
};
}
@ -264,3 +278,37 @@ export const shouldExcludeProject = ({
return false;
}
export const getProjectMembers = async (projectId: string, api: InstanceType<typeof Gitlab>) => {
try {
const fetchFn = () => api.ProjectMembers.all(projectId, {
perPage: 100,
includeInherited: true,
});
const members = await fetchWithRetry(fetchFn, `project ${projectId}`, logger);
return members as Array<{ id: number }>;
} catch (error) {
Sentry.captureException(error);
logger.error(`Failed to fetch members for project ${projectId}.`, error);
throw error;
}
}
export const getProjectsForAuthenticatedUser = async (visibility: 'private' | 'internal' | 'public' | 'all' = 'all', api: InstanceType<typeof Gitlab>) => {
try {
const fetchFn = () => api.Projects.all({
membership: true,
...(visibility !== 'all' ? {
visibility,
} : {}),
perPage: 100,
});
const response = await fetchWithRetry(fetchFn, `authenticated user`, logger);
return response;
} catch (error) {
Sentry.captureException(error);
logger.error(`Failed to fetch projects for authenticated user.`, error);
throw error;
}
}


@ -1,41 +1,28 @@
import "./instrument.js";
import * as Sentry from "@sentry/node";
import { PrismaClient } from "@sourcebot/db";
import { createLogger, env, getConfigSettings, getDBConnectionString, hasEntitlement } from "@sourcebot/shared";
import 'express-async-errors';
import { existsSync } from 'fs';
import { mkdir } from 'fs/promises';
import { Redis } from 'ioredis';
import path from 'path';
import { Api } from "./api.js";
import { ConfigManager } from "./configManager.js";
import { ConnectionManager } from './connectionManager.js';
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, SHUTDOWN_SIGNALS } from './constants.js';
import { AccountPermissionSyncer } from "./ee/accountPermissionSyncer.js";
import { GithubAppManager } from "./ee/githubAppManager.js";
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
import { shutdownPosthog } from "./posthog.js";
import { PromClient } from './promClient.js';
import { RepoIndexManager } from "./repoIndexManager.js";
const logger = createLogger('backend-entrypoint');
const reposPath = REPOS_CACHE_DIR;
const indexPath = INDEX_CACHE_DIR;
if (!existsSync(reposPath)) {
await mkdir(reposPath, { recursive: true });
@ -44,38 +31,42 @@ if (!existsSync(indexPath)) {
await mkdir(indexPath, { recursive: true });
}
const prisma = new PrismaClient({
datasources: {
db: {
url: getDBConnectionString(),
},
},
});
const redis = new Redis(env.REDIS_URL, {
maxRetriesPerRequest: null
});
try {
await redis.ping();
logger.info('Connected to redis');
} catch (err: unknown) {
logger.error('Failed to connect to redis. Error:', err);
process.exit(1);
}
const promClient = new PromClient();
const settings = await getConfigSettings(env.CONFIG_PATH);
if (hasEntitlement('github-app')) {
await GithubAppManager.getInstance().init(prisma);
}
const connectionManager = new ConnectionManager(prisma, settings, redis, promClient);
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
const accountPermissionSyncer = new AccountPermissionSyncer(prisma, settings, redis);
const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient);
const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH);
connectionManager.startScheduler();
repoIndexManager.startScheduler();
if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) {
logger.error('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.');
@ -83,31 +74,77 @@ if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('per
}
else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) {
repoPermissionSyncer.startScheduler();
accountPermissionSyncer.startScheduler();
}
const api = new Api(
promClient,
prisma,
connectionManager,
repoIndexManager,
);
logger.info('Worker started.');
const listenToShutdownSignals = () => {
const signals = SHUTDOWN_SIGNALS;
let receivedSignal = false;
const cleanup = async (signal: string) => {
try {
if (receivedSignal) {
return;
}
receivedSignal = true;
logger.info(`Received ${signal}, cleaning up...`);
await repoIndexManager.dispose()
await connectionManager.dispose()
await repoPermissionSyncer.dispose()
await accountPermissionSyncer.dispose()
await configManager.dispose()
await prisma.$disconnect();
await redis.quit();
await api.dispose();
await shutdownPosthog();
logger.info('All workers shut down gracefully');
signals.forEach(sig => process.removeListener(sig, cleanup));
return 0;
} catch (error) {
Sentry.captureException(error);
logger.error('Error shutting down worker:', error);
return 1;
}
}
signals.forEach(signal => {
process.on(signal, (err) => {
cleanup(err).then(code => {
process.exit(code);
});
});
});
// Register handlers for uncaught exceptions and unhandled rejections
process.on('uncaughtException', (err) => {
logger.error(`Uncaught exception: ${err.message}`);
cleanup('uncaughtException').then(() => {
process.exit(1);
});
});
process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`);
cleanup('unhandledRejection').then(() => {
process.exit(1);
});
});
}
listenToShutdownSignals();


@ -1,6 +1,6 @@
import * as Sentry from "@sentry/node";
import { createLogger } from "@sourcebot/shared";
import { env } from "@sourcebot/shared/client";
const logger = createLogger('instrument');


@ -1,12 +1,13 @@
import { env as clientEnv } from "@sourcebot/shared/client";
import { env } from "@sourcebot/shared";
import { PostHog } from 'posthog-node';
import { PosthogEvent, PosthogEventMap } from './posthogEvents.js';
let posthog: PostHog | undefined = undefined;
if (env.POSTHOG_PAPIK) {
posthog = new PostHog(
env.POSTHOG_PAPIK,
{
host: "https://us.i.posthog.com",
}
@ -23,9 +24,11 @@ export function captureEvent<E extends PosthogEvent>(event: E, properties: Posth
event: event,
properties: {
...properties,
sourcebot_version: clientEnv.NEXT_PUBLIC_SOURCEBOT_VERSION,
},
});
}
export async function shutdownPosthog() {
await posthog?.shutdown();
}
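// Editor's note: a usage sketch; the event name and properties are hypothetical and would need to
// exist in PosthogEventMap for this to type-check:
//
//   captureEvent('search_performed', { durationMs: 42 });
//   await shutdownPosthog(); // flush any buffered events before the process exits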


@ -1,109 +1,94 @@
import express, { Request, Response } from 'express';
import client, { Registry, Counter, Gauge } from 'prom-client';
import { createLogger } from "@sourcebot/logger";
const logger = createLogger('prometheus-client');
export class PromClient {
private app: express.Application;
public registry: Registry;
public activeRepoIndexJobs: Gauge<string>;
public pendingRepoIndexJobs: Gauge<string>;
public repoIndexJobReattemptsTotal: Counter<string>;
public repoIndexJobFailTotal: Counter<string>;
public repoIndexJobSuccessTotal: Counter<string>;
public readonly PORT = 3060;
public activeConnectionSyncJobs: Gauge<string>;
public pendingConnectionSyncJobs: Gauge<string>;
public connectionSyncJobReattemptsTotal: Counter<string>;
public connectionSyncJobFailTotal: Counter<string>;
public connectionSyncJobSuccessTotal: Counter<string>;
constructor() {
this.registry = new Registry();
this.activeRepoIndexJobs = new Gauge({
name: 'active_repo_index_jobs',
help: 'The number of repo jobs in progress',
labelNames: ['repo', 'type'],
});
this.registry.registerMetric(this.activeRepoIndexJobs);
this.pendingRepoIndexJobs = new Gauge({
name: 'pending_repo_index_jobs',
help: 'The number of repo jobs waiting in queue',
labelNames: ['repo', 'type'],
});
this.registry.registerMetric(this.pendingRepoIndexJobs);
this.repoIndexJobReattemptsTotal = new Counter({
name: 'repo_index_job_reattempts',
help: 'The number of repo job reattempts',
labelNames: ['repo', 'type'],
});
this.registry.registerMetric(this.repoIndexJobReattemptsTotal);
this.repoIndexJobFailTotal = new Counter({
name: 'repo_index_job_fails',
help: 'The number of repo job fails',
labelNames: ['repo', 'type'],
});
this.registry.registerMetric(this.repoIndexJobFailTotal);
this.repoIndexJobSuccessTotal = new Counter({
name: 'repo_index_job_successes',
help: 'The number of repo job successes',
labelNames: ['repo', 'type'],
});
this.registry.registerMetric(this.repoIndexJobSuccessTotal);
this.activeConnectionSyncJobs = new Gauge({
name: 'active_connection_sync_jobs',
help: 'The number of connection sync jobs in progress',
labelNames: ['connection'],
});
this.registry.registerMetric(this.activeConnectionSyncJobs);
this.pendingConnectionSyncJobs = new Gauge({
name: 'pending_connection_sync_jobs',
help: 'The number of connection sync jobs waiting in queue',
labelNames: ['connection'],
});
this.registry.registerMetric(this.pendingConnectionSyncJobs);
this.connectionSyncJobReattemptsTotal = new Counter({
name: 'connection_sync_job_reattempts',
help: 'The number of connection sync job reattempts',
labelNames: ['connection'],
});
this.registry.registerMetric(this.connectionSyncJobReattemptsTotal);
this.connectionSyncJobFailTotal = new Counter({
name: 'connection_sync_job_fails',
help: 'The number of connection sync job fails',
labelNames: ['connection'],
});
this.registry.registerMetric(this.connectionSyncJobFailTotal);
this.connectionSyncJobSuccessTotal = new Counter({
name: 'connection_sync_job_successes',
help: 'The number of connection sync job successes',
labelNames: ['connection'],
});
this.registry.registerMetric(this.connectionSyncJobSuccessTotal);
client.collectDefaultMetrics({
register: this.registry,
});
this.app = express();
this.app.get('/metrics', async (req: Request, res: Response) => {
res.set('Content-Type', this.registry.contentType);
const metrics = await this.registry.metrics();
res.end(metrics);
});
this.app.listen(this.PORT, () => {
logger.info(`Prometheus metrics server is running on port ${this.PORT}`);
});
}
getRegistry(): Registry {
return this.registry;
}
}
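For reference, a minimal sketch (not part of the diff) of how these labelled metrics are exercised over a job's lifecycle, using the same prom-client library as above; the repo/type label values are illustrative:

import { Counter, Gauge, Registry } from 'prom-client';

const registry = new Registry();
const pendingJobs = new Gauge({
    name: 'pending_repo_index_jobs',
    help: 'The number of repo jobs waiting in queue',
    labelNames: ['repo', 'type'],
    registers: [registry],
});
const jobSuccesses = new Counter({
    name: 'repo_index_job_successes',
    help: 'The number of repo job successes',
    labelNames: ['repo', 'type'],
    registers: [registry],
});

// On enqueue: the job is pending.
pendingJobs.inc({ repo: 'acme/api', type: 'index' });
// On pickup: no longer pending.
pendingJobs.dec({ repo: 'acme/api', type: 'index' });
// On completion: count the success.
jobSuccesses.inc({ repo: 'acme/api', type: 'index' });

// The /metrics endpoint serves the Prometheus text exposition format:
registry.metrics().then(console.log);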


@@ -7,39 +7,42 @@ import { BitbucketRepository, getBitbucketReposFromConfig } from "./bitbucket.js
import { getAzureDevOpsReposFromConfig } from "./azuredevops.js";
import { SchemaRestRepository as BitbucketServerRepository } from "@coderabbitai/bitbucket/server/openapi";
import { SchemaRepository as BitbucketCloudRepository } from "@coderabbitai/bitbucket/cloud/openapi";
import { Prisma, PrismaClient } from '@sourcebot/db';
import { CodeHostType, Prisma } from '@sourcebot/db';
import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js";
import { createLogger } from '@sourcebot/logger';
import { createLogger } from '@sourcebot/shared';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
import { RepoMetadata } from './types.js';
import path from 'path';
import { glob } from 'glob';
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
import assert from 'assert';
import GitUrlParse from 'git-url-parse';
import { RepoMetadata } from '@sourcebot/shared';
import { SINGLE_TENANT_ORG_ID } from './constants.js';
import pLimit from 'p-limit';
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
const logger = createLogger('repo-compile-utils');
// Limit concurrent git operations to prevent resource exhaustion (EAGAIN errors)
// when processing thousands of repositories simultaneously
const MAX_CONCURRENT_GIT_OPERATIONS = 100;
const gitOperationLimit = pLimit(MAX_CONCURRENT_GIT_OPERATIONS);
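A minimal sketch (not part of the diff) of the p-limit pattern used here; the paths are illustrative and the validation call mirrors the helper imported above:

import pLimit from 'p-limit';

// At most 2 of the wrapped callbacks run concurrently; the rest queue up.
const limit = pLimit(2);

const repoPaths = ['/repos/a', '/repos/b', '/repos/c', '/repos/d'];
const results = await Promise.all(
    repoPaths.map((repoPath) => limit(async () => {
        // Each call spawns a git subprocess, so the limiter also caps the
        // number of subprocesses alive at any moment.
        return isPathAValidGitRepoRoot({ path: repoPath });
    }))
);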
type CompileResult = {
repoData: RepoData[],
warnings: string[],
}
export const compileGithubConfig = async (
config: GithubConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient,
abortController: AbortController): Promise<{
repoData: RepoData[],
notFound: {
users: string[],
orgs: string[],
repos: string[],
}
}> => {
const gitHubReposResult = await getGitHubReposFromConfig(config, orgId, db, abortController.signal);
const gitHubRepos = gitHubReposResult.validRepos;
const notFound = gitHubReposResult.notFound;
signal: AbortSignal): Promise<CompileResult> => {
const gitHubReposResult = await getGitHubReposFromConfig(config, signal);
const gitHubRepos = gitHubReposResult.repos;
const warnings = gitHubReposResult.warnings;
const hostUrl = config.url ?? 'https://github.com';
const repoNameRoot = new URL(hostUrl)
@@ -68,7 +71,7 @@ export const compileGithubConfig = async (
isPublic: isPublic,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -100,19 +103,17 @@ export const compileGithubConfig = async (
return {
repoData: repos,
notFound,
warnings,
};
}
export const compileGitlabConfig = async (
config: GitlabConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient) => {
connectionId: number): Promise<CompileResult> => {
const gitlabReposResult = await getGitLabReposFromConfig(config, orgId, db);
const gitlabRepos = gitlabReposResult.validRepos;
const notFound = gitlabReposResult.notFound;
const gitlabReposResult = await getGitLabReposFromConfig(config);
const gitlabRepos = gitlabReposResult.repos;
const warnings = gitlabReposResult.warnings;
const hostUrl = config.url ?? 'https://gitlab.com';
const repoNameRoot = new URL(hostUrl)
@@ -123,7 +124,6 @@ export const compileGitlabConfig = async (
const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
const cloneUrl = new URL(project.http_url_to_repo);
const isFork = project.forked_from_project !== undefined;
// @todo: we will need to double check whether 'internal' should also be considered public or not.
const isPublic = project.visibility === 'public';
const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName);
@@ -147,7 +147,7 @@ export const compileGitlabConfig = async (
isArchived: !!project.archived,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -177,19 +177,17 @@ export const compileGitlabConfig = async (
return {
repoData: repos,
notFound,
warnings,
};
}
export const compileGiteaConfig = async (
config: GiteaConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient) => {
connectionId: number): Promise<CompileResult> => {
const giteaReposResult = await getGiteaReposFromConfig(config, orgId, db);
const giteaRepos = giteaReposResult.validRepos;
const notFound = giteaReposResult.notFound;
const giteaReposResult = await getGiteaReposFromConfig(config);
const giteaRepos = giteaReposResult.repos;
const warnings = giteaReposResult.warnings;
const hostUrl = config.url ?? 'https://gitea.com';
const repoNameRoot = new URL(hostUrl)
@@ -220,7 +218,7 @@ export const compileGiteaConfig = async (
isArchived: !!repo.archived,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -248,14 +246,13 @@ export const compileGiteaConfig = async (
return {
repoData: repos,
notFound,
warnings,
};
}
export const compileGerritConfig = async (
config: GerritConnectionConfig,
connectionId: number,
orgId: number) => {
connectionId: number): Promise<CompileResult> => {
const gerritRepos = await getGerritReposFromConfig(config);
const hostUrl = config.url;
@@ -301,7 +298,7 @@ export const compileGerritConfig = async (
isArchived: false,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -329,23 +326,17 @@ export const compileGerritConfig = async (
return {
repoData: repos,
notFound: {
users: [],
orgs: [],
repos: [],
}
warnings: [],
};
}
export const compileBitbucketConfig = async (
config: BitbucketConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient) => {
connectionId: number): Promise<CompileResult> => {
const bitbucketReposResult = await getBitbucketReposFromConfig(config, orgId, db);
const bitbucketRepos = bitbucketReposResult.validRepos;
const notFound = bitbucketReposResult.notFound;
const bitbucketReposResult = await getBitbucketReposFromConfig(config);
const bitbucketRepos = bitbucketReposResult.repos;
const warnings = bitbucketReposResult.warnings;
const hostUrl = config.url ?? 'https://bitbucket.org';
const repoNameRoot = new URL(hostUrl)
@@ -399,7 +390,7 @@ export const compileBitbucketConfig = async (
const repos = bitbucketRepos.map((repo) => {
const isServer = config.deploymentType === 'server';
const codeHostType = isServer ? 'bitbucket-server' : 'bitbucket-cloud'; // zoekt expects bitbucket-server
const codeHostType: CodeHostType = isServer ? 'bitbucketServer' : 'bitbucketCloud';
const displayName = isServer ? (repo as BitbucketServerRepository).name! : (repo as BitbucketCloudRepository).full_name!;
const externalId = isServer ? (repo as BitbucketServerRepository).id!.toString() : (repo as BitbucketCloudRepository).uuid!;
const isPublic = isServer ? (repo as BitbucketServerRepository).public : (repo as BitbucketCloudRepository).is_private === false;
@@ -422,7 +413,7 @@ export const compileBitbucketConfig = async (
isArchived: isArchived,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -432,7 +423,8 @@ export const compileBitbucketConfig = async (
},
metadata: {
gitConfig: {
'zoekt.web-url-type': codeHostType,
// zoekt expects bitbucket-server and bitbucket-cloud
'zoekt.web-url-type': codeHostType === 'bitbucketServer' ? 'bitbucket-server' : 'bitbucket-cloud',
'zoekt.web-url': webUrl,
'zoekt.name': repoName,
'zoekt.archived': marshalBool(isArchived),
@@ -450,21 +442,20 @@ export const compileBitbucketConfig = async (
return {
repoData: repos,
notFound,
warnings,
};
}
export const compileGenericGitHostConfig = async (
config: GenericGitHostConnectionConfig,
connectionId: number,
orgId: number,
) => {
connectionId: number
): Promise<CompileResult> => {
const configUrl = new URL(config.url);
if (configUrl.protocol === 'file:') {
return compileGenericGitHostConfig_file(config, orgId, connectionId);
return compileGenericGitHostConfig_file(config, connectionId);
}
else if (configUrl.protocol === 'http:' || configUrl.protocol === 'https:') {
return compileGenericGitHostConfig_url(config, orgId, connectionId);
return compileGenericGitHostConfig_url(config, connectionId);
}
else {
// Schema should prevent this, but throw an error just in case.
@@ -474,9 +465,8 @@ export const compileGenericGitHostConfig = async (
export const compileGenericGitHostConfig_file = async (
config: GenericGitHostConnectionConfig,
orgId: number,
connectionId: number,
) => {
): Promise<CompileResult> => {
const configUrl = new URL(config.url);
assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');
@@ -486,28 +476,24 @@ export const compileGenericGitHostConfig_file = async (
});
const repos: RepoData[] = [];
const notFound: {
users: string[],
orgs: string[],
repos: string[],
} = {
users: [],
orgs: [],
repos: [],
};
const warnings: string[] = [];
await Promise.all(repoPaths.map(async (repoPath) => {
const isGitRepo = await isPathAValidGitRepoRoot(repoPath);
await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
const isGitRepo = await isPathAValidGitRepoRoot({
path: repoPath,
});
if (!isGitRepo) {
logger.warn(`Skipping ${repoPath} - not a git repository.`);
notFound.repos.push(repoPath);
const warning = `Skipping ${repoPath} - not a git repository.`;
logger.warn(warning);
warnings.push(warning);
return;
}
const origin = await getOriginUrl(repoPath);
if (!origin) {
logger.warn(`Skipping ${repoPath} - remote.origin.url not found in git config.`);
notFound.repos.push(repoPath);
const warning = `Skipping ${repoPath} - remote.origin.url not found in git config.`;
logger.warn(warning);
warnings.push(warning);
return;
}
@@ -518,7 +504,7 @@ export const compileGenericGitHostConfig_file = async (
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repo: RepoData = {
external_codeHostType: 'generic-git-host',
external_codeHostType: 'genericGitHost',
external_codeHostUrl: remoteUrl.resource,
external_id: remoteUrl.toString(),
cloneUrl: `file://${repoPath}`,
@ -528,7 +514,7 @@ export const compileGenericGitHostConfig_file = async (
isArchived: false,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -546,40 +532,33 @@ export const compileGenericGitHostConfig_file = async (
}
repos.push(repo);
}));
})));
return {
repoData: repos,
notFound,
warnings,
}
}
export const compileGenericGitHostConfig_url = async (
config: GenericGitHostConnectionConfig,
orgId: number,
connectionId: number,
) => {
): Promise<CompileResult> => {
const remoteUrl = new URL(config.url);
assert(remoteUrl.protocol === 'http:' || remoteUrl.protocol === 'https:', 'config.url must be a http:// or https:// URL');
const notFound: {
users: string[],
orgs: string[],
repos: string[],
} = {
users: [],
orgs: [],
repos: [],
};
const warnings: string[] = [];
// Validate that we are dealing with a valid git repo.
const isGitRepo = await isUrlAValidGitRepo(remoteUrl.toString());
if (!isGitRepo) {
notFound.repos.push(remoteUrl.toString());
const warning = `Skipping ${remoteUrl.toString()} - not a git repository.`;
logger.warn(warning);
warnings.push(warning);
return {
repoData: [],
notFound,
warnings,
}
}
@@ -588,7 +567,7 @@ export const compileGenericGitHostConfig_url = async (
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repo: RepoData = {
external_codeHostType: 'generic-git-host',
external_codeHostType: 'genericGitHost',
external_codeHostUrl: remoteUrl.origin,
external_id: remoteUrl.toString(),
cloneUrl: remoteUrl.toString(),
@@ -598,7 +577,7 @@ export const compileGenericGitHostConfig_url = async (
isArchived: false,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -614,20 +593,17 @@ export const compileGenericGitHostConfig_url = async (
return {
repoData: [repo],
notFound,
warnings,
}
}
export const compileAzureDevOpsConfig = async (
config: AzureDevOpsConnectionConfig,
connectionId: number,
orgId: number,
db: PrismaClient,
abortController: AbortController) => {
connectionId: number): Promise<CompileResult> => {
const azureDevOpsReposResult = await getAzureDevOpsReposFromConfig(config, orgId, db);
const azureDevOpsRepos = azureDevOpsReposResult.validRepos;
const notFound = azureDevOpsReposResult.notFound;
const azureDevOpsReposResult = await getAzureDevOpsReposFromConfig(config);
const azureDevOpsRepos = azureDevOpsReposResult.repos;
const warnings = azureDevOpsReposResult.warnings;
const hostUrl = config.url ?? 'https://dev.azure.com';
const repoNameRoot = new URL(hostUrl)
@@ -669,7 +645,7 @@ export const compileAzureDevOpsConfig = async (
isPublic: isPublic,
org: {
connect: {
id: orgId,
id: SINGLE_TENANT_ORG_ID,
},
},
connections: {
@@ -697,6 +673,6 @@ export const compileAzureDevOpsConfig = async (
return {
repoData: repos,
notFound,
warnings,
};
}


@@ -0,0 +1,612 @@
import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/shared";
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared';
import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises';
import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis';
import micromatch from 'micromatch';
import { GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS, INDEX_CACHE_DIR } from './constants.js';
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';
const LOG_TAG = 'repo-index-manager';
const logger = createLogger(LOG_TAG);
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
type JobPayload = {
type: 'INDEX' | 'CLEANUP';
jobId: string;
repoId: number;
repoName: string;
};
/**
* Manages the lifecycle of repository data on disk, including git working copies
* and search index shards. Handles both indexing operations (cloning/fetching repos
* and building search indexes) and cleanup operations (removing orphaned repos and
* their associated data).
*
* Uses a job queue system to process indexing and cleanup tasks asynchronously,
* with configurable concurrency limits and retry logic. Automatically schedules
* re-indexing of repos based on configured intervals and manages garbage collection
* of repos that are no longer connected to any source.
*/
export class RepoIndexManager {
private interval?: NodeJS.Timeout;
private queue: Queue<JobPayload>;
private worker: Worker<JobPayload>;
constructor(
private db: PrismaClient,
private settings: Settings,
private redis: Redis,
private promClient: PromClient,
) {
this.queue = new Queue<JobPayload>({
redis,
namespace: 'repo-index-queue',
jobTimeoutMs: this.settings.repoIndexTimeoutMs,
maxAttempts: 3,
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
});
this.worker = new Worker<JobPayload>({
queue: this.queue,
maxStalledCount: 1,
handler: this.runJob.bind(this),
concurrency: this.settings.maxRepoIndexingJobConcurrency,
...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
logger: true,
} : {}),
});
this.worker.on('completed', this.onJobCompleted.bind(this));
this.worker.on('failed', this.onJobFailed.bind(this));
this.worker.on('stalled', this.onJobStalled.bind(this));
this.worker.on('error', this.onWorkerError.bind(this));
// graceful-timeout is triggered when a job is still processing after
// worker.close() is called and the timeout period has elapsed. In this case,
// we fail the job with no retry.
this.worker.on('graceful-timeout', this.onJobGracefulTimeout.bind(this));
}
public startScheduler() {
logger.debug('Starting scheduler');
this.interval = setIntervalAsync(async () => {
await this.scheduleIndexJobs();
await this.scheduleCleanupJobs();
}, this.settings.reindexRepoPollingIntervalMs);
this.worker.run();
}
private async scheduleIndexJobs() {
const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs);
const timeoutDate = new Date(Date.now() - this.settings.repoIndexTimeoutMs);
const reposToIndex = await this.db.repo.findMany({
where: {
AND: [
{
OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } },
]
},
{
NOT: {
jobs: {
some: {
AND: [
{
type: RepoIndexingJobType.INDEX,
},
{
OR: [
// Don't schedule if there are active jobs that were created within the threshold date.
// This handles the case where a job is stuck in a pending state and will never be scheduled.
{
AND: [
{
status: {
in: [
RepoIndexingJobStatus.PENDING,
RepoIndexingJobStatus.IN_PROGRESS,
]
},
},
{
createdAt: {
gt: timeoutDate,
}
}
]
},
// Don't schedule if there are recent failed jobs (within the threshold date).
{
AND: [
{ status: RepoIndexingJobStatus.FAILED },
{ completedAt: { gt: thresholdDate } },
]
}
]
}
]
}
}
}
}
],
},
});
if (reposToIndex.length > 0) {
await this.createJobs(reposToIndex, RepoIndexingJobType.INDEX);
}
}
private async scheduleCleanupJobs() {
const gcGracePeriodMs = new Date(Date.now() - this.settings.repoGarbageCollectionGracePeriodMs);
const timeoutDate = new Date(Date.now() - this.settings.repoIndexTimeoutMs);
const reposToCleanup = await this.db.repo.findMany({
where: {
connections: {
none: {}
},
OR: [
{ indexedAt: null },
{ indexedAt: { lt: gcGracePeriodMs } },
],
NOT: {
jobs: {
some: {
AND: [
{
type: RepoIndexingJobType.CLEANUP,
},
{
status: {
in: [
RepoIndexingJobStatus.PENDING,
RepoIndexingJobStatus.IN_PROGRESS,
]
},
},
{
createdAt: {
gt: timeoutDate,
}
}
]
}
}
}
}
});
if (reposToCleanup.length > 0) {
await this.createJobs(reposToCleanup, RepoIndexingJobType.CLEANUP);
}
}
public async createJobs(repos: Repo[], type: RepoIndexingJobType) {
// @note: we don't perform this in a transaction because
// we want to avoid the situation where a job is created and run
// prior to the transaction being committed.
const jobs = await this.db.repoIndexingJob.createManyAndReturn({
data: repos.map(repo => ({
type,
repoId: repo.id,
})),
include: {
repo: true,
}
});
for (const job of jobs) {
await this.queue.add({
groupId: `repo:${job.repoId}`,
data: {
jobId: job.id,
type,
repoName: job.repo.name,
repoId: job.repo.id,
},
jobId: job.id,
});
const jobTypeLabel = getJobTypePrometheusLabel(type);
this.promClient.pendingRepoIndexJobs.inc({ repo: job.repo.name, type: jobTypeLabel });
}
return jobs.map(job => job.id);
}
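As an aside: groupmq runs jobs that share a groupId strictly in order, so keying the group by repo id (as above) should prevent an INDEX and a CLEANUP job for the same repo from ever running concurrently. An illustrative sketch (not part of the diff; payload values are made up, and the constructor options mirror those used in this class):

import { Queue } from 'groupmq';
import { Redis } from 'ioredis';

const queue = new Queue<JobPayload>({ redis: new Redis(), namespace: 'repo-index-queue' });

// Jobs sharing a groupId are processed one at a time, in insertion order:
await queue.add({ groupId: 'repo:42', data: { jobId: 'a', type: 'INDEX', repoId: 42, repoName: 'acme/api' }, jobId: 'a' });
await queue.add({ groupId: 'repo:42', data: { jobId: 'b', type: 'CLEANUP', repoId: 42, repoName: 'acme/api' }, jobId: 'b' });

// A different repo lands in a different group and may be processed in parallel:
await queue.add({ groupId: 'repo:7', data: { jobId: 'c', type: 'INDEX', repoId: 7, repoName: 'acme/web' }, jobId: 'c' });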
private async runJob(job: ReservedJob<JobPayload>) {
const id = job.data.jobId;
const logger = createJobLogger(id);
logger.info(`Running ${job.data.type} job ${id} for repo ${job.data.repoName} (id: ${job.data.repoId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
const currentStatus = await this.db.repoIndexingJob.findUniqueOrThrow({
where: {
id,
},
select: {
status: true,
}
});
// Fail safe: if the job is not PENDING (first run) or IN_PROGRESS (retry), it indicates the job
// is in an invalid state and should be skipped.
if (
currentStatus.status !== RepoIndexingJobStatus.PENDING &&
currentStatus.status !== RepoIndexingJobStatus.IN_PROGRESS
) {
throw new Error(`Job ${id} is not in a valid state. Expected: ${RepoIndexingJobStatus.PENDING} or ${RepoIndexingJobStatus.IN_PROGRESS}. Actual: ${currentStatus.status}. Skipping.`);
}
const { repo, type: jobType } = await this.db.repoIndexingJob.update({
where: {
id,
},
data: {
status: RepoIndexingJobStatus.IN_PROGRESS,
},
select: {
type: true,
repo: {
include: {
connections: {
include: {
connection: true,
}
}
}
}
}
});
const jobTypeLabel = getJobTypePrometheusLabel(jobType);
this.promClient.pendingRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
this.promClient.activeRepoIndexJobs.inc({ repo: job.data.repoName, type: jobTypeLabel });
const abortController = new AbortController();
const signalHandler = () => {
logger.info(`Received shutdown signal, aborting...`);
abortController.abort(); // This cancels all operations
};
process.on('SIGTERM', signalHandler);
process.on('SIGINT', signalHandler);
try {
if (jobType === RepoIndexingJobType.INDEX) {
const revisions = await this.indexRepository(repo, logger, abortController.signal);
await this.db.repoIndexingJob.update({
where: { id },
data: {
metadata: {
indexedRevisions: revisions,
} satisfies RepoIndexingJobMetadata,
},
});
} else if (jobType === RepoIndexingJobType.CLEANUP) {
await this.cleanupRepository(repo, logger);
}
} finally {
process.off('SIGTERM', signalHandler);
process.off('SIGINT', signalHandler);
}
}
private async indexRepository(repo: RepoWithConnections, logger: Logger, signal: AbortSignal) {
const { path: repoPath, isReadOnly } = getRepoPath(repo);
const metadata = repoMetadataSchema.parse(repo.metadata);
const credentials = await getAuthCredentialsForRepo(repo);
const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? repo.cloneUrl;
const authHeader = credentials?.authHeader ?? undefined;
// If the repo path exists but it is not a valid git repository root, this indicates
// that the repository is in a bad state. To fix, we remove the directory and perform
// a fresh clone.
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot({ path: repoPath }))) {
const isValidGitRepo = await isPathAValidGitRepoRoot({
path: repoPath,
signal,
});
if (!isValidGitRepo && !isReadOnly) {
logger.warn(`${repoPath} is not a valid git repository root. Deleting directory and performing fresh clone.`);
await rm(repoPath, { recursive: true, force: true });
}
}
if (existsSync(repoPath) && !isReadOnly) {
// @NOTE: in #483, we changed the cloning method s.t., we _no longer_
// write the clone URL (which could contain an auth token) to the
// `remote.origin.url` entry. For the upgrade scenario, we want
// to unset this key since it is no longer needed, hence this line.
// This will no-op if the key is already unset.
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
await unsetGitConfig({
path: repoPath,
keys: ["remote.origin.url"],
signal,
});
logger.info(`Fetching ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => fetchRepository({
cloneUrl: cloneUrlMaybeWithToken,
authHeader,
path: repoPath,
onProgress: ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
},
signal,
}));
const fetchDuration_s = durationMs / 1000;
process.stdout.write('\n');
logger.info(`Fetched ${repo.name} (id: ${repo.id}) in ${fetchDuration_s}s`);
} else if (!isReadOnly) {
logger.info(`Cloning ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => cloneRepository({
cloneUrl: cloneUrlMaybeWithToken,
authHeader,
path: repoPath,
onProgress: ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
},
signal
}));
const cloneDuration_s = durationMs / 1000;
process.stdout.write('\n');
logger.info(`Cloned ${repo.name} (id: ${repo.id}) in ${cloneDuration_s}s`);
}
// Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we
// have in the DB.
if (metadata.gitConfig && !isReadOnly) {
await upsertGitConfig({
path: repoPath,
gitConfig: metadata.gitConfig,
signal,
});
}
let revisions = [
'HEAD'
];
if (metadata.branches) {
const branchGlobs = metadata.branches
const allBranches = await getBranches(repoPath);
const matchingBranches =
allBranches
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
.map((branch) => `refs/heads/${branch}`);
revisions = [
...revisions,
...matchingBranches
];
}
if (metadata.tags) {
const tagGlobs = metadata.tags;
const allTags = await getTags(repoPath);
const matchingTags =
allTags
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
.map((tag) => `refs/tags/${tag}`);
revisions = [
...revisions,
...matchingTags
];
}
// zoekt has a limit of 64 branches/tags to index.
if (revisions.length > 64) {
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
captureEvent('backend_revisions_truncated', {
repoId: repo.id,
revisionCount: revisions.length,
});
revisions = revisions.slice(0, 64);
}
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
return revisions;
}
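The revision selection above reduces to glob matching plus a hard cap; a small worked example (not part of the diff; branch names are illustrative):

import micromatch from 'micromatch';

const allBranches = ['main', 'release/1.0', 'release/1.1', 'feature/x'];
const branchGlobs = ['main', 'release/*'];

let revisions = [
    'HEAD',
    ...allBranches
        .filter((branch) => micromatch.isMatch(branch, branchGlobs))
        .map((branch) => `refs/heads/${branch}`),
];
// -> ['HEAD', 'refs/heads/main', 'refs/heads/release/1.0', 'refs/heads/release/1.1']

// zoekt indexes at most 64 branches/tags per repo, so the list is truncated:
revisions = revisions.slice(0, 64);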
private async cleanupRepository(repo: Repo, logger: Logger) {
const { path: repoPath, isReadOnly } = getRepoPath(repo);
if (existsSync(repoPath) && !isReadOnly) {
logger.info(`Deleting repo directory ${repoPath}`);
await rm(repoPath, { recursive: true, force: true });
}
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const files = (await readdir(INDEX_CACHE_DIR)).filter(file => file.startsWith(shardPrefix));
for (const file of files) {
const filePath = `${INDEX_CACHE_DIR}/${file}`;
logger.info(`Deleting shard file ${filePath}`);
await rm(filePath, { force: true });
}
}
private onJobCompleted = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobCompleted', logger, async () => {
const logger = createJobLogger(job.data.jobId);
const jobData = await this.db.repoIndexingJob.update({
where: { id: job.data.jobId },
data: {
status: RepoIndexingJobStatus.COMPLETED,
completedAt: new Date(),
},
include: {
repo: true,
}
});
const jobTypeLabel = getJobTypePrometheusLabel(jobData.type);
if (jobData.type === RepoIndexingJobType.INDEX) {
const { path: repoPath } = getRepoPath(jobData.repo);
const commitHash = await getCommitHashForRefName({
path: repoPath,
refName: 'HEAD',
});
const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata);
const repo = await this.db.repo.update({
where: { id: jobData.repoId },
data: {
indexedAt: new Date(),
indexedCommitHash: commitHash,
metadata: {
...(jobData.repo.metadata as RepoMetadata),
indexedRevisions: jobMetadata.indexedRevisions,
} satisfies RepoMetadata,
}
});
logger.info(`Completed index job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id})`);
}
else if (jobData.type === RepoIndexingJobType.CLEANUP) {
const repo = await this.db.repo.delete({
where: { id: jobData.repoId },
});
logger.info(`Completed cleanup job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id})`);
}
// Track metrics for successful job
this.promClient.activeRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
this.promClient.repoIndexJobSuccessTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
});
private onJobFailed = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobFailed', logger, async () => {
const logger = createJobLogger(job.data.jobId);
const attempt = job.attemptsMade + 1;
const wasLastAttempt = attempt >= job.opts.attempts;
const jobTypeLabel = getJobTypePrometheusLabel(job.data.type);
if (wasLastAttempt) {
const { repo } = await this.db.repoIndexingJob.update({
where: { id: job.data.jobId },
data: {
status: RepoIndexingJobStatus.FAILED,
completedAt: new Date(),
errorMessage: job.failedReason,
},
select: { repo: true }
});
this.promClient.activeRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
this.promClient.repoIndexJobFailTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
logger.error(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Failing job.`);
} else {
const repo = await this.db.repo.findUniqueOrThrow({
where: { id: job.data.repoId },
});
this.promClient.repoIndexJobReattemptsTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
logger.warn(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Retrying.`);
}
});
private onJobStalled = async (jobId: string) =>
groupmqLifecycleExceptionWrapper('onJobStalled', logger, async () => {
const logger = createJobLogger(jobId);
const { repo, type } = await this.db.repoIndexingJob.update({
where: { id: jobId },
data: {
status: RepoIndexingJobStatus.FAILED,
completedAt: new Date(),
errorMessage: 'Job stalled',
},
select: { repo: true, type: true }
});
const jobTypeLabel = getJobTypePrometheusLabel(type);
this.promClient.activeRepoIndexJobs.dec({ repo: repo.name, type: jobTypeLabel });
this.promClient.repoIndexJobFailTotal.inc({ repo: repo.name, type: jobTypeLabel });
logger.error(`Job ${jobId} stalled for repo ${repo.name} (id: ${repo.id})`);
});
private onJobGracefulTimeout = async (job: Job<JobPayload>) =>
groupmqLifecycleExceptionWrapper('onJobGracefulTimeout', logger, async () => {
const logger = createJobLogger(job.data.jobId);
const jobTypeLabel = getJobTypePrometheusLabel(job.data.type);
const { repo } = await this.db.repoIndexingJob.update({
where: { id: job.data.jobId },
data: {
status: RepoIndexingJobStatus.FAILED,
completedAt: new Date(),
errorMessage: 'Job timed out',
},
select: { repo: true }
});
this.promClient.activeRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
this.promClient.repoIndexJobFailTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
logger.error(`Job ${job.data.jobId} timed out for repo ${repo.name} (id: ${repo.id}). Failing job.`);
});
private async onWorkerError(error: Error) {
Sentry.captureException(error);
logger.error(`Index syncer worker error.`, error);
}
public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
const inProgressJobs = this.worker.getCurrentJobs();
await this.worker.close(GROUPMQ_WORKER_STOP_GRACEFUL_TIMEOUT_MS);
// Manually release group locks for in progress jobs to prevent deadlocks.
// @see: https://github.com/Openpanel-dev/groupmq/issues/8
for (const { job } of inProgressJobs) {
const lockKey = `groupmq:repo-index-queue:lock:${job.groupId}`;
logger.debug(`Releasing group lock ${lockKey} for in progress job ${job.id}`);
await this.redis.del(lockKey);
}
// @note: As of groupmq v1.0.0, queue.close() will just close the underlying
// redis connection. Since we share the same redis client between the queue and worker, we skip this
// step and close the redis client directly in index.ts.
// await this.queue.close();
}
}
const getJobTypePrometheusLabel = (type: RepoIndexingJobType) => type === RepoIndexingJobType.INDEX ? 'index' : 'cleanup';
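A hypothetical bootstrap showing how the manager might be wired up (the real wiring lives in index.ts; the Settings value and the no-arg PromClient constructor are assumptions):

import { PrismaClient } from '@sourcebot/db';
import { Redis } from 'ioredis';

const db = new PrismaClient();
const redis = new Redis(); // connection options omitted
const promClient = new PromClient(); // assumed no-arg constructor
declare const settings: Settings; // loaded elsewhere

const manager = new RepoIndexManager(db, settings, redis, promClient);
manager.startScheduler(); // polls the DB, enqueues INDEX / CLEANUP jobs, starts the worker

process.on('SIGTERM', async () => {
    await manager.dispose(); // drains in-flight jobs and releases group locks
    await redis.quit();
});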


@@ -1,563 +0,0 @@
import * as Sentry from "@sentry/node";
import { PrismaClient, Repo, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { Job, Queue, Worker } from 'bullmq';
import { existsSync, promises, readdirSync } from 'fs';
import { Redis } from 'ioredis';
import { env } from './env.js';
import { cloneRepository, fetchRepository, unsetGitConfig, upsertGitConfig } from "./git.js";
import { PromClient } from './promClient.js';
import { AppContext, RepoWithConnections, Settings, repoMetadataSchema } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, measure } from "./utils.js";
import { indexGitRepository } from "./zoekt.js";
const REPO_INDEXING_QUEUE = 'repoIndexingQueue';
const REPO_GC_QUEUE = 'repoGarbageCollectionQueue';
type RepoIndexingPayload = {
repo: RepoWithConnections,
}
type RepoGarbageCollectionPayload = {
repo: Repo,
}
const logger = createLogger('repo-manager');
export class RepoManager {
private indexWorker: Worker;
private indexQueue: Queue<RepoIndexingPayload>;
private gcWorker: Worker;
private gcQueue: Queue<RepoGarbageCollectionPayload>;
private interval?: NodeJS.Timeout;
constructor(
private db: PrismaClient,
private settings: Settings,
redis: Redis,
private promClient: PromClient,
private ctx: AppContext,
) {
// Repo indexing
this.indexQueue = new Queue<RepoIndexingPayload>(REPO_INDEXING_QUEUE, {
connection: redis,
});
this.indexWorker = new Worker(REPO_INDEXING_QUEUE, this.runIndexJob.bind(this), {
connection: redis,
concurrency: this.settings.maxRepoIndexingJobConcurrency,
});
this.indexWorker.on('completed', this.onIndexJobCompleted.bind(this));
this.indexWorker.on('failed', this.onIndexJobFailed.bind(this));
// Garbage collection
this.gcQueue = new Queue<RepoGarbageCollectionPayload>(REPO_GC_QUEUE, {
connection: redis,
});
this.gcWorker = new Worker(REPO_GC_QUEUE, this.runGarbageCollectionJob.bind(this), {
connection: redis,
concurrency: this.settings.maxRepoGarbageCollectionJobConcurrency,
});
this.gcWorker.on('completed', this.onGarbageCollectionJobCompleted.bind(this));
this.gcWorker.on('failed', this.onGarbageCollectionJobFailed.bind(this));
}
public startScheduler() {
logger.debug('Starting scheduler');
this.interval = setInterval(async () => {
await this.fetchAndScheduleRepoIndexing();
await this.fetchAndScheduleRepoGarbageCollection();
await this.fetchAndScheduleRepoTimeouts();
}, this.settings.reindexRepoPollingIntervalMs);
}
///////////////////////////
// Repo indexing
///////////////////////////
private async scheduleRepoIndexingBulk(repos: RepoWithConnections[]) {
await this.db.$transaction(async (tx) => {
await tx.repo.updateMany({
where: { id: { in: repos.map(repo => repo.id) } },
data: { repoIndexingStatus: RepoIndexingStatus.IN_INDEX_QUEUE }
});
const reposByOrg = repos.reduce<Record<number, RepoWithConnections[]>>((acc, repo) => {
if (!acc[repo.orgId]) {
acc[repo.orgId] = [];
}
acc[repo.orgId].push(repo);
return acc;
}, {});
for (const orgId in reposByOrg) {
const orgRepos = reposByOrg[orgId];
// Set priority based on number of repos (more repos = lower priority)
// This helps prevent large orgs from overwhelming the indexQueue
const priority = Math.min(Math.ceil(orgRepos.length / 10), 2097152);
await this.indexQueue.addBulk(orgRepos.map(repo => ({
name: 'repoIndexJob',
data: { repo },
opts: {
priority: priority,
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
},
})));
// Increment pending jobs counter for each repo added
orgRepos.forEach(repo => {
this.promClient.pendingRepoIndexingJobs.inc({ repo: repo.id.toString() });
});
logger.info(`Added ${orgRepos.length} jobs to indexQueue for org ${orgId} with priority ${priority}`);
}
}).catch((err: unknown) => {
logger.error(`Failed to add jobs to indexQueue for repos ${repos.map(repo => repo.id).join(', ')}: ${err}`);
});
}
private async fetchAndScheduleRepoIndexing() {
const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs);
const repos = await this.db.repo.findMany({
where: {
OR: [
// "NEW" is really a misnomer here - it just means that the repo needs to be indexed
// immediately. In most cases, this will be because the repo was just created and
// is indeed "new". However, it could also be that a "retry" was requested on a failed
// index. So, we don't want to block on the indexedAt timestamp here.
{
repoIndexingStatus: RepoIndexingStatus.NEW,
},
// When the repo has already been indexed, we only want to reindex if the reindexing
// interval has elapsed (or if the date isn't set for some reason).
{
AND: [
{ repoIndexingStatus: RepoIndexingStatus.INDEXED },
{
OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } },
]
}
]
}
]
},
include: {
connections: {
include: {
connection: true
}
}
}
});
if (repos.length > 0) {
await this.scheduleRepoIndexingBulk(repos);
}
}
private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) {
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
const metadata = repoMetadataSchema.parse(repo.metadata);
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
if (repoAlreadyInIndexingState && existsSync(repoPath) && !isReadOnly) {
logger.info(`Deleting repo directory ${repoPath} during sync because it was already in the indexing state`);
await promises.rm(repoPath, { recursive: true, force: true });
}
const credentials = await getAuthCredentialsForRepo(repo, this.db);
const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? repo.cloneUrl;
if (existsSync(repoPath) && !isReadOnly) {
// @NOTE: in #483, we changed the cloning method s.t., we _no longer_
// write the clone URL (which could contain an auth token) to the
// `remote.origin.url` entry. For the upgrade scenario, we want
// to unset this key since it is no longer needed, hence this line.
// This will no-op if the key is already unset.
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
await unsetGitConfig(repoPath, ["remote.origin.url"]);
logger.info(`Fetching ${repo.displayName}...`);
const { durationMs } = await measure(() => fetchRepository({
cloneUrl: cloneUrlMaybeWithToken,
path: repoPath,
onProgress: ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}
}));
const fetchDuration_s = durationMs / 1000;
process.stdout.write('\n');
logger.info(`Fetched ${repo.displayName} in ${fetchDuration_s}s`);
} else if (!isReadOnly) {
logger.info(`Cloning ${repo.displayName}...`);
const { durationMs } = await measure(() => cloneRepository({
cloneUrl: cloneUrlMaybeWithToken,
path: repoPath,
onProgress: ({ method, stage, progress }) => {
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`)
}
}));
const cloneDuration_s = durationMs / 1000;
process.stdout.write('\n');
logger.info(`Cloned ${repo.displayName} in ${cloneDuration_s}s`);
}
// Regardless of clone or fetch, always upsert the git config for the repo.
// This ensures that the git config is always up to date for whatever we
// have in the DB.
if (metadata.gitConfig && !isReadOnly) {
await upsertGitConfig(repoPath, metadata.gitConfig);
}
logger.info(`Indexing ${repo.displayName}...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.displayName} in ${indexDuration_s}s`);
}
private async runIndexJob(job: Job<RepoIndexingPayload>) {
logger.info(`Running index job (id: ${job.id}) for repo ${job.data.repo.displayName}`);
const repo = job.data.repo as RepoWithConnections;
// We have to use the existing repo object to get the repoIndexingStatus because the repo object
// inside the job is unchanged from when it was added to the queue.
const existingRepo = await this.db.repo.findUnique({
where: {
id: repo.id,
},
});
if (!existingRepo) {
logger.error(`Repo ${repo.id} not found`);
const e = new Error(`Repo ${repo.id} not found`);
Sentry.captureException(e);
throw e;
}
const repoAlreadyInIndexingState = existingRepo.repoIndexingStatus === RepoIndexingStatus.INDEXING;
await this.db.repo.update({
where: {
id: repo.id,
},
data: {
repoIndexingStatus: RepoIndexingStatus.INDEXING,
}
});
this.promClient.activeRepoIndexingJobs.inc();
this.promClient.pendingRepoIndexingJobs.dec({ repo: repo.id.toString() });
let attempts = 0;
const maxAttempts = 3;
while (attempts < maxAttempts) {
try {
await this.syncGitRepository(repo, repoAlreadyInIndexingState);
break;
} catch (error) {
Sentry.captureException(error);
attempts++;
this.promClient.repoIndexingReattemptsTotal.inc();
if (attempts === maxAttempts) {
logger.error(`Failed to sync repository ${repo.name} (id: ${repo.id}) after ${maxAttempts} attempts. Error: ${error}`);
throw error;
}
const sleepDuration = (env.REPO_SYNC_RETRY_BASE_SLEEP_SECONDS * 1000) * Math.pow(2, attempts - 1);
logger.error(`Failed to sync repository ${repo.name} (id: ${repo.id}), attempt ${attempts}/${maxAttempts}. Sleeping for ${sleepDuration / 1000}s... Error: ${error}`);
await new Promise(resolve => setTimeout(resolve, sleepDuration));
}
}
}
private async onIndexJobCompleted(job: Job<RepoIndexingPayload>) {
logger.info(`Repo index job for repo ${job.data.repo.displayName} (id: ${job.data.repo.id}, jobId: ${job.id}) completed`);
this.promClient.activeRepoIndexingJobs.dec();
this.promClient.repoIndexingSuccessTotal.inc();
await this.db.repo.update({
where: {
id: job.data.repo.id,
},
data: {
indexedAt: new Date(),
repoIndexingStatus: RepoIndexingStatus.INDEXED,
}
});
}
private async onIndexJobFailed(job: Job<RepoIndexingPayload> | undefined, err: unknown) {
logger.info(`Repo index job for repo ${job?.data.repo.displayName} (id: ${job?.data.repo.id}, jobId: ${job?.id}) failed with error: ${err}`);
Sentry.captureException(err, {
tags: {
repoId: job?.data.repo.id,
jobId: job?.id,
queue: REPO_INDEXING_QUEUE,
}
});
if (job) {
this.promClient.activeRepoIndexingJobs.dec();
this.promClient.repoIndexingFailTotal.inc();
await this.db.repo.update({
where: {
id: job.data.repo.id,
},
data: {
repoIndexingStatus: RepoIndexingStatus.FAILED,
}
})
}
}
///////////////////////////
// Repo garbage collection
///////////////////////////
private async scheduleRepoGarbageCollectionBulk(repos: Repo[]) {
await this.db.$transaction(async (tx) => {
await tx.repo.updateMany({
where: { id: { in: repos.map(repo => repo.id) } },
data: { repoIndexingStatus: RepoIndexingStatus.IN_GC_QUEUE }
});
await this.gcQueue.addBulk(repos.map(repo => ({
name: 'repoGarbageCollectionJob',
data: { repo },
opts: {
removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE,
removeOnFail: env.REDIS_REMOVE_ON_FAIL,
}
})));
logger.info(`Added ${repos.length} jobs to gcQueue`);
});
}
private async fetchAndScheduleRepoGarbageCollection() {
////////////////////////////////////
// Get repos with no connections
////////////////////////////////////
const thresholdDate = new Date(Date.now() - this.settings.repoGarbageCollectionGracePeriodMs);
const reposWithNoConnections = await this.db.repo.findMany({
where: {
repoIndexingStatus: {
in: [
RepoIndexingStatus.INDEXED, // we don't include NEW repos here because they'll be picked up by the index queue (potential race condition)
RepoIndexingStatus.FAILED,
]
},
connections: {
none: {}
},
OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } }
]
},
});
if (reposWithNoConnections.length > 0) {
logger.info(`Garbage collecting ${reposWithNoConnections.length} repos with no connections: ${reposWithNoConnections.map(repo => repo.id).join(', ')}`);
}
////////////////////////////////////
// Get inactive org repos
////////////////////////////////////
const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
const inactiveOrgRepos = await this.db.repo.findMany({
where: {
org: {
stripeSubscriptionStatus: StripeSubscriptionStatus.INACTIVE,
stripeLastUpdatedAt: {
lt: sevenDaysAgo
}
},
OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } }
]
}
});
if (inactiveOrgRepos.length > 0) {
logger.info(`Garbage collecting ${inactiveOrgRepos.length} inactive org repos: ${inactiveOrgRepos.map(repo => repo.id).join(', ')}`);
}
const reposToDelete = [...reposWithNoConnections, ...inactiveOrgRepos];
if (reposToDelete.length > 0) {
await this.scheduleRepoGarbageCollectionBulk(reposToDelete);
}
}
private async runGarbageCollectionJob(job: Job<RepoGarbageCollectionPayload>) {
logger.info(`Running garbage collection job (id: ${job.id}) for repo ${job.data.repo.displayName} (id: ${job.data.repo.id})`);
this.promClient.activeRepoGarbageCollectionJobs.inc();
const repo = job.data.repo as Repo;
await this.db.repo.update({
where: {
id: repo.id
},
data: {
repoIndexingStatus: RepoIndexingStatus.GARBAGE_COLLECTING
}
});
// delete cloned repo
const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx);
if (existsSync(repoPath) && !isReadOnly) {
logger.info(`Deleting repo directory ${repoPath}`);
await promises.rm(repoPath, { recursive: true, force: true });
}
// delete shards
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const files = readdirSync(this.ctx.indexPath).filter(file => file.startsWith(shardPrefix));
for (const file of files) {
const filePath = `${this.ctx.indexPath}/${file}`;
logger.info(`Deleting shard file ${filePath}`);
await promises.rm(filePath, { force: true });
}
}
private async onGarbageCollectionJobCompleted(job: Job<RepoGarbageCollectionPayload>) {
logger.info(`Garbage collection job ${job.id} completed`);
this.promClient.activeRepoGarbageCollectionJobs.dec();
this.promClient.repoGarbageCollectionSuccessTotal.inc();
await this.db.repo.delete({
where: {
id: job.data.repo.id
}
});
}
private async onGarbageCollectionJobFailed(job: Job<RepoGarbageCollectionPayload> | undefined, err: unknown) {
logger.info(`Garbage collection job failed (id: ${job?.id ?? 'unknown'}) with error: ${err}`);
Sentry.captureException(err, {
tags: {
repoId: job?.data.repo.id,
jobId: job?.id,
queue: REPO_GC_QUEUE,
}
});
if (job) {
this.promClient.activeRepoGarbageCollectionJobs.dec();
this.promClient.repoGarbageCollectionFailTotal.inc();
await this.db.repo.update({
where: {
id: job.data.repo.id
},
data: {
repoIndexingStatus: RepoIndexingStatus.GARBAGE_COLLECTION_FAILED
}
});
}
}
///////////////////////////
// Repo index validation
///////////////////////////
public async validateIndexedReposHaveShards() {
logger.info('Validating indexed repos have shards...');
const indexedRepos = await this.db.repo.findMany({
where: {
repoIndexingStatus: RepoIndexingStatus.INDEXED
}
});
logger.info(`Found ${indexedRepos.length} repos in the DB marked as INDEXED`);
if (indexedRepos.length === 0) {
return;
}
const files = readdirSync(this.ctx.indexPath);
const reposToReindex: number[] = [];
for (const repo of indexedRepos) {
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
// TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
// would need to know how many total shards are expected for this repo
let hasShards = false;
try {
hasShards = files.some(file => file.startsWith(shardPrefix));
} catch (error) {
logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
continue;
}
if (!hasShards) {
logger.info(`Repo ${repo.displayName} (id: ${repo.id}) is marked as INDEXED but has no shards on disk. Marking for reindexing.`);
reposToReindex.push(repo.id);
}
}
if (reposToReindex.length > 0) {
await this.db.repo.updateMany({
where: {
id: { in: reposToReindex }
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW
}
});
logger.info(`Marked ${reposToReindex.length} repos for reindexing due to missing shards`);
}
logger.info('Done validating indexed repos have shards');
}
private async fetchAndScheduleRepoTimeouts() {
const repos = await this.db.repo.findMany({
where: {
repoIndexingStatus: RepoIndexingStatus.INDEXING,
updatedAt: {
lt: new Date(Date.now() - this.settings.repoIndexTimeoutMs)
}
}
});
if (repos.length > 0) {
logger.info(`Scheduling ${repos.length} repo timeouts`);
await this.scheduleRepoTimeoutsBulk(repos);
}
}
private async scheduleRepoTimeoutsBulk(repos: Repo[]) {
await this.db.$transaction(async (tx) => {
await tx.repo.updateMany({
where: { id: { in: repos.map(repo => repo.id) } },
data: { repoIndexingStatus: RepoIndexingStatus.FAILED }
});
});
}
public async dispose() {
if (this.interval) {
clearInterval(this.interval);
}
this.indexWorker.close();
this.indexQueue.close();
this.gcQueue.close();
this.gcWorker.close();
}
}


@@ -1,50 +1,8 @@
import { Connection, Repo, RepoToConnection } from "@sourcebot/db";
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
import { z } from "zod";
export type AppContext = {
/**
* Path to the repos cache directory.
*/
reposPath: string;
/**
* Path to the index cache directory;
*/
indexPath: string;
cachePath: string;
}
export type Settings = Required<SettingsSchema>;
// Structure of the `metadata` field in the `Repo` table.
//
// @WARNING: If you modify this schema, please make sure it is backwards
// compatible with any prior versions of the schema!!
// @NOTE: If you move this schema, please update the comment in schema.prisma
// to point to the new location.
export const repoMetadataSchema = z.object({
/**
* A set of key-value pairs that will be used as git config
* variables when cloning the repo.
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
*/
gitConfig: z.record(z.string(), z.string()).optional(),
/**
* A list of branches to index. Glob patterns are supported.
*/
branches: z.array(z.string()).optional(),
/**
* A list of tags to index. Glob patterns are supported.
*/
tags: z.array(z.string()).optional(),
});
export type RepoMetadata = z.infer<typeof repoMetadataSchema>;
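(The schema itself moves to @sourcebot/shared in this change.) For illustration, parsing a repo's metadata column with it might look like the following sketch; all values are made up:

const metadata = repoMetadataSchema.parse({
    gitConfig: { 'zoekt.web-url': 'https://github.com/acme/api' },
    branches: ['main', 'release/*'],
    tags: ['v*'],
});
// metadata.branches / metadata.tags later drive the glob-based
// revision selection during indexing.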
// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {
[P in keyof T]?: DeepPartial<T[P]>;
@@ -59,5 +17,6 @@ export type RepoWithConnections = Repo & { connections: (RepoToConnection & { co
export type RepoAuthCredentials = {
hostUrl?: string;
token: string;
cloneUrlWithToken: string;
cloneUrlWithToken?: string;
authHeader?: string;
}


@@ -1,11 +1,13 @@
import { Logger } from "winston";
import { AppContext, RepoAuthCredentials, RepoWithConnections } from "./types.js";
import { RepoAuthCredentials, RepoWithConnections } from "./types.js";
import path from 'path';
import { PrismaClient, Repo } from "@sourcebot/db";
import { getTokenFromConfig as getTokenFromConfigBase } from "@sourcebot/crypto";
import { BackendException, BackendError } from "@sourcebot/error";
import { Repo } from "@sourcebot/db";
import { getTokenFromConfig } from "@sourcebot/shared";
import * as Sentry from "@sentry/node";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { GithubAppManager } from "./ee/githubAppManager.js";
import { hasEntitlement } from "@sourcebot/shared";
import { REPOS_CACHE_DIR } from "./constants.js";
export const measure = async <T>(cb: () => Promise<T>) => {
const start = Date.now();
@@ -21,22 +23,6 @@ export const marshalBool = (value?: boolean) => {
return !!value ? '1' : '0';
}
export const getTokenFromConfig = async (token: any, orgId: number, db: PrismaClient, logger?: Logger) => {
try {
return await getTokenFromConfigBase(token, orgId, db);
} catch (error: unknown) {
if (error instanceof Error) {
const e = new BackendException(BackendError.CONNECTION_SYNC_SECRET_DNE, {
message: error.message,
});
Sentry.captureException(e);
logger?.error(error.message);
throw e;
}
throw error;
}
};
export const resolvePathRelativeToConfig = (localPath: string, configPath: string) => {
let absolutePath = localPath;
if (!path.isAbsolute(absolutePath)) {
@@ -69,11 +55,11 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
// @note: this function is duplicated in `packages/web/src/features/fileTree/actions.ts`.
// @todo: we should move this to a shared package.
export const getRepoPath = (repo: Repo, ctx: AppContext): { path: string, isReadOnly: boolean } => {
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
// If we are dealing with a local repository, then use that as the path.
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
const cloneUrl = new URL(repo.cloneUrl);
if (repo.external_codeHostType === 'generic-git-host' && cloneUrl.protocol === 'file:') {
if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
return {
path: cloneUrl.pathname,
isReadOnly: true,
@@ -81,7 +67,7 @@ export const getRepoPath = (repo: Repo, ctx: AppContext): { path: string, isRead
}
return {
path: path.join(ctx.reposPath, repo.id.toString()),
path: path.join(REPOS_CACHE_DIR, repo.id.toString()),
isReadOnly: false,
}
}
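Illustrative calls (not part of the diff; partial Repo objects, cast for brevity):

const localRepo = { id: 42, cloneUrl: 'file:///srv/git/api', external_codeHostType: 'genericGitHost' } as unknown as Repo;
getRepoPath(localRepo);  // -> { path: '/srv/git/api', isReadOnly: true }

const hostedRepo = { id: 42, cloneUrl: 'https://github.com/acme/api.git', external_codeHostType: 'github' } as unknown as Repo;
getRepoPath(hostedRepo); // -> { path: `${REPOS_CACHE_DIR}/42`, isReadOnly: false }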
@@ -124,12 +110,36 @@ export const fetchWithRetry = async <T>(
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This
// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: PrismaClient, logger?: Logger): Promise<RepoAuthCredentials | undefined> => {
export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, logger?: Logger): Promise<RepoAuthCredentials | undefined> => {
// If we have github apps configured we assume that we must use them for github service auth
if (repo.external_codeHostType === 'github' && hasEntitlement('github-app') && GithubAppManager.getInstance().appsConfigured()) {
logger?.debug(`Using GitHub App for service auth for repo ${repo.displayName} hosted at ${repo.external_codeHostUrl}`);
const owner = repo.displayName?.split('/')[0];
const deploymentHostname = new URL(repo.external_codeHostUrl).hostname;
if (!owner || !deploymentHostname) {
throw new Error(`Failed to fetch GitHub App for repo ${repo.displayName}: Invalid repo displayName (${repo.displayName}) or deployment hostname (${deploymentHostname})`);
}
const token = await GithubAppManager.getInstance().getInstallationToken(owner, deploymentHostname);
return {
hostUrl: repo.external_codeHostUrl,
token,
cloneUrlWithToken: createGitCloneUrlWithToken(
repo.cloneUrl,
{
username: 'x-access-token',
password: token
}
),
}
}
for (const { connection } of repo.connections) {
if (connection.connectionType === 'github') {
const config = connection.config as unknown as GithubConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const token = await getTokenFromConfig(config.token);
return {
hostUrl: config.url,
token,
@@ -144,7 +154,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'gitlab') {
const config = connection.config as unknown as GitlabConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const token = await getTokenFromConfig(config.token);
return {
hostUrl: config.url,
token,
@@ -160,7 +170,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'gitea') {
const config = connection.config as unknown as GiteaConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const token = await getTokenFromConfig(config.token);
return {
hostUrl: config.url,
token,
@@ -175,7 +185,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'bitbucket') {
const config = connection.config as unknown as BitbucketConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const token = await getTokenFromConfig(config.token);
const username = config.user ?? 'x-token-auth';
return {
hostUrl: config.url,
@ -192,7 +202,18 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
} else if (connection.connectionType === 'azuredevops') {
const config = connection.config as unknown as AzureDevOpsConnectionConfig;
if (config.token) {
const token = await getTokenFromConfig(config.token, connection.orgId, db, logger);
const token = await getTokenFromConfig(config.token);
// For ADO server, multiple auth schemes may be supported. If the ADO deployment supports NTLM, the git clone will default
// to this over basic auth. As a result, we cannot embed the token in the clone URL and must force basic auth by passing in the token
// appropriately in the header. To do this, we set the authHeader field here
if (config.deploymentType === 'server') {
return {
hostUrl: config.url,
token,
authHeader: "Authorization: Basic " + Buffer.from(`:${token}`).toString('base64')
}
} else {
return {
hostUrl: config.url,
token,
@@ -210,6 +231,7 @@ export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: P
}
}
}
}
return undefined;
}
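To make the authHeader path above concrete: a minimal sketch of how a caller could hand that header to git, assuming git's http.extraHeader config is used to force basic auth. The function and its name are hypothetical, not part of this diff.

import { execFile } from "child_process";

// Hypothetical consumer of the credentials returned above. For ADO Server the
// token is not embedded in the clone URL (NTLM would otherwise win over basic
// auth); instead the prebuilt "Authorization: Basic ..." header is passed to
// git as an extra HTTP header.
const cloneWithAuthHeader = (cloneUrl: string, destDir: string, authHeader?: string): Promise<void> => {
    const args = authHeader
        ? ['-c', `http.extraHeader=${authHeader}`, 'clone', cloneUrl, destDir]
        : ['clone', cloneUrl, destDir];
    return new Promise((resolve, reject) => {
        execFile('git', args, (error) => error ? reject(error) : resolve());
    });
};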
@@ -229,3 +251,44 @@ const createGitCloneUrlWithToken = (cloneUrl: string, credentials: { username?:
}
return url.toString();
}
/**
* Wraps groupmq worker lifecycle callbacks with exception handling. This prevents
* uncaught exceptions (e.g., a RepoIndexingJob not existing in the DB) from crashing
* the app.
* @see: https://openpanel-dev.github.io/groupmq/api-worker/#events
*/
export const groupmqLifecycleExceptionWrapper = async (name: string, logger: Logger, fn: () => Promise<void>) => {
try {
await fn();
} catch (error) {
Sentry.captureException(error);
logger.error(`Exception thrown while executing lifecycle function \`${name}\`.`, error);
}
}
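A usage sketch for the wrapper. The worker event name and DB helper below are illustrative assumptions, not the groupmq API.

// Stand-ins for the real worker and helper; declared only so the sketch type-checks.
declare const worker: { on: (event: string, handler: (job: { id: string }) => void) => void };
declare const markRepoIndexingJobCompleted: (jobId: string) => Promise<void>;
declare const lifecycleLogger: Logger;

worker.on('jobCompleted', (job) =>
    // A throwing handler is logged and reported to Sentry instead of crashing the app.
    groupmqLifecycleExceptionWrapper('onJobCompleted', lifecycleLogger, async () => {
        await markRepoIndexingJobCompleted(job.id);
    })
);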
// setInterval wrapper that ensures async callbacks are not executed concurrently.
// @see: https://mottaquikarim.github.io/dev/posts/setinterval-that-blocks-on-await/
export const setIntervalAsync = (target: () => Promise<void>, pollingIntervalMs: number): NodeJS.Timeout => {
const setIntervalWithPromise = <T extends (...args: any[]) => Promise<any>>(
target: T
): (...args: Parameters<T>) => Promise<void> => {
return async function (...args: Parameters<T>): Promise<void> {
if ((target as any).isRunning) return;
(target as any).isRunning = true;
try {
await target(...args);
} finally {
(target as any).isRunning = false;
}
};
}
return setInterval(
setIntervalWithPromise(target),
pollingIntervalMs
);
}
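The isRunning flag is stored on the target function itself, so a tick that fires while a previous invocation is still awaiting is skipped rather than queued. A usage sketch; pollOnce is a hypothetical unit of work.

declare const pollOnce: () => Promise<void>; // hypothetical async poll

// Ticks fire every 5s, but a tick that lands while pollOnce is still running
// is dropped, so invocations never overlap.
const handle = setIntervalAsync(async () => {
    await pollOnce();
}, 5_000);

// Stops polling, exactly like a regular setInterval handle:
// clearInterval(handle);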


@@ -1,77 +1,34 @@
-import { exec } from "child_process";
-import { AppContext, repoMetadataSchema, Settings } from "./types.js";
import { Repo } from "@sourcebot/db";
-import { getRepoPath } from "./utils.js";
-import { getShardPrefix } from "./utils.js";
-import { getBranches, getTags } from "./git.js";
-import micromatch from "micromatch";
-import { createLogger } from "@sourcebot/logger";
-import { captureEvent } from "./posthog.js";
+import { createLogger, env } from "@sourcebot/shared";
+import { exec } from "child_process";
+import { INDEX_CACHE_DIR } from "./constants.js";
+import { Settings } from "./types.js";
+import { getRepoPath, getShardPrefix } from "./utils.js";
const logger = createLogger('zoekt');
-export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: AppContext) => {
-let revisions = [
-'HEAD'
-];
-const { path: repoPath } = getRepoPath(repo, ctx);
+export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => {
+const { path: repoPath } = getRepoPath(repo);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
-const metadata = repoMetadataSchema.parse(repo.metadata);
-if (metadata.branches) {
-const branchGlobs = metadata.branches
-const allBranches = await getBranches(repoPath);
-const matchingBranches =
-allBranches
-.filter((branch) => micromatch.isMatch(branch, branchGlobs))
-.map((branch) => `refs/heads/${branch}`);
-revisions = [
-...revisions,
-...matchingBranches
-];
-}
-if (metadata.tags) {
-const tagGlobs = metadata.tags;
-const allTags = await getTags(repoPath);
-const matchingTags =
-allTags
-.filter((tag) => micromatch.isMatch(tag, tagGlobs))
-.map((tag) => `refs/tags/${tag}`);
-revisions = [
-...revisions,
-...matchingTags
-];
-}
-// zoekt has a limit of 64 branches/tags to index.
-if (revisions.length > 64) {
-logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
-captureEvent('backend_revisions_truncated', {
-repoId: repo.id,
-revisionCount: revisions.length,
-});
-revisions = revisions.slice(0, 64);
-}
+const largeFileGlobPatterns = env.ALWAYS_INDEX_FILE_PATTERNS?.split(',').map(pattern => pattern.trim()) ?? [];
const command = [
'zoekt-git-index',
'-allow_missing_branches',
-`-index ${ctx.indexPath}`,
+`-index ${INDEX_CACHE_DIR}`,
`-max_trigram_count ${settings.maxTrigramCount}`,
`-file_limit ${settings.maxFileSize}`,
`-branches "${revisions.join(',')}"`,
`-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`,
`-shard_prefix ${shardPrefix}`,
+...largeFileGlobPatterns.map((pattern) => `-large_file ${pattern}`),
repoPath
].join(' ');
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
-exec(command, (error, stdout, stderr) => {
+exec(command, { signal }, (error, stdout, stderr) => {
if (error) {
reject(error);
return;
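The removed block above expanded branch/tag globs into fully qualified refs before handing them to zoekt-git-index. A self-contained sketch of that pattern; the function name is illustrative.

import micromatch from "micromatch";

// Match user-supplied globs against the repo's actual branches and qualify
// the matches as refs, mirroring the removed expansion logic above.
const expandBranchGlobs = (allBranches: string[], branchGlobs: string[]): string[] =>
    allBranches
        .filter((branch) => micromatch.isMatch(branch, branchGlobs))
        .map((branch) => `refs/heads/${branch}`);

// expandBranchGlobs(['main', 'release/v1', 'dev'], ['release/*'])
// => ['refs/heads/release/v1']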


@@ -4,5 +4,8 @@ export default defineConfig({
test: {
environment: 'node',
watch: false,
+env: {
+DATA_CACHE_DIR: 'test-data'
+}
}
});


@@ -1 +0,0 @@
.env.local


@@ -1,19 +0,0 @@
{
"name": "@sourcebot/crypto",
"version": "0.1.0",
"main": "dist/index.js",
"private": true,
"scripts": {
"build": "tsc",
"postinstall": "yarn build"
},
"dependencies": {
"@sourcebot/db": "*",
"@sourcebot/schemas": "*",
"dotenv": "^16.4.5"
},
"devDependencies": {
"@types/node": "^22.7.5",
"typescript": "^5.7.3"
}
}


@@ -1,13 +0,0 @@
import dotenv from 'dotenv';
export const getEnv = (env: string | undefined, defaultValue?: string) => {
return env ?? defaultValue;
}
dotenv.config({
path: './.env.local',
override: true
});
// @note: You can use https://generate-random.org/encryption-key-generator to create a new 32 byte key
export const SOURCEBOT_ENCRYPTION_KEY = getEnv(process.env.SOURCEBOT_ENCRYPTION_KEY);
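Per the note above, the 32-byte key can also be generated locally rather than via the linked site; a minimal sketch using Node's built-in crypto module:

import { randomBytes } from "crypto";

// 32 random bytes, hex-encoded (64 characters): a 256-bit encryption key.
console.log(randomBytes(32).toString('hex'));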


@@ -1,33 +0,0 @@
import { PrismaClient } from "@sourcebot/db";
import { Token } from "@sourcebot/schemas/v3/shared.type";
import { decrypt } from "./index.js";
export const getTokenFromConfig = async (token: Token, orgId: number, db: PrismaClient) => {
if ('secret' in token) {
const secretKey = token.secret;
const secret = await db.secret.findUnique({
where: {
orgId_key: {
key: secretKey,
orgId
}
}
});
if (!secret) {
throw new Error(`Secret with key ${secretKey} not found for org ${orgId}`);
}
const decryptedToken = decrypt(secret.iv, secret.encryptedValue);
return decryptedToken;
} else if ('env' in token) {
const envToken = process.env[token.env];
if (!envToken) {
throw new Error(`Environment variable ${token.env} not found.`);
}
return envToken;
} else {
throw new Error('Invalid token configuration');
}
};
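For reference, the removed helper accepted two token shapes, resolved either from an org-scoped encrypted secret or from an environment variable. The values below are hypothetical illustrations.

// Resolved by looking up and decrypting the org's secret stored under key 'github-pat':
const secretToken = { secret: 'github-pat' };

// Resolved from process.env.GITHUB_TOKEN at runtime:
const envToken = { env: 'GITHUB_TOKEN' };

// await getTokenFromConfig(secretToken, orgId, db) // => decrypted secret value
// await getTokenFromConfig(envToken, orgId, db)    // => process.env.GITHUB_TOKEN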


@@ -25,7 +25,6 @@
},
"dependencies": {
"@prisma/client": "6.2.1",
"@sourcebot/logger": "workspace:*",
"@types/readline-sync": "^1.4.8",
"readline-sync": "^1.4.10"
}

Some files were not shown because too many files have changed in this diff.