Config format V2 (#42)

This commit is contained in:
Brendan Kellam 2024-10-17 13:31:18 -07:00 committed by GitHub
parent 4b51a8d7d7
commit fc8815d135
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
111 changed files with 2955 additions and 742 deletions

182
.gitignore vendored
View file

@ -1,44 +1,166 @@
# Created by https://www.toptal.com/developers/gitignore/api/nextjs # Created by https://www.toptal.com/developers/gitignore/api/yarn,node
# Edit at https://www.toptal.com/developers/gitignore?templates=nextjs # Edit at https://www.toptal.com/developers/gitignore?templates=yarn,node
### NextJS ### ### Node ###
# dependencies # Logs
/node_modules logs
/.pnp *.log
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log* npm-debug.log*
yarn-debug.log* yarn-debug.log*
yarn-error.log* yarn-error.log*
lerna-debug.log*
.pnpm-debug.log* .pnpm-debug.log*
# local env files # Diagnostic reports (https://nodejs.org/api/report.html)
.env*.local report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# vercel # Runtime data
.vercel pids
*.pid
*.seed
*.pid.lock
# typescript # Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo *.tsbuildinfo
next-env.d.ts
# End of https://www.toptal.com/developers/gitignore/api/nextjs # Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
### Node Patch ###
# Serverless Webpack directories
.webpack/
# Optional stylelint cache
# SvelteKit build / generate output
.svelte-kit
### yarn ###
# https://yarnpkg.com/getting-started/qa#which-files-should-be-gitignored
.yarn/*
!.yarn/releases
!.yarn/patches
!.yarn/plugins
!.yarn/sdks
!.yarn/versions
# if you are NOT using Zero-installs, then:
# comment the following lines
!.yarn/cache
# and uncomment the following lines
# .pnp.*
# End of https://www.toptal.com/developers/gitignore/api/yarn,node
.sourcebot .sourcebot
/bin /bin
/config.json /config.json
.DS_Store

6
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,6 @@
{
"files.associations": {
"*.json": "jsonc",
"index.json": "json"
}
}

View file

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
- [**Breaking Change**] Added index schema v2. This new schema brings many quality of life features like clearer syntax, ability to specify individual `repos`, `projects`, `groups`, and `orgs`, and the ability to easily `exclude` repositories.
- Added a `SOURCEBOT_VERSION` build argument to the Docker image. ([#41](https://github.com/sourcebot-dev/sourcebot/pull/41)) - Added a `SOURCEBOT_VERSION` build argument to the Docker image. ([#41](https://github.com/sourcebot-dev/sourcebot/pull/41))
- Added the `sourcebot_version` property to all PostHog events for versioned telemetry. ([#41](https://github.com/sourcebot-dev/sourcebot/pull/41) - Added the `sourcebot_version` property to all PostHog events for versioned telemetry. ([#41](https://github.com/sourcebot-dev/sourcebot/pull/41)

View file

@ -14,18 +14,29 @@ RUN CGO_ENABLED=0 GOOS=linux go build -o /cmd/ ./cmd/...
FROM node-alpine AS web-builder FROM node-alpine AS web-builder
RUN apk add --no-cache libc6-compat RUN apk add --no-cache libc6-compat
WORKDIR /app WORKDIR /app
COPY package.json yarn.lock* ./ COPY package.json yarn.lock* ./
COPY ./packages/web ./packages/web
# Fixes arm64 timeouts # Fixes arm64 timeouts
RUN yarn config set registry https://registry.npmjs.org/ RUN yarn config set registry https://registry.npmjs.org/
RUN yarn config set network-timeout 1200000 RUN yarn config set network-timeout 1200000
RUN yarn --frozen-lockfile RUN yarn workspace @sourcebot/web install --frozen-lockfile
COPY . .
ENV NEXT_TELEMETRY_DISABLED=1 ENV NEXT_TELEMETRY_DISABLED=1
# @see: https://phase.dev/blog/nextjs-public-runtime-variables/ # @see: https://phase.dev/blog/nextjs-public-runtime-variables/
ARG NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED=BAKED_NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED ARG NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED=BAKED_NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED
ARG NEXT_PUBLIC_SOURCEBOT_VERSION=BAKED_NEXT_PUBLIC_SOURCEBOT_VERSION ARG NEXT_PUBLIC_SOURCEBOT_VERSION=BAKED_NEXT_PUBLIC_SOURCEBOT_VERSION
RUN yarn run build RUN yarn workspace @sourcebot/web build
# ------ Build Backend ------
FROM node-alpine AS backend-builder
WORKDIR /app
COPY package.json yarn.lock* ./
COPY ./schemas ./schemas
COPY ./packages/backend ./packages/backend
RUN yarn workspace @sourcebot/backend install --frozen-lockfile
RUN yarn workspace @sourcebot/backend build
# ------ Runner ------ # ------ Runner ------
FROM node-alpine AS runner FROM node-alpine AS runner
@ -40,8 +51,8 @@ ARG SOURCEBOT_VERSION=unknown
ENV SOURCEBOT_VERSION=$SOURCEBOT_VERSION ENV SOURCEBOT_VERSION=$SOURCEBOT_VERSION
RUN echo "Sourcebot Version: $SOURCEBOT_VERSION" RUN echo "Sourcebot Version: $SOURCEBOT_VERSION"
ENV GITHUB_HOSTNAME=github.com # Valid values are: debug, info, warn, error
ENV GITLAB_HOSTNAME=gitlab.com ENV SOURCEBOT_LOG_LEVEL=info
# @note: This is also set in .env # @note: This is also set in .env
ENV NEXT_PUBLIC_POSTHOG_KEY=phc_VFn4CkEGHRdlVyOOw8mfkoj1DKVoG6y1007EClvzAnS ENV NEXT_PUBLIC_POSTHOG_KEY=phc_VFn4CkEGHRdlVyOOw8mfkoj1DKVoG6y1007EClvzAnS
@ -50,7 +61,7 @@ ENV NEXT_PUBLIC_POSTHOG_KEY=phc_VFn4CkEGHRdlVyOOw8mfkoj1DKVoG6y1007EClvzAnS
# ENV SOURCEBOT_TELEMETRY_DISABLED=1 # ENV SOURCEBOT_TELEMETRY_DISABLED=1
# Configure dependencies # Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl
# Configure zoekt # Configure zoekt
COPY vendor/zoekt/install-ctags-alpine.sh . COPY vendor/zoekt/install-ctags-alpine.sh .
@ -68,12 +79,17 @@ COPY --from=zoekt-builder \
/usr/local/bin/ /usr/local/bin/
# Configure the webapp # Configure the webapp
COPY --from=web-builder /app/public ./public COPY --from=web-builder /app/packages/web/public ./packages/web/public
RUN mkdir .next COPY --from=web-builder /app/packages/web/.next/standalone ./
COPY --from=web-builder /app/.next/standalone ./ COPY --from=web-builder /app/packages/web/.next/static ./packages/web/.next/static
COPY --from=web-builder /app/.next/static ./.next/static
# Configure the backend
COPY --from=backend-builder /app/node_modules ./node_modules
COPY --from=backend-builder /app/packages/backend ./packages/backend
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY prefix-output.sh ./prefix-output.sh
RUN chmod +x ./prefix-output.sh
COPY entrypoint.sh ./entrypoint.sh COPY entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh RUN chmod +x ./entrypoint.sh

View file

@ -9,8 +9,17 @@ ui:
zoekt: zoekt:
mkdir -p bin mkdir -p bin
go build -C vendor/zoekt -o $(PWD)/bin ./cmd/... go build -C vendor/zoekt -o $(PWD)/bin ./cmd/...
export PATH=$(PWD)/bin:$(PATH)
export CTAGS_COMMANDS=ctags
clean: clean:
rm -rf bin node_modules .next .sourcebot rm -rf \
bin \
node_modules \
packages/web/node_modules \
packages/web/.next \
packages/backend/dist \
packages/backend/node_modules \
.sourcebot
.PHONY: bin .PHONY: bin

215
README.md
View file

@ -70,23 +70,25 @@ Sourcebot supports indexing and searching through public and private repositorie
cd sourcebot_workspace cd sourcebot_workspace
``` ```
2. Create a new config following the [configuration schema](./schemas/index.json) to specify which repositories Sourcebot should index. For example, to index [llama.cpp](https://github.com/ggerganov/llama.cpp): 2. Create a new config following the [configuration schema](./schemas/v2/index.json) to specify which repositories Sourcebot should index. For example, let's index llama.cpp:
```sh ```sh
touch my_config.json touch my_config.json
echo '{ echo '{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json", "$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/tags/latest/schemas/v2/index.json",
"Configs": [ "repos": [
{ {
"Type": "github", "type": "github",
"GitHubUser": "ggerganov", "repos": [
"Name": "^llama\\\\.cpp$" "ggerganov/llama.cpp"
]
} }
] ]
}' > my_config.json }' > my_config.json
``` ```
(For more examples, see [example-config.json](./example-config.json). For additional usage information, see the [configuration schema](./schemas/index.json)). >[!NOTE]
> Sourcebot can also index all repos owned by an organization, user, group, etc., instead of listing them individually. For examples, see the [configs](./configs) directory. For additional usage information, see the [configuration schema](./schemas/v2/index.json).
3. Run Sourcebot and point it to the new config you created with the `-e CONFIG_PATH` flag: 3. Run Sourcebot and point it to the new config you created with the `-e CONFIG_PATH` flag:
@ -106,31 +108,8 @@ Sourcebot supports indexing and searching through public and private repositorie
</details> </details>
<br> <br>
You should see a `.sourcebot` folder in your current directory. This folder stores a cache of the repositories zoekt has indexed. The `HEAD` commit of a repository is re-indexed [every hour](https://github.com/sourcebot-dev/zoekt/blob/11b7713f1fb511073c502c41cea413d616f7761f/cmd/zoekt-indexserver/main.go#L86). Indexing private repos? See [Providing an access token](#providing-an-access-token). You should see a `.sourcebot` folder in your current directory. This folder stores a cache of the repositories zoekt has indexed. The `HEAD` commit of a repository is re-indexed [every hour](./packages/backend/src/constants.ts). Indexing private repos? See [Providing an access token](#providing-an-access-token).
>[!WARNING]
> Depending on the size of your repo(s), SourceBot could take a couple of minutes to finish indexing. SourceBot doesn't currently support displaying indexing progress in real-time, so please be patient while it finishes. You can track the progress manually by investigating the `.sourcebot` cache in your workspace.
<details>
<summary><img src="https://gitlab.com/favicon.ico" width="16" height="16" /> Using GitLab?</summary>
_tl;dr: A `GITLAB_TOKEN` is required to index GitLab repositories (both private & public). See [Providing an access token](#providing-an-access-token)._
Currently, the GitLab indexer is restricted to only indexing repositories that the associated `GITLAB_TOKEN` has access to. For example, if the token has access to `foo`, `bar`, and `baz` repositories, the following config will index all three:
```sh
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json",
"Configs": [
{
"Type": "gitlab"
}
]
}
```
See [Providing an access token](#providing-an-access-token).
</details>
</br> </br>
## Providing an access token ## Providing an access token
@ -145,31 +124,92 @@ This will depend on the code hosting platform you're using:
</picture> GitHub </picture> GitHub
</summary> </summary>
In order to index private repositories, you'll need to generate a GitHub Personal Access Token (PAT) and pass it to Sourcebot. Create a new PAT [here](https://github.com/settings/tokens/new) and make sure you select the `repo` scope: In order to index private repositories, you'll need to generate a GitHub Personal Access Token (PAT). Create a new PAT [here](https://github.com/settings/tokens/new) and make sure you select the `repo` scope:
![GitHub PAT creation](.github/images/github-pat-creation.png) ![GitHub PAT creation](.github/images/github-pat-creation.png)
You'll need to pass this PAT each time you run Sourcebot by setting the `GITHUB_TOKEN` environment variable: Next, update your configuration with the `token` field:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/tags/latest/schemas/v2/index.json",
"repos": [
{
"type": "github",
"token": "ghp_mytoken",
...
}
]
}
```
You can also pass tokens as environment variables:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/tags/latest/schemas/v2/index.json",
"repos": [
{
"type": "github",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `GITHUB_TOKEN`.
"env": "GITHUB_TOKEN"
},
...
}
]
}
```
You'll need to pass this environment variable each time you run Sourcebot:
<pre> <pre>
docker run -p 3000:3000 --rm --name sourcebot -e <b>GITHUB_TOKEN=[your-github-token]</b> -e CONFIG_PATH=/data/my_config.json -v $(pwd):/data ghcr.io/sourcebot-dev/sourcebot:latest docker run -e <b>GITHUB_TOKEN=ghp_mytoken</b> /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
</pre> </pre>
</details> </details>
<details> <details>
<summary><img src="https://gitlab.com/favicon.ico" width="16" height="16" /> GitLab</summary> <summary><img src="https://gitlab.com/favicon.ico" width="16" height="16" /> GitLab</summary>
>[!NOTE]
> An access token is <b>required</b> to index GitLab repositories (both private & public) since the GitLab indexer needs the token to determine which repositories to index. See [example-config.json](./example-config.json) for example usage.
Generate a GitLab Personal Access Token (PAT) [here](https://gitlab.com/-/user_settings/personal_access_tokens) and make sure you select the `read_api` scope: Generate a GitLab Personal Access Token (PAT) [here](https://gitlab.com/-/user_settings/personal_access_tokens) and make sure you select the `read_api` scope:
![GitLab PAT creation](.github/images/gitlab-pat-creation.png) ![GitLab PAT creation](.github/images/gitlab-pat-creation.png)
You'll need to pass this PAT each time you run Sourcebot by setting the `GITLAB_TOKEN` environment variable: Next, update your configuration with the `token` field:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/tags/latest/schemas/v2/index.json",
"repos": [
{
"type": "gitlab",
"token": "glpat-mytoken",
...
}
]
}
```
You can also pass tokens as environment variables:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/refs/tags/latest/schemas/v2/index.json",
"repos": [
{
"type": "gitlab",
"token": {
// note: this env var can be named anything. It
// doesn't need to be `GITLAB_TOKEN`.
"env": "GITLAB_TOKEN"
},
...
}
]
}
```
You'll need to pass this environment variable each time you run Sourcebot:
<pre> <pre>
docker run -p 3000:3000 --rm --name sourcebot -e <b>GITLAB_TOKEN=[your-gitlab-token]</b> -e CONFIG_PATH=/data/my_config.json -v $(pwd):/data ghcr.io/sourcebot-dev/sourcebot:latest docker run -e <b>GITLAB_TOKEN=glpat-mytoken</b> /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
</pre> </pre>
</details> </details>
@ -178,63 +218,7 @@ docker run -p 3000:3000 --rm --name sourcebot -e <b>GITLAB_TOKEN=[your-gitlab-to
## Using a self-hosted GitLab / GitHub instance ## Using a self-hosted GitLab / GitHub instance
If you're using a self-hosted GitLab or GitHub instance with a custom domain, there is some additional config required: If you're using a self-hosted GitLab or GitHub instance with a custom domain, you can specify the domain in your config file. See [configs/self-hosted.json](configs/self-hosted.json) for examples.
<div>
<details>
<summary>
<picture>
<source media="(prefers-color-scheme: dark)" srcset=".github/images/github-favicon-inverted.png">
<img src="https://github.com/favicon.ico" width="16" height="16" alt="GitHub icon">
</picture> GitHub
</summary>
1. In your config, add the `GitHubURL` field to point to your deployment's URL. For example:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json",
"Configs": [
{
"Type": "github",
"GitHubUrl": "https://github.example.com"
}
]
}
2. Set the `GITHUB_HOSTNAME` environment variable to your deployment's hostname. For example:
<pre>
docker run -e <b>GITHUB_HOSTNAME=github.example.com</b> /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
</pre>
</details>
<details>
<summary><img src="https://gitlab.com/favicon.ico" width="16" height="16" /> GitLab</summary>
1. In your config, add the `GitLabURL` field to point to your deployment's URL. For example:
```json
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json",
"Configs": [
{
"Type": "gitlab",
"GitLabURL": "https://gitlab.example.com"
}
]
}
```
2. Set the `GITLAB_HOSTNAME` environment variable to your deployment's hostname. For example:
<pre>
docker run -e <b>GITLAB_HOSTNAME=gitlab.example.com</b> /* additional args */ ghcr.io/sourcebot-dev/sourcebot:latest
</pre>
</details>
</div>
## Build from source ## Build from source
>[!NOTE] >[!NOTE]
@ -266,49 +250,14 @@ If you're using a self-hosted GitLab or GitHub instance with a custom domain, th
5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information. 5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information.
6. (Optional) Depending on your `config.json`, you may need to pass an access token to Sourcebot: 6. Start Sourcebot with the command:
<div>
<details>
<summary>
<picture>
<source media="(prefers-color-scheme: dark)" srcset=".github/images/github-favicon-inverted.png">
<img src="https://github.com/favicon.ico" width="16" height="16" alt="GitHub icon">
</picture>
GitHub
</summary>
First, generate a personal access token (PAT). See [Providing an access token](#providing-an-access-token).
Next, Create a text file named `.github-token` **in your home directory** and paste the token in it. The file should look like:
```sh
ghp_...
```
zoekt will [read this file](https://github.com/sourcebot-dev/zoekt/blob/6a5753692b46e669f851ab23211e756a3677185d/cmd/zoekt-mirror-github/main.go#L60) to authenticate with GitHub.
</details>
<details>
<summary>
<img src="https://gitlab.com/favicon.ico" width="16" height="16" /> GitLab
</summary>
First, generate a personal access token (PAT). See [Providing an access token](#providing-an-access-token).
Next, Create a text file named `.gitlab-token` **in your home directory** and paste the token in it. The file should look like:
```sh
glpat-...
```
zoekt will [read this file](https://github.com/sourcebot-dev/zoekt/blob/11b7713f1fb511073c502c41cea413d616f7761f/cmd/zoekt-mirror-gitlab/main.go#L43) to authenticate with GitLab.
</details>
</div>
7. Start Sourcebot with the command:
```sh ```sh
yarn dev yarn dev
``` ```
A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`. A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`.
8. Start searching at `http://localhost:3000`. 7. Start searching at `http://localhost:3000`.
## Telemetry ## Telemetry

39
configs/auth.json Normal file
View file

@ -0,0 +1,39 @@
{
"$schema": "../schemas/v2/index.json",
"repos": [
// Authenticate using a token directly in the config.
// Private and public repositories will be included.
{
"type": "github",
"token": "ghp_token1234",
"orgs": [
"my-org"
]
},
{
"type": "gitlab",
"token": "glpat-1234",
"groups": [
"my-group"
]
},
// You can also store the token in an environment variable and then
// reference it from the config.
{
"type": "github",
"token": {
"env": "GITHUB_TOKEN_ENV_VAR"
}
},
{
"type": "gitlab",
"token": {
"env": "GITLAB_TOKEN_ENV_VAR"
},
"groups": [
"my-group"
]
}
]
}

42
configs/basic.json Normal file
View file

@ -0,0 +1,42 @@
{
"$schema": "../schemas/v2/index.json",
// Note: to include private repositories, you must provide an authentication token.
// See: configs/auth.json for an example.
"repos": [
// From GitHub, include:
// - all public repos owned by user `torvalds`
// - all public repos owned by organization `commaai`
// - repo `sourcebot-dev/sourcebot`
{
"type": "github",
"token": "my-token",
"users": [
"torvalds"
],
"orgs": [
"commaai"
],
"repos": [
"sourcebot-dev/sourcebot"
]
},
// From GitLab, include:
// - all public projects owned by user `brendan67`
// - all public projects in group `my-group` and sub-group `my-other-group/sub-group`
// - project `my-group/project1`
{
"type": "gitlab",
"token": "my-token",
"users": [
"brendan67"
],
"groups": [
"my-group",
"my-other-group/sub-group"
],
"projects": [
"my-group/project1"
]
}
]
}

42
configs/filter.json Normal file
View file

@ -0,0 +1,42 @@
{
"$schema": "../schemas/v2/index.json",
"repos": [
// Include all repos in my-org, except:
// - repo1 & repo2
// - repos that are archived or forks
{
"type": "github",
"token": "my-token",
"orgs": [
"my-org"
],
"exclude": {
"archived": true,
"forks": true,
"repos": [
"my-org/repo1",
"my-org/repo2"
]
}
},
// Include all projects in my-group, except:
// - project1 & project2
// - projects that are archived or forks
{
"type": "gitlab",
"token": "my-token",
"groups": [
"my-group"
],
"exclude": {
"archived": true,
"forks": true,
"projects": [
"my-group/project1",
"my-group/project2"
]
}
}
]
}

19
configs/self-hosted.json Normal file
View file

@ -0,0 +1,19 @@
{
"$schema": "../schemas/v2/index.json",
"repos": [
{
"type": "github",
"url": "https://github.example.com",
"orgs": [
"my-org-name"
]
},
{
"type": "gitlab",
"url": "https://gitlab.example.com",
"groups": [
"my-group"
]
}
]
}

View file

@ -1,10 +1,11 @@
{ {
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json", "$schema": "./schemas/v2/index.json",
"Configs": [ "repos": [
{ {
"Type": "github", "type": "github",
"GitHubOrg": "sourcebot-dev", "repos": [
"Name": "^sourcebot$" "sourcebot-dev/sourcebot"
]
} }
] ]
} }

View file

@ -1,41 +1,18 @@
{ {
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json", "$schema": "./schemas/v2/index.json",
"Configs": [ "repos": [
{ {
"Type": "github", "type": "github",
"GitHubUser": "torvalds", "repos": [
"Name": "linux" "torvalds/linux",
}, "pytorch/pytorch",
{ "commaai/openpilot",
"Type": "github", "ggerganov/whisper.cpp",
"GitHubOrg": "pytorch", "ggerganov/llama.cpp",
"Name": "pytorch" "codemirror/dev",
}, "tailwindlabs/tailwindcss",
{ "sourcebot-dev/sourcebot"
"Type": "github", ]
"GitHubOrg": "commaai",
"Name": "^(openpilot|tinygrad)$",
"IncludeForks": true
},
{
"Type": "github",
"GitHubUser": "ggerganov",
"Name": "^(whisper\\.cpp|llama\\.cpp)$"
},
{
"Type": "github",
"GitHubOrg": "codemirror",
"Name": "^(dev|lang-.*)$"
},
{
"Type": "github",
"GitHubOrg": "tailwindlabs",
"Name": "^tailwindcss$"
},
{
"Type": "github",
"GitHubOrg": "sourcebot-dev",
"Name": "^sourcebot$"
} }
] ]
} }

View file

@ -46,36 +46,6 @@ fi
echo -e "\e[34m[Info] Using config file at: '$CONFIG_PATH'.\e[0m" echo -e "\e[34m[Info] Using config file at: '$CONFIG_PATH'.\e[0m"
# Check if GITHUB_TOKEN is set
if [ -n "$GITHUB_TOKEN" ]; then
echo "$GITHUB_TOKEN" > "$HOME/.github-token"
chmod 600 "$HOME/.github-token"
# Configure Git with the provided GITHUB_TOKEN
echo -e "\e[34m[Info] Configuring GitHub credentials with hostname '$GITHUB_HOSTNAME'.\e[0m"
echo "machine ${GITHUB_HOSTNAME}
login oauth
password ${GITHUB_TOKEN}" >> "$HOME/.netrc"
chmod 600 "$HOME/.netrc"
else
echo -e "\e[34m[Info] Private GitHub repositories will not be indexed since GITHUB_TOKEN was not set.\e[0m"
fi
# Check if GITLAB_TOKEN is set
if [ -n "$GITLAB_TOKEN" ]; then
echo "$GITLAB_TOKEN" > "$HOME/.gitlab-token"
chmod 600 "$HOME/.gitlab-token"
# Configure Git with the provided GITLAB_TOKEN
echo -e "\e[34m[Info] Configuring GitLab credentials with hostname '$GITLAB_HOSTNAME'.\e[0m"
echo "machine ${GITLAB_HOSTNAME}
login oauth
password ${GITLAB_TOKEN}" >> "$HOME/.netrc"
chmod 600 "$HOME/.netrc"
else
echo -e "\e[34m[Info] GitLab repositories will not be indexed since GITLAB_TOKEN was not set.\e[0m"
fi
# Update nextjs public env variables w/o requiring a rebuild. # Update nextjs public env variables w/o requiring a rebuild.
# @see: https://phase.dev/blog/nextjs-public-runtime-variables/ # @see: https://phase.dev/blog/nextjs-public-runtime-variables/
@ -89,7 +59,7 @@ if [ -z "$NEXT_PUBLIC_SOURCEBOT_VERSION" ] && [ ! -z "$SOURCEBOT_VERSION" ]; the
export NEXT_PUBLIC_SOURCEBOT_VERSION="$SOURCEBOT_VERSION" export NEXT_PUBLIC_SOURCEBOT_VERSION="$SOURCEBOT_VERSION"
fi fi
find /app/public /app/.next -type f -name "*.js" | find /app/packages/web/public /app/packages/web/.next -type f -name "*.js" |
while read file; do while read file; do
sed -i "s|BAKED_NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED|${NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED}|g" "$file" sed -i "s|BAKED_NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED|${NEXT_PUBLIC_SOURCEBOT_TELEMETRY_DISABLED}|g" "$file"
sed -i "s|BAKED_NEXT_PUBLIC_SOURCEBOT_VERSION|${NEXT_PUBLIC_SOURCEBOT_VERSION}|g" "$file" sed -i "s|BAKED_NEXT_PUBLIC_SOURCEBOT_VERSION|${NEXT_PUBLIC_SOURCEBOT_VERSION}|g" "$file"

View file

@ -1,86 +0,0 @@
{
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/index.json",
"Configs": [
// ~~~~~~~~~~~~ GitHub Examples ~~~~~~~~~~~~
// Index all repos in organization "my-org".
{
"Type": "github",
"GitHubOrg": "my-org"
},
// Index all repos in self-hosted GitHub instance.
// @note: the environment variable GITHUB_HOSTNAME must be set. See README.
{
"Type": "github",
"GitHubUrl": "https://github.example.com"
},
// Index all repos in user "my-user".
{
"Type": "github",
"GitHubUser": "my-user"
},
// Index repos foo & bar in organization "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"Name": "^(foo|bar)$"
},
// Index all repos except foo & bar in organization "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"Exclude": "^(foo|bar)$"
},
// Index all repos that contain topic "topic_a" or "topic_b" in organization "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"Topics": ["topic_a", "topic_b"]
},
// Index all repos that _do not_ contain "topic_x" and "topic_y" in organization "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"ExcludeTopics": ["topic_x", "topic_y"]
},
// Index all repos in organization, including forks in "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"IncludeForks": true /* default: false */
},
// Index all repos in organization, excluding repos that are archived in "my-org".
{
"Type": "github",
"GitHubOrg": "my-org",
"NoArchived": true /* default: false */
}
// ~~~~~~~~~~~~ GitLab Examples ~~~~~~~~~~~~
// Index all repos visible to the GITLAB_TOKEN.
{
"Type": "gitlab"
},
// Index all repos visible to the GITLAB_TOKEN (custom GitLab URL).
// @note: the environment variable GITLAB_HOSTNAME must also be set. See README.
{
"Type": "gitlab",
"GitLabURL": "https://gitlab.example.com"
},
// Index all repos (public only) visible to the GITLAB_TOKEN.
{
"Type": "gitlab",
"OnlyPublic": true
},
// Index only the repos foo & bar.
{
"Type": "gitlab",
"Name": "^(foo|bar)$"
},
// Index all repos except fizz & buzz visible to the GITLAB_TOKEN.
{
"Type": "gitlab",
"Exclude": "^(fizz|buzz)$"
},
]
}

View file

@ -1,83 +1,16 @@
{ {
"name": "sourcebot", "private": true,
"version": "0.1.0", "workspaces": [
"private": true, "packages/*"
"scripts": { ],
"dev": "npm-run-all --print-label --parallel next:dev zoekt:webserver zoekt:indexserver", "scripts": {
"zoekt:webserver": "export PATH=\"$PWD/bin:$PATH\" && zoekt-webserver -index .sourcebot/index -rpc", "build": "yarn workspaces run build",
"zoekt:indexserver": "export PATH=\"$PWD/bin:$PATH\" && export CTAGS_COMMAND=ctags && zoekt-indexserver -data_dir .sourcebot -mirror_config config.json", "dev": "npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web",
"next:dev": "next dev", "dev:zoekt": "export PATH=\"$PWD/bin:$PATH\" && zoekt-webserver -index .sourcebot/index -rpc",
"build": "next build", "dev:backend": "yarn workspace @sourcebot/backend dev:watch",
"start": "next start", "dev:web": "yarn workspace @sourcebot/web dev"
"lint": "next lint" },
}, "devDependencies": {
"dependencies": { "npm-run-all": "^4.1.5"
"@codemirror/commands": "^6.6.0", }
"@codemirror/lang-cpp": "^6.0.2",
"@codemirror/lang-css": "^6.3.0",
"@codemirror/lang-go": "^6.0.1",
"@codemirror/lang-html": "^6.4.9",
"@codemirror/lang-java": "^6.0.1",
"@codemirror/lang-javascript": "^6.2.2",
"@codemirror/lang-json": "^6.0.1",
"@codemirror/lang-markdown": "^6.2.5",
"@codemirror/lang-php": "^6.0.1",
"@codemirror/lang-python": "^6.1.6",
"@codemirror/lang-rust": "^6.0.1",
"@codemirror/lang-sql": "^6.7.1",
"@codemirror/search": "^6.5.6",
"@codemirror/state": "^6.4.1",
"@codemirror/view": "^6.33.0",
"@hookform/resolvers": "^3.9.0",
"@radix-ui/react-dropdown-menu": "^2.1.1",
"@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.1.0",
"@radix-ui/react-navigation-menu": "^1.2.0",
"@radix-ui/react-scroll-area": "^1.1.0",
"@radix-ui/react-separator": "^1.1.0",
"@radix-ui/react-slot": "^1.1.0",
"@replit/codemirror-lang-csharp": "^6.2.0",
"@replit/codemirror-vim": "^6.2.1",
"@tanstack/react-query": "^5.53.3",
"@tanstack/react-table": "^8.20.5",
"@uiw/react-codemirror": "^4.23.0",
"class-variance-authority": "^0.7.0",
"client-only": "^0.0.1",
"clsx": "^2.1.1",
"embla-carousel-auto-scroll": "^8.3.0",
"embla-carousel-react": "^8.3.0",
"escape-string-regexp": "^5.0.0",
"http-status-codes": "^2.3.0",
"lucide-react": "^0.435.0",
"next": "14.2.10",
"next-themes": "^0.3.0",
"posthog-js": "^1.161.5",
"pretty-bytes": "^6.1.1",
"react": "^18",
"react-dom": "^18",
"react-hook-form": "^7.53.0",
"react-hotkeys-hook": "^4.5.1",
"react-resizable-panels": "^2.1.1",
"server-only": "^0.0.1",
"sharp": "^0.33.5",
"tailwind-merge": "^2.5.2",
"tailwindcss-animate": "^1.0.7",
"usehooks-ts": "^3.1.0",
"zod": "^3.23.8"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@typescript-eslint/eslint-plugin": "^8.3.0",
"@typescript-eslint/parser": "^8.3.0",
"eslint": "^8",
"eslint-config-next": "14.2.6",
"eslint-plugin-react": "^7.35.0",
"eslint-plugin-react-hooks": "^4.6.2",
"npm-run-all": "^4.1.5",
"postcss": "^8",
"tailwindcss": "^3.4.1",
"typescript": "^5"
}
} }

1
packages/backend/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
dist/

View file

@ -0,0 +1,30 @@
{
"name": "@sourcebot/backend",
"version": "0.1.0",
"private": true,
"main": "index.js",
"type": "module",
"scripts": {
"dev:watch": "yarn generate:types && tsc-watch --preserveWatchOutput --onSuccess \"yarn dev --configPath ../../config.json --cacheDir ../../.sourcebot\"",
"dev": "export PATH=\"$PWD/../../bin:$PATH\" && export CTAGS_COMMAND=ctags && node ./dist/index.js",
"build": "yarn generate:types && tsc",
"generate:types": "tsx tools/generateTypes.ts"
},
"devDependencies": {
"@types/argparse": "^2.0.16",
"@types/node": "^22.7.5",
"json-schema-to-typescript": "^15.0.2",
"tsc-watch": "^6.2.0",
"tsx": "^4.19.1",
"typescript": "^5.6.2"
},
"dependencies": {
"@gitbeaker/rest": "^40.5.1",
"@octokit/rest": "^21.0.2",
"argparse": "^2.0.1",
"lowdb": "^7.0.1",
"simple-git": "^3.27.0",
"strip-json-comments": "^5.0.1",
"winston": "^3.15.0"
}
}

View file

@ -0,0 +1,10 @@
/**
 * Minimum age of a repository's last index before it is fetched and indexed
 * again: one hour, expressed in milliseconds.
 */
export const REINDEX_INTERVAL_MS = 60 * 60 * 1000;

/**
 * Period of the timer that re-syncs the configuration file: 24 hours,
 * expressed in milliseconds.
 */
export const RESYNC_CONFIG_INTERVAL_MS = 24 * 60 * 60 * 1000;

View file

@ -0,0 +1,28 @@
import { JSONFilePreset } from "lowdb/node";
import { type Low } from "lowdb";
import { AppContext, Repository } from "./types.js";
/**
* Shape of the JSON document persisted by lowdb.
* `repos` holds every known repository, keyed by the repository's `id`.
*/
type Schema = {
repos: {
[key: string]: Repository;
}
}
/**
* Handle to the lowdb database holding a `Schema` document.
*/
export type Database = Low<Schema>;
/**
 * Opens the JSON database stored as `db.json` directly under `ctx.cachePath`.
 *
 * An empty `repos` map is handed to the lowdb preset as its initial data.
 *
 * @param ctx Application context; only `cachePath` is read.
 * @returns The lowdb handle for the database file.
 */
export const loadDB = async (ctx: AppContext): Promise<Database> => {
    const dbFilePath = `${ctx.cachePath}/db.json`;
    const initialData: Schema = { repos: {} };
    return JSONFilePreset<Schema>(dbFilePath, initialData);
}
/**
 * Shallow-merges `data` over the stored entry for `repoId` and persists the database.
 *
 * Fields present in `data` win over the stored ones; all other stored fields are kept.
 *
 * @param repoId Id of the repository entry to update.
 * @param data Fields to overwrite.
 * @param db Database to mutate and write.
 */
export const updateRepository = async (repoId: string, data: Partial<Repository>, db: Database) => {
    const { repos } = db.data;
    const current = repos[repoId];
    repos[repoId] = { ...current, ...data };
    await db.write();
}
/**
 * Stores `repo` under its `id` (replacing any existing entry with that id)
 * and persists the database.
 *
 * @param repo Repository to store.
 * @param db Database to mutate and write.
 */
export const createRepository = async (repo: Repository, db: Database) => {
    const { repos } = db.data;
    repos[repo.id] = repo;
    await db.write();
}

View file

@ -0,0 +1,6 @@
/**
 * Returns `env` when it is set, otherwise `defaultValue`.
 *
 * Only `undefined`/`null` count as unset; an empty string is returned as-is.
 *
 * @param env Value read from the environment (may be undefined).
 * @param defaultValue Fallback, defaults to the empty string.
 */
export const getEnv = (env: string | undefined, defaultValue = '') => {
    if (env === undefined || env === null) {
        return defaultValue;
    }
    return env;
}

/** Log level taken from the `SOURCEBOT_LOG_LEVEL` environment variable; `info` when unset. */
export const SOURCEBOT_LOG_LEVEL = getEnv(process.env.SOURCEBOT_LOG_LEVEL, 'info');

View file

@ -0,0 +1,51 @@
import { Repository } from './types.js';
import { simpleGit, SimpleGitProgressEvent } from 'simple-git';
import { existsSync } from 'fs';
import { createLogger } from './logger.js';
const logger = createLogger('git');
/**
 * Bare-clones `repo.cloneUrl` into `repo.path`.
 *
 * Does nothing (beyond logging a warning) when `repo.path` already exists.
 * Each entry of `repo.gitConfigMetadata` is passed to the clone as
 * `--config key=value`. After cloning, `remote.origin.fetch` is set to
 * `+refs/heads/*:refs/heads/*` in the new repository.
 *
 * @param repo Repository to clone.
 * @param onProgress Optional handler given to simple-git as its `progress` option.
 */
export const cloneRepository = async (repo: Repository, onProgress?: (event: SimpleGitProgressEvent) => void) => {
    if (existsSync(repo.path)) {
        logger.warn(`${repo.id} already exists. Skipping clone.`)
        return;
    }

    const git = simpleGit({ progress: onProgress });

    const cloneArgs: string[] = ["--bare"];
    for (const [key, value] of Object.entries(repo.gitConfigMetadata ?? {})) {
        cloneArgs.push('--config', `${key}=${value}`);
    }

    await git.clone(repo.cloneUrl, repo.path, cloneArgs);

    const repoGit = git.cwd({ path: repo.path });
    await repoGit.addConfig("remote.origin.fetch", "+refs/heads/*:refs/heads/*");
}
/**
 * Runs `git fetch origin --prune --progress` inside `repo.path`.
 *
 * @param repo Repository whose local clone should be fetched.
 * @param onProgress Optional handler given to simple-git as its `progress` option.
 */
export const fetchRepository = async (repo: Repository, onProgress?: (event: SimpleGitProgressEvent) => void) => {
    const fetchArgs = ["--prune", "--progress"];
    const repoGit = simpleGit({ progress: onProgress }).cwd({ path: repo.path });
    await repoGit.fetch("origin", fetchArgs);
}

View file

@ -0,0 +1,195 @@
import { Octokit } from "@octokit/rest";
import { GitHubConfig } from "./schemas/v2.js";
import { createLogger } from "./logger.js";
import { AppContext, Repository } from "./types.js";
import path from 'path';
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool } from "./utils.js";
const logger = createLogger("GitHub");
/**
* The subset of fields this module reads from the repository objects returned by Octokit.
* Optional fields may be missing on some responses; callers fall back to a default
* (0 for counts, false for `archived`) or skip the repository (`clone_url`).
*/
type OctokitRepository = {
name: string,
// '{owner}/{repo}'; used as the repository name and as part of its id.
full_name: string,
fork: boolean,
private: boolean,
html_url: string,
clone_url?: string,
stargazers_count?: number,
watchers_count?: number,
subscribers_count?: number,
forks_count?: number,
archived?: boolean,
}
/**
 * Resolves a GitHub config entry into the list of repositories it describes.
 *
 * Repositories are collected from `config.orgs`, `config.repos` and `config.users`
 * (in that order), converted to `Repository` records and finally filtered by
 * `config.exclude`. Repositories without a `clone_url` are dropped with a warning.
 *
 * @param config GitHub section of the Sourcebot configuration.
 * @param signal Abort signal forwarded to every Octokit request.
 * @param ctx Application context (`reposPath` for the clone location, and passed to token lookup).
 * @returns The repositories to sync. No de-duplication is performed here.
 * @throws If a token is configured via an environment variable that is not set,
 *         if `config.url` is not a valid URL, or if any GitHub request fails.
 */
export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => {
    const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;

    // The hostname is the same for every repository of this config entry, so parse
    // the URL once up front. This also fails fast on an invalid `config.url`.
    const hostname = config.url ? new URL(config.url).hostname : 'github.com';

    // Strip trailing slashes so that e.g. 'https://ghe.example.com/' does not
    // produce the malformed base URL 'https://ghe.example.com//api/v3'.
    const baseUrl = config.url ? `${config.url.replace(/\/+$/, '')}/api/v3` : undefined;

    const octokit = new Octokit({
        auth: token,
        ...(baseUrl ? { baseUrl } : {}),
    });

    let allRepos: OctokitRepository[] = [];

    if (config.orgs) {
        const _repos = await getReposForOrgs(config.orgs, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    if (config.repos) {
        const _repos = await getRepos(config.repos, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    if (config.users) {
        const isAuthenticated = config.token !== undefined;
        const _repos = await getReposOwnedByUsers(config.users, isAuthenticated, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    // Marshall results to our type
    let repos: Repository[] = allRepos
        .filter((repo) => {
            if (!repo.clone_url) {
                logger.warn(`Repository ${repo.name} missing property 'clone_url'. Excluding.`)
                return false;
            }
            return true;
        })
        .map((repo) => {
            const repoId = `${hostname}/${repo.full_name}`;
            const repoPath = path.resolve(path.join(ctx.reposPath, `${repoId}.git`));

            // When a token is configured it is embedded as the username of the clone URL.
            const cloneUrl = new URL(repo.clone_url!);
            if (token) {
                cloneUrl.username = token;
            }

            return {
                name: repo.full_name,
                id: repoId,
                cloneUrl: cloneUrl.toString(),
                path: repoPath,
                isStale: false,
                isFork: repo.fork,
                isArchived: !!repo.archived,
                gitConfigMetadata: {
                    'zoekt.web-url-type': 'github',
                    'zoekt.web-url': repo.html_url,
                    'zoekt.name': repoId,
                    'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
                    'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
                    'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
                    'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
                    'zoekt.archived': marshalBool(repo.archived),
                    'zoekt.fork': marshalBool(repo.fork),
                    'zoekt.public': marshalBool(repo.private === false)
                }
            } satisfies Repository;
        });

    if (config.exclude) {
        if (config.exclude.forks) {
            repos = excludeForkedRepos(repos, logger);
        }

        if (config.exclude.archived) {
            repos = excludeArchivedRepos(repos, logger);
        }

        if (config.exclude.repos) {
            repos = excludeReposByName(repos, config.exclude.repos, logger);
        }
    }

    logger.debug(`Found ${repos.length} total repositories.`);

    return repos;
}
/**
 * Lists the repositories for each entry of `users`, querying all users concurrently,
 * and returns them as a single flat array.
 *
 * With `isAuthenticated` set, the `listForAuthenticatedUser` endpoint is used
 * (visibility `all`, affiliation `owner`); otherwise `listForUser` is used.
 *
 * NOTE(review): `listForAuthenticatedUser` presumably returns the token owner's
 * repositories regardless of the `username` passed — confirm that configuring a
 * user other than the token owner behaves as intended.
 *
 * @param users User names to list repositories for.
 * @param isAuthenticated Whether a token was configured.
 * @param octokit Octokit client to use.
 * @param signal Abort signal forwarded to every request.
 */
const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, octokit: Octokit, signal: AbortSignal) => {
    // @todo : error handling
    const fetchForUser = async (user: string) => {
        logger.debug(`Fetching repository info for user ${user}...`);
        const start = Date.now();

        const result = await (isAuthenticated
            ? octokit.paginate(octokit.repos.listForAuthenticatedUser, {
                username: user,
                visibility: 'all',
                affiliation: 'owner',
                per_page: 100,
                request: {
                    signal,
                },
            })
            : octokit.paginate(octokit.repos.listForUser, {
                username: user,
                per_page: 100,
                request: {
                    signal,
                },
            }));

        const duration = Date.now() - start;
        logger.debug(`Found ${result.length} owned by user ${user} in ${duration}ms.`);
        return result;
    };

    const reposPerUser = await Promise.all(users.map((user) => fetchForUser(user)));
    return reposPerUser.flat();
}
/**
 * Lists the repositories of each organization in `orgs` (paginated, 100 per page),
 * querying all organizations concurrently, and returns them as one flat array.
 *
 * @param orgs Organization names.
 * @param octokit Octokit client to use.
 * @param signal Abort signal forwarded to every request.
 */
const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal) => {
    // @todo : error handling
    const fetchForOrg = async (org: string) => {
        logger.debug(`Fetching repository info for org ${org}...`);
        const start = Date.now();

        const result = await octokit.paginate(octokit.repos.listForOrg, {
            org: org,
            per_page: 100,
            request: {
                signal
            }
        });

        const duration = Date.now() - start;
        logger.debug(`Found ${result.length} in org ${org} in ${duration}ms.`);
        return result;
    };

    const reposPerOrg = await Promise.all(orgs.map((org) => fetchForOrg(org)));
    return reposPerOrg.flat();
}
/**
 * Fetches repository info for each entry of `repoList`, all concurrently.
 *
 * @param repoList Repositories formatted as '{owner}/{repoName}'.
 * @param octokit Octokit client to use.
 * @param signal Abort signal forwarded to every request.
 * @returns The repository objects, in the same order as `repoList`.
 * @throws If an entry is not of the form '{owner}/{repoName}', or if any request fails.
 */
const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal) => {
    // @todo : error handling
    const repos = await Promise.all(repoList.map(async (repo) => {
        logger.debug(`Fetching repository info for ${repo}...`);
        const start = Date.now();

        // Reject malformed entries up front. Without this check, 'owner' alone would send
        // an undefined repo name to the API and 'a/b/c' would silently be treated as 'a/b'.
        const [owner, repoName, ...extraSegments] = repo.split('/');
        if (!owner || !repoName || extraSegments.length > 0) {
            throw new Error(`Invalid repository '${repo}'. Expected the format '{owner}/{repoName}'.`);
        }

        const result = await octokit.repos.get({
            owner,
            repo: repoName,
            request: {
                signal
            }
        });

        const duration = Date.now() - start;
        logger.debug(`Found info for repository ${repo} in ${duration}ms`);

        return result.data;
    }));

    return repos;
}

View file

@ -0,0 +1,114 @@
import { Gitlab, ProjectSchema } from "@gitbeaker/rest";
import { GitLabConfig } from "./schemas/v2.js";
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js";
import { createLogger } from "./logger.js";
import { AppContext, Repository } from "./types.js";
import path from 'path';
const logger = createLogger("GitLab");
/**
 * Resolves a GitLab config entry into the list of repositories it describes.
 *
 * Projects are collected from `config.groups`, `config.users` and `config.projects`
 * (in that order), converted to `Repository` records and then filtered by `config.exclude`.
 * When a token is configured, clone URLs carry it as the password of user `oauth2`.
 *
 * @param config GitLab section of the Sourcebot configuration.
 * @param ctx Application context (`reposPath` for the clone location, and passed to token lookup).
 * @returns The repositories to sync. No de-duplication is performed here.
 */
export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppContext) => {
    const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
    const api = new Gitlab({
        ...(config.token ? { token } : {}),
        ...(config.url ? { host: config.url } : {}),
    });

    let allProjects: ProjectSchema[] = [];

    if (config.groups) {
        const projectsPerGroup = await Promise.all(config.groups.map(async (group) => {
            logger.debug(`Fetching project info for group ${group}...`);
            const { durationMs, data } = await measure(() => api.Groups.allProjects(group, {
                perPage: 100,
                owned: true,
            }));
            logger.debug(`Found ${data.length} projects in group ${group} in ${durationMs}ms.`);
            return data;
        }));
        allProjects = allProjects.concat(projectsPerGroup.flat());
    }

    if (config.users) {
        const projectsPerUser = await Promise.all(config.users.map(async (user) => {
            logger.debug(`Fetching project info for user ${user}...`);
            const { durationMs, data } = await measure(() => api.Users.allProjects(user, {
                perPage: 100,
                owned: true,
            }));
            logger.debug(`Found ${data.length} projects owned by user ${user} in ${durationMs}ms.`);
            return data;
        }));
        allProjects = allProjects.concat(projectsPerUser.flat());
    }

    if (config.projects) {
        const individualProjects = await Promise.all(config.projects.map(async (project) => {
            logger.debug(`Fetching project info for project ${project}...`);
            const { durationMs, data } = await measure(() => api.Projects.show(project));
            logger.debug(`Found project ${project} in ${durationMs}ms.`);
            return data;
        }));
        allProjects = allProjects.concat(individualProjects);
    }

    let repos: Repository[] = allProjects.map((project) => {
        const hostname = config.url ? new URL(config.url).hostname : "gitlab.com";
        const repoId = `${hostname}/${project.path_with_namespace}`;
        const repoPath = path.resolve(path.join(ctx.reposPath, `${repoId}.git`))
        // A project counts as a fork when the API response carries `forked_from_project`.
        const isFork = project.forked_from_project !== undefined;

        const cloneUrl = new URL(project.http_url_to_repo);
        if (token) {
            cloneUrl.username = 'oauth2';
            cloneUrl.password = token;
        }

        return {
            name: project.path_with_namespace,
            id: repoId,
            cloneUrl: cloneUrl.toString(),
            path: repoPath,
            isStale: false,
            isFork,
            isArchived: project.archived,
            gitConfigMetadata: {
                'zoekt.web-url-type': 'gitlab',
                'zoekt.web-url': project.web_url,
                'zoekt.name': repoId,
                'zoekt.gitlab-stars': project.star_count.toString(),
                'zoekt.gitlab-forks': project.forks_count.toString(),
                'zoekt.archived': marshalBool(project.archived),
                'zoekt.fork': marshalBool(isFork),
                'zoekt.public': marshalBool(project.visibility === 'public'),
            }
        } satisfies Repository;
    });

    const exclude = config.exclude;
    if (exclude) {
        if (exclude.forks) {
            repos = excludeForkedRepos(repos, logger);
        }

        if (exclude.archived) {
            repos = excludeArchivedRepos(repos, logger);
        }

        if (exclude.projects) {
            repos = excludeReposByName(repos, exclude.projects, logger);
        }
    }

    logger.debug(`Found ${repos.length} total repositories.`);

    return repos;
}

View file

@ -0,0 +1,235 @@
import { ArgumentParser } from "argparse";
import { mkdir, readFile } from 'fs/promises';
import { existsSync, watch } from 'fs';
import { exec, execFile } from "child_process";
import path from 'path';
import { SourcebotConfigurationSchema } from "./schemas/v2.js";
import { getGitHubReposFromConfig } from "./github.js";
import { getGitLabReposFromConfig } from "./gitlab.js";
import { AppContext, Repository } from "./types.js";
import { cloneRepository, fetchRepository } from "./git.js";
import { createLogger } from "./logger.js";
import { createRepository, Database, loadDB, updateRepository } from './db.js';
import { measure } from "./utils.js";
import { REINDEX_INTERVAL_MS, RESYNC_CONFIG_INTERVAL_MS } from "./constants.js";
import stripJsonComments from 'strip-json-comments';
const logger = createLogger('main');
// Command line parser. The accepted arguments are registered in the entry point below.
const parser = new ArgumentParser({
description: "Sourcebot backend tool",
});
/**
* Parsed command line arguments.
*/
type Arguments = {
// Path to the configuration file (`--configPath`).
configPath: string;
// Path to the .sourcebot cache directory (`--cacheDir`).
cacheDir: string;
}
/**
 * Runs `zoekt-git-index -index <ctx.indexPath> <repo.path>` and resolves with its output.
 *
 * The command is spawned directly with an argument array rather than through a
 * shell command string, so paths containing spaces or shell metacharacters are
 * passed through verbatim instead of being split or interpreted.
 *
 * @param repo Repository whose local clone should be indexed.
 * @param ctx Application context; only `indexPath` is read.
 * @returns The captured stdout and stderr of the indexer.
 * @throws Rejects with the error reported by `execFile` (e.g. non-zero exit, binary not found).
 */
const indexRepository = async (repo: Repository, ctx: AppContext) => {
    return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
        const args = ['-index', ctx.indexPath, repo.path];
        execFile('zoekt-git-index', args, (error, stdout, stderr) => {
            if (error) {
                reject(error);
                return;
            }
            resolve({
                stdout,
                stderr
            });
        });
    });
}
/**
 * Reads the configuration file and reconciles the database with it.
 *
 * Steps:
 *  1. Parse the config (comments are stripped before `JSON.parse`).
 *  2. Resolve every `repos` entry into concrete repositories via the GitHub/GitLab helpers.
 *  3. De-duplicate the result on repository id.
 *  4. Create or update each repository in the database.
 *  5. Mark repositories that are in the database but no longer in the config as stale.
 *
 * @param configPath Path to the configuration file.
 * @param db Database to reconcile.
 * @param signal Abort signal, forwarded to the file read and to the GitHub helper.
 * @param ctx Application context forwarded to the code host helpers.
 * @throws If the file cannot be read or parsed, or if a code host helper fails.
 */
const syncConfig = async (configPath: string, db: Database, signal: AbortSignal, ctx: AppContext) => {
    const configContent = await readFile(configPath, {
        encoding: 'utf-8',
        signal,
    });

    // @todo: we should validate the configuration file's structure here.
    const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfigurationSchema;

    // Fetch all repositories from the config file
    let configRepos: Repository[] = [];
    for (const repoConfig of config.repos ?? []) {
        switch (repoConfig.type) {
            case 'github': {
                const gitHubRepos = await getGitHubReposFromConfig(repoConfig, signal, ctx);
                configRepos.push(...gitHubRepos);
                break;
            }
            case 'gitlab': {
                const gitLabRepos = await getGitLabReposFromConfig(repoConfig, ctx);
                configRepos.push(...gitLabRepos);
                break;
            }
        }
    }

    // De-duplicate on id. Sorting first puts duplicates next to each other, so only
    // the previous element needs to be compared.
    configRepos.sort((a, b) => {
        return a.id.localeCompare(b.id);
    });
    configRepos = configRepos.filter((item, index, self) => {
        if (index === 0) return true;
        if (item.id === self[index - 1].id) {
            logger.debug(`Duplicate repository ${item.id} found in config file.`);
            return false;
        }
        return true;
    });

    logger.info(`Discovered ${configRepos.length} unique repositories from config.`);

    // Merge the repositories into the database
    for (const newRepo of configRepos) {
        if (newRepo.id in db.data.repos) {
            await updateRepository(newRepo.id, newRepo, db);
        } else {
            await createRepository(newRepo, db);
        }
    }

    // Find repositories that are in the database, but not in the configuration file.
    // A Set keeps the membership test O(1) per repository, and repositories that are
    // already stale are skipped so the database is only written when something changed.
    const configRepoIds = new Set(configRepos.map(repo => repo.id));
    const newlyStaleIds = Object.keys(db.data.repos).filter((id) => {
        return !configRepoIds.has(id) && !db.data.repos[id].isStale;
    });

    if (newlyStaleIds.length > 0) {
        // Single update => single write of the database file.
        await db.update(({ repos }) => {
            for (const id of newlyStaleIds) {
                logger.warn(`Repository ${id} is no longer listed in the configuration file or was not found. Marking as stale.`);
                repos[id].isStale = true;
            }
        });
    }
}
/**
* Entry point.
*
* Parses the command line, prepares the cache directories and the database, keeps the
* database in sync with the configuration file (on startup, on file change and on a timer),
* and then loops forever fetching/cloning and indexing repositories that are due.
*/
(async () => {
parser.add_argument("--configPath", {
help: "Path to config file",
required: true,
});
parser.add_argument("--cacheDir", {
help: "Path to .sourcebot cache directory",
required: true,
});
const args = parser.parse_args() as Arguments;
if (!existsSync(args.configPath)) {
console.error(`Config file ${args.configPath} does not exist`);
process.exit(1);
}
// Clones live under <cacheDir>/repos and index output under <cacheDir>/index.
const cacheDir = args.cacheDir;
const reposPath = path.join(cacheDir, 'repos');
const indexPath = path.join(cacheDir, 'index');
if (!existsSync(reposPath)) {
await mkdir(reposPath, { recursive: true });
}
if (!existsSync(indexPath)) {
await mkdir(indexPath, { recursive: true });
}
const context: AppContext = {
indexPath,
reposPath,
cachePath: cacheDir,
configPath: args.configPath,
}
const db = await loadDB(context);
// `abortController` belongs to the most recently started sync;
// `isSyncing` is true while a sync is believed to be in flight.
let abortController = new AbortController();
let isSyncing = false;
// Starts a sync without awaiting it. If one is already running, it is aborted first
// and a fresh controller is created for the new run.
const _syncConfig = () => {
if (isSyncing) {
abortController.abort();
abortController = new AbortController();
}
logger.info(`Syncing configuration file ${args.configPath} ...`);
isSyncing = true;
measure(() => syncConfig(args.configPath, db, abortController.signal, context))
.then(({ durationMs }) => {
logger.info(`Synced configuration file ${args.configPath} in ${durationMs / 1000}s`);
isSyncing = false;
})
.catch((err) => {
if (err.name === "AbortError") {
// @note: If we're aborting, we don't want to set isSyncing to false
// since it implies another sync is in progress.
} else {
isSyncing = false;
logger.error(`Failed to sync configuration file ${args.configPath} with error:\n`, err);
}
});
}
// Re-sync on file changes
watch(args.configPath, () => {
logger.info(`Config file ${args.configPath} changed. Re-syncing...`);
_syncConfig();
});
// Re-sync every 24 hours
setInterval(() => {
logger.info(`Re-syncing configuration file ${args.configPath}`);
_syncConfig();
}, RESYNC_CONFIG_INTERVAL_MS);
// Sync immediately on startup
_syncConfig();
// Indexing loop: one pass over all known repositories, then a one second pause.
while (true) {
const repos = db.data.repos;
for (const [_, repo] of Object.entries(repos)) {
// Repositories that were never indexed are treated as indexed at the epoch, i.e. always due.
const lastIndexed = repo.lastIndexedDate ? new Date(repo.lastIndexedDate) : new Date(0);
// Skip stale repositories and those indexed within the last REINDEX_INTERVAL_MS.
if (
repo.isStale ||
lastIndexed.getTime() > Date.now() - REINDEX_INTERVAL_MS
) {
continue;
}
try {
// An existing directory at repo.path is fetched; otherwise the repository is cloned.
if (existsSync(repo.path)) {
logger.info(`Fetching ${repo.id}...`);
const { durationMs } = await measure(() => fetchRepository(repo, ({ method, stage , progress}) => {
logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
process.stdout.write('\n');
logger.info(`Fetched ${repo.id} in ${durationMs / 1000}s`);
} else {
logger.info(`Cloning ${repo.id}...`);
const { durationMs } = await measure(() => cloneRepository(repo, ({ method, stage, progress }) => {
logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
process.stdout.write('\n');
logger.info(`Cloned ${repo.id} in ${durationMs / 1000}s`);
}
logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexRepository(repo, context));
logger.info(`Indexed ${repo.id} in ${durationMs / 1000}s`);
} catch (err: any) {
// @todo : better error handling here..
// On failure lastIndexedDate is left untouched, so the repository is retried on the next pass.
logger.error(err);
continue;
}
// Record the successful index time so the repository is skipped until it is due again.
await db.update(({ repos }) => repos[repo.id].lastIndexedDate = new Date().toUTCString());
}
await new Promise(resolve => setTimeout(resolve, 1000));
}
})();

View file

@ -0,0 +1,38 @@
import winston, { format } from 'winston';
import { SOURCEBOT_LOG_LEVEL } from './environment.js';
const { combine, colorize, timestamp, prettyPrint, errors, printf, label: labelFn } = format;
/**
 * Creates a winston logger that writes to the console and tags every line with `label`.
 *
 * The level comes from `SOURCEBOT_LOG_LEVEL`. Console lines are rendered as
 * `<timestamp> <level>: [<label>] <message>`, followed by the stack trace on a
 * new line when the log entry carries one.
 *
 * @param label Name shown in square brackets on every line.
 */
const createLogger = (label: string) => {
    // Renders a single console line; the stack (if any) goes on its own line.
    const lineFormat = printf(({ level, message, timestamp, stack, label: _label }) => {
        const line = `${timestamp} ${level}: [${_label}] ${message}`;
        return stack ? `${line}\n${stack}` : line;
    });

    const consoleTransport = new winston.transports.Console({
        format: combine(
            errors({ stack: true }),
            colorize(),
            lineFormat,
        ),
    });

    return winston.createLogger({
        level: SOURCEBOT_LOG_LEVEL,
        format: combine(
            errors({ stack: true }),
            timestamp(),
            prettyPrint(),
            labelFn({ label }),
        ),
        transports: [consoleTransport],
    });
}
export {
createLogger
};

View file

@ -0,0 +1,108 @@
// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!
export type Repos = GitHubConfig | GitLabConfig;
/**
* A Sourcebot configuration file outlines which repositories Sourcebot should sync and index.
*/
export interface SourcebotConfigurationSchema {
$schema?: string;
/**
* Defines a collection of repositories from varying code hosts that Sourcebot should sync with.
*/
repos?: Repos[];
}
export interface GitHubConfig {
/**
* GitHub Configuration
*/
type: "github";
/**
* A Personal Access Token (PAT).
*/
token?:
| string
| {
/**
* The name of the environment variable that contains the token.
*/
env: string;
};
/**
* The URL of the GitHub host. Defaults to https://github.com
*/
url?: string;
/**
* List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property.
*/
users?: string[];
/**
* List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property.
*/
orgs?: string[];
/**
* List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'.
*/
repos?: string[];
exclude?: {
/**
* Exclude forked repositories from syncing.
*/
forks?: boolean;
/**
* Exclude archived repositories from syncing.
*/
archived?: boolean;
/**
* List of individual repositories to exclude from syncing. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'.
*/
repos?: string[];
};
}
export interface GitLabConfig {
/**
* GitLab Configuration
*/
type: "gitlab";
/**
* An authentication token.
*/
token?:
| string
| {
/**
* The name of the environment variable that contains the token.
*/
env: string;
};
/**
* The URL of the GitLab host. Defaults to https://gitlab.com
*/
url?: string;
/**
* List of users to sync with. All personal projects that the user owns will be synced, unless explicitly defined in the `exclude` property.
*/
users?: string[];
/**
* List of groups to sync with. All projects in the group visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property. Subgroups can be specified by providing the path to the subgroup (e.g. `my-group/sub-group-a`).
*/
groups?: string[];
/**
* List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/
projects?: string[];
exclude?: {
/**
* Exclude forked projects from syncing.
*/
forks?: boolean;
/**
* Exclude archived projects from syncing.
*/
archived?: boolean;
/**
* List of individual projects to exclude from syncing. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/
projects?: string[];
};
}

View file

@ -0,0 +1,46 @@
/**
* A repository tracked by the backend, as stored in the database.
*/
export type Repository = {
/**
* Name of the repository (e.g., 'sourcebot-dev/sourcebot')
*/
name: string;
/**
* The unique identifier for the repository. (e.g., `github.com/sourcebot-dev/sourcebot`)
*/
id: string;
/**
* The .git url for the repository
*/
cloneUrl: string;
/**
* Path to where the repository is cloned
*/
path: string;
/**
* Key/value pairs passed to `git clone` as `--config key=value`.
*/
gitConfigMetadata?: Record<string, string>;
/**
* Date string of the last successful index. Unset if the repository has never been indexed.
*/
lastIndexedDate?: string;
/**
* True when the repository is in the database but is no longer listed by the configuration.
* Stale repositories are skipped by the indexing loop.
*/
isStale: boolean;
/**
* Whether the code host reports the repository as a fork.
*/
isFork: boolean;
/**
* Whether the code host reports the repository as archived.
*/
isArchived: boolean;
}
/**
* Paths shared across the backend for the lifetime of the process.
*/
export type AppContext = {
/**
* Path to the repos cache directory.
*/
reposPath: string;
/**
* Path to the index cache directory.
*/
indexPath: string;
/**
* Path to the root cache directory (the `--cacheDir` argument). `db.json` is stored directly under it.
*/
cachePath: string;
/**
* Path to the configuration file (the `--configPath` argument).
*/
configPath: string;
}

View file

@ -0,0 +1,58 @@
import { Logger } from "winston";
import { AppContext, Repository } from "./types.js";
/**
 * Awaits `cb` and reports how long it took.
 *
 * @param cb Async operation to time.
 * @returns The value `cb` resolved with (`data`) and the elapsed wall-clock time
 *          in milliseconds (`durationMs`). A rejection from `cb` propagates unchanged.
 */
export const measure = async <T>(cb : () => Promise<T>) => {
    const startedAt = Date.now();
    const data = await cb();
    return {
        data,
        durationMs: Date.now() - startedAt,
    };
}
/**
 * Encodes a boolean as the string `'1'` (truthy) or `'0'` (falsy or undefined).
 */
export const marshalBool = (value?: boolean) => {
    if (value) {
        return '1';
    }
    return '0';
}
/**
 * Returns the repositories that are not forks, logging each excluded one at debug level.
 *
 * @param repos Repositories to filter (not modified).
 * @param logger Optional logger for the exclusion messages.
 */
export const excludeForkedRepos = (repos: Repository[], logger?: Logger) => {
    return repos.filter((repo) => {
        const keep = !repo.isFork;
        if (!keep) {
            logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.forks is true`);
        }
        return keep;
    });
}
/**
 * Returns the repositories that are not archived, logging each excluded one at debug level.
 *
 * @param repos Repositories to filter (not modified).
 * @param logger Optional logger for the exclusion messages.
 */
export const excludeArchivedRepos = (repos: Repository[], logger?: Logger) => {
    return repos.filter((repo) => {
        const keep = !repo.isArchived;
        if (!keep) {
            logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.archived is true`);
        }
        return keep;
    });
}
/**
 * Returns the repositories whose `name` is not listed in `excludedRepoNames`,
 * logging each excluded one at debug level. Names are compared exactly.
 *
 * @param repos Repositories to filter (not modified).
 * @param excludedRepoNames Repository names to drop.
 * @param logger Optional logger for the exclusion messages.
 */
export const excludeReposByName = (repos: Repository[], excludedRepoNames: string[], logger?: Logger) => {
    const blocked = new Set(excludedRepoNames);
    return repos.filter((repo) => {
        const keep = !blocked.has(repo.name);
        if (!keep) {
            logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.repos contains ${repo.name}`);
        }
        return keep;
    });
}
/**
 * Resolves a configured token to its string value.
 *
 * @param token Either the token itself, or `{ env }` naming the environment variable that holds it.
 * @param ctx Application context; `configPath` is only used in the error message.
 * @returns The token string.
 * @throws If the named environment variable is unset or empty.
 */
export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
    if (typeof token !== 'string') {
        const fromEnv = process.env[token.env];
        if (fromEnv) {
            return fromEnv;
        }
        throw new Error(`The environment variable '${token.env}' was referenced in ${ctx.configPath}, but was not set.`);
    }
    return token;
}

View file

@ -0,0 +1,22 @@
import { compileFromFile } from 'json-schema-to-typescript'
import path from 'path';
import fs from 'fs';
// Header placed at the top of the generated file.
const BANNER_COMMENT = '// THIS IS A AUTO-GENERATED FILE. DO NOT MODIFY MANUALLY!\n';

/**
 * Compiles the v2 JSON schema into TypeScript types and writes them to src/schemas/v2.ts.
 * Both paths are resolved relative to the current working directory.
 */
(async () => {
    const cwd = process.cwd();
    const schemaPath = path.resolve(cwd, '../../schemas/v2/index.json');
    const outputPath = path.resolve(cwd, 'src/schemas/v2.ts');

    const generated = await compileFromFile(schemaPath, {
        bannerComment: BANNER_COMMENT,
        cwd,
    });

    await fs.promises.writeFile(outputPath, generated, "utf-8");
})();

View file

@ -0,0 +1,26 @@
{
"compilerOptions": {
"outDir": "dist",
"incremental": true,
"declaration": true,
"emitDecoratorMetadata": true,
"esModuleInterop": true,
"experimentalDecorators": true,
"forceConsistentCasingInFileNames": true,
"isolatedModules": true,
"module": "Node16",
"moduleResolution": "Node16",
"target": "ES2022",
"noEmitOnError": false,
"noImplicitAny": true,
"noUnusedLocals": false,
"pretty": true,
"resolveJsonModule": true,
"skipLibCheck": true,
"strict": true,
"sourceMap": true,
"inlineSources": true
},
"include": ["src/index.ts"],
"exclude": ["node_modules"]
}

42
packages/web/.gitignore vendored Normal file
View file

@ -0,0 +1,42 @@
# Created by https://www.toptal.com/developers/gitignore/api/nextjs
# Edit at https://www.toptal.com/developers/gitignore?templates=nextjs
### NextJS ###
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*
# local env files
.env*.local
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# End of https://www.toptal.com/developers/gitignore/api/nextjs
!.env

80
packages/web/package.json Normal file
View file

@ -0,0 +1,80 @@
{
"name": "@sourcebot/web",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@codemirror/commands": "^6.6.0",
"@codemirror/lang-cpp": "^6.0.2",
"@codemirror/lang-css": "^6.3.0",
"@codemirror/lang-go": "^6.0.1",
"@codemirror/lang-html": "^6.4.9",
"@codemirror/lang-java": "^6.0.1",
"@codemirror/lang-javascript": "^6.2.2",
"@codemirror/lang-json": "^6.0.1",
"@codemirror/lang-markdown": "^6.2.5",
"@codemirror/lang-php": "^6.0.1",
"@codemirror/lang-python": "^6.1.6",
"@codemirror/lang-rust": "^6.0.1",
"@codemirror/lang-sql": "^6.7.1",
"@codemirror/search": "^6.5.6",
"@codemirror/state": "^6.4.1",
"@codemirror/view": "^6.33.0",
"@hookform/resolvers": "^3.9.0",
"@radix-ui/react-dropdown-menu": "^2.1.1",
"@radix-ui/react-icons": "^1.3.0",
"@radix-ui/react-label": "^2.1.0",
"@radix-ui/react-navigation-menu": "^1.2.0",
"@radix-ui/react-scroll-area": "^1.1.0",
"@radix-ui/react-separator": "^1.1.0",
"@radix-ui/react-slot": "^1.1.0",
"@replit/codemirror-lang-csharp": "^6.2.0",
"@replit/codemirror-vim": "^6.2.1",
"@tanstack/react-query": "^5.53.3",
"@tanstack/react-table": "^8.20.5",
"@uiw/react-codemirror": "^4.23.0",
"class-variance-authority": "^0.7.0",
"client-only": "^0.0.1",
"clsx": "^2.1.1",
"embla-carousel-auto-scroll": "^8.3.0",
"embla-carousel-react": "^8.3.0",
"escape-string-regexp": "^5.0.0",
"http-status-codes": "^2.3.0",
"lucide-react": "^0.435.0",
"next": "14.2.10",
"next-themes": "^0.3.0",
"posthog-js": "^1.161.5",
"pretty-bytes": "^6.1.1",
"react": "^18",
"react-dom": "^18",
"react-hook-form": "^7.53.0",
"react-hotkeys-hook": "^4.5.1",
"react-resizable-panels": "^2.1.1",
"server-only": "^0.0.1",
"sharp": "^0.33.5",
"tailwind-merge": "^2.5.2",
"tailwindcss-animate": "^1.0.7",
"usehooks-ts": "^3.1.0",
"zod": "^3.23.8"
},
"devDependencies": {
"@types/node": "^20",
"@types/react": "^18",
"@types/react-dom": "^18",
"@typescript-eslint/eslint-plugin": "^8.3.0",
"@typescript-eslint/parser": "^8.3.0",
"eslint": "^8",
"eslint-config-next": "14.2.6",
"eslint-plugin-react": "^7.35.0",
"eslint-plugin-react-hooks": "^4.6.2",
"npm-run-all": "^4.1.5",
"postcss": "^8",
"tailwindcss": "^3.4.1",
"typescript": "^5"
}
}

View file

Before

Width:  |  Height:  |  Size: 822 B

After

Width:  |  Height:  |  Size: 822 B

View file

Before

Width:  |  Height:  |  Size: 573 B

After

Width:  |  Height:  |  Size: 573 B

View file

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 24 KiB

View file

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View file

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View file

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 22 KiB

View file

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 24 KiB

View file

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 23 KiB

View file

Before

Width:  |  Height:  |  Size: 4.2 KiB

After

Width:  |  Height:  |  Size: 4.2 KiB

Some files were not shown because too many files have changed in this diff Show more