Add tenant ID concept into web app and backend (#160)

* hacked together an example of using zoekt grpc api

* provide tenant id to zoekt git indexer

* update zoekt version to point to multitenant branch

* pipe tenant id through header to zoekt

* remove incorrect submodule reference and settings typo

* update zoekt commit

* remove unused yarn script

* remove unused grpc client in web server

* remove unneeded deps and improve tenant id log

* pass tenant id when creating repo in db

* add mt yarn script

* add nocheckin comment to tenant id in v2 schema

---------

Co-authored-by: bkellam <bshizzle1234@gmail.com>
This commit is contained in:
Michael Sukkarieh 2025-01-14 16:46:36 -08:00 committed by GitHub
parent 3c3140e2d6
commit 553f5d25f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 73 additions and 4 deletions

View file

@ -7,7 +7,9 @@
"build": "yarn workspaces run build",
"test": "yarn workspaces run test",
"dev": "npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web",
"dev:zoekt": "export PATH=\"$PWD/bin:$PATH\" && zoekt-webserver -index .sourcebot/index -rpc",
"dev:mt": "npm-run-all --print-label --parallel dev:zoekt:mt dev:backend dev:web",
"dev:zoekt": "export PATH=\"$PWD/bin:$PATH\" && export SRC_TENANT_ENFORCEMENT_MODE=none && zoekt-webserver -index .sourcebot/index -rpc",
"dev:zoekt:mt": "export PATH=\"$PWD/bin:$PATH\" && export SRC_TENANT_ENFORCEMENT_MODE=strict && zoekt-webserver -index .sourcebot/index -rpc",
"dev:backend": "yarn workspace @sourcebot/backend dev:watch",
"dev:web": "yarn workspace @sourcebot/web dev"
},

View file

@ -35,6 +35,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
const gitHubRepos = await getGitHubReposFromConfig(repoConfig, signal, ctx);
const hostUrl = repoConfig.url ?? 'https://github.com';
const hostname = repoConfig.url ? new URL(repoConfig.url).hostname : 'github.com';
const tenantId = repoConfig.tenantId ?? 0;
await Promise.all(gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`;
@ -51,6 +52,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
name: repoName,
isFork: repo.fork,
isArchived: !!repo.archived,
tenantId: tenantId,
metadata: {
'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url,
@ -101,6 +103,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
external_codeHostUrl: hostUrl,
cloneUrl: cloneUrl.toString(),
name: repoName,
tenantId: 0, // TODO: add support for tenantId in GitLab config
isFork,
isArchived: project.archived,
metadata: {

View file

@ -72,6 +72,10 @@ export interface GitHubConfig {
* @minItems 1
*/
topics?: string[];
/**
* @nocheckin
*/
tenantId?: number;
exclude?: {
/**
* Exclude forked repositories from syncing.

View file

@ -13,6 +13,7 @@ interface BaseRepository {
codeHost?: string;
topics?: string[];
sizeInBytes?: number;
tenantId?: number;
}
/**

View file

@ -10,9 +10,12 @@ export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
const revisions = [
'HEAD'
];
const tenantId = repo.tenantId ?? 0;
const shardPrefix = `${tenantId}_${repo.id}`;
const repoPath = getRepoPath(repo, ctx);
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -shard_prefix ${repo.id} ${repoPath}`;
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${tenantId} -shard_prefix ${shardPrefix} ${repoPath}`;
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
exec(command, (error, stdout, stderr) => {

View file

@ -0,0 +1,30 @@
/*
  Adds the required `tenantId` column to the `Repo` table.

  The table is rebuilt rather than altered in place: a replacement table
  (`new_Repo`) is created with the extra column, every existing row is copied
  into it, the old table is dropped, and the replacement is renamed to `Repo`.

  Notes:
  - `tenantId` is NOT NULL and has no default, so the copy step must supply a
    value for it. Existing rows are backfilled with tenant 0, which is the
    value the backend falls back to when no tenant ID is configured. Without
    this backfill the copy fails on any non-empty `Repo` table.
  - NOTE(review): if this migration has already been applied to a database,
    editing the file may trip the migration tool's checksum/drift detection;
    confirm before changing an applied migration.
*/
-- RedefineTables
-- Relax foreign key handling while the table is swapped out.
PRAGMA defer_foreign_keys=ON;
PRAGMA foreign_keys=OFF;

-- Replacement table: same columns as `Repo` plus the new required `tenantId`.
CREATE TABLE "new_Repo" (
    "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    "name" TEXT NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" DATETIME NOT NULL,
    "indexedAt" DATETIME,
    "isFork" BOOLEAN NOT NULL,
    "isArchived" BOOLEAN NOT NULL,
    "metadata" JSONB NOT NULL,
    "cloneUrl" TEXT NOT NULL,
    "tenantId" INTEGER NOT NULL,
    "external_id" TEXT NOT NULL,
    "external_codeHostType" TEXT NOT NULL,
    "external_codeHostUrl" TEXT NOT NULL
);

-- Copy every existing row, assigning tenant 0 to repos that predate the column.
INSERT INTO "new_Repo" ("cloneUrl", "createdAt", "external_codeHostType", "external_codeHostUrl", "external_id", "id", "indexedAt", "isArchived", "isFork", "metadata", "name", "updatedAt", "tenantId") SELECT "cloneUrl", "createdAt", "external_codeHostType", "external_codeHostUrl", "external_id", "id", "indexedAt", "isArchived", "isFork", "metadata", "name", "updatedAt", 0 FROM "Repo";

-- Swap the rebuilt table into place under the original name.
DROP TABLE "Repo";
ALTER TABLE "new_Repo" RENAME TO "Repo";

-- Dropping the old table removed its index; recreate the uniqueness constraint
-- on (external_id, external_codeHostUrl).
CREATE UNIQUE INDEX "Repo_external_id_external_codeHostUrl_key" ON "Repo"("external_id", "external_codeHostUrl");

-- Restore the foreign key settings changed at the top of the script.
PRAGMA foreign_keys=ON;
PRAGMA defer_foreign_keys=OFF;

View file

@ -20,6 +20,7 @@ model Repo {
isArchived Boolean
metadata Json
cloneUrl String
tenantId Int
// The id of the repo in the external service
external_id String

View file

@ -8,13 +8,21 @@ import { NextRequest } from "next/server";
export const POST = async (request: NextRequest) => {
const body = await request.json();
const parsed = await searchRequestSchema.safeParseAsync(body);
const tenantId = await request.headers.get("X-Tenant-ID");
console.log(`Search request received. Tenant ID: ${tenantId}`);
const parsed = await searchRequestSchema.safeParseAsync({
...body,
...(tenantId && { tenantId: parseInt(tenantId) }),
});
if (!parsed.success) {
return serviceErrorResponse(
schemaValidationError(parsed.error)
);
}
const response = await search(parsed.data);
if (isServiceError(response)) {
return serviceErrorResponse(response);

View file

@ -4,6 +4,7 @@ export const searchRequestSchema = z.object({
query: z.string(),
maxMatchDisplayCount: z.number(),
whole: z.boolean().optional(),
tenantId: z.number().optional(),
});

View file

@ -34,7 +34,7 @@ const aliasPrefixMappings: Record<string, zoektPrefixes> = {
"revision:": zoektPrefixes.branch,
}
export const search = async ({ query, maxMatchDisplayCount, whole }: SearchRequest): Promise<SearchResponse | ServiceError> => {
export const search = async ({ query, maxMatchDisplayCount, whole, tenantId }: SearchRequest): Promise<SearchResponse | ServiceError> => {
// Replace any alias prefixes with their corresponding zoekt prefixes.
for (const [prefix, zoektPrefix] of Object.entries(aliasPrefixMappings)) {
query = query.replaceAll(prefix, zoektPrefix);
@ -53,9 +53,17 @@ export const search = async ({ query, maxMatchDisplayCount, whole }: SearchReque
}
});
let header: Record<string, string> = {};
if (tenantId) {
header = {
"X-Tenant-ID": tenantId.toString()
};
}
const searchResponse = await zoektFetch({
path: "/api/search",
body,
header,
method: "POST",
});

View file

@ -1,3 +1,4 @@
import { headers } from "next/headers";
import { ZOEKT_WEBSERVER_URL } from "../environment"
@ -5,6 +6,7 @@ interface ZoektRequest {
path: string,
body: string,
method: string,
header?: Record<string, string>,
cache?: RequestCache,
}
@ -12,6 +14,7 @@ export const zoektFetch = async ({
path,
body,
method,
header,
cache,
}: ZoektRequest) => {
const response = await fetch(
@ -19,6 +22,7 @@ export const zoektFetch = async ({
{
method,
headers: {
...header,
"Content-Type": "application/json",
},
body,

View file

@ -141,6 +141,10 @@
["docs", "core"]
]
},
"tenantId": {
"type": "number",
"description": "@nocheckin"
},
"exclude": {
"type": "object",
"properties": {