Add tenant ID concept into web app and backend (#160)

* hacked together an example of using the zoekt gRPC API

* provide tenant id to zoekt git indexer

* update zoekt version to point to multitenant branch

* pipe tenant id through header to zoekt

* remove incorrect submodule reference and settings typo

* update zoekt commit

* remove unused yarn script

* remove unused grpc client in web server

* remove unneeded deps and improve tenant id log

* pass tenant id when creating repo in db

* add mt yarn script

* add nocheckin comment to tenant id in v2 schema

---------

Co-authored-by: bkellam <bshizzle1234@gmail.com>
This commit is contained in:
Michael Sukkarieh 2025-01-14 16:46:36 -08:00 committed by GitHub
parent 3c3140e2d6
commit 553f5d25f7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 73 additions and 4 deletions

View file

@ -7,7 +7,9 @@
"build": "yarn workspaces run build", "build": "yarn workspaces run build",
"test": "yarn workspaces run test", "test": "yarn workspaces run test",
"dev": "npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web", "dev": "npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web",
"dev:zoekt": "export PATH=\"$PWD/bin:$PATH\" && zoekt-webserver -index .sourcebot/index -rpc", "dev:mt": "npm-run-all --print-label --parallel dev:zoekt:mt dev:backend dev:web",
"dev:zoekt": "export PATH=\"$PWD/bin:$PATH\" && export SRC_TENANT_ENFORCEMENT_MODE=none && zoekt-webserver -index .sourcebot/index -rpc",
"dev:zoekt:mt": "export PATH=\"$PWD/bin:$PATH\" && export SRC_TENANT_ENFORCEMENT_MODE=strict && zoekt-webserver -index .sourcebot/index -rpc",
"dev:backend": "yarn workspace @sourcebot/backend dev:watch", "dev:backend": "yarn workspace @sourcebot/backend dev:watch",
"dev:web": "yarn workspace @sourcebot/web dev" "dev:web": "yarn workspace @sourcebot/web dev"
}, },

View file

@ -35,6 +35,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
const gitHubRepos = await getGitHubReposFromConfig(repoConfig, signal, ctx); const gitHubRepos = await getGitHubReposFromConfig(repoConfig, signal, ctx);
const hostUrl = repoConfig.url ?? 'https://github.com'; const hostUrl = repoConfig.url ?? 'https://github.com';
const hostname = repoConfig.url ? new URL(repoConfig.url).hostname : 'github.com'; const hostname = repoConfig.url ? new URL(repoConfig.url).hostname : 'github.com';
const tenantId = repoConfig.tenantId ?? 0;
await Promise.all(gitHubRepos.map((repo) => { await Promise.all(gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`; const repoName = `${hostname}/${repo.full_name}`;
@ -51,6 +52,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
name: repoName, name: repoName,
isFork: repo.fork, isFork: repo.fork,
isArchived: !!repo.archived, isArchived: !!repo.archived,
tenantId: tenantId,
metadata: { metadata: {
'zoekt.web-url-type': 'github', 'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url, 'zoekt.web-url': repo.html_url,
@ -101,6 +103,7 @@ export const syncConfig = async (configPath: string, db: PrismaClient, signal: A
external_codeHostUrl: hostUrl, external_codeHostUrl: hostUrl,
cloneUrl: cloneUrl.toString(), cloneUrl: cloneUrl.toString(),
name: repoName, name: repoName,
tenantId: 0, // TODO: add support for tenantId in GitLab config
isFork, isFork,
isArchived: project.archived, isArchived: project.archived,
metadata: { metadata: {

View file

@ -72,6 +72,10 @@ export interface GitHubConfig {
* @minItems 1 * @minItems 1
*/ */
topics?: string[]; topics?: string[];
/**
* @nocheckin
*/
tenantId?: number;
exclude?: { exclude?: {
/** /**
* Exclude forked repositories from syncing. * Exclude forked repositories from syncing.

View file

@ -13,6 +13,7 @@ interface BaseRepository {
codeHost?: string; codeHost?: string;
topics?: string[]; topics?: string[];
sizeInBytes?: number; sizeInBytes?: number;
tenantId?: number;
} }
/** /**

View file

@ -10,9 +10,12 @@ export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
const revisions = [ const revisions = [
'HEAD' 'HEAD'
]; ];
const tenantId = repo.tenantId ?? 0;
const shardPrefix = `${tenantId}_${repo.id}`;
const repoPath = getRepoPath(repo, ctx); const repoPath = getRepoPath(repo, ctx);
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -shard_prefix ${repo.id} ${repoPath}`; const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${tenantId} -shard_prefix ${shardPrefix} ${repoPath}`;
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => { return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
exec(command, (error, stdout, stderr) => { exec(command, (error, stdout, stderr) => {

View file

@ -0,0 +1,30 @@
/*
  Warnings:

  - Added the required column `tenantId` to the `Repo` table without a default value.
    Rows that already exist are backfilled with tenant id 0 by the INSERT ... SELECT
    below, so this migration also succeeds when the table is not empty.
*/
-- RedefineTables
-- The table is rebuilt rather than altered in place: create "new_Repo" with the
-- extra column, copy every row across, drop the old table, then rename.
PRAGMA defer_foreign_keys=ON;
PRAGMA foreign_keys=OFF;
CREATE TABLE "new_Repo" (
    "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    "name" TEXT NOT NULL,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" DATETIME NOT NULL,
    "indexedAt" DATETIME,
    "isFork" BOOLEAN NOT NULL,
    "isArchived" BOOLEAN NOT NULL,
    "metadata" JSONB NOT NULL,
    "cloneUrl" TEXT NOT NULL,
    "tenantId" INTEGER NOT NULL,
    "external_id" TEXT NOT NULL,
    "external_codeHostType" TEXT NOT NULL,
    "external_codeHostUrl" TEXT NOT NULL
);
-- "tenantId" is NOT NULL and has no column default, so it has to be supplied for
-- every copied row; omitting it makes the copy fail on a populated table.
-- 0 is the same fallback the application uses when no tenant id is configured.
INSERT INTO "new_Repo" ("cloneUrl", "createdAt", "external_codeHostType", "external_codeHostUrl", "external_id", "id", "indexedAt", "isArchived", "isFork", "metadata", "name", "tenantId", "updatedAt") SELECT "cloneUrl", "createdAt", "external_codeHostType", "external_codeHostUrl", "external_id", "id", "indexedAt", "isArchived", "isFork", "metadata", "name", 0, "updatedAt" FROM "Repo";
DROP TABLE "Repo";
ALTER TABLE "new_Repo" RENAME TO "Repo";
-- Indexes are dropped together with the old table, so the unique key is recreated here.
CREATE UNIQUE INDEX "Repo_external_id_external_codeHostUrl_key" ON "Repo"("external_id", "external_codeHostUrl");
PRAGMA foreign_keys=ON;
PRAGMA defer_foreign_keys=OFF;

View file

@ -20,6 +20,7 @@ model Repo {
isArchived Boolean isArchived Boolean
metadata Json metadata Json
cloneUrl String cloneUrl String
tenantId Int
// The id of the repo in the external service // The id of the repo in the external service
external_id String external_id String

View file

@ -8,13 +8,21 @@ import { NextRequest } from "next/server";
export const POST = async (request: NextRequest) => { export const POST = async (request: NextRequest) => {
const body = await request.json(); const body = await request.json();
const parsed = await searchRequestSchema.safeParseAsync(body); const tenantId = await request.headers.get("X-Tenant-ID");
console.log(`Search request received. Tenant ID: ${tenantId}`);
const parsed = await searchRequestSchema.safeParseAsync({
...body,
...(tenantId && { tenantId: parseInt(tenantId) }),
});
if (!parsed.success) { if (!parsed.success) {
return serviceErrorResponse( return serviceErrorResponse(
schemaValidationError(parsed.error) schemaValidationError(parsed.error)
); );
} }
const response = await search(parsed.data); const response = await search(parsed.data);
if (isServiceError(response)) { if (isServiceError(response)) {
return serviceErrorResponse(response); return serviceErrorResponse(response);

View file

@ -4,6 +4,7 @@ export const searchRequestSchema = z.object({
query: z.string(), query: z.string(),
maxMatchDisplayCount: z.number(), maxMatchDisplayCount: z.number(),
whole: z.boolean().optional(), whole: z.boolean().optional(),
tenantId: z.number().optional(),
}); });

View file

@ -34,7 +34,7 @@ const aliasPrefixMappings: Record<string, zoektPrefixes> = {
"revision:": zoektPrefixes.branch, "revision:": zoektPrefixes.branch,
} }
export const search = async ({ query, maxMatchDisplayCount, whole }: SearchRequest): Promise<SearchResponse | ServiceError> => { export const search = async ({ query, maxMatchDisplayCount, whole, tenantId }: SearchRequest): Promise<SearchResponse | ServiceError> => {
// Replace any alias prefixes with their corresponding zoekt prefixes. // Replace any alias prefixes with their corresponding zoekt prefixes.
for (const [prefix, zoektPrefix] of Object.entries(aliasPrefixMappings)) { for (const [prefix, zoektPrefix] of Object.entries(aliasPrefixMappings)) {
query = query.replaceAll(prefix, zoektPrefix); query = query.replaceAll(prefix, zoektPrefix);
@ -53,9 +53,17 @@ export const search = async ({ query, maxMatchDisplayCount, whole }: SearchReque
} }
}); });
let header: Record<string, string> = {};
if (tenantId) {
header = {
"X-Tenant-ID": tenantId.toString()
};
}
const searchResponse = await zoektFetch({ const searchResponse = await zoektFetch({
path: "/api/search", path: "/api/search",
body, body,
header,
method: "POST", method: "POST",
}); });

View file

@ -1,3 +1,4 @@
import { headers } from "next/headers";
import { ZOEKT_WEBSERVER_URL } from "../environment" import { ZOEKT_WEBSERVER_URL } from "../environment"
@ -5,6 +6,7 @@ interface ZoektRequest {
path: string, path: string,
body: string, body: string,
method: string, method: string,
header?: Record<string, string>,
cache?: RequestCache, cache?: RequestCache,
} }
@ -12,6 +14,7 @@ export const zoektFetch = async ({
path, path,
body, body,
method, method,
header,
cache, cache,
}: ZoektRequest) => { }: ZoektRequest) => {
const response = await fetch( const response = await fetch(
@ -19,6 +22,7 @@ export const zoektFetch = async ({
{ {
method, method,
headers: { headers: {
...header,
"Content-Type": "application/json", "Content-Type": "application/json",
}, },
body, body,

View file

@ -141,6 +141,10 @@
["docs", "core"] ["docs", "core"]
] ]
}, },
"tenantId": {
"type": "number",
"description": "@nocheckin"
},
"exclude": { "exclude": {
"type": "object", "type": "object",
"properties": { "properties": {