From 4353d2008aefd895d4ac8583ad7eb60db4418d47 Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Fri, 13 Dec 2024 12:34:02 -0800 Subject: [PATCH] Add `autoDeleteStaleRepos` config option (#128) --- CHANGELOG.md | 1 + packages/backend/package.json | 1 + packages/backend/src/constants.ts | 1 + packages/backend/src/db.test.ts | 33 ++++++++- packages/backend/src/db.ts | 23 ++++-- packages/backend/src/github.ts | 6 +- packages/backend/src/gitlab.ts | 6 +- packages/backend/src/main.test.ts | 109 ++++++++++++++++++++++++++++- packages/backend/src/main.ts | 80 +++++++++++++++++++-- packages/backend/src/schemas/v2.ts | 4 ++ packages/backend/src/types.ts | 1 + schemas/v2/index.json | 5 ++ yarn.lock | 39 +++++++++++ 13 files changed, 292 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd9a436..1ac98b90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Made language suggestions case insensitive. ([#124](https://github.com/sourcebot-dev/sourcebot/pull/124)) +- Stale repositories are now automatically deleted from the index. This can be configured via `settings.autoDeleteStaleRepos` in the config. ([#128](https://github.com/sourcebot-dev/sourcebot/pull/128)) ## [2.6.1] - 2024-12-09 diff --git a/packages/backend/package.json b/packages/backend/package.json index c0ae5542..2514cd85 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -28,6 +28,7 @@ "cross-fetch": "^4.0.0", "dotenv": "^16.4.5", "gitea-js": "^1.22.0", + "glob": "^11.0.0", "lowdb": "^7.0.1", "micromatch": "^4.0.8", "posthog-node": "^4.2.1", diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index 579b0b16..01a2ffc5 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -15,4 +15,5 @@ export const RESYNC_CONFIG_INTERVAL_MS = 1000 * 60 * 60 * 24; */ export const DEFAULT_SETTINGS: Settings = { maxFileSize: 2 * 1024 * 1024, // 2MB in bytes + autoDeleteStaleRepos: true, } \ No newline at end of file diff --git a/packages/backend/src/db.test.ts b/packages/backend/src/db.test.ts index fc03eccf..ed5d9391 100644 --- a/packages/backend/src/db.test.ts +++ b/packages/backend/src/db.test.ts @@ -1,8 +1,23 @@ import { expect, test } from 'vitest'; -import { migration_addMaxFileSize, migration_addSettings, Schema } from './db'; +import { DEFAULT_DB_DATA, migration_addDeleteStaleRepos, migration_addMaxFileSize, migration_addSettings, Schema } from './db'; import { DEFAULT_SETTINGS } from './constants'; import { DeepPartial } from './types'; +import { Low } from 'lowdb'; +class InMemoryAdapter { + private data: T; + async read() { + return this.data; + } + async write(data: T) { + this.data = data; + } +} + +export const createMockDB = (defaultData: Schema = DEFAULT_DB_DATA) => { + const db = new Low(new InMemoryAdapter(), defaultData); + return db; +} test('migration_addSettings adds the `settings` field with defaults if it does not exist', () => { const schema: DeepPartial = {}; @@ -29,4 +44,20 @@ test('migration_addMaxFileSize adds the `maxFileSize` field with the default val test('migration_addMaxFileSize will throw if `settings` is not defined', () => { const schema: DeepPartial = {}; expect(() => migration_addMaxFileSize(schema as Schema)).toThrow(); +}); + +test('migration_addDeleteStaleRepos adds the `autoDeleteStaleRepos` field with the default value if it does not exist', () => { + const schema: DeepPartial = { + settings: { + maxFileSize: DEFAULT_SETTINGS.maxFileSize, + }, + } + + const migratedSchema = migration_addDeleteStaleRepos(schema as Schema); + expect(migratedSchema).toStrictEqual({ + settings: { + maxFileSize: DEFAULT_SETTINGS.maxFileSize, + autoDeleteStaleRepos: DEFAULT_SETTINGS.autoDeleteStaleRepos, + } + }); }); \ No newline at end of file diff --git a/packages/backend/src/db.ts b/packages/backend/src/db.ts index a9e60364..3eed377b 100644 --- a/packages/backend/src/db.ts +++ b/packages/backend/src/db.ts @@ -13,13 +13,15 @@ export type Schema = { } } +export const DEFAULT_DB_DATA: Schema = { + repos: {}, + settings: DEFAULT_SETTINGS, +} + export type Database = Low; export const loadDB = async (ctx: AppContext): Promise => { - const db = await JSONFilePreset(`${ctx.cachePath}/db.json`, { - repos: {}, - settings: DEFAULT_SETTINGS, - }); + const db = await JSONFilePreset(`${ctx.cachePath}/db.json`, DEFAULT_DB_DATA); await applyMigrations(db); @@ -53,6 +55,7 @@ export const applyMigrations = async (db: Database) => { // @NOTE: please ensure new migrations are added after older ones! schema = migration_addSettings(schema, log); schema = migration_addMaxFileSize(schema, log); + schema = migration_addDeleteStaleRepos(schema, log); return schema; }); } @@ -78,5 +81,17 @@ export const migration_addMaxFileSize = (schema: Schema, log?: (name: string) => schema.settings.maxFileSize = DEFAULT_SETTINGS.maxFileSize; } + return schema; +} + +/** + * @see: https://github.com/sourcebot-dev/sourcebot/pull/128 + */ +export const migration_addDeleteStaleRepos = (schema: Schema, log?: (name: string) => void) => { + if (schema.settings.autoDeleteStaleRepos === undefined) { + log?.("deleteStaleRepos"); + schema.settings.autoDeleteStaleRepos = DEFAULT_SETTINGS.autoDeleteStaleRepos; + } + return schema; } \ No newline at end of file diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index cf28f463..498dda6c 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -100,7 +100,8 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo }); if (config.topics) { - repos = includeReposByTopic(repos, config.topics, logger); + const topics = config.topics.map(topic => topic.toLowerCase()); + repos = includeReposByTopic(repos, topics, logger); } if (config.exclude) { @@ -117,7 +118,8 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo } if (config.exclude.topics) { - repos = excludeReposByTopic(repos, config.exclude.topics, logger); + const topics = config.exclude.topics.map(topic => topic.toLowerCase()); + repos = excludeReposByTopic(repos, topics, logger); } } diff --git a/packages/backend/src/gitlab.ts b/packages/backend/src/gitlab.ts index e5ca8f54..3ac1803e 100644 --- a/packages/backend/src/gitlab.ts +++ b/packages/backend/src/gitlab.ts @@ -115,7 +115,8 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon }); if (config.topics) { - repos = includeReposByTopic(repos, config.topics, logger); + const topics = config.topics.map(topic => topic.toLowerCase()); + repos = includeReposByTopic(repos, topics, logger); } if (config.exclude) { @@ -132,7 +133,8 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon } if (config.exclude.topics) { - repos = excludeReposByTopic(repos, config.exclude.topics, logger); + const topics = config.exclude.topics.map(topic => topic.toLowerCase()); + repos = excludeReposByTopic(repos, topics, logger); } } diff --git a/packages/backend/src/main.test.ts b/packages/backend/src/main.test.ts index 37e009a0..312f535a 100644 --- a/packages/backend/src/main.test.ts +++ b/packages/backend/src/main.test.ts @@ -1,6 +1,29 @@ -import { expect, test } from 'vitest'; -import { isAllRepoReindexingRequired, isRepoReindexingRequired } from './main'; -import { Repository, Settings } from './types'; +import { expect, test, vi } from 'vitest'; +import { deleteStaleRepository, isAllRepoReindexingRequired, isRepoReindexingRequired } from './main'; +import { AppContext, GitRepository, LocalRepository, Repository, Settings } from './types'; +import { DEFAULT_DB_DATA } from './db'; +import { createMockDB } from './db.test'; +import { rm } from 'fs/promises'; +import path from 'path'; +import { glob } from 'glob'; + +vi.mock('fs/promises', () => ({ + rm: vi.fn(), +})); + +vi.mock('glob', () => ({ + glob: vi.fn().mockReturnValue(['fake_index.zoekt']), +})); + +const createMockContext = (rootPath: string = '/app') => { + return { + configPath: path.join(rootPath, 'config.json'), + cachePath: path.join(rootPath, '.sourcebot'), + indexPath: path.join(rootPath, '.sourcebot/index'), + reposPath: path.join(rootPath, '.sourcebot/repos'), + } satisfies AppContext; +} + test('isRepoReindexingRequired should return false when no changes are made', () => { const previous: Repository = { @@ -80,6 +103,7 @@ test('isRepoReindexingRequired should return true when local excludedPaths chang test('isAllRepoReindexingRequired should return false when fileLimitSize has not changed', () => { const previous: Settings = { maxFileSize: 1000, + autoDeleteStaleRepos: true, } const current: Settings = { ...previous, @@ -90,10 +114,89 @@ test('isAllRepoReindexingRequired should return false when fileLimitSize has not test('isAllRepoReindexingRequired should return true when fileLimitSize has changed', () => { const previous: Settings = { maxFileSize: 1000, + autoDeleteStaleRepos: true, } const current: Settings = { ...previous, maxFileSize: 2000, } expect(isAllRepoReindexingRequired(previous, current)).toBe(true); +}); + +test('isAllRepoReindexingRequired should return false when autoDeleteStaleRepos has changed', () => { + const previous: Settings = { + maxFileSize: 1000, + autoDeleteStaleRepos: true, + } + const current: Settings = { + ...previous, + autoDeleteStaleRepos: false, + } + expect(isAllRepoReindexingRequired(previous, current)).toBe(false); +}); + +test('deleteStaleRepository can delete a git repository', async () => { + const ctx = createMockContext(); + + const repo: GitRepository = { + id: 'github.com/sourcebot-dev/sourcebot', + vcs: 'git', + name: 'sourcebot', + cloneUrl: 'https://github.com/sourcebot-dev/sourcebot', + path: `${ctx.reposPath}/github.com/sourcebot-dev/sourcebot`, + branches: ['main'], + tags: [''], + isStale: true, + } + + const db = createMockDB({ + ...DEFAULT_DB_DATA, + repos: { + 'github.com/sourcebot-dev/sourcebot': repo, + } + }); + + + await deleteStaleRepository(repo, db, ctx); + + expect(db.data.repos['github.com/sourcebot-dev/sourcebot']).toBeUndefined();; + expect(rm).toHaveBeenCalledWith(`${ctx.reposPath}/github.com/sourcebot-dev/sourcebot`, { + recursive: true, + }); + expect(glob).toHaveBeenCalledWith(`github.com%2Fsourcebot-dev%2Fsourcebot*.zoekt`, { + cwd: ctx.indexPath, + absolute: true + }); + expect(rm).toHaveBeenCalledWith(`fake_index.zoekt`); +}); + +test('deleteStaleRepository can delete a local repository', async () => { + const ctx = createMockContext(); + + const repo: LocalRepository = { + vcs: 'local', + name: 'UnrealEngine', + id: '/path/to/UnrealEngine', + path: '/path/to/UnrealEngine', + watch: false, + excludedPaths: [], + isStale: true, + } + + const db = createMockDB({ + ...DEFAULT_DB_DATA, + repos: { + '/path/to/UnrealEngine': repo, + } + }); + + await deleteStaleRepository(repo, db, ctx); + + expect(db.data.repos['/path/to/UnrealEngine']).toBeUndefined(); + expect(rm).not.toHaveBeenCalledWith('/path/to/UnrealEngine'); + expect(glob).toHaveBeenCalledWith(`UnrealEngine*.zoekt`, { + cwd: ctx.indexPath, + absolute: true + }); + expect(rm).toHaveBeenCalledWith('fake_index.zoekt'); }); \ No newline at end of file diff --git a/packages/backend/src/main.ts b/packages/backend/src/main.ts index 50064d92..2942b003 100644 --- a/packages/backend/src/main.ts +++ b/packages/backend/src/main.ts @@ -1,4 +1,4 @@ -import { readFile } from 'fs/promises'; +import { readFile, rm } from 'fs/promises'; import { existsSync, watch } from 'fs'; import { SourcebotConfigurationSchema } from "./schemas/v2.js"; import { getGitHubReposFromConfig } from "./github.js"; @@ -15,6 +15,8 @@ import stripJsonComments from 'strip-json-comments'; import { indexGitRepository, indexLocalRepository } from "./zoekt.js"; import { getLocalRepoFromConfig, initLocalRepoFileWatchers } from "./local.js"; import { captureEvent } from "./posthog.js"; +import { glob } from 'glob'; +import path from 'path'; const logger = createLogger('main'); @@ -67,6 +69,67 @@ const syncLocalRepository = async (repo: LocalRepository, settings: Settings, ct } } +export const deleteStaleRepository = async (repo: Repository, db: Database, ctx: AppContext) => { + logger.info(`Deleting stale repository ${repo.id}:`); + + // Delete the checked out git repository (if applicable) + if (repo.vcs === "git") { + logger.info(`\tDeleting git directory ${repo.path}...`); + await rm(repo.path, { + recursive: true + }); + } + + // Delete all .zoekt index files + { + // .zoekt index files are named with the repository name, + // index version, and shard number. Some examples: + // + // git repos: + // github.com%2Fsourcebot-dev%2Fsourcebot_v16.00000.zoekt + // gitlab.com%2Fmy-org%2Fmy-project.00000.zoekt + // + // local repos: + // UnrealEngine_v16.00000.zoekt + // UnrealEngine_v16.00001.zoekt + // ... + // UnrealEngine_v16.00016.zoekt + // + // Notice that local repos are named with the repository basename and + // git repos are named with the query-encoded repository name. Form a + // glob pattern with the correct prefix & suffix to match the correct + // index file(s) for the repository. + // + // @see : https://github.com/sourcegraph/zoekt/blob/c03b77fbf18b76904c0e061f10f46597eedd7b14/build/builder.go#L348 + const indexFilesGlobPattern = (() => { + switch (repo.vcs) { + case 'git': + return `${encodeURIComponent(repo.id)}*.zoekt`; + case 'local': + return `${path.basename(repo.path)}*.zoekt`; + } + })(); + + const indexFiles = await glob(indexFilesGlobPattern, { + cwd: ctx.indexPath, + absolute: true + }); + + await Promise.all(indexFiles.map((file) => { + logger.info(`\tDeleting index file ${file}...`); + return rm(file); + })); + } + + // Delete db entry + logger.info(`\tDeleting db entry...`); + await db.update(({ repos }) => { + delete repos[repo.id]; + }); + + logger.info(`Deleted stale repository ${repo.id}`); +} + /** * Certain configuration changes (e.g., a branch is added) require * a reindexing of the repository. @@ -137,6 +200,7 @@ const syncConfig = async (configPath: string, db: Database, signal: AbortSignal, // Update the settings const updatedSettings: Settings = { maxFileSize: config.settings?.maxFileSize ?? DEFAULT_SETTINGS.maxFileSize, + autoDeleteStaleRepos: config.settings?.autoDeleteStaleRepos ?? DEFAULT_SETTINGS.autoDeleteStaleRepos, } const _isAllRepoReindexingRequired = isAllRepoReindexingRequired(db.data.settings, updatedSettings); await updateSettings(updatedSettings, db); @@ -292,10 +356,16 @@ export const main = async (context: AppContext) => { for (const [_, repo] of Object.entries(repos)) { const lastIndexed = repo.lastIndexedDate ? new Date(repo.lastIndexedDate) : new Date(0); - if ( - repo.isStale || - lastIndexed.getTime() > Date.now() - REINDEX_INTERVAL_MS - ) { + if (repo.isStale) { + if (db.data.settings.autoDeleteStaleRepos) { + await deleteStaleRepository(repo, db, context); + } else { + // skip deletion... + } + continue; + } + + if (lastIndexed.getTime() > Date.now() - REINDEX_INTERVAL_MS) { continue; } diff --git a/packages/backend/src/schemas/v2.ts b/packages/backend/src/schemas/v2.ts index f519a55d..0cbc4aa0 100644 --- a/packages/backend/src/schemas/v2.ts +++ b/packages/backend/src/schemas/v2.ts @@ -21,6 +21,10 @@ export interface Settings { * The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be inexed. Defaults to 2MB (2097152 bytes). */ maxFileSize?: number; + /** + * Automatically delete stale repositories from the index. Defaults to true. + */ + autoDeleteStaleRepos?: boolean; } export interface GitHubConfig { /** diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index 7f4a51cc..97ded5d8 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -45,6 +45,7 @@ export type AppContext = { export type Settings = { maxFileSize: number; + autoDeleteStaleRepos: boolean; } // @see : https://stackoverflow.com/a/61132308 diff --git a/schemas/v2/index.json b/schemas/v2/index.json index 9f86a104..649dbab0 100644 --- a/schemas/v2/index.json +++ b/schemas/v2/index.json @@ -529,6 +529,11 @@ "description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be inexed. Defaults to 2MB (2097152 bytes).", "default": 2097152, "minimum": 1 + }, + "autoDeleteStaleRepos": { + "type": "boolean", + "description": "Automatically delete stale repositories from the index. Defaults to true.", + "default": true } }, "additionalProperties": false diff --git a/yarn.lock b/yarn.lock index 2f8b140b..b78c004c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3360,6 +3360,18 @@ glob@^10.3.10, glob@^10.3.12: package-json-from-dist "^1.0.0" path-scurry "^1.11.1" +glob@^11.0.0: + version "11.0.0" + resolved "https://registry.yarnpkg.com/glob/-/glob-11.0.0.tgz#6031df0d7b65eaa1ccb9b29b5ced16cea658e77e" + integrity sha512-9UiX/Bl6J2yaBbxKoEBRm4Cipxgok8kQYcOPEhScPwebu2I0HoQOuYdIO6S3hLuWoZgpDpwQZMzTFxgpkyT76g== + dependencies: + foreground-child "^3.1.0" + jackspeak "^4.0.1" + minimatch "^10.0.0" + minipass "^7.1.2" + package-json-from-dist "^1.0.0" + path-scurry "^2.0.0" + glob@^7.1.3: version "7.2.3" resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" @@ -3815,6 +3827,13 @@ jackspeak@^3.1.2: optionalDependencies: "@pkgjs/parseargs" "^0.11.0" +jackspeak@^4.0.1: + version "4.0.2" + resolved "https://registry.yarnpkg.com/jackspeak/-/jackspeak-4.0.2.tgz#11f9468a3730c6ff6f56823a820d7e3be9bef015" + integrity sha512-bZsjR/iRjl1Nk1UkjGpAzLNfQtzuijhn2g+pbZb98HQ1Gk8vM9hfbxeMBP+M2/UUdwj0RqGG3mlvk2MsAqwvEw== + dependencies: + "@isaacs/cliui" "^8.0.2" + jiti@^1.21.0: version "1.21.6" resolved "https://registry.yarnpkg.com/jiti/-/jiti-1.21.6.tgz#6c7f7398dd4b3142767f9a168af2f317a428d268" @@ -4026,6 +4045,11 @@ lru-cache@^10.2.0: resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-10.4.3.tgz#410fc8a17b70e598013df257c2446b7f3383f119" integrity sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ== +lru-cache@^11.0.0: + version "11.0.2" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-11.0.2.tgz#fbd8e7cf8211f5e7e5d91905c415a3f55755ca39" + integrity sha512-123qHRfJBmo2jXDbo/a5YOQrJoHF/GNQTLzQ5+IdK5pWpceK17yRc6ozlWd25FxvGKQbIUs91fDFkXmDHTKcyA== + lucide-react@^0.435.0: version "0.435.0" resolved "https://registry.yarnpkg.com/lucide-react/-/lucide-react-0.435.0.tgz#88c5cc6de61b89e42cbef309a38f100deee1bb32" @@ -4080,6 +4104,13 @@ minimatch@9.0.3: dependencies: brace-expansion "^2.0.1" +minimatch@^10.0.0: + version "10.0.1" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-10.0.1.tgz#ce0521856b453c86e25f2c4c0d03e6ff7ddc440b" + integrity sha512-ethXTt3SGGR+95gudmqJ1eNhRO7eGEGIgYA9vnPatK4/etz2MEVDno5GMCibdMTuBMyElzIlgxMna3K94XDIDQ== + dependencies: + brace-expansion "^2.0.1" + minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" @@ -4383,6 +4414,14 @@ path-scurry@^1.10.1, path-scurry@^1.11.1: lru-cache "^10.2.0" minipass "^5.0.0 || ^6.0.2 || ^7.0.0" +path-scurry@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-2.0.0.tgz#9f052289f23ad8bf9397a2a0425e7b8615c58580" + integrity sha512-ypGJsmGtdXUOeM5u93TyeIEfEhM6s+ljAhrk5vAvSx8uyY/02OvrZnA0YNGUrPXfpJMgI1ODd3nwz8Npx4O4cg== + dependencies: + lru-cache "^11.0.0" + minipass "^7.1.2" + path-type@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/path-type/-/path-type-3.0.0.tgz#cef31dc8e0a1a3bb0d105c0cd97cf3bf47f4e36f"