From 4d358f94a2fd2b5f4b39ba70ed730b36b84392ff Mon Sep 17 00:00:00 2001 From: Brendan Kellam Date: Wed, 11 Dec 2024 14:17:57 -0800 Subject: [PATCH] Add `topics` and `exclude.topics` to GitHub & GitLab config (#121) --- CHANGELOG.md | 4 + configs/filter.json | 34 ++++++++ packages/backend/src/github.ts | 12 ++- packages/backend/src/gitlab.ts | 11 ++- packages/backend/src/schemas/v2.ts | 20 +++++ packages/backend/src/types.ts | 1 + packages/backend/src/utils.test.ts | 134 ++++++++++++++++++++++++++++- packages/backend/src/utils.ts | 35 +++++++- schemas/v2/index.json | 42 +++++++++ 9 files changed, 286 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 055c61aa..6bd9a436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Added config support for filtering GitLab & GitHub repositories by topic. ([#121](https://github.com/sourcebot-dev/sourcebot/pull/121)) + ### Changed - Made language suggestions case insensitive. ([#124](https://github.com/sourcebot-dev/sourcebot/pull/124)) diff --git a/configs/filter.json b/configs/filter.json index ca6bd283..98e5881d 100644 --- a/configs/filter.json +++ b/configs/filter.json @@ -63,5 +63,39 @@ ] } }, + // Include all repos in my-org that have the topic + // "TypeScript" and do not have a topic that starts + // with "test-" + { + "type": "github", + "orgs": [ + "my-org" + ], + "topics": [ + "TypeScript" + ], + "exclude": { + "topics": [ + "test-**" + ] + } + }, + // Include all repos in my-group that have the topic + // "TypeScript" and do not have a topic that starts + // with "test-" + { + "type": "gitlab", + "groups": [ + "my-group" + ], + "topics": [ + "TypeScript" + ], + "exclude": { + "topics": [ + "test-**" + ] + } + } ] } \ No newline at end of file diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index d487208b..cf28f463 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -3,7 +3,7 @@ import { GitHubConfig } from "./schemas/v2.js"; import { createLogger } from "./logger.js"; import { AppContext, GitRepository } from "./types.js"; import path from 'path'; -import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js"; +import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js"; import micromatch from "micromatch"; const logger = createLogger("GitHub"); @@ -21,6 +21,7 @@ type OctokitRepository = { subscribers_count?: number, forks_count?: number, archived?: boolean, + topics?: string[], } export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => { @@ -80,6 +81,7 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo isStale: false, isFork: repo.fork, isArchived: !!repo.archived, + topics: repo.topics ?? [], gitConfigMetadata: { 'zoekt.web-url-type': 'github', 'zoekt.web-url': repo.html_url, @@ -97,6 +99,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo } satisfies GitRepository; }); + if (config.topics) { + repos = includeReposByTopic(repos, config.topics, logger); + } + if (config.exclude) { if (!!config.exclude.forks) { repos = excludeForkedRepos(repos, logger); @@ -109,6 +115,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo if (config.exclude.repos) { repos = excludeReposByName(repos, config.exclude.repos, logger); } + + if (config.exclude.topics) { + repos = excludeReposByTopic(repos, config.exclude.topics, logger); + } } logger.debug(`Found ${repos.length} total repositories.`); diff --git a/packages/backend/src/gitlab.ts b/packages/backend/src/gitlab.ts index 58793c75..e5ca8f54 100644 --- a/packages/backend/src/gitlab.ts +++ b/packages/backend/src/gitlab.ts @@ -1,6 +1,6 @@ import { Gitlab, ProjectSchema } from "@gitbeaker/rest"; import { GitLabConfig } from "./schemas/v2.js"; -import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js"; +import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js"; import { createLogger } from "./logger.js"; import { AppContext, GitRepository } from "./types.js"; import path from 'path'; @@ -98,6 +98,7 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon isStale: false, isFork, isArchived: project.archived, + topics: project.topics ?? [], gitConfigMetadata: { 'zoekt.web-url-type': 'gitlab', 'zoekt.web-url': project.web_url, @@ -113,6 +114,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon } satisfies GitRepository; }); + if (config.topics) { + repos = includeReposByTopic(repos, config.topics, logger); + } + if (config.exclude) { if (!!config.exclude.forks) { repos = excludeForkedRepos(repos, logger); @@ -125,6 +130,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon if (config.exclude.projects) { repos = excludeReposByName(repos, config.exclude.projects, logger); } + + if (config.exclude.topics) { + repos = excludeReposByTopic(repos, config.exclude.topics, logger); + } } logger.debug(`Found ${repos.length} total repositories.`); diff --git a/packages/backend/src/schemas/v2.ts b/packages/backend/src/schemas/v2.ts index 9e95a264..f519a55d 100644 --- a/packages/backend/src/schemas/v2.ts +++ b/packages/backend/src/schemas/v2.ts @@ -54,6 +54,12 @@ export interface GitHubConfig { * List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'. */ repos?: string[]; + /** + * List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported. + * + * @minItems 1 + */ + topics?: [string, ...string[]]; exclude?: { /** * Exclude forked repositories from syncing. @@ -67,6 +73,10 @@ export interface GitHubConfig { * List of individual repositories to exclude from syncing. Glob patterns are supported. */ repos?: string[]; + /** + * List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported. + */ + topics?: string[]; }; revisions?: GitRevisions; } @@ -119,6 +129,12 @@ export interface GitLabConfig { * List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/ */ projects?: string[]; + /** + * List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported. + * + * @minItems 1 + */ + topics?: [string, ...string[]]; exclude?: { /** * Exclude forked projects from syncing. @@ -132,6 +148,10 @@ export interface GitLabConfig { * List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/ */ projects?: string[]; + /** + * List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported. + */ + topics?: string[]; }; revisions?: GitRevisions; } diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index b6893518..7f4a51cc 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -8,6 +8,7 @@ interface BaseRepository { isFork?: boolean; isArchived?: boolean; codeHost?: string; + topics?: string[]; } export interface GitRepository extends BaseRepository { diff --git a/packages/backend/src/utils.test.ts b/packages/backend/src/utils.test.ts index 76c14067..34e0a19e 100644 --- a/packages/backend/src/utils.test.ts +++ b/packages/backend/src/utils.test.ts @@ -1,5 +1,5 @@ import { expect, test } from 'vitest'; -import { arraysEqualShallow, isRemotePath, excludeReposByName } from './utils'; +import { arraysEqualShallow, isRemotePath, excludeReposByName, includeReposByTopic, excludeReposByTopic } from './utils'; import { Repository } from './types'; const testNames: string[] = [ @@ -125,3 +125,135 @@ test('isRemotePath should return false for non HTTP paths', () => { expect(isRemotePath('')).toBe(false); expect(isRemotePath(' ')).toBe(false); }); + + +test('includeReposByTopic should return repos with matching topics', () => { + const repos = [ + { id: '1', topics: ['javascript', 'typescript'] }, + { id: '2', topics: ['python', 'django'] }, + { id: '3', topics: ['typescript', 'react'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = includeReposByTopic(repos, ['typescript']); + expect(result.length).toBe(2); + expect(result.map(r => r.id)).toEqual(['1', '3']); +}); + +test('includeReposByTopic should handle glob patterns in topic matching', () => { + const repos = [ + { id: '1', topics: ['frontend-app', 'backend-app'] }, + { id: '2', topics: ['mobile-app', 'web-app'] }, + { id: '3', topics: ['desktop-app', 'cli-app'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = includeReposByTopic(repos, ['*-app']); + expect(result.length).toBe(3); +}); + +test('includeReposByTopic should handle repos with no topics', () => { + const repos = [ + { id: '1', topics: ['javascript'] }, + { id: '2', topics: undefined }, + { id: '3', topics: [] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = includeReposByTopic(repos, ['javascript']); + expect(result.length).toBe(1); + expect(result[0].id).toBe('1'); +}); + +test('includeReposByTopic should return empty array when no repos match topics', () => { + const repos = [ + { id: '1', topics: ['frontend'] }, + { id: '2', topics: ['backend'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = includeReposByTopic(repos, ['mobile']); + expect(result).toEqual([]); +}); + + +test('excludeReposByTopic should exclude repos with matching topics', () => { + const repos = [ + { id: '1', topics: ['javascript', 'typescript'] }, + { id: '2', topics: ['python', 'django'] }, + { id: '3', topics: ['typescript', 'react'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = excludeReposByTopic(repos, ['typescript']); + expect(result.length).toBe(1); + expect(result[0].id).toBe('2'); +}); + +test('excludeReposByTopic should handle glob patterns', () => { + const repos = [ + { id: '1', topics: ['test-lib', 'test-app'] }, + { id: '2', topics: ['prod-lib', 'prod-app'] }, + { id: '3', topics: ['dev-tool'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = excludeReposByTopic(repos, ['test-*']); + expect(result.length).toBe(2); + expect(result.map(r => r.id)).toEqual(['2', '3']); +}); + +test('excludeReposByTopic should handle multiple exclude patterns', () => { + const repos = [ + { id: '1', topics: ['frontend', 'react'] }, + { id: '2', topics: ['backend', 'node'] }, + { id: '3', topics: ['mobile', 'react-native'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = excludeReposByTopic(repos, ['*end', '*native']); + expect(result.length).toBe(0); +}); + +test('excludeReposByTopic should not exclude repos when no topics match', () => { + const repos = [ + { id: '1', topics: ['frontend'] }, + { id: '2', topics: ['backend'] }, + { id: '3', topics: undefined } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = excludeReposByTopic(repos, ['mobile']); + expect(result.length).toBe(3); + expect(result.map(r => r.id)).toEqual(['1', '2', '3']); +}); + +test('excludeReposByTopic should handle empty exclude patterns array', () => { + const repos = [ + { id: '1', topics: ['frontend'] }, + { id: '2', topics: ['backend'] } + ].map(r => ({ + ...createRepository(r.id), + ...r, + } satisfies Repository)); + + const result = excludeReposByTopic(repos, []); + expect(result.length).toBe(2); + expect(result).toEqual(repos); +}); diff --git a/packages/backend/src/utils.ts b/packages/backend/src/utils.ts index 676ca1a3..7e94905a 100644 --- a/packages/backend/src/utils.ts +++ b/packages/backend/src/utils.ts @@ -20,7 +20,7 @@ export const marshalBool = (value?: boolean) => { export const excludeForkedRepos = (repos: T[], logger?: Logger) => { return repos.filter((repo) => { if (!!repo.isFork) { - logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.forks is true`); + logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.forks\` is true`); return false; } return true; @@ -30,7 +30,7 @@ export const excludeForkedRepos = (repos: T[], logger?: Lo export const excludeArchivedRepos = (repos: T[], logger?: Logger) => { return repos.filter((repo) => { if (!!repo.isArchived) { - logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.archived is true`); + logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.archived\` is true`); return false; } return true; @@ -41,7 +41,7 @@ export const excludeArchivedRepos = (repos: T[], logger?: export const excludeReposByName = (repos: T[], excludedRepoNames: string[], logger?: Logger) => { return repos.filter((repo) => { if (micromatch.isMatch(repo.name, excludedRepoNames)) { - logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.repos contains ${repo.name}`); + logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.repos\` contains ${repo.name}`); return false; } return true; @@ -51,13 +51,40 @@ export const excludeReposByName = (repos: T[], excludedRep export const includeReposByName = (repos: T[], includedRepoNames: string[], logger?: Logger) => { return repos.filter((repo) => { if (micromatch.isMatch(repo.name, includedRepoNames)) { - logger?.debug(`Including repo ${repo.id}. Reason: repos contain ${repo.name}`); + logger?.debug(`Including repo ${repo.id}. Reason: \`repos\` contain ${repo.name}`); return true; } return false; }); } +export const includeReposByTopic = (repos: T[], includedRepoTopics: string[], logger?: Logger) => { + return repos.filter((repo) => { + const topics = repo.topics ?? []; + const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, includedRepoTopics)); + + if (matchingTopics.length > 0) { + + logger?.debug(`Including repo ${repo.id}. Reason: \`topics\` matches the following topics: ${matchingTopics.join(', ')}`); + return true; + } + return false; + }); +} + +export const excludeReposByTopic = (repos: T[], excludedRepoTopics: string[], logger?: Logger) => { + return repos.filter((repo) => { + const topics = repo.topics ?? []; + const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, excludedRepoTopics)); + + if (matchingTopics.length > 0) { + logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`); + return false; + } + return true; + }); +} + export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => { if (typeof token === 'string') { return token; diff --git a/schemas/v2/index.json b/schemas/v2/index.json index 90e86042..9f86a104 100644 --- a/schemas/v2/index.json +++ b/schemas/v2/index.json @@ -130,6 +130,17 @@ }, "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'." }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.", + "examples": [ + ["docs", "core"] + ] + }, "exclude": { "type": "object", "properties": { @@ -150,6 +161,16 @@ }, "default": [], "description": "List of individual repositories to exclude from syncing. Glob patterns are supported." + }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.", + "examples": [ + ["tests", "ci"] + ] } }, "additionalProperties": false @@ -226,6 +247,17 @@ ], "description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/" }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "description": "List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.", + "examples": [ + ["docs", "core"] + ] + }, "exclude": { "type": "object", "properties": { @@ -251,6 +283,16 @@ ] ], "description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/" + }, + "topics": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.", + "examples": [ + ["tests", "ci"] + ] } }, "additionalProperties": false