Add topics and exclude.topics to GitHub & GitLab config (#121)

This commit is contained in:
Brendan Kellam 2024-12-11 14:17:57 -08:00 committed by GitHub
parent 3dd4a16b7f
commit 4d358f94a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 286 additions and 7 deletions

View file

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
### Added
- Added config support for filtering GitLab & GitHub repositories by topic. ([#121](https://github.com/sourcebot-dev/sourcebot/pull/121))
### Changed ### Changed
- Made language suggestions case insensitive. ([#124](https://github.com/sourcebot-dev/sourcebot/pull/124)) - Made language suggestions case insensitive. ([#124](https://github.com/sourcebot-dev/sourcebot/pull/124))

View file

@ -63,5 +63,39 @@
] ]
} }
}, },
// Include all repos in my-org that have the topic
// "TypeScript" and do not have a topic that starts
// with "test-"
{
"type": "github",
"orgs": [
"my-org"
],
"topics": [
"TypeScript"
],
"exclude": {
"topics": [
"test-**"
]
}
},
// Include all repos in my-group that have the topic
// "TypeScript" and do not have a topic that starts
// with "test-"
{
"type": "gitlab",
"groups": [
"my-group"
],
"topics": [
"TypeScript"
],
"exclude": {
"topics": [
"test-**"
]
}
}
] ]
} }

View file

@ -3,7 +3,7 @@ import { GitHubConfig } from "./schemas/v2.js";
import { createLogger } from "./logger.js"; import { createLogger } from "./logger.js";
import { AppContext, GitRepository } from "./types.js"; import { AppContext, GitRepository } from "./types.js";
import path from 'path'; import path from 'path';
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js"; import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js";
import micromatch from "micromatch"; import micromatch from "micromatch";
const logger = createLogger("GitHub"); const logger = createLogger("GitHub");
@ -21,6 +21,7 @@ type OctokitRepository = {
subscribers_count?: number, subscribers_count?: number,
forks_count?: number, forks_count?: number,
archived?: boolean, archived?: boolean,
topics?: string[],
} }
export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => { export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => {
@ -80,6 +81,7 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
isStale: false, isStale: false,
isFork: repo.fork, isFork: repo.fork,
isArchived: !!repo.archived, isArchived: !!repo.archived,
topics: repo.topics ?? [],
gitConfigMetadata: { gitConfigMetadata: {
'zoekt.web-url-type': 'github', 'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url, 'zoekt.web-url': repo.html_url,
@ -97,6 +99,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
} satisfies GitRepository; } satisfies GitRepository;
}); });
if (config.topics) {
repos = includeReposByTopic(repos, config.topics, logger);
}
if (config.exclude) { if (config.exclude) {
if (!!config.exclude.forks) { if (!!config.exclude.forks) {
repos = excludeForkedRepos(repos, logger); repos = excludeForkedRepos(repos, logger);
@ -109,6 +115,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
if (config.exclude.repos) { if (config.exclude.repos) {
repos = excludeReposByName(repos, config.exclude.repos, logger); repos = excludeReposByName(repos, config.exclude.repos, logger);
} }
if (config.exclude.topics) {
repos = excludeReposByTopic(repos, config.exclude.topics, logger);
}
} }
logger.debug(`Found ${repos.length} total repositories.`); logger.debug(`Found ${repos.length} total repositories.`);

View file

@ -1,6 +1,6 @@
import { Gitlab, ProjectSchema } from "@gitbeaker/rest"; import { Gitlab, ProjectSchema } from "@gitbeaker/rest";
import { GitLabConfig } from "./schemas/v2.js"; import { GitLabConfig } from "./schemas/v2.js";
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js"; import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js";
import { createLogger } from "./logger.js"; import { createLogger } from "./logger.js";
import { AppContext, GitRepository } from "./types.js"; import { AppContext, GitRepository } from "./types.js";
import path from 'path'; import path from 'path';
@ -98,6 +98,7 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
isStale: false, isStale: false,
isFork, isFork,
isArchived: project.archived, isArchived: project.archived,
topics: project.topics ?? [],
gitConfigMetadata: { gitConfigMetadata: {
'zoekt.web-url-type': 'gitlab', 'zoekt.web-url-type': 'gitlab',
'zoekt.web-url': project.web_url, 'zoekt.web-url': project.web_url,
@ -113,6 +114,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
} satisfies GitRepository; } satisfies GitRepository;
}); });
if (config.topics) {
repos = includeReposByTopic(repos, config.topics, logger);
}
if (config.exclude) { if (config.exclude) {
if (!!config.exclude.forks) { if (!!config.exclude.forks) {
repos = excludeForkedRepos(repos, logger); repos = excludeForkedRepos(repos, logger);
@ -125,6 +130,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
if (config.exclude.projects) { if (config.exclude.projects) {
repos = excludeReposByName(repos, config.exclude.projects, logger); repos = excludeReposByName(repos, config.exclude.projects, logger);
} }
if (config.exclude.topics) {
repos = excludeReposByTopic(repos, config.exclude.topics, logger);
}
} }
logger.debug(`Found ${repos.length} total repositories.`); logger.debug(`Found ${repos.length} total repositories.`);

View file

@ -54,6 +54,12 @@ export interface GitHubConfig {
* List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'. * List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'.
*/ */
repos?: string[]; repos?: string[];
/**
* List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.
*
* @minItems 1
*/
topics?: [string, ...string[]];
exclude?: { exclude?: {
/** /**
* Exclude forked repositories from syncing. * Exclude forked repositories from syncing.
@ -67,6 +73,10 @@ export interface GitHubConfig {
* List of individual repositories to exclude from syncing. Glob patterns are supported. * List of individual repositories to exclude from syncing. Glob patterns are supported.
*/ */
repos?: string[]; repos?: string[];
/**
* List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }
@ -119,6 +129,12 @@ export interface GitLabConfig {
* List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/ * List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/ */
projects?: string[]; projects?: string[];
/**
* List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.
*
* @minItems 1
*/
topics?: [string, ...string[]];
exclude?: { exclude?: {
/** /**
* Exclude forked projects from syncing. * Exclude forked projects from syncing.
@ -132,6 +148,10 @@ export interface GitLabConfig {
* List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/ * List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/
*/ */
projects?: string[]; projects?: string[];
/**
* List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
}; };
revisions?: GitRevisions; revisions?: GitRevisions;
} }

View file

@ -8,6 +8,7 @@ interface BaseRepository {
isFork?: boolean; isFork?: boolean;
isArchived?: boolean; isArchived?: boolean;
codeHost?: string; codeHost?: string;
topics?: string[];
} }
export interface GitRepository extends BaseRepository { export interface GitRepository extends BaseRepository {

View file

@ -1,5 +1,5 @@
import { expect, test } from 'vitest'; import { expect, test } from 'vitest';
import { arraysEqualShallow, isRemotePath, excludeReposByName } from './utils'; import { arraysEqualShallow, isRemotePath, excludeReposByName, includeReposByTopic, excludeReposByTopic } from './utils';
import { Repository } from './types'; import { Repository } from './types';
const testNames: string[] = [ const testNames: string[] = [
@ -125,3 +125,135 @@ test('isRemotePath should return false for non HTTP paths', () => {
expect(isRemotePath('')).toBe(false); expect(isRemotePath('')).toBe(false);
expect(isRemotePath(' ')).toBe(false); expect(isRemotePath(' ')).toBe(false);
}); });
// Builds Repository fixtures for the topic-filter tests: every spec is layered
// on top of the default repository returned by `createRepository`, so each
// test only has to spell out the id and the topics it cares about.
const buildTopicRepos = (specs: { id: string, topics: string[] | undefined }[]) => {
    return specs.map((spec) => ({
        ...createRepository(spec.id),
        ...spec,
    } satisfies Repository));
};

// --- includeReposByTopic ---

test('includeReposByTopic should return repos with matching topics', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['javascript', 'typescript'] },
        { id: '2', topics: ['python', 'django'] },
        { id: '3', topics: ['typescript', 'react'] },
    ]);

    const kept = includeReposByTopic(fixtures, ['typescript']);

    expect(kept).toHaveLength(2);
    expect(kept.map((repo) => repo.id)).toEqual(['1', '3']);
});

test('includeReposByTopic should handle glob patterns in topic matching', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['frontend-app', 'backend-app'] },
        { id: '2', topics: ['mobile-app', 'web-app'] },
        { id: '3', topics: ['desktop-app', 'cli-app'] },
    ]);

    // Every fixture has at least one topic ending in "-app".
    const kept = includeReposByTopic(fixtures, ['*-app']);

    expect(kept).toHaveLength(3);
});

test('includeReposByTopic should handle repos with no topics', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['javascript'] },
        { id: '2', topics: undefined },
        { id: '3', topics: [] },
    ]);

    // Repos with missing or empty topics can never satisfy an include filter.
    const kept = includeReposByTopic(fixtures, ['javascript']);

    expect(kept).toHaveLength(1);
    expect(kept[0].id).toBe('1');
});

test('includeReposByTopic should return empty array when no repos match topics', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] },
    ]);

    const kept = includeReposByTopic(fixtures, ['mobile']);

    expect(kept).toEqual([]);
});

// --- excludeReposByTopic ---

test('excludeReposByTopic should exclude repos with matching topics', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['javascript', 'typescript'] },
        { id: '2', topics: ['python', 'django'] },
        { id: '3', topics: ['typescript', 'react'] },
    ]);

    const remaining = excludeReposByTopic(fixtures, ['typescript']);

    expect(remaining).toHaveLength(1);
    expect(remaining[0].id).toBe('2');
});

test('excludeReposByTopic should handle glob patterns', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['test-lib', 'test-app'] },
        { id: '2', topics: ['prod-lib', 'prod-app'] },
        { id: '3', topics: ['dev-tool'] },
    ]);

    const remaining = excludeReposByTopic(fixtures, ['test-*']);

    expect(remaining).toHaveLength(2);
    expect(remaining.map((repo) => repo.id)).toEqual(['2', '3']);
});

test('excludeReposByTopic should handle multiple exclude patterns', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['frontend', 'react'] },
        { id: '2', topics: ['backend', 'node'] },
        { id: '3', topics: ['mobile', 'react-native'] },
    ]);

    // '*end' removes repos 1 and 2, '*native' removes repo 3.
    const remaining = excludeReposByTopic(fixtures, ['*end', '*native']);

    expect(remaining).toHaveLength(0);
});

test('excludeReposByTopic should not exclude repos when no topics match', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] },
        { id: '3', topics: undefined },
    ]);

    const remaining = excludeReposByTopic(fixtures, ['mobile']);

    expect(remaining).toHaveLength(3);
    expect(remaining.map((repo) => repo.id)).toEqual(['1', '2', '3']);
});

test('excludeReposByTopic should handle empty exclude patterns array', () => {
    const fixtures = buildTopicRepos([
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] },
    ]);

    // With nothing to exclude, the input must come back unchanged.
    const remaining = excludeReposByTopic(fixtures, []);

    expect(remaining).toHaveLength(2);
    expect(remaining).toEqual(fixtures);
});

View file

@ -20,7 +20,7 @@ export const marshalBool = (value?: boolean) => {
export const excludeForkedRepos = <T extends Repository>(repos: T[], logger?: Logger) => { export const excludeForkedRepos = <T extends Repository>(repos: T[], logger?: Logger) => {
return repos.filter((repo) => { return repos.filter((repo) => {
if (!!repo.isFork) { if (!!repo.isFork) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.forks is true`); logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.forks\` is true`);
return false; return false;
} }
return true; return true;
@ -30,7 +30,7 @@ export const excludeForkedRepos = <T extends Repository>(repos: T[], logger?: Lo
export const excludeArchivedRepos = <T extends Repository>(repos: T[], logger?: Logger) => { export const excludeArchivedRepos = <T extends Repository>(repos: T[], logger?: Logger) => {
return repos.filter((repo) => { return repos.filter((repo) => {
if (!!repo.isArchived) { if (!!repo.isArchived) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.archived is true`); logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.archived\` is true`);
return false; return false;
} }
return true; return true;
@ -41,7 +41,7 @@ export const excludeArchivedRepos = <T extends Repository>(repos: T[], logger?:
export const excludeReposByName = <T extends Repository>(repos: T[], excludedRepoNames: string[], logger?: Logger) => { export const excludeReposByName = <T extends Repository>(repos: T[], excludedRepoNames: string[], logger?: Logger) => {
return repos.filter((repo) => { return repos.filter((repo) => {
if (micromatch.isMatch(repo.name, excludedRepoNames)) { if (micromatch.isMatch(repo.name, excludedRepoNames)) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.repos contains ${repo.name}`); logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.repos\` contains ${repo.name}`);
return false; return false;
} }
return true; return true;
@ -51,13 +51,40 @@ export const excludeReposByName = <T extends Repository>(repos: T[], excludedRep
export const includeReposByName = <T extends Repository>(repos: T[], includedRepoNames: string[], logger?: Logger) => { export const includeReposByName = <T extends Repository>(repos: T[], includedRepoNames: string[], logger?: Logger) => {
return repos.filter((repo) => { return repos.filter((repo) => {
if (micromatch.isMatch(repo.name, includedRepoNames)) { if (micromatch.isMatch(repo.name, includedRepoNames)) {
logger?.debug(`Including repo ${repo.id}. Reason: repos contain ${repo.name}`); logger?.debug(`Including repo ${repo.id}. Reason: \`repos\` contain ${repo.name}`);
return true; return true;
} }
return false; return false;
}); });
} }
/**
 * Keeps only the repositories that have at least one topic matching one of
 * the provided patterns.
 *
 * Matching is case-insensitive: GitHub stores topics in lowercase, so a
 * pattern written as "TypeScript" in a config would otherwise never match.
 *
 * @param repos - Repositories to filter. A repository without `topics` is
 *   treated as having none and is therefore never included.
 * @param includedRepoTopics - Topic names or glob patterns (micromatch syntax).
 * @param logger - Optional logger; a debug line is emitted for each repository
 *   that is kept, listing the topics that matched.
 * @returns A new array holding the matching repositories in their original order.
 */
export const includeReposByTopic = <T extends Repository>(repos: T[], includedRepoTopics: string[], logger?: Logger) => {
    // Loop-invariant, so built once rather than per topic.
    const matchOptions = { nocase: true };

    return repos.filter((repo) => {
        const topics = repo.topics ?? [];
        const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, includedRepoTopics, matchOptions));

        if (matchingTopics.length > 0) {
            logger?.debug(`Including repo ${repo.id}. Reason: \`topics\` matches the following topics: ${matchingTopics.join(', ')}`);
            return true;
        }
        return false;
    });
}
/**
 * Drops every repository that has at least one topic matching one of the
 * provided patterns.
 *
 * Matching is case-insensitive: GitHub stores topics in lowercase, so an
 * exclusion written with capital letters in a config would otherwise silently
 * exclude nothing.
 *
 * @param repos - Repositories to filter. A repository without `topics` is
 *   treated as having none and is therefore never excluded.
 * @param excludedRepoTopics - Topic names or glob patterns (micromatch syntax).
 *   An empty list excludes nothing.
 * @param logger - Optional logger; a debug line is emitted for each repository
 *   that is dropped, listing the topics that matched.
 * @returns A new array holding the remaining repositories in their original order.
 */
export const excludeReposByTopic = <T extends Repository>(repos: T[], excludedRepoTopics: string[], logger?: Logger) => {
    // Loop-invariant, so built once rather than per topic.
    const matchOptions = { nocase: true };

    return repos.filter((repo) => {
        const topics = repo.topics ?? [];
        const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, excludedRepoTopics, matchOptions));

        if (matchingTopics.length > 0) {
            logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`);
            return false;
        }
        return true;
    });
}
export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => { export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
if (typeof token === 'string') { if (typeof token === 'string') {
return token; return token;

View file

@ -130,6 +130,17 @@
}, },
"description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'." "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
}, },
"topics": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
"examples": [
["docs", "core"]
]
},
"exclude": { "exclude": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -150,6 +161,16 @@
}, },
"default": [], "default": [],
"description": "List of individual repositories to exclude from syncing. Glob patterns are supported." "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
"examples": [
["tests", "ci"]
]
} }
}, },
"additionalProperties": false "additionalProperties": false
@ -226,6 +247,17 @@
], ],
"description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/" "description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/"
}, },
"topics": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"description": "List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
"examples": [
["docs", "core"]
]
},
"exclude": { "exclude": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -251,6 +283,16 @@
] ]
], ],
"description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/" "description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/"
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
"examples": [
["tests", "ci"]
]
} }
}, },
"additionalProperties": false "additionalProperties": false