Add topics and exclude.topics to GitHub & GitLab config (#121)

This commit is contained in:
Brendan Kellam 2024-12-11 14:17:57 -08:00 committed by GitHub
parent 3dd4a16b7f
commit 4d358f94a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 286 additions and 7 deletions

View file

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added config support for filtering GitLab & GitHub repositories by topic. ([#121](https://github.com/sourcebot-dev/sourcebot/pull/121))
### Changed
- Made language suggestions case insensitive. ([#124](https://github.com/sourcebot-dev/sourcebot/pull/124))

View file

@ -63,5 +63,39 @@
]
}
},
// Include all repos in my-org that have the topic
// "TypeScript" and do not have a topic that starts
// with "test-"
{
"type": "github",
"orgs": [
"my-org"
],
"topics": [
"TypeScript"
],
"exclude": {
"topics": [
"test-**"
]
}
},
// Include all repos in my-group that have the topic
// "TypeScript" and do not have a topic that starts
// with "test-"
{
"type": "gitlab",
"groups": [
"my-group"
],
"topics": [
"TypeScript"
],
"exclude": {
"topics": [
"test-**"
]
}
}
]
}

View file

@ -3,7 +3,7 @@ import { GitHubConfig } from "./schemas/v2.js";
import { createLogger } from "./logger.js";
import { AppContext, GitRepository } from "./types.js";
import path from 'path';
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js";
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js";
import micromatch from "micromatch";
const logger = createLogger("GitHub");
@ -21,6 +21,7 @@ type OctokitRepository = {
subscribers_count?: number,
forks_count?: number,
archived?: boolean,
topics?: string[],
}
export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => {
@ -80,6 +81,7 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
isStale: false,
isFork: repo.fork,
isArchived: !!repo.archived,
topics: repo.topics ?? [],
gitConfigMetadata: {
'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url,
@ -97,6 +99,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
} satisfies GitRepository;
});
if (config.topics) {
repos = includeReposByTopic(repos, config.topics, logger);
}
if (config.exclude) {
if (!!config.exclude.forks) {
repos = excludeForkedRepos(repos, logger);
@ -109,6 +115,10 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
if (config.exclude.repos) {
repos = excludeReposByName(repos, config.exclude.repos, logger);
}
if (config.exclude.topics) {
repos = excludeReposByTopic(repos, config.exclude.topics, logger);
}
}
logger.debug(`Found ${repos.length} total repositories.`);

View file

@ -1,6 +1,6 @@
import { Gitlab, ProjectSchema } from "@gitbeaker/rest";
import { GitLabConfig } from "./schemas/v2.js";
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js";
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js";
import { createLogger } from "./logger.js";
import { AppContext, GitRepository } from "./types.js";
import path from 'path';
@ -98,6 +98,7 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
isStale: false,
isFork,
isArchived: project.archived,
topics: project.topics ?? [],
gitConfigMetadata: {
'zoekt.web-url-type': 'gitlab',
'zoekt.web-url': project.web_url,
@ -113,6 +114,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
} satisfies GitRepository;
});
if (config.topics) {
repos = includeReposByTopic(repos, config.topics, logger);
}
if (config.exclude) {
if (!!config.exclude.forks) {
repos = excludeForkedRepos(repos, logger);
@ -125,6 +130,10 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
if (config.exclude.projects) {
repos = excludeReposByName(repos, config.exclude.projects, logger);
}
if (config.exclude.topics) {
repos = excludeReposByTopic(repos, config.exclude.topics, logger);
}
}
logger.debug(`Found ${repos.length} total repositories.`);

View file

@ -54,6 +54,12 @@ export interface GitHubConfig {
* List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'.
*/
repos?: string[];
/**
* List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.
*
* @minItems 1
*/
topics?: [string, ...string[]];
exclude?: {
/**
* Exclude forked repositories from syncing.
@ -67,6 +73,10 @@ export interface GitHubConfig {
* List of individual repositories to exclude from syncing. Glob patterns are supported.
*/
repos?: string[];
/**
* List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
};
revisions?: GitRevisions;
}
@ -119,6 +129,12 @@ export interface GitLabConfig {
* List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/
*/
projects?: string[];
/**
* List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.
*
* @minItems 1
*/
topics?: [string, ...string[]];
exclude?: {
/**
* Exclude forked projects from syncing.
@ -132,6 +148,10 @@ export interface GitLabConfig {
* List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/
*/
projects?: string[];
/**
* List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.
*/
topics?: string[];
};
revisions?: GitRevisions;
}

View file

@ -8,6 +8,7 @@ interface BaseRepository {
isFork?: boolean;
isArchived?: boolean;
codeHost?: string;
topics?: string[];
}
export interface GitRepository extends BaseRepository {

View file

@ -1,5 +1,5 @@
import { expect, test } from 'vitest';
import { arraysEqualShallow, isRemotePath, excludeReposByName } from './utils';
import { arraysEqualShallow, isRemotePath, excludeReposByName, includeReposByTopic, excludeReposByTopic } from './utils';
import { Repository } from './types';
const testNames: string[] = [
@ -125,3 +125,135 @@ test('isRemotePath should return false for non HTTP paths', () => {
expect(isRemotePath('')).toBe(false);
expect(isRemotePath(' ')).toBe(false);
});
test('includeReposByTopic should return repos with matching topics', () => {
    // Repos 1 and 3 carry the `typescript` topic; repo 2 does not.
    const fixtures = [
        { id: '1', topics: ['javascript', 'typescript'] },
        { id: '2', topics: ['python', 'django'] },
        { id: '3', topics: ['typescript', 'react'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const included = includeReposByTopic(repos, ['typescript']);

    expect(included.length).toBe(2);
    const includedIds = included.map((repo) => repo.id);
    expect(includedIds).toEqual(['1', '3']);
});
test('includeReposByTopic should handle glob patterns in topic matching', () => {
    // Every repo has at least one topic ending in `-app`, so all of them
    // are expected to survive the `*-app` include filter.
    const fixtures = [
        { id: '1', topics: ['frontend-app', 'backend-app'] },
        { id: '2', topics: ['mobile-app', 'web-app'] },
        { id: '3', topics: ['desktop-app', 'cli-app'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const included = includeReposByTopic(repos, ['*-app']);

    expect(included.length).toBe(3);
});
test('includeReposByTopic should handle repos with no topics', () => {
    // Repo 2 has `topics` undefined and repo 3 has an empty list; neither
    // can match an include pattern, so only repo 1 should remain.
    const fixtures = [
        { id: '1', topics: ['javascript'] },
        { id: '2', topics: undefined },
        { id: '3', topics: [] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const included = includeReposByTopic(repos, ['javascript']);

    expect(included.length).toBe(1);
    const [onlyRepo] = included;
    expect(onlyRepo.id).toBe('1');
});
test('includeReposByTopic should return empty array when no repos match topics', () => {
    // No repo is tagged `mobile`, so the include filter drops everything.
    const fixtures = [
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const included = includeReposByTopic(repos, ['mobile']);

    expect(included).toEqual([]);
});
test('excludeReposByTopic should exclude repos with matching topics', () => {
    // Repos 1 and 3 are tagged `typescript` and must be removed; repo 2 stays.
    const fixtures = [
        { id: '1', topics: ['javascript', 'typescript'] },
        { id: '2', topics: ['python', 'django'] },
        { id: '3', topics: ['typescript', 'react'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const remaining = excludeReposByTopic(repos, ['typescript']);

    expect(remaining.length).toBe(1);
    const [survivor] = remaining;
    expect(survivor.id).toBe('2');
});
test('excludeReposByTopic should handle glob patterns', () => {
    // Only repo 1 has topics starting with `test-`; it is the one removed.
    const fixtures = [
        { id: '1', topics: ['test-lib', 'test-app'] },
        { id: '2', topics: ['prod-lib', 'prod-app'] },
        { id: '3', topics: ['dev-tool'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const remaining = excludeReposByTopic(repos, ['test-*']);

    expect(remaining.length).toBe(2);
    const remainingIds = remaining.map((repo) => repo.id);
    expect(remainingIds).toEqual(['2', '3']);
});
test('excludeReposByTopic should handle multiple exclude patterns', () => {
    // `*end` catches frontend/backend (repos 1 and 2) and `*native` catches
    // react-native (repo 3), so nothing is left after filtering.
    const fixtures = [
        { id: '1', topics: ['frontend', 'react'] },
        { id: '2', topics: ['backend', 'node'] },
        { id: '3', topics: ['mobile', 'react-native'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const remaining = excludeReposByTopic(repos, ['*end', '*native']);

    expect(remaining.length).toBe(0);
});
test('excludeReposByTopic should not exclude repos when no topics match', () => {
    // Nothing is tagged `mobile` (repo 3 has no topics at all), so every
    // repo is kept and the original order is preserved.
    const fixtures = [
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] },
        { id: '3', topics: undefined }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const remaining = excludeReposByTopic(repos, ['mobile']);

    expect(remaining.length).toBe(3);
    const remainingIds = remaining.map((repo) => repo.id);
    expect(remainingIds).toEqual(['1', '2', '3']);
});
test('excludeReposByTopic should handle empty exclude patterns array', () => {
    // With no exclude patterns there is nothing to match against, so the
    // filtered list must equal the input list.
    const fixtures = [
        { id: '1', topics: ['frontend'] },
        { id: '2', topics: ['backend'] }
    ];
    const repos = fixtures.map((fixture) => {
        return { ...createRepository(fixture.id), ...fixture } satisfies Repository;
    });

    const remaining = excludeReposByTopic(repos, []);

    expect(remaining.length).toBe(2);
    expect(remaining).toEqual(repos);
});

View file

@ -20,7 +20,7 @@ export const marshalBool = (value?: boolean) => {
export const excludeForkedRepos = <T extends Repository>(repos: T[], logger?: Logger) => {
return repos.filter((repo) => {
if (!!repo.isFork) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.forks is true`);
logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.forks\` is true`);
return false;
}
return true;
@ -30,7 +30,7 @@ export const excludeForkedRepos = <T extends Repository>(repos: T[], logger?: Lo
export const excludeArchivedRepos = <T extends Repository>(repos: T[], logger?: Logger) => {
return repos.filter((repo) => {
if (!!repo.isArchived) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.archived is true`);
logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.archived\` is true`);
return false;
}
return true;
@ -41,7 +41,7 @@ export const excludeArchivedRepos = <T extends Repository>(repos: T[], logger?:
export const excludeReposByName = <T extends Repository>(repos: T[], excludedRepoNames: string[], logger?: Logger) => {
return repos.filter((repo) => {
if (micromatch.isMatch(repo.name, excludedRepoNames)) {
logger?.debug(`Excluding repo ${repo.id}. Reason: exclude.repos contains ${repo.name}`);
logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.repos\` contains ${repo.name}`);
return false;
}
return true;
@ -51,13 +51,40 @@ export const excludeReposByName = <T extends Repository>(repos: T[], excludedRep
export const includeReposByName = <T extends Repository>(repos: T[], includedRepoNames: string[], logger?: Logger) => {
return repos.filter((repo) => {
if (micromatch.isMatch(repo.name, includedRepoNames)) {
logger?.debug(`Including repo ${repo.id}. Reason: repos contain ${repo.name}`);
logger?.debug(`Including repo ${repo.id}. Reason: \`repos\` contain ${repo.name}`);
return true;
}
return false;
});
}
/**
 * Keeps only the repositories that have at least one topic matching any of
 * the given glob patterns.
 *
 * Matching is case-insensitive, so a pattern such as `TypeScript` matches a
 * repository topic stored as `typescript` (GitHub, for example, stores topics
 * in lowercase).
 *
 * @param repos - Repositories to filter. A repo whose `topics` is undefined is
 *   treated as having no topics and is therefore dropped.
 * @param includedRepoTopics - Glob patterns (micromatch syntax) to match
 *   against each repository topic.
 * @param logger - Optional logger; one debug line is written per included repo,
 *   listing the topics that matched.
 * @returns A new array with the repos that matched, in their original order.
 */
export const includeReposByTopic = <T extends Repository>(repos: T[], includedRepoTopics: string[], logger?: Logger) => {
    return repos.filter((repo) => {
        const topics = repo.topics ?? [];
        // `nocase` makes the glob comparison ignore letter casing.
        const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, includedRepoTopics, { nocase: true }));

        if (matchingTopics.length > 0) {
            logger?.debug(`Including repo ${repo.id}. Reason: \`topics\` matches the following topics: ${matchingTopics.join(', ')}`);
            return true;
        }
        return false;
    });
}
/**
 * Removes every repository that has at least one topic matching any of the
 * given glob patterns.
 *
 * Matching is case-insensitive, so a pattern such as `Test-*` also excludes a
 * repository whose topic is stored as `test-lib`.
 *
 * @param repos - Repositories to filter. A repo whose `topics` is undefined is
 *   treated as having no topics and is therefore always kept.
 * @param excludedRepoTopics - Glob patterns (micromatch syntax) to match
 *   against each repository topic. An empty list excludes nothing.
 * @param logger - Optional logger; one debug line is written per excluded repo,
 *   listing the topics that matched.
 * @returns A new array with the repos that did not match, in their original order.
 */
export const excludeReposByTopic = <T extends Repository>(repos: T[], excludedRepoTopics: string[], logger?: Logger) => {
    return repos.filter((repo) => {
        const topics = repo.topics ?? [];
        // `nocase` makes the glob comparison ignore letter casing.
        const matchingTopics = topics.filter((topic) => micromatch.isMatch(topic, excludedRepoTopics, { nocase: true }));

        if (matchingTopics.length > 0) {
            logger?.debug(`Excluding repo ${repo.id}. Reason: \`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`);
            return false;
        }
        return true;
    });
}
export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
if (typeof token === 'string') {
return token;

View file

@ -130,6 +130,17 @@
},
"description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
"examples": [
["docs", "core"]
]
},
"exclude": {
"type": "object",
"properties": {
@ -150,6 +161,16 @@
},
"default": [],
"description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
"examples": [
["tests", "ci"]
]
}
},
"additionalProperties": false
@ -226,6 +247,17 @@
],
"description": "List of individual projects to sync with. The project's namespace must be specified. See: https://docs.gitlab.com/ee/user/namespace/"
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"description": "List of project topics to include when syncing. Only projects that match at least one of the provided `topics` will be synced. If not specified, all projects will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
"examples": [
["docs", "core"]
]
},
"exclude": {
"type": "object",
"properties": {
@ -251,6 +283,16 @@
]
],
"description": "List of projects to exclude from syncing. Glob patterns are supported. The project's namespace must be specified, see: https://docs.gitlab.com/ee/user/namespace/"
},
"topics": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of project topics to exclude when syncing. Projects that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
"examples": [
["tests", "ci"]
]
}
},
"additionalProperties": false