mirror of
https://github.com/sourcebot-dev/sourcebot.git
synced 2025-12-12 04:15:30 +00:00
329 lines
No EOL
11 KiB
TypeScript
329 lines
No EOL
11 KiB
TypeScript
import { Octokit } from "@octokit/rest";
|
|
import { GitHubConfig } from "./schemas/v2.js";
|
|
import { createLogger } from "./logger.js";
|
|
import { AppContext, GitRepository } from "./types.js";
|
|
import path from 'path';
|
|
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, excludeReposByTopic, getTokenFromConfig, includeReposByTopic, marshalBool, measure } from "./utils.js";
|
|
import micromatch from "micromatch";
|
|
|
|
// Logger shared by every function in this file; obtained from createLogger with the name "GitHub".
const logger = createLogger("GitHub");
|
|
|
|
/**
 * The subset of a GitHub repository payload that this file reads.
 *
 * Only the first six properties are required; everything else is optional and
 * is defaulted by `getGitHubReposFromConfig` when it builds a `GitRepository`.
 */
type OctokitRepository = {
    /** Short repository name; used in log messages. */
    name: string;
    id: number;
    /** `owner/name`; used as the repository name and as part of its id and on-disk path. */
    full_name: string;
    fork: boolean;
    private: boolean;
    /** Written to the `zoekt.web-url` git config entry. */
    html_url: string;
    /** Repositories without a clone URL are skipped with a warning. */
    clone_url?: string;
    stargazers_count?: number;
    watchers_count?: number;
    subscribers_count?: number;
    forks_count?: number;
    archived?: boolean;
    topics?: string[];
    /** Multiplied by 1000 to populate `sizeInBytes`. */
    size?: number;
};
|
|
|
|
/**
 * Resolves every GitHub repository selected by `config` into `GitRepository` records.
 *
 * Steps, in order:
 *  1. Fetch repositories for `config.orgs`, `config.repos` and `config.users`.
 *  2. Skip repositories that have no `clone_url` and convert the rest.
 *  3. Apply the `config.topics` include filter and the `config.exclude` filters.
 *  4. When `config.revisions` is set, attach the branch / tag names that match
 *     the configured glob patterns.
 *
 * @param config GitHub connection settings and repository selectors.
 * @param signal Passed to every Octokit request as `request.signal`.
 * @param ctx Provides `reposPath` and is forwarded to `getTokenFromConfig`.
 * @returns The repositories that survived all filters.
 */
export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: AbortSignal, ctx: AppContext) => {
    const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;

    const octokit = new Octokit({
        auth: token,
        ...(config.url ? {
            // Trailing slashes are trimmed so that "https://host/" does not
            // turn into "https://host//api/v3".
            baseUrl: `${config.url.replace(/\/+$/, '')}/api/v3`
        } : {}),
    });

    let allRepos: OctokitRepository[] = [];

    if (config.orgs) {
        const _repos = await getReposForOrgs(config.orgs, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    if (config.repos) {
        const _repos = await getRepos(config.repos, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    if (config.users) {
        const isAuthenticated = config.token !== undefined;
        const _repos = await getReposOwnedByUsers(config.users, isAuthenticated, octokit, signal);
        allRepos = allRepos.concat(_repos);
    }

    // Marshall results to our type
    let repos: GitRepository[] = allRepos
        // The type guard narrows `clone_url` to `string`, so no non-null
        // assertion is needed when the URL is parsed below.
        .filter((repo): repo is OctokitRepository & { clone_url: string } => {
            if (!repo.clone_url) {
                logger.warn(`Repository ${repo.name} missing property 'clone_url'. Excluding.`)
                return false;
            }
            return true;
        })
        .map((repo) => {
            const hostname = config.url ? new URL(config.url).hostname : 'github.com';
            const repoId = `${hostname}/${repo.full_name}`;
            const repoPath = path.resolve(path.join(ctx.reposPath, `${repoId}.git`));

            // The token is embedded as the username component of the clone URL.
            const cloneUrl = new URL(repo.clone_url);
            if (token) {
                cloneUrl.username = token;
            }

            return {
                vcs: 'git',
                codeHost: 'github',
                name: repo.full_name,
                id: repoId,
                cloneUrl: cloneUrl.toString(),
                path: repoPath,
                isStale: false,
                isFork: repo.fork,
                isArchived: !!repo.archived,
                topics: repo.topics ?? [],
                gitConfigMetadata: {
                    'zoekt.web-url-type': 'github',
                    'zoekt.web-url': repo.html_url,
                    'zoekt.name': repoId,
                    'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
                    'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
                    'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
                    'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
                    'zoekt.archived': marshalBool(repo.archived),
                    'zoekt.fork': marshalBool(repo.fork),
                    'zoekt.public': marshalBool(repo.private === false)
                },
                sizeInBytes: repo.size ? repo.size * 1000 : undefined,
                branches: [],
                tags: [],
            } satisfies GitRepository;
        });

    if (config.topics) {
        const topics = config.topics.map(topic => topic.toLowerCase());
        repos = includeReposByTopic(repos, topics, logger);
    }

    if (config.exclude) {
        if (config.exclude.forks) {
            repos = excludeForkedRepos(repos, logger);
        }

        if (config.exclude.archived) {
            repos = excludeArchivedRepos(repos, logger);
        }

        if (config.exclude.repos) {
            repos = excludeReposByName(repos, config.exclude.repos, logger);
        }

        if (config.exclude.topics) {
            const topics = config.exclude.topics.map(topic => topic.toLowerCase());
            repos = excludeReposByTopic(repos, topics, logger);
        }

        if (config.exclude.size) {
            const min = config.exclude.size.min;
            const max = config.exclude.size.max;

            // A bound that is unset (or 0) is skipped entirely.
            if (min) {
                repos = repos.filter((repo) => {
                    // If we don't have a size, we can't filter by size.
                    if (!repo.sizeInBytes) {
                        return true;
                    }

                    if (repo.sizeInBytes < min) {
                        logger.debug(`Excluding repo ${repo.name}. Reason: repo is less than \`exclude.size.min\`=${min} bytes.`);
                        return false;
                    }

                    return true;
                });
            }

            if (max) {
                repos = repos.filter((repo) => {
                    // If we don't have a size, we can't filter by size.
                    if (!repo.sizeInBytes) {
                        return true;
                    }

                    if (repo.sizeInBytes > max) {
                        logger.debug(`Excluding repo ${repo.name}. Reason: repo is greater than \`exclude.size.max\`=${max} bytes.`);
                        return false;
                    }

                    return true;
                });
            }
        }
    }

    logger.debug(`Found ${repos.length} total repositories.`);

    if (config.revisions) {
        if (config.revisions.branches) {
            const branchGlobs = config.revisions.branches;
            repos = await Promise.all(
                repos.map(async (repo) => {
                    // `repo.name` holds the GitHub `full_name`, i.e. "owner/name".
                    const [owner, name] = repo.name.split('/');
                    let branches = (await getBranchesForRepo(owner, name, octokit, signal)).map(branch => branch.name);
                    branches = micromatch.match(branches, branchGlobs);

                    return {
                        ...repo,
                        branches,
                    };
                })
            )
        }

        if (config.revisions.tags) {
            const tagGlobs = config.revisions.tags;
            repos = await Promise.all(
                repos.map(async (repo) => {
                    // `repo.name` holds the GitHub `full_name`, i.e. "owner/name".
                    const [owner, name] = repo.name.split('/');
                    let tags = (await getTagsForRepo(owner, name, octokit, signal)).map(tag => tag.name);
                    tags = micromatch.match(tags, tagGlobs);

                    return {
                        ...repo,
                        tags,
                    };
                })
            )
        }
    }

    return repos;
}
|
|
|
|
/**
 * Fetches every tag of `owner/repo`, following pagination 100 entries at a time.
 *
 * Best-effort: a failed request is logged and an empty list is returned
 * instead of propagating the error.
 *
 * @param owner Repository owner.
 * @param repo Repository name.
 * @param octokit Client used for the request.
 * @param signal Passed to the request as `request.signal`.
 * @returns The tag objects returned by the API, or `[]` on failure.
 */
const getTagsForRepo = async (owner: string, repo: string, octokit: Octokit, signal: AbortSignal) => {
    try {
        logger.debug(`Fetching tags for repo ${owner}/${repo}...`);
        const { durationMs, data: tags } = await measure(() => octokit.paginate(octokit.repos.listTags, {
            owner,
            repo,
            per_page: 100,
            request: {
                signal
            }
        }));

        logger.debug(`Found ${tags.length} tags for repo ${owner}/${repo} in ${durationMs}ms`);
        return tags;
    } catch (e) {
        // Logged at error level, like the other fetch helpers in this file:
        // returning [] here means the repository ends up with no tags, which
        // must not go unnoticed when debug logging is off.
        logger.error(`Error fetching tags for repo ${owner}/${repo}: ${e}`);
        return [];
    }
}
|
|
|
|
/**
 * Fetches every branch of `owner/repo`, following pagination 100 entries at a time.
 *
 * Best-effort: a failed request is logged and an empty list is returned
 * instead of propagating the error.
 *
 * @param owner Repository owner.
 * @param repo Repository name.
 * @param octokit Client used for the request.
 * @param signal Passed to the request as `request.signal`.
 * @returns The branch objects returned by the API, or `[]` on failure.
 */
const getBranchesForRepo = async (owner: string, repo: string, octokit: Octokit, signal: AbortSignal) => {
    try {
        logger.debug(`Fetching branches for repo ${owner}/${repo}...`);
        const { durationMs, data: branches } = await measure(() => octokit.paginate(octokit.repos.listBranches, {
            owner,
            repo,
            per_page: 100,
            request: {
                signal
            }
        }));
        logger.debug(`Found ${branches.length} branches for repo ${owner}/${repo} in ${durationMs}ms`);
        return branches;
    } catch (e) {
        // Logged at error level, like the other fetch helpers in this file:
        // returning [] here means the repository ends up with no branches,
        // which must not go unnoticed when debug logging is off.
        logger.error(`Error fetching branches for repo ${owner}/${repo}: ${e}`);
        return [];
    }
}
|
|
|
|
|
|
/**
 * Fetches the repositories owned by each user in `users`.
 *
 * Users are queried concurrently. A failure for one user is logged and
 * contributes no repositories; it does not affect the other users.
 *
 * @param users GitHub user names.
 * @param isAuthenticated Whether the client was configured with a token.
 * @param octokit Client used for the requests.
 * @param signal Passed to every request as `request.signal`.
 * @returns A flat list with the repositories of all users.
 */
const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, octokit: Octokit, signal: AbortSignal) => {
    const repos = (await Promise.all(users.map(async (user) => {
        try {
            logger.debug(`Fetching repository info for user ${user}...`);

            const { durationMs, data } = await measure(async () => {
                if (isAuthenticated) {
                    // `listForAuthenticatedUser` (GET /user/repos) has no
                    // `username` parameter: it always lists the token owner's
                    // repositories, whatever `user` is. The search endpoint
                    // accepts a `user:` qualifier and returns the repositories
                    // visible to the token. `fork:true` keeps forks in the
                    // result; search omits them by default.
                    // NOTE(review): the search API caps results at 1000 per query.
                    return octokit.paginate(octokit.search.repos, {
                        q: `user:${user} fork:true`,
                        per_page: 100,
                        request: {
                            signal,
                        },
                    });
                } else {
                    return octokit.paginate(octokit.repos.listForUser, {
                        username: user,
                        per_page: 100,
                        request: {
                            signal,
                        },
                    });
                }
            });

            logger.debug(`Found ${data.length} owned by user ${user} in ${durationMs}ms.`);
            return data;
        } catch (e) {
            logger.error(`Failed to fetch repository info for user ${user}.`, e);
            return [];
        }
    }))).flat();

    return repos;
}
|
|
|
|
/**
 * Lists the repositories of every organization in `orgs`.
 *
 * Organizations are queried concurrently, 100 repositories per page. When the
 * lookup for an organization fails, the error is logged and that organization
 * contributes no repositories.
 *
 * @param orgs Organization names.
 * @param octokit Client used for the requests.
 * @param signal Passed to every request as `request.signal`.
 * @returns A flat list with the repositories of all organizations.
 */
const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal) => {
    // Lookup for a single organization; never rejects.
    const fetchOrgRepos = async (org: string) => {
        try {
            logger.debug(`Fetching repository info for org ${org}...`);

            const listing = await measure(() => octokit.paginate(octokit.repos.listForOrg, {
                org,
                per_page: 100,
                request: { signal },
            }));

            logger.debug(`Found ${listing.data.length} in org ${org} in ${listing.durationMs}ms.`);
            return listing.data;
        } catch (e) {
            logger.error(`Failed to fetch repository info for org ${org}.`, e);
            return [];
        }
    };

    const perOrg = await Promise.all(orgs.map((org) => fetchOrgRepos(org)));
    return perOrg.flat();
};
|
|
|
|
/**
 * Fetches repository details for each `owner/name` entry in `repoList`.
 *
 * Entries are looked up concurrently. When a lookup fails, the error is logged
 * and that entry is left out of the result.
 *
 * @param repoList Repository identifiers in `owner/name` form.
 * @param octokit Client used for the requests.
 * @param signal Passed to every request as `request.signal`.
 * @returns The repository payloads of all successful lookups.
 */
const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal) => {
    const lookups = repoList.map(async (repo) => {
        try {
            logger.debug(`Fetching repository info for ${repo}...`);

            const [owner, repoName] = repo.split('/');
            const timed = await measure(() => octokit.repos.get({
                owner,
                repo: repoName,
                request: { signal },
            }));

            logger.debug(`Found info for repository ${repo} in ${timed.durationMs}ms`);

            // `timed.data` is the Octokit response; its own `data` is the repository.
            return [timed.data.data];
        } catch (e) {
            logger.error(`Failed to fetch repository info for ${repo}.`, e);
            return [];
        }
    });

    // Each lookup yields zero or one repository; flatten into a single list.
    return (await Promise.all(lookups)).flat();
};