Random fixes and improvements (#244)

This commit is contained in:
Brendan Kellam 2025-03-24 17:01:32 -07:00 committed by GitHub
parent f4db3d226f
commit 92a81387df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 71 additions and 16 deletions

View file

@ -34,7 +34,7 @@ AUTH_URL="http://localhost:3000"
# Sentry
# SENTRY_BACKEND_DSN=""
# NEXT_PUBLIC_SENTRY_WEBAPP_DSN=""
# SENTRY_ENVIRONMENT="dev"
SENTRY_ENVIRONMENT="dev"
# NEXT_PUBLIC_SENTRY_ENVIRONMENT="dev"
# SENTRY_AUTH_TOKEN=

View file

@ -1,4 +1,4 @@
import { Connection, ConnectionSyncStatus, PrismaClient, Prisma } from "@sourcebot/db";
import { Connection, ConnectionSyncStatus, PrismaClient, Prisma, RepoIndexingStatus } from "@sourcebot/db";
import { Job, Queue, Worker } from 'bullmq';
import { Settings } from "./types.js";
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
@ -160,7 +160,7 @@ export class ConnectionManager implements IConnectionManager {
}
}
const { repoData, notFound } = result;
let { repoData, notFound } = result;
// Push the information regarding not found users, orgs, and repos to the connection's syncStatusMetadata. Note that
// this won't be overwritten even if the connection job fails
@ -174,7 +174,7 @@ export class ConnectionManager implements IConnectionManager {
});
// Filter out any duplicates by external_id and external_codeHostUrl.
repoData.filter((repo, index, self) => {
repoData = repoData.filter((repo, index, self) => {
return index === self.findIndex(r =>
r.external_id === repo.external_id &&
r.external_codeHostUrl === repo.external_codeHostUrl
@ -263,6 +263,14 @@ export class ConnectionManager implements IConnectionManager {
private async onSyncJobFailed(job: Job | undefined, err: unknown) {
this.logger.info(`Connection sync job failed with error: ${err}`);
// Report the failure to Sentry, tagged with identifiers this queue's payload carries:
// the handler reads `connectionId` from `job.data` right after this call.
// NOTE(review): assumes the connection sync payload has no `repo` field (unlike the repo
// indexing / garbage collection queues). Dereferencing `job.data.repo.id` here would throw a
// TypeError whenever a job is present, aborting the failure handling — confirm against the
// payload type.
Sentry.captureException(err, {
    tags: {
        connectionId: job?.data.connectionId,
        jobId: job?.id,
        queue: QUEUE_NAME,
    }
});
if (job) {
const { connectionId } = job.data;

View file

@ -130,15 +130,25 @@ export class RepoManager implements IRepoManager {
const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs);
const repos = await this.db.repo.findMany({
where: {
repoIndexingStatus: {
in: [
RepoIndexingStatus.NEW,
RepoIndexingStatus.INDEXED
]
},
OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } },
// "NEW" is really a misnomer here - it just means that the repo needs to be indexed
// immediately. In most cases, this will be because the repo was just created and
// is indeed "new". However, it could also be that a "retry" was requested on a failed
// index. So, we don't want to block on the indexedAt timestamp here.
{
repoIndexingStatus: RepoIndexingStatus.NEW,
},
// When the repo has already been indexed, we only want to reindex if the reindexing
// interval has elapsed (or if the date isn't set for some reason).
{
AND: [
{ repoIndexingStatus: RepoIndexingStatus.INDEXED },
{ OR: [
{ indexedAt: null },
{ indexedAt: { lt: thresholdDate } },
]}
]
}
]
},
include: {
@ -335,7 +345,15 @@ export class RepoManager implements IRepoManager {
}
private async onIndexJobFailed(job: Job<RepoIndexingPayload> | undefined, err: unknown) {
this.logger.info(`Repo index job failed (id: ${job?.id ?? 'unknown'})`);
this.logger.info(`Repo index job failed (id: ${job?.id ?? 'unknown'}) with error: ${err}`);
Sentry.captureException(err, {
tags: {
repoId: job?.data.repo.id,
jobId: job?.id,
queue: REPO_INDEXING_QUEUE,
}
});
if (job) {
this.promClient.activeRepoIndexingJobs.dec();
this.promClient.repoIndexingFailTotal.inc();
@ -474,6 +492,13 @@ export class RepoManager implements IRepoManager {
private async onGarbageCollectionJobFailed(job: Job<RepoGarbageCollectionPayload> | undefined, err: unknown) {
this.logger.info(`Garbage collection job failed (id: ${job?.id ?? 'unknown'}) with error: ${err}`);
Sentry.captureException(err, {
tags: {
repoId: job?.data.repo.id,
jobId: job?.id,
queue: REPO_GC_QUEUE,
}
});
if (job) {
this.promClient.activeRepoGarbageCollectionJobs.dec();

View file

@ -38,8 +38,8 @@ export function PostHogProvider({ children, disabled }: PostHogProviderProps) {
// @see next.config.mjs for path rewrites to the "/ingest" route.
api_host: "/ingest",
person_profiles: 'identified_only',
capture_pageview: false, // @nocheckin Disable automatic pageview capture if we're not in public demo mode
autocapture: false, // Disable automatic event capture
capture_pageview: false,
autocapture: false,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
sanitize_properties: (properties: Record<string, any>, _event: string) => {
// https://posthog.com/docs/libraries/js#config

View file

@ -1,4 +1,4 @@
import { ConnectionSyncStatus, OrgRole, Prisma } from '@sourcebot/db';
import { ConnectionSyncStatus, OrgRole, Prisma, RepoIndexingStatus } from '@sourcebot/db';
import { env } from './env.mjs';
import { prisma } from "@/prisma";
import { SINGLE_TENANT_USER_ID, SINGLE_TENANT_ORG_ID, SINGLE_TENANT_ORG_DOMAIN, SINGLE_TENANT_ORG_NAME, SINGLE_TENANT_USER_EMAIL } from './lib/constants';
@ -105,6 +105,13 @@ const initSingleTenancy = async () => {
name: key,
orgId: SINGLE_TENANT_ORG_ID,
}
},
include: {
repos: {
include: {
repo: true,
}
}
}
});
@ -137,6 +144,21 @@ const initSingleTenancy = async () => {
});
console.log(`Upserted connection with name '${key}'. Connection ID: ${connectionDb.id}`);
// Re-try any repos that failed to index.
const failedRepos = currentConnection?.repos.filter(repo => repo.repo.repoIndexingStatus === RepoIndexingStatus.FAILED).map(repo => repo.repo.id) ?? [];
if (failedRepos.length > 0) {
await prisma.repo.updateMany({
where: {
id: {
in: failedRepos,
}
},
data: {
repoIndexingStatus: RepoIndexingStatus.NEW,
}
})
}
}
}
}