All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 45s
- runtime-internal.service: resolveSnapshot 自动重建、persistResult 5种jobType持久化、validateOutput 校验、convertQuizCandidates/convertFlashcardCandidates 候选转换、notifyJobComplete 通知、JOB_CANCELLED处理、heartbeat 双阶段更新+取消检测 - user-ai.service: createAnalysisJob 11步流程、cancelJob、publishQuiz/publishFlashcard、getAnalysis/listAnalyses等 - user-ai.controller: 20+ 用户API端点 - 新增服务: SnapshotBuilderService、PriorityRulesService、SnapshotCleanupService、JobReaperService - 新增模块: admin-learning (CRUD管理) - Prisma schema: cancelRequestedAt/cancelledAt/sourceBlockIds 字段、expiresAt 索引 - 文档: ai-runtime-user-api.md、Issue 记录 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
99 lines
3.4 KiB
TypeScript
99 lines
3.4 KiB
TypeScript
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
|
|
import { PrismaService } from '../../infrastructure/database/prisma.service';
|
|
|
|
/** Delay between two consecutive reaper passes, in milliseconds (30 seconds). */
const REAP_INTERVAL_MS = 30 * 1_000;
|
|
|
|
/**
 * Background service that recovers AI runtime jobs stuck in the `locked` or
 * `running` state.
 *
 * One reaper pass runs when the module initialises, then again every
 * `REAP_INTERVAL_MS` milliseconds until the module is destroyed. Scheduled
 * passes are best-effort: a failing pass is logged and never crashes the host
 * process or prevents the next pass from running.
 */
@Injectable()
export class JobReaperService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(JobReaperService.name);
  private timer: ReturnType<typeof setInterval> | null = null;

  constructor(private readonly prisma: PrismaService) {}

  /** Runs an initial pass, then schedules the periodic ones. */
  async onModuleInit() {
    await this.reapSafely();
    this.timer = setInterval(() => {
      void this.reapSafely();
    }, REAP_INTERVAL_MS);
  }

  /** Stops the periodic passes. */
  onModuleDestroy() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Runs one reaper pass and logs (instead of silently discarding) any error,
   * so that a broken reaper is visible in the logs while staying non-fatal.
   */
  private async reapSafely(): Promise<void> {
    try {
      await this.reap();
    } catch (err) {
      const stack = err instanceof Error ? err.stack : String(err);
      this.logger.error('Job reaper pass failed', stack);
    }
  }

  /**
   * Recover jobs stuck in locked or running state past their timeout.
   *
   * Steps of a single pass:
   *  1. `locked` jobs whose `lockUntil` is in the past become `expired`.
   *  2. `running` jobs whose `startedAt + timeoutSeconds` is in the past
   *     become `expired`.
   *  3. `expired` jobs are either sent back to `pending` (retry budget left,
   *     `retryCount` incremented, lock fields cleared) or marked `failed`.
   *
   * Steps 2 and 3 each look at no more than 500 rows per pass; any remainder
   * is left for a later pass.
   *
   * @returns Counts of jobs expired (steps 1 + 2), retried and failed in this pass.
   * @throws Whatever the underlying Prisma calls throw; nothing is caught here.
   */
  async reap(): Promise<{ expired: number; retried: number; failed: number }> {
    const now = new Date();
    const nowMs = now.getTime();

    // 1. Expire stuck locked jobs (lockUntil passed)
    const expiredLocks = await this.prisma.aiRuntimeJob.updateMany({
      where: { status: 'locked', lockUntil: { lt: now } },
      data: { status: 'expired' },
    });

    // 2. Expire stuck running jobs (startedAt + timeoutSeconds < now)
    // We can't do arithmetic in a Prisma where clause, so fetch candidates and
    // filter in memory. Oldest jobs are examined first: healthy running jobs
    // stay in this result set, so an unordered 500-row window could otherwise
    // keep missing the stuck ones when many jobs are running.
    const runningJobs = await this.prisma.aiRuntimeJob.findMany({
      where: { status: 'running' },
      select: { id: true, startedAt: true, timeoutSeconds: true },
      orderBy: { startedAt: 'asc' },
      take: 500,
    });

    const stuckIds: Array<(typeof runningJobs)[number]['id']> = [];
    for (const job of runningJobs) {
      // Jobs without a start timestamp cannot be timed out here and are skipped.
      if (job.startedAt != null && nowMs - job.startedAt.getTime() > job.timeoutSeconds * 1000) {
        stuckIds.push(job.id);
      }
    }

    let expiredRunning = 0;
    if (stuckIds.length > 0) {
      // Re-check the status so a job that finished in the meantime is left alone.
      const result = await this.prisma.aiRuntimeJob.updateMany({
        where: { id: { in: stuckIds }, status: 'running' },
        data: { status: 'expired' },
      });
      expiredRunning = result.count;
    }

    // 3. Retry expired jobs where retryCount < maxRetryCount, fail the others.
    // Prisma doesn't support comparing two columns in where, so fetch and batch.
    const expiredJobs = await this.prisma.aiRuntimeJob.findMany({
      where: { status: 'expired' },
      select: { id: true, retryCount: true, maxRetryCount: true },
      take: 500,
    });

    const retryIds: Array<(typeof expiredJobs)[number]['id']> = [];
    const failIds: Array<(typeof expiredJobs)[number]['id']> = [];
    for (const job of expiredJobs) {
      (job.retryCount < job.maxRetryCount ? retryIds : failIds).push(job.id);
    }

    let retried = 0;
    let failed = 0;

    if (retryIds.length > 0) {
      const result = await this.prisma.aiRuntimeJob.updateMany({
        where: { id: { in: retryIds }, status: 'expired' },
        data: {
          status: 'pending',
          lockedBy: null,
          lockedAt: null,
          lockUntil: null,
          retryCount: { increment: 1 },
        },
      });
      retried = result.count;
    }

    if (failIds.length > 0) {
      const result = await this.prisma.aiRuntimeJob.updateMany({
        where: { id: { in: failIds }, status: 'expired' },
        data: { status: 'failed', finishedAt: new Date() },
      });
      failed = result.count;
    }

    // Only log when the pass actually changed something, to keep logs quiet.
    const total = expiredLocks.count + expiredRunning + retried + failed;
    if (total > 0) {
      this.logger.log(
        `Reaped: ${expiredLocks.count} locked expired, ${expiredRunning} running expired, ` +
          `${retried} retried → pending, ${failed} failed`,
      );
    }

    return { expired: expiredLocks.count + expiredRunning, retried, failed };
  }
}
|