fix: add observability to fire-and-forget persistResult & notifyJobComplete
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 44s

- Add static counters: persistResultFailures, notifyFailures
- Replace .catch(() => {}) with logger.error + counter increment
- Add error-path unit tests for both counter increments
- Reset counters in beforeEach for test isolation

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
wangdl 2026-06-18 11:38:30 +08:00
parent 7aea03f6e0
commit c2e5590718
2 changed files with 48 additions and 2 deletions

View File

@ -94,6 +94,8 @@ describe('RuntimeInternalService', () => {
mockSnapshotBuilder = { buildSnapshot: jest.fn() };
service = new RuntimeInternalService(mockPrisma as any, mockUserAi, mockSnapshotBuilder);
RuntimeInternalService.persistResultFailures = 0;
RuntimeInternalService.notifyFailures = 0;
});
afterEach(() => {
@ -458,6 +460,39 @@ describe('RuntimeInternalService', () => {
}));
});
it('increments notifyFailures counter when notification fails', async () => {
const loggerSpy = jest.spyOn((service as any).logger, 'error');
mockAiRuntimeJob.findUnique.mockResolvedValue(job);
mockAiRuntimeResult.findFirst.mockResolvedValue(null);
mockAiRuntimeResult.findUnique.mockResolvedValue(null);
mockAiRuntimeResult.create.mockResolvedValue({});
mockAiRuntimeJob.update.mockResolvedValue({});
mockAiLearningAnalysis.create.mockResolvedValue({});
mockNotification.create.mockRejectedValue(new Error('DB down'));
await service.submitResult('j1', dto);
// Flush pending microtasks so fire-and-forget .catch() runs
await new Promise(r => setTimeout(r, 0));
expect(RuntimeInternalService.notifyFailures).toBeGreaterThanOrEqual(1);
expect(loggerSpy).toHaveBeenCalled();
});
it('increments persistResultFailures counter when persistence fails', async () => {
const loggerSpy = jest.spyOn((service as any).logger, 'error');
mockAiRuntimeJob.findUnique.mockResolvedValue(job);
mockAiRuntimeResult.findFirst.mockResolvedValue(null);
mockAiRuntimeResult.findUnique.mockResolvedValue(null);
mockAiRuntimeResult.create.mockResolvedValue({});
mockAiRuntimeJob.update.mockResolvedValue({});
mockAiLearningAnalysis.create.mockRejectedValue(new Error('DB down'));
await service.submitResult('j1', dto);
expect(RuntimeInternalService.persistResultFailures).toBeGreaterThanOrEqual(1);
expect(loggerSpy).toHaveBeenCalled();
});
// ── persistResult: job type routing ──
it('persists learning_state_analysis output', async () => {

View File

@ -7,6 +7,10 @@ import { SnapshotBuilderService, SOURCE_DATA_VERSION } from '../snapshot-builder
export class RuntimeInternalService {
private readonly logger = new Logger(RuntimeInternalService.name);
/** Counters for fire-and-forget failures — exposed for health checks and alerting */
static persistResultFailures = 0;
static notifyFailures = 0;
constructor(
private readonly prisma: PrismaService,
private readonly userAi: UserAiService,
@ -267,10 +271,14 @@ export class RuntimeInternalService {
});
await this.persistResult(job, dto).catch(err => {
RuntimeInternalService.persistResultFailures++;
this.logger.error(`Result persistence failed for job=${jobId}: ${err.message}`, err.stack);
});
this.notifyJobComplete(job.userId, jobId, job.jobType, 'succeeded').catch(() => {});
this.notifyJobComplete(job.userId, jobId, job.jobType, 'succeeded').catch(err => {
RuntimeInternalService.notifyFailures++;
this.logger.error(`Notification failed for job=${jobId}: ${err.message}`, err.stack);
});
return { status: 'ok', duplicate: false };
}
@ -599,7 +607,10 @@ export class RuntimeInternalService {
}
if (exceeded) {
this.notifyJobComplete(job.userId, jobId, job.jobType, 'failed').catch(() => {});
this.notifyJobComplete(job.userId, jobId, job.jobType, 'failed').catch(err => {
RuntimeInternalService.notifyFailures++;
this.logger.error(`Notification failed for job=${jobId}: ${err.message}`, err.stack);
});
}
return { status: exceeded ? 'failed' : 'pending', retryCount: newRetryCount };