feat: M4-03 — server health checks (Docker/MySQL/Redis/Qdrant/Gitea/Nginx/Worker)
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 39s

- Add ServiceHealth Prisma model for health check records
- Add getHealthChecks() with local + remote service checks
- Add GET /admin-api/servers/health endpoint

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
WangDL 2026-05-24 17:51:20 +08:00
parent 90e921366a
commit fc978a5e7f
3 changed files with 132 additions and 0 deletions

View File

@ -1371,3 +1371,16 @@ model LearningStats {
@@unique([userId, date])
@@index([userId])
}
/// One health-check record: the observed status of one service on one server
/// at one point in time.
/// NOTE(review): no code visible in this change writes rows to this model —
/// confirm where records are persisted.
model ServiceHealth {
/// Primary key, generated as a cuid.
id String @id @default(cuid())
/// Name of the server the check was run against (max 100 chars).
serverName String @db.VarChar(100)
/// Name of the checked service (max 64 chars).
serviceName String @db.VarChar(64)
/// Check outcome (max 16 chars); presumably 'healthy' / 'unhealthy' as
/// produced by the health-check service — TODO confirm.
status String @db.VarChar(16)
/// Optional human-readable detail about the outcome (max 500 chars).
message String? @db.VarChar(500)
/// When the check was recorded; defaults to the row's insertion time.
checkedAt DateTime @default(now())
// Secondary indexes for filtering by server, by service, and by time.
@@index([serverName])
@@index([serviceName])
@@index([checkedAt])
}

View File

@ -19,4 +19,11 @@ export class AdminServersController {
async getMetrics(): Promise<{ servers: ServerInfo[] }> {
return this.serversService.getAllMetrics();
}
/**
 * Service health-check endpoint, routed at `health` under this controller
 * and restricted to the `SUPER_ADMIN` role.
 *
 * Delegates entirely to `serversService.getHealthChecks()` and returns its
 * result unchanged.
 */
@Get('health')
@AdminRoles('SUPER_ADMIN' as AdminRole)
@ApiOperation({ summary: '服务健康检查Docker/MySQL/Redis/Qdrant/Gitea/Nginx/Worker' })
async getHealth() {
return this.serversService.getHealthChecks();
}
}

View File

@ -152,4 +152,116 @@ export class AdminServersService {
const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]);
return { servers: [local, ...(remote ? [remote] : [])] };
}
// ═══ Health Checks ═══
/**
 * Collects the service health checks for the local server and the remote
 * server and returns one entry per server.
 *
 * The two probes are independent, so they are started together with
 * `Promise.all` instead of being awaited one after the other; the response
 * time is then bounded by the slower of the two rather than their sum.
 *
 * @returns An array whose first element is the local server's result. The
 *   remote server's result follows it unless `checkRemote()` resolved to
 *   `null`, in which case only the local entry is returned.
 */
async getHealthChecks() {
  const [local, remoteResult] = await Promise.all([this.checkLocal(), this.checkRemote()]);
  const localResult = { serverName: '蜂驰云 8核32G', services: local };
  return remoteResult ? [localResult, remoteResult] : [localResult];
}
/**
 * Starts every local probe at once and resolves with their results in the
 * order listed below: four port checks, three process checks, the Docker
 * check and finally the Gitea API check.
 */
private async checkLocal() {
  const pendingProbes = [
    // Listening-port probes.
    this.checkService('MySQL', 'mysql', 3306),
    this.checkService('Redis', 'redis', 6379),
    this.checkService('Qdrant', 'qdrant', 6333),
    this.checkService('Nginx', 'nginx', 80),
    // Process-table probes.
    this.checkProcess('NestJS API', 'dist/src/main.js'),
    this.checkProcess('Gitea Runner', 'act_runner'),
    this.checkProcess('Hermes Agent', 'hermes'),
    // Docker CLI probe.
    this.checkDocker(),
    // Gitea HTTP API probe.
    this.checkGiteaUrl(),
  ];
  return Promise.all(pendingProbes);
}
/**
 * Runs the remote server's probes (three port checks, one process check and
 * the Docker check) concurrently.
 *
 * @returns The remote server's name together with the probe results, or
 *   `null` if awaiting the probes throws.
 */
private async checkRemote() {
  try {
    const services = await Promise.all([
      this.checkRemoteService('MySQL', 'mysql', 3306),
      this.checkRemoteService('Redis', 'redis', 6379),
      this.checkRemoteService('Nginx', 'nginx', 80),
      this.checkRemoteProcess('NestJS API', 'dist/src/main.js'),
      this.checkRemoteDocker(),
    ]);
    return { serverName: '轻量云 4核4G', services };
  } catch {
    return null;
  }
}
/**
 * Probes the local Docker daemon by running `docker ps` (3000 ms timeout
 * option). The command's output is ignored; only whether the call succeeds
 * matters.
 *
 * @returns A `Docker` entry marked healthy when the command succeeds and
 *   unhealthy when it throws.
 */
private async checkDocker() {
  let reachable = true;
  try {
    await execAsync('docker ps --format "{{.Names}}"', { timeout: 3000 });
  } catch {
    reachable = false;
  }
  return reachable
    ? { serviceName: 'Docker', status: 'healthy', message: '运行中' }
    : { serviceName: 'Docker', status: 'unhealthy', message: 'Docker 不可达' };
}
/**
 * Probes Docker on the remote host by running `docker ps` over SSH
 * (3 s SSH connect timeout, 5000 ms timeout option on the whole command).
 * The command's output is ignored; only whether the call succeeds matters.
 *
 * @returns A `Docker` entry marked healthy when the command succeeds and
 *   unhealthy when it throws.
 */
private async checkRemoteDocker() {
  let reachable = true;
  try {
    const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} 'docker ps --format "{{.Names}}"'`;
    await execAsync(sshCommand, { timeout: 5000 });
  } catch {
    reachable = false;
  }
  return reachable
    ? { serviceName: 'Docker', status: 'healthy', message: '运行中' }
    : { serviceName: 'Docker', status: 'unhealthy', message: 'Docker 不可达' };
}
/**
 * Checks whether something is listening on a local TCP port by filtering
 * the output of `ss -tlnp` for `:<port> ` (2000 ms timeout option).
 *
 * @param name Display name reported as `serviceName`.
 * @param key Not used by this method.
 * @param port TCP port to look for.
 * @returns Healthy when the filtered output is non-empty, unhealthy when it
 *   is empty or when running the command throws.
 */
private async checkService(name: string, key: string, port: number) {
  try {
    const { stdout } = await execAsync(`ss -tlnp | grep ':${port} ' | head -1`, { timeout: 2000 });
    const matchedLine = stdout.trim();
    if (matchedLine) {
      return { serviceName: name, status: 'healthy', message: '端口监听中' };
    }
    return { serviceName: name, status: 'unhealthy', message: '端口未监听' };
  } catch {
    return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
  }
}
/**
 * Checks whether something is listening on a TCP port of the remote host by
 * running `ss -tlnp` over SSH and filtering for `:<port> `
 * (3 s SSH connect timeout, 5000 ms timeout option on the whole command).
 *
 * @param name Display name reported as `serviceName`.
 * @param key Not used by this method.
 * @param port TCP port to look for.
 * @returns Healthy when the filtered output is non-empty, unhealthy when it
 *   is empty or when running the command throws.
 */
private async checkRemoteService(name: string, key: string, port: number) {
  try {
    const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ss -tlnp | grep ':${port} ' | head -1"`;
    const { stdout } = await execAsync(sshCommand, { timeout: 5000 });
    const matchedLine = stdout.trim();
    if (matchedLine) {
      return { serviceName: name, status: 'healthy', message: '端口监听中' };
    }
    return { serviceName: name, status: 'unhealthy', message: '端口未监听' };
  } catch {
    return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
  }
}
/**
 * Checks whether a local process is running by filtering `ps aux` for a
 * search string, excluding lines that contain `grep`
 * (2000 ms timeout option).
 *
 * @param name Display name reported as `serviceName`.
 * @param search Text interpolated into the `grep` pattern.
 * @returns Healthy when a matching line is found, unhealthy when none is
 *   found or when running the command throws.
 */
private async checkProcess(name: string, search: string) {
  try {
    const { stdout } = await execAsync(`ps aux | grep -v grep | grep '${search}' | head -1`, { timeout: 2000 });
    const matchedLine = stdout.trim();
    if (matchedLine) {
      return { serviceName: name, status: 'healthy', message: '进程活跃' };
    }
    return { serviceName: name, status: 'unhealthy', message: '进程未找到' };
  } catch {
    return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
  }
}
/**
 * Checks whether a process is running on the remote host by running
 * `ps aux` over SSH and filtering for a search string, excluding lines that
 * contain `grep` (3 s SSH connect timeout, 5000 ms timeout option on the
 * whole command).
 *
 * @param name Display name reported as `serviceName`.
 * @param search Text interpolated into the `grep` pattern.
 * @returns Healthy when a matching line is found, unhealthy when none is
 *   found or when running the command throws.
 */
private async checkRemoteProcess(name: string, search: string) {
  try {
    const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ps aux | grep -v grep | grep '${search}' | head -1"`;
    const { stdout } = await execAsync(sshCommand, { timeout: 5000 });
    const matchedLine = stdout.trim();
    if (matchedLine) {
      return { serviceName: name, status: 'healthy', message: '进程活跃' };
    }
    return { serviceName: name, status: 'unhealthy', message: '进程未找到' };
  } catch {
    return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
  }
}
/**
 * Probes the Gitea version endpoint on `localhost:3000` with `curl`, which
 * is told to print only the HTTP status code (3000 ms timeout option).
 *
 * @returns Healthy, with the status code in the message, when the printed
 *   code starts with `2`; unhealthy for any other code or when running the
 *   command throws.
 */
private async checkGiteaUrl() {
  try {
    const { stdout } = await execAsync("curl -s -o /dev/null -w '%{http_code}' http://localhost:3000/api/v1/version 2>/dev/null", { timeout: 3000 });
    const httpCode = stdout.trim();
    if (httpCode.startsWith('2')) {
      return { serviceName: 'Gitea', status: 'healthy', message: `HTTP ${httpCode}` };
    }
    return { serviceName: 'Gitea', status: 'unhealthy', message: 'Gitea API 异常' };
  } catch {
    return { serviceName: 'Gitea', status: 'unhealthy', message: 'Gitea API 不可达' };
  }
}
}