feat: M4-03 — server health checks (Docker/MySQL/Redis/Qdrant/Gitea/Nginx/Worker)
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 39s
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 39s
- Add ServiceHealth Prisma model for health check records
- Add getHealthChecks() with local + remote service checks
- Add GET /admin-api/servers/health endpoint

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
90e921366a
commit
fc978a5e7f
@ -1371,3 +1371,16 @@ model LearningStats {
|
||||
@@unique([userId, date])
|
||||
@@index([userId])
|
||||
}
|
||||
|
||||
// One health-check record: the outcome of checking a single service on a
// single server at a point in time.
model ServiceHealth {
  // Primary key, generated as a cuid.
  id          String   @id @default(cuid())
  // Name of the server the check ran against (up to 100 chars).
  serverName  String   @db.VarChar(100)
  // Name of the checked service (up to 64 chars).
  serviceName String   @db.VarChar(64)
  // Check outcome (up to 16 chars).
  // NOTE(review): presumably 'healthy' / 'unhealthy' — confirm against the writer.
  status      String   @db.VarChar(16)
  // Optional detail text for the outcome (up to 500 chars).
  message     String?  @db.VarChar(500)
  // When the record was created; defaults to the insertion time.
  checkedAt   DateTime @default(now())

  // Single-column indexes for filtering by server, by service, or by time.
  @@index([serverName])
  @@index([serviceName])
  @@index([checkedAt])
}
|
||||
|
||||
@ -19,4 +19,11 @@ export class AdminServersController {
|
||||
async getMetrics(): Promise<{ servers: ServerInfo[] }> {
|
||||
return this.serversService.getAllMetrics();
|
||||
}
|
||||
|
||||
/**
 * GET handler for this controller's `health` route.
 *
 * Delegates to `serversService.getHealthChecks()` and returns its result
 * unchanged. The route is annotated with the `SUPER_ADMIN` admin role.
 *
 * @returns Whatever `getHealthChecks()` resolves to.
 */
@Get('health')
@AdminRoles('SUPER_ADMIN' as AdminRole)
@ApiOperation({ summary: '服务健康检查(Docker/MySQL/Redis/Qdrant/Gitea/Nginx/Worker)' })
async getHealth() {
  const report = await this.serversService.getHealthChecks();
  return report;
}
|
||||
}
|
||||
|
||||
@ -152,4 +152,116 @@ export class AdminServersService {
|
||||
const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]);
|
||||
return { servers: [local, ...(remote ? [remote] : [])] };
|
||||
}
|
||||
|
||||
// ═══ Health Checks ═══
|
||||
|
||||
/**
 * Collects service health for the local server and, when available, the
 * remote server.
 *
 * @returns An array of `{ serverName, services }` entries: always the local
 *          server first, followed by the remote server unless `checkRemote()`
 *          resolved to `null`.
 */
async getHealthChecks() {
  // The local and remote probes do not depend on each other, so start both at
  // once instead of waiting for the local set before opening the SSH checks
  // (same approach as the Promise.all in getAllMetrics).
  const [localServices, remoteResult] = await Promise.all([this.checkLocal(), this.checkRemote()]);

  const localResult = { serverName: '蜂驰云 8核32G', services: localServices };
  return remoteResult ? [localResult, remoteResult] : [localResult];
}
|
||||
|
||||
/**
 * Runs every local probe concurrently and resolves with their results.
 *
 * Result order: MySQL, Redis, Qdrant, Nginx (port checks), NestJS API,
 * Gitea Runner, Hermes Agent (process checks), Docker, Gitea API.
 *
 * @returns One `{ serviceName, status, message }` entry per probe.
 */
private async checkLocal() {
  // Services detected by a listening TCP port.
  const portProbes = [
    this.checkService('MySQL', 'mysql', 3306),
    this.checkService('Redis', 'redis', 6379),
    this.checkService('Qdrant', 'qdrant', 6333),
    this.checkService('Nginx', 'nginx', 80),
  ];

  // Services detected by a matching entry in the process list.
  const processProbes = [
    this.checkProcess('NestJS API', 'dist/src/main.js'),
    this.checkProcess('Gitea Runner', 'act_runner'),
    this.checkProcess('Hermes Agent', 'hermes'),
  ];

  // Docker and the Gitea API have dedicated probes and come last.
  return Promise.all([...portProbes, ...processProbes, this.checkDocker(), this.checkGiteaUrl()]);
}
|
||||
|
||||
/**
 * Runs the remote-server probes (MySQL, Redis, Nginx, NestJS API, Docker)
 * concurrently.
 *
 * @returns `{ serverName, services }` for the remote server, or `null` if the
 *          combined probe promise rejects.
 */
private async checkRemote() {
  try {
    const pending = Promise.all([
      this.checkRemoteService('MySQL', 'mysql', 3306),
      this.checkRemoteService('Redis', 'redis', 6379),
      this.checkRemoteService('Nginx', 'nginx', 80),
      this.checkRemoteProcess('NestJS API', 'dist/src/main.js'),
      this.checkRemoteDocker(),
    ]);
    const services = await pending;
    return { serverName: '轻量云 4核4G', services };
  } catch {
    // Any rejection hides the remote server from the report entirely.
    return null;
  }
}
|
||||
|
||||
/**
 * Checks the local Docker daemon by running `docker ps` with a 3 s timeout.
 *
 * @returns A `Docker` entry that is `healthy` when the command succeeds and
 *          `unhealthy` when it fails or times out.
 */
private async checkDocker() {
  const serviceName = 'Docker';
  try {
    // Only success or failure matters; the listed container names are ignored.
    await execAsync('docker ps --format "{{.Names}}"', { timeout: 3000 });
  } catch {
    return { serviceName, status: 'unhealthy', message: 'Docker 不可达' };
  }
  return { serviceName, status: 'healthy', message: '运行中' };
}
|
||||
|
||||
/**
 * Checks Docker on the remote host by running `docker ps` over SSH
 * (3 s connect timeout, 5 s overall timeout).
 *
 * NOTE(review): the SSH call passes `StrictHostKeyChecking=no`, so the remote
 * host key is not verified — confirm this is intended.
 *
 * @returns A `Docker` entry that is `healthy` when the SSH command succeeds
 *          and `unhealthy` when it fails or times out.
 */
private async checkRemoteDocker() {
  const serviceName = 'Docker';
  const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} 'docker ps --format "{{.Names}}"'`;
  try {
    // Only success or failure matters; the command output is ignored.
    await execAsync(sshCommand, { timeout: 5000 });
  } catch {
    return { serviceName, status: 'unhealthy', message: 'Docker 不可达' };
  }
  return { serviceName, status: 'healthy', message: '运行中' };
}
|
||||
|
||||
/**
 * Checks whether something is listening on a local TCP port by filtering the
 * output of `ss -tlnp` (2 s timeout).
 *
 * @param name Display name reported as `serviceName`.
 * @param key  Not used by this method's body.
 * @param port TCP port to look for.
 * @returns `healthy` when a matching listener line is found, otherwise
 *          `unhealthy` (no listener, or the command failed).
 */
private async checkService(name: string, key: string, port: number) {
  let listenerLine: string;
  try {
    const { stdout } = await execAsync(`ss -tlnp | grep ':${port} ' | head -1`, { timeout: 2000 });
    listenerLine = stdout.trim();
  } catch {
    return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
  }

  // Empty output means the command ran but no listener matched the port.
  if (!listenerLine) {
    return { serviceName: name, status: 'unhealthy', message: '端口未监听' };
  }
  return { serviceName: name, status: 'healthy', message: '端口监听中' };
}
|
||||
|
||||
/**
 * Checks whether something is listening on a TCP port of the remote host by
 * running `ss -tlnp` over SSH (3 s connect timeout, 5 s overall timeout).
 *
 * NOTE(review): the SSH call passes `StrictHostKeyChecking=no`, so the remote
 * host key is not verified — confirm this is intended.
 *
 * @param name Display name reported as `serviceName`.
 * @param key  Not used by this method's body.
 * @param port TCP port to look for.
 * @returns `healthy` when a matching listener line is found, otherwise
 *          `unhealthy` (no listener, or the SSH command failed).
 */
private async checkRemoteService(name: string, key: string, port: number) {
  const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ss -tlnp | grep ':${port} ' | head -1"`;
  let listenerLine: string;
  try {
    const { stdout } = await execAsync(sshCommand, { timeout: 5000 });
    listenerLine = stdout.trim();
  } catch {
    return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
  }

  // Empty output means the command ran but no listener matched the port.
  if (!listenerLine) {
    return { serviceName: name, status: 'unhealthy', message: '端口未监听' };
  }
  return { serviceName: name, status: 'healthy', message: '端口监听中' };
}
|
||||
|
||||
/**
 * Checks whether a local process matching `search` appears in `ps aux`
 * (2 s timeout). Lines containing `grep` are filtered out first.
 *
 * @param name   Display name reported as `serviceName`.
 * @param search Pattern passed to `grep` against the process list.
 * @returns `healthy` when a matching line is found, otherwise `unhealthy`
 *          (no match, or the command failed).
 */
private async checkProcess(name: string, search: string) {
  let matchedLine: string;
  try {
    const { stdout } = await execAsync(`ps aux | grep -v grep | grep '${search}' | head -1`, { timeout: 2000 });
    matchedLine = stdout.trim();
  } catch {
    return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
  }

  // Empty output means the command ran but nothing matched.
  if (!matchedLine) {
    return { serviceName: name, status: 'unhealthy', message: '进程未找到' };
  }
  return { serviceName: name, status: 'healthy', message: '进程活跃' };
}
|
||||
|
||||
/**
 * Checks whether a process matching `search` appears in `ps aux` on the
 * remote host, run over SSH (3 s connect timeout, 5 s overall timeout).
 *
 * NOTE(review): the SSH call passes `StrictHostKeyChecking=no`, so the remote
 * host key is not verified — confirm this is intended.
 *
 * @param name   Display name reported as `serviceName`.
 * @param search Pattern passed to `grep` against the remote process list.
 * @returns `healthy` when a matching line is found, otherwise `unhealthy`
 *          (no match, or the SSH command failed).
 */
private async checkRemoteProcess(name: string, search: string) {
  const sshCommand = `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ps aux | grep -v grep | grep '${search}' | head -1"`;
  let matchedLine: string;
  try {
    const { stdout } = await execAsync(sshCommand, { timeout: 5000 });
    matchedLine = stdout.trim();
  } catch {
    return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
  }

  // Empty output means the command ran but nothing matched.
  if (!matchedLine) {
    return { serviceName: name, status: 'unhealthy', message: '进程未找到' };
  }
  return { serviceName: name, status: 'healthy', message: '进程活跃' };
}
|
||||
|
||||
/**
 * Checks the Gitea API by requesting `/api/v1/version` on localhost:3000 with
 * `curl` (3 s timeout) and inspecting the printed HTTP status code.
 *
 * @returns A `Gitea` entry: `healthy` with the status code when it starts
 *          with `2`, `unhealthy` for any other code or when the command fails.
 */
private async checkGiteaUrl() {
  // curl discards the response body and prints only the HTTP status code.
  const probe = "curl -s -o /dev/null -w '%{http_code}' http://localhost:3000/api/v1/version 2>/dev/null";
  try {
    const { stdout } = await execAsync(probe, { timeout: 3000 });
    const httpCode = stdout.trim();
    if (httpCode.startsWith('2')) {
      return { serviceName: 'Gitea', status: 'healthy', message: `HTTP ${httpCode}` };
    }
    return { serviceName: 'Gitea', status: 'unhealthy', message: 'Gitea API 异常' };
  } catch {
    return { serviceName: 'Gitea', status: 'unhealthy', message: 'Gitea API 不可达' };
  }
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user