diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 139edc5..d165cb3 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -1371,3 +1371,16 @@ model LearningStats {
   @@unique([userId, date])
   @@index([userId])
 }
+
+model ServiceHealth {
+  id          String   @id @default(cuid())
+  serverName  String   @db.VarChar(100)
+  serviceName String   @db.VarChar(64)
+  status      String   @db.VarChar(16)
+  message     String?  @db.VarChar(500)
+  checkedAt   DateTime @default(now())
+
+  @@index([serverName])
+  @@index([serviceName])
+  @@index([checkedAt])
+}
diff --git a/src/modules/admin-servers/admin-servers.controller.ts b/src/modules/admin-servers/admin-servers.controller.ts
index 20de1b7..c842199 100644
--- a/src/modules/admin-servers/admin-servers.controller.ts
+++ b/src/modules/admin-servers/admin-servers.controller.ts
@@ -19,4 +19,11 @@ export class AdminServersController {
   async getMetrics(): Promise<{ servers: ServerInfo[] }> {
     return this.serversService.getAllMetrics();
   }
+
+  @Get('health')
+  @AdminRoles('SUPER_ADMIN' as AdminRole)
+  @ApiOperation({ summary: '服务健康检查(Docker/MySQL/Redis/Qdrant/Gitea/Nginx/Worker)' })
+  async getHealth() {
+    return this.serversService.getHealthChecks();
+  }
 }
diff --git a/src/modules/admin-servers/admin-servers.service.ts b/src/modules/admin-servers/admin-servers.service.ts
index 4bcca3b..0ba141d 100644
--- a/src/modules/admin-servers/admin-servers.service.ts
+++ b/src/modules/admin-servers/admin-servers.service.ts
@@ -152,4 +152,166 @@ export class AdminServersService {
     const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]);
     return { servers: [local, ...(remote ? [remote] : [])] };
   }
+
+  // ═══ Health Checks ═══
+
+  /**
+   * Collects service health for the local host and, when available, the remote host.
+   *
+   * The two probes do not depend on each other, so they are started together
+   * (as getAllMetrics does) instead of waiting for the local probe to finish first.
+   *
+   * @returns One `{ serverName, services }` entry per server; the remote entry is
+   * left out when checkRemote() resolves to null.
+   */
+  async getHealthChecks() {
+    const [local, remoteResult] = await Promise.all([this.checkLocal(), this.checkRemote()]);
+    const localResult = { serverName: '蜂驰云 8核32G', services: local };
+    return remoteResult ? [localResult, remoteResult] : [localResult];
+  }
+
+  /** Runs all local probes concurrently: listening ports, processes, Docker and the Gitea API. */
+  private async checkLocal() {
+    return Promise.all([
+      this.checkService('MySQL', 'mysql', 3306),
+      this.checkService('Redis', 'redis', 6379),
+      this.checkService('Qdrant', 'qdrant', 6333),
+      this.checkService('Nginx', 'nginx', 80),
+      this.checkProcess('NestJS API', 'dist/src/main.js'),
+      this.checkProcess('Gitea Runner', 'act_runner'),
+      this.checkProcess('Hermes Agent', 'hermes'),
+      this.checkDocker(),
+      this.checkGiteaUrl(),
+    ]);
+  }
+
+  /**
+   * Runs the remote probes concurrently, each over its own SSH invocation.
+   *
+   * @returns The remote server entry, or null if the aggregate step throws.
+   */
+  private async checkRemote() {
+    try {
+      const results = await Promise.all([
+        this.checkRemoteService('MySQL', 'mysql', 3306),
+        this.checkRemoteService('Redis', 'redis', 6379),
+        this.checkRemoteService('Nginx', 'nginx', 80),
+        this.checkRemoteProcess('NestJS API', 'dist/src/main.js'),
+        this.checkRemoteDocker(),
+      ]);
+      return { serverName: '轻量云 4核4G', services: results };
+    } catch {
+      // NOTE(review): every checkRemote* probe catches its own failure and resolves to an
+      // 'unhealthy' result, so an unreachable host shows up as unhealthy services, not null.
+      return null;
+    }
+  }
+
+  /** Treats a successful local `docker ps` as proof that the Docker daemon is reachable. */
+  private async checkDocker() {
+    try {
+      await execAsync('docker ps --format "{{.Names}}"', { timeout: 3000 });
+      return { serviceName: 'Docker', status: 'healthy', message: '运行中' };
+    } catch {
+      return { serviceName: 'Docker', status: 'unhealthy', message: 'Docker 不可达' };
+    }
+  }
+
+  /**
+   * Same probe as checkDocker, executed on the remote host over SSH.
+   *
+   * NOTE(review): StrictHostKeyChecking=no skips host-key verification; confirm this is intended.
+   */
+  private async checkRemoteDocker() {
+    try {
+      await execAsync(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} 'docker ps --format "{{.Names}}"'`, { timeout: 5000 });
+      return { serviceName: 'Docker', status: 'healthy', message: '运行中' };
+    } catch {
+      return { serviceName: 'Docker', status: 'unhealthy', message: 'Docker 不可达' };
+    }
+  }
+
+  /**
+   * Reports a service as healthy when `ss -tlnp` prints a line matching the given port.
+   *
+   * @param name Display name returned as serviceName.
+   * @param key Currently unused; kept so existing call sites stay valid.
+   * @param port Port number searched for in the `ss` output.
+   */
+  private async checkService(name: string, key: string, port: number) {
+    try {
+      const { stdout } = await execAsync(`ss -tlnp | grep ':${port} ' | head -1`, { timeout: 2000 });
+      const listening = stdout.trim().length > 0;
+      return { serviceName: name, status: listening ? 'healthy' : 'unhealthy', message: listening ? '端口监听中' : '端口未监听' };
+    } catch {
+      return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
+    }
+  }
+
+  /**
+   * Remote counterpart of checkService: runs the same `ss` pipeline on the remote host over SSH.
+   *
+   * @param name Display name returned as serviceName.
+   * @param key Currently unused; kept so existing call sites stay valid.
+   * @param port Port number searched for in the remote `ss` output.
+   */
+  private async checkRemoteService(name: string, key: string, port: number) {
+    try {
+      const { stdout } = await execAsync(
+        `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ss -tlnp | grep ':${port} ' | head -1"`,
+        { timeout: 5000 },
+      );
+      const listening = stdout.trim().length > 0;
+      return { serviceName: name, status: listening ? 'healthy' : 'unhealthy', message: listening ? '端口监听中' : '端口未监听' };
+    } catch {
+      return { serviceName: name, status: 'unhealthy', message: `端口 ${port} 不可达` };
+    }
+  }
+
+  /**
+   * Reports a process as healthy when `ps aux` prints a line matching the search pattern.
+   *
+   * @param name Display name returned as serviceName.
+   * @param search Pattern passed to grep; the grep processes themselves are filtered out.
+   */
+  private async checkProcess(name: string, search: string) {
+    try {
+      const { stdout } = await execAsync(`ps aux | grep -v grep | grep '${search}' | head -1`, { timeout: 2000 });
+      const running = stdout.trim().length > 0;
+      return { serviceName: name, status: running ? 'healthy' : 'unhealthy', message: running ? '进程活跃' : '进程未找到' };
+    } catch {
+      return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
+    }
+  }
+
+  /**
+   * Remote counterpart of checkProcess: runs the same `ps` pipeline on the remote host over SSH.
+   *
+   * @param name Display name returned as serviceName.
+   * @param search Pattern passed to grep on the remote host.
+   */
+  private async checkRemoteProcess(name: string, search: string) {
+    try {
+      const { stdout } = await execAsync(
+        `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} "ps aux | grep -v grep | grep '${search}' | head -1"`,
+        { timeout: 5000 },
+      );
+      const running = stdout.trim().length > 0;
+      return { serviceName: name, status: running ? 'healthy' : 'unhealthy', message: running ? '进程活跃' : '进程未找到' };
+    } catch {
+      return { serviceName: name, status: 'unhealthy', message: '进程检查失败' };
+    }
+  }
+
+  /** Requests the local Gitea version endpoint with curl; a status code starting with '2' is healthy. */
+  private async checkGiteaUrl() {
+    try {
+      const { stdout } = await execAsync("curl -s -o /dev/null -w '%{http_code}' http://localhost:3000/api/v1/version 2>/dev/null", { timeout: 3000 });
+      const code = stdout.trim();
+      const ok = code.startsWith('2');
+      return { serviceName: 'Gitea', status: ok ? 'healthy' : 'unhealthy', message: ok ? `HTTP ${code}` : 'Gitea API 异常' };
+    } catch {
+      return { serviceName: 'Gitea', status: 'unhealthy', message: 'Gitea API 不可达' };
+    }
+  }
 }