From 3079b2a18ef09edc0b0efb272066886775fb3638 Mon Sep 17 00:00:00 2001 From: WangDL Date: Fri, 22 May 2026 14:04:14 +0800 Subject: [PATCH] fix: clean rewrite remote metrics with individual SSH calls --- .../admin-servers/admin-servers.service.ts | 143 ++++++++---------- 1 file changed, 64 insertions(+), 79 deletions(-) diff --git a/src/modules/admin-servers/admin-servers.service.ts b/src/modules/admin-servers/admin-servers.service.ts index dc74dab..cf7fdbd 100644 --- a/src/modules/admin-servers/admin-servers.service.ts +++ b/src/modules/admin-servers/admin-servers.service.ts @@ -20,7 +20,7 @@ export interface ServerInfo { const SSH_KEY_PATH = process.env.SSH_KEY_PATH || '/home/ubuntu/.ssh/wangdl.pem'; const REMOTE_HOST = '10.2.0.7'; -const PROCESS_ALIASES: Record = { +const ALIASES: Record = { 'mysqld': { name: 'MySQL 8.0', desc: '业务数据库' }, 'redis-server': { name: 'Redis 7', desc: '缓存/队列' }, 'qdrant': { name: 'Qdrant', desc: '向量索引库' }, @@ -39,117 +39,104 @@ const PROCESS_ALIASES: Record = { 'barad_agent': { name: '云监控上报', desc: '指标采集' }, }; -function friendlyProcess(cmd: string): { name: string; desc: string } { - for (const [pattern, info] of Object.entries(PROCESS_ALIASES)) - if (cmd.includes(pattern)) return info; - const short = cmd.split('/').pop()?.slice(0, 20) || cmd.slice(0, 20); - return { name: short, desc: '' }; +function friendly(cmd: string): { name: string; desc: string } { + for (const [p, info] of Object.entries(ALIASES)) + if (cmd.includes(p)) return info; + const s = cmd.split('/').pop()?.slice(0, 20) || cmd.slice(0, 18); + return { name: s, desc: '' }; } -// Parse "ps auxww" output: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND... function parsePsLine(line: string): ProcessInfo { - const parts = line.trim().split(/\s+/); - // columns: 0=USER 1=PID 2=%CPU 3=%MEM 4=VSZ 5=RSS 6=TTY 7=STAT 8=START 9=TIME 10...=COMMAND - const pid = parseInt(parts[1]) || 0; - const cpu = (parts[2] || '0') + '%'; - const mem = (parts[3] || '0') + '%'; - const cmd = parts.slice(10).join(' '); - const info = friendlyProcess(cmd); + const p = line.trim().split(/\s+/); + const pid = parseInt(p[1]) || 0; // col 1 = PID + const cpu = (p[2] || '0') + '%'; + const mem = (p[3] || '0') + '%'; + const cmd = p.slice(10).join(' '); + const info = friendly(cmd); return { pid, cpu, mem, name: info.name, desc: info.desc, command: cmd.slice(0, 80) }; } +function chineseUptime(sec: number): string { + const d = Math.floor(sec / 86400), h = Math.floor((sec % 86400) / 3600), m = Math.floor((sec % 3600) / 60); + return `${d}天${h}时${m}分`; +} + @Injectable() export class AdminServersService { private readonly logger = new Logger(AdminServersService.name); async getLocalMetrics(): Promise { const cpus = os.cpus(); - const totalMem = os.totalmem(); - const freeMem = os.freemem(); - const usedMem = totalMem - freeMem; - const cpuUsage = Math.min(100, Math.round((os.loadavg()[0] / cpus.length) * 100)); + const total = os.totalmem(), free = os.freemem(), used = total - free; + const cpuPct = Math.min(100, Math.round((os.loadavg()[0] / cpus.length) * 100)); - // Disks - const diskResults: DiskInfo[] = []; + const disks: DiskInfo[] = []; for (const m of ['/', '/data']) { try { - const { stdout } = await execAsync(`df -h ${m} | tail -1 | awk '{print $2","$3","$4","$5}'`); + const { stdout } = await execAsync(`df -h ${m} | tail -1 | awk '{printf "%s,%s,%s,%d",$2,$3,$4,int($5)}'`); const parts = stdout.trim().split(','); - if (parts.length >= 3) diskResults.push({ mount: m, total: parts[0], used: parts[1], free: parts[2], percent: parseInt(parts[4]) || parseInt(parts[3]) || 0 }); - } catch { diskResults.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 }) } + if (parts.length >= 3) disks.push({ mount: m, total: parts[0], used: parts[1], free: parts[2], percent: parseInt(parts[3]) || 0 }); + else disks.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 }); + } catch { disks.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 }) } } - // Processes — use auxww for full command line - let processes: ProcessInfo[] = []; + let procs: ProcessInfo[] = []; try { - const { stdout } = await execAsync("ps auxww --sort=-%mem --no-headers 2>/dev/null | head -10"); - processes = stdout.trim().split('\n').filter(Boolean).map(parsePsLine); + const { stdout } = await execAsync("ps auxww --sort=-%mem --no-headers | head -10"); + procs = stdout.trim().split('\n').filter(l => l).map(parsePsLine); } catch {} const nets = os.networkInterfaces(); - const privateIp = Object.values(nets).flat().find(n => n?.family === 'IPv4' && !n.internal)?.address || '172.21.0.4'; - const u = os.uptime(); - const d = Math.floor(u / 86400), h = Math.floor((u % 86400) / 3600), m = Math.floor((u % 3600) / 60); + const privIp = Object.values(nets).flat().find(n => n?.family === 'IPv4' && !n.internal)?.address || '172.21.0.4'; return { name: '蜂驰云 8核32G', role: '生产核心', hostname: os.hostname(), - cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuUsage }, - memory: { total: (totalMem / 1e9).toFixed(1) + 'G', used: (usedMem / 1e9).toFixed(1) + 'G', free: (freeMem / 1e9).toFixed(1) + 'G', percent: Math.round((usedMem / totalMem) * 100) }, - disks: diskResults, uptime: `${d}天${h}时${m}分`, processes, - network: { publicIp: '120.53.227.155', privateIp, domains: ['api.longde.cloud', 'admin.longde.cloud'] }, + cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuPct }, + memory: { total: (total / 1e9).toFixed(1) + 'G', used: (used / 1e9).toFixed(1) + 'G', free: (free / 1e9).toFixed(1) + 'G', percent: Math.round((used / total) * 100) }, + disks, uptime: chineseUptime(os.uptime()), processes: procs, + network: { publicIp: '120.53.227.155', privateIp: privIp, domains: ['api.longde.cloud', 'admin.longde.cloud'] }, }; } async getRemoteMetrics(): Promise { + const run = (cmd: string) => + execAsync(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} '${cmd.replace(/'/g, "'\\''")}'`, { timeout: 8000 }) + .then(r => r.stdout.trim()).catch(() => ''); + try { - // Single SSH to collect all metrics - const script = `echo "==HOST==" && hostname -echo "==IP==" && hostname -I | awk '{print \$1}' -echo "==LOAD==" && cat /proc/loadavg | awk '{print \$1}' -echo "==CORES==" && cat /proc/cpuinfo | grep processor | wc -l -echo "==MEM==" && free -m | awk '/Mem/{printf "%.1fG,%.1fG,%.1fG,%d\n",\$2/1024,\$3/1024,\$4/1024,int(\$3/\$2*100)}' -echo "==DISKROOT==" && df -h / | awk 'NR==2{printf "%s,%s,%s,%d\n",\$2,\$3,\$4,int(\$5)}' -echo "==DISKDATA==" && df -h /data 2>/dev/null | awk 'NR==2{printf "%s,%s,%s,%d\n",\$2,\$3,\$4,int(\$5)}' -echo "==UPTIME==" && uptime -p | sed 's/up //' -echo "==PROCS==" && ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8`; + const [hostname, privIp, load, cores, memStr, diskRoot, diskData, uptimeRaw, procRaw] = await Promise.all([ + run('hostname'), + run("hostname -I | awk '{print $1}'"), + run("cat /proc/loadavg | awk '{print $1}'"), + run('cat /proc/cpuinfo | grep processor | wc -l'), + run("free -m | awk '/Mem/{printf \"%.1fG,%.1fG,%.1fG,%d\",$2/1024,$3/1024,$4/1024,int($3/$2*100)}'"), + run("df -h / | awk 'NR==2{printf \"%s,%s,%s,%d\",$2,$3,$4,int($5)}'"), + run("df -h /data 2>/dev/null | awk 'NR==2{printf \"%s,%s,%s,%d\",$2,$3,$4,int($5)}'"), + run("uptime -p | sed 's/up //'"), + run("ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8"), + ]); - const { stdout } = await execAsync( - `ssh -o StrictHostKeyChecking=no -o ConnectTimeout=8 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} '${script}'`, - { timeout: 12000 }, - ); + const load1 = parseFloat(load) || 0; + const cpuCores = parseInt(cores) || 4; + const cpuPct = Math.min(100, Math.round((load1 / Math.max(cpuCores, 1)) * 100)); + const memParts = memStr.split(','); + const memPct = parseInt(memParts[3]) || 0; + const drParts = diskRoot.split(','); + const ddParts = diskData.split(','); + const disks: DiskInfo[] = [{ mount: '/', total: drParts[0] || '-', used: drParts[1] || '-', free: drParts[2] || '-', percent: parseInt(drParts[3]) || 0 }]; + if (ddParts.length >= 3) disks.push({ mount: '/data', total: ddParts[0], used: ddParts[1], free: ddParts[2], percent: parseInt(ddParts[3]) || 0 }); - // Parse sections - const lines = stdout.split('\n'); - const get = (tag: string) => lines.find(l => l.startsWith(`==${tag}==`))?.replace(`==${tag}==`, '').trim() || ''; + const procs: ProcessInfo[] = procRaw.split('\n').filter(l => /^\S+\s+\d+\s/.test(l)).map(parsePsLine); - const hostname = get('HOST') || 'remote'; - const privateIp = get('IP') || '10.2.0.7'; - const load1 = parseFloat(get('LOAD')) || 0; - const cores = parseInt(get('CORES')) || 4; - const cpuUsage = Math.min(100, Math.round((load1 / Math.max(cores, 1)) * 100)); - - const memParts = get('MEM').split(','); - const memPercent = parseInt(memParts[3]) || 0; - - const diskRootParts = get('DISKROOT').split(','); - const diskDataParts = get('DISKDATA').split(','); - const disks: DiskInfo[] = []; - if (diskRootParts.length >= 3) disks.push({ mount: '/', total: diskRootParts[0], used: diskRootParts[1], free: diskRootParts[2], percent: parseInt(diskRootParts[3]) || 0 }); - else disks.push({ mount: '/', total: '-', used: '-', free: '-', percent: 0 }); - if (diskDataParts.length >= 3) disks.push({ mount: '/data', total: diskDataParts[0], used: diskDataParts[1], free: diskDataParts[2], percent: parseInt(diskDataParts[3]) || 0 }); - - const procLines = lines.filter(l => !l.startsWith('==') && !l.startsWith('USER') && /^\S+\s+\d+\s/.test(l)); - const processes: ProcessInfo[] = procLines.map(parsePsLine); - - let up = get('UPTIME'); + let up = uptimeRaw || ''; up = up.replace(/(\d+)\s+weeks?,?\s*/g, '$1周').replace(/(\d+)\s+days?,?\s*/g, '$1天').replace(/(\d+)\s+hours?,?\s*/g, '$1时').replace(/(\d+)\s+minutes?/g, '$1分'); return { - name: '轻量云 4核4G', role: '工具/辅助', hostname, - cpu: { model: 'Intel Xeon (Lighthouse)', cores, usagePercent: cpuUsage }, - memory: { total: memParts[0] || '-', used: memParts[1] || '-', free: memParts[2] || '-', percent: memPercent }, - disks, uptime: up || '-', processes, - network: { publicIp: '81.70.187.179', privateIp, domains: ['longde.cloud', 'git.longde.cloud'] }, + name: '轻量云 4核4G', role: '工具/辅助', hostname: hostname || 'remote', + cpu: { model: 'Intel Xeon (Lighthouse)', cores: cpuCores, usagePercent: cpuPct }, + memory: { total: memParts[0] || '-', used: memParts[1] || '-', free: memParts[2] || '-', percent: memPct }, + disks, uptime: up || '-', processes: procs, + network: { publicIp: '81.70.187.179', privateIp: privIp || '10.2.0.7', domains: ['longde.cloud', 'git.longde.cloud'] }, }; } catch (err: any) { this.logger.warn('Remote metrics failed: ' + err.message); @@ -159,8 +146,6 @@ echo "==PROCS==" && ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8`; async getAllMetrics() { const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]); - const servers = [local]; - if (remote) servers.push(remote); - return { servers }; + return { servers: [local, ...(remote ? [remote] : [])] }; } }