fix: cleanly rewrite remote metrics collection to use individual SSH calls
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 36s

This commit is contained in:
WangDL 2026-05-22 14:04:14 +08:00
parent 27dfc1c028
commit 3079b2a18e

View File

@ -20,7 +20,7 @@ export interface ServerInfo {
const SSH_KEY_PATH = process.env.SSH_KEY_PATH || '/home/ubuntu/.ssh/wangdl.pem'; const SSH_KEY_PATH = process.env.SSH_KEY_PATH || '/home/ubuntu/.ssh/wangdl.pem';
const REMOTE_HOST = '10.2.0.7'; const REMOTE_HOST = '10.2.0.7';
const PROCESS_ALIASES: Record<string, { name: string; desc: string }> = { const ALIASES: Record<string, { name: string; desc: string }> = {
'mysqld': { name: 'MySQL 8.0', desc: '业务数据库' }, 'mysqld': { name: 'MySQL 8.0', desc: '业务数据库' },
'redis-server': { name: 'Redis 7', desc: '缓存/队列' }, 'redis-server': { name: 'Redis 7', desc: '缓存/队列' },
'qdrant': { name: 'Qdrant', desc: '向量索引库' }, 'qdrant': { name: 'Qdrant', desc: '向量索引库' },
@ -39,117 +39,104 @@ const PROCESS_ALIASES: Record<string, { name: string; desc: string }> = {
'barad_agent': { name: '云监控上报', desc: '指标采集' }, 'barad_agent': { name: '云监控上报', desc: '指标采集' },
}; };
function friendlyProcess(cmd: string): { name: string; desc: string } { function friendly(cmd: string): { name: string; desc: string } {
for (const [pattern, info] of Object.entries(PROCESS_ALIASES)) for (const [p, info] of Object.entries(ALIASES))
if (cmd.includes(pattern)) return info; if (cmd.includes(p)) return info;
const short = cmd.split('/').pop()?.slice(0, 20) || cmd.slice(0, 20); const s = cmd.split('/').pop()?.slice(0, 20) || cmd.slice(0, 18);
return { name: short, desc: '' }; return { name: s, desc: '' };
} }
// Parse "ps auxww" output: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND...
function parsePsLine(line: string): ProcessInfo { function parsePsLine(line: string): ProcessInfo {
const parts = line.trim().split(/\s+/); const p = line.trim().split(/\s+/);
// columns: 0=USER 1=PID 2=%CPU 3=%MEM 4=VSZ 5=RSS 6=TTY 7=STAT 8=START 9=TIME 10...=COMMAND const pid = parseInt(p[1]) || 0; // col 1 = PID
const pid = parseInt(parts[1]) || 0; const cpu = (p[2] || '0') + '%';
const cpu = (parts[2] || '0') + '%'; const mem = (p[3] || '0') + '%';
const mem = (parts[3] || '0') + '%'; const cmd = p.slice(10).join(' ');
const cmd = parts.slice(10).join(' '); const info = friendly(cmd);
const info = friendlyProcess(cmd);
return { pid, cpu, mem, name: info.name, desc: info.desc, command: cmd.slice(0, 80) }; return { pid, cpu, mem, name: info.name, desc: info.desc, command: cmd.slice(0, 80) };
} }
/**
 * Formats an uptime duration as days, hours and minutes with Chinese unit
 * suffixes (天 / 时 / 分), e.g. 90061 seconds becomes "1天1时1分".
 *
 * The unit suffixes are required for the result to be unambiguous: without
 * them, 11 days 2 hours 3 minutes and 1 day 12 hours 3 minutes would both
 * render as "1123".
 *
 * @param sec Uptime in seconds. Fractional seconds are truncated; negative or
 *            non-finite values are treated as 0.
 * @returns The formatted duration; all three components are always present.
 */
function chineseUptime(sec: number): string {
  // Clamp bad input so the result never contains "NaN" or negative parts.
  const total = Number.isFinite(sec) && sec > 0 ? Math.floor(sec) : 0;
  const d = Math.floor(total / 86400);
  const h = Math.floor((total % 86400) / 3600);
  const m = Math.floor((total % 3600) / 60);
  return `${d}天${h}时${m}分`;
}
@Injectable() @Injectable()
export class AdminServersService { export class AdminServersService {
private readonly logger = new Logger(AdminServersService.name); private readonly logger = new Logger(AdminServersService.name);
async getLocalMetrics(): Promise<ServerInfo> { async getLocalMetrics(): Promise<ServerInfo> {
const cpus = os.cpus(); const cpus = os.cpus();
const totalMem = os.totalmem(); const total = os.totalmem(), free = os.freemem(), used = total - free;
const freeMem = os.freemem(); const cpuPct = Math.min(100, Math.round((os.loadavg()[0] / cpus.length) * 100));
const usedMem = totalMem - freeMem;
const cpuUsage = Math.min(100, Math.round((os.loadavg()[0] / cpus.length) * 100));
// Disks const disks: DiskInfo[] = [];
const diskResults: DiskInfo[] = [];
for (const m of ['/', '/data']) { for (const m of ['/', '/data']) {
try { try {
const { stdout } = await execAsync(`df -h ${m} | tail -1 | awk '{print $2","$3","$4","$5}'`); const { stdout } = await execAsync(`df -h ${m} | tail -1 | awk '{printf "%s,%s,%s,%d",$2,$3,$4,int($5)}'`);
const parts = stdout.trim().split(','); const parts = stdout.trim().split(',');
if (parts.length >= 3) diskResults.push({ mount: m, total: parts[0], used: parts[1], free: parts[2], percent: parseInt(parts[4]) || parseInt(parts[3]) || 0 }); if (parts.length >= 3) disks.push({ mount: m, total: parts[0], used: parts[1], free: parts[2], percent: parseInt(parts[3]) || 0 });
} catch { diskResults.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 }) } else disks.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 });
} catch { disks.push({ mount: m, total: '-', used: '-', free: '-', percent: 0 }) }
} }
// Processes — use auxww for full command line let procs: ProcessInfo[] = [];
let processes: ProcessInfo[] = [];
try { try {
const { stdout } = await execAsync("ps auxww --sort=-%mem --no-headers 2>/dev/null | head -10"); const { stdout } = await execAsync("ps auxww --sort=-%mem --no-headers | head -10");
processes = stdout.trim().split('\n').filter(Boolean).map(parsePsLine); procs = stdout.trim().split('\n').filter(l => l).map(parsePsLine);
} catch {} } catch {}
const nets = os.networkInterfaces(); const nets = os.networkInterfaces();
const privateIp = Object.values(nets).flat().find(n => n?.family === 'IPv4' && !n.internal)?.address || '172.21.0.4'; const privIp = Object.values(nets).flat().find(n => n?.family === 'IPv4' && !n.internal)?.address || '172.21.0.4';
const u = os.uptime();
const d = Math.floor(u / 86400), h = Math.floor((u % 86400) / 3600), m = Math.floor((u % 3600) / 60);
return { return {
name: '蜂驰云 8核32G', role: '生产核心', hostname: os.hostname(), name: '蜂驰云 8核32G', role: '生产核心', hostname: os.hostname(),
cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuUsage }, cpu: { model: cpus[0]?.model || '', cores: cpus.length, usagePercent: cpuPct },
memory: { total: (totalMem / 1e9).toFixed(1) + 'G', used: (usedMem / 1e9).toFixed(1) + 'G', free: (freeMem / 1e9).toFixed(1) + 'G', percent: Math.round((usedMem / totalMem) * 100) }, memory: { total: (total / 1e9).toFixed(1) + 'G', used: (used / 1e9).toFixed(1) + 'G', free: (free / 1e9).toFixed(1) + 'G', percent: Math.round((used / total) * 100) },
disks: diskResults, uptime: `${d}${h}${m}`, processes, disks, uptime: chineseUptime(os.uptime()), processes: procs,
network: { publicIp: '120.53.227.155', privateIp, domains: ['api.longde.cloud', 'admin.longde.cloud'] }, network: { publicIp: '120.53.227.155', privateIp: privIp, domains: ['api.longde.cloud', 'admin.longde.cloud'] },
}; };
} }
async getRemoteMetrics(): Promise<ServerInfo | null> { async getRemoteMetrics(): Promise<ServerInfo | null> {
const run = (cmd: string) =>
execAsync(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} '${cmd.replace(/'/g, "'\\''")}'`, { timeout: 8000 })
.then(r => r.stdout.trim()).catch(() => '');
try { try {
// Single SSH to collect all metrics const [hostname, privIp, load, cores, memStr, diskRoot, diskData, uptimeRaw, procRaw] = await Promise.all([
const script = `echo "==HOST==" && hostname run('hostname'),
echo "==IP==" && hostname -I | awk '{print \$1}' run("hostname -I | awk '{print $1}'"),
echo "==LOAD==" && cat /proc/loadavg | awk '{print \$1}' run("cat /proc/loadavg | awk '{print $1}'"),
echo "==CORES==" && cat /proc/cpuinfo | grep processor | wc -l run('cat /proc/cpuinfo | grep processor | wc -l'),
echo "==MEM==" && free -m | awk '/Mem/{printf "%.1fG,%.1fG,%.1fG,%d\n",\$2/1024,\$3/1024,\$4/1024,int(\$3/\$2*100)}' run("free -m | awk '/Mem/{printf \"%.1fG,%.1fG,%.1fG,%d\",$2/1024,$3/1024,$4/1024,int($3/$2*100)}'"),
echo "==DISKROOT==" && df -h / | awk 'NR==2{printf "%s,%s,%s,%d\n",\$2,\$3,\$4,int(\$5)}' run("df -h / | awk 'NR==2{printf \"%s,%s,%s,%d\",$2,$3,$4,int($5)}'"),
echo "==DISKDATA==" && df -h /data 2>/dev/null | awk 'NR==2{printf "%s,%s,%s,%d\n",\$2,\$3,\$4,int(\$5)}' run("df -h /data 2>/dev/null | awk 'NR==2{printf \"%s,%s,%s,%d\",$2,$3,$4,int($5)}'"),
echo "==UPTIME==" && uptime -p | sed 's/up //' run("uptime -p | sed 's/up //'"),
echo "==PROCS==" && ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8`; run("ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8"),
]);
const { stdout } = await execAsync( const load1 = parseFloat(load) || 0;
`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=8 -i ${SSH_KEY_PATH} ubuntu@${REMOTE_HOST} '${script}'`, const cpuCores = parseInt(cores) || 4;
{ timeout: 12000 }, const cpuPct = Math.min(100, Math.round((load1 / Math.max(cpuCores, 1)) * 100));
); const memParts = memStr.split(',');
const memPct = parseInt(memParts[3]) || 0;
const drParts = diskRoot.split(',');
const ddParts = diskData.split(',');
const disks: DiskInfo[] = [{ mount: '/', total: drParts[0] || '-', used: drParts[1] || '-', free: drParts[2] || '-', percent: parseInt(drParts[3]) || 0 }];
if (ddParts.length >= 3) disks.push({ mount: '/data', total: ddParts[0], used: ddParts[1], free: ddParts[2], percent: parseInt(ddParts[3]) || 0 });
// Parse sections const procs: ProcessInfo[] = procRaw.split('\n').filter(l => /^\S+\s+\d+\s/.test(l)).map(parsePsLine);
const lines = stdout.split('\n');
const get = (tag: string) => lines.find(l => l.startsWith(`==${tag}==`))?.replace(`==${tag}==`, '').trim() || '';
const hostname = get('HOST') || 'remote'; let up = uptimeRaw || '';
const privateIp = get('IP') || '10.2.0.7';
const load1 = parseFloat(get('LOAD')) || 0;
const cores = parseInt(get('CORES')) || 4;
const cpuUsage = Math.min(100, Math.round((load1 / Math.max(cores, 1)) * 100));
const memParts = get('MEM').split(',');
const memPercent = parseInt(memParts[3]) || 0;
const diskRootParts = get('DISKROOT').split(',');
const diskDataParts = get('DISKDATA').split(',');
const disks: DiskInfo[] = [];
if (diskRootParts.length >= 3) disks.push({ mount: '/', total: diskRootParts[0], used: diskRootParts[1], free: diskRootParts[2], percent: parseInt(diskRootParts[3]) || 0 });
else disks.push({ mount: '/', total: '-', used: '-', free: '-', percent: 0 });
if (diskDataParts.length >= 3) disks.push({ mount: '/data', total: diskDataParts[0], used: diskDataParts[1], free: diskDataParts[2], percent: parseInt(diskDataParts[3]) || 0 });
const procLines = lines.filter(l => !l.startsWith('==') && !l.startsWith('USER') && /^\S+\s+\d+\s/.test(l));
const processes: ProcessInfo[] = procLines.map(parsePsLine);
let up = get('UPTIME');
up = up.replace(/(\d+)\s+weeks?,?\s*/g, '$1周').replace(/(\d+)\s+days?,?\s*/g, '$1天').replace(/(\d+)\s+hours?,?\s*/g, '$1时').replace(/(\d+)\s+minutes?/g, '$1分'); up = up.replace(/(\d+)\s+weeks?,?\s*/g, '$1周').replace(/(\d+)\s+days?,?\s*/g, '$1天').replace(/(\d+)\s+hours?,?\s*/g, '$1时').replace(/(\d+)\s+minutes?/g, '$1分');
return { return {
name: '轻量云 4核4G', role: '工具/辅助', hostname, name: '轻量云 4核4G', role: '工具/辅助', hostname: hostname || 'remote',
cpu: { model: 'Intel Xeon (Lighthouse)', cores, usagePercent: cpuUsage }, cpu: { model: 'Intel Xeon (Lighthouse)', cores: cpuCores, usagePercent: cpuPct },
memory: { total: memParts[0] || '-', used: memParts[1] || '-', free: memParts[2] || '-', percent: memPercent }, memory: { total: memParts[0] || '-', used: memParts[1] || '-', free: memParts[2] || '-', percent: memPct },
disks, uptime: up || '-', processes, disks, uptime: up || '-', processes: procs,
network: { publicIp: '81.70.187.179', privateIp, domains: ['longde.cloud', 'git.longde.cloud'] }, network: { publicIp: '81.70.187.179', privateIp: privIp || '10.2.0.7', domains: ['longde.cloud', 'git.longde.cloud'] },
}; };
} catch (err: any) { } catch (err: any) {
this.logger.warn('Remote metrics failed: ' + err.message); this.logger.warn('Remote metrics failed: ' + err.message);
@ -159,8 +146,6 @@ echo "==PROCS==" && ps auxww --sort=-%mem --no-headers 2>/dev/null | head -8`;
async getAllMetrics() { async getAllMetrics() {
const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]); const [local, remote] = await Promise.all([this.getLocalMetrics(), this.getRemoteMetrics()]);
const servers = [local]; return { servers: [local, ...(remote ? [remote] : [])] };
if (remote) servers.push(remote);
return { servers };
} }
} }