Add live VM CPU and memory usage via virsh domstats
Parse domstats --cpu-total --balloon for all VMs in a single call. Track CPU time deltas between samples to compute per-VM CPU %. Compute guest memory usage from balloon stats (available - unused). Split VM caching: base info (dominfo) 30s TTL, live stats 5s TTL. UI shows CPU % column (color-coded) and memory used/total with bars. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
156
app.py
156
app.py
@@ -26,10 +26,19 @@ IS_DASHBOARD = bool(servers)
|
|||||||
cpu_snapshot = {"cores": [], "overall": 0.0}
|
cpu_snapshot = {"cores": [], "overall": 0.0}
|
||||||
cpu_lock = threading.Lock()
|
cpu_lock = threading.Lock()
|
||||||
|
|
||||||
# VM cache
|
# VM base info cache (dominfo — slow, 30s TTL)
|
||||||
_vm_cache = {"data": [], "ts": 0}
|
_vm_base_cache = {"data": [], "ts": 0}
|
||||||
_vm_lock = threading.Lock()
|
_vm_base_lock = threading.Lock()
|
||||||
VM_CACHE_TTL = 10
|
VM_BASE_TTL = 30
|
||||||
|
|
||||||
|
# VM live stats cache (domstats — fast, 5s TTL)
|
||||||
|
_vm_live_cache = {"data": {}, "ts": 0}
|
||||||
|
_vm_live_lock = threading.Lock()
|
||||||
|
VM_LIVE_TTL = 5
|
||||||
|
|
||||||
|
# CPU delta tracking for VM CPU %
|
||||||
|
_prev_domstats = {"by_name": {}, "ts": 0}
|
||||||
|
_prev_domstats_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def parse_proc_stat():
|
def parse_proc_stat():
|
||||||
@@ -133,12 +142,106 @@ def get_uptime():
|
|||||||
return {"seconds": round(secs), "human": " ".join(parts)}
|
return {"seconds": round(secs), "human": " ".join(parts)}
|
||||||
|
|
||||||
|
|
||||||
def get_vms():
|
def parse_domstats(output=None):
    """Collect CPU-time and balloon counters for all VMs from ``virsh domstats``.

    Args:
        output: Optional pre-captured ``virsh domstats`` text. When ``None``
            (the default) the command is run once via ``sudo virsh`` with a
            10 second timeout.

    Returns:
        A dict ``{name: {cpu_time, balloon_available, balloon_unused,
        balloon_rss}}``. Each inner dict contains only the counters that were
        present and parseable; domains with none of them are omitted.
        Returns ``{}`` if the command cannot be run, times out, or exits
        non-zero.
    """
    if output is None:
        try:
            result = subprocess.run(
                ["sudo", "virsh", "domstats", "--cpu-total", "--balloon"],
                capture_output=True, text=True, timeout=10,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            # Best effort: missing binary, timeout or undecodable output
            # all mean "no stats available right now".
            return {}
        if result.returncode != 0:
            return {}
        output = result.stdout

    # virsh key -> key used in the returned per-domain dict.
    wanted = {
        "cpu.time": "cpu_time",
        "balloon.available": "balloon_available",
        "balloon.unused": "balloon_unused",
        "balloon.rss": "balloon_rss",
    }

    stats = {}
    current_name = None
    current = None
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if line.startswith("Domain:"):
            # Take everything between the first and last quote so that a
            # name containing an apostrophe is not cut short.
            first, last = line.find("'"), line.rfind("'")
            current_name = line[first + 1:last] if 0 <= first < last else None
            current = None
            continue
        if not current_name or "=" not in line:
            continue

        key, _, val = line.partition("=")
        field = wanted.get(key.strip())
        if field is None:
            continue
        try:
            value = int(val.strip())
        except ValueError:
            # One malformed counter must not discard every other VM's stats.
            continue

        if current is None:
            # Create the entry lazily so domains without any recognised
            # counter stay out of the result.
            current = stats[current_name] = {}
        current[field] = value
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def get_vm_live_stats():
    """Return live per-VM CPU and memory figures, cached for VM_LIVE_TTL seconds.

    Returns:
        A dict ``{name: {"raw_cpu_pct", "memory_used_mb", "memory_total_mb"}}``.
        ``raw_cpu_pct`` is the VM's CPU time consumed since the previous
        sample, expressed as a percentage of ONE core (so it can exceed 100
        for a multi-vCPU guest); it is 0.0 when no earlier sample exists for
        that VM. ``memory_total_mb`` is 0 when the balloon driver does not
        report ``available``.
    """
    # The lock is held for the whole refresh, not just the TTL check.
    # Otherwise concurrent callers that all miss the cache would each run
    # virsh, and the later ones would compute CPU % over a near-zero interval
    # against the baseline the first one just stored.
    with _vm_live_lock:
        if time.time() - _vm_live_cache["ts"] < VM_LIVE_TTL:
            return _vm_live_cache["data"]

        raw = parse_domstats()
        now = time.time()

        with _prev_domstats_lock:
            prev_cpu = _prev_domstats["by_name"]
            prev_ts = _prev_domstats["ts"]
            # ts == 0 means "no baseline yet"; dt stays 0 and CPU % is skipped.
            dt = now - prev_ts if prev_ts > 0 else 0

            live = {}
            for name, s in raw.items():
                cpu_pct = 0.0
                if dt > 0 and name in prev_cpu:
                    delta_ns = s.get("cpu_time", 0) - prev_cpu[name]
                    # A non-positive delta (e.g. counter reset) is reported as 0.
                    if delta_ns > 0:
                        # cpu.time is total across all vcpus, so divide by
                        # wall time only. This gives % of one CPU core;
                        # get_vms() normalizes it using the vcpu count.
                        cpu_pct = delta_ns / (dt * 1e9) * 100

                balloon_avail = s.get("balloon_available", 0)
                balloon_unused = s.get("balloon_unused", 0)
                balloon_rss = s.get("balloon_rss", 0)

                if balloon_avail > 0:
                    mem_total = balloon_avail // 1024  # KiB to MB
                    # Clamp so an inconsistent sample never yields negative usage.
                    mem_used = max(0, balloon_avail - balloon_unused) // 1024
                elif balloon_rss > 0:
                    # No guest-side figures: fall back to host RSS, total unknown.
                    mem_total = 0
                    mem_used = balloon_rss // 1024
                else:
                    mem_total = 0
                    mem_used = 0

                live[name] = {
                    "raw_cpu_pct": round(cpu_pct, 2),
                    "memory_used_mb": mem_used,
                    "memory_total_mb": mem_total,
                }

            # Store this sample as the baseline for the next delta.
            _prev_domstats["by_name"] = {
                n: s.get("cpu_time", 0) for n, s in raw.items()
            }
            _prev_domstats["ts"] = now

        _vm_live_cache["data"] = live
        _vm_live_cache["ts"] = now
        return live
|
||||||
|
|
||||||
|
|
||||||
|
def get_vm_base_info():
|
||||||
|
"""Get base VM info (dominfo). Cached for VM_BASE_TTL seconds."""
|
||||||
|
with _vm_base_lock:
|
||||||
|
now = time.time()
|
||||||
|
if now - _vm_base_cache["ts"] < VM_BASE_TTL:
|
||||||
|
return _vm_base_cache["data"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
@@ -169,24 +272,48 @@ def get_vms():
|
|||||||
elif key == "CPU(s)":
|
elif key == "CPU(s)":
|
||||||
vm["vcpus"] = int(val)
|
vm["vcpus"] = int(val)
|
||||||
elif key == "Max memory":
|
elif key == "Max memory":
|
||||||
# virsh reports in KiB
|
|
||||||
vm["memory_mb"] = int(val.split()[0]) // 1024
|
vm["memory_mb"] = int(val.split()[0]) // 1024
|
||||||
elif key == "Autostart":
|
elif key == "Autostart":
|
||||||
vm["autostart"] = val.lower() in ("enable", "enabled")
|
vm["autostart"] = val.lower() in ("enable", "enabled")
|
||||||
vms.append(vm)
|
vms.append(vm)
|
||||||
|
|
||||||
with _vm_lock:
|
with _vm_base_lock:
|
||||||
_vm_cache["data"] = vms
|
_vm_base_cache["data"] = vms
|
||||||
_vm_cache["ts"] = time.time()
|
_vm_base_cache["ts"] = time.time()
|
||||||
return vms
|
return vms
|
||||||
except Exception:
|
except Exception:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_vms():
    """Return the VM list with live CPU % and memory usage merged into each entry.

    Every dict from get_vm_base_info() is copied and extended with
    ``cpu_percent``, ``memory_used_mb`` and ``memory_total_mb``. Live figures
    are used only for VMs that are running and have an entry in
    get_vm_live_stats(); all other VMs get zeroed usage.
    """
    base_vms = get_vm_base_info()
    live_stats = get_vm_live_stats()

    merged = []
    for base_vm in base_vms:
        entry = dict(base_vm)  # copy, so the cached base record is not mutated
        sample = live_stats.get(entry["name"])
        running = entry["state"] == "running"

        if sample and running:
            # raw_cpu_pct is % of one core; spread it over the vCPUs to get
            # a 0-100 figure for the VM as a whole.
            per_vm_pct = sample["raw_cpu_pct"] / (entry["vcpus"] or 1)
            entry["cpu_percent"] = round(max(0, min(100, per_vm_pct)), 1)
            entry["memory_used_mb"] = sample["memory_used_mb"]
            # Fall back to the configured size when no live total is reported.
            entry["memory_total_mb"] = sample["memory_total_mb"] or entry["memory_mb"]
        else:
            entry["cpu_percent"] = 0.0
            entry["memory_used_mb"] = 0
            entry["memory_total_mb"] = entry["memory_mb"] if running else 0

        merged.append(entry)
    return merged
|
||||||
|
|
||||||
|
|
||||||
def get_local_stats():
|
def get_local_stats():
|
||||||
with cpu_lock:
|
with cpu_lock:
|
||||||
snap = cpu_snapshot.copy()
|
snap = cpu_snapshot.copy()
|
||||||
stats = {
|
return {
|
||||||
"cores": snap["cores"],
|
"cores": snap["cores"],
|
||||||
"overall_cpu": snap["overall"],
|
"overall_cpu": snap["overall"],
|
||||||
"memory": get_memory(),
|
"memory": get_memory(),
|
||||||
@@ -195,7 +322,6 @@ def get_local_stats():
|
|||||||
"num_cores": len(snap["cores"]),
|
"num_cores": len(snap["cores"]),
|
||||||
"vms": get_vms(),
|
"vms": get_vms(),
|
||||||
}
|
}
|
||||||
return stats
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_remote_stats(url, timeout=3):
|
def fetch_remote_stats(url, timeout=3):
|
||||||
|
|||||||
@@ -153,6 +153,13 @@
|
|||||||
.vm-state.shut-off { background: #484f58; color: #c9d1d9; }
|
.vm-state.shut-off { background: #484f58; color: #c9d1d9; }
|
||||||
.vm-state.other { background: #d29922; color: #fff; }
|
.vm-state.other { background: #d29922; color: #fff; }
|
||||||
.vm-none { color: #484f58; font-style: italic; padding: 16px; }
|
.vm-none { color: #484f58; font-style: italic; padding: 16px; }
|
||||||
|
.vm-mem-bar {
|
||||||
|
display: inline-block; width: 60px; height: 8px; background: #21262d;
|
||||||
|
border-radius: 4px; overflow: hidden; vertical-align: middle; margin-left: 6px;
|
||||||
|
}
|
||||||
|
.vm-mem-fill { height: 100%; border-radius: 4px; transition: width 0.3s; }
|
||||||
|
.vm-table td.vm-cpu { font-weight: bold; }
|
||||||
|
.vm-table td.vm-mem { white-space: nowrap; }
|
||||||
|
|
||||||
@media (max-width: 700px) {
|
@media (max-width: 700px) {
|
||||||
body { padding: 12px; }
|
body { padding: 12px; }
|
||||||
@@ -261,7 +268,15 @@ function renderDashboard(servers) {
|
|||||||
'<div class="card-stat-value">' + srv.uptime.human + '</div>' +
|
'<div class="card-stat-value">' + srv.uptime.human + '</div>' +
|
||||||
'</div>' +
|
'</div>' +
|
||||||
'</div>' +
|
'</div>' +
|
||||||
(vmCount > 0 ? '<div style="margin-top:10px;font-size:12px;color:#8b949e">VMs: ' + runningVms + ' running / ' + vmCount + ' total</div>' : '') +
|
(vmCount > 0 ? (function() {
|
||||||
|
const running = (srv.vms || []).filter(v => v.state === 'running');
|
||||||
|
const vmMemUsed = running.reduce((s, v) => s + (v.memory_used_mb || 0), 0);
|
||||||
|
const vmMemTotal = running.reduce((s, v) => s + (v.memory_total_mb || v.memory_mb || 0), 0);
|
||||||
|
return '<div style="margin-top:10px;font-size:12px;color:#8b949e">' +
|
||||||
|
'VMs: ' + runningVms + ' running / ' + vmCount + ' total' +
|
||||||
|
(vmMemTotal > 0 ? ' · Mem: ' + formatMB(vmMemUsed) + ' / ' + formatMB(vmMemTotal) : '') +
|
||||||
|
'</div>';
|
||||||
|
})() : '') +
|
||||||
'</div>';
|
'</div>';
|
||||||
}
|
}
|
||||||
html += '</div>';
|
html += '</div>';
|
||||||
@@ -338,14 +353,24 @@ function renderDetail(srv) {
|
|||||||
return a.name.localeCompare(b.name);
|
return a.name.localeCompare(b.name);
|
||||||
});
|
});
|
||||||
html += '<table class="vm-table"><thead><tr>' +
|
html += '<table class="vm-table"><thead><tr>' +
|
||||||
'<th>Name</th><th>State</th><th>vCPUs</th><th>RAM</th><th>Autostart</th>' +
|
'<th>Name</th><th>State</th><th>CPU</th><th>Memory</th><th>vCPUs</th><th>Autostart</th>' +
|
||||||
'</tr></thead><tbody>';
|
'</tr></thead><tbody>';
|
||||||
for (const vm of sorted) {
|
for (const vm of sorted) {
|
||||||
|
const isRunning = vm.state === 'running';
|
||||||
|
const cpuPct = vm.cpu_percent || 0;
|
||||||
|
const memUsed = vm.memory_used_mb || 0;
|
||||||
|
const memTotal = vm.memory_total_mb || vm.memory_mb || 0;
|
||||||
|
const memPct = memTotal > 0 ? (memUsed / memTotal * 100) : 0;
|
||||||
html += '<tr>' +
|
html += '<tr>' +
|
||||||
'<td>' + vm.name + '</td>' +
|
'<td>' + vm.name + '</td>' +
|
||||||
'<td><span class="vm-state ' + vmStateClass(vm.state) + '">' + vm.state + '</span></td>' +
|
'<td><span class="vm-state ' + vmStateClass(vm.state) + '">' + vm.state + '</span></td>' +
|
||||||
|
'<td class="vm-cpu" style="color:' + (isRunning ? usageColor(cpuPct) : '#484f58') + '">' + (isRunning ? cpuPct + '%' : '—') + '</td>' +
|
||||||
|
'<td class="vm-mem">' + (isRunning && memTotal > 0 ?
|
||||||
|
formatMB(memUsed) + ' / ' + formatMB(memTotal) +
|
||||||
|
'<div class="vm-mem-bar"><div class="vm-mem-fill" style="width:' + memPct + '%;background:' + usageColor(memPct) + '"></div></div>'
|
||||||
|
: (isRunning ? formatMB(vm.memory_mb) : '—')) +
|
||||||
|
'</td>' +
|
||||||
'<td>' + vm.vcpus + '</td>' +
|
'<td>' + vm.vcpus + '</td>' +
|
||||||
'<td>' + formatMB(vm.memory_mb) + '</td>' +
|
|
||||||
'<td>' + (vm.autostart ? 'yes' : 'no') + '</td>' +
|
'<td>' + (vm.autostart ? 'yes' : 'no') + '</td>' +
|
||||||
'</tr>';
|
'</tr>';
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user