Add live VM CPU and memory usage via virsh domstats

Parse domstats --cpu-total --balloon for all VMs in a single call.
Track CPU time deltas between samples to compute per-VM CPU %.
Compute guest memory usage from balloon stats (available - unused).
Split VM caching in two: base info (dominfo) is cached with a 30s TTL, live stats (domstats) with a 5s TTL.
UI shows CPU % column (color-coded) and memory used/total with bars.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 20:26:01 -06:00
parent fb79f81527
commit 7d8dbc3305
2 changed files with 169 additions and 18 deletions

156
app.py
View File

@@ -26,10 +26,19 @@ IS_DASHBOARD = bool(servers)
# Latest host CPU sample: per-core values under "cores", aggregate under "overall".
# Readers copy it while holding cpu_lock (see get_local_stats).
cpu_snapshot = {"cores": [], "overall": 0.0}
cpu_lock = threading.Lock()
# VM cache: "data" is a list of VM dicts, "ts" the time.time() of the last refresh.
# NOTE(review): looks like the original combined VM cache that the base/live
# caches below were split out of — confirm whether anything still needs it.
_vm_cache = {"data": [], "ts": 0}
_vm_lock = threading.Lock()
VM_CACHE_TTL = 10
# VM base info cache (dominfo — slow, 30s TTL)
# "data" is the list of VM dicts built by get_vm_base_info(); "ts" is the
# time.time() of the last successful refresh. Guarded by _vm_base_lock.
_vm_base_cache = {"data": [], "ts": 0}
_vm_base_lock = threading.Lock()
VM_BASE_TTL = 30
# VM live stats cache (domstats — fast, 5s TTL)
# "data" maps VM name -> live stats dict built by get_vm_live_stats(); "ts" is
# the time.time() of the last refresh. Guarded by _vm_live_lock.
_vm_live_cache = {"data": {}, "ts": 0}
_vm_live_lock = threading.Lock()
VM_LIVE_TTL = 5
# CPU delta tracking for VM CPU %
# "by_name" maps VM name -> cpu_time from the previous domstats sample and "ts"
# is when that sample was taken (0 means no sample yet). Guarded by
# _prev_domstats_lock.
_prev_domstats = {"by_name": {}, "ts": 0}
_prev_domstats_lock = threading.Lock()
def parse_proc_stat():
@@ -133,12 +142,106 @@ def get_uptime():
return {"seconds": round(secs), "human": " ".join(parts)}
def get_vms():
"""Get VM list via sudo virsh. Returns list of dicts. Cached for VM_CACHE_TTL seconds."""
with _vm_lock:
# domstats keys we care about -> key used in the returned per-VM dict.
_DOMSTATS_FIELDS = {
    "cpu.time": "cpu_time",
    "balloon.available": "balloon_available",
    "balloon.unused": "balloon_unused",
    "balloon.rss": "balloon_rss",
}


def _parse_domstats_output(text):
    """Parse ``virsh domstats`` text into ``{domain name: {field: int}}``."""
    stats = {}
    name = None
    fields = {}
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if line.startswith("Domain:"):
            # A new section starts: flush the previous domain if it had data.
            if name and fields:
                stats[name] = fields
            # Take everything between the first and the last quote so a name
            # that itself contains an apostrophe is not truncated.
            _, quote, rest = line.partition("'")
            name = rest.rsplit("'", 1)[0] if quote else None
            fields = {}
            continue
        if not name or "=" not in line:
            continue
        key, _, val = line.partition("=")
        field = _DOMSTATS_FIELDS.get(key.strip())
        if field is None:
            continue
        try:
            fields[field] = int(val.strip())
        except ValueError:
            # One malformed value must not discard every other VM's stats.
            continue
    if name and fields:
        stats[name] = fields
    return stats


def parse_domstats(output=None):
    """Collect CPU-time and balloon statistics for all VMs in one virsh call.

    Args:
        output: Optional pre-captured ``virsh domstats`` text to parse. When
            omitted (the default), ``sudo virsh domstats --cpu-total
            --balloon`` is run and its stdout is parsed.

    Returns:
        A dict mapping each domain name to a dict holding whichever of
        ``cpu_time``, ``balloon_available``, ``balloon_unused`` and
        ``balloon_rss`` were reported, as ints. Domains that report none of
        these keys are omitted, and values that are not valid integers are
        skipped. Returns ``{}`` when virsh cannot be started, times out, or
        exits with a non-zero status.
    """
    if output is None:
        try:
            result = subprocess.run(
                ["sudo", "virsh", "domstats", "--cpu-total", "--balloon"],
                capture_output=True, text=True, timeout=10,
            )
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Best effort: missing binary, timeout or undecodable output all
            # mean "no live stats available right now".
            return {}
        if result.returncode != 0:
            return {}
        output = result.stdout
    return _parse_domstats_output(output)
def get_vm_live_stats():
    """Return live per-VM stats from domstats, cached for VM_LIVE_TTL seconds.

    CPU usage is derived from the change in each VM's ``cpu_time`` since the
    previous sample, divided by the wall-clock time between the two samples.
    The result is expressed as a percentage of ONE core, so a busy multi-vCPU
    guest can exceed 100; get_vms() divides by the vCPU count. The first
    sample for a VM always reports 0.0 because there is no baseline yet.

    Returns:
        A dict mapping VM name to a dict with:
            "raw_cpu_pct": CPU usage as % of one core, rounded to 2 places.
            "memory_used_mb": balloon available minus unused, or the balloon
                RSS when "available" is not reported; 0 if neither is known.
            "memory_total_mb": balloon available, or 0 when not reported.
    """
    with _vm_live_lock:
        if time.time() - _vm_live_cache["ts"] < VM_LIVE_TTL:
            return _vm_live_cache["data"]

    raw = parse_domstats()
    now = time.time()

    live = {}
    # Hold the baseline lock across read, compute and write so two concurrent
    # refreshes cannot interleave their baseline updates.
    with _prev_domstats_lock:
        prev_cpu = _prev_domstats["by_name"]
        prev_ts = _prev_domstats["ts"]
        dt = now - prev_ts if prev_ts > 0 else 0

        for name, s in raw.items():
            cpu_pct = 0.0
            cpu_time = s.get("cpu_time")
            if cpu_time is not None and dt > 0 and name in prev_cpu:
                delta_ns = cpu_time - prev_cpu[name]
                # A negative delta means the counter restarted (e.g. the VM
                # was rebooted); report 0 for this sample.
                if delta_ns > 0:
                    # cpu_time is summed over all vCPUs and dt is in seconds,
                    # so this is a percentage of a single core.
                    cpu_pct = delta_ns / (dt * 1e9) * 100

            balloon_avail = s.get("balloon_available", 0)
            balloon_unused = s.get("balloon_unused", 0)
            balloon_rss = s.get("balloon_rss", 0)
            if balloon_avail > 0:
                mem_total = balloon_avail // 1024  # KiB to MB
                # Never report negative usage if "unused" exceeds "available".
                mem_used = max(0, balloon_avail - balloon_unused) // 1024
            elif balloon_rss > 0:
                # No guest-side figures: fall back to host-side RSS.
                mem_total = 0
                mem_used = balloon_rss // 1024
            else:
                mem_total = 0
                mem_used = 0

            live[name] = {
                "raw_cpu_pct": round(cpu_pct, 2),
                "memory_used_mb": mem_used,
                "memory_total_mb": mem_total,
            }

        # Record a baseline only for VMs that actually reported cpu_time;
        # storing 0 for the others would make the next delta look enormous.
        _prev_domstats["by_name"] = {
            n: s["cpu_time"] for n, s in raw.items() if "cpu_time" in s
        }
        _prev_domstats["ts"] = now

    with _vm_live_lock:
        _vm_live_cache["data"] = live
        _vm_live_cache["ts"] = now
    return live
def get_vm_base_info():
"""Get base VM info (dominfo). Cached for VM_BASE_TTL seconds."""
with _vm_base_lock:
now = time.time()
if now - _vm_base_cache["ts"] < VM_BASE_TTL:
return _vm_base_cache["data"]
try:
result = subprocess.run(
@@ -169,24 +272,48 @@ def get_vms():
elif key == "CPU(s)":
vm["vcpus"] = int(val)
elif key == "Max memory":
# virsh reports in KiB
vm["memory_mb"] = int(val.split()[0]) // 1024
elif key == "Autostart":
vm["autostart"] = val.lower() in ("enable", "enabled")
vms.append(vm)
with _vm_lock:
_vm_cache["data"] = vms
_vm_cache["ts"] = time.time()
with _vm_base_lock:
_vm_base_cache["data"] = vms
_vm_base_cache["ts"] = time.time()
return vms
except Exception:
return []
def get_vms():
    """Return the base VM list with live CPU % and memory usage merged in.

    Each returned entry is a copy of the base-info dict with three extra keys:
    "cpu_percent", "memory_used_mb" and "memory_total_mb". VMs that are not
    running, or for which no live sample exists, get zeroed usage figures.
    """
    base_vms = get_vm_base_info()
    live_by_name = get_vm_live_stats()

    merged = []
    for base_vm in base_vms:
        entry = base_vm.copy()
        sample = live_by_name.get(entry["name"])
        is_running = entry["state"] == "running"

        if not (sample and is_running):
            entry["cpu_percent"] = 0.0
            entry["memory_used_mb"] = 0
            entry["memory_total_mb"] = entry["memory_mb"] if is_running else 0
            merged.append(entry)
            continue

        # The raw figure is a percentage of one core; spread it over the
        # VM's vCPUs (treating 0 as 1) and clamp to the 0-100 range.
        per_vm_pct = sample["raw_cpu_pct"] / (entry["vcpus"] or 1)
        entry["cpu_percent"] = round(max(0, min(100, per_vm_pct)), 1)
        entry["memory_used_mb"] = sample["memory_used_mb"]
        # Fall back to the configured maximum when no live total is known.
        entry["memory_total_mb"] = sample["memory_total_mb"] or entry["memory_mb"]
        merged.append(entry)
    return merged
def get_local_stats():
with cpu_lock:
snap = cpu_snapshot.copy()
stats = {
return {
"cores": snap["cores"],
"overall_cpu": snap["overall"],
"memory": get_memory(),
@@ -195,7 +322,6 @@ def get_local_stats():
"num_cores": len(snap["cores"]),
"vms": get_vms(),
}
return stats
def fetch_remote_stats(url, timeout=3):