Add dashboard summary view, server detail routing, and VM listing
Rearchitect to pure aggregator dashboard (no /proc) with bare agents on each host. Agents report VM data via sudo virsh (cached 10s). UI rewritten with hash-based routing: summary card grid and per-server detail view with CPU grid, memory, load, uptime, and VM table. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
97
app.py
97
app.py
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
import urllib.request
|
||||
@@ -10,7 +11,7 @@ app = Flask(__name__)
|
||||
|
||||
PROC = os.environ.get("SYSMON_PROC", "/host/proc")
|
||||
|
||||
# Server configuration: "name:url,name:url"
|
||||
SYSMON_SERVERS = os.environ.get("SYSMON_SERVERS", "")
|
||||
servers = []
|
||||
if SYSMON_SERVERS:
|
||||
@@ -18,13 +19,20 @@ if SYSMON_SERVERS:
|
||||
name, url = entry.strip().split(":", 1)
|
||||
servers.append({"name": name.strip(), "url": url.strip()})
|
||||
|
||||
# Dashboard-only mode: no local /proc reading when SYSMON_SERVERS is set
|
||||
IS_DASHBOARD = bool(servers)
|
||||
|
||||
# Shared state for per-core CPU usage
|
||||
cpu_snapshot = {"cores": [], "overall": 0.0}
|
||||
cpu_lock = threading.Lock()
|
||||
|
||||
# VM cache
|
||||
_vm_cache = {"data": [], "ts": 0}
|
||||
_vm_lock = threading.Lock()
|
||||
VM_CACHE_TTL = 10
|
||||
|
||||
|
||||
def parse_proc_stat():
|
||||
"""Parse /proc/stat and return per-cpu jiffies as list of (id, user+nice+system, total)."""
|
||||
cores = []
|
||||
overall = None
|
||||
with open(f"{PROC}/stat") as f:
|
||||
@@ -45,7 +53,6 @@ def parse_proc_stat():
|
||||
|
||||
|
||||
def cpu_sampler():
|
||||
"""Background thread: sample /proc/stat every 1s, compute deltas."""
|
||||
global cpu_snapshot
|
||||
prev_cores, prev_overall = parse_proc_stat()
|
||||
prev_map = {cid: (busy, total) for cid, busy, total in prev_cores}
|
||||
@@ -79,7 +86,6 @@ def cpu_sampler():
|
||||
|
||||
|
||||
def get_memory():
|
||||
"""Parse /proc/meminfo, return dict with MB values."""
|
||||
info = {}
|
||||
with open(f"{PROC}/meminfo") as f:
|
||||
for line in f:
|
||||
@@ -103,7 +109,6 @@ def get_memory():
|
||||
|
||||
|
||||
def get_load():
|
||||
"""Parse /proc/loadavg."""
|
||||
with open(f"{PROC}/loadavg") as f:
|
||||
parts = f.read().split()
|
||||
return {
|
||||
@@ -114,7 +119,6 @@ def get_load():
|
||||
|
||||
|
||||
def get_uptime():
|
||||
"""Parse /proc/uptime, return human-readable string."""
|
||||
with open(f"{PROC}/uptime") as f:
|
||||
secs = float(f.read().split()[0])
|
||||
days = int(secs // 86400)
|
||||
@@ -129,22 +133,72 @@ def get_uptime():
|
||||
return {"seconds": round(secs), "human": " ".join(parts)}
|
||||
|
||||
|
||||
def get_vms():
    """Get VM list via ``sudo virsh``. Cached for VM_CACHE_TTL seconds.

    Returns a list of dicts with keys: name, state, vcpus, memory_mb,
    autostart. Returns [] when virsh is unavailable or fails — callers
    treat an empty list as "no VM data", never as an error.
    """
    with _vm_lock:
        now = time.time()
        # Serve the cached list while it is still fresh.
        if now - _vm_cache["ts"] < VM_CACHE_TTL:
            return _vm_cache["data"]

    try:
        result = subprocess.run(
            ["sudo", "virsh", "list", "--all", "--name"],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode != 0:
            return []

        names = [n.strip() for n in result.stdout.strip().split("\n") if n.strip()]
        vms = []
        for name in names:
            info = subprocess.run(
                ["sudo", "virsh", "dominfo", name],
                capture_output=True, text=True, timeout=5
            )
            if info.returncode != 0:
                # Skip a VM we cannot query; keep the rest of the list.
                continue
            vm = {"name": name, "state": "unknown", "vcpus": 0, "memory_mb": 0, "autostart": False}
            for line in info.stdout.split("\n"):
                if ":" not in line:
                    continue
                key, val = line.split(":", 1)
                key = key.strip()
                val = val.strip()
                try:
                    if key == "State":
                        vm["state"] = val
                    elif key == "CPU(s)":
                        vm["vcpus"] = int(val)
                    elif key == "Max memory":
                        # virsh reports memory in KiB; convert to MB.
                        vm["memory_mb"] = int(val.split()[0]) // 1024
                    elif key == "Autostart":
                        vm["autostart"] = val.lower() in ("enable", "enabled")
                except ValueError:
                    # One malformed dominfo field must not discard the whole
                    # VM list (previously any int() failure returned []).
                    continue
            vms.append(vm)

        with _vm_lock:
            _vm_cache["data"] = vms
            _vm_cache["ts"] = time.time()
            return vms
    except Exception:
        # Best-effort: any subprocess/timeout failure yields an empty list.
        return []
|
||||
|
||||
|
||||
def get_local_stats():
    """Build the stats dict from local /proc (agent mode).

    Returns per-core CPU usage, overall CPU, memory, load, uptime,
    core count, and the cached VM list from virsh.
    """
    # Copy the sampler's snapshot under the lock for a consistent read;
    # the sampler thread replaces/updates cpu_snapshot concurrently.
    with cpu_lock:
        snap = cpu_snapshot.copy()
    stats = {
        "cores": snap["cores"],
        "overall_cpu": snap["overall"],
        "memory": get_memory(),
        "load": get_load(),
        "uptime": get_uptime(),
        "num_cores": len(snap["cores"]),
        "vms": get_vms(),
    }
    return stats
|
||||
|
||||
|
||||
def fetch_remote_stats(url, timeout=2):
|
||||
"""Fetch /api/stats from a remote agent. Returns dict or None on failure."""
|
||||
def fetch_remote_stats(url, timeout=3):
|
||||
try:
|
||||
req = urllib.request.Request(url.rstrip("/") + "/api/stats")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
@@ -160,6 +214,8 @@ def index():
|
||||
|
||||
@app.route("/api/stats")
def stats():
    """Agent endpoint: serve local host stats; 400 in dashboard-only mode."""
    if not IS_DASHBOARD:
        return jsonify(get_local_stats())
    # The dashboard aggregates remote agents and has no local /proc to report.
    return jsonify({"error": "dashboard-only mode, use /api/servers"}), 400
|
||||
|
||||
|
||||
@@ -167,23 +223,18 @@ def stats():
|
||||
def all_servers():
    """Poll every configured agent and return their stats.

    Unreachable agents are reported with status "unreachable" so one
    down host never fails the whole dashboard response.
    """
    results = []
    for srv in servers:
        data = fetch_remote_stats(srv["url"])
        if data:
            data["name"] = srv["name"]
            data["status"] = "online"
            # Older agents may omit num_cores; derive it from the core list.
            data.setdefault("num_cores", len(data.get("cores", [])))
            results.append(data)
        else:
            results.append({"name": srv["name"], "status": "unreachable"})
    return jsonify(servers=results)
|
||||
|
||||
|
||||
# Only start the CPU sampler in agent mode; the dashboard aggregates
# remote agents and has no local /proc to sample.
if not IS_DASHBOARD:
    t = threading.Thread(target=cpu_sampler, daemon=True)
    t.start()
|
||||
|
||||
Reference in New Issue
Block a user