Compare commits

...

3 Commits

Author SHA1 Message Date
fcee198a9f style(hub): render health status emoji cleanly (neutralize .led dot) (#738)
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 14:53:49 +02:00
913e100717 perf(hub): double-buffer status cache + emojized health page (#738)
Navbar status (menu + health-batch) is now a strict double-buffer cache:
- request handlers NEVER compute on the request path — they return the current
  snapshot instantly (or a 'warming' placeholder), so the sidebar's polling can
  no longer serialize behind a ~3s systemctl walk and starve the loop;
- the background refresher is kicked from the request path (_ensure_bg) because
  mounted sub-apps receive neither startup nor @app.middleware events under the
  aggregator — the previous lazy-start middleware never fired there;
- snapshots are built complete then swapped atomically, so the dashboard never
  shows partial/bad counts.

Served by the dedicated secubox-hub process (:8001, isolated loop), the navbar
stays <50 ms and keeps returning HTTP 200 under 25+ concurrent polls, where the
aggregator-mounted copy wedged (000). health.js: 🟢🟡🔴 emoji status indicators.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 14:52:47 +02:00
92d20ab589 perf(aggregator): raise AnyIO threadpool to 80 tokens (#738)
The async-sweep moves ~243 blocking handlers to the threadpool. With ~110
modules in one process, the default 40-token pool can queue head-of-line under
concurrent blocking load. Raise to 80 on startup (best-effort, never breaks
boot).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 14:28:38 +02:00
4 changed files with 67 additions and 37 deletions

View File

@ -214,6 +214,19 @@ def _build_app() -> FastAPI:
for name in cfg.get("modules", []): for name in cfg.get("modules", []):
_mount_module(app, name) _mount_module(app, name)
@app.on_event("startup")
async def _raise_threadpool() -> None:
    """Raise AnyIO's default worker-thread limit to 80 tokens at startup.

    Sync (`def`) route handlers — including the blocking ones converted by the
    #738 async-sweep — run in AnyIO's default threadpool (40 tokens). With
    ~110 modules sharing one process, raise the cap so concurrent blocking
    calls don't queue head-of-line behind a full pool.

    Best-effort: any failure is logged as a warning and startup continues.
    The limit is only ever raised; a limiter that already allows at least
    80 tokens is left untouched.
    """
    target_tokens = 80
    try:
        # Import the submodule explicitly: `import anyio` alone does not
        # promise that `anyio.to_thread` is available as an attribute.
        from anyio import to_thread

        limiter = to_thread.current_default_thread_limiter()
        if limiter.total_tokens < target_tokens:
            limiter.total_tokens = target_tokens
            log.info("threadpool limiter raised to %d tokens", target_tokens)
        else:
            # Something else already configured a larger pool; don't shrink it.
            log.info(
                "threadpool limiter already at %s tokens; left unchanged",
                limiter.total_tokens,
            )
    except Exception as e:  # never let this break startup
        log.warning("could not raise threadpool limiter: %s", e)
@app.get("/health") @app.get("/health")
def health() -> dict: def health() -> dict:
"""Aggregator health. Reports per-module load state.""" """Aggregator health. Reports per-module load state."""

View File

@ -153,24 +153,29 @@ def _load_menu_cache_from_file() -> dict:
@public_router.get("/menu") @public_router.get("/menu")
async def public_menu(): async def public_menu():
"""Public menu endpoint for sidebar navigation (no auth required). """Public menu endpoint for sidebar navigation (no auth required).
Returns basic menu structure without sensitive data.
Uses pre-computed cache for instant response. Double-buffer cache: ALWAYS returns the current snapshot instantly and never
computes on the request path (a sync systemctl walk here, multiplied by the
sidebar's polling, is what froze the shared aggregator loop). The background
refresher (kicked here, because mounted sub-apps get no startup/middleware
events) fills the buffer within a few seconds; until then we serve the file
snapshot or an explicit `warming` placeholder.
""" """
global _menu_cache global _menu_cache
_ensure_bg()
# Return from in-memory cache (instant) # Active buffer (instant).
if _menu_cache: if _menu_cache:
return _menu_cache return _menu_cache
# Fallback to file cache (fast startup) # Cold start: last-good snapshot persisted to file (cheap read, no systemctl).
file_cache = _load_menu_cache_from_file() file_cache = _load_menu_cache_from_file()
if file_cache: if file_cache:
_menu_cache = file_cache _menu_cache = file_cache
return file_cache return file_cache
# Last resort: compute synchronously (only on first request before cache ready) # Nothing yet — never block; the background task will fill it shortly.
log.warning("Menu cache miss - computing synchronously") return {"categories": [], "total_installed": 0, "total_active": 0, "warming": True}
return _compute_menu_sync()
@public_router.get("/info") @public_router.get("/info")
@ -262,22 +267,20 @@ async def public_led_status():
@public_router.get("/health-batch") @public_router.get("/health-batch")
async def public_health_batch(): async def public_health_batch():
"""Batch health check for all modules — returns status for sidebar LEDs. """Batch health snapshot for the sidebar LEDs.
Serves the TTL snapshot built by the background loop; on a cold miss it Double-buffer cache: returns the last fully-built snapshot instantly and
builds it ONCE off the event loop. Never makes a synchronous systemctl call NEVER rebuilds on the request path. The previous cold-miss rebuilt under a
on the request path. lock, so concurrent sidebar polls serialized behind a ~3 s systemctl walk
and starved the shared loop. The background refresher (kicked here) swaps in
a complete snapshot atomically so we never serve partial/bad counts.
""" """
_ensure_bg()
hb = _cache.get("health_batch") hb = _cache.get("health_batch")
if hb and (time.time() - _cache.get("health_batch_ts", 0)) < CACHE_TTL * 2: if hb:
return hb return hb
async with _health_batch_lock: # Not warmed yet — serve an explicit placeholder rather than block/compute.
# Re-check under the lock: a concurrent waiter may have just rebuilt it. return {"modules": {}, "count": 0, "warming": True}
hb = _cache.get("health_batch")
if not hb or (time.time() - _cache.get("health_batch_ts", 0)) >= CACHE_TTL * 2:
await asyncio.to_thread(_refresh_health_batch)
hb = _cache.get("health_batch") or {"modules": {}, "count": 0}
return hb
app.include_router(public_router) app.include_router(public_router)
@ -503,17 +506,27 @@ async def startup():
await _start_background_once() await _start_background_once()
# Strong references to in-flight warm-up tasks. The event loop keeps only weak
# references to tasks, so a fire-and-forget task with no other referent can be
# garbage-collected before it finishes.
_bg_tasks: set = set()


def _ensure_bg() -> None:
    """Kick the background warm-up + refresh loops from the request path.

    Mounted in the aggregator, a sub-app receives neither startup/lifespan nor
    `@app.middleware` events, so the navbar status endpoints trigger the
    warm-up themselves on first hit. Fire-and-forget: never blocks or delays
    the request.

    Safe to call on every request: it returns immediately once ``_bg_started``
    is set or while a previously scheduled warm-up task is still pending.
    If no event loop is running (e.g. at import time) nothing is scheduled and
    a later request retries.
    """
    if _bg_started or _bg_tasks:
        return
    try:
        # Look up the loop BEFORE creating the coroutine object, so that the
        # "no running loop" case does not leave a never-awaited coroutine.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop yet (e.g. import time) — a later request retries.
        return
    task = loop.create_task(_start_background_once())
    _bg_tasks.add(task)
    # Drop the reference once the task completes, so a failed warm-up can be
    # retried by a subsequent request.
    task.add_done_callback(_bg_tasks.discard)
# Kept for the standalone-uvicorn path; harmless (no-op) when mounted.
@app.middleware("http") @app.middleware("http")
async def _lazy_background_start(request, call_next): async def _lazy_background_start(request, call_next):
"""Kick the background warm-up on the first request. _ensure_bg()
Mounted sub-apps don't receive startup/lifespan events under the aggregator,
so the cache would otherwise stay cold and every _svc() would fall back to a
blocking per-module systemctl call. Fire-and-forget so this request isn't
delayed by the warm-up.
"""
if not _bg_started:
asyncio.create_task(_start_background_once())
return await call_next(request) return await call_next(request)

View File

@ -39,10 +39,10 @@ h2 { font-size: 16px; font-weight: 600; margin: var(--sp-xl) 0 var(--sp-m); }
.svc-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: var(--sp-s); } .svc-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: var(--sp-s); }
.svc { display: flex; align-items: center; gap: var(--sp-s); background: var(--bg1); border: 1px solid var(--bd); .svc { display: flex; align-items: center; gap: var(--sp-s); background: var(--bg1); border: 1px solid var(--bd);
border-radius: 8px; padding: 10px 12px; min-width: 0; } border-radius: 8px; padding: 10px 12px; min-width: 0; }
.svc .led { width: 9px; height: 9px; border-radius: 50%; flex: none; box-shadow: 0 0 6px currentColor; } /* .led now carries the status emoji (🟢🟡🔴) instead of a CSS dot. */
.svc.ok .led { background: #2ecc8f; color: #2ecc8f; } .svc .led { flex: none; width: auto; height: auto; background: none; box-shadow: none;
.svc.warn .led, .svc.unknown .led { background: #f0b94c; color: #f0b94c; } font-size: 14px; line-height: 1; font-family: "Noto Color Emoji", "Apple Color Emoji", sans-serif; }
.svc.error .led { background: #ff7a6b; color: #ff7a6b; animation: pulse 1.2s infinite; } .svc.error .led { animation: pulse 1.2s infinite; }
@keyframes pulse { 50% { opacity: .4; } } @keyframes pulse { 50% { opacity: .4; } }
.svc.error { border-left: 3px solid #803018; } .svc.error { border-left: 3px solid #803018; }
.svc-name { font-weight: 600; font-size: 13px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .svc-name { font-weight: 600; font-size: 13px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }

View File

@ -25,11 +25,15 @@
return r.json(); return r.json();
} }
// Status → emoji indicator (replaces the CSS LED dot).
// Frozen so the lookup table cannot be mutated at runtime.
const EMOJI = Object.freeze({ ok: '🟢', warn: '🟡', error: '🔴', unknown: '⚪' });
// Own-property lookup: a status such as "constructor" or "toString" must fall
// back to the unknown marker instead of resolving to an inherited Object member.
const emo = (status) =>
  (Object.prototype.hasOwnProperty.call(EMOJI, status) ? EMOJI[status] : EMOJI.unknown);
function chip(id, st) { function chip(id, st) {
const status = (st && st.status) || 'unknown'; const status = (st && st.status) || 'unknown';
const msg = (st && st.msg) || ''; const msg = (st && st.msg) || '';
return `<div class="svc ${status}" title="${esc(id)}: ${esc(msg)}"> return `<div class="svc ${status}" title="${esc(id)}: ${esc(msg)}">
<span class="led"></span> <span class="led">${emo(status)}</span>
<span class="svc-name">${esc(id)}</span> <span class="svc-name">${esc(id)}</span>
<span class="svc-msg">${esc(msg)}</span> <span class="svc-msg">${esc(msg)}</span>
</div>`; </div>`;
@ -44,10 +48,10 @@
}); });
$('summary').innerHTML = $('summary').innerHTML =
`<div class="sum ok"><b>${ok}</b><span>healthy</span></div>` + `<div class="sum ok"><b>${ok}</b><span>🟢 healthy</span></div>` +
`<div class="sum warn"><b>${warn}</b><span>degraded</span></div>` + `<div class="sum warn"><b>${warn}</b><span>🟡 degraded</span></div>` +
`<div class="sum err"><b>${err}</b><span>down</span></div>` + `<div class="sum err"><b>${err}</b><span>🔴 down</span></div>` +
`<div class="sum total"><b>${ids.length}</b><span>services</span></div>`; `<div class="sum total"><b>${ids.length}</b><span>📊 services</span></div>`;
const vital = ids.filter((id) => VITAL_SET.has(id)); const vital = ids.filter((id) => VITAL_SET.has(id));
const common = ids.filter((id) => !VITAL_SET.has(id)); const common = ids.filter((id) => !VITAL_SET.has(id));