mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-30 14:10:44 +00:00
Compare commits
2 Commits
ff6fd7632f
...
36cfb72e41
| Author | SHA1 | Date | |
|---|---|---|---|
| 36cfb72e41 | |||
| 6e62c0166d |
|
|
@ -1,3 +1,14 @@
|
||||||
|
secubox-aggregator (0.2.3-1~bookworm1) bookworm; urgency=medium
|
||||||
|
|
||||||
|
* #727 auto-heal watchdog: ship secubox-aggregator-watchdog.{sh,service,timer}.
|
||||||
|
The in-process aggregator is the hub/auth/menu SPOF; under a host load spike
|
||||||
|
its event loop can wedge and the socket stops answering (board-wide 502/000:
|
||||||
|
sparse navbar, login errors). The timer probes aggregator.sock every 2 min
|
||||||
|
and restarts the service after 2 consecutive failures. Enabled in postinst
|
||||||
|
(respects operator masking). Packages the live fix from the 2026-06-24 incident.
|
||||||
|
|
||||||
|
-- Gerald KERMA <devel@cybermind.fr> Wed, 24 Jun 2026 15:10:00 +0000
|
||||||
|
|
||||||
secubox-aggregator (0.2.1-1~bookworm1) bookworm; urgency=medium
|
secubox-aggregator (0.2.1-1~bookworm1) bookworm; urgency=medium
|
||||||
|
|
||||||
* Phase 7 follow-up (#498) — relax hardening for module sudoers :
|
* Phase 7 follow-up (#498) — relax hardening for module sudoers :
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,12 @@ case "$1" in
|
||||||
systemctl enable secubox-aggregator.service
|
systemctl enable secubox-aggregator.service
|
||||||
systemctl start secubox-aggregator.service || true
|
systemctl start secubox-aggregator.service || true
|
||||||
|
|
||||||
|
# Auto-heal watchdog (#727): restart the aggregator if its socket wedges
|
||||||
|
# under load (the hub/auth/menu SPOF). Respect operator masking.
|
||||||
|
if [ "$(systemctl is-enabled secubox-aggregator-watchdog.timer 2>/dev/null)" != "masked" ]; then
|
||||||
|
systemctl enable --now secubox-aggregator-watchdog.timer 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
echo "secubox-aggregator: to migrate all installed SecuBox modules into"
|
echo "secubox-aggregator: to migrate all installed SecuBox modules into"
|
||||||
echo " the aggregator (replaces per-module uvicorn processes) run :"
|
echo " the aggregator (replaces per-module uvicorn processes) run :"
|
||||||
echo " sudo /usr/sbin/secubox-aggregator-migrate"
|
echo " sudo /usr/sbin/secubox-aggregator-migrate"
|
||||||
|
|
|
||||||
6
packages/secubox-aggregator/debian/rules
Normal file → Executable file
6
packages/secubox-aggregator/debian/rules
Normal file → Executable file
|
|
@ -14,6 +14,12 @@ override_dh_auto_install:
|
||||||
install -d $(CURDIR)/debian/secubox-aggregator/lib/systemd/system
|
install -d $(CURDIR)/debian/secubox-aggregator/lib/systemd/system
|
||||||
install -m 644 systemd/secubox-aggregator.service \
|
install -m 644 systemd/secubox-aggregator.service \
|
||||||
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
||||||
|
install -m 644 systemd/secubox-aggregator-watchdog.service \
|
||||||
|
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
||||||
|
install -m 644 systemd/secubox-aggregator-watchdog.timer \
|
||||||
|
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
||||||
install -d $(CURDIR)/debian/secubox-aggregator/usr/sbin
|
install -d $(CURDIR)/debian/secubox-aggregator/usr/sbin
|
||||||
install -m 755 sbin/secubox-aggregator-migrate \
|
install -m 755 sbin/secubox-aggregator-migrate \
|
||||||
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
||||||
|
install -m 755 sbin/secubox-aggregator-watchdog.sh \
|
||||||
|
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
|
||||||
|
# Automatically added by dh_python3
|
||||||
|
if command -v py3compile >/dev/null 2>&1; then
|
||||||
|
py3compile -p secubox-aggregator
|
||||||
|
fi
|
||||||
|
if command -v pypy3compile >/dev/null 2>&1; then
|
||||||
|
pypy3compile -p secubox-aggregator || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# End automatically added section
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
|
||||||
|
# Automatically added by dh_python3
|
||||||
|
if command -v py3clean >/dev/null 2>&1; then
|
||||||
|
py3clean -p secubox-aggregator
|
||||||
|
else
|
||||||
|
dpkg -L secubox-aggregator | sed -En -e '/^(.*)\/(.+)\.py$/s,,rm "\1/__pycache__/\2".*,e'
|
||||||
|
find /usr/lib/python3/dist-packages/ -type d -name __pycache__ -empty -print0 | xargs --null --no-run-if-empty rmdir
|
||||||
|
fi
|
||||||
|
|
||||||
|
# End automatically added section
|
||||||
48
packages/secubox-aggregator/sbin/secubox-aggregator-watchdog.sh
Executable file
48
packages/secubox-aggregator/sbin/secubox-aggregator-watchdog.sh
Executable file
|
|
@ -0,0 +1,48 @@
|
||||||
|
#!/bin/bash
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
# Source-Disclosed License — All rights reserved except as expressly granted.
# See LICENCE-CMSD-1.0.md for terms.
#
# SecuBox-Deb :: secubox-aggregator-watchdog
#
# Auto-heal the in-process aggregator — the hub/auth/menu single point of
# failure. Under a host load spike its shared event loop can wedge and its
# socket stops answering, taking down the navbar, login and service status
# board-wide (incident 2026-06-24). Probe the socket; if /api/v1/hub/public/menu
# stops answering for N consecutive checks, restart the service. Idempotent,
# safe to run on a timer.
#
# Environment:
#   SECUBOX_AGG_WD_THRESHOLD  consecutive failed probes before a restart
#                             (non-negative integer, default 2)
#   SECUBOX_AGG_WD_TIMEOUT    curl --max-time for one probe, in seconds
#                             (default 12)
#
# Exit status: always 0 — a failed probe or restart is reported via logger,
# not via the exit code.
set -uo pipefail
readonly MODULE="secubox-aggregator-watchdog"
# shellcheck disable=SC2034 — informational only, not read by this script
readonly VERSION="1.0"

SOCK="/run/secubox/aggregator.sock"
# State lives in /run (root-owned), NOT the shared sticky /run/secubox: that dir
# is 1777 and a stale secubox-owned file there can't be overwritten by this
# (CSPN-hardened, CAP_DAC_OVERRIDE-less) root — which would silently freeze the
# streak counter and stop the watchdog ever triggering.
STATE="/run/secubox-aggregator-watchdog.fails"
TIMEOUT="${SECUBOX_AGG_WD_TIMEOUT:-12}"

#######################################
# Print $1 when it is a plain decimal integer of 1-9 digits, else print $2.
# The value is only pattern-matched before use, so file or environment content
# is never evaluated as an arithmetic expression; base 10 is forced so that
# "08" is not read as invalid octal.
# Arguments: $1 - candidate value, $2 - fallback
# Outputs:   the normalized integer on stdout
#######################################
uint_or_default() {
  local value=${1-} fallback=${2-0}
  if [[ "$value" =~ ^[0-9]{1,9}$ ]]; then
    printf '%s\n' "$((10#$value))"
  else
    printf '%s\n' "$fallback"
  fi
}

# A non-numeric threshold would make the comparison in main() error out and
# the restart would never fire; fall back to the default instead.
FAIL_THRESHOLD=$(uint_or_default "${SECUBOX_AGG_WD_THRESHOLD:-2}" 2)

#######################################
# Print the current failure streak (0 if the state file is missing,
# unreadable, empty or corrupted).
# Globals: STATE (read)
#######################################
read_streak() {
  local raw=""
  if [[ -r "$STATE" ]]; then
    # read returns non-zero on a file without trailing newline but still
    # fills $raw, hence the "|| true".
    IFS= read -r raw < "$STATE" || true
  fi
  uint_or_default "$raw" 0
}

#######################################
# Best-effort write of the failure streak.
# The redirection sits inside a group so that a failure to open $STATE is
# silenced as well (a bare "cmd > file 2>/dev/null" still reports it).
# Globals:   STATE (written)
# Arguments: $1 - streak value
#######################################
write_streak() {
  { printf '%s\n' "$1" > "$STATE"; } 2>/dev/null || true
}

#######################################
# Probe the aggregator over its unix socket.
# Globals: SOCK, TIMEOUT (read)
# Outputs: the HTTP status code on stdout ("000" when there was no response)
# Returns: 0 only when curl itself succeeded AND the status is 200
#######################################
probe() {
  local code rc=0
  # curl prints the -w value even when the transfer fails (as "000" when no
  # response was received), so nothing must be appended to its output here.
  code=$(curl -s -o /dev/null -w '%{http_code}' --max-time "$TIMEOUT" \
    --unix-socket "$SOCK" http://localhost/api/v1/hub/public/menu 2>/dev/null) || rc=$?
  # Empty output means curl could not run at all (e.g. not installed).
  [[ -n "$code" ]] || code=000
  printf '%s\n' "$code"
  (( rc == 0 )) && [[ "$code" == "200" ]]
}

#######################################
# One watchdog tick: probe, maintain the streak, restart on threshold.
# Globals: SOCK, FAIL_THRESHOLD, MODULE (read)
# Returns: always 0
#######################################
main() {
  # No socket yet (service still starting / not migrated) → nothing to heal.
  [[ -S "$SOCK" ]] || return 0

  local code healthy=1
  code=$(probe) || healthy=0
  if (( healthy )); then
    write_streak 0
    return 0
  fi

  local streak
  streak=$(( $(read_streak) + 1 ))
  write_streak "$streak"
  logger -t "$MODULE" "aggregator probe failed (code=$code, streak=$streak/$FAIL_THRESHOLD)"

  if (( streak >= FAIL_THRESHOLD )); then
    logger -t "$MODULE" "restarting secubox-aggregator (auto-heal)"
    if systemctl restart secubox-aggregator.service; then
      write_streak 0
    else
      # Keep the streak so the very next tick retries instead of waiting for
      # a fresh run of consecutive failures.
      logger -t "$MODULE" "restart of secubox-aggregator failed; will retry on next run"
    fi
  fi
  return 0
}

main "$@"
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
[Unit]
|
||||||
|
Description=SecuBox aggregator auto-heal watchdog
|
||||||
|
Documentation=https://github.com/CyberMind-FR/secubox-deb/issues/727
|
||||||
|
After=secubox-aggregator.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/sbin/secubox-aggregator-watchdog.sh
|
||||||
|
Nice=10
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
[Unit]
|
||||||
|
Description=Probe + auto-heal secubox-aggregator every 2 min
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnBootSec=2min
|
||||||
|
OnUnitActiveSec=2min
|
||||||
|
AccuracySec=20s
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
Loading…
Reference in New Issue
Block a user