mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-30 10:00:52 +00:00
Compare commits
No commits in common. "36cfb72e41f89a1d52108df22b5228db300d9293" and "ff6fd7632f31b2a948d9ef9598cd95102d446f39" have entirely different histories.
36cfb72e41
...
ff6fd7632f
|
|
@ -1,14 +1,3 @@
|
||||||
secubox-aggregator (0.2.3-1~bookworm1) bookworm; urgency=medium
|
|
||||||
|
|
||||||
* #727 auto-heal watchdog: ship secubox-aggregator-watchdog.{sh,service,timer}.
|
|
||||||
The in-process aggregator is the hub/auth/menu SPOF; under a host load spike
|
|
||||||
its event loop can wedge and the socket stops answering (board-wide 502/000:
|
|
||||||
sparse navbar, login errors). The timer probes aggregator.sock every 2 min
|
|
||||||
and restarts the service after 2 consecutive failures. Enabled in postinst
|
|
||||||
(respects operator masking). Packages the live fix from the 2026-06-24 incident.
|
|
||||||
|
|
||||||
-- Gerald KERMA <devel@cybermind.fr> Wed, 24 Jun 2026 15:10:00 +0000
|
|
||||||
|
|
||||||
secubox-aggregator (0.2.1-1~bookworm1) bookworm; urgency=medium
|
secubox-aggregator (0.2.1-1~bookworm1) bookworm; urgency=medium
|
||||||
|
|
||||||
* Phase 7 follow-up (#498) — relax hardening for module sudoers :
|
* Phase 7 follow-up (#498) — relax hardening for module sudoers :
|
||||||
|
|
|
||||||
|
|
@ -17,12 +17,6 @@ case "$1" in
|
||||||
systemctl enable secubox-aggregator.service
|
systemctl enable secubox-aggregator.service
|
||||||
systemctl start secubox-aggregator.service || true
|
systemctl start secubox-aggregator.service || true
|
||||||
|
|
||||||
# Auto-heal watchdog (#727): restart the aggregator if its socket wedges
|
|
||||||
# under load (the hub/auth/menu SPOF). Respect operator masking.
|
|
||||||
if [ "$(systemctl is-enabled secubox-aggregator-watchdog.timer 2>/dev/null)" != "masked" ]; then
|
|
||||||
systemctl enable --now secubox-aggregator-watchdog.timer 2>/dev/null || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "secubox-aggregator: to migrate all installed SecuBox modules into"
|
echo "secubox-aggregator: to migrate all installed SecuBox modules into"
|
||||||
echo " the aggregator (replaces per-module uvicorn processes) run :"
|
echo " the aggregator (replaces per-module uvicorn processes) run :"
|
||||||
echo " sudo /usr/sbin/secubox-aggregator-migrate"
|
echo " sudo /usr/sbin/secubox-aggregator-migrate"
|
||||||
|
|
|
||||||
6
packages/secubox-aggregator/debian/rules
Executable file → Normal file
6
packages/secubox-aggregator/debian/rules
Executable file → Normal file
|
|
@ -14,12 +14,6 @@ override_dh_auto_install:
|
||||||
install -d $(CURDIR)/debian/secubox-aggregator/lib/systemd/system
|
install -d $(CURDIR)/debian/secubox-aggregator/lib/systemd/system
|
||||||
install -m 644 systemd/secubox-aggregator.service \
|
install -m 644 systemd/secubox-aggregator.service \
|
||||||
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
||||||
install -m 644 systemd/secubox-aggregator-watchdog.service \
|
|
||||||
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
|
||||||
install -m 644 systemd/secubox-aggregator-watchdog.timer \
|
|
||||||
$(CURDIR)/debian/secubox-aggregator/lib/systemd/system/
|
|
||||||
install -d $(CURDIR)/debian/secubox-aggregator/usr/sbin
|
install -d $(CURDIR)/debian/secubox-aggregator/usr/sbin
|
||||||
install -m 755 sbin/secubox-aggregator-migrate \
|
install -m 755 sbin/secubox-aggregator-migrate \
|
||||||
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
||||||
install -m 755 sbin/secubox-aggregator-watchdog.sh \
|
|
||||||
$(CURDIR)/debian/secubox-aggregator/usr/sbin/
|
|
||||||
|
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
|
|
||||||
# NOTE(review): this fragment is generated by dh_python3 at package build time
# (presumably the postinst side, since it compiles) — confirm it is meant to be
# tracked in the repository at all.
# Automatically added by dh_python3
|
|
||||||
# Byte-compile the package's Python files, but only when py3compile is installed.
if command -v py3compile >/dev/null 2>&1; then
|
|
||||||
py3compile -p secubox-aggregator
|
|
||||||
fi
|
|
||||||
# Same for PyPy; a pypy3compile failure is deliberately ignored (|| true).
if command -v pypy3compile >/dev/null 2>&1; then
|
|
||||||
pypy3compile -p secubox-aggregator || true
|
|
||||||
fi
|
|
||||||
|
|
||||||
# End automatically added section
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
|
|
||||||
# NOTE(review): this fragment is generated by dh_python3 at package build time
# (presumably the prerm side, since it cleans up) — confirm it is meant to be
# tracked in the repository at all.
# Automatically added by dh_python3
|
|
||||||
# Preferred path: let py3clean remove the package's compiled files.
if command -v py3clean >/dev/null 2>&1; then
|
|
||||||
py3clean -p secubox-aggregator
|
|
||||||
else
|
|
||||||
# Fallback without py3clean: for every *.py path that dpkg lists for the
# package, sed's "e" flag executes `rm "<dir>/__pycache__/<name>".*`.
dpkg -L secubox-aggregator | sed -En -e '/^(.*)\/(.+)\.py$/s,,rm "\1/__pycache__/\2".*,e'
|
|
||||||
# Then remove any __pycache__ directories left empty under dist-packages.
find /usr/lib/python3/dist-packages/ -type d -name __pycache__ -empty -print0 | xargs --null --no-run-if-empty rmdir
|
|
||||||
fi
|
|
||||||
|
|
||||||
# End automatically added section
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
||||||
#!/bin/bash
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
# Source-Disclosed License — All rights reserved except as expressly granted.
# See LICENCE-CMSD-1.0.md for terms.
#
# SecuBox-Deb :: secubox-aggregator-watchdog
#
# Auto-heal the in-process aggregator — the hub/auth/menu single point of
# failure. Under a host load spike its shared event loop can wedge and its
# socket stops answering, taking down the navbar, login and service status
# board-wide (incident 2026-06-24). Probe the socket; if /api/v1/hub/public/menu
# stops answering for N consecutive checks, restart the service. Idempotent,
# safe to run on a timer.
#
# Environment:
#   SECUBOX_AGG_WD_THRESHOLD  consecutive failed probes before a restart
#                             (default 2; non-numeric values fall back to 2)
#   SECUBOX_AGG_WD_TIMEOUT    curl --max-time in seconds
#                             (default 12; non-numeric values fall back to 12)
#
# Exit status: always 0 — a failed probe is reported through syslog (logger),
# not through the exit code.
set -uo pipefail
readonly MODULE="secubox-aggregator-watchdog"
readonly VERSION="1.0"

SOCK="/run/secubox/aggregator.sock"
# State lives in /run (root-owned), NOT the shared sticky /run/secubox: that dir
# is 1777 and a stale secubox-owned file there can't be overwritten by this
# (CSPN-hardened, CAP_DAC_OVERRIDE-less) root — which would silently freeze the
# streak counter and stop the watchdog ever triggering.
STATE="/run/secubox-aggregator-watchdog.fails"
FAIL_THRESHOLD="${SECUBOX_AGG_WD_THRESHOLD:-2}"
TIMEOUT="${SECUBOX_AGG_WD_TIMEOUT:-12}"

# A malformed override must not disable the watchdog: a non-numeric threshold
# would make the comparison error out (never restarting), and a bad timeout
# would make every curl call fail (always restarting).
[[ "$FAIL_THRESHOLD" =~ ^[0-9]+$ ]] || FAIL_THRESHOLD=2
[[ "$TIMEOUT" =~ ^[0-9]+([.][0-9]+)?$ ]] || TIMEOUT=12

#######################################
# Print the failure streak stored in a state file.
# Arguments: $1 - path of the state file
# Outputs:   the stored count on stdout; 0 when the file is missing,
#            unreadable, empty or does not hold a plain decimal number
#######################################
read_streak() {
  local file=$1 raw=""
  if [[ -r "$file" ]]; then
    # read returns non-zero on an empty file or a missing trailing newline;
    # whatever was read is still in $raw, so the status is irrelevant here.
    IFS= read -r raw < "$file" || true
  fi
  if [[ "$raw" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so "08" is not rejected as invalid octal.
    printf '%d\n' "$((10#$raw))"
  else
    printf '0\n'
  fi
}

#######################################
# Store a failure streak in a state file (best effort).
# Arguments: $1 - path of the state file, $2 - count to store
# Outputs:   a syslog warning when the file cannot be written — a frozen
#            counter would otherwise stop the watchdog from ever triggering
# Returns:   always 0
#######################################
write_streak() {
  local file=$1 count=$2
  if ! { printf '%s\n' "$count" > "$file"; } 2>/dev/null; then
    logger -t "$MODULE" "cannot write state file $file (streak=$count not saved)"
  fi
  return 0
}

#######################################
# Probe the aggregator menu endpoint over its unix socket.
# Globals:   SOCK, TIMEOUT (read)
# Outputs:   the HTTP status code on stdout, or 000 when curl fails
#######################################
probe_code() {
  local code
  # On failure curl has already printed its own code via -w; replace the value
  # instead of appending a second "000" to it.
  code=$(curl -s -o /dev/null -w '%{http_code}' --max-time "$TIMEOUT" \
    --unix-socket "$SOCK" http://localhost/api/v1/hub/public/menu 2>/dev/null) || code=000
  printf '%s\n' "${code:-000}"
}

#######################################
# Run one watchdog pass: probe, update the streak, restart at the threshold.
# Globals:   SOCK, STATE, FAIL_THRESHOLD, MODULE (read)
# Returns:   always 0
#######################################
main() {
  local code streak

  # No socket yet (service still starting / not migrated) → nothing to heal.
  [[ -S "$SOCK" ]] || return 0

  code=$(probe_code)
  if [[ "$code" == "200" ]]; then
    write_streak "$STATE" 0
    return 0
  fi

  streak=$(( $(read_streak "$STATE") + 1 ))
  write_streak "$STATE" "$streak"
  logger -t "$MODULE" "aggregator probe failed (code=$code, streak=$streak/$FAIL_THRESHOLD)"

  if (( streak >= 10#$FAIL_THRESHOLD )); then
    logger -t "$MODULE" "restarting secubox-aggregator (auto-heal)"
    systemctl restart secubox-aggregator.service \
      || logger -t "$MODULE" "restart of secubox-aggregator.service failed"
    # Start a fresh streak after the restart attempt.
    write_streak "$STATE" 0
  fi
  return 0
}

main "$@"
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
# Watchdog service for secubox-aggregator: each activation runs the probe
# script once (Type=oneshot). NOTE(review): presumably activated by
# secubox-aggregator-watchdog.timer — confirm the unit file names match.
[Unit]
|
|
||||||
Description=SecuBox aggregator auto-heal watchdog
|
|
||||||
Documentation=https://github.com/CyberMind-FR/secubox-deb/issues/727
|
|
||||||
# Ordering only: when both units start together, run after the aggregator.
After=secubox-aggregator.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=oneshot
|
|
||||||
ExecStart=/usr/sbin/secubox-aggregator-watchdog.sh
|
|
||||||
# Run the probe at a lowered scheduling priority.
Nice=10
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
# Timer that schedules the aggregator watchdog probe.
[Unit]
|
|
||||||
Description=Probe + auto-heal secubox-aggregator every 2 min
|
|
||||||
|
|
||||||
[Timer]
|
|
||||||
# First run 2 minutes after boot.
OnBootSec=2min
|
|
||||||
# Then again 2 minutes after the activated unit was last started.
OnUnitActiveSec=2min
|
|
||||||
# Allow systemd up to 20 s of slack when firing the timer.
AccuracySec=20s
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
# Enabling the timer hooks it into timers.target.
WantedBy=timers.target
|
|
||||||
Loading…
Reference in New Issue
Block a user