mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-29 16:31:31 +00:00
Compare commits
8 Commits
a3cd643da4
...
051ca6d1d7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
051ca6d1d7 | ||
| 69659f6a67 | |||
| 1bd5108472 | |||
| 6c96ba62e4 | |||
| 4d0cbf8b7f | |||
| 4f96da87d7 | |||
|
|
2b036db0d6 | ||
| 376b4ecd2a |
|
|
@ -1,3 +1,30 @@
|
|||
secubox-toolbox (2.6.58-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* feat(#659): per-visitor ad-block breakdown in #ads. ad_ghost now also tallies
|
||||
the visitor (mac_hash_of(client_ip), cached WG-hash for R3 — no blocking) per
|
||||
blocked ad into a new `ad_block_client_host` table (hot-path = dict increments,
|
||||
bg-thread flush). `/admin/ad-stats` gains `top_visitors`; new
|
||||
`/admin/ad-stats/client/{mac_hash}` for the drill-down. #ads tab shows a
|
||||
"Top visiteurs (pubs bloquées)" table; click a visitor → their top ad hosts.
|
||||
Bounded (clients×hosts); own-infra guard (#658) keeps our own hosts out.
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Thu, 18 Jun 2026 20:00:00 +0200
|
||||
|
||||
secubox-toolbox (2.6.57-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* fix(#658): ad-learn hardening — never self-block own infrastructure.
|
||||
- ad_ghost `_allowed` now ALWAYS allows the appliance's own domains
|
||||
(`_SELF_REGS`, default {secubox.in}, env `SECUBOX_SELF_DOMAINS`) — hard-coded
|
||||
so it survives a reflash with no allowlist file; this also stops own-infra
|
||||
from ever being captured as a candidate (early-return).
|
||||
- autolearn `_ad_feed` excludes own-infra AND promotes the EXACT candidate
|
||||
host instead of its registrable — so a tracker subdomain
|
||||
(analytics.tiktok.com) no longer blocks the parent site (tiktok.com).
|
||||
Root cause: the aggressive learner self-promoted secubox.in → 204'd all
|
||||
*.secubox.in for R3; live-mitigated via allowlist, now fixed at source.
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Thu, 18 Jun 2026 19:00:00 +0200
|
||||
|
||||
secubox-toolbox (2.6.56-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* feat(#656): Ad Intelligence — learn · act · measure.
|
||||
|
|
|
|||
|
|
@ -41,6 +41,12 @@ try:
|
|||
except Exception: # pragma: no cover
|
||||
_store = None
|
||||
|
||||
# #659 — resolve client IP → stable per-visitor identity hash (best-effort).
|
||||
try:
|
||||
from _common import mac_hash_of # noqa: E402
|
||||
except Exception: # pragma: no cover
|
||||
mac_hash_of = None
|
||||
|
||||
_executor = concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=1, thread_name_prefix="sbx_ad")
|
||||
|
||||
|
|
@ -50,6 +56,12 @@ _EST_BYTES_PER_REQ = 45000 # honest estimate per blocked ad/tracker request
|
|||
# #656 — operator allowlist (host or registrable, one per line, # comments).
|
||||
# Allowlist ALWAYS wins: an allowlisted host is never 204'd nor recorded.
|
||||
_ALLOW_PATH = "/var/lib/secubox/toolbox/ad-allowlist.txt"
|
||||
# #658 — the appliance's OWN domains. NEVER blocked/learned (the aggressive
|
||||
# learner once self-promoted secubox.in → 204'd all *.secubox.in for R3).
|
||||
# Hard-coded (env-overridable) so it survives a reflash with no allowlist file.
|
||||
_SELF_REGS = {d.strip().lower() for d in
|
||||
os.environ.get("SECUBOX_SELF_DOMAINS", "secubox.in").split(",")
|
||||
if d.strip()}
|
||||
# Path heuristics for 3rd-party ad/track candidate capture (learning only).
|
||||
_AD_PATH = re.compile(r"/ads?/|/adserver|/pagead|/gampad|/doubleclick|/beacon|"
|
||||
r"/pixel|/collect|/track(ing)?|/telemetry|/metric", re.I)
|
||||
|
|
@ -57,6 +69,7 @@ _AD_PATH = re.compile(r"/ads?/|/adserver|/pagead|/gampad|/doubleclick|/beacon|"
|
|||
# Hot-path dict increments only; drained + offloaded to SQLite in _flush.
|
||||
_ctx: dict = {} # (host, site, action) -> [hits, bytes]
|
||||
_cand: dict = {} # (host, site) -> hits
|
||||
_cli: dict = {} # #659 (mac_hash, ad_host) -> [hits, bytes]
|
||||
_allow: set = set()
|
||||
_allow_mtime = 0.0
|
||||
|
||||
|
|
@ -109,6 +122,10 @@ def _allowed(host: str) -> bool:
|
|||
pass
|
||||
h = (host or "").lower()
|
||||
reg = _registrable(h) or h
|
||||
# #658 — own infra always allowed (never block/capture our own domains),
|
||||
# independent of the allowlist file (reflash-safe).
|
||||
if reg in _SELF_REGS or any(h == d or h.endswith("." + d) for d in _SELF_REGS):
|
||||
return True
|
||||
return h in _allow or reg in _allow
|
||||
|
||||
|
||||
|
|
@ -197,16 +214,20 @@ def _flush(force: bool = False) -> None:
|
|||
# thread, so the proxy event loop never touches the DB. Snapshot+clear
|
||||
# under no lock is fine: CPython dict ops are atomic and a missed increment
|
||||
# between snapshot and clear is harmless (stats, not security).
|
||||
if _store is not None and (_ctx or _cand):
|
||||
if _store is not None and (_ctx or _cand or _cli):
|
||||
try:
|
||||
rows = [(h, s, a, v[0], v[1]) for (h, s, a), v in _ctx.items()]
|
||||
cand_rows = [(h, s, n) for (h, s), n in _cand.items()]
|
||||
cli_rows = [(mh, h, v[0], v[1]) for (mh, h), v in _cli.items()]
|
||||
_ctx.clear()
|
||||
_cand.clear()
|
||||
_cli.clear()
|
||||
if rows:
|
||||
_executor.submit(_store.record_ad_blocks, rows)
|
||||
if cand_rows:
|
||||
_executor.submit(_store.record_ad_candidates, cand_rows)
|
||||
if cli_rows:
|
||||
_executor.submit(_store.record_ad_client_blocks, cli_rows)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
@ -261,6 +282,20 @@ class AdGhost:
|
|||
_ctx[k] = v
|
||||
except Exception:
|
||||
pass
|
||||
# #659 — per-visitor breakdown: resolve the client identity and
|
||||
# tally this blocked ad host against it. Dict increment only.
|
||||
try:
|
||||
if mac_hash_of is not None and len(_cli) < 50000:
|
||||
ip = flow.client_conn.peername[0] if flow.client_conn.peername else None
|
||||
mh = mac_hash_of(ip) if ip else None
|
||||
if mh:
|
||||
ck = (mh, host)
|
||||
cv = _cli.get(ck) or [0, 0]
|
||||
cv[0] += 1
|
||||
cv[1] += _EST_BYTES_PER_REQ
|
||||
_cli[ck] = cv
|
||||
except Exception:
|
||||
pass
|
||||
_flush()
|
||||
elif f.get("ad_learn", True) and site:
|
||||
# #656 — aggressive candidate capture: 3rd-party request whose path
|
||||
|
|
|
|||
|
|
@ -176,6 +176,11 @@ def _ad_feed() -> int:
|
|||
sys.stderr.write(f"autolearn: ad query failed: {e}\n")
|
||||
return -1
|
||||
allow = _load_ad_allowlist()
|
||||
# #658 — never promote the appliance's own domains (the learner once
|
||||
# self-promoted secubox.in). Hard default + env-overridable.
|
||||
self_doms = {d.strip().lower() for d in
|
||||
os.environ.get("SECUBOX_SELF_DOMAINS", "secubox.in").split(",")
|
||||
if d.strip()}
|
||||
promoted: set = set()
|
||||
for r in rows:
|
||||
h = (r[0] or "").lower().strip(".")
|
||||
|
|
@ -184,7 +189,12 @@ def _ad_feed() -> int:
|
|||
reg = registrable(h) or h
|
||||
if h in allow or reg in allow:
|
||||
continue
|
||||
promoted.add(reg)
|
||||
if reg in self_doms or any(h == d or h.endswith("." + d) for d in self_doms):
|
||||
continue
|
||||
# #658 — promote the EXACT host, NOT the registrable: blocking a tracker
|
||||
# subdomain (analytics.tiktok.com) must never block the parent site
|
||||
# (tiktok.com). Dedicated ad hosts are already registrable-level.
|
||||
promoted.add(h)
|
||||
if not promoted:
|
||||
return 0
|
||||
# MERGE with existing learned-trackers.txt (union, dedup, cap).
|
||||
|
|
|
|||
|
|
@ -2436,6 +2436,13 @@ async def admin_ad_stats(hours: int = 24) -> dict:
|
|||
return store.ad_stats(hours=h)
|
||||
|
||||
|
||||
@router.get("/admin/ad-stats/client/{mac_hash}")
async def admin_ad_stats_client(mac_hash: str, hours: int = 24) -> dict:
    """#659 — per-visitor ad-block drill-down (read-only).

    ``hours`` falls back to 24 when it is ``None`` and is clamped to the
    1..168 range before being passed to ``store.ad_client_stats``; the
    store's result is returned unchanged.
    """
    requested = 24 if hours is None else hours
    window = int(requested)
    if window > 168:
        window = 168
    if window < 1:
        window = 1
    return store.ad_client_stats(mac_hash, hours=window)
|
||||
|
||||
|
||||
@router.get("/admin/ghost")
|
||||
async def admin_ghost() -> dict:
|
||||
"""#566 — ad/banner ghoster savings (R3+/R4). Read-only counters."""
|
||||
|
|
|
|||
|
|
@ -58,6 +58,10 @@ CREATE TABLE IF NOT EXISTS ad_block_stats (
|
|||
CREATE TABLE IF NOT EXISTS ad_candidates (
|
||||
host TEXT, site TEXT, hits INTEGER NOT NULL DEFAULT 0, last_seen REAL,
|
||||
PRIMARY KEY (host, site));
|
||||
CREATE TABLE IF NOT EXISTS ad_block_client_host (
|
||||
mac_hash TEXT, ad_host TEXT, hits INTEGER NOT NULL DEFAULT 0,
|
||||
bytes INTEGER NOT NULL DEFAULT 0, last_seen REAL,
|
||||
PRIMARY KEY (mac_hash, ad_host));
|
||||
"""
|
||||
|
||||
|
||||
|
|
@ -86,6 +90,43 @@ def record_ad_blocks(rows) -> None:
|
|||
log.debug("record_ad_blocks failed: %s", e)
|
||||
|
||||
|
||||
def record_ad_client_blocks(rows) -> None:
    """Batch-upsert the per-visitor ad-block tallies (#659).

    rows: iterable of (mac_hash, ad_host, hits, bytes). Entries that are
    falsy or whose mac_hash is empty are dropped. For an existing
    (mac_hash, ad_host) pair the hits and bytes are added to the stored
    values and last_seen is refreshed. Errors raised while writing are
    logged at debug level and not propagated.
    """
    kept = [entry for entry in rows if entry and entry[0]]
    if len(kept) == 0:
        return
    stamp = time.time()
    upsert_sql = (
        "INSERT INTO ad_block_client_host(mac_hash,ad_host,hits,bytes,last_seen) "
        "VALUES(?,?,?,?,?) ON CONFLICT(mac_hash,ad_host) DO UPDATE SET "
        "hits=hits+excluded.hits, bytes=bytes+excluded.bytes, last_seen=excluded.last_seen"
    )
    try:
        with _conn() as db:
            # Parameters are built inside the guarded region so a malformed
            # row is logged like any other write failure.
            params = []
            for visitor, ad_host, hits, nbytes in kept:
                params.append((visitor, ad_host or "", int(hits), int(nbytes), stamp))
            db.executemany(upsert_sql, params)
    except Exception as exc:
        log.debug("record_ad_client_blocks failed: %s", exc)
|
||||
|
||||
|
||||
def ad_client_stats(mac_hash: str, hours: int = 24, top: int = 25) -> dict:
    """Return one visitor's blocked-ad summary for the time window (#659).

    Rows are selected by ``last_seen >= now - hours * 3600``. The result
    carries the visitor's ``mac_hash``, the summed ``total`` hits and up to
    ``top`` entries in ``top_hosts`` ordered by hits, descending. On any
    error the failure is logged at debug level and whatever was filled in
    so far (zero/empty by default) is returned.
    """
    since = time.time() - hours * 3600
    result = {"mac_hash": mac_hash, "total": 0, "top_hosts": []}
    total_sql = (
        "SELECT SUM(hits) FROM ad_block_client_host WHERE mac_hash=? AND last_seen>=?"
    )
    hosts_sql = (
        "SELECT ad_host, SUM(hits), SUM(bytes) FROM ad_block_client_host "
        "WHERE mac_hash=? AND last_seen>=? GROUP BY ad_host ORDER BY SUM(hits) DESC LIMIT ?"
    )
    try:
        with _conn() as db:
            first = db.execute(total_sql, (mac_hash, since)).fetchone()
            summed = first[0] if first else 0
            result["total"] = int(summed or 0)

            # Collect into a local list first so a mid-query failure leaves
            # top_hosts empty rather than partially filled.
            hosts = []
            for ad_host, hits, nbytes in db.execute(hosts_sql, (mac_hash, since, top)):
                hosts.append({"host": ad_host, "hits": int(hits), "bytes": int(nbytes or 0)})
            result["top_hosts"] = hosts
    except Exception as exc:
        log.debug("ad_client_stats failed: %s", exc)
    return result
|
||||
|
||||
|
||||
def record_ad_candidates(rows) -> None:
|
||||
"""rows: iterable of (host, site, hits)."""
|
||||
rows = [r for r in rows if r and r[0]]
|
||||
|
|
@ -117,7 +158,8 @@ def ad_candidate_sites(min_sites: int = 1, max_hosts: int = 5000) -> list:
|
|||
def ad_stats(hours: int = 24, top: int = 25) -> dict:
|
||||
cutoff = time.time() - hours * 3600
|
||||
out = {"window_hours": hours, "total_blocked": 0, "total_bytes": 0,
|
||||
"by_action": {"block": 0, "silent": 0}, "top_hosts": [], "top_sites": []}
|
||||
"by_action": {"block": 0, "silent": 0}, "top_hosts": [], "top_sites": [],
|
||||
"top_visitors": []}
|
||||
try:
|
||||
with _conn() as c:
|
||||
for action, hits in c.execute(
|
||||
|
|
@ -134,6 +176,10 @@ def ad_stats(hours: int = 24, top: int = 25) -> dict:
|
|||
out["top_sites"] = [{"site": s, "hits": int(n)} for s, n in c.execute(
|
||||
"SELECT site, SUM(hits) FROM ad_block_stats WHERE action='block' AND last_seen>=? AND site<>'' "
|
||||
"GROUP BY site ORDER BY SUM(hits) DESC LIMIT ?", (cutoff, top))]
|
||||
out["top_visitors"] = [{"mac_hash": mh, "hits": int(n)} for mh, n in c.execute(
|
||||
"SELECT mac_hash, SUM(hits) FROM ad_block_client_host "
|
||||
"WHERE last_seen>=? AND mac_hash<>'' GROUP BY mac_hash "
|
||||
"ORDER BY SUM(hits) DESC LIMIT ?", (cutoff, top))]
|
||||
except Exception as e:
|
||||
log.debug("ad_stats failed: %s", e)
|
||||
return out
|
||||
|
|
|
|||
39
packages/secubox-toolbox/tests/test_ad_client_api.py
Normal file
39
packages/secubox-toolbox/tests/test_ad_client_api.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
"""Tests for GET /admin/ad-stats/client/{mac_hash} (#659)."""
|
||||
import asyncio
|
||||
|
||||
from secubox_toolbox import api, store
|
||||
|
||||
_CANNED = {
|
||||
"mac_hash": "MH_FIXED",
|
||||
"total": 7,
|
||||
"top_hosts": [{"host": "ads.example.com", "hits": 5, "bytes": 225000},
|
||||
{"host": "px.tracker.io", "hits": 2, "bytes": 90000}],
|
||||
}
|
||||
|
||||
|
||||
def test_ad_stats_client_returns_store_data(monkeypatch):
    """The endpoint hands back whatever the store layer returns."""
    def canned_stats(mac_hash, hours=24, **kw):
        return dict(_CANNED)

    monkeypatch.setattr(store, "ad_client_stats", canned_stats)
    payload = asyncio.run(api.admin_ad_stats_client("MH_FIXED", hours=24))
    assert payload["mac_hash"] == "MH_FIXED"
    assert payload["total"] == 7
    assert payload["top_hosts"][0]["host"] == "ads.example.com"
|
||||
|
||||
|
||||
def test_ad_stats_client_clamps_hours(monkeypatch):
    """Out-of-range ``hours`` values reach the store clamped to 1..168."""
    seen = {}

    def spy(mac_hash, hours=24, **kw):
        # Remember the arguments the endpoint forwarded to the store.
        seen["hours"] = hours
        seen["mac_hash"] = mac_hash
        return dict(_CANNED)

    monkeypatch.setattr(store, "ad_client_stats", spy)

    # Below the lower bound → raised to 1.
    asyncio.run(api.admin_ad_stats_client("MH", hours=0))
    assert seen["hours"] == 1
    assert seen["mac_hash"] == "MH"

    # Above the upper bound → capped at 168.
    asyncio.run(api.admin_ad_stats_client("MH", hours=9999))
    assert seen["hours"] == 168
|
||||
63
packages/secubox-toolbox/tests/test_ad_client_store.py
Normal file
63
packages/secubox-toolbox/tests/test_ad_client_store.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
"""Tests for per-visitor ad-block breakdown store (#659)."""
|
||||
from pathlib import Path
|
||||
|
||||
from secubox_toolbox import store
|
||||
|
||||
|
||||
def _fresh(tmp_path, mp):
    """Point ``store.DB_PATH`` at a throwaway ``t.db`` inside ``tmp_path``."""
    scratch_db = Path(tmp_path).joinpath("t.db")
    mp.setattr(store, "DB_PATH", scratch_db)
|
||||
|
||||
|
||||
def test_record_ad_client_blocks_accumulates(tmp_path, monkeypatch):
    """Two batches for the same (visitor, host) add up instead of overwriting."""
    _fresh(tmp_path, monkeypatch)
    first_batch = [
        ("mh_a", "ads.example.com", 3, 3 * 45000),
        ("mh_a", "px.tracker.io", 1, 45000),
    ]
    second_batch = [("mh_a", "ads.example.com", 2, 2 * 45000)]
    for batch in (first_batch, second_batch):
        store.record_ad_client_blocks(batch)

    stats = store.ad_client_stats("mh_a", hours=24)
    assert stats["mac_hash"] == "mh_a"
    assert stats["total"] == 6

    by_host = {}
    for entry in stats["top_hosts"]:
        by_host[entry["host"]] = entry
    assert by_host["ads.example.com"]["hits"] == 5
    assert by_host["ads.example.com"]["bytes"] == 5 * 45000
    assert by_host["px.tracker.io"]["hits"] == 1
|
||||
|
||||
|
||||
def test_record_ad_client_blocks_skips_empty_mac(tmp_path, monkeypatch):
    """Rows with an empty mac_hash never show up among the top visitors."""
    _fresh(tmp_path, monkeypatch)
    anonymous_row = ("", "ads.example.com", 5, 5 * 45000)
    identified_row = ("mh_b", "ads.example.com", 2, 2 * 45000)
    store.record_ad_client_blocks([anonymous_row, identified_row])

    summary = store.ad_stats(hours=24)
    visitor_ids = set()
    for visitor in summary["top_visitors"]:
        visitor_ids.add(visitor["mac_hash"])
    assert "" not in visitor_ids
    assert visitor_ids == {"mh_b"}
|
||||
|
||||
|
||||
def test_ad_stats_top_visitors_ranked(tmp_path, monkeypatch):
    """Visitors are ranked by their summed hits across all ad hosts."""
    _fresh(tmp_path, monkeypatch)
    batch = [
        ("mh_busy", "ads.example.com", 10, 0),
        ("mh_busy", "px.tracker.io", 5, 0),
        ("mh_quiet", "ads.example.com", 2, 0),
    ]
    store.record_ad_client_blocks(batch)

    ranking = store.ad_stats(hours=24)["top_visitors"]
    leader = ranking[0]
    assert leader["mac_hash"] == "mh_busy" and leader["hits"] == 15
    runner_up = ranking[1]
    assert runner_up["mac_hash"] == "mh_quiet" and runner_up["hits"] == 2
|
||||
|
||||
|
||||
def test_ad_client_stats_window(tmp_path, monkeypatch):
    """Rows outside the requested window are excluded from the drill-down."""
    _fresh(tmp_path, monkeypatch)
    store.record_ad_client_blocks([("mh_c", "ads.example.com", 4, 0)])

    # 24h window: the row written just above is included.
    inside = store.ad_client_stats("mh_c", hours=24)
    assert inside["total"] == 4

    # Zero-hour window: the cutoff is the query time itself, which the test
    # expects to fall after the row's last_seen → nothing matches.
    outside = store.ad_client_stats("mh_c", hours=0)
    assert outside["mac_hash"] == "mh_c"
    assert outside["total"] == 0
    assert outside["top_hosts"] == []
|
||||
76
packages/secubox-toolbox/tests/test_ad_ghost_visitor.py
Normal file
76
packages/secubox-toolbox/tests/test_ad_ghost_visitor.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
"""Tests for ad_ghost per-visitor accumulation on the block path (#659)."""
|
||||
import time
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
from mitmproxy_addons import ad_ghost
|
||||
|
||||
|
||||
def _flow(host, path="/", peer="10.99.1.5"):
|
||||
req = types.SimpleNamespace(
|
||||
pretty_host=host,
|
||||
path=path,
|
||||
headers=types.SimpleNamespace(get=lambda k, d="": d),
|
||||
)
|
||||
return types.SimpleNamespace(
|
||||
request=req,
|
||||
client_conn=types.SimpleNamespace(peername=(peer, 0)),
|
||||
response=None,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset(monkeypatch, tmp_path):
    """Isolate every test in this module from ad_ghost's module-level state.

    Before the test: empty the hot-path dicts, reset the allowlist cache,
    redirect the allowlist path into ``tmp_path`` and stub the filter lookup
    and the visitor-identity resolver. After the test: empty the dicts again
    and zero the cumulative blocked-request counter.
    """
    def _drain_hot_dicts():
        # Look the dicts up at call time so we always clear the live objects.
        for attr in ("_ctx", "_cand", "_cli"):
            getattr(ad_ghost, attr).clear()

    def _filters(force=False):
        return {
            "ad_ghost": 1, "ad_ghost_block": 1, "ad_learn": 1, "autolearn": 1,
            "ad_ghost_categories": {},
        }

    _drain_hot_dicts()
    ad_ghost._allow = set()
    ad_ghost._allow_mtime = 0.0

    allow_file = tmp_path / "ad-allowlist.txt"
    monkeypatch.setattr(ad_ghost, "_ALLOW_PATH", str(allow_file))
    monkeypatch.setattr(ad_ghost, "get_filters", _filters)
    monkeypatch.setattr(ad_ghost, "mac_hash_of", lambda ip: "MH_FIXED")
    # Pretend a flush just happened so the 5s flush gate makes _flush()
    # early-return and the hot-path dicts are never drained before the
    # assertions run.
    monkeypatch.setattr(ad_ghost, "_last_flush", time.time())

    yield

    _drain_hot_dicts()
    # These tests issue 204 blocks; zero the cumulative counter so a
    # later-collected test asserting on _counts is not polluted.
    ad_ghost._counts["blocked_requests"] = 0
|
||||
|
||||
|
||||
def test_blocked_ad_recorded_per_visitor():
    """A blocked ad request is answered with 204 and tallied for the visitor."""
    flow = _flow("ad.doubleclick.net", path="/gampad/ads")
    addon = ad_ghost.AdGhost()
    addon.requestheaders(flow)
    assert flow.response is not None and flow.response.status_code == 204

    key = ("MH_FIXED", "ad.doubleclick.net")
    assert key in ad_ghost._cli
    tally = ad_ghost._cli[key]
    assert tally[0] == 1
    assert tally[1] == ad_ghost._EST_BYTES_PER_REQ
|
||||
|
||||
|
||||
def test_visitor_accumulates_across_requests():
    """Two blocked requests to the same host count as two hits for the visitor."""
    addon = ad_ghost.AdGhost()
    for request_path in ("/a", "/b"):
        addon.requestheaders(_flow("ad.doubleclick.net", path=request_path))
    hits = ad_ghost._cli[("MH_FIXED", "ad.doubleclick.net")][0]
    assert hits == 2
|
||||
|
||||
|
||||
def test_no_visitor_record_when_mac_hash_unavailable(monkeypatch):
    """When the identity resolver yields None, nothing is tallied per visitor."""
    def unresolved(ip):
        return None

    monkeypatch.setattr(ad_ghost, "mac_hash_of", unresolved)
    addon = ad_ghost.AdGhost()
    addon.requestheaders(_flow("ad.doubleclick.net", path="/x"))
    assert ad_ghost._cli == {}
|
||||
|
||||
|
||||
def test_non_ad_host_not_recorded_per_visitor():
    """A host that is neither an ad host nor learned leaves no per-visitor record."""
    ordinary = _flow("static.cnn.com", path="/img.png")
    addon = ad_ghost.AdGhost()
    addon.requestheaders(ordinary)
    assert ad_ghost._cli == {}
|
||||
57
packages/secubox-toolbox/tests/test_ad_learn_hardening.py
Normal file
57
packages/secubox-toolbox/tests/test_ad_learn_hardening.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
"""#658 — ad-learn must never self-block own infra; promote exact host not registrable."""
|
||||
import sys
|
||||
import pathlib
|
||||
import importlib
|
||||
import sqlite3
|
||||
import importlib.util
|
||||
|
||||
ADDON_DIR = pathlib.Path(__file__).resolve().parents[1] / "mitmproxy_addons"
|
||||
sys.path.insert(0, str(ADDON_DIR))
|
||||
|
||||
|
||||
def test_allowed_never_blocks_own_infra(monkeypatch, tmp_path):
    """Own-infra hosts are allowed by the built-in guard alone (#658).

    The allowlist file is pointed at a nonexistent path, so only the
    hard-coded ``_SELF_REGS`` default can allow ``secubox.in`` hosts.
    """
    # The reload below recomputes _SELF_REGS from the environment. Drop any
    # ambient SECUBOX_SELF_DOMAINS override first so the test exercises the
    # hard-coded default and does not depend on the runner's environment.
    monkeypatch.delenv("SECUBOX_SELF_DOMAINS", raising=False)
    import ad_ghost
    importlib.reload(ad_ghost)
    monkeypatch.setattr(ad_ghost, "_ALLOW_PATH", str(tmp_path / "nope.txt"))
    assert ad_ghost._allowed("admin.gk2.secubox.in") is True   # own infra
    assert ad_ghost._allowed("kbin.gk2.secubox.in") is True
    assert ad_ghost._allowed("secubox.in") is True
    assert ad_ghost._allowed("ads.doubleclick.net") is False   # real ad host still blockable
|
||||
|
||||
|
||||
def _load_autolearn():
    """Load the extension-less ``sbin/secubox-toolbox-autolearn`` script.

    The script text is compiled and executed inside a fresh module object
    named ``autolearn_h``, which is returned.
    """
    package_root = pathlib.Path(__file__).resolve().parents[1]
    script = package_root.joinpath("sbin", "secubox-toolbox-autolearn")
    module = importlib.util.module_from_spec(
        importlib.util.spec_from_loader("autolearn_h", loader=None))
    code = compile(script.read_text(), str(script), "exec")
    exec(code, module.__dict__)
    return module
|
||||
|
||||
|
||||
def test_ad_feed_exact_host_and_excludes_self(tmp_path, monkeypatch):
    """The ad feed promotes the exact candidate host and never own infra (#658)."""
    db = tmp_path / "t.db"
    out = tmp_path / "learned.txt"

    # Each candidate host is seen on two sites:
    #  - analytics.tiktok.com → promote EXACT, not tiktok.com
    #  - admin.gk2.secubox.in (our own admin host) → must NOT promote
    schema = ("CREATE TABLE ad_candidates(host TEXT, site TEXT, hits INT, "
              "last_seen REAL, PRIMARY KEY(host,site));")
    inserts = "".join(
        f"INSERT INTO ad_candidates VALUES('{host}','{site}',1,0);"
        for host in ("analytics.tiktok.com", "admin.gk2.secubox.in")
        for site in ("a.com", "b.com"))
    con = sqlite3.connect(db)
    con.executescript(schema + inserts)
    con.commit()
    con.close()

    environment = {
        "SECUBOX_AUTOLEARN_DB": str(db),
        "SECUBOX_AUTOLEARN_OUT": str(out),
        "SECUBOX_AD_ALLOWLIST": str(tmp_path / "allow.txt"),
        "SECUBOX_AD_MIN_SITES": "2",
        "SECUBOX_SELF_DOMAINS": "secubox.in",
    }
    for name, value in environment.items():
        monkeypatch.setenv(name, value)

    autolearn = _load_autolearn()
    autolearn._ad_feed()

    learned = set(out.read_text().split())
    assert "analytics.tiktok.com" in learned      # exact host promoted
    assert "tiktok.com" not in learned            # NOT the parent site
    assert "admin.gk2.secubox.in" not in learned  # own infra excluded
    assert "secubox.in" not in learned
|
||||
|
|
@ -184,6 +184,11 @@
|
|||
<h2>🌐 Top sites visités</h2>
|
||||
<div id="ads-sites"><div class="empty">loading…</div></div>
|
||||
</div>
|
||||
<div class="card" style="grid-column:1/-1">
|
||||
<h2>👤 Top visiteurs (pubs bloquées)</h2>
|
||||
<div id="ads-visitors"><div class="empty">loading…</div></div>
|
||||
<div id="ads-client-detail"></div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
|
@ -523,6 +528,26 @@ async function loadAds() {
|
|||
document.getElementById('ads-sites').innerHTML = siteRows
|
||||
? '<table><thead><tr><th>Site</th><th>pubs bloquées</th></tr></thead><tbody>'+siteRows+'</tbody></table>'
|
||||
: '';
|
||||
const visRows = (d.top_visitors||[]).map(r=>{
|
||||
const mh = esc(r.mac_hash);
|
||||
return `<tr><td><a href="#" onclick="loadAdsClient('${mh}');return false;"><code>${mh}</code></a></td><td>${r.hits}</td></tr>`;
|
||||
}).join('');
|
||||
document.getElementById('ads-visitors').innerHTML = visRows
|
||||
? '<table><thead><tr><th>Visiteur</th><th>pubs bloquées</th></tr></thead><tbody>'+visRows+'</tbody></table>'
|
||||
: '<div class="empty">aucun visiteur dans la fenêtre</div>';
|
||||
}
|
||||
|
||||
// #659 — drill-down: fetch one visitor's top blocked ad hosts and render them
// into #ads-client-detail. `mh` is the visitor's mac_hash as listed in the
// "Top visiteurs" table.
async function loadAdsClient(mh) {
  // Escape text for insertion into HTML. The replacements must produce
  // entities (&amp; &lt; &gt; …); replacing a character with itself would
  // leave server-provided strings unescaped in innerHTML.
  const esc = s => String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  const detail = document.getElementById('ads-client-detail');
  detail.innerHTML = '<div class="empty">loading…</div>';
  const d = await J('/admin/ad-stats/client/' + encodeURIComponent(mh));
  if (!d || d.__error) {
    // The error text is not ours to trust: escape it like any other value.
    detail.innerHTML = `<div class="empty">${esc((d && d.__error) || 'no data')}</div>`;
    return;
  }
  const rows = (d.top_hosts || []).map(r =>
    `<tr><td><code>${esc(r.host)}</code></td><td>${r.hits}</td><td>${Math.round((r.bytes || 0) / 1024)}</td></tr>`
  ).join('');
  detail.innerHTML = `<h3>👤 <code>${esc(d.mac_hash)}</code> — ${d.total || 0} pubs bloquées</h3>`
    + (rows
      ? '<table><thead><tr><th>Ad host</th><th>bloqués</th><th>Ko</th></tr></thead><tbody>' + rows + '</tbody></table>'
      : '<div class="empty">aucune pub bloquée pour ce visiteur</div>');
}
|
||||
|
||||
async function refreshAll() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user