Compare commits

...

4 Commits

Author SHA1 Message Date
CyberMind
77da033371
Merge pull request #678 from CyberMind-FR/feat/662-social-relay
Some checks are pending
License Headers / check (push) Waiting to run
feat(#662): restore /social cross-site tracker graph (faithful social_graph port + block-path correlation)
2026-06-19 10:13:36 +02:00
3850da5479 fix(toolbox-ng): correlate social edges on the block path (blocked trackers carry the cross-site cookie) (ref #662) 2026-06-19 09:58:07 +02:00
040e460876 chore: changelog 0.1.10 — social relay (ref #662) 2026-06-19 09:53:10 +02:00
55f9e4c803 feat(toolbox): restore /social cross-site tracker graph via Go engine + portal ingest (ref #662)
The #662 Phase-7 cutover decommissioned the in-process Python social_graph
addon that fed social.record_edge(), freezing the kbin /social d3 graph
(social_edges -> social_nodes/social_links in toolbox.db).

Go engine (packages/secubox-toolbox-ng/cmd/sbxmitm/social.go):
- cookieIDHash: byte-exact port of social.cookie_id_hash (lower-case
  domain+name, raw value, NUL separators), proven by a shared Python-generated
  fixture (social-cookie-id-fixtures.json) asserted by both social_test.go and
  tests/test_social_parity.py (anti-rig, same discipline as the jar harness).
- isDenyListed + _DEFAULT_DENY_COOKIES set; registrableSocial (the addon's
  _registrable_domain eTLD+1 flavour, distinct from policy.registrable);
  Set-Cookie + request-Cookie 3rd-party edge extraction; CMP consent_state
  (none_seen/pre_consent/post_consent) via a per-(peer,site) in-memory log.
- Edges (hash-only, NEVER raw values) buffered + flushed every 10s to the
  portal /__toolbox/social-event; WG-peer flows only; gated by --social-relay
  (default true); fire-and-forget, never blocks the flow.

Python portal (secubox_toolbox/api.py):
- POST /__toolbox/social-event ingest (sibling of /__toolbox/ad-event, same
  unauthenticated R3-perimeter trust + 2MB body guard): per-row record_edge
  with try/except, cap 5000, always 204; debounced safety fold_recent
  (<= once/60s) so new edges surface promptly between the existing app.py
  social_fold_loop ticks.

Go: build offline arm64+darwin, go vet, go test -race all green.
2026-06-19 09:51:26 +02:00
8 changed files with 1150 additions and 1 deletions

View File

@ -210,6 +210,14 @@ type Proxy struct {
// analysis sidecar sockets (#662 — restoring the "Qui te piste?" events the
// decommissioned Python addons fed). Default on; relay.go is the transport.
analysisRelay bool
// socialRelay gates the cross-site cookie-tracker correlation (#662 — restoring
// the kbin /social graph the decommissioned Python social_graph addon fed).
// Default on. social.go is the engine; edges are batched + POSTed to the
// portal's /__toolbox/social-event ingest. nil → off (CONNECT PoC / tests).
socialRelayOn bool
social *socialRelay
consent *consentLog
}
// recordAdBlock forwards a 204'd ad/tracker block to the engine's metrics
@ -378,6 +386,12 @@ func (px *Proxy) mitmPipeline(tconn *tls.Conn, rawClient net.Conn, host, verdict
// per-client breakdown keys on the WG persona hash. recordAdBlock is
// O(1) and never blocks the block path.
px.recordAdBlock(host, refererSite(req.Header.Get("Referer")), clientHashFromConn(rawClient))
// #662 — the cross-site tracking evidence lives PRECISELY on the blocked
// trackers: the browser still SENT its 3rd-party Cookie to doubleclick/
// adnxs/… before we 204 it. Correlate that request-Cookie here (resp=nil,
// request-only) or the /social graph misses the very trackers it exists to
// expose. Hash-only, WG-peer only, fire-and-forget — same as the allow path.
px.emitSocial(peerIP(rawClient), host, req, nil)
writeRaw(tconn, 204, "No Content", map[string]string{"X-SecuBox-Ng": "blocked"}, nil)
return
}
@ -447,6 +461,17 @@ func (px *Proxy) mitmPipeline(tconn *tls.Conn, rawClient net.Conn, host, verdict
// relayed names byte-for-byte the origin's. Fire-and-forget, gated.
px.emitCookies(relayIP, clientHash, req, resp)
// #662 — cross-site cookie-tracker correlation (restores the kbin /social
// graph). FAITHFUL to the decommissioned Python social_graph addon: extract
// 3rd-party cookie edges (Set-Cookie + request Cookie), hash the identifier
// (cookieIDHash — NEVER the raw value), classify consent_state, and buffer
// them for the batched POST to the portal /__toolbox/social-event ingest.
// Like the addon, this ONLY fires for known R3 WG peers (macHashOf, not the
// raw-IP fallback): non-WG flows yield no edges. This call covers allow|mitm
// flows only: the block path returns earlier, after running its own
// request-only emitSocial (resp=nil) before the 204, and the splice paths
// never reach this point. Gated by --social-relay; pure + non-blocking (the
// flush is a background goroutine).
px.emitSocial(relayIP, host, req, resp)
// Poison: only on MITM'd tracker flows (never on allow/own-infra), and only
// when the jar key is loaded. Replaces tracking-id Set-Cookie values with a
// stable fabricated persona; benign cookies pass through untouched.
@ -515,6 +540,8 @@ func main() {
"CONSENTED DEMONSTRATION: relax a page's CSP so the injected transparency-banner loader runs even on strict-CSP sites, and flag the bypass (banner shows 🔓). Only on injected 2xx text/html R3 responses; never on non-injected responses. Set false to never touch CSP.")
analysisRelay := flag.Bool("analysis-relay", true,
"relay per-flow telemetry (dpi/cookies/ja4) to the analysis sidecar sockets so the kbin \"Qui te piste?\" events refill (#662; replaces the decommissioned Python relay addons). Fire-and-forget; a dead/slow sidecar never affects the proxy. Set false to emit nothing.")
socialRelay := flag.Bool("social-relay", true,
"compute cross-site cookie-tracker edges and POST them to the portal /__toolbox/social-event ingest so the kbin /social graph refills (#662; replaces the decommissioned Python social_graph addon). Hash-only (never raw cookie values); WG-peer flows only; batched + fire-and-forget — a dead/slow portal never affects the proxy. Set false to emit nothing.")
flag.Parse()
ca, err := loadCA(*caCert, *caKey)
if err != nil {
@ -545,7 +572,16 @@ func main() {
cspDemo: *cspDemo,
analysisRelay: *analysisRelay,
socialRelayOn: *socialRelay,
social: newSocialRelay(),
consent: newConsentLog(),
}
// #662 — start the social-edge flusher: the MITM path buffers cross-site
// tracker edges into px.social, drained every 10s to the portal's
// /__toolbox/social-event (best-effort, fire-and-forget) so the kbin /social
// graph (frozen since the cutover) refills.
go px.social.runFlusher(*portal)
// #662 — start the ad-block metrics flusher: the block path tallies every
// 204 into px.ads, drained every 10s to the portal's /__toolbox/ad-event
// (best-effort, fire-and-forget) so the #ads dashboard sees blocks again.

View File

@ -18,7 +18,9 @@
// avatar → /run/secubox/avatar.sock POST /fingerprint
// ja4 → /run/secubox/threat-analyst.sock POST /ja4
// soc_relay → /run/secubox/soc.sock POST /event
// social_graph: in-process (no socket) — correlated inside the engine, not emitted.
// social_graph: correlated in-process (social.go) — edges (hash-only, never raw
// cookie values) are NOT emitted to a module socket but POSTed to the portal
// /__toolbox/social-event ingest (the social store lives in the toolbox/portal).
//
// emit takes the full socket PATH (not an http+unix:// URL) plus the route in
// the payload's destination; callers build the path from the table above.

View File

@ -0,0 +1,605 @@
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
//
// SecuBox-Deb :: toolbox-ng :: cross-site cookie-tracker correlation (#662)
//
// Restores the kbin "/social" cross-site tracker graph, frozen since the #662
// Phase-7 cutover decommissioned the in-process Python `social_graph` addon
// (packages/secubox-toolbox/mitmproxy_addons/social_graph.py). The graph reads
// social_nodes/social_links in toolbox.db, folded from raw social_edges — and
// the edges stopped flowing when the Python addon was retired.
//
// This is a FAITHFUL Go port of the addon's correlation logic:
// - cookieIDHash : byte-exact port of social.cookie_id_hash (Python = source
// of truth, proven by social_test.go ↔ tests/test_social_parity.py over a
// shared fixture — the same anti-rig discipline as jar.go).
// - isDenyListed + the _DEFAULT_DENY_COOKIES set (social.py).
// - registrableSocial : the addon's _registrable_domain eTLD+1 helper
// (DIFFERENT from policy.go's registrable() — IP literals pass through,
// no port strip, a larger multi-label-TLD table; the graph correctness
// depends on this exact flavour, so it is replicated verbatim and NOT
// consolidated with policy.registrable).
// - the 3rd-party decision (tracker_domain != src_site on eTLD+1) on BOTH the
// response Set-Cookie path and the request Cookie path, mirroring the
// addon's response()+request hooks.
// - the CMP consent-platform detection → consent_state ∈ {none_seen,
// pre_consent, post_consent} via a per-(peer,site) in-memory log.
//
// Privacy/CSPN invariant (the reason the original ran in-process): raw cookie
// VALUES NEVER leave the engine — only the truncated SHA-256 cookieIDHash is
// emitted. The edges are POSTed fire-and-forget to the portal's
// /__toolbox/social-event ingest (sibling of /__toolbox/ad-event), which calls
// social.record_edge(). Best-effort throughout; a dead/slow portal can never
// block or delay a client flow.
//
// Pure standard library — no external modules, no go.sum.
package main
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"log"
"net/http"
"strings"
"sync"
"time"
)
// ── registrableSocial: port of social_graph._registrable_domain ─────────────
//
// Python (mitmproxy_addons/social_graph.py):
//
// h = (host or "").lower().strip(".")
// if not h or h.replace(".", "").isdigit(): return h # raw IP → as-is
// parts = h.split(".")
// if len(parts) < 2: return h
// last_two = ".".join(parts[-2:])
// if last_two in _MULTI_LABEL_TLDS and len(parts) >= 3: return ".".join(parts[-3:])
// return last_two
//
// This DIFFERS from policy.registrable (ad_ghost flavour): no port strip, IP
// literals pass through unchanged (the store later drops IP trackers via
// _is_ip), and the multi-label-TLD table below is the addon's larger set. The
// graph's 3rd-party comparison is done with THIS function, so it must match the
// addon exactly.
// socialMultiLabelTLDs lists the two-label suffixes for which registrableSocial
// keeps three labels instead of two (e.g. example.co.uk rather than co.uk).
var socialMultiLabelTLDs = map[string]bool{
// United Kingdom
"co.uk": true, "ac.uk": true, "gov.uk": true, "org.uk": true, "net.uk": true,
// Japan
"co.jp": true, "ne.jp": true, "ac.jp": true,
// Australia
"com.au": true, "net.au": true, "org.au": true,
// Brazil, China, Hong Kong, Taiwan, Mexico
"com.br": true, "com.cn": true, "com.hk": true, "com.tw": true, "com.mx": true,
}
// registrableSocial reduces host to the registrable domain used by the social
// graph: lower-cased, surrounding dots trimmed, then the last two labels — or
// the last three when the last two form an entry of socialMultiLabelTLDs.
// Empty input, all-digit hosts (IPv4-looking) and single-label hosts are
// returned as-is after normalisation.
func registrableSocial(host string) string {
	h := strings.ToLower(host)
	h = strings.Trim(h, ".")
	// Blank, or digits-and-dots only: nothing to reduce.
	if h == "" || isAllDigits(strings.ReplaceAll(h, ".", "")) {
		return h
	}
	labels := strings.Split(h, ".")
	n := len(labels)
	if n < 2 {
		return h
	}
	suffix := labels[n-2] + "." + labels[n-1]
	if n >= 3 && socialMultiLabelTLDs[suffix] {
		// Multi-label public suffix: keep one more label.
		return labels[n-3] + "." + suffix
	}
	return suffix
}
// ── cookieIDHash: BYTE-EXACT port of social.cookie_id_hash ───────────────────
//
// Reference implementation (secubox_toolbox/social.py):
//
// h = sha256()
// h.update(tracker_domain.lower().encode("utf-8","replace")); h.update(b"\x00")
// h.update(cookie_name.lower().encode("utf-8","replace")); h.update(b"\x00")
// h.update(cookie_value.encode("utf-8","replace"))
// return h.hexdigest()[:16]
//
// The digest input is lower(domain) NUL lower(name) NUL value: the domain and
// the cookie name are case-folded, the cookie value is hashed verbatim. The
// result is the hex encoding of the first 8 digest bytes, i.e. the first 16
// characters of the hex digest.
func cookieIDHash(trackerDomain, cookieName, cookieValue string) string {
	const nul = "\x00"
	// Single concatenation feeds the same byte stream as three separate writes.
	material := strings.ToLower(trackerDomain) + nul + strings.ToLower(cookieName) + nul + cookieValue
	digest := sha256.Sum256([]byte(material))
	return hex.EncodeToString(digest[:8])
}
// ── deny-list: port of social._DEFAULT_DENY_COOKIES + is_deny_listed ─────────
//
// socialDenyCookies holds the cookie names that are never recorded as tracker
// identifiers. Keys are stored lower-case; isDenyListed normalises its input
// before the lookup.
var socialDenyCookies = map[string]bool{
	// session
	"phpsessid":         true,
	"jsessionid":        true,
	"asp.net_sessionid": true,
	"ci_session":        true,
	"express.sid":       true,
	"connect.sid":       true,
	"sails.sid":         true,
	"django_session":    true,
	"laravel_session":   true,
	"flask_session":     true,
	"session":           true,
	"sessionid":         true,
	// csrf
	"_csrf":           true,
	"_csrf_token":     true,
	"xsrf-token":      true,
	"csrftoken":       true,
	"csrf":            true,
	"x-csrf-token":    true,
	"anti-csrf-token": true,
	// auth (1st-party)
	"auth":           true,
	"auth_token":     true,
	"access_token":   true,
	"refresh_token":  true,
	"bearer":         true,
	"remember_token": true,
	"remember_me":    true,
	"_oauth2_proxy":  true,
	// cloudflare / consent / locale (low signal)
	"__cf_bm":              true,
	"cf_clearance":         true,
	"consent":              true,
	"cookieconsent_status": true,
	"locale":               true,
	"lang":                 true,
	"language":             true,
	"_locale":              true,
}

// isDenyListed reports whether cookieName must be ignored by the correlation.
// The name is trimmed and lower-cased before the lookup in socialDenyCookies;
// a blank name counts as deny-listed.
func isDenyListed(cookieName string) bool {
	trimmed := strings.TrimSpace(cookieName)
	if trimmed == "" {
		return true
	}
	return socialDenyCookies[strings.ToLower(trimmed)]
}
// ── cookie parsers: port of _parse_set_cookie / _parse_cookie_header ─────────
// parseSetCookieNameValue extracts the cookie name and value from one
// Set-Cookie line (port of social_graph._parse_set_cookie). Only the text
// before the first ';' is considered; it is split on its first '=' and both
// halves are trimmed. ok is false when that field has no '=' or the name is
// blank (attribute-only, nameless or empty lines).
func parseSetCookieNameValue(header string) (name, value string, ok bool) {
	pair, _, _ := strings.Cut(header, ";")
	rawName, rawValue, hasEq := strings.Cut(pair, "=")
	if !hasEq {
		return "", "", false
	}
	if name = strings.TrimSpace(rawName); name == "" {
		return "", "", false
	}
	return name, strings.TrimSpace(rawValue), true
}
// cookiePair holds one name/value pair taken from a request Cookie header.
type cookiePair struct {
	name  string
	value string
}

// parseCookieHeader splits a request Cookie header on ';' and returns the
// trimmed name/value of every segment that contains '=' and has a non-blank
// name (port of social_graph._parse_cookie_header). Segments without '=' or
// without a name are skipped; the result is nil when nothing qualifies.
func parseCookieHeader(header string) []cookiePair {
	var pairs []cookiePair
	for _, segment := range strings.Split(header, ";") {
		rawName, rawValue, hasEq := strings.Cut(segment, "=")
		if !hasEq {
			continue
		}
		cookieName := strings.TrimSpace(rawName)
		if cookieName == "" {
			continue
		}
		pairs = append(pairs, cookiePair{name: cookieName, value: strings.TrimSpace(rawValue)})
	}
	return pairs
}
// extractSetCookieDomainAttr returns the value of the Domain attribute of a
// Set-Cookie line (port of social_graph._extract_domain_attr, whose regex is
// `;\s*domain\s*=`): trimmed, leading dots stripped, lower-cased. It returns ""
// when the line carries no Domain attribute or the attribute is blank.
//
// An occurrence of "domain" (ASCII case-insensitive) counts as the attribute
// only when it is followed, after optional spaces/tabs, by '=' and preceded,
// after optional spaces/tabs, by ';' or the start of the line. An occurrence
// inside a cookie value (e.g. "a=domainlike=1") is therefore skipped.
//
// The attribute name is matched on an ASCII-only case fold of the line that
// keeps every byte at its original offset. A Unicode-aware fold
// (strings.ToLower) can change the byte length of non-ASCII or invalid UTF-8
// input, so offsets found in the folded copy would no longer line up with the
// original line and a valid Domain attribute could be missed.
func extractSetCookieDomainAttr(setCookie string) string {
	const attr = "domain"

	// Length-preserving fold: only A-Z are touched, every other byte is kept.
	folded := []byte(setCookie)
	for i, c := range folded {
		if 'A' <= c && c <= 'Z' {
			folded[i] = c + ('a' - 'A')
		}
	}
	low := string(folded)

	isBlank := func(c byte) bool { return c == ' ' || c == '\t' }

	for from := 0; from <= len(low); {
		rel := strings.Index(low[from:], attr)
		if rel < 0 {
			return ""
		}
		idx := from + rel
		from = idx + 1 // next candidate starts one byte further on

		// '=' must follow the name, after optional blanks.
		eq := idx + len(attr)
		for eq < len(low) && isBlank(low[eq]) {
			eq++
		}
		if eq >= len(low) || low[eq] != '=' {
			continue
		}

		// ';' (or the start of the line) must precede it, after optional blanks.
		prev := idx - 1
		for prev >= 0 && isBlank(low[prev]) {
			prev--
		}
		if prev >= 0 && low[prev] != ';' {
			continue
		}

		// The value runs up to the next ';' and is taken from the original line.
		value, _, _ := strings.Cut(setCookie[eq+1:], ";")
		return strings.ToLower(strings.TrimLeft(strings.TrimSpace(value), "."))
	}
	return ""
}
// srcSiteFromReferer derives the page context of a request (port of
// social_graph._src_site_from_referer). It reads the Referer header, falling
// back to Origin, drops everything up to and including "://", cuts at the
// first '/' and then at the first '?', and returns registrableSocial of what
// remains. It returns "" when neither header is present.
func srcSiteFromReferer(req *http.Request) string {
	source := req.Header.Get("Referer")
	if source == "" {
		if source = req.Header.Get("Origin"); source == "" {
			return ""
		}
	}
	hostPart := source
	if _, afterScheme, hasScheme := strings.Cut(hostPart, "://"); hasScheme {
		hostPart = afterScheme
	}
	// Path first, then query: same order as the two sequential cuts of the port.
	for _, stop := range []byte{'/', '?'} {
		if at := strings.IndexByte(hostPart, stop); at >= 0 {
			hostPart = hostPart[:at]
		}
	}
	return registrableSocial(hostPart)
}
// ── consent-state detection: port of the _consent_log machinery ──────────────
//
// CMP (Consent Management Platform) cookie name prefixes + loader URL fragments,
// verbatim from social_graph._CMP_COOKIE_PREFIXES / _CMP_LOADER_FRAGMENTS. Seen
// on a flow → the site runs a CMP (has_cmp) and, for a cookie, consent recorded
// (consented). consent_state classifies a tracker edge as pre/post/none-consent.
// cmpCookiePrefixes are the lower-case CMP cookie-name markers. consentLog.update
// looks for them as substrings of each lower-cased cookie header line.
var cmpCookiePrefixes = []string{
	// OneTrust
	"optanonconsent", "onetrustconsent", "optanonalertboxclosed",
	// Didomi / IAB TCF
	"didomi_token", "euconsent-v2",
	// Quantcast
	"__qca", "quantcast",
	// Sourcepoint
	"sp_choice", "consentuid", "_sp_",
}

// cmpLoaderFragments are the lower-case URL fragments that reveal a CMP loader.
// consentLog.update looks for them as substrings of the lower-cased flow URL.
var cmpLoaderFragments = []string{
	// OneTrust
	"cdn.cookielaw.org", "onetrust.com",
	// Didomi
	"sdk.privacy-center.org", "didomi.io",
	// Quantcast
	"quantcast.mgr.consensu.org", "quantcast.com/choice",
	// Sourcepoint
	"sourcepoint.mgr.consensu.org", "sp-prod.net",
}

// consentObservation is what has been seen so far for one (peer, site) pair;
// it mirrors the Python dict {"has_cmp": bool, "consented": bool}.
type consentObservation struct {
	hasCMP    bool
	consented bool
}

// consentKey identifies one observation: the peer hash plus the site
// (social_graph._consent_key = (mac_hash, site)).
type consentKey struct {
	macHash string
	site    string
}

// consentLog is the in-memory observation table keyed by consentKey. Every
// method takes mu, so one instance can be shared between concurrent flows.
type consentLog struct {
	mu  sync.Mutex
	log map[consentKey]consentObservation
}

// consentLogCap is the soft cap on the table: once it holds more entries than
// this, the next update starts over from an empty table.
const consentLogCap = 20000

// newConsentLog returns an empty, ready-to-use consentLog.
func newConsentLog() *consentLog {
	return &consentLog{log: make(map[consentKey]consentObservation)}
}

// update folds one flow into the observation for (macHash, site), mirroring
// social_graph._update_consent_log:
//   - url containing a CMP loader fragment marks the site as running a CMP;
//   - any cookie blob containing a CMP cookie marker marks it as running a CMP
//     and as having recorded consent.
//
// Matching is case-insensitive. Flags are only ever set, never cleared, except
// by the wholesale reset that happens when the table exceeds consentLogCap.
func (cl *consentLog) update(macHash, site, url string, cookieBlobs []string) {
	cl.mu.Lock()
	defer cl.mu.Unlock()

	if len(cl.log) > consentLogCap {
		cl.log = make(map[consentKey]consentObservation)
	}

	// matchesAny reports whether the lower-cased text contains one of needles.
	matchesAny := func(text string, needles []string) bool {
		folded := strings.ToLower(text)
		for _, needle := range needles {
			if strings.Contains(folded, needle) {
				return true
			}
		}
		return false
	}

	key := consentKey{macHash: macHash, site: site}
	obs := cl.log[key]
	if matchesAny(url, cmpLoaderFragments) {
		obs.hasCMP = true
	}
	for _, blob := range cookieBlobs {
		if matchesAny(blob, cmpCookiePrefixes) {
			obs.hasCMP, obs.consented = true, true
		}
	}
	cl.log[key] = obs
}

// stateFor classifies the (macHash, site) pair, mirroring
// social_graph._consent_state_for: "post_consent" once a consent cookie has been
// observed, "pre_consent" when only a CMP has been observed, and "none_seen"
// otherwise (including when the pair has no entry at all).
func (cl *consentLog) stateFor(macHash, site string) string {
	cl.mu.Lock()
	defer cl.mu.Unlock()

	// A missing entry reads as the zero observation, which maps to none_seen.
	obs := cl.log[consentKey{macHash: macHash, site: site}]
	switch {
	case obs.consented:
		return "post_consent"
	case obs.hasCMP:
		return "pre_consent"
	default:
		return "none_seen"
	}
}
// ── edge extraction: port of SocialGraph.response()+request() hook logic ──────
// socialEdge is one cross-site tracker edge, mirroring the kwargs the Python
// social.record_edge accepts; it is serialised as-is (JSON tags below) into the
// batch sent to the portal ingest. It never carries a raw cookie value, only
// the hash produced by cookieIDHash.
type socialEdge struct {
// ClientMacHash is the peer identity the edge was observed for.
ClientMacHash string `json:"client_mac_hash"`
// SrcSite is the registrable domain of the page context.
SrcSite string `json:"src_site"`
// TrackerDomain is the registrable domain the cookie belongs to.
TrackerDomain string `json:"tracker_domain"`
// CookieIDHashVal is cookieIDHash(tracker domain, cookie name, cookie value).
CookieIDHashVal string `json:"cookie_id_hash_val"`
// JA4Hash is the optional client fingerprint; left out of the JSON when empty.
JA4Hash string `json:"ja4_hash,omitempty"`
// ConsentState is one of "none_seen", "pre_consent", "post_consent".
ConsentState string `json:"consent_state"`
}
// socialEdgesFor computes the cross-site tracker edges of one MITM'd flow,
// porting SocialGraph.response() plus its request-Cookie tail. It performs no
// I/O; the only side effect is the consent-log update. The caller is
// responsible for emitting the returned edges.
//
// Parameters:
//   - macHash: peer identity; an empty value yields no edges.
//   - req, resp: the flow; either may be nil (resp is nil on request-only calls).
//   - reqHost: the host the request was sent to (flow.request.host).
//   - reqURL: the flow URL, used only for CMP loader detection.
//   - ja4: captured fingerprint, copied onto every edge (may be "").
//   - cl: consent log; nil yields no edges.
//
// Decision logic:
//  1. src_site = registrableSocial(reqHost); nothing is emitted when empty.
//  2. The consent log for (macHash, src_site) is updated with every request
//     Cookie and response Set-Cookie line BEFORE consent_state is derived.
//  3. Set-Cookie path (first 50 lines): for each cookie that is not deny-listed,
//     tracker_domain = registrableSocial(Domain attribute, else reqHost); an
//     edge is emitted only when tracker_domain is non-empty and differs from
//     src_site.
//  4. Cookie path (first 5 headers, first 50 pairs each): only when a
//     Referer/Origin context site exists and differs from
//     registrableSocial(reqHost); one edge per cookie that is not deny-listed,
//     with the context site as src_site and that site's consent_state.
func socialEdgesFor(macHash string, req *http.Request, resp *http.Response, reqHost, reqURL, ja4 string, cl *consentLog) []socialEdge {
	const (
		maxSetCookieLines = 50
		maxCookieHeaders  = 5
		maxCookiePairs    = 50
	)

	if macHash == "" || cl == nil {
		return nil
	}
	srcSite := registrableSocial(reqHost)
	if srcSite == "" {
		return nil
	}

	var setCookies, reqCookies []string
	if resp != nil {
		setCookies = resp.Header.Values("Set-Cookie")
	}
	if req != nil {
		reqCookies = req.Header.Values("Cookie")
	}

	// Consent bookkeeping sees ALL header lines (request first), uncapped.
	blobs := make([]string, 0, len(reqCookies)+len(setCookies))
	blobs = append(append(blobs, reqCookies...), setCookies...)
	cl.update(macHash, srcSite, reqURL, blobs)
	consentState := cl.stateFor(macHash, srcSite)

	var edges []socialEdge
	record := func(site, tracker, cookieName, cookieValue, consent string) {
		edges = append(edges, socialEdge{
			ClientMacHash:   macHash,
			SrcSite:         site,
			TrackerDomain:   tracker,
			CookieIDHashVal: cookieIDHash(tracker, cookieName, cookieValue),
			JA4Hash:         ja4,
			ConsentState:    consent,
		})
	}

	// Set-Cookie path.
	lines := setCookies
	if len(lines) > maxSetCookieLines {
		lines = lines[:maxSetCookieLines]
	}
	for _, line := range lines {
		cookieName, cookieValue, ok := parseSetCookieNameValue(line)
		if !ok || isDenyListed(cookieName) {
			continue
		}
		issuer := extractSetCookieDomainAttr(line)
		if issuer == "" {
			issuer = reqHost
		}
		// Same registrable domain as the page means a 1st-party cookie: no edge.
		if tracker := registrableSocial(issuer); tracker != "" && tracker != srcSite {
			record(srcSite, tracker, cookieName, cookieValue, consentState)
		}
	}

	// Request-Cookie path: the request itself targets the tracker and the
	// page context comes from Referer/Origin.
	if len(reqCookies) == 0 {
		return edges
	}
	trackerDomain := registrableSocial(reqHost)
	if trackerDomain == "" {
		return edges
	}
	ctxSite := srcSiteFromReferer(req)
	if ctxSite == "" || ctxSite == trackerDomain {
		return edges
	}
	ctxConsent := cl.stateFor(macHash, ctxSite)

	headers := reqCookies
	if len(headers) > maxCookieHeaders {
		headers = headers[:maxCookieHeaders]
	}
	for _, header := range headers {
		pairs := parseCookieHeader(header)
		if len(pairs) > maxCookiePairs {
			pairs = pairs[:maxCookiePairs]
		}
		for _, pair := range pairs {
			if !isDenyListed(pair.name) {
				record(ctxSite, trackerDomain, pair.name, pair.value, ctxConsent)
			}
		}
	}
	return edges
}
// ── relay: batch + POST to the portal /__toolbox/social-event ingest ─────────
const (
	// socialFlushInterval is how often runFlusher drains the buffer.
	socialFlushInterval = 10 * time.Second
	// socialBatchCap bounds the edges buffered between two flushes; edges
	// arriving while the buffer is full are dropped.
	socialBatchCap = 5000
)

// socialEventPayload is the JSON body POSTed to the portal
// /__toolbox/social-event ingest: {"edges": [...]}.
type socialEventPayload struct {
	Edges []socialEdge `json:"edges"`
}

// empty reports whether the payload carries no edge.
func (p socialEventPayload) empty() bool {
	return len(p.Edges) == 0
}
// socialRelay is the bounded, mutex-guarded edge buffer between the flow path
// (add) and the flusher (snapshot). It never holds more than socialBatchCap
// edges: whatever does not fit is dropped until a snapshot empties the buffer.
// Edges carry only the cookieIDHash, never raw cookie values.
type socialRelay struct {
	mu  sync.Mutex
	buf []socialEdge
}

// newSocialRelay returns an empty relay.
func newSocialRelay() *socialRelay {
	return new(socialRelay)
}

// add buffers as many of edges as still fit under socialBatchCap, in order,
// and silently drops the rest.
func (s *socialRelay) add(edges ...socialEdge) {
	if len(edges) == 0 {
		return
	}
	s.mu.Lock()
	defer s.mu.Unlock()

	room := socialBatchCap - len(s.buf)
	if room <= 0 {
		return
	}
	if len(edges) > room {
		edges = edges[:room]
	}
	s.buf = append(s.buf, edges...)
}

// snapshot hands the buffered edges over to the caller and leaves the relay
// empty, both under the same lock. An empty relay yields an empty payload.
func (s *socialRelay) snapshot() socialEventPayload {
	s.mu.Lock()
	defer s.mu.Unlock()

	if len(s.buf) == 0 {
		return socialEventPayload{}
	}
	drained := s.buf
	s.buf = nil
	return socialEventPayload{Edges: drained}
}
// socialEventClient is the HTTP client used for the social-event POST (sibling
// of adEventClient). Each request is bounded by a 5-second overall timeout so a
// slow portal cannot stall the flusher. Redirects are never followed (SSRF
// hygiene): CheckRedirect returns http.ErrUseLastResponse, so a 3xx answer is
// handed back to the caller as the response, with a nil error.
var socialEventClient = &http.Client{
Timeout: 5 * time.Second,
CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse },
}
// flushOnce drains the buffer and, when it held anything, POSTs it as JSON to
// the portal's /__toolbox/social-event ingest.
//
// Delivery is best-effort: the buffer is cleared before the POST, so edges of
// a failed flush are dropped rather than retried, and every failure is reduced
// to a single log line. A non-2xx answer is logged as well: the client does
// not follow redirects, so a 3xx (like a 4xx/5xx) comes back without an error
// and would otherwise pass for a successful delivery.
//
// It returns the payload that was taken from the buffer (empty when there was
// nothing to send), whether or not the POST succeeded, so tests can assert the
// snapshot/clear behaviour and the payload shape.
func (s *socialRelay) flushOnce(portal string) socialEventPayload {
	p := s.snapshot()
	if p.empty() {
		return p
	}
	body, err := json.Marshal(p)
	if err != nil {
		log.Printf("social-event marshal failed: %v", err)
		return p
	}
	url := portalTargetURL(portal, "/__toolbox/social-event")
	resp, err := socialEventClient.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		log.Printf("social-event post failed for %s: %v", url, err)
		return p
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Printf("social-event post rejected by %s: %s (%d edges dropped)", url, resp.Status, len(p.Edges))
	}
	return p
}
// ── proxy wiring ──────────────────────────────────────────────────────────
// socialEnabled reports whether cross-site correlation can run on this proxy:
// the --social-relay switch (Proxy.socialRelayOn) is on and both the edge
// buffer and the consent log are allocated. A nil receiver reports false, so a
// bare Proxy (CONNECT PoC / tests) can call it safely.
func (px *Proxy) socialEnabled() bool {
	if px == nil || !px.socialRelayOn {
		return false
	}
	return px.social != nil && px.consent != nil
}
// emitSocial extracts the cross-site tracker edges of one MITM'd flow and
// buffers them for the batched portal POST.
//
//   - clientIP is the client's peer IP. The per-client identity is
//     macHashOf(clientIP); when that is empty (not a known R3 WG peer) nothing
//     is emitted, like the Python addon's `if not mac_hash: return`.
//   - host is the flow's target host.
//   - req must be non-nil; resp may be nil for request-only correlation.
//
// It is a no-op unless socialEnabled() holds. The call itself does no network
// I/O: edges are only appended to the in-memory buffer.
//
// The URL handed to the CMP loader check must contain the host, because the
// loader fragments are host names ("cdn.cookielaw.org", "didomi.io", …). A
// request parsed server-side from an origin-form request line carries only
// path and query in req.URL, so in that case the URL is rebuilt as
// https://<host><request-uri>. An already absolute req.URL is used unchanged,
// and a nil req.URL yields an empty URL instead of a panic.
func (px *Proxy) emitSocial(clientIP, host string, req *http.Request, resp *http.Response) {
	if !px.socialEnabled() || req == nil {
		return
	}
	macHash := macHashOf(clientIP)
	if macHash == "" {
		return // known R3 WG peers only
	}

	var reqURL string
	if u := req.URL; u != nil {
		reqURL = u.String()
		if u.Host == "" {
			authority := host
			if authority == "" {
				authority = req.Host
			}
			if authority != "" {
				reqURL = "https://" + authority + u.RequestURI()
			}
		}
	}

	edges := socialEdgesFor(macHash, req, resp, host, reqURL, "", px.consent)
	px.social.add(edges...)
}
// runFlusher drains the buffer to the portal once per socialFlushInterval. It
// never returns on its own: start it exactly once, in its own goroutine, from
// main(); it then runs for the lifetime of the process.
func (s *socialRelay) runFlusher(portal string) {
	ticker := time.NewTicker(socialFlushInterval)
	defer ticker.Stop()
	for {
		<-ticker.C
		s.flushOnce(portal)
	}
}

View File

@ -0,0 +1,297 @@
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
//
// Cross-engine SOCIAL parity + decision harness — Go side (#662).
//
// Anti-rig: loads testdata/social-cookie-id-fixtures.json (GENERATED by the real
// secubox_toolbox.social.cookie_id_hash) and asserts cookieIDHash reproduces
// every `expect` byte-for-byte — Python is the source of truth, exactly like the
// jar parity harness. The Python side is tests/test_social_parity.py.
//
// The rest exercises the ported decision surface: deny-list, registrableSocial
// (the addon flavour, NOT policy.registrable), the 3rd-party Set-Cookie + Cookie
// edge extraction, consent_state classification, and the relay buffer/flush.
package main
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
)
// socialCookieFixture is one entry of social-cookie-id-fixtures.json: the three
// cookieIDHash inputs, the expected hash, and a free-text reason that is
// printed when the entry fails.
type socialCookieFixture struct {
TrackerDomain string `json:"tracker_domain"`
CookieName string `json:"cookie_name"`
CookieValue string `json:"cookie_value"`
Expect string `json:"expect"`
Why string `json:"why"`
}
// socialCookieFile is the top-level shape of the fixture file:
// {"fixtures": [...]}.
type socialCookieFile struct {
Fixtures []socialCookieFixture `json:"fixtures"`
}
// TestCookieIDHashParity checks cookieIDHash against every entry of the shared
// fixture file: the expected values were generated by the Python
// social.cookie_id_hash, so a pass proves the Go hash is byte-identical
// (lower-case domain+name, raw value, NUL separators).
func TestCookieIDHashParity(t *testing.T) {
	fixturePath := filepath.Join(testdataDir(t), "social-cookie-id-fixtures.json")
	data, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("read social fixtures: %v", err)
	}
	var doc socialCookieFile
	if err = json.Unmarshal(data, &doc); err != nil {
		t.Fatalf("parse social fixtures: %v", err)
	}
	if len(doc.Fixtures) == 0 {
		// An empty fixture set would make the parity check vacuous.
		t.Fatal("no social cookie-id fixtures")
	}
	for i := range doc.Fixtures {
		fx := &doc.Fixtures[i]
		if got := cookieIDHash(fx.TrackerDomain, fx.CookieName, fx.CookieValue); got != fx.Expect {
			t.Errorf("cookieIDHash(%q,%q,%q)=%q want %q (%s)",
				fx.TrackerDomain, fx.CookieName, fx.CookieValue, got, fx.Expect, fx.Why)
		}
	}
}
// TestCookieIDHashFolding pins the folding contract of cookieIDHash: the
// tracker domain and cookie name are case-insensitive, the cookie value is not.
func TestCookieIDHashFolding(t *testing.T) {
	mixedCase := cookieIDHash("DoubleClick.NET", "IDE", "AbC")
	lowerCase := cookieIDHash("doubleclick.net", "ide", "AbC")
	if mixedCase != lowerCase {
		t.Error("domain+name must be case-folded")
	}

	rawValue := cookieIDHash("d.net", "n", "AbC")
	foldedValue := cookieIDHash("d.net", "n", "abc")
	if rawValue == foldedValue {
		t.Error("value must NOT be case-folded")
	}
}
// TestIsDenyListed covers both sides of the deny-list: names that must be
// ignored (any case, surrounding spaces, blank) and tracker names that must
// pass.
func TestIsDenyListed(t *testing.T) {
	mustDeny := []string{"PHPSESSID", "session", " csrftoken ", "__cf_bm", "consent", "locale", "", " "}
	for _, name := range mustDeny {
		if denied := isDenyListed(name); !denied {
			t.Errorf("isDenyListed(%q) = false, want true", name)
		}
	}

	mustPass := []string{"IDE", "_ga", "_fbp", "uid", "datr"}
	for _, name := range mustPass {
		if denied := isDenyListed(name); denied {
			t.Errorf("isDenyListed(%q) = true, want false", name)
		}
	}
}
// TestRegistrableSocial pins the addon flavour of the registrable-domain
// helper: multi-label suffixes keep three labels, case and surrounding dots
// are normalised, and IP literals, single labels and "" come back unchanged.
func TestRegistrableSocial(t *testing.T) {
	table := []struct{ host, want string }{
		{"www.lemonde.fr", "lemonde.fr"},
		{"cdn.api.example.co.uk", "example.co.uk"},
		{"tracker.com", "tracker.com"},
		{"a.b.c.doubleclick.net", "doubleclick.net"},
		{"WWW.Example.COM", "example.com"},
		{"sub.example.com.au", "example.com.au"},
		{"192.168.1.1", "192.168.1.1"}, // IP literal is returned as-is
		{".trailing.dot.net.", "dot.net"},
		{"single", "single"},
		{"", ""},
	}
	for _, row := range table {
		got := registrableSocial(row.host)
		if got != row.want {
			t.Errorf("registrableSocial(%q)=%q want %q", row.host, got, row.want)
		}
	}
}
// TestParseSetCookieNameValue checks the name/value split of a Set-Cookie line:
// attributes after the first ';' are ignored, whitespace around name and value
// is trimmed, an empty value is accepted, and a line without a usable name is
// reported as not-ok.
func TestParseSetCookieNameValue(t *testing.T) {
	type expectation struct {
		line      string
		wantName  string
		wantValue string
		wantOK    bool
	}
	table := []expectation{
		{line: "IDE=AHWqTUm; Domain=.doubleclick.net; Path=/", wantName: "IDE", wantValue: "AHWqTUm", wantOK: true},
		{line: " _ga = GA1.2.3 ; Max-Age=63", wantName: "_ga", wantValue: "GA1.2.3", wantOK: true},
		{line: "Secure; HttpOnly"},
		{line: "=novalue"},
		{line: "empty=", wantName: "empty", wantOK: true},
	}
	for i := range table {
		want := table[i]
		gotName, gotValue, gotOK := parseSetCookieNameValue(want.line)
		if gotName == want.wantName && gotValue == want.wantValue && gotOK == want.wantOK {
			continue
		}
		t.Errorf("parseSetCookieNameValue(%q)=(%q,%q,%v) want (%q,%q,%v)", want.line, gotName, gotValue, gotOK, want.wantName, want.wantValue, want.wantOK)
	}
}
// TestExtractSetCookieDomainAttr checks extraction of the Domain= attribute from
// a Set-Cookie line: the attribute name is matched case-insensitively, the
// result is lower-cased with the leading dot removed, and "" is returned when
// the attribute is absent.
func TestExtractSetCookieDomainAttr(t *testing.T) {
	table := []struct {
		line string
		want string
	}{
		{"IDE=x; Domain=.doubleclick.net; Path=/", "doubleclick.net"},
		{"a=b; domain=Example.COM", "example.com"},
		{"a=b; Path=/", ""},
		{"a=b", ""},
		// A cookie VALUE that merely contains "domain" is not the attribute.
		{"a=domainlike=1; Path=/", ""},
	}
	for _, tc := range table {
		got := extractSetCookieDomainAttr(tc.line)
		if got == tc.want {
			continue
		}
		t.Errorf("extractSetCookieDomainAttr(%q)=%q want %q", tc.line, got, tc.want)
	}
}
// TestSrcSiteFromReferer walks one request through three header states: no
// Referer/Origin (expects ""), a Referer (expects its registrable domain), and
// an Origin with the Referer removed (expects the Origin's registrable domain).
func TestSrcSiteFromReferer(t *testing.T) {
	r := httptest.NewRequest("GET", "https://tracker.io/p.gif", nil)

	// 1. Neither header set.
	if site := srcSiteFromReferer(r); site != "" {
		t.Errorf("no referer → %q want \"\"", site)
	}

	// 2. Referer present.
	r.Header.Set("Referer", "https://www.lemonde.fr/article?x=1")
	if site := srcSiteFromReferer(r); site != "lemonde.fr" {
		t.Errorf("referer → %q want lemonde.fr", site)
	}

	// 3. Referer gone, Origin takes over.
	r.Header.Del("Referer")
	r.Header.Set("Origin", "https://news.example.co.uk")
	if site := srcSiteFromReferer(r); site != "example.co.uk" {
		t.Errorf("origin fallback → %q want example.co.uk", site)
	}
}
// respWithSetCookies is a test helper that returns a bare *http.Response whose
// only populated field is Header, carrying one Set-Cookie entry per argument in
// the order given. With no arguments the header is empty but non-nil.
func respWithSetCookies(lines ...string) *http.Response {
	resp := &http.Response{Header: make(http.Header)}
	for i := range lines {
		resp.Header.Add("Set-Cookie", lines[i])
	}
	return resp
}
// TestSocialEdgesThirdParty: a 3rd-party Set-Cookie (Domain= a different eTLD+1)
// on a 1st-party page yields one edge with the right src_site/tracker_domain.
func TestSocialEdgesThirdParty(t *testing.T) {
req := httptest.NewRequest("GET", "https://ads.doubleclick.net/pixel", nil)
resp := respWithSetCookies("IDE=AHWqTUm; Domain=.doubleclick.net; Path=/")
// reqHost is the responding host (doubleclick) — but src_site is also derived
// from it; so to model a TRUE 3rd-party we use the Domain attr differing from
// the request host's registrable. Here both are doubleclick.net → 1st-party,
// expect NO edge.
edges := socialEdgesFor("machash1", req, resp, "ads.doubleclick.net", "https://ads.doubleclick.net/pixel", "", newConsentLog())
if len(edges) != 0 {
t.Fatalf("1st-party Set-Cookie should yield 0 edges, got %d", len(edges))
}
// Now a genuine 3rd-party: the page host is lemonde.fr, a Set-Cookie with
// Domain=.doubleclick.net (the embedded tracker setting on its own domain via
// the request being to doubleclick but src derived from referer is the
// request-cookie path; the Set-Cookie path uses reqHost as src). Model the
// addon's Set-Cookie path: reqHost=lemonde.fr, Domain attr=doubleclick.net.
resp2 := respWithSetCookies("IDE=AHWqTUm; Domain=.doubleclick.net; Path=/")
edges = socialEdgesFor("machash1", req, resp2, "www.lemonde.fr", "https://www.lemonde.fr/", "", newConsentLog())
if len(edges) != 1 {
t.Fatalf("3rd-party Set-Cookie should yield 1 edge, got %d", len(edges))
}
e := edges[0]
if e.SrcSite != "lemonde.fr" || e.TrackerDomain != "doubleclick.net" {
t.Errorf("edge src/tracker = %q/%q want lemonde.fr/doubleclick.net", e.SrcSite, e.TrackerDomain)
}
if e.CookieIDHashVal != cookieIDHash("doubleclick.net", "IDE", "AHWqTUm") {
t.Errorf("edge cookie id hash mismatch: %q", e.CookieIDHashVal)
}
if e.ConsentState != "none_seen" {
t.Errorf("consent_state = %q want none_seen", e.ConsentState)
}
}
// TestSocialEdgesDenyAndIP covers three inputs that must never produce an edge:
//
//   - a deny-listed cookie name, even when its Domain= is a genuine 3rd party;
//   - an empty client MAC hash (the R3-only gate);
//   - an IP-literal host: registrableSocial returns the IP unchanged, so
//     src_site and tracker are the same IP and the cookie is not 3rd-party.
//
// The IP-literal case was promised by the test name and its original comment
// but was not actually exercised; it is asserted explicitly below.
func TestSocialEdgesDenyAndIP(t *testing.T) {
	req := httptest.NewRequest("GET", "https://x/", nil)

	// Deny-listed name on an otherwise 3rd-party Set-Cookie.
	resp := respWithSetCookies("PHPSESSID=abc; Domain=.doubleclick.net")
	edges := socialEdgesFor("m", req, resp, "www.lemonde.fr", "https://www.lemonde.fr/", "", newConsentLog())
	if len(edges) != 0 {
		t.Fatalf("deny-listed cookie should yield 0 edges, got %d", len(edges))
	}

	// empty mac hash → no edges (R3-only gate)
	if e := socialEdgesFor("", req, respWithSetCookies("IDE=x; Domain=.doubleclick.net"), "www.lemonde.fr", "u", "", newConsentLog()); len(e) != 0 {
		t.Fatalf("empty macHash should yield 0 edges, got %d", len(e))
	}

	// IP-literal host with the cookie scoped to that same IP: src_site and
	// tracker are identical, so this is not a cross-site edge.
	ipResp := respWithSetCookies("IDE=x; Domain=192.168.1.1")
	if e := socialEdgesFor("m", req, ipResp, "192.168.1.1", "https://192.168.1.1/", "", newConsentLog()); len(e) != 0 {
		t.Fatalf("IP-literal host should yield 0 edges, got %d", len(e))
	}
}
// TestSocialEdgesRequestCookiePath covers the request-Cookie correlation: a
// request sent to a tracker host, carrying a Cookie header and a Referer that
// points at a different site, must yield an edge attributed to the Referer's
// site.
//
// The response has no Set-Cookie, so only the request-Cookie path can emit. Of
// the two cookies sent, `session` is deny-listed and skipped, leaving exactly
// one edge (IDE) from lemonde.fr to doubleclick.net.
func TestSocialEdgesRequestCookiePath(t *testing.T) {
	const trackerURL = "https://ads.doubleclick.net/px"

	req := httptest.NewRequest("GET", trackerURL, nil)
	req.Header.Set("Referer", "https://www.lemonde.fr/article")
	req.Header.Set("Cookie", "IDE=AHWqTUm; session=secret")

	noSetCookie := &http.Response{Header: http.Header{}}
	edges := socialEdgesFor("m", req, noSetCookie, "ads.doubleclick.net", trackerURL, "", newConsentLog())
	if n := len(edges); n != 1 {
		t.Fatalf("request-cookie path should yield 1 edge, got %d", n)
	}
	if src, tracker := edges[0].SrcSite, edges[0].TrackerDomain; src != "lemonde.fr" || tracker != "doubleclick.net" {
		t.Errorf("edge = %q/%q want lemonde.fr/doubleclick.net", src, tracker)
	}
}
// TestConsentLog follows one (peer, site) pair through the three consent states:
// none_seen on a fresh log, pre_consent once a CMP loader URL has been observed,
// and post_consent once a CMP consent cookie has been observed.
func TestConsentLog(t *testing.T) {
	const (
		peer = "m"
		site = "lemonde.fr"
	)
	consent := newConsentLog()

	// Nothing observed yet.
	if state := consent.stateFor(peer, site); state != "none_seen" {
		t.Errorf("fresh → %q want none_seen", state)
	}

	// A CMP loader script is requested, with no consent cookie so far.
	consent.update(peer, site, "https://cdn.cookielaw.org/consent/scripttemplates/otSDKStub.js", nil)
	if state := consent.stateFor(peer, site); state != "pre_consent" {
		t.Errorf("after CMP loader → %q want pre_consent", state)
	}

	// The CMP's consent cookie shows up in a Set-Cookie line.
	cmpCookies := []string{"OptanonConsent=isGpcEnabled=0; Path=/"}
	consent.update(peer, site, "https://www.lemonde.fr/", cmpCookies)
	if state := consent.stateFor(peer, site); state != "post_consent" {
		t.Errorf("after CMP cookie → %q want post_consent", state)
	}
}
// TestSocialRelayFlush verifies the relay round trip: an edge added to the
// buffer is POSTed by flushOnce to the portal path /__toolbox/social-event as a
// JSON payload, and the buffer is empty afterwards.
//
// The stub portal reports problems with t.Errorf only: the handler runs on the
// HTTP server's goroutine, where the Fatal family must not be called.
func TestSocialRelayFlush(t *testing.T) {
	var got socialEventPayload
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The portal route is POST-only, so any other verb would be rejected
		// in production.
		if r.Method != http.MethodPost {
			t.Errorf("unexpected method %q, want POST", r.Method)
		}
		if r.URL.Path != "/__toolbox/social-event" {
			t.Errorf("unexpected path %q", r.URL.Path)
		}
		// Surface a malformed body directly instead of letting it show up
		// later as an unexplained "server got 0" failure.
		if err := json.NewDecoder(r.Body).Decode(&got); err != nil {
			t.Errorf("decode social-event body: %v", err)
		}
		w.WriteHeader(http.StatusNoContent)
	}))
	defer srv.Close()

	s := newSocialRelay()
	s.add(socialEdge{ClientMacHash: "m", SrcSite: "a.fr", TrackerDomain: "t.com", CookieIDHashVal: "deadbeef", ConsentState: "none_seen"})

	p := s.flushOnce(srv.URL)
	if len(p.Edges) != 1 || len(got.Edges) != 1 {
		t.Fatalf("flush sent %d / server got %d, want 1/1", len(p.Edges), len(got.Edges))
	}
	if got.Edges[0].TrackerDomain != "t.com" {
		t.Errorf("server edge tracker = %q want t.com", got.Edges[0].TrackerDomain)
	}

	// Buffer cleared: a second flush sends nothing.
	if p2 := s.flushOnce(srv.URL); !p2.empty() {
		t.Errorf("second flush should be empty, got %d edges", len(p2.Edges))
	}
}
// TestSocialRelayCap overfills the relay by 100 edges beyond socialBatchCap and
// checks that the buffered snapshot holds exactly socialBatchCap edges.
func TestSocialRelayCap(t *testing.T) {
	relay := newSocialRelay()
	for remaining := socialBatchCap + 100; remaining > 0; remaining-- {
		relay.add(socialEdge{
			ClientMacHash:   "m",
			SrcSite:         "a",
			TrackerDomain:   "t",
			CookieIDHashVal: "h",
			ConsentState:    "none_seen",
		})
	}

	snap := relay.snapshot()
	if held := len(snap.Edges); held != socialBatchCap {
		t.Errorf("buffer held %d edges, want cap %d", held, socialBatchCap)
	}
}

View File

@ -1,3 +1,22 @@
secubox-toolbox-ng (0.1.11-1~bookworm1) bookworm; urgency=medium
* social: ALSO correlate on the block path — blocked 3rd-party trackers still
carry the browser's request Cookie (the cross-site evidence); without this
the /social graph misses the very trackers it exists to expose (they're 204'd
before the allow/mitm correlation). resp=nil request-only, hash-only. (ref #662)
-- Gerald KERMA <devel@cybermind.fr> Fri, 19 Jun 2026 11:55:00 +0000
secubox-toolbox-ng (0.1.10-1~bookworm1) bookworm; urgency=medium
* social: faithfully port the in-process social_graph correlation — the engine
computes cross-site tracker edges (byte-exact cookie_id_hash, deny-list,
eTLD+1 3rd-party check, CMP consent_state) and relays HASH-ONLY edges
(never raw values, WG-only) to the new portal /__toolbox/social-event →
social.record_edge → /social graph un-frozen. --social-relay (default on). (ref #662)
-- Gerald KERMA <devel@cybermind.fr> Fri, 19 Jun 2026 11:30:00 +0000
secubox-toolbox-ng (0.1.9-1~bookworm1) bookworm; urgency=medium
* telemetry: relay per-flow metadata to the analysis sidecars (dpi /classify,

View File

@ -0,0 +1,61 @@
{
"_comment": "Cross-engine parity fixtures for social.cookie_id_hash (#662). GENERATED by the real secubox_toolbox.social.cookie_id_hash (Python = source of truth); the Go cookieIDHash MUST reproduce every `expect` byte-for-byte. Note: tracker_domain + cookie_name are LOWER-cased before hashing, the cookie_value is NOT; NUL (0x00) separators; UTF-8 with 'replace' errors. See tests/test_social_parity.py (Python) ↔ social_test.go (Go).",
"fixtures": [
{
"tracker_domain": "doubleclick.net",
"cookie_name": "IDE",
"cookie_value": "AHWqTUm123",
"expect": "8e7fadaeb2584768",
"why": "plain ascii"
},
{
"tracker_domain": "DoubleClick.NET",
"cookie_name": "ide",
"cookie_value": "AHWqTUm123",
"expect": "8e7fadaeb2584768",
"why": "domain+name UPPER folded, value verbatim -> identical hash to #1 (proves domain+name are lower-cased)"
},
{
"tracker_domain": "doubleclick.net",
"cookie_name": "IDE",
"cookie_value": "ahwqtum123",
"expect": "550317c9729652c2",
"why": "value lower-cased DIFFERS from #1 (proves the VALUE is NOT folded)"
},
{
"tracker_domain": "ads.example.com",
"cookie_name": "_ga",
"cookie_value": "GA1.2.999.111",
"expect": "89a398ebd72ee863",
"why": "GA cookie"
},
{
"tracker_domain": "tracker.io",
"cookie_name": "uid",
"cookie_value": "Ünîcødé✓",
"expect": "3b4923e9d9bb77a2",
"why": "unicode value (utf-8 encoded)"
},
{
"tracker_domain": "tracker.io",
"cookie_name": "Ünîcödé",
"cookie_value": "val",
"expect": "d4db5a0d71216313",
"why": "unicode cookie NAME (lower-cased + utf-8)"
},
{
"tracker_domain": "",
"cookie_name": "x",
"cookie_value": "y",
"expect": "2081f4f26135019e",
"why": "empty domain still hashes (NUL separators)"
},
{
"tracker_domain": "d.net",
"cookie_name": "n",
"cookie_value": "",
"expect": "b0da6b889cb198a1",
"why": "empty value"
}
]
}

View File

@ -138,6 +138,87 @@ async def toolbox_ad_event(request: Request) -> Response:
log.debug("ad-event ingest failed: %s", e)
return Response(status_code=204)
# #662 — cross-site cookie-tracker edge ingest from the Go MITM engine (sbxmitm).
# The #662 Phase-7 cutover decommissioned the in-process Python social_graph addon
# that fed social.record_edge(), so the kbin /social graph (social_edges →
# social_nodes/social_links) froze. The engine now computes the SAME 3rd-party
# cookie-tracker edges (FAITHFUL port of social_graph.py: deny-list, eTLD+1
# 3rd-party check, cookie_id_hash, CMP consent_state) and POSTs a batch here. We
# call social.record_edge() per row, which writes raw social_edges; the existing
# app.py social_fold_loop folds them into nodes/links.
#
# Raw cookie VALUES never reach this endpoint — only the truncated cookie_id_hash
# (privacy/CSPN; this is exactly why the original ran in-process).
#
# UNAUTHENTICATED, same trust note as /__toolbox/ad-event: the engine reaches the
# portal only over the R3 nft perimeter (loopback / WG ingress).
# Upper bound on the number of edges accepted from a single POST, so that a
# misbehaving engine cannot flood the portal.
_SOCIAL_EVENT_ROW_CAP = 5_000
# Minimum spacing, in seconds, between two in-handler safety folds.
_SOCIAL_FOLD_DEBOUNCE = 60
# Module-level throttle state: time.time() of the last in-handler fold. It
# starts at 0.0, so the first batch that records an edge triggers a fold.
_social_last_fold = 0.0
@router.post("/__toolbox/social-event")
async def toolbox_social_event(request: Request) -> Response:
    """Ingest a batch of cross-site tracker edges from the Go engine.

    Best-effort: never 500s the engine (it is fire-and-forget); always returns
    204, whether the batch was recorded, rejected or failed. See the trust note
    above for why this is unauthenticated.

    Expected body: a JSON object with an ``edges`` list of objects. Each object
    is passed to ``social.record_edge`` using the keys ``client_mac_hash``,
    ``src_site``, ``tracker_domain``, ``cookie_id_hash_val``, ``ja4_hash`` and
    ``consent_state``. Anything else is ignored silently.
    """
    global _social_last_fold
    max_body = 2 * 1024 * 1024
    try:
        # Body-size guard BEFORE parsing: the legit payload (≤5000 edges) is
        # well under 2 MB; reject larger outright so a misbehaving/compromised
        # WG peer can't pressure portal memory.
        #
        # Fast path: trust an honest Content-Length to reject without reading.
        try:
            clen = int(request.headers.get("content-length") or 0)
        except (TypeError, ValueError):
            clen = 0
        if clen > max_body:
            return Response(status_code=204)
        # Hard cap: Content-Length may be absent (chunked transfer) or wrong, so
        # the body is read incrementally and abandoned once it exceeds the
        # limit, instead of being buffered whole by request.json().
        import json as _json
        raw = bytearray()
        async for chunk in request.stream():
            raw.extend(chunk)
            if len(raw) > max_body:
                return Response(status_code=204)
        body = _json.loads(raw)
        if not isinstance(body, dict):
            return Response(status_code=204)
        edges = body.get("edges") or []
        if not isinstance(edges, list):
            edges = []
        edges = edges[:_SOCIAL_EVENT_ROW_CAP]
        from . import social as _social
        recorded = 0
        for e in edges:
            if not isinstance(e, dict):
                continue
            try:
                _social.record_edge(
                    client_mac_hash=e.get("client_mac_hash") or "",
                    src_site=e.get("src_site") or "",
                    tracker_domain=e.get("tracker_domain") or "",
                    cookie_id_hash_val=e.get("cookie_id_hash_val") or "",
                    ja4_hash=e.get("ja4_hash") or None,
                    consent_state=e.get("consent_state") or "none_seen",
                )
                recorded += 1
            except Exception as row_err:  # one bad row never fails the batch
                log.debug("social-event row failed: %s", row_err)
        # Safety fold: the app.py social_fold_loop already folds every 5 min, but
        # fold here too (debounced to ≤ once / 60 s via a module-level timestamp)
        # so a freshly-ingested edge surfaces in the d3 graph promptly even
        # between loop ticks. A fold failure is swallowed (the loop will catch
        # up).
        if recorded:
            now = time.time()
            if now - _social_last_fold >= _SOCIAL_FOLD_DEBOUNCE:
                # Stamp before folding so a failing fold is still throttled.
                _social_last_fold = now
                try:
                    _social.fold_recent(window_seconds=600)
                except Exception as fold_err:
                    log.debug("social-event fold failed: %s", fold_err)
    except Exception as e:  # never raise into the engine's fire-and-forget POST
        log.debug("social-event ingest failed: %s", e)
    return Response(status_code=204)
# Cap geo/UA enrichment on /admin/clients/rich to the rows the UI actually shows
# (top-5 + headroom). Beyond this, clients get bare fields — avoids ~51 cached
# geo lookups per poll (ref #644).

View File

@ -0,0 +1,48 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
"""Cross-engine SOCIAL parity harness — Python side (#662).
Loads the SAME ``social-cookie-id-fixtures.json`` the Go core uses
(``../secubox-toolbox-ng/testdata``) and asserts ``social.cookie_id_hash``
reproduces each fixture's ``expect``.
Python is the source of truth: the ``expect`` values were GENERATED by this very
``social.cookie_id_hash``. The Go side (cmd/sbxmitm/social_test.go) must
reproduce them byte-for-byte. Both files reading identical inputs is what makes
the parity meaningful — the same anti-rig discipline as the jar parity harness.
"""
from __future__ import annotations
import json
import os
from secubox_toolbox import social
# Directory containing this test module.
_HERE = os.path.dirname(os.path.abspath(__file__))
# Two levels up from tests/ (tests/ → packages/secubox-toolbox → packages), then
# down into the sibling Go package's testdata directory.
_NG_TESTDATA = os.path.normpath(
    os.path.join(_HERE, os.pardir, os.pardir, "secubox-toolbox-ng", "testdata")
)
# The fixture file shared with the Go test suite.
_FIXTURES = os.path.join(_NG_TESTDATA, "social-cookie-id-fixtures.json")
def _load():
    """Read the shared fixture file (UTF-8) and return its decoded JSON document."""
    with open(_FIXTURES, encoding="utf-8") as fh:
        document = json.load(fh)
    return document
def test_cookie_id_hash_parity():
    """Every fixture's ``expect`` must be reproduced by ``social.cookie_id_hash``.

    All mismatches are collected first and reported together, so a single run
    shows the full extent of any drift.
    """
    fixtures = _load()["fixtures"]
    assert fixtures, "no fixtures"
    computed = (
        (fx, social.cookie_id_hash(
            fx["tracker_domain"], fx["cookie_name"], fx["cookie_value"]))
        for fx in fixtures
    )
    failures = [(fx, got) for fx, got in computed if got != fx["expect"]]
    assert not failures, f"cookie_id_hash drift: {failures}"
def test_cookie_id_hash_invariants():
    """Domain and name are lower-cased before hashing; the value is not."""
    # Domain/name differing only by case must collide.
    upper_keys = social.cookie_id_hash("A.NET", "N", "v")
    lower_keys = social.cookie_id_hash("a.net", "n", "v")
    assert upper_keys == lower_keys
    # Values differing only by case must not collide.
    upper_value = social.cookie_id_hash("a.net", "n", "V")
    lower_value = social.cookie_id_hash("a.net", "n", "v")
    assert upper_value != lower_value