mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-29 16:31:31 +00:00
Compare commits
4 Commits
257fc95182
...
77da033371
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77da033371 | ||
| 3850da5479 | |||
| 040e460876 | |||
| 55f9e4c803 |
|
|
@ -210,6 +210,14 @@ type Proxy struct {
|
|||
// analysis sidecar sockets (#662 — restoring the "Qui te piste?" events the
|
||||
// decommissioned Python addons fed). Default on; relay.go is the transport.
|
||||
analysisRelay bool
|
||||
|
||||
// socialRelay gates the cross-site cookie-tracker correlation (#662 — restoring
|
||||
// the kbin /social graph the decommissioned Python social_graph addon fed).
|
||||
// Default on. social.go is the engine; edges are batched + POSTed to the
|
||||
// portal's /__toolbox/social-event ingest. nil → off (CONNECT PoC / tests).
|
||||
socialRelayOn bool
|
||||
social *socialRelay
|
||||
consent *consentLog
|
||||
}
|
||||
|
||||
// recordAdBlock forwards a 204'd ad/tracker block to the engine's metrics
|
||||
|
|
@ -378,6 +386,12 @@ func (px *Proxy) mitmPipeline(tconn *tls.Conn, rawClient net.Conn, host, verdict
|
|||
// per-client breakdown keys on the WG persona hash. recordAdBlock is
|
||||
// O(1) and never blocks the block path.
|
||||
px.recordAdBlock(host, refererSite(req.Header.Get("Referer")), clientHashFromConn(rawClient))
|
||||
// #662 — the cross-site tracking evidence lives PRECISELY on the blocked
|
||||
// trackers: the browser still SENT its 3rd-party Cookie to doubleclick/
|
||||
// adnxs/… before we 204 it. Correlate that request-Cookie here (resp=nil,
|
||||
// request-only) or the /social graph misses the very trackers it exists to
|
||||
// expose. Hash-only, WG-peer only, fire-and-forget — same as the allow path.
|
||||
px.emitSocial(peerIP(rawClient), host, req, nil)
|
||||
writeRaw(tconn, 204, "No Content", map[string]string{"X-SecuBox-Ng": "blocked"}, nil)
|
||||
return
|
||||
}
|
||||
|
|
@ -447,6 +461,17 @@ func (px *Proxy) mitmPipeline(tconn *tls.Conn, rawClient net.Conn, host, verdict
|
|||
// relayed names byte-for-byte the origin's. Fire-and-forget, gated.
|
||||
px.emitCookies(relayIP, clientHash, req, resp)
|
||||
|
||||
// #662 — cross-site cookie-tracker correlation (restores the kbin /social
|
||||
// graph). FAITHFUL to the decommissioned Python social_graph addon: extract
|
||||
// 3rd-party cookie edges (Set-Cookie + request Cookie), hash the identifier
|
||||
// (cookieIDHash — NEVER the raw value), classify consent_state, and buffer
|
||||
// them for the batched POST to the portal /__toolbox/social-event ingest.
|
||||
// Like the addon, this ONLY fires for known R3 WG peers (macHashOf, not the
|
||||
// raw-IP fallback): non-WG flows yield no edges. allow|mitm only (the block
|
||||
// 204 / splice paths return before here). Gated by --social-relay; pure +
|
||||
// non-blocking (the flush is a background goroutine).
|
||||
px.emitSocial(relayIP, host, req, resp)
|
||||
|
||||
// Poison: only on MITM'd tracker flows (never on allow/own-infra), and only
|
||||
// when the jar key is loaded. Replaces tracking-id Set-Cookie values with a
|
||||
// stable fabricated persona; benign cookies pass through untouched.
|
||||
|
|
@ -515,6 +540,8 @@ func main() {
|
|||
"CONSENTED DEMONSTRATION: relax a page's CSP so the injected transparency-banner loader runs even on strict-CSP sites, and flag the bypass (banner shows 🔓). Only on injected 2xx text/html R3 responses; never on non-injected responses. Set false to never touch CSP.")
|
||||
analysisRelay := flag.Bool("analysis-relay", true,
|
||||
"relay per-flow telemetry (dpi/cookies/ja4) to the analysis sidecar sockets so the kbin \"Qui te piste?\" events refill (#662; replaces the decommissioned Python relay addons). Fire-and-forget; a dead/slow sidecar never affects the proxy. Set false to emit nothing.")
|
||||
socialRelay := flag.Bool("social-relay", true,
|
||||
"compute cross-site cookie-tracker edges and POST them to the portal /__toolbox/social-event ingest so the kbin /social graph refills (#662; replaces the decommissioned Python social_graph addon). Hash-only (never raw cookie values); WG-peer flows only; batched + fire-and-forget — a dead/slow portal never affects the proxy. Set false to emit nothing.")
|
||||
flag.Parse()
|
||||
ca, err := loadCA(*caCert, *caKey)
|
||||
if err != nil {
|
||||
|
|
@ -545,7 +572,16 @@ func main() {
|
|||
cspDemo: *cspDemo,
|
||||
|
||||
analysisRelay: *analysisRelay,
|
||||
|
||||
socialRelayOn: *socialRelay,
|
||||
social: newSocialRelay(),
|
||||
consent: newConsentLog(),
|
||||
}
|
||||
// #662 — start the social-edge flusher: the MITM path buffers cross-site
|
||||
// tracker edges into px.social, drained every 10s to the portal's
|
||||
// /__toolbox/social-event (best-effort, fire-and-forget) so the kbin /social
|
||||
// graph (frozen since the cutover) refills.
|
||||
go px.social.runFlusher(*portal)
|
||||
// #662 — start the ad-block metrics flusher: the block path tallies every
|
||||
// 204 into px.ads, drained every 10s to the portal's /__toolbox/ad-event
|
||||
// (best-effort, fire-and-forget) so the #ads dashboard sees blocks again.
|
||||
|
|
|
|||
|
|
@ -18,7 +18,9 @@
|
|||
// avatar → /run/secubox/avatar.sock POST /fingerprint
|
||||
// ja4 → /run/secubox/threat-analyst.sock POST /ja4
|
||||
// soc_relay → /run/secubox/soc.sock POST /event
|
||||
// social_graph: in-process (no socket) — correlated inside the engine, not emitted.
|
||||
// social_graph: correlated in-process (social.go) — edges (hash-only, never raw
|
||||
// cookie values) are NOT emitted to a module socket but POSTed to the portal
|
||||
// /__toolbox/social-event ingest (the social store lives in the toolbox/portal).
|
||||
//
|
||||
// emit takes the full socket PATH (not an http+unix:// URL) plus the route in
|
||||
// the payload's destination; callers build the path from the table above.
|
||||
|
|
|
|||
605
packages/secubox-toolbox-ng/cmd/sbxmitm/social.go
Normal file
605
packages/secubox-toolbox-ng/cmd/sbxmitm/social.go
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
//
|
||||
// SecuBox-Deb :: toolbox-ng :: cross-site cookie-tracker correlation (#662)
|
||||
//
|
||||
// Restores the kbin "/social" cross-site tracker graph, frozen since the #662
|
||||
// Phase-7 cutover decommissioned the in-process Python `social_graph` addon
|
||||
// (packages/secubox-toolbox/mitmproxy_addons/social_graph.py). The graph reads
|
||||
// social_nodes/social_links in toolbox.db, folded from raw social_edges — and
|
||||
// the edges stopped flowing when the Python addon was retired.
|
||||
//
|
||||
// This is a FAITHFUL Go port of the addon's correlation logic:
|
||||
// - cookieIDHash : byte-exact port of social.cookie_id_hash (Python = source
|
||||
// of truth, proven by social_test.go ↔ tests/test_social_parity.py over a
|
||||
// shared fixture — the same anti-rig discipline as jar.go).
|
||||
// - isDenyListed + the _DEFAULT_DENY_COOKIES set (social.py).
|
||||
// - registrableSocial : the addon's _registrable_domain eTLD+1 helper
|
||||
// (DIFFERENT from policy.go's registrable() — IP literals pass through,
|
||||
// no port strip, a larger multi-label-TLD table; the graph correctness
|
||||
// depends on this exact flavour, so it is replicated verbatim and NOT
|
||||
// consolidated with policy.registrable).
|
||||
// - the 3rd-party decision (tracker_domain != src_site on eTLD+1) on BOTH the
|
||||
// response Set-Cookie path and the request Cookie path, mirroring the
|
||||
// addon's response()+request hooks.
|
||||
// - the CMP consent-platform detection → consent_state ∈ {none_seen,
|
||||
// pre_consent, post_consent} via a per-(peer,site) in-memory log.
|
||||
//
|
||||
// Privacy/CSPN invariant (the reason the original ran in-process): raw cookie
|
||||
// VALUES NEVER leave the engine — only the truncated SHA-256 cookieIDHash is
|
||||
// emitted. The edges are POSTed fire-and-forget to the portal's
|
||||
// /__toolbox/social-event ingest (sibling of /__toolbox/ad-event), which calls
|
||||
// social.record_edge(). Best-effort throughout; a dead/slow portal can never
|
||||
// block or delay a client flow.
|
||||
//
|
||||
// Pure standard library — no external modules, no go.sum.
|
||||
package main
|
||||
|
||||
import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"io"
	"log"
	"net/http"
	"strings"
	"sync"
	"time"
)
|
||||
|
||||
// ── registrableSocial: port of social_graph._registrable_domain ─────────────
|
||||
//
|
||||
// Python (mitmproxy_addons/social_graph.py):
|
||||
//
|
||||
// h = (host or "").lower().strip(".")
|
||||
// if not h or h.replace(".", "").isdigit(): return h # raw IP → as-is
|
||||
// parts = h.split(".")
|
||||
// if len(parts) < 2: return h
|
||||
// last_two = ".".join(parts[-2:])
|
||||
// if last_two in _MULTI_LABEL_TLDS and len(parts) >= 3: return ".".join(parts[-3:])
|
||||
// return last_two
|
||||
//
|
||||
// This DIFFERS from policy.registrable (ad_ghost flavour): no port strip, IP
|
||||
// literals pass through unchanged (the store later drops IP trackers via
|
||||
// _is_ip), and the multi-label-TLD table below is the addon's larger set. The
|
||||
// graph's 3rd-party comparison is done with THIS function, so it must match the
|
||||
// addon exactly.
|
||||
var socialMultiLabelTLDs = map[string]bool{
|
||||
"co.uk": true, "ac.uk": true, "gov.uk": true, "org.uk": true, "net.uk": true,
|
||||
"co.jp": true, "ne.jp": true, "ac.jp": true,
|
||||
"com.au": true, "net.au": true, "org.au": true,
|
||||
"com.br": true, "com.cn": true, "com.hk": true, "com.tw": true, "com.mx": true,
|
||||
}
|
||||
|
||||
func registrableSocial(host string) string {
|
||||
h := strings.Trim(strings.ToLower(host), ".")
|
||||
if h == "" {
|
||||
return h
|
||||
}
|
||||
// h.replace(".","").isdigit() → all-digit (IPv4-ish) → return as-is.
|
||||
if isAllDigits(strings.ReplaceAll(h, ".", "")) {
|
||||
return h
|
||||
}
|
||||
parts := strings.Split(h, ".")
|
||||
if len(parts) < 2 {
|
||||
return h
|
||||
}
|
||||
last2 := strings.Join(parts[len(parts)-2:], ".")
|
||||
if socialMultiLabelTLDs[last2] && len(parts) >= 3 {
|
||||
return strings.Join(parts[len(parts)-3:], ".")
|
||||
}
|
||||
return last2
|
||||
}
|
||||
|
||||
// ── cookieIDHash: BYTE-EXACT port of social.cookie_id_hash ───────────────────
//
// Reference implementation, Python (secubox_toolbox/social.py):
//
//	h = sha256()
//	h.update(tracker_domain.lower().encode("utf-8","replace")); h.update(b"\x00")
//	h.update(cookie_name.lower().encode("utf-8","replace"));    h.update(b"\x00")
//	h.update(cookie_value.encode("utf-8","replace"))
//	return h.hexdigest()[:16]
//
// cookieIDHash returns the first 16 hex characters (= first 8 bytes) of
// SHA-256 over `lower(trackerDomain) NUL lower(cookieName) NUL cookieValue`.
//
// CRITICAL: only the tracker domain and the cookie name are lower-cased; the
// cookie value is hashed exactly as received. The two 0x00 bytes keep the three
// fields from running into each other.
func cookieIDHash(trackerDomain, cookieName, cookieValue string) string {
	// Assemble the exact byte sequence first, then hash it in one shot.
	msg := make([]byte, 0, len(trackerDomain)+len(cookieName)+len(cookieValue)+2)
	msg = append(msg, strings.ToLower(trackerDomain)...)
	msg = append(msg, 0x00)
	msg = append(msg, strings.ToLower(cookieName)...)
	msg = append(msg, 0x00)
	msg = append(msg, cookieValue...) // value is NOT case-folded

	digest := sha256.Sum256(msg)
	// 8 digest bytes encode to the same 16 hex chars as hexdigest()[:16].
	return hex.EncodeToString(digest[:8])
}
|
||||
|
||||
// ── deny-list: port of social._DEFAULT_DENY_COOKIES + is_deny_listed ─────────
|
||||
//
|
||||
// Names whose presence on a flow is NEVER recorded as a tracker identifier
|
||||
// (session / csrf / auth / cloudflare / consent / locale). Replicated verbatim
|
||||
// from social.py; matched case-insensitively after trimming.
|
||||
var socialDenyCookies = func() map[string]bool {
	groups := [][]string{
		// session
		{
			"phpsessid", "jsessionid", "asp.net_sessionid", "ci_session",
			"express.sid", "connect.sid", "sails.sid", "django_session",
			"laravel_session", "flask_session", "session", "sessionid",
		},
		// csrf
		{
			"_csrf", "_csrf_token", "xsrf-token", "csrftoken", "csrf",
			"x-csrf-token", "anti-csrf-token",
		},
		// auth (1st-party)
		{
			"auth", "auth_token", "access_token", "refresh_token", "bearer",
			"remember_token", "remember_me", "_oauth2_proxy",
		},
		// cloudflare / consent / locale (low signal)
		{
			"__cf_bm", "cf_clearance", "consent", "cookieconsent_status",
			"locale", "lang", "language", "_locale",
		},
	}
	set := make(map[string]bool)
	for _, group := range groups {
		for _, name := range group {
			set[name] = true
		}
	}
	return set
}()

// isDenyListed reports whether cookieName must never be recorded as a tracker
// identifier. The name is trimmed of surrounding whitespace and compared
// case-insensitively against socialDenyCookies (the built-in default set only).
// A blank name is always deny-listed.
func isDenyListed(cookieName string) bool {
	key := strings.TrimSpace(cookieName)
	if key == "" {
		return true
	}
	return socialDenyCookies[strings.ToLower(key)]
}
|
||||
|
||||
// ── cookie parsers: port of _parse_set_cookie / _parse_cookie_header ─────────
|
||||
|
||||
// parseSetCookieNameValue splits the leading name=value pair off a Set-Cookie
// line (port of social_graph._parse_set_cookie). Only the text before the first
// ';' is considered; within it the first '=' separates the name from the value,
// and both are whitespace-trimmed. ok is false when that first field has no '='
// (attribute-only line) or the trimmed name is empty.
func parseSetCookieNameValue(header string) (name, value string, ok bool) {
	// SplitN always yields at least one element, so [0] is safe.
	pair := strings.SplitN(header, ";", 2)[0]
	kv := strings.SplitN(pair, "=", 2)
	if len(kv) != 2 {
		return "", "", false
	}
	name = strings.TrimSpace(kv[0])
	if name == "" {
		return "", "", false
	}
	return name, strings.TrimSpace(kv[1]), true
}
|
||||
|
||||
// cookiePair holds a single name/value entry taken from a request Cookie header.
type cookiePair struct {
	name  string
	value string
}

// parseCookieHeader breaks a request Cookie header into its pairs (port of
// social_graph._parse_cookie_header). The header is split on ';'; in each
// segment the first '=' separates name from value and both are
// whitespace-trimmed. Segments without '=' or with an empty name are skipped.
// Returns nil when nothing usable is found.
func parseCookieHeader(header string) []cookiePair {
	var pairs []cookiePair
	for _, segment := range strings.Split(header, ";") {
		kv := strings.SplitN(segment, "=", 2)
		if len(kv) < 2 {
			continue // no '=' in this segment
		}
		key := strings.TrimSpace(kv[0])
		if key == "" {
			continue // nameless pair
		}
		pairs = append(pairs, cookiePair{name: key, value: strings.TrimSpace(kv[1])})
	}
	return pairs
}
|
||||
|
||||
// extractSetCookieDomainAttr returns the value of the "; Domain=…" attribute of
// a Set-Cookie line — whitespace-trimmed, leading dots stripped, lower-cased —
// or "" when the line carries no such attribute (or its value is empty). It
// mirrors the `;\s*domain\s*=` match described for
// social_graph._extract_domain_attr.
//
// Only text AFTER the first ';' is inspected: the first field is the cookie's
// own name=value pair, so a cookie that happens to be named "domain"
// ("domain=evil.example; Path=/") is not mistaken for the attribute. The
// attribute name is matched case-insensitively on the original bytes, so
// non-ASCII (or invalid UTF-8) cookie values earlier in the line cannot shift
// the match position. When several Domain attributes are present the first one
// wins.
func extractSetCookieDomainAttr(setCookie string) string {
	// strings.Split always returns at least one element, so [1:] is safe and
	// simply empty for a line without any ';'.
	attrs := strings.Split(setCookie, ";")[1:]
	for _, attr := range attrs {
		eq := strings.IndexByte(attr, '=')
		if eq < 0 {
			continue // flag attribute such as Secure / HttpOnly
		}
		// Spaces/tabs may surround the attribute name on either side.
		if !strings.EqualFold(strings.Trim(attr[:eq], " \t"), "domain") {
			continue
		}
		value := strings.TrimSpace(attr[eq+1:])
		return strings.ToLower(strings.TrimLeft(value, "."))
	}
	return ""
}
|
||||
|
||||
// srcSiteFromReferer mirrors social_graph._src_site_from_referer: take Referer
|
||||
// (else Origin), strip scheme/path/query, return registrableSocial of the host.
|
||||
func srcSiteFromReferer(req *http.Request) string {
|
||||
ref := req.Header.Get("Referer")
|
||||
if ref == "" {
|
||||
ref = req.Header.Get("Origin")
|
||||
}
|
||||
if ref == "" {
|
||||
return ""
|
||||
}
|
||||
s := ref
|
||||
if i := strings.Index(s, "://"); i >= 0 {
|
||||
s = s[i+3:]
|
||||
}
|
||||
if i := strings.IndexByte(s, '/'); i >= 0 {
|
||||
s = s[:i]
|
||||
}
|
||||
if i := strings.IndexByte(s, '?'); i >= 0 {
|
||||
s = s[:i]
|
||||
}
|
||||
return registrableSocial(s)
|
||||
}
|
||||
|
||||
// ── consent-state detection: port of the _consent_log machinery ──────────────
|
||||
//
|
||||
// CMP (Consent Management Platform) cookie name prefixes + loader URL fragments,
|
||||
// verbatim from social_graph._CMP_COOKIE_PREFIXES / _CMP_LOADER_FRAGMENTS. Seen
|
||||
// on a flow → the site runs a CMP (has_cmp) and, for a cookie, consent recorded
|
||||
// (consented). consent_state classifies a tracker edge as pre/post/none-consent.
|
||||
// cmpCookiePrefixes are lower-case substrings that identify a CMP consent
// cookie when found anywhere in a (lower-cased) Cookie / Set-Cookie line.
var cmpCookiePrefixes = []string{
	// OneTrust
	"optanonconsent", "onetrustconsent", "optanonalertboxclosed",
	// Didomi / IAB TCF
	"didomi_token", "euconsent-v2",
	// Quantcast
	"__qca", "quantcast",
	// Sourcepoint
	"sp_choice", "consentuid", "_sp_",
}

// cmpLoaderFragments are lower-case substrings that identify a CMP loader when
// found anywhere in the (lower-cased) request URL.
var cmpLoaderFragments = []string{
	// OneTrust
	"cdn.cookielaw.org", "onetrust.com",
	// Didomi
	"sdk.privacy-center.org", "didomi.io",
	// Quantcast
	"quantcast.mgr.consensu.org", "quantcast.com/choice",
	// Sourcepoint
	"sourcepoint.mgr.consensu.org", "sp-prod.net",
}

// consentObservation is what has been seen so far for one (peer, site) pair;
// the Go counterpart of the Python dict {"has_cmp": bool, "consented": bool}.
type consentObservation struct {
	hasCMP    bool // a CMP loader URL or CMP cookie was observed
	consented bool // a CMP cookie was observed
}

// consentKey identifies one observation: (mac_hash, site), as in
// social_graph._consent_key.
type consentKey struct {
	macHash string
	site    string
}

// consentLog is the in-memory table of consentObservation per consentKey. Every
// access goes through mu, since the proxy handles flows concurrently.
type consentLog struct {
	mu  sync.Mutex
	log map[consentKey]consentObservation
}

// consentLogCap is the soft size limit of a consentLog: once the table holds
// more entries than this, the next update discards it wholesale.
const consentLogCap = 20000

// newConsentLog returns an empty, ready-to-use consentLog.
func newConsentLog() *consentLog {
	return &consentLog{log: make(map[consentKey]consentObservation)}
}

// update folds what one flow reveals into the (macHash, site) observation (port
// of social_graph._update_consent_log).
//   - url is matched, lower-cased, against cmpLoaderFragments; a hit marks the
//     pair as having a CMP.
//   - cookieBlobs (raw Cookie / Set-Cookie header lines) are matched,
//     lower-cased, against cmpCookiePrefixes; a hit marks the pair as having a
//     CMP AND as consented.
//
// Flags are sticky: update only ever turns them on. An entry is written for the
// pair even when nothing was detected.
func (cl *consentLog) update(macHash, site, url string, cookieBlobs []string) {
	// Pure string matching first; it needs no access to the shared table.
	sawLoader := false
	lowerURL := strings.ToLower(url)
	for _, fragment := range cmpLoaderFragments {
		if strings.Contains(lowerURL, fragment) {
			sawLoader = true
			break
		}
	}

	sawConsentCookie := false
scan:
	for _, blob := range cookieBlobs {
		lowered := strings.ToLower(blob)
		for _, prefix := range cmpCookiePrefixes {
			if strings.Contains(lowered, prefix) {
				sawConsentCookie = true
				break scan // one hit is enough; the flags cannot get "more true"
			}
		}
	}

	cl.mu.Lock()
	defer cl.mu.Unlock()

	// Soft cap: start over rather than grow without bound.
	if len(cl.log) > consentLogCap {
		cl.log = make(map[consentKey]consentObservation)
	}
	key := consentKey{macHash: macHash, site: site}
	obs := cl.log[key]
	if sawLoader || sawConsentCookie {
		obs.hasCMP = true
	}
	if sawConsentCookie {
		obs.consented = true
	}
	cl.log[key] = obs
}

// stateFor classifies the (macHash, site) pair (port of
// social_graph._consent_state_for): "post_consent" when a consent cookie has
// been seen, "pre_consent" when a CMP has been seen but no consent cookie, and
// "none_seen" otherwise, including for a pair that was never observed.
func (cl *consentLog) stateFor(macHash, site string) string {
	cl.mu.Lock()
	defer cl.mu.Unlock()

	obs, seen := cl.log[consentKey{macHash: macHash, site: site}]
	switch {
	case !seen:
		return "none_seen"
	case obs.consented:
		return "post_consent"
	case obs.hasCMP:
		return "pre_consent"
	default:
		return "none_seen"
	}
}
|
||||
|
||||
// ── edge extraction: port of SocialGraph.response()+request() hook logic ──────
|
||||
|
||||
// socialEdge is one cross-site tracker edge, mirroring the kwargs the Python
|
||||
// social.record_edge accepts; serialised straight into the ingest batch.
|
||||
type socialEdge struct {
|
||||
ClientMacHash string `json:"client_mac_hash"`
|
||||
SrcSite string `json:"src_site"`
|
||||
TrackerDomain string `json:"tracker_domain"`
|
||||
CookieIDHashVal string `json:"cookie_id_hash_val"`
|
||||
JA4Hash string `json:"ja4_hash,omitempty"`
|
||||
ConsentState string `json:"consent_state"`
|
||||
}
|
||||
|
||||
// socialEdgesFor extracts the cross-site tracker edges for ONE MITM'd flow,
|
||||
// mirroring SocialGraph.response() + the request-Cookie tail. Pure (no I/O): the
|
||||
// caller emits the returned edges. macHash MUST be the WG persona hash (the
|
||||
// addon only fires for known R3 peers — empty macHash yields no edges). reqHost
|
||||
// is flow.request.host; reqURL is flow.request.pretty_url (for CMP loader
|
||||
// detection); ja4 is the captured fingerprint (may be "").
|
||||
//
|
||||
// Decision logic, faithful to the addon:
|
||||
// - src_site = registrableSocial(reqHost); skip if empty.
|
||||
// - update the consent log for (macHash, src_site), derive consent_state.
|
||||
// - Set-Cookie path (first 50): for each non-deny-listed cookie, tracker_domain
|
||||
// = registrableSocial(Domain= attr OR reqHost); emit IFF tracker_domain != ""
|
||||
// and != src_site (3rd-party).
|
||||
// - Cookie path: only when a Referer/Origin context site exists and differs
|
||||
// from the tracker (= registrableSocial(reqHost)); cap 5 Cookie headers ×
|
||||
// 50 pairs; emit per non-deny-listed cookie with the context site's
|
||||
// consent_state.
|
||||
func socialEdgesFor(macHash string, req *http.Request, resp *http.Response, reqHost, reqURL, ja4 string, cl *consentLog) []socialEdge {
|
||||
if macHash == "" || cl == nil {
|
||||
return nil
|
||||
}
|
||||
srcSite := registrableSocial(reqHost)
|
||||
if srcSite == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Gather the cookie blobs (both directions) for the CMP cookie check, then
|
||||
// fold the consent observation BEFORE deriving consent_state (matches the
|
||||
// addon's ordering: _update_consent_log then _consent_state_for).
|
||||
var setCookies []string
|
||||
if resp != nil {
|
||||
setCookies = resp.Header.Values("Set-Cookie")
|
||||
}
|
||||
var reqCookies []string
|
||||
if req != nil {
|
||||
reqCookies = req.Header.Values("Cookie")
|
||||
}
|
||||
blobs := make([]string, 0, len(reqCookies)+len(setCookies))
|
||||
blobs = append(blobs, reqCookies...)
|
||||
blobs = append(blobs, setCookies...)
|
||||
cl.update(macHash, srcSite, reqURL, blobs)
|
||||
consentState := cl.stateFor(macHash, srcSite)
|
||||
|
||||
var edges []socialEdge
|
||||
|
||||
// Set-Cookie path — first 50 lines (matches the addon's [:50]).
|
||||
for i, sc := range setCookies {
|
||||
if i >= 50 {
|
||||
break
|
||||
}
|
||||
name, value, ok := parseSetCookieNameValue(sc)
|
||||
if !ok || isDenyListed(name) {
|
||||
continue
|
||||
}
|
||||
domainAttr := extractSetCookieDomainAttr(sc)
|
||||
issuer := domainAttr
|
||||
if issuer == "" {
|
||||
issuer = reqHost
|
||||
}
|
||||
trackerDomain := registrableSocial(issuer)
|
||||
if trackerDomain == "" || trackerDomain == srcSite {
|
||||
continue // 1st-party Set-Cookie: not a cross-site tracker signal.
|
||||
}
|
||||
edges = append(edges, socialEdge{
|
||||
ClientMacHash: macHash,
|
||||
SrcSite: srcSite,
|
||||
TrackerDomain: trackerDomain,
|
||||
CookieIDHashVal: cookieIDHash(trackerDomain, name, value),
|
||||
JA4Hash: ja4,
|
||||
ConsentState: consentState,
|
||||
})
|
||||
}
|
||||
|
||||
// Request-Cookie path — only when this request is itself for a 3rd-party
|
||||
// tracker and we have a differing 1st-party context from the Referer/Origin.
|
||||
if len(reqCookies) == 0 {
|
||||
return edges
|
||||
}
|
||||
trackerDomain := registrableSocial(reqHost)
|
||||
if trackerDomain == "" {
|
||||
return edges
|
||||
}
|
||||
ctxSite := srcSiteFromReferer(req)
|
||||
if ctxSite == "" || ctxSite == trackerDomain {
|
||||
return edges
|
||||
}
|
||||
ctxConsent := cl.stateFor(macHash, ctxSite)
|
||||
for i, hdr := range reqCookies {
|
||||
if i >= 5 { // addon caps Cookie headers at [:5]
|
||||
break
|
||||
}
|
||||
pairs := parseCookieHeader(hdr)
|
||||
for j, p := range pairs {
|
||||
if j >= 50 { // and pairs at [:50]
|
||||
break
|
||||
}
|
||||
if isDenyListed(p.name) {
|
||||
continue
|
||||
}
|
||||
edges = append(edges, socialEdge{
|
||||
ClientMacHash: macHash,
|
||||
SrcSite: ctxSite,
|
||||
TrackerDomain: trackerDomain,
|
||||
CookieIDHashVal: cookieIDHash(trackerDomain, p.name, p.value),
|
||||
JA4Hash: ja4,
|
||||
ConsentState: ctxConsent,
|
||||
})
|
||||
}
|
||||
}
|
||||
return edges
|
||||
}
|
||||
|
||||
// ── relay: batch + POST to the portal /__toolbox/social-event ingest ─────────
|
||||
|
||||
const (
|
||||
socialFlushInterval = 10 * time.Second // drain cadence (sibling of adFlushInterval)
|
||||
socialBatchCap = 5000 // max edges held between flushes (drop excess)
|
||||
)
|
||||
|
||||
// socialEventPayload mirrors the portal /__toolbox/social-event JSON contract.
|
||||
type socialEventPayload struct {
|
||||
Edges []socialEdge `json:"edges"`
|
||||
}
|
||||
|
||||
func (p socialEventPayload) empty() bool { return len(p.Edges) == 0 }
|
||||
|
||||
// socialRelay buffers extracted edges and flushes them to the portal. Bounded:
|
||||
// once the buffer holds socialBatchCap edges, NEW edges are dropped until the
|
||||
// next flush clears it (a dead portal can never grow memory unbounded). Edges
|
||||
// carry ONLY the cookieIDHash — never raw values (privacy/CSPN).
|
||||
type socialRelay struct {
|
||||
mu sync.Mutex
|
||||
buf []socialEdge
|
||||
}
|
||||
|
||||
func newSocialRelay() *socialRelay { return &socialRelay{} }
|
||||
|
||||
// add appends edges to the buffer under the cap. Never blocks the flow.
|
||||
func (s *socialRelay) add(edges ...socialEdge) {
|
||||
if len(edges) == 0 {
|
||||
return
|
||||
}
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
for _, e := range edges {
|
||||
if len(s.buf) >= socialBatchCap {
|
||||
return
|
||||
}
|
||||
s.buf = append(s.buf, e)
|
||||
}
|
||||
}
|
||||
|
||||
// snapshot atomically reads-and-clears the buffer.
|
||||
func (s *socialRelay) snapshot() socialEventPayload {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if len(s.buf) == 0 {
|
||||
return socialEventPayload{}
|
||||
}
|
||||
p := socialEventPayload{Edges: s.buf}
|
||||
s.buf = nil
|
||||
return p
|
||||
}
|
||||
|
||||
// socialEventClient is the short-timeout fire-and-forget client for the
|
||||
// social-event POST (sibling of adEventClient). Never follows redirects (SSRF
|
||||
// hygiene); tight timeout so a slow portal can't stall the flusher.
|
||||
var socialEventClient = &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse },
|
||||
}
|
||||
|
||||
// flushOnce snapshots the buffer and, if non-empty, POSTs it to the portal's
|
||||
// /__toolbox/social-event ingest. Best-effort: any error is swallowed with at
|
||||
// most a log line — the engine must never block on the portal. Returns the
|
||||
// flushed payload so the test can assert the snapshot/clear + shape.
|
||||
func (s *socialRelay) flushOnce(portal string) socialEventPayload {
|
||||
p := s.snapshot()
|
||||
if p.empty() {
|
||||
return p
|
||||
}
|
||||
buf, err := json.Marshal(p)
|
||||
if err != nil {
|
||||
log.Printf("social-event marshal failed: %v", err)
|
||||
return p
|
||||
}
|
||||
url := portalTargetURL(portal, "/__toolbox/social-event")
|
||||
resp, err := socialEventClient.Post(url, "application/json", bytes.NewReader(buf))
|
||||
if err != nil {
|
||||
log.Printf("social-event post failed for %s: %v", url, err)
|
||||
return p
|
||||
}
|
||||
resp.Body.Close()
|
||||
return p
|
||||
}
|
||||
|
||||
// ── proxy wiring ──────────────────────────────────────────────────────────
|
||||
|
||||
// socialEnabled reports whether cross-site correlation is on (--social-relay →
|
||||
// Proxy.socialRelayOn, with the buffer + consent log allocated). Nil-safe so the
|
||||
// CONNECT PoC / tests that build a bare Proxy can call it.
|
||||
func (px *Proxy) socialEnabled() bool {
|
||||
return px != nil && px.socialRelayOn && px.social != nil && px.consent != nil
|
||||
}
|
||||
|
||||
// emitSocial extracts the cross-site tracker edges for a MITM'd flow and buffers
|
||||
// them for the batched portal POST. clientIP is the client's peer IP; the per-
|
||||
// client identity is the WG persona hash (macHashOf) — NOT the raw-IP fallback,
|
||||
// so non-WG flows produce no edges, exactly like the Python addon's
|
||||
// _client_mac_hash gate. Gated, pure (the buffer.add is O(1) under a short
|
||||
// mutex), never blocks the flow. reqURL feeds the CMP loader-fragment check.
|
||||
func (px *Proxy) emitSocial(clientIP, host string, req *http.Request, resp *http.Response) {
|
||||
if !px.socialEnabled() || req == nil {
|
||||
return
|
||||
}
|
||||
macHash := macHashOf(clientIP)
|
||||
if macHash == "" {
|
||||
return // known R3 WG peers only (addon: `if not mac_hash: return`)
|
||||
}
|
||||
reqURL := req.URL.String()
|
||||
edges := socialEdgesFor(macHash, req, resp, host, reqURL, "", px.consent)
|
||||
px.social.add(edges...)
|
||||
}
|
||||
|
||||
// runFlusher is the background flusher goroutine: every socialFlushInterval it
|
||||
// drains the buffer to the portal. Start once from main(); runs for the process
|
||||
// lifetime.
|
||||
func (s *socialRelay) runFlusher(portal string) {
|
||||
t := time.NewTicker(socialFlushInterval)
|
||||
defer t.Stop()
|
||||
for range t.C {
|
||||
s.flushOnce(portal)
|
||||
}
|
||||
}
|
||||
297
packages/secubox-toolbox-ng/cmd/sbxmitm/social_test.go
Normal file
297
packages/secubox-toolbox-ng/cmd/sbxmitm/social_test.go
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
//
|
||||
// Cross-engine SOCIAL parity + decision harness — Go side (#662).
|
||||
//
|
||||
// Anti-rig: loads testdata/social-cookie-id-fixtures.json (GENERATED by the real
|
||||
// secubox_toolbox.social.cookie_id_hash) and asserts cookieIDHash reproduces
|
||||
// every `expect` byte-for-byte — Python is the source of truth, exactly like the
|
||||
// jar parity harness. The Python side is tests/test_social_parity.py.
|
||||
//
|
||||
// The rest exercises the ported decision surface: deny-list, registrableSocial
|
||||
// (the addon flavour, NOT policy.registrable), the 3rd-party Set-Cookie + Cookie
|
||||
// edge extraction, consent_state classification, and the relay buffer/flush.
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type (
	// socialCookieFixture is one parity case read from
	// testdata/social-cookie-id-fixtures.json.
	socialCookieFixture struct {
		TrackerDomain string `json:"tracker_domain"` // domain input to cookieIDHash
		CookieName    string `json:"cookie_name"`    // cookie-name input to cookieIDHash
		CookieValue   string `json:"cookie_value"`   // cookie-value input to cookieIDHash
		Expect        string `json:"expect"`         // hash the Go side must reproduce
		Why           string `json:"why"`            // human-readable rationale, echoed on failure
	}

	// socialCookieFile is the top-level shape of the fixture document.
	socialCookieFile struct {
		Fixtures []socialCookieFixture `json:"fixtures"`
	}
)
|
||||
|
||||
// TestCookieIDHashParity: cookieIDHash == the Python-generated expect for every
|
||||
// fixture. This is the anti-rig that proves the Go hash is byte-identical to
|
||||
// social.cookie_id_hash (lower-case domain+name, raw value, NUL separators).
|
||||
func TestCookieIDHashParity(t *testing.T) {
|
||||
dir := testdataDir(t)
|
||||
raw, err := os.ReadFile(filepath.Join(dir, "social-cookie-id-fixtures.json"))
|
||||
if err != nil {
|
||||
t.Fatalf("read social fixtures: %v", err)
|
||||
}
|
||||
var f socialCookieFile
|
||||
if err := json.Unmarshal(raw, &f); err != nil {
|
||||
t.Fatalf("parse social fixtures: %v", err)
|
||||
}
|
||||
if len(f.Fixtures) == 0 {
|
||||
t.Fatal("no social cookie-id fixtures")
|
||||
}
|
||||
for _, fx := range f.Fixtures {
|
||||
got := cookieIDHash(fx.TrackerDomain, fx.CookieName, fx.CookieValue)
|
||||
if got != fx.Expect {
|
||||
t.Errorf("cookieIDHash(%q,%q,%q)=%q want %q (%s)",
|
||||
fx.TrackerDomain, fx.CookieName, fx.CookieValue, got, fx.Expect, fx.Why)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestCookieIDHashFolding: domain+name are lower-cased but the value is NOT —
|
||||
// the explicit invariant the store contract pins.
|
||||
func TestCookieIDHashFolding(t *testing.T) {
|
||||
if cookieIDHash("DoubleClick.NET", "IDE", "AbC") != cookieIDHash("doubleclick.net", "ide", "AbC") {
|
||||
t.Error("domain+name must be case-folded")
|
||||
}
|
||||
if cookieIDHash("d.net", "n", "AbC") == cookieIDHash("d.net", "n", "abc") {
|
||||
t.Error("value must NOT be case-folded")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDenyListed(t *testing.T) {
|
||||
deny := []string{"PHPSESSID", "session", " csrftoken ", "__cf_bm", "consent", "locale", "", " "}
|
||||
for _, n := range deny {
|
||||
if !isDenyListed(n) {
|
||||
t.Errorf("isDenyListed(%q) = false, want true", n)
|
||||
}
|
||||
}
|
||||
allow := []string{"IDE", "_ga", "_fbp", "uid", "datr"}
|
||||
for _, n := range allow {
|
||||
if isDenyListed(n) {
|
||||
t.Errorf("isDenyListed(%q) = true, want false", n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegistrableSocial: the addon flavour — IP literals pass through (NOT ""),
|
||||
// no port strip semantics needed, the larger multi-label table.
|
||||
func TestRegistrableSocial(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"www.lemonde.fr": "lemonde.fr",
|
||||
"cdn.api.example.co.uk": "example.co.uk",
|
||||
"tracker.com": "tracker.com",
|
||||
"a.b.c.doubleclick.net": "doubleclick.net",
|
||||
"WWW.Example.COM": "example.com",
|
||||
"sub.example.com.au": "example.com.au",
|
||||
"192.168.1.1": "192.168.1.1", // IP literal as-is (addon), store drops later
|
||||
".trailing.dot.net.": "dot.net",
|
||||
"single": "single",
|
||||
"": "",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := registrableSocial(in); got != want {
|
||||
t.Errorf("registrableSocial(%q)=%q want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSetCookieNameValue(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
name, value string
|
||||
ok bool
|
||||
}{
|
||||
{"IDE=AHWqTUm; Domain=.doubleclick.net; Path=/", "IDE", "AHWqTUm", true},
|
||||
{" _ga = GA1.2.3 ; Max-Age=63", "_ga", "GA1.2.3", true},
|
||||
{"Secure; HttpOnly", "", "", false},
|
||||
{"=novalue", "", "", false},
|
||||
{"empty=", "empty", "", true},
|
||||
}
|
||||
for _, c := range cases {
|
||||
n, v, ok := parseSetCookieNameValue(c.in)
|
||||
if n != c.name || v != c.value || ok != c.ok {
|
||||
t.Errorf("parseSetCookieNameValue(%q)=(%q,%q,%v) want (%q,%q,%v)", c.in, n, v, ok, c.name, c.value, c.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractSetCookieDomainAttr(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"IDE=x; Domain=.doubleclick.net; Path=/": "doubleclick.net",
|
||||
"a=b; domain=Example.COM": "example.com",
|
||||
"a=b; Path=/": "",
|
||||
"a=b": "",
|
||||
"a=domainlike=1; Path=/": "", // value containing "domain" is not the attr
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := extractSetCookieDomainAttr(in); got != want {
|
||||
t.Errorf("extractSetCookieDomainAttr(%q)=%q want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSrcSiteFromReferer(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "https://tracker.io/p.gif", nil)
|
||||
if got := srcSiteFromReferer(req); got != "" {
|
||||
t.Errorf("no referer → %q want \"\"", got)
|
||||
}
|
||||
req.Header.Set("Referer", "https://www.lemonde.fr/article?x=1")
|
||||
if got := srcSiteFromReferer(req); got != "lemonde.fr" {
|
||||
t.Errorf("referer → %q want lemonde.fr", got)
|
||||
}
|
||||
req.Header.Del("Referer")
|
||||
req.Header.Set("Origin", "https://news.example.co.uk")
|
||||
if got := srcSiteFromReferer(req); got != "example.co.uk" {
|
||||
t.Errorf("origin fallback → %q want example.co.uk", got)
|
||||
}
|
||||
}
|
||||
|
||||
// respWithSetCookies returns a bare *http.Response whose header carries one
// Set-Cookie entry per given line, in order. With no lines the header is empty.
func respWithSetCookies(lines ...string) *http.Response {
	resp := &http.Response{Header: make(http.Header)}
	for i := range lines {
		resp.Header.Add("Set-Cookie", lines[i])
	}
	return resp
}
|
||||
|
||||
// TestSocialEdgesThirdParty: a 3rd-party Set-Cookie (Domain= a different eTLD+1)
|
||||
// on a 1st-party page yields one edge with the right src_site/tracker_domain.
|
||||
func TestSocialEdgesThirdParty(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "https://ads.doubleclick.net/pixel", nil)
|
||||
resp := respWithSetCookies("IDE=AHWqTUm; Domain=.doubleclick.net; Path=/")
|
||||
// reqHost is the responding host (doubleclick) — but src_site is also derived
|
||||
// from it; so to model a TRUE 3rd-party we use the Domain attr differing from
|
||||
// the request host's registrable. Here both are doubleclick.net → 1st-party,
|
||||
// expect NO edge.
|
||||
edges := socialEdgesFor("machash1", req, resp, "ads.doubleclick.net", "https://ads.doubleclick.net/pixel", "", newConsentLog())
|
||||
if len(edges) != 0 {
|
||||
t.Fatalf("1st-party Set-Cookie should yield 0 edges, got %d", len(edges))
|
||||
}
|
||||
|
||||
// Now a genuine 3rd-party: the page host is lemonde.fr, a Set-Cookie with
|
||||
// Domain=.doubleclick.net (the embedded tracker setting on its own domain via
|
||||
// the request being to doubleclick but src derived from referer is the
|
||||
// request-cookie path; the Set-Cookie path uses reqHost as src). Model the
|
||||
// addon's Set-Cookie path: reqHost=lemonde.fr, Domain attr=doubleclick.net.
|
||||
resp2 := respWithSetCookies("IDE=AHWqTUm; Domain=.doubleclick.net; Path=/")
|
||||
edges = socialEdgesFor("machash1", req, resp2, "www.lemonde.fr", "https://www.lemonde.fr/", "", newConsentLog())
|
||||
if len(edges) != 1 {
|
||||
t.Fatalf("3rd-party Set-Cookie should yield 1 edge, got %d", len(edges))
|
||||
}
|
||||
e := edges[0]
|
||||
if e.SrcSite != "lemonde.fr" || e.TrackerDomain != "doubleclick.net" {
|
||||
t.Errorf("edge src/tracker = %q/%q want lemonde.fr/doubleclick.net", e.SrcSite, e.TrackerDomain)
|
||||
}
|
||||
if e.CookieIDHashVal != cookieIDHash("doubleclick.net", "IDE", "AHWqTUm") {
|
||||
t.Errorf("edge cookie id hash mismatch: %q", e.CookieIDHashVal)
|
||||
}
|
||||
if e.ConsentState != "none_seen" {
|
||||
t.Errorf("consent_state = %q want none_seen", e.ConsentState)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSocialEdgesDenyAndIP: deny-listed names produce no edge; IP-literal hosts
|
||||
// produce no edge (registrableSocial returns the IP, store drops it — but src
|
||||
// derivation: an IP src_site == IP tracker → not 3rd party anyway).
|
||||
func TestSocialEdgesDenyAndIP(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "https://x/", nil)
|
||||
resp := respWithSetCookies("PHPSESSID=abc; Domain=.doubleclick.net")
|
||||
edges := socialEdgesFor("m", req, resp, "www.lemonde.fr", "https://www.lemonde.fr/", "", newConsentLog())
|
||||
if len(edges) != 0 {
|
||||
t.Fatalf("deny-listed cookie should yield 0 edges, got %d", len(edges))
|
||||
}
|
||||
// empty mac hash → no edges (R3-only gate)
|
||||
if e := socialEdgesFor("", req, respWithSetCookies("IDE=x; Domain=.doubleclick.net"), "www.lemonde.fr", "u", "", newConsentLog()); len(e) != 0 {
|
||||
t.Fatalf("empty macHash should yield 0 edges, got %d", len(e))
|
||||
}
|
||||
}
|
||||
|
||||
// TestSocialEdgesRequestCookiePath: a request TO a tracker carrying a Cookie,
|
||||
// with a Referer to a different 1st-party, yields an edge attributed to the
|
||||
// referer's site.
|
||||
func TestSocialEdgesRequestCookiePath(t *testing.T) {
|
||||
req := httptest.NewRequest("GET", "https://ads.doubleclick.net/px", nil)
|
||||
req.Header.Set("Cookie", "IDE=AHWqTUm; session=secret")
|
||||
req.Header.Set("Referer", "https://www.lemonde.fr/article")
|
||||
// No Set-Cookie in the response; src_site = registrableSocial(reqHost) =
|
||||
// doubleclick.net; the Set-Cookie loop emits nothing; the request-Cookie tail
|
||||
// uses ctxSite=lemonde.fr (referer) != tracker doubleclick.net → edge. The
|
||||
// deny-listed `session` cookie is skipped, so exactly 1 edge (IDE).
|
||||
edges := socialEdgesFor("m", req, &http.Response{Header: http.Header{}}, "ads.doubleclick.net", "https://ads.doubleclick.net/px", "", newConsentLog())
|
||||
if len(edges) != 1 {
|
||||
t.Fatalf("request-cookie path should yield 1 edge, got %d", len(edges))
|
||||
}
|
||||
if edges[0].SrcSite != "lemonde.fr" || edges[0].TrackerDomain != "doubleclick.net" {
|
||||
t.Errorf("edge = %q/%q want lemonde.fr/doubleclick.net", edges[0].SrcSite, edges[0].TrackerDomain)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConsentLog: loader fragment → pre_consent; CMP cookie → post_consent.
|
||||
func TestConsentLog(t *testing.T) {
|
||||
cl := newConsentLog()
|
||||
if got := cl.stateFor("m", "lemonde.fr"); got != "none_seen" {
|
||||
t.Errorf("fresh → %q want none_seen", got)
|
||||
}
|
||||
// CMP loader request observed (no consent cookie yet) → pre_consent.
|
||||
cl.update("m", "lemonde.fr", "https://cdn.cookielaw.org/consent/scripttemplates/otSDKStub.js", nil)
|
||||
if got := cl.stateFor("m", "lemonde.fr"); got != "pre_consent" {
|
||||
t.Errorf("after CMP loader → %q want pre_consent", got)
|
||||
}
|
||||
// CMP consent cookie observed → post_consent.
|
||||
cl.update("m", "lemonde.fr", "https://www.lemonde.fr/", []string{"OptanonConsent=isGpcEnabled=0; Path=/"})
|
||||
if got := cl.stateFor("m", "lemonde.fr"); got != "post_consent" {
|
||||
t.Errorf("after CMP cookie → %q want post_consent", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSocialRelayFlush: the buffer batches edges and flushOnce POSTs them to the
|
||||
// portal /__toolbox/social-event, then clears.
|
||||
func TestSocialRelayFlush(t *testing.T) {
|
||||
var got socialEventPayload
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/__toolbox/social-event" {
|
||||
t.Errorf("unexpected path %q", r.URL.Path)
|
||||
}
|
||||
_ = json.NewDecoder(r.Body).Decode(&got)
|
||||
w.WriteHeader(204)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
s := newSocialRelay()
|
||||
s.add(socialEdge{ClientMacHash: "m", SrcSite: "a.fr", TrackerDomain: "t.com", CookieIDHashVal: "deadbeef", ConsentState: "none_seen"})
|
||||
p := s.flushOnce(srv.URL)
|
||||
if len(p.Edges) != 1 || len(got.Edges) != 1 {
|
||||
t.Fatalf("flush sent %d / server got %d, want 1/1", len(p.Edges), len(got.Edges))
|
||||
}
|
||||
if got.Edges[0].TrackerDomain != "t.com" {
|
||||
t.Errorf("server edge tracker = %q want t.com", got.Edges[0].TrackerDomain)
|
||||
}
|
||||
// Buffer cleared: a second flush sends nothing.
|
||||
if p2 := s.flushOnce(srv.URL); !p2.empty() {
|
||||
t.Errorf("second flush should be empty, got %d edges", len(p2.Edges))
|
||||
}
|
||||
}
|
||||
|
||||
// TestSocialRelayCap: the buffer never exceeds socialBatchCap.
|
||||
func TestSocialRelayCap(t *testing.T) {
|
||||
s := newSocialRelay()
|
||||
for i := 0; i < socialBatchCap+100; i++ {
|
||||
s.add(socialEdge{ClientMacHash: "m", SrcSite: "a", TrackerDomain: "t", CookieIDHashVal: "h", ConsentState: "none_seen"})
|
||||
}
|
||||
if got := s.snapshot(); len(got.Edges) != socialBatchCap {
|
||||
t.Errorf("buffer held %d edges, want cap %d", len(got.Edges), socialBatchCap)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +1,22 @@
|
|||
secubox-toolbox-ng (0.1.11-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* social: ALSO correlate on the block path — blocked 3rd-party trackers still
|
||||
carry the browser's request Cookie (the cross-site evidence); without this
|
||||
the /social graph misses the very trackers it exists to expose (they're 204'd
|
||||
before the allow/mitm correlation). resp=nil request-only, hash-only. (ref #662)
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Thu, 19 Jun 2026 11:55:00 +0000
|
||||
|
||||
secubox-toolbox-ng (0.1.10-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* social: faithfully port the in-process social_graph correlation — the engine
|
||||
computes cross-site tracker edges (byte-exact cookie_id_hash, deny-list,
|
||||
eTLD+1 3rd-party check, CMP consent_state) and relays HASH-ONLY edges
|
||||
(never raw values, WG-only) to the new portal /__toolbox/social-event →
|
||||
social.record_edge → /social graph un-frozen. --social-relay (default on). (ref #662)
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Thu, 19 Jun 2026 11:30:00 +0000
|
||||
|
||||
secubox-toolbox-ng (0.1.9-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* telemetry: relay per-flow metadata to the analysis sidecars (dpi /classify,
|
||||
|
|
|
|||
61
packages/secubox-toolbox-ng/testdata/social-cookie-id-fixtures.json
vendored
Normal file
61
packages/secubox-toolbox-ng/testdata/social-cookie-id-fixtures.json
vendored
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
{
|
||||
"_comment": "Cross-engine parity fixtures for social.cookie_id_hash (#662). GENERATED by the real secubox_toolbox.social.cookie_id_hash (Python = source of truth); the Go cookieIDHash MUST reproduce every `expect` byte-for-byte. Note: tracker_domain + cookie_name are LOWER-cased before hashing, the cookie_value is NOT; NUL (0x00) separators; UTF-8 with 'replace' errors. See tests/test_social_parity.py (Python) ↔ social_test.go (Go).",
|
||||
"fixtures": [
|
||||
{
|
||||
"tracker_domain": "doubleclick.net",
|
||||
"cookie_name": "IDE",
|
||||
"cookie_value": "AHWqTUm123",
|
||||
"expect": "8e7fadaeb2584768",
|
||||
"why": "plain ascii"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "DoubleClick.NET",
|
||||
"cookie_name": "ide",
|
||||
"cookie_value": "AHWqTUm123",
|
||||
"expect": "8e7fadaeb2584768",
|
||||
"why": "domain+name UPPER folded, value verbatim -> identical hash to #1 (proves domain+name are lower-cased)"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "doubleclick.net",
|
||||
"cookie_name": "IDE",
|
||||
"cookie_value": "ahwqtum123",
|
||||
"expect": "550317c9729652c2",
|
||||
"why": "value lower-cased DIFFERS from #1 (proves the VALUE is NOT folded)"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "ads.example.com",
|
||||
"cookie_name": "_ga",
|
||||
"cookie_value": "GA1.2.999.111",
|
||||
"expect": "89a398ebd72ee863",
|
||||
"why": "GA cookie"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "tracker.io",
|
||||
"cookie_name": "uid",
|
||||
"cookie_value": "Ünîcødé✓",
|
||||
"expect": "3b4923e9d9bb77a2",
|
||||
"why": "unicode value (utf-8 encoded)"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "tracker.io",
|
||||
"cookie_name": "Ünîcödé",
|
||||
"cookie_value": "val",
|
||||
"expect": "d4db5a0d71216313",
|
||||
"why": "unicode cookie NAME (lower-cased + utf-8)"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "",
|
||||
"cookie_name": "x",
|
||||
"cookie_value": "y",
|
||||
"expect": "2081f4f26135019e",
|
||||
"why": "empty domain still hashes (NUL separators)"
|
||||
},
|
||||
{
|
||||
"tracker_domain": "d.net",
|
||||
"cookie_name": "n",
|
||||
"cookie_value": "",
|
||||
"expect": "b0da6b889cb198a1",
|
||||
"why": "empty value"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -138,6 +138,87 @@ async def toolbox_ad_event(request: Request) -> Response:
|
|||
log.debug("ad-event ingest failed: %s", e)
|
||||
return Response(status_code=204)
|
||||
|
||||
|
||||
# #662 — cross-site cookie-tracker edge ingest from the Go MITM engine (sbxmitm).
|
||||
# The #662 Phase-7 cutover decommissioned the in-process Python social_graph addon
|
||||
# that fed social.record_edge(), so the kbin /social graph (social_edges →
|
||||
# social_nodes/social_links) froze. The engine now computes the SAME 3rd-party
|
||||
# cookie-tracker edges (FAITHFUL port of social_graph.py: deny-list, eTLD+1
|
||||
# 3rd-party check, cookie_id_hash, CMP consent_state) and POSTs a batch here. We
|
||||
# call social.record_edge() per row, which writes raw social_edges; the existing
|
||||
# app.py social_fold_loop folds them into nodes/links.
|
||||
#
|
||||
# Raw cookie VALUES never reach this endpoint — only the truncated cookie_id_hash
|
||||
# (privacy/CSPN; this is exactly why the original ran in-process).
|
||||
#
|
||||
# UNAUTHENTICATED, same trust note as /__toolbox/ad-event: the engine reaches the
|
||||
# portal only over the R3 nft perimeter (loopback / WG ingress).
|
||||
_SOCIAL_EVENT_ROW_CAP = 5000 # bound the edge list so a misbehaving engine can't flood us
|
||||
_SOCIAL_FOLD_DEBOUNCE = 60 # seconds: floor between in-handler safety folds
|
||||
_social_last_fold = 0.0 # module-level throttle timestamp
|
||||
|
||||
|
||||
@router.post("/__toolbox/social-event")
async def toolbox_social_event(request: Request) -> Response:
    """Accept a batch of cross-site tracker edges posted by the Go engine.

    Strictly best-effort: the engine's POST is fire-and-forget, so every path
    through this handler (oversized body, malformed JSON, bad rows, fold
    failure) answers 204 and nothing is ever raised back to the caller. The
    endpoint is unauthenticated; see the trust note preceding this handler.
    """
    global _social_last_fold
    try:
        # Size gate ahead of any parsing (same approach as /__toolbox/ad-event):
        # a legitimate batch (≤5000 edges) sits far below 2 MB, so anything
        # declaring more is dropped before it can pressure portal memory.
        declared_raw = request.headers.get("content-length")
        try:
            declared = int(declared_raw) if declared_raw else 0
        except (TypeError, ValueError):
            declared = 0
        if declared > 2 * 1024 * 1024:
            return Response(status_code=204)

        payload = await request.json()
        if not isinstance(payload, dict):
            return Response(status_code=204)

        # Anything other than a list under "edges" is treated as an empty batch;
        # a list is truncated to the row cap.
        candidate = payload.get("edges")
        batch = candidate[:_SOCIAL_EVENT_ROW_CAP] if isinstance(candidate, list) else []

        from . import social as _social

        recorded = 0
        for row in (item for item in batch if isinstance(item, dict)):
            try:
                _social.record_edge(
                    client_mac_hash=row.get("client_mac_hash") or "",
                    src_site=row.get("src_site") or "",
                    tracker_domain=row.get("tracker_domain") or "",
                    cookie_id_hash_val=row.get("cookie_id_hash_val") or "",
                    ja4_hash=row.get("ja4_hash") or None,
                    consent_state=row.get("consent_state") or "none_seen",
                )
            except Exception as row_err:  # one bad row never fails the batch
                log.debug("social-event row failed: %s", row_err)
            else:
                recorded += 1

        # Safety fold. The periodic social_fold_loop in app.py folds every
        # 5 min; folding here as well, at most once per _SOCIAL_FOLD_DEBOUNCE
        # seconds, lets freshly ingested edges reach the graph between loop
        # ticks. The timestamp is advanced before the attempt, so a failing
        # fold is also debounced; its error is swallowed and the loop catches up.
        if recorded:
            now = time.time()
            if now - _social_last_fold >= _SOCIAL_FOLD_DEBOUNCE:
                _social_last_fold = now
                try:
                    _social.fold_recent(window_seconds=600)
                except Exception as fold_err:
                    log.debug("social-event fold failed: %s", fold_err)
    except Exception as e:  # never raise into the engine's fire-and-forget POST
        log.debug("social-event ingest failed: %s", e)
    return Response(status_code=204)
|
||||
|
||||
|
||||
# Cap geo/UA enrichment on /admin/clients/rich to the rows the UI actually shows
|
||||
# (top-5 + headroom). Beyond this, clients get bare fields — avoids ~51 cached
|
||||
# geo lookups per poll (ref #644).
|
||||
|
|
|
|||
48
packages/secubox-toolbox/tests/test_social_parity.py
Normal file
48
packages/secubox-toolbox/tests/test_social_parity.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
"""Cross-engine SOCIAL parity harness — Python side (#662).
|
||||
|
||||
Loads the SAME ``social-cookie-id-fixtures.json`` the Go core uses
|
||||
(``../secubox-toolbox-ng/testdata``) and asserts ``social.cookie_id_hash``
|
||||
reproduces each fixture's ``expect``.
|
||||
|
||||
Python is the source of truth: the ``expect`` values were GENERATED by this very
|
||||
``social.cookie_id_hash``. The Go side (cmd/sbxmitm/social_test.go) must
|
||||
reproduce them byte-for-byte. Both files reading identical inputs is what makes
|
||||
the parity meaningful — the same anti-rig discipline as the jar parity harness.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from secubox_toolbox import social
|
||||
|
||||
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
# tests/ → packages/secubox-toolbox → packages → packages/secubox-toolbox-ng
|
||||
_NG_TESTDATA = os.path.normpath(
|
||||
os.path.join(_HERE, "..", "..", "secubox-toolbox-ng", "testdata"))
|
||||
_FIXTURES = os.path.join(_NG_TESTDATA, "social-cookie-id-fixtures.json")
|
||||
|
||||
|
||||
def _load():
    """Read and parse the shared fixture document at ``_FIXTURES`` (UTF-8 JSON)."""
    with open(_FIXTURES, encoding="utf-8") as handle:
        document = json.load(handle)
    return document
|
||||
|
||||
|
||||
def test_cookie_id_hash_parity():
    """Every fixture's ``expect`` must equal ``social.cookie_id_hash`` of its inputs."""
    fixtures = _load()["fixtures"]
    assert fixtures, "no fixtures"

    # Collect every mismatch so a drift report lists all offenders at once.
    failures = [
        (fx, got)
        for fx in fixtures
        for got in [social.cookie_id_hash(
            fx["tracker_domain"], fx["cookie_name"], fx["cookie_value"])]
        if got != fx["expect"]
    ]
    assert not failures, f"cookie_id_hash drift: {failures}"
|
||||
|
||||
|
||||
def test_cookie_id_hash_invariants():
    """Domain and cookie name hash case-insensitively; the value does not."""
    hash_of = social.cookie_id_hash

    # Upper-case domain + name must collapse onto the lower-case hash.
    folded_upper = hash_of("A.NET", "N", "v")
    folded_lower = hash_of("a.net", "n", "v")
    assert folded_upper == folded_lower

    # The value is hashed verbatim, so changing its case changes the hash.
    value_upper = hash_of("a.net", "n", "V")
    value_lower = hash_of("a.net", "n", "v")
    assert value_upper != value_lower
|
||||
Loading…
Reference in New Issue
Block a user