mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-29 15:31:31 +00:00
Compare commits
10 Commits
84f0a37fdf
...
7355e606ca
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7355e606ca | ||
| e594f681a4 | |||
| 0db96a8beb | |||
| 667d8a09e0 | |||
| 170619053f | |||
|
|
25f6c19586 | ||
| 6dcf978e66 | |||
| df052796d9 | |||
| 5fc8785d68 | |||
| 25a3afaff1 |
153
packages/secubox-toolbox-ng/cmd/sbxmitm/jar.go
Normal file
153
packages/secubox-toolbox-ng/cmd/sbxmitm/jar.go
Normal file
|
|
@ -0,0 +1,153 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// SecuBox-Deb :: toolbox-ng :: anti-track fake-identity jar (#662 Phase 4)
|
||||||
|
//
|
||||||
|
// Byte-exact port of the Python anti-track HMAC fake-identity jar
|
||||||
|
// (packages/secubox-toolbox/secubox_toolbox/privacy.py: _jar_key / _shape /
|
||||||
|
// fake_id). Python is the source of truth; this mirrors it exactly, proven by
|
||||||
|
// the cross-engine parity harness (testdata/jar-fixtures.json + jar_test.go ↔
|
||||||
|
// tests/test_jar_parity.py).
|
||||||
|
//
|
||||||
|
// The jar mints a STABLE fabricated cookie value per (client, tracker,
|
||||||
|
// cookie_name): a deterministic HMAC-SHA256 of stable inputs, never derived
|
||||||
|
// from real client data, identical across workers and restarts ('rémanent').
|
||||||
|
//
|
||||||
|
// Pure standard library — no external modules, no go.sum.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/hmac"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// _privacyMultiTLD is the two-label public-suffix table used by the jar. Per
// the port notes it mirrors privacy._MULTI_TLD and is deliberately NOT the
// ad_ghost table from policy.go: this one carries ac.uk/com.cn/com.tr/gov.uk/
// org.uk and has no gouv.fr. fakeID folds trackers through this table, so
// changing it changes every minted identity.
var _privacyMultiTLD = map[string]bool{
	"ac.uk":  true,
	"co.jp":  true,
	"co.nz":  true,
	"co.uk":  true,
	"co.za":  true,
	"com.au": true,
	"com.br": true,
	"com.cn": true,
	"com.tr": true,
	"gov.uk": true,
	"org.uk": true,
}

// registrableJar reduces host to its registrable domain (eTLD+1) using the
// privacy-flavored table above; it is the jar's counterpart of
// privacy.registrable and must not be swapped for policy.go's registrable.
//
// The host is whitespace-trimmed, lowercased and stripped of trailing dots.
// An empty result is returned as-is, as is any name made only of digits and
// dots (an IPv4-style literal) and any name with at most two labels.
func registrableJar(host string) string {
	name := strings.TrimRight(strings.ToLower(strings.TrimSpace(host)), ".")
	if name == "" {
		return name
	}

	// A name with no character other than digits and dots is treated as an
	// IP literal and passed through untouched.
	notIPChar := func(r rune) bool { return r != '.' && (r < '0' || r > '9') }
	if strings.IndexFunc(name, notIPChar) < 0 {
		return name
	}

	labels := strings.Split(name, ".")
	n := len(labels)
	if n <= 2 {
		return name
	}

	suffix := labels[n-2] + "." + labels[n-1]
	if !_privacyMultiTLD[suffix] {
		return suffix
	}
	// Two-label public suffix: keep one more label in front of it.
	return strings.Join(labels[n-3:], ".")
}
|
||||||
|
|
||||||
|
// loadJarKey reads the jar seed key from path and returns it with leading and
// trailing ASCII whitespace removed, matching Python's
// `Path(JAR_KEY_PATH).read_bytes().strip()`.
//
// It returns nil when the file is missing/unreadable OR when nothing is left
// after trimming; both cases correspond to Python's `_jar_key()` returning
// None, which in turn makes fakeID return ("", false).
//
// The trim set is exactly the six bytes bytes.strip() removes: space, \t, \n,
// \r, \v (0x0b) and \f (0x0c). strings.TrimSpace is deliberately NOT used: it
// also removes Unicode spaces such as U+0085 and U+00A0, so a binary key whose
// first or last bytes happen to be e.g. 0xC2 0xA0 would be shortened here but
// not in Python, and the two engines would mint different identities.
func loadJarKey(path string) []byte {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil
	}
	// An all-ASCII cutset is matched byte by byte, so bytes >= 0x80 at the
	// edges of the key are never consumed.
	const asciiSpace = " \t\n\r\v\f"
	key := []byte(strings.Trim(string(raw), asciiSpace))
	if len(key) == 0 {
		return nil
	}
	return key
}
|
||||||
|
|
||||||
|
// shape formats an HMAC digest so it looks like the cookie it replaces. It is
// the Go counterpart of privacy._shape, whose reference logic is:
//
//	n = (name or "").lower()
//	i = int.from_bytes(digest[:8], "big"); j = int.from_bytes(digest[8:16], "big")
//	if n.startswith("_ga"):  return "GA1.2.%d.%d" % (i % 1e10, j % 1e10)
//	if n in ("_fbp",):       return "fb.1.%d.%d" % (i % 1e13, j % 1e10)
//	if n in ("uuid","uid","_pk_id") or len(name) >= 32:
//	    h = digest.hex(); return "%s-%s-%s-%s-%s" % (h[:8],h[8:12],h[12:16],h[16:20],h[20:32])
//	return digest.hex()[:32]
//
// The length test counts runes (len([]rune(name))), not bytes, because the
// reference counts characters. Both integers are read as big-endian uint64
// values from the first and second 8 bytes of digest. digest must hold at
// least 16 bytes; a shorter slice panics.
func shape(name string, digest []byte) string {
	const (
		tenDigits      uint64 = 10_000_000_000
		thirteenDigits uint64 = 10_000_000_000_000
	)

	first := binary.BigEndian.Uint64(digest[:8])
	second := binary.BigEndian.Uint64(digest[8:16])
	lowered := strings.ToLower(name)

	if strings.HasPrefix(lowered, "_ga") {
		return fmt.Sprintf("GA1.2.%d.%d", first%tenDigits, second%tenDigits)
	}
	if lowered == "_fbp" {
		return fmt.Sprintf("fb.1.%d.%d", first%thirteenDigits, second%tenDigits)
	}

	hexed := hex.EncodeToString(digest)

	// Well-known id cookie names, or any long name, get a UUID-looking value.
	uuidLike := len([]rune(name)) >= 32
	switch lowered {
	case "uuid", "uid", "_pk_id":
		uuidLike = true
	}
	if uuidLike {
		groups := []string{hexed[:8], hexed[8:12], hexed[12:16], hexed[16:20], hexed[20:32]}
		return strings.Join(groups, "-")
	}
	return hexed[:32]
}
|
||||||
|
|
||||||
|
// fakeID returns a stable fabricated cookie value for (clientHash, tracker,
|
||||||
|
// cookieName). Mirrors privacy.fake_id EXACTLY:
|
||||||
|
//
|
||||||
|
// if not key or not client_hash or not tracker: return None
|
||||||
|
// msg = ("%s|%s|%s" % (client_hash, registrable(tracker), cookie_name)).encode()
|
||||||
|
// digest = hmac.new(key, msg, sha256).digest()
|
||||||
|
// return _shape(cookie_name, digest)
|
||||||
|
//
|
||||||
|
// Returns ("", false) for every case where Python returns None: empty key,
|
||||||
|
// empty clientHash, or empty tracker.
|
||||||
|
//
|
||||||
|
// IMPORTANT: this uses registrableJar (privacy.registrable flavor), NOT the
|
||||||
|
// ad_ghost-flavored registrable() in policy.go. They DIVERGE (gov.uk vs gouv.fr,
|
||||||
|
// IP literals) — `privacy.fake_id` folds the tracker via privacy.registrable, so
|
||||||
|
// the jar MUST too or the fake persona mismatches across engines at cutover.
|
||||||
|
// Do NOT "consolidate" to policy.registrable; the divergence-guard fixtures
|
||||||
|
// (ad.example.gov.uk, 9.9.9.9) will fail if you do.
|
||||||
|
func fakeID(clientHash, tracker, cookieName string, key []byte) (string, bool) {
|
||||||
|
if len(key) == 0 || clientHash == "" || tracker == "" {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
msg := fmt.Sprintf("%s|%s|%s", clientHash, registrableJar(tracker), cookieName)
|
||||||
|
mac := hmac.New(sha256.New, key)
|
||||||
|
mac.Write([]byte(msg))
|
||||||
|
digest := mac.Sum(nil)
|
||||||
|
return shape(cookieName, digest), true
|
||||||
|
}
|
||||||
141
packages/secubox-toolbox-ng/cmd/sbxmitm/jar_test.go
Normal file
141
packages/secubox-toolbox-ng/cmd/sbxmitm/jar_test.go
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// Cross-engine JAR parity harness — Go side (#662 Phase 4).
|
||||||
|
//
|
||||||
|
// Loads testdata/jar-fixtures.json + the fixed test key (testdata/jar-test.key,
|
||||||
|
// NOT the real /etc key), computes fakeID per fixture, and asserts == the
|
||||||
|
// fixture's expect. The Python side (../secubox-toolbox/tests/test_jar_parity.py)
|
||||||
|
// loads the SAME files and drives privacy.fake_id; both must agree → the HMAC
|
||||||
|
// fake-identity jar is byte-exact across engines. Python is the source of truth.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// jarFixture is one parity case decoded from jar-fixtures.json: the three
// fakeID inputs plus the value fakeID is expected to return for them.
type jarFixture struct {
	Client     string `json:"client"`      // passed to fakeID as clientHash
	Tracker    string `json:"tracker"`     // passed to fakeID as tracker
	CookieName string `json:"cookie_name"` // passed to fakeID as cookieName
	Expect     string `json:"expect"`      // value fakeID must return
	Why        string `json:"why"`         // free-text rationale, echoed in failure messages
}

// jarFile is the top-level shape of jar-fixtures.json.
type jarFile struct {
	KeyFile  string       `json:"key_file"` // key file name, joined onto the testdata dir by the tests
	KeyHex   string       `json:"key_hex"`  // hex of the key loadJarKey must yield from KeyFile
	Fixtures []jarFixture `json:"fixtures"` // parity cases; must be non-empty
}
|
||||||
|
|
||||||
|
func loadJarFile(t *testing.T) (jarFile, string) {
|
||||||
|
t.Helper()
|
||||||
|
dir := testdataDir(t) // shared with policy_test.go (cmd/sbxmitm → ../../testdata)
|
||||||
|
raw, err := os.ReadFile(filepath.Join(dir, "jar-fixtures.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read jar fixtures: %v", err)
|
||||||
|
}
|
||||||
|
var jf jarFile
|
||||||
|
if err := json.Unmarshal(raw, &jf); err != nil {
|
||||||
|
t.Fatalf("parse jar fixtures: %v", err)
|
||||||
|
}
|
||||||
|
if len(jf.Fixtures) == 0 {
|
||||||
|
t.Fatal("no jar fixtures")
|
||||||
|
}
|
||||||
|
return jf, dir
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestJarKeyLoad: loadJarKey strips the file's surrounding whitespace back to
|
||||||
|
// the canonical key declared in key_hex (proves .strip()/TrimSpace parity).
|
||||||
|
func TestJarKeyLoad(t *testing.T) {
|
||||||
|
jf, dir := loadJarFile(t)
|
||||||
|
key := loadJarKey(filepath.Join(dir, jf.KeyFile))
|
||||||
|
if key == nil {
|
||||||
|
t.Fatal("loadJarKey returned nil")
|
||||||
|
}
|
||||||
|
want, err := hex.DecodeString(jf.KeyHex)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("bad key_hex: %v", err)
|
||||||
|
}
|
||||||
|
if hex.EncodeToString(key) != hex.EncodeToString(want) {
|
||||||
|
t.Fatalf("loaded key %x != canonical %x", key, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestJarParity: fakeID == Python-generated expect for every fixture.
|
||||||
|
func TestJarParity(t *testing.T) {
|
||||||
|
jf, dir := loadJarFile(t)
|
||||||
|
key := loadJarKey(filepath.Join(dir, jf.KeyFile))
|
||||||
|
if key == nil {
|
||||||
|
t.Fatal("loadJarKey returned nil — cannot run parity")
|
||||||
|
}
|
||||||
|
for _, fx := range jf.Fixtures {
|
||||||
|
got, ok := fakeID(fx.Client, fx.Tracker, fx.CookieName, key)
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("fakeID(%q,%q,%q) returned !ok (%s)", fx.Client, fx.Tracker, fx.CookieName, fx.Why)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if got != fx.Expect {
|
||||||
|
t.Errorf("fakeID(%q,%q,%q)=%q want %q (%s)",
|
||||||
|
fx.Client, fx.Tracker, fx.CookieName, got, fx.Expect, fx.Why)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestJarShapeCoverage: the fixtures must exercise every _shape branch, else
|
||||||
|
// "parity" is vacuous for an untested branch.
|
||||||
|
func TestJarShapeCoverage(t *testing.T) {
|
||||||
|
jf, _ := loadJarFile(t)
|
||||||
|
var sawGA, sawFB, sawUUID, sawHex bool
|
||||||
|
for _, fx := range jf.Fixtures {
|
||||||
|
switch {
|
||||||
|
case len(fx.Expect) >= 4 && fx.Expect[:4] == "GA1.":
|
||||||
|
sawGA = true
|
||||||
|
case len(fx.Expect) >= 3 && fx.Expect[:3] == "fb.":
|
||||||
|
sawFB = true
|
||||||
|
case len(fx.Expect) == 36 && fx.Expect[8] == '-':
|
||||||
|
sawUUID = true
|
||||||
|
case len(fx.Expect) == 32:
|
||||||
|
sawHex = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !sawGA || !sawFB || !sawUUID || !sawHex {
|
||||||
|
t.Fatalf("shape coverage incomplete: GA=%v FB=%v UUID=%v HEX=%v", sawGA, sawFB, sawUUID, sawHex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestJarFolding: two subdomains of the same registrable tracker, same client &
|
||||||
|
// cookie name, mint the IDENTICAL fake id (registrable() folding).
|
||||||
|
func TestJarFolding(t *testing.T) {
|
||||||
|
jf, dir := loadJarFile(t)
|
||||||
|
key := loadJarKey(filepath.Join(dir, jf.KeyFile))
|
||||||
|
a, _ := fakeID("foldclient", "px.doubleclick.net", "uid", key)
|
||||||
|
b, _ := fakeID("foldclient", "ads.doubleclick.net", "uid", key)
|
||||||
|
if a == "" || a != b {
|
||||||
|
t.Fatalf("folding broken: px=%q ads=%q", a, b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestJarNilCases: fakeID returns ("",false) exactly where Python returns None.
|
||||||
|
func TestJarNilCases(t *testing.T) {
|
||||||
|
jf, dir := loadJarFile(t)
|
||||||
|
key := loadJarKey(filepath.Join(dir, jf.KeyFile))
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
client, tracker, cookie string
|
||||||
|
k []byte
|
||||||
|
}{
|
||||||
|
{"empty key", "c", "t.example", "uid", nil},
|
||||||
|
{"empty client", "", "t.example", "uid", key},
|
||||||
|
{"empty tracker", "c", "", "uid", key},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
if v, ok := fakeID(tc.client, tc.tracker, tc.cookie, tc.k); ok || v != "" {
|
||||||
|
t.Errorf("%s: fakeID=%q,%v want \"\",false", tc.name, v, ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -129,38 +129,14 @@ func (c *CA) forge(host string) (*tls.Certificate, error) {
|
||||||
return tc, nil
|
return tc, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Pure handler logic (the ported addon decisions) ─────────────────────────
|
// ── Pure handler logic ───────────────────────────────────────────────────────
|
||||||
|
//
|
||||||
type Policy struct {
|
// The decision surface (Decide / action / registrable / splice helpers) lives
|
||||||
AdHosts []string // ad_ghost: 204 these (suffix match)
|
// in policy.go, ported from the Python addons and proven at parity by the
|
||||||
SpliceHosts []string // tls_splice: passthrough, no MITM (suffix match)
|
// cross-engine harness. The body-inject helper is kept here next to the wiring.
|
||||||
Inject []byte // banner / ad-CSS marker injected before </head> or </body>
|
|
||||||
}
|
|
||||||
|
|
||||||
func suffixMatch(host string, pats []string) bool {
|
|
||||||
h := strings.ToLower(strings.TrimSpace(host))
|
|
||||||
for _, p := range pats {
|
|
||||||
p = strings.ToLower(p)
|
|
||||||
if h == p || strings.HasSuffix(h, "."+p) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// action: "block" (204), "splice" (passthrough), or "mitm".
|
|
||||||
func (p Policy) action(host string) string {
|
|
||||||
if suffixMatch(host, p.SpliceHosts) {
|
|
||||||
return "splice"
|
|
||||||
}
|
|
||||||
if suffixMatch(host, p.AdHosts) {
|
|
||||||
return "block"
|
|
||||||
}
|
|
||||||
return "mitm"
|
|
||||||
}
|
|
||||||
|
|
||||||
// injectMarker inserts p.Inject before </head> (else </body>, else prepends).
|
// injectMarker inserts p.Inject before </head> (else </body>, else prepends).
|
||||||
func (p Policy) injectMarker(body []byte) []byte {
|
func (p *Policy) injectMarker(body []byte) []byte {
|
||||||
if len(p.Inject) == 0 || bytes.Contains(body, p.Inject) {
|
if len(p.Inject) == 0 || bytes.Contains(body, p.Inject) {
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
@ -201,7 +177,7 @@ func ja4ish(h *tls.ClientHelloInfo) string {
|
||||||
|
|
||||||
type Proxy struct {
|
type Proxy struct {
|
||||||
ca *CA
|
ca *CA
|
||||||
pol Policy
|
pol *Policy
|
||||||
jaSink func(string) // JA4 observations (logged; a sidecar in prod)
|
jaSink func(string) // JA4 observations (logged; a sidecar in prod)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -290,13 +266,17 @@ func main() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("CA load: %v", err)
|
log.Fatalf("CA load: %v", err)
|
||||||
}
|
}
|
||||||
|
// Load the BLOCK/SPLICE policy from the SAME on-disk config the Python
|
||||||
|
// addons read (defaults + env overrides). Missing files are tolerated
|
||||||
|
// (best-effort, like the addons): the engine then simply MITMs everything.
|
||||||
|
pol, err := LoadPolicy(PolicyOpts{})
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("policy load: %v", err)
|
||||||
|
}
|
||||||
|
pol.Inject = []byte("<!-- sbx-ng banner -->")
|
||||||
px := &Proxy{
|
px := &Proxy{
|
||||||
ca: ca,
|
ca: ca,
|
||||||
pol: Policy{
|
pol: pol,
|
||||||
AdHosts: []string{"doubleclick.net", "googlesyndication.com"},
|
|
||||||
SpliceHosts: []string{"googlevideo.com", "fbcdn.net"},
|
|
||||||
Inject: []byte("<!-- sbx-ng banner -->"),
|
|
||||||
},
|
|
||||||
jaSink: func(s string) { log.Printf("ja4 %s", s) },
|
jaSink: func(s string) { log.Printf("ja4 %s", s) },
|
||||||
}
|
}
|
||||||
srv := &http.Server{Addr: *addr, Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
srv := &http.Server{Addr: *addr, Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
|
||||||
|
|
@ -72,24 +72,13 @@ func TestForgeChainsToCA(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestActionDecision(t *testing.T) {
|
// NOTE (#662 Phase 3): the old TestActionDecision drove the removed hardcoded
|
||||||
p := Policy{AdHosts: []string{"doubleclick.net"}, SpliceHosts: []string{"googlevideo.com"}}
|
// Policy{AdHosts, SpliceHosts} fields. The decision surface now loads from
|
||||||
cases := map[string]string{
|
// disk (LoadPolicy) and mirrors the Python addons; coverage moved to
|
||||||
"ads.doubleclick.net": "block",
|
// TestParityDecide / TestPolicyActionVerbs in policy_test.go.
|
||||||
"doubleclick.net": "block",
|
|
||||||
"r1.googlevideo.com": "splice",
|
|
||||||
"news.example.com": "mitm",
|
|
||||||
"notdoubleclick.net": "mitm",
|
|
||||||
}
|
|
||||||
for host, want := range cases {
|
|
||||||
if got := p.action(host); got != want {
|
|
||||||
t.Errorf("action(%q)=%q want %q", host, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInjectMarker(t *testing.T) {
|
func TestInjectMarker(t *testing.T) {
|
||||||
p := Policy{Inject: []byte("<!--SBX-->")}
|
p := &Policy{Inject: []byte("<!--SBX-->")}
|
||||||
out := string(p.injectMarker([]byte("<html><head></head><body>hi</body></html>")))
|
out := string(p.injectMarker([]byte("<html><head></head><body>hi</body></html>")))
|
||||||
if !contains(out, "<!--SBX--></head>") {
|
if !contains(out, "<!--SBX--></head>") {
|
||||||
t.Fatalf("marker not injected before </head>: %s", out)
|
t.Fatalf("marker not injected before </head>: %s", out)
|
||||||
|
|
|
||||||
369
packages/secubox-toolbox-ng/cmd/sbxmitm/policy.go
Normal file
369
packages/secubox-toolbox-ng/cmd/sbxmitm/policy.go
Normal file
|
|
@ -0,0 +1,369 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// SecuBox-Deb :: toolbox-ng :: policy layer (#662 Phase 3)
|
||||||
|
//
|
||||||
|
// Ports the toolbox BLOCK (ad_ghost) and SPLICE (tls_splice) decision logic
|
||||||
|
// into the Go core, reading the SAME on-disk config files the Python addons
|
||||||
|
// use. Python is the source of truth; this mirrors it byte-for-byte on the
|
||||||
|
// decision surface, proven by the cross-engine parity harness
|
||||||
|
// (testdata/parity-fixtures.json + policy_test.go ↔ tests/test_engine_parity.py).
|
||||||
|
//
|
||||||
|
// Pure standard library — no external modules, no go.sum.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ── ad_ghost: static ad/tracker host pattern (port of _AD_HOST) ──────────────
//
// Python (mitmproxy_addons/ad_ghost.py):
//
//	_AD_HOST = re.compile(
//	    r"(?:^|\.)(?:doubleclick|googlesyndication|googleadservices|"
//	    r"googletagservices|adservice\.google|amazon-adsystem|adnxs|adsrvr|"
//	    r"adform|criteo|rubiconproject|taboola|outbrain|smartadserver|moatads|"
//	    r"scorecardresearch|2mdn|adroll|pubmatic|openx|casalemedia|"
//	    r"yieldlove|sharethrough|teads|3lift|adsystem|adserver)",
//	    re.IGNORECASE)
//
// Every construct here — non-capturing groups, `^`, `\.`, alternation, the
// case-insensitive flag — is RE2-safe, so it translates 1:1 to Go regexp via
// the `(?i)` inline flag. No fallback substring split was needed.
//
// Note the pattern anchors only the START of a label (`^` or a literal dot);
// nothing anchors the end, so a label that merely begins with one of the
// tokens also matches (e.g. "adserverx.example").
const adHostPattern = `(?i)(?:^|\.)(?:doubleclick|googlesyndication|googleadservices|` +
	`googletagservices|adservice\.google|amazon-adsystem|adnxs|adsrvr|` +
	`adform|criteo|rubiconproject|taboola|outbrain|smartadserver|moatads|` +
	`scorecardresearch|2mdn|adroll|pubmatic|openx|casalemedia|` +
	`yieldlove|sharethrough|teads|3lift|adsystem|adserver)`
|
||||||
|
|
||||||
|
// twoLevelTLD lists the two-label public suffixes known to registrable; it is
// the port of ad_ghost._2L_TLD. A host ending in one of these keeps three
// labels instead of two.
var twoLevelTLD = map[string]bool{
	"co.jp":   true,
	"co.nz":   true,
	"co.uk":   true,
	"co.za":   true,
	"com.au":  true,
	"com.br":  true,
	"gouv.fr": true,
}
|
||||||
|
|
||||||
|
// ── PolicyOpts: configurable file paths (env-overridable, like Python) ───────
|
||||||
|
|
||||||
|
// PolicyOpts holds the on-disk paths the loaders read. Empty fields fall back
// to the real production defaults (or the env override) in LoadPolicy; the
// same applies to an empty SelfDomains slice. FortknoxSites has no default:
// LoadPolicy only merges whatever the caller supplies into the splice
// never-set.
type PolicyOpts struct {
	AllowPath        string   // ad-allowlist.txt (_ALLOW_PATH)
	LearnedPath      string   // learned-trackers.txt (_LEARNED_PATH)
	SpliceSeedPath   string   // conf/tls-splice-seed.conf (SEED_PATH)
	SpliceLearnPath  string   // splice-learned.txt (LEARNED_PATH)
	PureTrackersPath string   // pure-trackers.txt (PURE_PATH)
	FortknoxSites    []string // filters.json fortknox_sites
	SelfDomains      []string // _SELF_REGS (default {secubox.in}, env SECUBOX_SELF_DOMAINS)
}
|
||||||
|
|
||||||
|
// defaultPolicyOpts returns the production defaults, honoring the same env vars
|
||||||
|
// the Python addons read.
|
||||||
|
func defaultPolicyOpts() PolicyOpts {
|
||||||
|
o := PolicyOpts{
|
||||||
|
AllowPath: "/var/lib/secubox/toolbox/ad-allowlist.txt",
|
||||||
|
LearnedPath: "/var/lib/secubox/toolbox/learned-trackers.txt",
|
||||||
|
SpliceSeedPath: envOr("SECUBOX_SPLICE_SEED", "/usr/lib/secubox/toolbox/conf/tls-splice-seed.conf"),
|
||||||
|
SpliceLearnPath: envOr("SECUBOX_SPLICE_LEARNED", "/var/lib/secubox/toolbox/splice-learned.txt"),
|
||||||
|
PureTrackersPath: envOr("SECUBOX_PURE_TRACKERS", "/var/lib/secubox/toolbox/pure-trackers.txt"),
|
||||||
|
}
|
||||||
|
// _SELF_REGS: env SECUBOX_SELF_DOMAINS (comma-split), default {secubox.in}.
|
||||||
|
self := os.Getenv("SECUBOX_SELF_DOMAINS")
|
||||||
|
if strings.TrimSpace(self) == "" {
|
||||||
|
self = "secubox.in"
|
||||||
|
}
|
||||||
|
for _, d := range strings.Split(self, ",") {
|
||||||
|
if d = strings.TrimSpace(strings.ToLower(d)); d != "" {
|
||||||
|
o.SelfDomains = append(o.SelfDomains, d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
// envOr returns the value of environment variable key, or def when the
// variable is unset or set to the empty string.
func envOr(key, def string) string {
	value, present := os.LookupEnv(key)
	if !present || value == "" {
		return def
	}
	return value
}
|
||||||
|
|
||||||
|
// ── Policy: the loaded decision state ────────────────────────────────────────
|
||||||
|
|
||||||
|
// Policy carries the loaded sets/regex and decides per-host actions. It also
// keeps the legacy PoC fields (Inject) so the existing wiring/tests still work.
//
// Every set is a map used as a membership set. The unexported fields are
// filled in by LoadPolicy; Inject is left for the caller to set.
type Policy struct {
	adHost      *regexp.Regexp  // compiled adHostPattern
	learned     map[string]bool // learned-trackers (host or registrable, lowercased)
	allow       map[string]bool // ad-allowlist (host or registrable, lowercased)
	spliceSeed  map[string]bool // splice seed patterns
	spliceLearn map[string]bool // splice learned patterns
	never       map[string]bool // pure-trackers ∪ fortknox (splice never-set)
	selfRegs    map[string]bool // own-infra registrable domains
	selfDomains []string        // own-infra (for the host==d || host endswith .d guard)

	// Legacy PoC fields kept so non-policy behaviour is unchanged.
	Inject []byte // banner / ad-CSS marker injected before </head> or </body>
}
|
||||||
|
|
||||||
|
// loadLines mirrors the comment-stripping Python loaders (splice._load_lines,
// ad_ghost._allowed's allowlist read): split on first '#', trim, lowercase,
// skip blanks. Missing/unreadable file → empty set (best-effort).
func loadLines(path string) map[string]bool {
	return scanLines(path, true)
}

// loadLinesRaw mirrors ad_ghost._learned_set, which does NOT comment-strip —
// learned-trackers.txt is a machine-generated one-host-per-line file. It does
// `{ln.strip().lower() for ln in f if ln.strip()}`. Matching this exactly is
// load-bearing for parity (a '#' in this file would be kept verbatim, not a
// comment), so the Go core must mirror the divergent behaviour, not normalise it.
func loadLinesRaw(path string) map[string]bool {
	return scanLines(path, false)
}

// scanLines reads path line by line into a set. Each line is optionally cut at
// its first '#', then whitespace-trimmed and lowercased; blank results are
// skipped. A file that cannot be opened yields an empty (non-nil) set.
//
// Lines are read with bufio.Reader.ReadString rather than bufio.Scanner: a
// Scanner stops at the first line longer than its buffer cap and, with the
// error unchecked, silently drops every entry after it. ReadString has no
// per-line limit, so one oversized line cannot truncate the policy set. If a
// read fails midway, the entries gathered so far (including the partial last
// line) are returned — still best-effort, never an error.
func scanLines(path string, stripComments bool) map[string]bool {
	out := map[string]bool{}
	f, err := os.Open(path)
	if err != nil {
		return out
	}
	defer f.Close()

	r := bufio.NewReaderSize(f, 64*1024)
	for {
		// ReadString returns the data read so far together with the error, so
		// a final line without a trailing newline is still processed below.
		ln, readErr := r.ReadString('\n')
		if stripComments {
			if i := strings.IndexByte(ln, '#'); i >= 0 {
				ln = ln[:i]
			}
		}
		// TrimSpace also removes the line terminator (\n or \r\n).
		if ln = strings.ToLower(strings.TrimSpace(ln)); ln != "" {
			out[ln] = true
		}
		if readErr != nil {
			return out
		}
	}
}
|
||||||
|
|
||||||
|
// LoadPolicy loads all backing files from opts (defaults applied for empty
|
||||||
|
// fields) and compiles the ad-host regex. It never returns an error for missing
|
||||||
|
// files (best-effort, like the Python addons), only for a regex-compile bug.
|
||||||
|
func LoadPolicy(opts PolicyOpts) (*Policy, error) {
|
||||||
|
def := defaultPolicyOpts()
|
||||||
|
if opts.AllowPath == "" {
|
||||||
|
opts.AllowPath = def.AllowPath
|
||||||
|
}
|
||||||
|
if opts.LearnedPath == "" {
|
||||||
|
opts.LearnedPath = def.LearnedPath
|
||||||
|
}
|
||||||
|
if opts.SpliceSeedPath == "" {
|
||||||
|
opts.SpliceSeedPath = def.SpliceSeedPath
|
||||||
|
}
|
||||||
|
if opts.SpliceLearnPath == "" {
|
||||||
|
opts.SpliceLearnPath = def.SpliceLearnPath
|
||||||
|
}
|
||||||
|
if opts.PureTrackersPath == "" {
|
||||||
|
opts.PureTrackersPath = def.PureTrackersPath
|
||||||
|
}
|
||||||
|
if len(opts.SelfDomains) == 0 {
|
||||||
|
opts.SelfDomains = def.SelfDomains
|
||||||
|
}
|
||||||
|
|
||||||
|
re, err := regexp.Compile(adHostPattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// never-set = pure-trackers ∪ fortknox_sites (mirrors TlsSplice._refresh_sets).
|
||||||
|
never := loadLines(opts.PureTrackersPath)
|
||||||
|
for _, s := range opts.FortknoxSites {
|
||||||
|
if s = strings.Trim(strings.ToLower(strings.TrimSpace(s)), "."); s != "" {
|
||||||
|
never[s] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
selfRegs := map[string]bool{}
|
||||||
|
selfDomains := make([]string, 0, len(opts.SelfDomains))
|
||||||
|
for _, d := range opts.SelfDomains {
|
||||||
|
d = strings.ToLower(strings.TrimSpace(d))
|
||||||
|
if d == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
selfRegs[d] = true
|
||||||
|
selfDomains = append(selfDomains, d)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Policy{
|
||||||
|
adHost: re,
|
||||||
|
learned: loadLinesRaw(opts.LearnedPath), // mirrors _learned_set (no comment-strip)
|
||||||
|
allow: loadLines(opts.AllowPath),
|
||||||
|
spliceSeed: loadLines(opts.SpliceSeedPath),
|
||||||
|
spliceLearn: loadLines(opts.SpliceLearnPath),
|
||||||
|
never: never,
|
||||||
|
selfRegs: selfRegs,
|
||||||
|
selfDomains: selfDomains,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── registrable: port of ad_ghost._registrable ───────────────────────────────
|
||||||
|
//
|
||||||
|
// host = host.split(":")[0].lower().strip(".")
|
||||||
|
// if not host or host.replace(".","").isdigit() or ":" in host: return None
|
||||||
|
// p = host.split(".")
|
||||||
|
// if len(p) <= 2: return host
|
||||||
|
// last2 = ".".join(p[-2:])
|
||||||
|
// return ".".join(p[-3:]) if (last2 in _2L_TLD and len(p) >= 3) else last2
|
||||||
|
func registrable(host string) string {
|
||||||
|
host = strings.ToLower(host)
|
||||||
|
if i := strings.IndexByte(host, ':'); i >= 0 {
|
||||||
|
host = host[:i]
|
||||||
|
}
|
||||||
|
host = strings.Trim(host, ".")
|
||||||
|
if host == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// host.replace(".","").isdigit() → all-digit IPv4-ish → no registrable.
|
||||||
|
if isAllDigits(strings.ReplaceAll(host, ".", "")) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// The Python checks ":" in host AFTER stripping the port; a residual colon
|
||||||
|
// (e.g. an IPv6 literal) yields None. We already split on the first colon,
|
||||||
|
// so re-check the remainder for any colon to mirror exactly.
|
||||||
|
if strings.IndexByte(host, ':') >= 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
p := strings.Split(host, ".")
|
||||||
|
if len(p) <= 2 {
|
||||||
|
return host
|
||||||
|
}
|
||||||
|
last2 := strings.Join(p[len(p)-2:], ".")
|
||||||
|
if twoLevelTLD[last2] && len(p) >= 3 {
|
||||||
|
return strings.Join(p[len(p)-3:], ".")
|
||||||
|
}
|
||||||
|
return last2
|
||||||
|
}
|
||||||
|
|
||||||
|
// isAllDigits reports whether s is non-empty and consists solely of the ASCII
// digits '0'..'9'. The empty string yields false (Python "".isdigit() is False).
func isAllDigits(s string) bool {
	if len(s) == 0 {
		return false
	}
	// Byte-wise scan: every byte of a multi-byte rune is >= 0x80, hence never
	// in the '0'..'9' range, so this agrees with a rune-wise check.
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// ── splice helpers: port of splice.host_matches / should_splice ──────────────
|
||||||
|
|
||||||
|
// hostMatches reports whether host equals one of patterns or is a
// dotted-suffix subdomain of one (host "a.b.example.com" matches pattern
// "example.com" but "notexample.com" does not). host is lowercased and
// stripped of surrounding dots first; an empty host or an empty pattern set
// never matches.
//
// The suffix test walks the dot boundaries of host and looks each tail up in
// the map, instead of iterating every pattern. The result is the same — a
// pattern p satisfies HasSuffix(h, "."+p) exactly when p is the text after
// some dot in h — but the cost is proportional to the number of labels in
// host rather than to the size of the pattern list.
func hostMatches(host string, patterns map[string]bool) bool {
	h := strings.Trim(strings.ToLower(host), ".")
	if h == "" || len(patterns) == 0 {
		return false
	}
	if patterns[h] {
		return true
	}
	for i := 0; i < len(h); i++ {
		if h[i] != '.' {
			continue
		}
		// Key presence (not the stored value) decides a suffix match, as it
		// did when the patterns were enumerated with range.
		if _, ok := patterns[h[i+1:]]; ok {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// allowed: port of ad_ghost._allowed. Own-infra ALWAYS wins (reflash-safe),
|
||||||
|
// then the operator allowlist (host or registrable).
|
||||||
|
func (p *Policy) allowed(host string) bool {
|
||||||
|
h := strings.ToLower(host)
|
||||||
|
reg := registrable(h)
|
||||||
|
if reg == "" {
|
||||||
|
reg = h
|
||||||
|
}
|
||||||
|
// own infra: registrable in selfRegs, OR host == d || host endswith "."+d.
|
||||||
|
if p.selfRegs[reg] {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
for _, d := range p.selfDomains {
|
||||||
|
if h == d || strings.HasSuffix(h, "."+d) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p.allow[h] || p.allow[reg]
|
||||||
|
}
|
||||||
|
|
||||||
|
// shouldSplice: port of splice.should_splice (never wins; then seed ∪ learned).
|
||||||
|
func (p *Policy) shouldSplice(sni string) bool {
|
||||||
|
s := strings.Trim(strings.ToLower(sni), ".")
|
||||||
|
if s == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if hostMatches(s, p.never) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return hostMatches(s, p.spliceSeed) || hostMatches(s, p.spliceLearn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// blockedByAd: port of the ad_ghost requestheaders block decision (sans the
|
||||||
|
// allowlist guard, which Decide applies first): _AD_HOST match OR
|
||||||
|
// registrable/host in learned-trackers.
|
||||||
|
func (p *Policy) blockedByAd(host string) bool {
|
||||||
|
if p.adHost.MatchString(host) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
reg := registrable(host)
|
||||||
|
if reg != "" && p.learned[reg] {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return p.learned[strings.ToLower(host)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Decide: the unified cross-engine decision ────────────────────────────────
|
||||||
|
//
|
||||||
|
// action ∈ {"allow","block","splice","mitm"}. Precedence (mirrors the Python
|
||||||
|
// across the two addons, documented in the harness):
|
||||||
|
//
|
||||||
|
// 1. own-infra / allowlist → "allow" (ad_ghost._allowed; never block/splice)
|
||||||
|
// 2. splice never-set check, then seed/learned → "splice"
|
||||||
|
// (tls_splice runs FIRST at the TLS layer; should_splice already excludes
|
||||||
|
// the never-set = pure-trackers ∪ fortknox, so a tracker that is also a
|
||||||
|
// splice candidate fails should_splice here and falls through to block)
|
||||||
|
// 3. _AD_HOST / learned → "block" (ad_ghost requestheaders, request layer)
|
||||||
|
// 4. otherwise → "mitm"
|
||||||
|
//
|
||||||
|
// sni defaults to host when empty (the live engine splices on SNI == the TLS
|
||||||
|
// host; for the parity harness host and sni are the same value).
|
||||||
|
func (p *Policy) Decide(host, sni string) string {
|
||||||
|
if sni == "" {
|
||||||
|
sni = host
|
||||||
|
}
|
||||||
|
if p.allowed(host) {
|
||||||
|
return "allow"
|
||||||
|
}
|
||||||
|
if p.shouldSplice(sni) {
|
||||||
|
return "splice"
|
||||||
|
}
|
||||||
|
if p.blockedByAd(host) {
|
||||||
|
return "block"
|
||||||
|
}
|
||||||
|
return "mitm"
|
||||||
|
}
|
||||||
|
|
||||||
|
// action keeps the legacy 3-verb surface (block/splice/mitm) for the PoC
|
||||||
|
// CONNECT wiring, derived from Decide: "allow" collapses to "mitm" (an
|
||||||
|
// allowlisted host is intercepted normally, just never short-circuited).
|
||||||
|
func (p *Policy) action(host string) string {
|
||||||
|
switch p.Decide(host, host) {
|
||||||
|
case "splice":
|
||||||
|
return "splice"
|
||||||
|
case "block":
|
||||||
|
return "block"
|
||||||
|
default: // "allow" and "mitm" both → normal interception
|
||||||
|
return "mitm"
|
||||||
|
}
|
||||||
|
}
|
||||||
142
packages/secubox-toolbox-ng/cmd/sbxmitm/policy_test.go
Normal file
142
packages/secubox-toolbox-ng/cmd/sbxmitm/policy_test.go
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// Cross-engine parity harness — Go side (#662 Phase 3).
|
||||||
|
//
|
||||||
|
// Loads testdata/parity-fixtures.json + the testdata/config snapshot, runs
|
||||||
|
// Policy.Decide on each host, and asserts == the fixture's expect. The Python
|
||||||
|
// side (../secubox-toolbox/tests/test_engine_parity.py) loads the SAME files
|
||||||
|
// and drives the SAME decision; both must agree → parity proven.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// JSON shapes of testdata/parity-fixtures.json.
type (
	// parityConfig is the "config" object. The string fields are
	// slash-separated paths that the tests resolve against the testdata
	// directory; the slices are passed to LoadPolicy as-is.
	parityConfig struct {
		AdAllowlist     string   `json:"ad_allowlist"`
		LearnedTrackers string   `json:"learned_trackers"`
		SpliceSeed      string   `json:"splice_seed"`
		SpliceLearned   string   `json:"splice_learned"`
		PureTrackers    string   `json:"pure_trackers"`
		SelfDomains     []string `json:"self_domains"`
		FortknoxSites   []string `json:"fortknox_sites"`
	}

	// parityFixture is one expectation: Decide(Host, Host) must equal Expect.
	// Why is a human-readable rationale echoed in failure messages.
	parityFixture struct {
		Host   string `json:"host"`
		Expect string `json:"expect"`
		Why    string `json:"why"`
	}

	// parityFile is the top-level document: one config plus its fixtures.
	parityFile struct {
		Config   parityConfig    `json:"config"`
		Fixtures []parityFixture `json:"fixtures"`
	}
)
|
||||||
|
|
||||||
|
// testdataDir returns the absolute path of the shared testdata directory,
// located two levels above the working directory of this package's tests
// (cmd/sbxmitm → ../../testdata). It fails the test if the path cannot be
// made absolute.
func testdataDir(t *testing.T) string {
	t.Helper()
	rel := filepath.Join("..", "..", "testdata")
	abs, err := filepath.Abs(rel)
	if err != nil {
		t.Fatal(err)
	}
	return abs
}
|
||||||
|
|
||||||
|
func loadParityFile(t *testing.T) (parityFile, string) {
|
||||||
|
t.Helper()
|
||||||
|
dir := testdataDir(t)
|
||||||
|
raw, err := os.ReadFile(filepath.Join(dir, "parity-fixtures.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read fixtures: %v", err)
|
||||||
|
}
|
||||||
|
var pf parityFile
|
||||||
|
if err := json.Unmarshal(raw, &pf); err != nil {
|
||||||
|
t.Fatalf("parse fixtures: %v", err)
|
||||||
|
}
|
||||||
|
if len(pf.Fixtures) == 0 {
|
||||||
|
t.Fatal("no fixtures")
|
||||||
|
}
|
||||||
|
return pf, dir
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParityDecide(t *testing.T) {
|
||||||
|
pf, dir := loadParityFile(t)
|
||||||
|
cfgPath := func(rel string) string { return filepath.Join(dir, filepath.FromSlash(rel)) }
|
||||||
|
|
||||||
|
pol, err := LoadPolicy(PolicyOpts{
|
||||||
|
AllowPath: cfgPath(pf.Config.AdAllowlist),
|
||||||
|
LearnedPath: cfgPath(pf.Config.LearnedTrackers),
|
||||||
|
SpliceSeedPath: cfgPath(pf.Config.SpliceSeed),
|
||||||
|
SpliceLearnPath: cfgPath(pf.Config.SpliceLearned),
|
||||||
|
PureTrackersPath: cfgPath(pf.Config.PureTrackers),
|
||||||
|
FortknoxSites: pf.Config.FortknoxSites,
|
||||||
|
SelfDomains: pf.Config.SelfDomains,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadPolicy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, fx := range pf.Fixtures {
|
||||||
|
got := pol.Decide(fx.Host, fx.Host)
|
||||||
|
if got != fx.Expect {
|
||||||
|
t.Errorf("Decide(%q)=%q want %q (%s)", fx.Host, got, fx.Expect, fx.Why)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestPolicyActionVerbs checks the legacy 3-verb action() surface still wired
|
||||||
|
// into the PoC CONNECT path: allow collapses to mitm; block/splice preserved.
|
||||||
|
func TestPolicyActionVerbs(t *testing.T) {
|
||||||
|
pf, dir := loadParityFile(t)
|
||||||
|
cfgPath := func(rel string) string { return filepath.Join(dir, filepath.FromSlash(rel)) }
|
||||||
|
pol, err := LoadPolicy(PolicyOpts{
|
||||||
|
AllowPath: cfgPath(pf.Config.AdAllowlist),
|
||||||
|
LearnedPath: cfgPath(pf.Config.LearnedTrackers),
|
||||||
|
SpliceSeedPath: cfgPath(pf.Config.SpliceSeed),
|
||||||
|
SpliceLearnPath: cfgPath(pf.Config.SpliceLearned),
|
||||||
|
PureTrackersPath: cfgPath(pf.Config.PureTrackers),
|
||||||
|
FortknoxSites: pf.Config.FortknoxSites,
|
||||||
|
SelfDomains: pf.Config.SelfDomains,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
cases := map[string]string{
|
||||||
|
"ads.doubleclick.net": "block",
|
||||||
|
"r1.googlevideo.com": "splice",
|
||||||
|
"news.example.com": "mitm",
|
||||||
|
"notdoubleclick.net": "mitm",
|
||||||
|
"analytics.example-allowed.com": "mitm", // allow → normal interception (mitm verb)
|
||||||
|
"hub.secubox.in": "mitm", // own-infra → normal interception
|
||||||
|
}
|
||||||
|
for host, want := range cases {
|
||||||
|
if got := pol.action(host); got != want {
|
||||||
|
t.Errorf("action(%q)=%q want %q", host, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRegistrable exercises the _registrable port incl. the 2-level TLD list.
|
||||||
|
func TestRegistrable(t *testing.T) {
|
||||||
|
cases := map[string]string{
|
||||||
|
"a.b.example.com": "example.com",
|
||||||
|
"example.com": "example.com",
|
||||||
|
"com": "com",
|
||||||
|
"a.b.example.co.uk": "example.co.uk",
|
||||||
|
"example.co.uk": "example.co.uk", // 2 labels → returned as-is
|
||||||
|
"x.y.z.example.com": "example.com",
|
||||||
|
"1.2.3.4": "",
|
||||||
|
"": "",
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := registrable(in); got != want {
|
||||||
|
t.Errorf("registrable(%q)=%q want %q", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
93
packages/secubox-toolbox-ng/cmd/sbxmitm/sidecar.go
Normal file
93
packages/secubox-toolbox-ng/cmd/sbxmitm/sidecar.go
Normal file
|
|
@ -0,0 +1,93 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// SecuBox-Deb :: toolbox-ng :: sidecar emit helper (#662 Phase 4)
|
||||||
|
//
|
||||||
|
// Fire-and-forget POST to a unix-socket'd SecuBox module, mirroring the Python
|
||||||
|
// addons' _common.fire_forget_post: it NEVER blocks the proxy flow and NEVER
|
||||||
|
// raises into the caller. The live engine will relay extracted signals to the
|
||||||
|
// existing module sockets; this is the transport only — NOT yet wired into the
|
||||||
|
// live request/response path (Phase 5+ wiring).
|
||||||
|
//
|
||||||
|
// Addon → socket mapping the live engine will use (verbatim from the Python
|
||||||
|
// addons' TARGET constants, packages/secubox-toolbox/mitmproxy_addons/*.py):
|
||||||
|
//
|
||||||
|
// addon socket path route
|
||||||
|
// cookies → /run/secubox/cookies.sock POST /inject
|
||||||
|
// dpi → /run/secubox/dpi.sock POST /classify
|
||||||
|
// avatar → /run/secubox/avatar.sock POST /fingerprint
|
||||||
|
// ja4 → /run/secubox/threat-analyst.sock POST /ja4
|
||||||
|
// soc_relay → /run/secubox/soc.sock POST /event
|
||||||
|
// social_graph: in-process (no socket) — correlated inside the engine, not emitted.
|
||||||
|
//
|
||||||
|
// emit takes the full socket PATH (not an http+unix:// URL) plus the route in
|
||||||
|
// the payload's destination; callers build the path from the table above.
|
||||||
|
//
|
||||||
|
// Pure standard library — no external modules, no go.sum.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// emitTimeout caps the whole connect+write+read so a slow/dead module socket
|
||||||
|
// can never wedge the engine. Mirrors the Python httpx timeout=2.
|
||||||
|
const emitTimeout = 2 * time.Second
|
||||||
|
|
||||||
|
// emit fires a fire-and-forget POST of payload to the given unix socket at
|
||||||
|
// route, in a detached goroutine. It returns immediately and never blocks the
|
||||||
|
// caller; all errors (missing socket, dead peer, timeout) are swallowed —
|
||||||
|
// dropping a relayed signal must never break a client flow. Mirrors
|
||||||
|
// _common.fire_forget_post + queue_async (create_task, never raise).
|
||||||
|
//
|
||||||
|
// route is the HTTP path on the module (e.g. "/inject", "/classify"); use the
|
||||||
|
// addon→socket table above to pick socketPath + route together.
|
||||||
|
func emit(socketPath, route string, payload []byte) {
|
||||||
|
go emitSync(socketPath, route, payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitSync performs the actual POST synchronously (under emitTimeout). Exposed
|
||||||
|
// (lowercase, same-package) so tests can observe delivery deterministically
|
||||||
|
// without racing the goroutine. Returns an error only for the test's benefit;
|
||||||
|
// emit() discards it.
|
||||||
|
func emitSync(socketPath, route string, payload []byte) error {
|
||||||
|
if route == "" {
|
||||||
|
route = "/"
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), emitTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
var d net.Dialer
|
||||||
|
conn, err := d.DialContext(ctx, "unix", socketPath)
|
||||||
|
if err != nil {
|
||||||
|
return err // dead/missing socket — swallowed by emit()
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
if dl, ok := ctx.Deadline(); ok {
|
||||||
|
_ = conn.SetDeadline(dl)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Minimal HTTP/1.1 POST. Host is a placeholder (unix transport); the module
|
||||||
|
// FastAPI apps ignore it. Connection: close so the peer EOFs after replying.
|
||||||
|
req := fmt.Sprintf(
|
||||||
|
"POST %s HTTP/1.1\r\nHost: secubox.local\r\nContent-Type: application/json\r\n"+
|
||||||
|
"Content-Length: %d\r\nConnection: close\r\n\r\n",
|
||||||
|
route, len(payload))
|
||||||
|
if _, err := conn.Write([]byte(req)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if len(payload) > 0 {
|
||||||
|
if _, err := conn.Write(payload); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Best-effort drain so the peer sees a clean close; we don't parse the
|
||||||
|
// response (fire-and-forget). Errors here are irrelevant.
|
||||||
|
buf := make([]byte, 512)
|
||||||
|
_, _ = conn.Read(buf)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
125
packages/secubox-toolbox-ng/cmd/sbxmitm/sidecar_test.go
Normal file
125
packages/secubox-toolbox-ng/cmd/sbxmitm/sidecar_test.go
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
// SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
// Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
//
|
||||||
|
// Unit tests for the sidecar emit helper (#662 Phase 4).
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"net"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestEmitDelivers: emitSync to a live unix socket delivers the POST request
|
||||||
|
// line, route and JSON body.
|
||||||
|
func TestEmitDelivers(t *testing.T) {
|
||||||
|
sock := filepath.Join(t.TempDir(), "emit.sock")
|
||||||
|
ln, err := net.Listen("unix", sock)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("listen: %v", err)
|
||||||
|
}
|
||||||
|
defer ln.Close()
|
||||||
|
|
||||||
|
got := make(chan string, 1)
|
||||||
|
go func() {
|
||||||
|
c, err := ln.Accept()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer c.Close()
|
||||||
|
c.SetReadDeadline(time.Now().Add(2 * time.Second))
|
||||||
|
var sb strings.Builder
|
||||||
|
r := bufio.NewReader(c)
|
||||||
|
buf := make([]byte, 4096)
|
||||||
|
for {
|
||||||
|
n, err := r.Read(buf)
|
||||||
|
sb.Write(buf[:n])
|
||||||
|
if err != nil || strings.Contains(sb.String(), `"k":"v"`) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Reply so emitSync's drain completes cleanly.
|
||||||
|
c.Write([]byte("HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"))
|
||||||
|
got <- sb.String()
|
||||||
|
}()
|
||||||
|
|
||||||
|
if err := emitSync(sock, "/classify", []byte(`{"k":"v"}`)); err != nil {
|
||||||
|
t.Fatalf("emitSync: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case raw := <-got:
|
||||||
|
if !strings.HasPrefix(raw, "POST /classify HTTP/1.1") {
|
||||||
|
t.Errorf("missing/wrong request line in:\n%s", raw)
|
||||||
|
}
|
||||||
|
if !strings.Contains(raw, `{"k":"v"}`) {
|
||||||
|
t.Errorf("body not delivered in:\n%s", raw)
|
||||||
|
}
|
||||||
|
case <-time.After(3 * time.Second):
|
||||||
|
t.Fatal("server never received the emit")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEmitDeadSocketNoPanicNoBlock: emit() (the goroutine form) to a
|
||||||
|
// nonexistent socket must return immediately and never panic, and emitSync
|
||||||
|
// must just return an error without blocking past the timeout.
|
||||||
|
func TestEmitDeadSocketNoPanicNoBlock(t *testing.T) {
|
||||||
|
dead := filepath.Join(t.TempDir(), "nope.sock")
|
||||||
|
|
||||||
|
// emit (async) returns instantly even though the socket is dead.
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
emit(dead, "/inject", []byte(`{"x":1}`)) // must not panic/block
|
||||||
|
}()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
t.Fatal("emit() blocked on a dead socket")
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitSync surfaces the dial error (which emit swallows) without blocking.
|
||||||
|
start := time.Now()
|
||||||
|
if err := emitSync(dead, "/inject", []byte(`{}`)); err == nil {
|
||||||
|
t.Error("emitSync to dead socket: expected error, got nil")
|
||||||
|
}
|
||||||
|
if elapsed := time.Since(start); elapsed > emitTimeout+time.Second {
|
||||||
|
t.Errorf("emitSync blocked %v on dead socket", elapsed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEmitEmptyRouteDefaults: an empty route becomes "/".
|
||||||
|
func TestEmitEmptyRouteDefaults(t *testing.T) {
|
||||||
|
sock := filepath.Join(t.TempDir(), "root.sock")
|
||||||
|
ln, err := net.Listen("unix", sock)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer ln.Close()
|
||||||
|
got := make(chan string, 1)
|
||||||
|
go func() {
|
||||||
|
c, err := ln.Accept()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer c.Close()
|
||||||
|
buf := make([]byte, 256)
|
||||||
|
n, _ := c.Read(buf)
|
||||||
|
c.Write([]byte("HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"))
|
||||||
|
got <- string(buf[:n])
|
||||||
|
}()
|
||||||
|
if err := emitSync(sock, "", nil); err != nil {
|
||||||
|
t.Fatalf("emitSync: %v", err)
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case raw := <-got:
|
||||||
|
if !strings.HasPrefix(raw, "POST / HTTP/1.1") {
|
||||||
|
t.Errorf("empty route not defaulted to /, got:\n%s", raw)
|
||||||
|
}
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("no request received")
|
||||||
|
}
|
||||||
|
}
|
||||||
4
packages/secubox-toolbox-ng/testdata/config/ad-allowlist.txt
vendored
Normal file
4
packages/secubox-toolbox-ng/testdata/config/ad-allowlist.txt
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
# SecuBox toolbox-ng parity fixture: operator ad-allowlist.
|
||||||
|
# Allowlist ALWAYS wins (never block, never splice, never record).
|
||||||
|
analytics.example-allowed.com # an allowlisted host
|
||||||
|
criteo-but-allowed.example # would-be-ad registrable, but allowlisted
|
||||||
3
packages/secubox-toolbox-ng/testdata/config/learned-trackers.txt
vendored
Normal file
3
packages/secubox-toolbox-ng/testdata/config/learned-trackers.txt
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
learned-tracker.example
|
||||||
|
pure-tracker.example
|
||||||
|
commented-learned.example # inline comment — _learned_set keeps the FULL line, not comment-stripped
|
||||||
3
packages/secubox-toolbox-ng/testdata/config/pure-trackers.txt
vendored
Normal file
3
packages/secubox-toolbox-ng/testdata/config/pure-trackers.txt
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
# SecuBox toolbox-ng parity fixture: pure trackers — the splice never-set.
|
||||||
|
# A host here is NEVER spliced even if it's a splice-seed/learned candidate.
|
||||||
|
pure-tracker.example # pure tracker AND in splice-learned → never wins → block
|
||||||
3
packages/secubox-toolbox-ng/testdata/config/splice-learned.txt
vendored
Normal file
3
packages/secubox-toolbox-ng/testdata/config/splice-learned.txt
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
# SecuBox toolbox-ng parity fixture: auto-learned splice (never-HTML) hosts.
|
||||||
|
assets.example-cdn.com # a splice-learned host
|
||||||
|
pure-tracker.example # ALSO in pure-trackers (never) → never wins → not spliced
|
||||||
3
packages/secubox-toolbox-ng/testdata/config/tls-splice-seed.conf
vendored
Normal file
3
packages/secubox-toolbox-ng/testdata/config/tls-splice-seed.conf
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
# SecuBox toolbox-ng parity fixture: shipped splice seed (pure-asset CDNs).
|
||||||
|
googlevideo.com # YouTube video streams
|
||||||
|
fbcdn.net # Facebook / Instagram media
|
||||||
84
packages/secubox-toolbox-ng/testdata/jar-fixtures.json
vendored
Normal file
84
packages/secubox-toolbox-ng/testdata/jar-fixtures.json
vendored
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
{
|
||||||
|
"_doc": "Cross-engine JAR (anti-track HMAC fake-identity) parity fixtures (#662 Phase 4). Go core (jar_test.go) and Python (privacy.fake_id via tests/test_jar_parity.py) load THIS file + the fixed test key file (jar-test.key, NOT the real /etc/secubox/secrets/privacy-jar.key), compute fakeID/fake_id per fixture, and MUST agree. Python is the source of truth; expect values are GENERATED by privacy.fake_id (never hand-computed). The key file carries leading/trailing whitespace to exercise .strip()/TrimSpace; key_hex below is the canonical post-strip key.",
|
||||||
|
"key_file": "jar-test.key",
|
||||||
|
"key_hex": "53656375426f780a546573744a61724b65795631aabbccddeeff0011deadbe7f",
|
||||||
|
"fixtures": [
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "google-analytics.com",
|
||||||
|
"cookie_name": "_ga",
|
||||||
|
"expect": "GA1.2.3904711466.3108239649",
|
||||||
|
"why": "_ga cookie -> GA1 shape"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "google-analytics.com",
|
||||||
|
"cookie_name": "_ga_ABC123",
|
||||||
|
"expect": "GA1.2.5796600959.265364931",
|
||||||
|
"why": "GA4 per-property -> still GA1 shape (startswith _ga)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "connect.facebook.net",
|
||||||
|
"cookie_name": "_fbp",
|
||||||
|
"expect": "fb.1.6011068296128.8272063998",
|
||||||
|
"why": "_fbp -> fb shape"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "tracker.example.com",
|
||||||
|
"cookie_name": "uuid",
|
||||||
|
"expect": "a357739e-e6e8-020e-c9ee-cb92950d1a71",
|
||||||
|
"why": "uuid -> uuid shape"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "matomo.example.com",
|
||||||
|
"cookie_name": "_pk_id",
|
||||||
|
"expect": "7be228ae-3261-d609-1cec-dc0dc05a8abf",
|
||||||
|
"why": "_pk_id -> uuid shape"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "tracker.example.com",
|
||||||
|
"cookie_name": "abcdefghijklmnopqrstuvwxyz012345",
|
||||||
|
"expect": "416e7233-dfb8-ec7f-a2fe-45ed5dbdcaf4",
|
||||||
|
"why": "name >=32 chars -> uuid shape via len branch"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientAAA",
|
||||||
|
"tracker": "tracker.example.com",
|
||||||
|
"cookie_name": "sid",
|
||||||
|
"expect": "5cb0940c4562a4f76cf638e40ff552af",
|
||||||
|
"why": "generic -> hex[:32]"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientFold",
|
||||||
|
"tracker": "px.doubleclick.net",
|
||||||
|
"cookie_name": "uid",
|
||||||
|
"expect": "c1b6daf8-7ac1-edf6-c67b-3e23ec8eb61d",
|
||||||
|
"why": "registrable folding A (px.doubleclick.net)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientFold",
|
||||||
|
"tracker": "ads.doubleclick.net",
|
||||||
|
"cookie_name": "uid",
|
||||||
|
"expect": "c1b6daf8-7ac1-edf6-c67b-3e23ec8eb61d",
|
||||||
|
"why": "registrable folding B (ads.doubleclick.net) -> SAME fake_id as A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientGovuk",
|
||||||
|
"tracker": "ad.example.gov.uk",
|
||||||
|
"cookie_name": "uid",
|
||||||
|
"expect": "75cc2df5-1ee2-da62-9023-aa11c57419af",
|
||||||
|
"why": "DIVERGENCE GUARD: privacy.registrable=example.gov.uk (gov.uk in privacy._MULTI_TLD); ad_ghost._2L lacks gov.uk so policy.registrable would give gov.uk -> forces the jar to use registrableJar"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"client": "clientIP",
|
||||||
|
"tracker": "9.9.9.9",
|
||||||
|
"cookie_name": "sid",
|
||||||
|
"expect": "53bf4dd57df7a26d6eff83092c869835",
|
||||||
|
"why": "DIVERGENCE GUARD: IP-literal tracker -> privacy.registrable returns as-is (ad_ghost._registrable returns None) -> forces registrableJar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
packages/secubox-toolbox-ng/testdata/jar-test.key
vendored
Normal file
BIN
packages/secubox-toolbox-ng/testdata/jar-test.key
vendored
Normal file
Binary file not shown.
31
packages/secubox-toolbox-ng/testdata/parity-fixtures.json
vendored
Normal file
31
packages/secubox-toolbox-ng/testdata/parity-fixtures.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"_doc": "Cross-engine parity fixtures (#662 Phase 3). Both the Go core (policy_test.go) and the Python addons (tests/test_engine_parity.py) load THIS file plus the testdata/config snapshot, run their Decide logic on each host, and must agree. Python is the source of truth; Go matches it. action ∈ {allow, block, splice, mitm}.",
|
||||||
|
"config": {
|
||||||
|
"ad_allowlist": "config/ad-allowlist.txt",
|
||||||
|
"learned_trackers": "config/learned-trackers.txt",
|
||||||
|
"splice_seed": "config/tls-splice-seed.conf",
|
||||||
|
"splice_learned": "config/splice-learned.txt",
|
||||||
|
"pure_trackers": "config/pure-trackers.txt",
|
||||||
|
"self_domains": ["secubox.in"],
|
||||||
|
"fortknox_sites": ["mybank.example"]
|
||||||
|
},
|
||||||
|
"fixtures": [
|
||||||
|
{"host": "ads.doubleclick.net", "expect": "block", "why": "static ad host (_AD_HOST dotted-prefix doubleclick)"},
|
||||||
|
{"host": "doubleclick.net", "expect": "block", "why": "static ad host (_AD_HOST bare)"},
|
||||||
|
{"host": "criteo.com", "expect": "block", "why": "static ad host (_AD_HOST criteo)"},
|
||||||
|
{"host": "learned-tracker.example", "expect": "block", "why": "auto-learned tracker (learned-trackers.txt)"},
|
||||||
|
{"host": "pure-tracker.example", "expect": "block", "why": "pure-tracker + splice-learned: never wins (no splice) → falls to block (also learned)"},
|
||||||
|
{"host": "hub.secubox.in", "expect": "allow", "why": "own-infra subdomain (self_domains) — never block/splice"},
|
||||||
|
{"host": "secubox.in", "expect": "allow", "why": "own-infra apex"},
|
||||||
|
{"host": "analytics.example-allowed.com", "expect": "allow", "why": "operator allowlisted host"},
|
||||||
|
{"host": "criteo-but-allowed.example", "expect": "allow", "why": "would-be-ad registrable but allowlisted → allowlist wins"},
|
||||||
|
{"host": "r1.googlevideo.com", "expect": "splice", "why": "splice seed subdomain (CDN shard)"},
|
||||||
|
{"host": "googlevideo.com", "expect": "splice", "why": "splice seed exact"},
|
||||||
|
{"host": "assets.example-cdn.com", "expect": "splice", "why": "splice-learned host"},
|
||||||
|
{"host": "mybank.example", "expect": "mitm", "why": "fortknox site in never-set; not in seed/learned → no splice; not ad/learned → mitm"},
|
||||||
|
{"host": "notdoubleclick.net", "expect": "mitm", "why": "no-false-suffix negative — _AD_HOST requires (^|.) boundary"},
|
||||||
|
{"host": "news.example.com", "expect": "mitm", "why": "plain site"},
|
||||||
|
{"host": "notsecubox.in", "expect": "mitm", "why": "own-infra FALSE-prefix negative — must NOT match self_domains"},
|
||||||
|
{"host": "commented-learned.example", "expect": "mitm", "why": "learned-trackers NOT comment-stripped (_learned_set keeps full line incl ' # ...'); bare host not in set → not blocked. Discriminates loadLinesRaw vs loadLines"}
|
||||||
|
]
|
||||||
|
}
|
||||||
125
packages/secubox-toolbox/tests/test_engine_parity.py
Normal file
125
packages/secubox-toolbox/tests/test_engine_parity.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
"""Cross-engine parity harness — Python side (#662 Phase 3).
|
||||||
|
|
||||||
|
Loads the SAME ``parity-fixtures.json`` and ``testdata/config`` snapshot the Go
|
||||||
|
core uses (``../secubox-toolbox-ng/testdata``), drives the production Python
|
||||||
|
decision logic — ``ad_ghost._allowed`` + ``_AD_HOST`` + the learned-trackers
|
||||||
|
check, composed with ``splice.should_splice`` — under the SAME precedence as
|
||||||
|
Go's ``Policy.Decide``, and asserts the action == the fixture's ``expect``.
|
||||||
|
|
||||||
|
Python is the source of truth: if Go and Python ever diverge on a fixture, Go
|
||||||
|
is fixed to match this. Both test files reading the identical inputs is what
|
||||||
|
makes the parity meaningful.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from mitmproxy_addons import ad_ghost
|
||||||
|
from secubox_toolbox import splice
|
||||||
|
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
# tests/ → packages/secubox-toolbox → packages → packages/secubox-toolbox-ng
|
||||||
|
_NG_TESTDATA = os.path.normpath(
|
||||||
|
os.path.join(_HERE, "..", "..", "secubox-toolbox-ng", "testdata"))
|
||||||
|
_FIXTURES = os.path.join(_NG_TESTDATA, "parity-fixtures.json")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_fixtures():
    """Return the decoded content of the shared ``parity-fixtures.json``."""
    with open(_FIXTURES, encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _cfg_path(rel: str) -> str:
    """Resolve a slash-separated fixture path against the shared testdata dir."""
    native = os.sep.join(rel.split("/"))
    return os.path.join(_NG_TESTDATA, native)
|
||||||
|
|
||||||
|
|
||||||
|
def _decide(host: str, sni: str, *, seed, learned_splice, never,
            self_regs) -> str:
    """Return the action for ``host`` using the same precedence as Go's
    ``Policy.Decide``.

    1. ``ad_ghost._allowed(host)``                          → ``"allow"``
    2. ``splice.should_splice`` on ``sni`` (or ``host``)    → ``"splice"``
    3. ``_AD_HOST`` match, or registrable/host in the
       learned-trackers set                                 → ``"block"``
    4. anything else                                        → ``"mitm"``

    ``self_regs`` is accepted but not read here.
    """
    # 1. Allowlist and own-infra are checked before everything else.
    if ad_ghost._allowed(host):
        return "allow"

    # 2. Splice decision; an empty sni falls back to the host.
    if splice.should_splice(sni or host, seed, learned_splice, never):
        return "splice"

    # 3. Static ad-host pattern first, learned trackers only if it misses.
    if ad_ghost._AD_HOST.search(host):
        return "block"
    registrable = ad_ghost._registrable(host)
    trackers = ad_ghost._learned_set()
    if (registrable and registrable in trackers) or host.lower() in trackers:
        return "block"

    # 4. Nothing matched.
    return "mitm"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def parity_env(monkeypatch):
    """Aim the Python decision logic at the testdata snapshot shared with Go.

    Patches ``ad_ghost``'s allowlist/learned-trackers paths, self-domain set
    and module-level caches, loads the splice sets, and returns a dict with
    the fixtures plus ``seed``, ``learned_splice``, ``never`` and
    ``self_regs``.
    """
    data = _load_fixtures()
    cfg = data["config"]

    # ad_ghost overrides, applied in order: file paths, self-domains, then the
    # module-level caches reset so the patched paths are (re)read.
    overrides = (
        ("_ALLOW_PATH", _cfg_path(cfg["ad_allowlist"])),
        ("_LEARNED_PATH", _cfg_path(cfg["learned_trackers"])),
        ("_SELF_REGS",
         {d.strip().lower() for d in cfg["self_domains"] if d.strip()}),
        ("_allow", set()),
        ("_allow_mtime", 0.0),
        ("_learned", set()),
        ("_learned_mtime", 0.0),
        ("_learned_check", 0.0),  # bypass the 60s cache
    )
    for attr, value in overrides:
        monkeypatch.setattr(ad_ghost, attr, value)

    # never-set = pure-trackers file plus the fortknox sites from the config.
    never = splice.load_learned_splice(_cfg_path(cfg["pure_trackers"]))
    for site in cfg.get("fortknox_sites", []) or []:
        never.add(str(site).lower().strip("."))

    env = {"fixtures": data["fixtures"]}
    env["seed"] = splice.load_splice_seed(_cfg_path(cfg["splice_seed"]))
    env["learned_splice"] = splice.load_learned_splice(
        _cfg_path(cfg["splice_learned"]))
    env["never"] = never
    env["self_regs"] = ad_ghost._SELF_REGS
    return env
|
||||||
|
|
||||||
|
|
||||||
|
def test_parity_decide(parity_env):
    """Run ``_decide`` over every fixture host and report all mismatches at once."""
    inputs = {
        key: parity_env[key]
        for key in ("seed", "learned_splice", "never", "self_regs")
    }

    mismatches = []
    for case in parity_env["fixtures"]:
        name = case["host"]
        # The fixture host doubles as the SNI.
        verdict = _decide(name, name, **inputs)
        if verdict == case["expect"]:
            continue
        mismatches.append(
            f"Decide({name!r})={verdict!r} want {case['expect']!r} ({case.get('why')})")
    assert not mismatches, "Python↔fixture parity mismatches:\n" + "\n".join(mismatches)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fixtures_present(parity_env):
|
||||||
|
# Guard: the fixture set must cover every action class, else "parity" is
|
||||||
|
# vacuously true for a missing branch.
|
||||||
|
actions = {fx["expect"] for fx in parity_env["fixtures"]}
|
||||||
|
assert actions == {"allow", "block", "splice", "mitm"}, actions
|
||||||
97
packages/secubox-toolbox/tests/test_jar_parity.py
Normal file
97
packages/secubox-toolbox/tests/test_jar_parity.py
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||||
|
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||||
|
"""Cross-engine JAR parity harness — Python side (#662 Phase 4).
|
||||||
|
|
||||||
|
Loads the SAME ``jar-fixtures.json`` + fixed test key the Go core uses
|
||||||
|
(``../secubox-toolbox-ng/testdata``), points ``privacy.JAR_KEY_PATH`` at the
|
||||||
|
test key (NOT the real ``/etc/secubox/secrets/privacy-jar.key``), resets the
|
||||||
|
jar-key cache, and asserts ``privacy.fake_id`` == each fixture's ``expect``.
|
||||||
|
|
||||||
|
Python is the source of truth: the ``expect`` values were GENERATED by this
|
||||||
|
very ``privacy.fake_id`` with the test key. The Go side (jar_test.go) must
|
||||||
|
reproduce them byte-for-byte. Both files reading identical inputs is what makes
|
||||||
|
the parity meaningful.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from secubox_toolbox import privacy
|
||||||
|
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
# tests/ → packages/secubox-toolbox → packages → packages/secubox-toolbox-ng
|
||||||
|
_NG_TESTDATA = os.path.normpath(
|
||||||
|
os.path.join(_HERE, "..", "..", "secubox-toolbox-ng", "testdata"))
|
||||||
|
_FIXTURES = os.path.join(_NG_TESTDATA, "jar-fixtures.json")
|
||||||
|
|
||||||
|
|
||||||
|
def _load():
    """Open the file named by ``_FIXTURES`` as UTF-8 and return its parsed JSON."""
    with open(_FIXTURES, encoding="utf-8") as handle:
        document = json.load(handle)
    return document
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def jar_env(monkeypatch):
    """Point privacy at the test key file and reset the cache so the override
    is (re)read. Mirrors exactly the (path, cache) surface the Go loadJarKey
    reads.

    Returns the parsed fixture document unchanged.
    """
    spec = _load()
    # key_file is stored '/'-separated, relative to the testdata directory.
    relative_key = spec["key_file"].replace("/", os.sep)
    monkeypatch.setattr(
        privacy, "JAR_KEY_PATH", os.path.join(_NG_TESTDATA, relative_key))
    monkeypatch.setattr(privacy, "_jar_key_cache", {"v": None})
    return spec
|
||||||
|
|
||||||
|
|
||||||
|
def test_jar_key_loads_canonical(jar_env):
    """_jar_key() must strip the file's surrounding whitespace back to the
    canonical key declared in key_hex (proves .strip() parity with TrimSpace)."""
    loaded = privacy._jar_key()
    assert loaded is not None
    canonical_hex = jar_env["key_hex"]
    assert loaded.hex() == canonical_hex
|
||||||
|
|
||||||
|
|
||||||
|
def test_jar_parity(jar_env):
    """Compare ``privacy.fake_id`` with each fixture's ``expect`` and report
    every mismatch in a single assertion message."""
    mismatches = []
    for case in jar_env["fixtures"]:
        minted = privacy.fake_id(case["client"], case["tracker"], case["cookie_name"])
        if minted == case["expect"]:
            continue
        mismatches.append(
            f"fake_id({case['client']!r},{case['tracker']!r},{case['cookie_name']!r})"
            f"={minted!r} want {case['expect']!r} ({case.get('why')})")
    assert not mismatches, "Python↔fixture jar parity mismatches:\n" + "\n".join(mismatches)
|
||||||
|
|
||||||
|
|
||||||
|
def test_jar_shapes_covered(jar_env):
|
||||||
|
# Every _shape branch must appear, else parity is vacuous for that branch.
|
||||||
|
shapes = set()
|
||||||
|
for fx in jar_env["fixtures"]:
|
||||||
|
e = fx["expect"]
|
||||||
|
if e.startswith("GA1."):
|
||||||
|
shapes.add("ga")
|
||||||
|
elif e.startswith("fb."):
|
||||||
|
shapes.add("fb")
|
||||||
|
elif len(e) == 36 and e[8] == "-":
|
||||||
|
shapes.add("uuid")
|
||||||
|
elif len(e) == 32:
|
||||||
|
shapes.add("hex")
|
||||||
|
assert shapes == {"ga", "fb", "uuid", "hex"}, shapes
|
||||||
|
|
||||||
|
|
||||||
|
def test_jar_folding(jar_env):
    """Two subdomains of the same registrable tracker fold to the SAME fake id."""
    hosts = ("px.doubleclick.net", "ads.doubleclick.net")
    first, second = [privacy.fake_id("foldclient", h, "uid") for h in hosts]
    assert first is not None and first == second
|
||||||
|
|
||||||
|
|
||||||
|
def test_jar_none_cases(jar_env, monkeypatch):
    """fake_id returns None exactly where Go fakeID returns ("", False).

    Covers an empty client, an empty tracker, and an empty jar key. The
    empty-key case replaces ``privacy._jar_key_cache`` through pytest's
    ``monkeypatch`` so the override is explicitly undone at teardown,
    instead of relying on another fixture having patched the same attribute.
    """
    assert privacy.fake_id("", "t.example", "uid") is None   # empty client
    assert privacy.fake_id("c", "", "uid") is None            # empty tracker
    # empty key → None (presumably _jar_key() serves the cached b"" as-is;
    # verify against privacy._jar_key if this ever starts minting ids).
    monkeypatch.setattr(privacy, "_jar_key_cache", {"v": b""})
    assert privacy.fake_id("c", "t.example", "uid") is None
|
||||||
Loading…
Reference in New Issue
Block a user