Compare commits

..

3 Commits

Author SHA1 Message Date
aa1f7481ac chore(license): enroll all in-scope files via ** allowlist (Phase B + C)
Bulk-applies the CMSD-1.0 SPDX header to 1,529 first-party source files
across the repository (excluding vendor trees: kernel-build, redroid,
tools/Tow-Boot, output, cache, backups, apt, repo, node_modules,
__pycache__, .venv, dist, build, *.min.js, *.min.css).

Replaces the per-package allowlist with a single `**` pattern; CI now
enforces the CMSD header on every in-scope file going forward.
Effectively combines Phase B (per-package enrollment) and Phase C
(repo-wide enforcement) into one step.

File counts by extension:
  .py:   501   .md:    296   .html: 165   .yaml: 152
  .conf: 152   .js:    138   .sh:   100   .css:   44
  .c:     15   .yml:    12   .toml:  10   .h:      6   .ts: 3

Skipped (foreign SPDX, 11 files): the Apache-2.0 secubox.css, 10
GPL-2.0 files in zkp-hamiltonian and the GPL-2.0 leds-is31fl319x.c
kernel module. The walker's foreign-detection logic preserved their
original licenses untouched.

Note: the canonical CMSD license documents (LICENCE-CMSD-1.0.md,
LICENSE-CMSD-1.0.en.md, LICENSING.md) received headers too. Mildly
self-referential — reviewer may choose to revert those 3 files if
preferred.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 11:40:26 +02:00
f286956922 chore(license): enroll secubox-hub in CMSD header check (ref #81)
Applies the CMSD-1.0 SPDX header to 51 files in packages/secubox-hub:
  - 22 JS, 14 CSS, 9 HTML, 3 Python, 2 conf, 1 YAML, 1 Markdown
  - Header rendered per language; HTML headers placed after <!DOCTYPE>
  - 1 file skipped (foreign SPDX): secubox.css carries Apache-2.0

Adds `packages/secubox-hub/**` to scripts/license-headers-enrolled.txt
so CI's --check now enforces the header on this package.

Phase B pilot. Validates the workflow before scaling to the remaining
13 secubox-* packages and shared dirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 11:40:16 +02:00
b88b8ada95 fix(license): tighten detect_existing + missing-file = repo-wide (ref #81)
Two bug fixes to the CMSD-1.0 license-header tool:

1. detect_existing() previously matched the SPDX token anywhere in the
   first 10 lines, including inside docstrings and prose comments.
   Tightened the matcher so that only comment markers (#, //, *, <!--, -->)
   and/or whitespace may precede the SPDX token on its line.

   Regression tests:
   - test_detect_existing_no_false_match_in_docstring
   - test_detect_existing_no_false_match_inline_comment_prose

2. _read_enrollment() now returns ["**"] when the allowlist file is
   absent, per spec §5.2 "missing file → repo-wide enforcement".
   Previously it returned [] (nothing enforced), making Phase C closure
   impossible without an additional file.

   Empty allowlist file still returns [] (Phase A initial), so
   test_main_empty_allowlist_passes_check is unaffected.

   New tests:
   - test_read_enrollment_missing_file_means_repo_wide
   - test_main_check_missing_allowlist_enforces_repo_wide

Suite: 49 → 53 passing. --check still exits 0 repo-wide.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 11:39:57 +02:00
2 changed files with 64 additions and 8 deletions

View File

@ -27,19 +27,29 @@ HEADER_LINES = (
) )
# Loose matcher: finds the SPDX token anywhere in a string.
# detect_existing() below does not use it; it relies on _SPDX_LINE_RE.
_SPDX_RE = re.compile(r"SPDX-License-Identifier:\s*(\S+)")
_CMSD_ID = "LicenseRef-CMSD-1.0"

# Matches an SPDX declaration only when everything before the token on
# that line is comment punctuation (# / * < ! - >) and/or whitespace.
# Prevents false matches when a docstring or a prose comment merely
# mentions the token "SPDX-License-Identifier:".
_SPDX_LINE_RE = re.compile(
    r"^[\s/*#<!\->]*\s*SPDX-License-Identifier:\s*(\S+)"
)

ENROLLMENT_FILE = "scripts/license-headers-enrolled.txt"


def detect_existing(text: str) -> str:
    """Classify the license declaration found in the first 10 lines of *text*.

    A line counts as a license declaration only when nothing but comment
    punctuation (as in ``#``, ``//``, ``*``, ``<!--``, ``-->``) and/or
    whitespace precedes the SPDX token. Prose that mentions the token after
    other words, for example inside a docstring, is ignored.

    A leading byte-order mark (U+FEFF) is skipped so that a header on the
    very first line of a BOM-prefixed file is still recognised.

    Args:
        text: Full contents of the file being inspected.

    Returns:
        ``"MATCH"`` if the first declaration found names the CMSD license,
        ``"FOREIGN"`` if it names any other identifier, and ``"NONE"`` if no
        declaration appears in the first 10 lines.
    """
    # U+FEFF is neither whitespace nor a comment marker, so without this
    # strip the anchored regex would reject a BOM-prefixed header line.
    for line in text.lstrip("\ufeff").splitlines()[:10]:
        match = _SPDX_LINE_RE.match(line)
        if match:
            # The first declaration wins; later lines are not consulted.
            return "MATCH" if match.group(1) == _CMSD_ID else "FOREIGN"
    return "NONE"
def render_header(style: str) -> str: def render_header(style: str) -> str:
@ -235,9 +245,16 @@ def _find_repo_root(start: Path) -> Path:
def _read_enrollment(repo_root: Path) -> list[str]: def _read_enrollment(repo_root: Path) -> list[str]:
"""Return enrollment patterns from scripts/license-headers-enrolled.txt.
Phase semantics (per spec §5.2):
* Missing file → ["**"] (repo-wide enforcement; Phase C final state)
* File exists but is empty or holds only comments → [] (nothing enforced; Phase A initial)
* File with patterns → those patterns
"""
f = repo_root / ENROLLMENT_FILE f = repo_root / ENROLLMENT_FILE
if not f.exists(): if not f.exists():
return [] return ["**"]
patterns: list[str] = [] patterns: list[str] = []
for raw in f.read_text().splitlines(): for raw in f.read_text().splitlines():
line = raw.strip() line = raw.strip()

View File

@ -120,6 +120,29 @@ def test_detect_existing_only_checks_first_10_lines():
assert license_headers.detect_existing(text) == "NONE" assert license_headers.detect_existing(text) == "NONE"
def test_detect_existing_no_false_match_in_docstring():
    """A docstring that merely mentions the SPDX token is not a license header.

    Regression guard: the detector used to accept the
    'SPDX-License-Identifier:' token anywhere in the first 10 lines, so a
    Python docstring describing the header was mistaken for the header.
    """
    source = (
        '"""License header tool.\n'
        '\n'
        'Adds the SPDX-License-Identifier: LicenseRef-CMSD-1.0 header.\n'
        '"""\n'
        'x = 1\n'
    )
    verdict = license_headers.detect_existing(source)
    assert verdict == "NONE"
def test_detect_existing_no_false_match_inline_comment_prose():
    """A comment with words before the SPDX token is prose, not a declaration."""
    source = "# This module documents SPDX-License-Identifier: MIT compliance.\nx = 1\n"
    verdict = license_headers.detect_existing(source)
    assert verdict == "NONE"
def test_apply_python_plain(): def test_apply_python_plain():
src = '"""Docstring."""\nprint("hi")\n' src = '"""Docstring."""\nprint("hi")\n'
out = license_headers.apply(src, ".py") out = license_headers.apply(src, ".py")
@ -422,3 +445,19 @@ def test_main_empty_allowlist_passes_check(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path) monkeypatch.chdir(tmp_path)
rc = license_headers.main(["--check"]) rc = license_headers.main(["--check"])
assert rc == 0 assert rc == 0
def test_read_enrollment_missing_file_means_repo_wide(tmp_path):
    """With no allowlist file under the repo root, the pattern list is ["**"] (spec §5.2)."""
    patterns = license_headers._read_enrollment(tmp_path)
    assert patterns == ["**"]
def test_main_check_missing_allowlist_enforces_repo_wide(tmp_path, monkeypatch):
    """--check must return 1 for an unheadered file when no allowlist file exists."""
    git_dir = tmp_path / ".git"
    git_dir.mkdir()
    scripts_dir = tmp_path / "scripts"
    scripts_dir.mkdir(exist_ok=True)
    # The enrollment file is deliberately never created.
    unheadered = tmp_path / "a.py"
    unheadered.write_text("x = 1\n")  # source file without a license header
    monkeypatch.chdir(tmp_path)
    exit_code = license_headers.main(["--check"])
    assert exit_code == 1