refactor: complete bootstrap of ARNES agent harness framework

- Add complete agent harness structure with 8 roles (leader, triager, architect, implementer, reviewer, security, qa, documenter) - Implement strict workflow with 9 stages and mandatory gates - Add comprehensive verification script and runtime status tracking - Create artifact-based evidence system with contracts and schemas - Add agent policy matrix with permissions and anti-cheat rules - Include test suite (44 tests passing) and CI-ready structure - Add documentation: README, HOWTO, CHECKPOINTS, templates - Configure model routing policies and token-aware task assignment - Add BDD/SDD specification guides and feature templates - Include starter pack for quick project onboarding All verification checks pass. Framework ready for production use.
2026-05-17 23:25:35 +02:00
parent 622e5df382
commit 3ff9b70e4c
104 changed files with 8534 additions and 187 deletions
--- a/harness/agents.matrix.yml
+++ b/harness/agents.matrix.yml
@@ -2,15 +2,27 @@ version: 1

 roles:
  leader:
-    can_edit: ["work/", "backlog/", "spec/", "harness/"]
+    emoji: "🧭"
+    can_edit: ["work/", "backlog/", "spec/", "harness/", "AGENTS.md", "CHECKPOINTS.md"]
    cannot_edit: ["src/", "tests/"]
    responsibilities:
      - plan
      - orchestrate
      - enforce_gates
      - close_feature
+      - issue_orders_in_english_caveman
+
+  triager:
+    emoji: "🧩"
+    can_edit: ["backlog/", "work/artifacts/", "spec/"]
+    cannot_edit: ["src/", "tests/", "backlog/features.json:status=done"]
+    responsibilities:
+      - normalize_requests
+      - create_tickets_in_english_caveman
+      - define_scope_acceptance

  architect:
+    emoji: "🏗️"
    can_edit: ["spec/", "harness/contracts/", "docs/"]
    cannot_edit: ["src/", "tests/", "backlog/features.json:status"]
    responsibilities:
@@ -18,6 +30,7 @@ roles:
      - update_contracts

  implementer:
+    emoji: "🛠️"
    can_edit: ["src/", "tests/", "work/artifacts/"]
    cannot_edit:
      - "backlog/features.json:done"
@@ -32,6 +45,7 @@ roles:
      - produce_implementer_evidence

  reviewer:
+    emoji: "🔍"
    can_edit: ["work/artifacts/"]
    cannot_edit: ["src/", "tests/", "backlog/"]
    responsibilities:
@@ -39,6 +53,7 @@ roles:
      - emit_reviewer_verdict

  security:
+    emoji: "🔒"
    can_edit: ["work/artifacts/"]
    cannot_edit: ["src/", "tests/", "backlog/"]
    responsibilities:
@@ -48,6 +63,7 @@ roles:
      - emit_security_verdict

  qa:
+    emoji: "🧪"
    can_edit: ["work/artifacts/"]
    cannot_edit: ["src/", "tests/", "backlog/"]
    responsibilities:
@@ -56,8 +72,18 @@ roles:
      - regression_checks
      - emit_qa_verdict

+  documenter:
+    emoji: "📚"
+    can_edit: ["docs/", "spec/", "README.md", "HOWTO.md", "work/artifacts/"]
+    cannot_edit: ["src/", "tests/", "backlog/features.json:status"]
+    responsibilities:
+      - document_feature_changes
+      - update_user_docs
+      - emit_documenter_summary
+
 anti_cheat:
  - "Implementer cannot promote feature to done"
  - "Done requires reviewer/security/qa approved artifacts"
+  - "Done requires documenter evidence"
  - "Leader close requires verify.sh success"
  - "Evidence must be on disk; chat-only claims are invalid"
--- a/harness/models.profiles.yml
+++ b/harness/models.profiles.yml
@@ -0,0 +1,51 @@
+version: 1
+
+policy:
+  goal: "Use smallest model that can do task well"
+  fallback_order: ["tiny", "small", "medium", "large"]
+
+profiles:
+  tiny:
+    use_for:
+      - status updates
+      - file moves
+      - boilerplate JSON
+      - simple docs formatting
+  small:
+    use_for:
+      - triage ticket drafting
+      - reviewer/security/qa short verdicts
+      - changelog/doc updates
+      - refactors with low logic risk
+  medium:
+    use_for:
+      - architecture decisions
+      - non-trivial implementation
+      - multi-file integration changes
+  large:
+    use_for:
+      - complex debugging
+      - deep root-cause analysis
+      - migrations with high risk
+      - ambiguous requirements
+
+role_defaults:
+  leader: small
+  triager: small
+  architect: medium
+  implementer: medium
+  reviewer: small
+  security: small
+  qa: small
+  documenter: tiny
+
+stage_overrides:
+  triage_translate: small
+  intake: small
+  design: medium
+  build: medium
+  review_gate: small
+  security_gate: small
+  qa_gate: small
+  documentation_gate: tiny
+  close: small
--- a/harness/policies/language.md
+++ b/harness/policies/language.md
@@ -0,0 +1,22 @@
+# Policy: Language and style
+
+## Internal language
+- Internal artifacts, tickets, and leader orders must be in **English**.
+- User chat can be in any language.
+
+## Style mode: Caveman English
+- Short words.
+- Short lines.
+- One idea per line.
+- No fluff.
+- No long intros.
+- Prefer bullets.
+
+## Ticket writing rules
+- Title: 4–10 words.
+- Acceptance: 3–6 bullets max.
+- Keep scope explicit (in/out).
+- Use active verbs: Fix, Add, Move, Remove, Validate.
+
+## Runtime action rules
+- `agent_status.action` should be concise (<= 60 chars).
--- a/harness/policies/model-routing.md
+++ b/harness/policies/model-routing.md
@@ -0,0 +1,24 @@
+# Policy: Model routing
+
+Use model by task complexity, not by habit.
+
+## Core rule
+- Start small.
+- Escalate only when blocked or quality poor.
+
+## Escalation triggers
+- Repeated failed attempts.
+- Ambiguous requirements.
+- Cross-module side effects.
+- Security-critical code paths.
+
+## De-escalation triggers
+- Routine CRUD edits.
+- Mechanical refactors.
+- Artifact writing.
+- Status/timeline updates.
+
+## Required behavior
+- Record chosen model class in artifact header when work is non-trivial.
+- Keep outputs concise to reduce token burn.
+- If `harness/project.config.json` has `model_mode=lean`, prefer tiny/small whenever possible.
--- a/harness/workflow.stages.yml
+++ b/harness/workflow.stages.yml
@@ -4,6 +4,15 @@ feature_states:
  allowed: [pending, in_progress, blocked, done]

 stages:
+  - name: triage_translate
+    owner: leader
+    optional: true
+    input:
+      - backlog/features.json
+      - work/current.md
+    output:
+      - work/artifacts/<feature_id>/triage.md
+
  - name: intake
    owner: leader
    input:
@@ -41,6 +50,12 @@ stages:
    output:
      - work/artifacts/<feature_id>/qa.json

+  - name: documentation_gate
+    owner: documenter
+    required: true
+    output:
+      - work/artifacts/<feature_id>/documenter.md
+
  - name: close
    owner: leader
    required: true
@@ -52,4 +67,5 @@ close_requirements:
  - reviewer.json.verdict == "APPROVED"
  - security.json.verdict == "APPROVED"
  - qa.json.verdict == "APPROVED"
+  - documenter.md exists
  - scripts/verify.sh exit_code == 0