refactor: complete bootstrap of ARNES agent harness framework

- Add complete agent harness structure with 8 roles (leader, triager, architect, implementer, reviewer, security, qa, documenter)
- Implement strict workflow with 9 stages and mandatory gates
- Add comprehensive verification script and runtime status tracking
- Create artifact-based evidence system with contracts and schemas
- Add agent policy matrix with permissions and anti-cheat rules
- Include test suite (44 tests passing) and CI-ready structure
- Add documentation: README, HOWTO, CHECKPOINTS, templates
- Configure model routing policies and token-aware task assignment
- Add BDD/SDD specification guides and feature templates
- Include starter pack for quick project onboarding

All verification checks pass. Framework ready for production use.
This commit is contained in:
rikrdo
2026-05-17 23:25:35 +02:00
parent 622e5df382
commit 3ff9b70e4c
104 changed files with 8534 additions and 187 deletions

238
scripts/agent_status.py Executable file
View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python3
import argparse
import json
import re
from datetime import datetime, timezone
from pathlib import Path
# Repository root: the parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# JSON file holding the runtime status that this script reads and writes.
STATUS_PATH = ROOT / 'work' / 'runtime-status.json'
# Agent matrix file; load_role_emojis() scans it for per-role emoji overrides.
MATRIX_PATH = ROOT / 'harness' / 'agents.matrix.yml'
# Directory with one sub-directory of gate artifacts per feature id.
ARTIFACTS_DIR = ROOT / 'work' / 'artifacts'
# Fallback emoji per role, used when the matrix file is missing or gives no
# override for a role.
DEFAULT_EMOJIS = {
'leader': '🧭',
'triager': '🧩',
'architect': '🏗️',
'implementer': '🛠️',
'reviewer': '🔍',
'security': '🔒',
'qa': '🧪',
'documenter': '📚',
}
# Gate name -> artifact file name expected under ARTIFACTS_DIR/<feature_id>/.
# gate_status() checks these files to decide each gate's state.
GATE_FILES = {
'reviewer': 'reviewer.json',
'security': 'security.json',
'qa': 'qa.json',
'documenter': 'documenter.md',
'leader': 'leader-close.json',
}
def now_iso():
    """Return the current UTC time as an ISO-8601 string ending in ``Z``.

    Microseconds are dropped, so the result has one-second resolution,
    e.g. ``2026-05-17T21:25:35Z``.
    """
    utc_now = datetime.now(timezone.utc)
    truncated = utc_now.replace(microsecond=0)
    iso_text = truncated.isoformat()
    # isoformat() renders the UTC offset as "+00:00"; swap it for "Z".
    return iso_text.replace('+00:00', 'Z')
def load_json(path: Path, default=None):
    """Read and parse a UTF-8 JSON file.

    Args:
        path: File to read.
        default: Value returned when ``path`` does not exist.

    Returns:
        The parsed JSON document, or ``default`` if the file is absent.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if path.exists():
        raw_text = path.read_text(encoding='utf-8')
        return json.loads(raw_text)
    return default
def save_json(path: Path, payload):
    """Serialise ``payload`` as pretty-printed UTF-8 JSON and write it to ``path``.

    The output uses a two-space indent, keeps non-ASCII characters unescaped
    and ends with a single newline. Missing parent directories are created,
    so the first write in a fresh checkout does not fail.

    Args:
        path: Destination file; overwritten if it already exists.
        payload: Any JSON-serialisable object.

    Raises:
        TypeError: If ``payload`` contains values ``json.dumps`` cannot encode.
    """
    # Serialise first so a non-serialisable payload leaves the disk untouched.
    document = json.dumps(payload, indent=2, ensure_ascii=False) + '\n'
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(document, encoding='utf-8')
def load_role_emojis():
    """Return the role -> emoji mapping, with overrides from the agent matrix.

    Starts from a copy of ``DEFAULT_EMOJIS``. If ``MATRIX_PATH`` exists, the
    file is scanned line by line with two regular expressions (it is not
    parsed as YAML): a line matching the role pattern selects the current
    role, and a following ``emoji:`` line sets that role's emoji, with
    optional surrounding quotes removed.
    """
    role_emojis = dict(DEFAULT_EMOJIS)
    if MATRIX_PATH.exists():
        role_line = re.compile(r'^ ([a-z_]+):\s*$')
        emoji_line = re.compile(r'^\s{4}emoji:\s*["\']?(.*?)["\']?\s*$')
        active_role = None
        for raw_line in MATRIX_PATH.read_text(encoding='utf-8').splitlines():
            role_hit = role_line.match(raw_line)
            if role_hit:
                active_role = role_hit.group(1)
            elif active_role:
                # Emoji lines only count once a role header has been seen.
                emoji_hit = emoji_line.match(raw_line)
                if emoji_hit:
                    role_emojis[active_role] = emoji_hit.group(1)
    return role_emojis
def default_status():
    """Build the idle runtime status used when nothing is in progress.

    Returns:
        A new dict with every status field set to its idle value, an empty
        ``timeline`` and ``updated_at`` set to the current time.
    """
    idle_snapshot = dict(
        feature_id=None,
        stage='idle',
        agent='leader',
        action='Sin ejecución activa',
        state='waiting',
        next_agent='leader',
        waiting_for='Seleccionar una feature pending y actualizar este estado',
        updated_at=now_iso(),
        timeline=[],
    )
    return idle_snapshot
def load_status():
    """Load the runtime status from ``STATUS_PATH`` and fill in any gaps.

    Fields missing from the stored document are taken from
    ``default_status()``, and a ``timeline`` that is not a list is replaced
    by an empty list. When the file does not exist, the defaults are returned.
    """
    loaded = load_json(STATUS_PATH, default_status())
    fallback = default_status()
    for field in fallback:
        if field not in loaded:
            loaded[field] = fallback[field]
    timeline = loaded.get('timeline')
    if not isinstance(timeline, list):
        loaded['timeline'] = []
    return loaded
def gate_status(feature_id):
    """Report the state of every gate artifact for a feature.

    Args:
        feature_id: Name of the feature's directory under ``ARTIFACTS_DIR``.
            A falsy value means no feature is active.

    Returns:
        A dict mapping each gate in ``GATE_FILES`` to one of:
        ``'pending'`` (file missing), ``'approved'``, ``'present'`` (file
        parses but its ``verdict`` is not ``'APPROVED'``) or ``'invalid'``
        (file could not be read or parsed). Empty when ``feature_id`` is falsy.
    """
    if not feature_id:
        return {}
    artifact_dir = ARTIFACTS_DIR / feature_id
    verdicts = {}
    for gate_name, artifact_name in GATE_FILES.items():
        artifact = artifact_dir / artifact_name
        if not artifact.exists():
            outcome = 'pending'
        elif gate_name == 'documenter':
            # The documenter artifact is not JSON; its presence is enough.
            outcome = 'approved'
        else:
            try:
                parsed = json.loads(artifact.read_text(encoding='utf-8'))
                outcome = 'approved' if parsed.get('verdict') == 'APPROVED' else 'present'
            except Exception:
                # Unreadable, malformed or non-object JSON all count as invalid.
                outcome = 'invalid'
        verdicts[gate_name] = outcome
    return verdicts
def render_gate(gate, state, emojis):
    """Format one gate as ``"<icon> <role emoji> <label>: <STATE>"``.

    Args:
        gate: Gate name; ``'leader'`` is shown as ``close`` and
            ``'documenter'`` as ``docs``, any other name is shown unchanged.
        state: Gate state; rendered in upper case.
        emojis: Mapping of role name to emoji; missing roles render as ''.

    Returns:
        The formatted single-line string.
    """
    # NOTE(review): the approved/pending/invalid icons are empty strings in
    # this file; possibly glyphs that were lost. Confirm before relying on them.
    state_icons = {
        'approved': '',
        'pending': '',
        'present': '⚠️',
        'invalid': '',
    }
    icon = state_icons[state] if state in state_icons else ''

    if gate == 'leader':
        label = 'close'
    elif gate == 'documenter':
        label = 'docs'
    else:
        label = gate

    role_emoji = emojis.get(gate, '')
    return f"{icon} {role_emoji} {label}: {state.upper()}"
def show_status():
    """Print the runtime status panel to stdout.

    The panel has three parts: the current status fields, one line per gate
    (only when a feature is active) and the last eight timeline events.
    """
    status = load_status()
    emojis = load_role_emojis()
    active_feature = status.get('feature_id') or ''
    active_agent = status.get('agent', 'leader')
    upcoming_agent = status.get('next_agent') or ''
    gates = gate_status(status.get('feature_id'))

    print('╔══════════════════════════════════════════════════════════════╗')
    print('║ ARNES · Runtime Status ║')
    print('╚══════════════════════════════════════════════════════════════╝')
    print(f"Feature activa : {active_feature}")
    print(f"Stage actual : {status.get('stage', '')}")
    print(f"Agente actual : {emojis.get(active_agent, '')} {active_agent}")
    print(f"Acción : {status.get('action', '')}")
    print(f"Estado : {status.get('state', '')}")
    print(f"Siguiente : {emojis.get(upcoming_agent, '')} {upcoming_agent}")
    print(f"Esperando : {status.get('waiting_for', '')}")
    print(f"Actualizado : {status.get('updated_at', '')}")
    print()

    print('Gates')
    if not gates:
        print(' — Sin feature activa —')
    else:
        # Fixed display order, independent of the dict's own ordering.
        for gate_name in ('reviewer', 'security', 'qa', 'documenter', 'leader'):
            gate_state = gates.get(gate_name, 'pending')
            print(f" {render_gate(gate_name, gate_state, emojis)}")
    print()

    print('Timeline')
    recent_events = status.get('timeline', [])[-8:]
    if recent_events:
        for event in recent_events:
            who = event.get('agent', 'leader')
            who_emoji = emojis.get(who, '')
            when = event.get('ts', '')
            stage = event.get('stage', '')
            state = event.get('state', '')
            message = event.get('message', '')
            print(f" - {when} · {who_emoji} {who} · {stage} · {state} · {message}")
    else:
        print(' — Sin eventos —')
def set_status(args):
    """Apply the ``set`` sub-command: update fields, log an event, save, show.

    Only options that were actually passed (not ``None``) overwrite the stored
    value. An empty ``--feature-id`` clears the active feature. Every call
    appends one timeline event and the timeline is capped at 20 entries.

    Args:
        args: Parsed namespace with ``feature_id``, ``stage``, ``agent``,
            ``action``, ``state``, ``next_agent``, ``waiting_for`` and ``note``.
    """
    status = load_status()

    if args.feature_id is not None:
        # An empty string means "no active feature" and is stored as None.
        status['feature_id'] = args.feature_id or None
    for field in ('stage', 'agent', 'action', 'state', 'next_agent', 'waiting_for'):
        supplied = getattr(args, field)
        if supplied is not None:
            status[field] = supplied

    stamp = now_iso()
    status['updated_at'] = stamp

    # The note wins; otherwise fall back to the action, then to a fixed text.
    event_message = args.note or status.get('action') or 'Estado actualizado'
    event = {
        'ts': stamp,
        'agent': status.get('agent', 'leader'),
        'stage': status.get('stage', ''),
        'state': status.get('state', ''),
        'message': event_message,
    }
    status['timeline'] = (status['timeline'] + [event])[-20:]

    save_json(STATUS_PATH, status)
    show_status()
def reset_status(_args):
    """Apply the ``reset`` sub-command: store the idle status and show it.

    Args:
        _args: Parsed namespace; unused, accepted for a uniform handler shape.
    """
    # updated_at already exists in the defaults, so its position is unchanged.
    fresh = {**default_status(), 'updated_at': now_iso()}
    save_json(STATUS_PATH, fresh)
    show_status()
def build_parser():
    """Create the command-line parser with ``show``, ``set`` and ``reset``.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace carries the
        chosen sub-command in ``command``. ``set`` accepts one optional
        string option per status field, plus ``--note``.
    """
    parser = argparse.ArgumentParser(description='Renderiza y actualiza el estado visible de ARNES.')
    commands = parser.add_subparsers(dest='command', required=True)

    commands.add_parser('show', help='Muestra el panel visible de estado')

    updater = commands.add_parser('set', help='Actualiza el estado runtime y añade evento a timeline')
    set_flags = (
        '--feature-id',
        '--stage',
        '--agent',
        '--action',
        '--state',
        '--next-agent',
        '--waiting-for',
        '--note',
    )
    for flag in set_flags:
        updater.add_argument(flag)

    commands.add_parser('reset', help='Resetea el estado runtime a idle')
    return parser
def main():
    """Parse the command line and run the selected sub-command.

    Returns:
        0 after a sub-command has run, or 1 (after printing the help text)
        if the command is not one of ``show``, ``set`` or ``reset``.
    """
    parser = build_parser()
    args = parser.parse_args()

    handlers = {
        'show': lambda _namespace: show_status(),
        'set': set_status,
        'reset': reset_status,
    }
    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return 1
    handler(args)
    return 0


if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())

78
scripts/new_ticket.py Executable file
View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
import json
from datetime import date
from pathlib import Path
# Repository root: the parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# Backlog file that main() reads, appends the new ticket to, and rewrites.
BACKLOG = ROOT / 'backlog' / 'features.json'
def ask(prompt, default=''):
    """Prompt on the terminal and return the stripped answer.

    Args:
        prompt: Text shown to the user; ``": "`` is appended.
        default: Value returned when the answer is empty. When non-empty it
            is also shown in square brackets after the prompt.

    Returns:
        The answer without surrounding whitespace, or ``default`` if empty.
    """
    hint = ' [' + default + ']' if default else ''
    answer = input(f"{prompt}{hint}: ").strip()
    if answer:
        return answer
    return default
def next_id(features):
    """Return the next free feature id in the form ``F-NNN``.

    Only ids made of ``F-`` followed by decimal digits are considered; the
    result is the highest such number plus one, zero-padded to at least three
    digits. With no matching ids the result is ``F-001``.

    Args:
        features: Iterable of feature dicts; each may carry an ``id`` key.

    Returns:
        The new id string, e.g. ``'F-008'``.
    """
    used_numbers = []
    for feature in features:
        fid = str(feature.get('id', ''))
        suffix = fid[2:]
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as '²' that int() rejects, which made one odd id crash the tool.
        if fid.startswith('F-') and suffix.isdecimal():
            used_numbers.append(int(suffix))
    return f"F-{max(used_numbers, default=0) + 1:03d}"
def main():
    """Interactively create a ticket and append it to the backlog file.

    Prompts for the ticket fields (each has a default), reads acceptance
    bullets until an empty line, assigns the next free id, appends the new
    feature with status ``pending`` and rewrites ``BACKLOG``.
    """
    backlog = json.loads(BACKLOG.read_text(encoding='utf-8'))
    features = backlog.get('features', [])

    print('Create ticket (English caveman style).')
    ttype = ask('Type (feature/fix/bug/chore)', 'feature')
    title = ask('Title (short EN)', f'{ttype.capitalize()} TODO')
    problem = ask('Problem (short EN)', 'Need change')
    goal = ask('Goal (short EN)', 'Make flow better')
    scope_in = ask('Scope IN (comma list EN)', 'Core flow')
    scope_out = ask('Scope OUT (comma list EN)', 'No redesign')
    risk = ask('Risk (low/med/high)', 'low')
    priority = ask('Priority (low/med/high)', 'med')

    print('Acceptance bullets (EN caveman). Empty line to end.')
    # Keep reading bullets until the first line that is empty after stripping.
    acceptance = list(iter(lambda: input('- ').strip(), ''))
    if not acceptance:
        acceptance = [
            'Flow works end to end',
            'No break old behavior',
            'verify.sh is green'
        ]

    fid = next_id(features)
    description = (
        f"Problem: {problem}. "
        f"Goal: {goal}. "
        f"Scope IN: {scope_in}. "
        f"Scope OUT: {scope_out}. "
        f"Type: {ttype}. Priority: {priority}. Risk: {risk}."
    )
    ticket = {
        'id': fid,
        'title': title,
        'description': description,
        'acceptance': acceptance,
        'status': 'pending',
        'created_at': str(date.today()),
        'gates': {'review': False, 'security': False, 'qa': False}
    }
    features.append(ticket)
    backlog['features'] = features

    serialised = json.dumps(backlog, indent=2, ensure_ascii=False) + '\n'
    BACKLOG.write_text(serialised, encoding='utf-8')
    print(f'Created {fid}: {title}')


if __name__ == '__main__':
    main()

36
scripts/run.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
# Start the ARNES UI API server with uvicorn in auto-reload mode.
# Usage: run.sh [PORT]   (PORT defaults to 8000)
set -e

cd "$(dirname "$0")"
# The ASGI target below is src.main:app. If there is no src/ next to this
# script but there is one in the parent directory, run from there so the
# import can resolve. NOTE(review): assumes src/ lives at the repository
# root like the paths used by the other scripts; confirm.
if [ ! -d src ] && [ -d ../src ]; then
  cd ..
fi

# Configuration
PORT="${1:-8000}"
# NOTE(review): 0.0.0.0 listens on every interface; together with --reload
# and the printed test credentials this looks dev-only. Confirm.
HOST="0.0.0.0"

# Colours
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} ARNES API - Starting...${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e " URL: ${YELLOW}http://localhost:${PORT}/ui/login.html${NC}"
echo -e " Host: ${YELLOW}${HOST}:${PORT}${NC}"
echo ""
echo -e " Credenciales de prueba:"
echo -e " Email: ${YELLOW}alice@example.com${NC}"
echo -e " Password: ${YELLOW}SecurePass123!${NC}"
echo ""

# Install dependencies if fastapi cannot be imported.
if ! python3 -c "import fastapi" 2>/dev/null; then
  echo -e "${YELLOW}Instalando dependencias...${NC}"
  # Use "python3 -m pip" so packages land in the same interpreter that runs
  # the server below; a bare pip3 may belong to a different Python.
  python3 -m pip install -q fastapi uvicorn pydantic PyJWT bcrypt httpx
fi

# Start the server; exec replaces this shell so signals reach uvicorn directly.
exec python3 -m uvicorn src.main:app --host "$HOST" --port "$PORT" --reload

173
scripts/start.sh Executable file
View File

@@ -0,0 +1,173 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"
# ask PROMPT [DEFAULT]
# Prompt the user and print the answer on stdout, so callers can capture it
# with $(ask ...). When DEFAULT is given it is shown in brackets and used if
# the answer is empty. A failed read (e.g. end of input) is tolerated.
ask() {
  local prompt="$1"; local def="${2:-}"; local val=""
  if [ -n "$def" ]; then
    read -r -p "$prompt [$def]: " val || true
    # printf rather than echo: echo would treat answers such as "-n" or "-e"
    # as options and print nothing, or mangle them.
    printf '%s\n' "${val:-$def}"
  else
    read -r -p "$prompt: " val || true
    printf '%s\n' "$val"
  fi
}
echo "=== ARNES start wizard ==="
echo "Mode: clone arnes-fork, put your app folder inside, run this wizard."

# Project identity and location of the application code.
PROJECT_NAME="$(ask 'Project name' 'my-project')"
PROJECT_DESC="$(ask 'Project description' 'Project using ARNES template')"
APP_DIR="$(ask 'App directory (relative)' 'app')"

# Stack: start from the default preset and prompt for each piece only when
# the custom preset (2) is chosen.
STACK_CHOICE="$(ask 'Stack preset (1=default Flask+MariaDB+Skeleton, 2=custom)' '1')"
BACKEND="python/flask"
DB="mariadb"
CSSFW="skeleton"
case "$STACK_CHOICE" in
  2)
    BACKEND="$(ask 'Backend stack' 'python/flask')"
    DB="$(ask 'Database' 'mariadb')"
    CSSFW="$(ask 'CSS framework' 'skeleton')"
    ;;
esac

# Commands and remaining options.
TEST_CMD="$(ask 'Test command' 'make test')"
LINT_CMD="$(ask 'Lint command (optional)' '')"
MODEL_MODE="$(ask 'Model mode (lean/balanced/power)' 'lean')"
ADD_BOOTSTRAP="$(ask 'Create bootstrap ticket F-001 now? (y/n)' 'y')"

mkdir -p "$APP_DIR"
if [ "$CSSFW" = "skeleton" ]; then
  mkdir -p "$APP_DIR/static/css" "$APP_DIR/static/images"
  # Copy the bundled Skeleton assets without overwriting existing files
  # (cp -n); a failed copy does not abort the wizard.
  for asset in css/normalize.css css/skeleton.css images/favicon.png; do
    cp -n "defaults/flask-skeleton/static/$asset" "$APP_DIR/static/$asset" || true
  done
fi
# Write harness/project.config.json.
# The answers are passed through the environment and serialised with
# json.dumps, so quotes, backslashes or other special characters typed into
# the wizard cannot produce an invalid JSON file. The heredoc delimiter is
# quoted so the shell does not expand anything inside the Python source.
ARNES_PROJECT_NAME="$PROJECT_NAME" \
ARNES_PROJECT_DESC="$PROJECT_DESC" \
ARNES_APP_DIR="$APP_DIR" \
ARNES_BACKEND="$BACKEND" \
ARNES_DB="$DB" \
ARNES_CSSFW="$CSSFW" \
ARNES_TEST_CMD="$TEST_CMD" \
ARNES_LINT_CMD="$LINT_CMD" \
ARNES_MODEL_MODE="$MODEL_MODE" \
python3 - <<'PY'
import json
import os
from pathlib import Path

env = os.environ
config = {
    'project_name': env['ARNES_PROJECT_NAME'],
    'project_description': env['ARNES_PROJECT_DESC'],
    'app_dir': env['ARNES_APP_DIR'],
    'stack': {
        'backend': env['ARNES_BACKEND'],
        'database': env['ARNES_DB'],
        'css': env['ARNES_CSSFW'],
    },
    'commands': {
        'test': env['ARNES_TEST_CMD'],
        'lint': env['ARNES_LINT_CMD'],
    },
    'model_mode': env['ARNES_MODEL_MODE'],
}
Path('harness/project.config.json').write_text(
    json.dumps(config, indent=2, ensure_ascii=False) + '\n',
    encoding='utf-8',
)
PY
# Generate scripts/verify.local.sh. The heredoc delimiter is quoted ('SH'),
# so the text below is written verbatim with no expansion at generation time.
# The generated script reads app_dir, commands.test and commands.lint from
# harness/project.config.json, checks that the app directory exists, then
# runs the lint command (if set) followed by the test command (if set).
cat > scripts/verify.local.sh <<'SH'
#!/usr/bin/env bash
set -euo pipefail
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"
if [ ! -f "harness/project.config.json" ]; then
echo "[LOCAL] missing harness/project.config.json"
exit 1
fi
APP_DIR=$(python3 - <<'PY'
import json
from pathlib import Path
cfg=json.loads(Path('harness/project.config.json').read_text())
print(cfg.get('app_dir','app'))
PY
)
TEST_CMD=$(python3 - <<'PY'
import json
from pathlib import Path
cfg=json.loads(Path('harness/project.config.json').read_text())
print(cfg.get('commands',{}).get('test',''))
PY
)
LINT_CMD=$(python3 - <<'PY'
import json
from pathlib import Path
cfg=json.loads(Path('harness/project.config.json').read_text())
print(cfg.get('commands',{}).get('lint',''))
PY
)
if [ ! -d "$APP_DIR" ]; then
echo "[LOCAL] app dir not found: $APP_DIR"
exit 1
fi
echo "[LOCAL] app dir OK: $APP_DIR"
if [ -n "$LINT_CMD" ]; then
echo "[LOCAL] lint: $LINT_CMD"
bash -lc "$LINT_CMD"
fi
if [ -n "$TEST_CMD" ]; then
echo "[LOCAL] test: $TEST_CMD"
bash -lc "$TEST_CMD"
fi
echo "[LOCAL] OK"
SH
# Mark the generated script as executable.
chmod +x scripts/verify.local.sh
# Record the project name and description in backlog/features.json and,
# if requested and the backlog is still empty, add the bootstrap ticket F-001.
# The answers are handed to Python through the environment instead of being
# spliced into the source: with the previous '$VAR' interpolation a single
# quote in the project name broke the script or injected code.
ARNES_PROJECT_NAME="$PROJECT_NAME" \
ARNES_PROJECT_DESC="$PROJECT_DESC" \
ARNES_ADD_BOOTSTRAP="$ADD_BOOTSTRAP" \
python3 - <<'PY'
import json
import os
from datetime import date
from pathlib import Path

backlog_path = Path('backlog/features.json')
data = json.loads(backlog_path.read_text(encoding='utf-8'))
data['project'] = os.environ['ARNES_PROJECT_NAME']
data['description'] = os.environ['ARNES_PROJECT_DESC']
features = data.get('features', [])
if os.environ['ARNES_ADD_BOOTSTRAP'].lower().startswith('y') and not features:
    features.append({
        'id': 'F-001',
        'title': 'Bootstrap ARNES on project',
        'description': 'Setup ARNES pipeline and run first complete feature cycle.',
        'acceptance': ['verify.sh is green', 'runtime status works', 'first feature closes with gates'],
        'status': 'pending',
        'created_at': str(date.today()),
        'gates': {'review': False, 'security': False, 'qa': False},
    })
data['features'] = features
backlog_path.write_text(json.dumps(data, indent=2, ensure_ascii=False) + '\n', encoding='utf-8')
PY
# Seed the session notes. The delimiter is quoted; the text contains nothing
# to expand, so it is written exactly as shown.
cat > work/current.md <<'EOF'
# Current session
- Feature in progress: _none_
- Orchestrator: _leader_
## Plan
- Pick one pending feature.
- Run ./scripts/verify.sh
- Set runtime status.
## Next step
- Use python3 scripts/new_ticket.py to create first real ticket.
EOF

# Reset the runtime status to idle; its output is discarded and a failure
# does not abort the wizard.
python3 scripts/agent_status.py reset >/dev/null || true

# Closing summary (starts with an empty line).
cat <<'EOF'

Done. Project configured.
- Config: harness/project.config.json
- Local checks: scripts/verify.local.sh
- Ticket tool: python3 scripts/new_ticket.py
- Verify: ./scripts/verify.sh
- Runtime: python3 scripts/agent_status.py show
EOF

93
scripts/test_api.py Normal file
View File

@@ -0,0 +1,93 @@
"""Test script for the API."""
import sys
import time
import subprocess
import requests
from threading import Thread
# Base URL of the server under test; it matches the host and port that
# start_server() passes to uvicorn.
SERVER_URL = "http://127.0.0.1:8000"
def start_server():
    """Start the uvicorn server for ``src.main:app`` and block until it exits.

    Intended to run in a background thread. The child process is registered
    for termination at interpreter exit; otherwise it would outlive this
    script (the daemon thread is simply dropped at exit) and keep port 8000
    occupied for the next run.
    """
    import atexit  # local import: only this function needs it

    server = subprocess.Popen([
        # Same interpreter as this script, so the server sees the same
        # installed packages (a bare "python3" may be a different install).
        sys.executable, "-m", "uvicorn",
        "src.main:app",
        "--host", "127.0.0.1",
        "--port", "8000",
    ])

    def _stop_server():
        """Terminate the server if it is still running."""
        if server.poll() is None:
            server.terminate()

    atexit.register(_stop_server)
    server.wait()
def wait_for_server(timeout=10):
    """Poll the ``/health`` endpoint until the server answers with HTTP 200.

    Args:
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as a health request returns 200, False if ``timeout``
        seconds pass without one.
    """
    start = time.time()
    while time.time() - start < timeout:
        try:
            response = requests.get(f"{SERVER_URL}/health", timeout=1)
        except requests.RequestException:
            # Server not accepting connections yet; retry. Only request
            # errors are swallowed, so Ctrl-C still interrupts the wait.
            pass
        else:
            if response.status_code == 200:
                return True
        time.sleep(0.5)
    return False
def test_health():
    """Check that ``/health`` returns HTTP 200 with status ``healthy``."""
    # Explicit timeout: without one a stalled server blocks the run forever.
    response = requests.get(f"{SERVER_URL}/health", timeout=5)
    assert response.status_code == 200
    assert response.json()["status"] == "healthy"
    print("✅ Health check passed")
def test_login():
    """Check that valid credentials return HTTP 200 and an access token.

    Returns:
        The ``access_token`` value from the response's ``data`` object.
    """
    response = requests.post(
        f"{SERVER_URL}/api/v1/auth/login",
        json={"email": "alice@example.com", "password": "SecurePass123!"},
        # Explicit timeout: without one a stalled server blocks the run forever.
        timeout=5,
    )
    assert response.status_code == 200
    data = response.json()
    assert data["success"] == True
    assert "access_token" in data["data"]
    print("✅ Login endpoint passed")
    return data["data"]["access_token"]
def test_login_invalid():
    """Check that a wrong password is rejected with HTTP 401."""
    response = requests.post(
        f"{SERVER_URL}/api/v1/auth/login",
        json={"email": "alice@example.com", "password": "WrongPassword!"},
        # Explicit timeout: without one a stalled server blocks the run forever.
        timeout=5,
    )
    assert response.status_code == 401
    print("✅ Invalid login returns 401")
def test_profile():
    """Check that ``/api/v1/profile/me`` returns HTTP 200."""
    # NOTE(review): no Authorization header is sent even though test_login()
    # returns a token; confirm this endpoint is meant to be public.
    # Explicit timeout: without one a stalled server blocks the run forever.
    response = requests.get(f"{SERVER_URL}/api/v1/profile/me", timeout=5)
    assert response.status_code == 200
    print("✅ Profile endpoint passed")
def run_tests():
    """Start the server in a background thread and run every API check.

    Returns:
        True if the server came up and all checks passed, False otherwise.
    """
    import traceback  # local import: only needed for failure reporting

    print("🔧 Starting server...")
    server_thread = Thread(target=start_server, daemon=True)
    server_thread.start()

    print("⏳ Waiting for server...")
    if not wait_for_server():
        print("❌ Server failed to start")
        return False
    print("✅ Server is ready!\n")

    try:
        test_health()
        test_login()
        test_login_invalid()
        test_profile()
    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        # A bare ``assert`` carries no message, so the line above is empty in
        # that case; the traceback shows which check failed and where.
        traceback.print_exc()
        return False
    else:
        print("\n🎉 All tests passed!")
        return True


if __name__ == "__main__":
    success = run_tests()
    sys.exit(0 if success else 1)

View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Example of a per-project local overlay.
# Copy it to scripts/verify.local.sh and adapt it.
set -euo pipefail

printf '%s\n' "[LOCAL] checks específicos del proyecto"

# Examples of checks to add here:
# alembic check
# pytest -m smoke -q
# npm run lint

printf '%s\n' "[LOCAL] OK"

View File

@@ -12,6 +12,8 @@ fail() { printf "${RED}[FAIL]${NC} %s\n" "$1"; }
EXIT_CODE=0
cd "$(dirname "$0")/.." || exit 1
echo "── 1) Verificando estructura base ─────────────────────"
required=(
"AGENTS.md"
@@ -21,6 +23,9 @@ required=(
"harness/policies/governance.md"
"harness/policies/security.md"
"harness/policies/quality.md"
"harness/policies/language.md"
"harness/policies/model-routing.md"
"harness/models.profiles.yml"
"harness/contracts/handoff.md"
"harness/contracts/evidence.schema.json"
"spec/product.md"
@@ -29,6 +34,11 @@ required=(
"backlog/features.json"
"work/current.md"
"work/history.md"
"work/runtime-status.json"
"scripts/agent_status.py"
"scripts/new_ticket.py"
"scripts/start.sh"
"platforms/pi/README.md"
)
for f in "${required[@]}"; do
@@ -62,6 +72,11 @@ if not isinstance(features, list):
print('[FAIL] features debe ser una lista')
sys.exit(1)
ids = [str(f.get('id', '')).strip() for f in features]
if len(ids) != len(set(ids)):
print('[FAIL] Hay IDs de feature duplicados en backlog/features.json')
sys.exit(1)
in_progress = [f for f in features if f.get('status') == 'in_progress']
if len(in_progress) > 1:
print(f"[FAIL] Hay {len(in_progress)} features in_progress (máximo 1)")
@@ -76,7 +91,7 @@ for f in features:
if status == 'done':
d = root / 'work' / 'artifacts' / fid
req = ['reviewer.json', 'security.json', 'qa.json', 'leader-close.json']
req = ['reviewer.json', 'security.json', 'qa.json', 'leader-close.json', 'documenter.md']
missing = [name for name in req if not (d / name).is_file()]
if missing:
print(f"[FAIL] Feature {fid} done sin artefactos: {', '.join(missing)}")
@@ -106,8 +121,32 @@ print(f"[OK] backlog válido ({len(features)} features)")
PY
if [ $? -ne 0 ]; then EXIT_CODE=1; fi
python3 - <<'PY'
import json
import pathlib
import sys
path = pathlib.Path('work/runtime-status.json')
required = ['feature_id', 'stage', 'agent', 'action', 'state', 'next_agent', 'waiting_for', 'updated_at', 'timeline']
try:
data = json.loads(path.read_text(encoding='utf-8'))
except Exception as e:
print(f"[FAIL] work/runtime-status.json inválido: {e}")
sys.exit(1)
missing = [key for key in required if key not in data]
if missing:
print(f"[FAIL] work/runtime-status.json incompleto: {', '.join(missing)}")
sys.exit(1)
if not isinstance(data.get('timeline'), list):
print('[FAIL] work/runtime-status.json timeline debe ser una lista')
sys.exit(1)
print('[OK] runtime-status válido')
PY
if [ $? -ne 0 ]; then EXIT_CODE=1; fi
echo ""
echo "── 3) Verificación de tests/build (opcional auto-detect) ─"
echo "── 3) Verificación de tests/build (auto-detect) ───────"
if [ -f "Makefile" ] && grep -qE '^test:' Makefile; then
if make test; then ok "make test OK"; else fail "make test falló"; EXIT_CODE=1; fi
elif [ -f "package.json" ]; then
@@ -127,9 +166,24 @@ else
fi
echo ""
echo "── 4) Resumen ─────────────────────────────────────────"
echo "── 4) Overlay local opcional ─────────────────────────"
if [ -x "scripts/verify.local.sh" ]; then
if ./scripts/verify.local.sh; then
ok "verify.local.sh OK"
else
fail "verify.local.sh falló"
EXIT_CODE=1
fi
elif [ -f "scripts/verify.local.sh" ]; then
warn "scripts/verify.local.sh existe pero no es ejecutable"
else
warn "Sin overlay local (scripts/verify.local.sh)"
fi
echo ""
echo "── 5) Resumen ─────────────────────────────────────────"
if [ $EXIT_CODE -eq 0 ]; then
ok "Harness verificado. Puedes trabajar."
ok "Harness verificado. Template listo para adaptar a cualquier proyecto."
else
fail "Harness NO verificado. Corrige antes de continuar."
fi